pzpbtrsv.f
1 SUBROUTINE pzpbtrsv( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B,
2 $ IB, DESCB, AF, LAF, WORK, LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER TRANS, UPLO
10 INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * ), DESCB( * )
14 COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * )
15* ..
16*
17*
18* Purpose
19* =======
20*
21* PZPBTRSV solves a banded triangular system of linear equations
22*
23* A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
24* or
25* A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS)
26*
27* where A(1:N, JA:JA+N-1) is a banded
28* triangular matrix factor produced by the
29* Cholesky factorization code PZPBTRF
30* and is stored in A(1:N,JA:JA+N-1) and AF.
31* The matrix stored in A(1:N, JA:JA+N-1) is either
32* upper or lower triangular according to UPLO,
33* and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H
34* is selected by the user via the parameter TRANS.
35*
36* Routine PZPBTRF MUST be called first.
37*
38* =====================================================================
39*
40* Arguments
41* =========
42*
43* UPLO (global input) CHARACTER
44* = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored;
45* = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored.
46*
47* TRANS (global input) CHARACTER
48* = 'N': Solve with A(1:N, JA:JA+N-1);
49* = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) );
50*
51* N (global input) INTEGER
52* The number of rows and columns to be operated on, i.e. the
53* order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
54*
55* BW (global input) INTEGER
56* Number of subdiagonals in L or U. 0 <= BW <= N-1
57*
58* NRHS (global input) INTEGER
59* The number of right hand sides, i.e., the number of columns
60* of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
61* NRHS >= 0.
62*
63* A (local input/local output) COMPLEX*16 pointer into
64* local memory to an array with first dimension
65* LLD_A >=(bw+1) (stored in DESCA).
66* On entry, this array contains the local pieces of the
67* N-by-N hermitian banded distributed Cholesky factor L or
68* L^H A(1:N, JA:JA+N-1).
69* This local portion is stored in the packed banded format
70* used in LAPACK. Please see the Notes below and the
71* ScaLAPACK manual for more detail on the format of
72* distributed matrices.
73*
74* JA (global input) INTEGER
75* The index in the global array A that points to the start of
76* the matrix to be operated on (which may be either all of A
77* or a submatrix of A).
78*
79* DESCA (global and local input) INTEGER array of dimension DLEN.
80* if 1D type (DTYPE_A=501), DLEN >= 7;
81* if 2D type (DTYPE_A=1), DLEN >= 9.
82* The array descriptor for the distributed matrix A.
83* Contains information on the mapping of A to memory. Please
84* see NOTES below for full description and options.
85*
86* B (local input/local output) COMPLEX*16 pointer into
87* local memory to an array of local leading dimension lld_b>=NB.
88* On entry, this array contains the
89* local pieces of the right hand sides
90* B(IB:IB+N-1, 1:NRHS).
91* On exit, this contains the local piece of the distributed
92* solution matrix X.
93*
94* IB (global input) INTEGER
95* The row index in the global array B that points to the first
96* row of the matrix to be operated on (which may be either
97* all of B or a submatrix of B).
98*
99* DESCB (global and local input) INTEGER array of dimension DLEN.
100* if 1D type (DTYPE_B=502), DLEN >= 7;
101* if 2D type (DTYPE_B=1), DLEN >= 9.
102* The array descriptor for the distributed matrix B.
103* Contains information on the mapping of B to memory. Please
104* see NOTES below for full description and options.
105*
106* AF (local output) COMPLEX*16 array, dimension LAF.
107* Auxiliary Fillin Space.
108* Fillin is created during the factorization routine
109* PZPBTRF and this is stored in AF. If a linear system
110* is to be solved using PZPBTRS after the factorization
111* routine, AF *must not be altered* after the factorization.
112*
113* LAF (local input) INTEGER
114* Size of user-input Auxiliary Fillin space AF. Must be >=
115* (NB+2*bw)*bw
116* If LAF is not large enough, an error code will be returned
117* and the minimum acceptable size will be returned in AF( 1 )
118*
119* WORK (local workspace/local output)
120* COMPLEX*16 temporary workspace. This space may
121* be overwritten in between calls to routines. WORK must be
122* the size given in LWORK.
123* On exit, WORK( 1 ) contains the minimal LWORK.
124*
125* LWORK (local input or global input) INTEGER
126* Size of user-input workspace WORK.
127* If LWORK is too small, the minimal acceptable size will be
128* returned in WORK(1) and an error code is returned. LWORK>=
129* (bw*NRHS)
130*
131* INFO (global output) INTEGER
132* = 0: successful exit
133* < 0: If the i-th argument is an array and the j-th entry had
134* an illegal value, then INFO = -(i*100+j), if the i-th
135* argument is a scalar and had an illegal value, then
136* INFO = -i.
137*
138* =====================================================================
139*
140*
141* Restrictions
142* ============
143*
144* The following are restrictions on the input parameters. Some of these
145* are temporary and will be removed in future releases, while others
146* may reflect fundamental technical limitations.
147*
148* Non-cyclic restriction: VERY IMPORTANT!
149* P*NB>= mod(JA-1,NB)+N.
150* The mapping for matrices must be blocked, reflecting the nature
151* of the divide and conquer algorithm as a task-parallel algorithm.
152* This formula in words is: no processor may have more than one
153* chunk of the matrix (see the worked example after this list).
154*
155* Blocksize cannot be too small:
156* If the matrix spans more than one processor, the following
157* restriction on NB, the size of each block on each processor,
158* must hold:
159* NB >= 2*BW
160* The bulk of parallel computation is done on the matrix of size
161* O(NB) on each processor. If this is too small, divide and conquer
162* is a poor choice of algorithm.
163*
164* Submatrix reference:
165* JA = IB
166* Alignment restriction that prevents unnecessary communication.
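*
*  Worked example of the non-cyclic restriction (hypothetical values):
*  with P = 4, NB = 100 and JA = 51, mod(JA-1,NB) = 50, so
*  P*NB = 400 >= 50 + N requires N <= 350.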
167*
168*
169* =====================================================================
170*
171*
172* Notes
173* =====
174*
175* If the factorization routine and the solve routine are to be called
176* separately (to solve various sets of righthand sides using the same
177* coefficient matrix), the auxiliary space AF *must not be altered*
178* between calls to the factorization routine and the solve routine.
179*
180* The best algorithm for solving banded and tridiagonal linear systems
181* depends on a variety of parameters, especially the bandwidth.
182* Currently, only algorithms designed for the case N/P >> bw are
183* implemented. These go by many names, including Divide and Conquer,
184* Partitioning, domain decomposition-type, etc.
185*
186* Algorithm description: Divide and Conquer
187*
188* The Divide and Conquer algorithm assumes the matrix is narrowly
189* banded compared with the number of equations. In this situation,
190* it is best to distribute the input matrix A one-dimensionally,
191* with columns atomic and rows divided amongst the processes.
192* The basic algorithm divides the banded matrix up into
193* P pieces with one stored on each processor,
194* and then proceeds in 2 phases for the factorization or 3 for the
195* solution of a linear system.
196* 1) Local Phase:
197* The individual pieces are factored independently and in
198* parallel. These factors are applied to the matrix creating
199* fillin, which is stored in a non-inspectable way in auxiliary
200* space AF. Mathematically, this is equivalent to reordering
201* the matrix A as P A P^T and then factoring the principal
202* leading submatrix of size equal to the sum of the sizes of
203* the matrices factored on each processor. The factors of
204* these submatrices overwrite the corresponding parts of A
205* in memory.
206* 2) Reduced System Phase:
207* A small ( BW*(P-1) ) system is formed representing
208* interaction of the larger blocks, and is stored (as are its
209* factors) in the space AF. A parallel Block Cyclic Reduction
210* algorithm is used. For a linear system, a parallel front solve
211* followed by an analogous backsolve, both using the structure
212* of the factored matrix, are performed.
213* 3) Backsubstitution Phase:
214* For a linear system, a local backsubstitution is performed on
215* each processor in parallel.
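*
*  As a comment-only sketch, the tree used in the Reduced System
*  Phase walks levels with a doubling distance, exactly as in the
*  labelled loops in the executable code below:
*
*     LEVEL_DIST = 1
*     (while this proc still receives at the current level)
*        receive and add contributions from processors
*        MYCOL-LEVEL_DIST and MYCOL+LEVEL_DIST, where they exist
*        LEVEL_DIST = 2*LEVEL_DIST
*     (end while)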
216*
217*
218* Descriptors
219* ===========
220*
221* Descriptors now have *types* and differ from ScaLAPACK 1.0.
222*
223* Note: banded codes can use either the old two-dimensional
224* or new one-dimensional descriptors, though the processor grid in
225* both cases *must be one-dimensional*. We describe both types below.
226*
227* Each global data object is described by an associated description
228* vector. This vector stores the information required to establish
229* the mapping between an object element and its corresponding process
230* and memory location.
231*
232* Let A be a generic term for any 2D block cyclically distributed array.
233* Such a global array has an associated description vector DESCA.
234* In the following comments, the character _ should be read as
235* "of the global array".
236*
237* NOTATION STORED IN EXPLANATION
238* --------------- -------------- --------------------------------------
239* DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
240* DTYPE_A = 1.
241* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
242* the BLACS process grid A is distribu-
243* ted over. The context itself is glo-
244* bal, but the handle (the integer
245* value) may vary.
246* M_A (global) DESCA( M_ ) The number of rows in the global
247* array A.
248* N_A (global) DESCA( N_ ) The number of columns in the global
249* array A.
250* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
251* the rows of the array.
252* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
253* the columns of the array.
254* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
255* row of the array A is distributed.
256* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
257* first column of the array A is
258* distributed.
259* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
260* array. LLD_A >= MAX(1,LOCr(M_A)).
261*
262* Let K be the number of rows or columns of a distributed matrix,
263* and assume that its process grid has dimension p x q.
264* LOCr( K ) denotes the number of elements of K that a process
265* would receive if K were distributed over the p processes of its
266* process column.
267* Similarly, LOCc( K ) denotes the number of elements of K that a
268* process would receive if K were distributed over the q processes of
269* its process row.
270* The values of LOCr() and LOCc() may be determined via a call to the
271* ScaLAPACK tool function, NUMROC:
272* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
273* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
274* An upper bound for these quantities may be computed by:
275* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
276* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
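* For example (hypothetical values): with M = 1000, MB_A = 64 and
* NPROW = 4, ceil(1000/64) = 16 and ceil(16/4) = 4, so
* LOCr( M ) <= 4*64 = 256 rows on each process.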
277*
278*
279* One-dimensional descriptors:
280*
281* One-dimensional descriptors are a new addition to ScaLAPACK since
282* version 1.0. They simplify and shorten the descriptor for 1D
283* arrays.
284*
285* Since ScaLAPACK supports two-dimensional arrays as the fundamental
286* object, we allow 1D arrays to be distributed either over the
287* first dimension of the array (as if the grid were P-by-1) or the
288* 2nd dimension (as if the grid were 1-by-P). This choice is
289* indicated by the descriptor type (501 or 502)
290* as described below.
291*
292* IMPORTANT NOTE: the actual BLACS grid represented by the
293* CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
294* irrespective of which one-dimensional descriptor type
295* (501 or 502) is input.
296* This routine will interpret the grid properly either way.
297* ScaLAPACK routines *do not support intercontext operations* so that
298* the grid passed to a single ScaLAPACK routine *must be the same*
299* for all array descriptors passed to that routine.
300*
301* NOTE: In all cases where 1D descriptors are used, 2D descriptors
302* may also be used, since a one-dimensional array is a special case
303* of a two-dimensional array with one dimension of size unity.
304* The two-dimensional array used in this case *must* be of the
305* proper orientation:
306* If the appropriate one-dimensional descriptor is DTYPE_A=501
307* (1 by P type), then the two dimensional descriptor must
308* have a CTXT value that refers to a 1 by P BLACS grid;
309* If the appropriate one-dimensional descriptor is DTYPE_A=502
310* (P by 1 type), then the two dimensional descriptor must
311* have a CTXT value that refers to a P by 1 BLACS grid.
312*
313*
314* Summary of allowed descriptors, types, and BLACS grids:
315* DTYPE        501         502        1      1
316* BLACS grid   1xP or Px1  1xP or Px1 1xP    Px1
317* -----------------------------------------------------
318* A            OK          NO         OK     NO
319* B            NO          OK         NO     OK
320*
321* Note that a consequence of this chart is that it is not possible
322* for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
323* to opposite requirements for the orientation of the BLACS grid,
324* and as noted before, the *same* BLACS context must be used in
325* all descriptors in a single ScaLAPACK subroutine call.
326*
327* Let A be a generic term for any 1D block cyclically distributed array.
328* Such a global array has an associated description vector DESCA.
329* In the following comments, the character _ should be read as
330* "of the global array".
331*
332* NOTATION STORED IN EXPLANATION
333* --------------- ---------- ------------------------------------------
334* DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
335* TYPE_A = 501: 1-by-P grid.
336* TYPE_A = 502: P-by-1 grid.
337* CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
338* the BLACS process grid A is distribu-
339* ted over. The context itself is glo-
340* bal, but the handle (the integer
341* value) may vary.
342* N_A (global) DESCA( 3 ) The size of the array dimension being
343* distributed.
344* NB_A (global) DESCA( 4 ) The blocking factor used to distribute
345* the distributed dimension of the array.
346* SRC_A (global) DESCA( 5 ) The process row or column over which the
347* first row or column of the array
348* is distributed.
349* LLD_A (local) DESCA( 6 ) The leading dimension of the local array
350* storing the local blocks of the distri-
351* buted array A. Minimum value of LLD_A
352* depends on TYPE_A.
353* TYPE_A = 501: LLD_A >=
354* max( size of undistributed dimension, 1 ).
355* TYPE_A = 502: LLD_A >= max( NB_A, 1 ).
356* Reserved DESCA( 7 ) Reserved for future use.
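*
*  A minimal sketch (hypothetical values) of filling a type-501
*  descriptor for the banded matrix A accepted by this routine:
*
*     DESCA( 1 ) = 501
*     DESCA( 2 ) = ICTXT
*     DESCA( 3 ) = N
*     DESCA( 4 ) = NB
*     DESCA( 5 ) = 0
*     DESCA( 6 ) = BW+1
*     DESCA( 7 ) = 0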
357*
358*
359*
360* =====================================================================
361*
362* Code Developer: Andrew J. Cleary, University of Tennessee.
363* Current address: Lawrence Livermore National Labs.
364* This version released: August, 2001.
365*
366* =====================================================================
367*
368* ..
369* .. Parameters ..
370 DOUBLE PRECISION ONE, ZERO
371 parameter( one = 1.0d+0 )
372 parameter( zero = 0.0d+0 )
373 COMPLEX*16 CONE, CZERO
374 parameter( cone = ( 1.0d+0, 0.0d+0 ) )
375 parameter( czero = ( 0.0d+0, 0.0d+0 ) )
376 INTEGER INT_ONE
377 parameter( int_one = 1 )
378 INTEGER DESCMULT, BIGNUM
379 parameter(descmult = 100, bignum = descmult * descmult)
380 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
381 $ lld_, mb_, m_, nb_, n_, rsrc_
382 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
383 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
384 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
385* ..
386* .. Local Scalars ..
387 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
388 $ idum1, idum2, idum3, ja_new, level_dist, llda,
389 $ lldb, mbw2, mycol, myrow, my_num_cols, nb, np,
390 $ npcol, nprow, np_save, odd_size, ofst,
391 $ part_offset, part_size, return_code, store_m_b,
392 $ store_n_a, work_size_min
393* ..
394* .. Local Arrays ..
395 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
396 $ param_check( 17, 3 )
397* ..
398* .. External Subroutines ..
399 EXTERNAL blacs_get, blacs_gridexit, blacs_gridinfo,
400 $ desc_convert, globchk, pxerbla, reshape, zgemm,
401 $ zgerv2d, zgesd2d, zlamov, zmatadd, ztbtrs,
402 $ ztrmm, ztrtrs
403* ..
404* .. External Functions ..
405 LOGICAL LSAME
406 INTEGER NUMROC
407 EXTERNAL lsame, numroc
408* ..
409* .. Intrinsic Functions ..
410 INTRINSIC ichar, min, mod
411* ..
412* .. Executable Statements ..
413*
414* Test the input parameters
415*
416 info = 0
417*
418* Convert descriptor into standard form for easy access to
419* parameters, check that grid is of right shape.
420*
421 desca_1xp( 1 ) = 501
422 descb_px1( 1 ) = 502
423*
424 CALL desc_convert( desca, desca_1xp, return_code )
425*
426 IF( return_code .NE. 0) THEN
427 info = -( 8*100 + 2 )
428 ENDIF
429*
430 CALL desc_convert( descb, descb_px1, return_code )
431*
432 IF( return_code .NE. 0) THEN
433 info = -( 11*100 + 2 )
434 ENDIF
435*
436* Consistency checks for DESCA and DESCB.
437*
438* Context must be the same
439 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) ) THEN
440 info = -( 11*100 + 2 )
441 ENDIF
442*
443* These are alignment restrictions that may or may not be removed
444* in future releases. -Andy Cleary, April 14, 1996.
445*
446* Block sizes must be the same
447 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) ) THEN
448 info = -( 11*100 + 4 )
449 ENDIF
450*
451* Source processor must be the same
452*
453 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) ) THEN
454 info = -( 11*100 + 5 )
455 ENDIF
456*
457* Get values out of descriptor for use in code.
458*
459 ictxt = desca_1xp( 2 )
460 csrc = desca_1xp( 5 )
461 nb = desca_1xp( 4 )
462 llda = desca_1xp( 6 )
463 store_n_a = desca_1xp( 3 )
464 lldb = descb_px1( 6 )
465 store_m_b = descb_px1( 3 )
466*
467* Get grid parameters
468*
469*
470* Pre-calculate bw^2
471*
472 mbw2 = bw * bw
473*
474 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
475 np = nprow * npcol
476*
477*
478*
479 IF( lsame( uplo, 'U' ) ) THEN
480 idum1 = ichar( 'U' )
481 ELSE IF ( lsame( uplo, 'L' ) ) THEN
482 idum1 = ichar( 'L' )
483 ELSE
484 info = -1
485 END IF
486*
487 IF( lsame( trans, 'N' ) ) THEN
488 idum2 = ichar( 'N' )
489 ELSE IF ( lsame( trans, 'C' ) ) THEN
490 idum2 = ichar( 'C' )
491 ELSE
492 info = -2
493 END IF
494*
495 IF( lwork .LT. -1) THEN
496 info = -14
497 ELSE IF ( lwork .EQ. -1 ) THEN
498 idum3 = -1
499 ELSE
500 idum3 = 1
501 ENDIF
502*
503 IF( n .LT. 0 ) THEN
504 info = -3
505 ENDIF
506*
507 IF( n+ja-1 .GT. store_n_a ) THEN
508 info = -( 8*100 + 6 )
509 ENDIF
510*
511 IF(( bw .GT. n-1 ) .OR.
512 $ ( bw .LT. 0 ) ) THEN
513 info = -4
514 ENDIF
515*
516 IF( llda .LT. (bw+1) ) THEN
517 info = -( 8*100 + 6 )
518 ENDIF
519*
520 IF( nb .LE. 0 ) THEN
521 info = -( 8*100 + 4 )
522 ENDIF
523*
524 IF( n+ib-1 .GT. store_m_b ) THEN
525 info = -( 11*100 + 3 )
526 ENDIF
527*
528 IF( lldb .LT. nb ) THEN
529 info = -( 11*100 + 6 )
530 ENDIF
531*
532 IF( nrhs .LT. 0 ) THEN
533 info = -5
534 ENDIF
535*
536* Current alignment restriction
537*
538 IF( ja .NE. ib) THEN
539 info = -7
540 ENDIF
541*
542* Argument checking that is specific to Divide & Conquer routine
543*
544 IF( nprow .NE. 1 ) THEN
545 info = -( 8*100+2 )
546 ENDIF
547*
548 IF( n .GT. np*nb-mod( ja-1, nb )) THEN
549 info = -( 3 )
550 CALL pxerbla( ictxt,
551 $ 'PZPBTRSV, D&C alg.: only 1 block per proc',
552 $ -info )
553 RETURN
554 ENDIF
555*
556 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*bw )) THEN
557 info = -( 8*100+4 )
558 CALL pxerbla( ictxt,
559 $ 'PZPBTRSV, D&C alg.: NB too small',
560 $ -info )
561 RETURN
562 ENDIF
563*
564*
565 work_size_min =
566 $ bw*nrhs
567*
568 work( 1 ) = work_size_min
569*
570 IF( lwork .LT. work_size_min ) THEN
571 IF( lwork .NE. -1 ) THEN
572 info = -14
573 CALL pxerbla( ictxt,
574 $ 'PZPBTRSV: worksize error',
575 $ -info )
576 ENDIF
577 RETURN
578 ENDIF
579*
580* Pack params and positions into arrays for global consistency check
581*
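* (In the position array below, a descriptor entry is encoded as
* argument-number*100 + entry-number; e.g. 1105 means entry 5 of
* argument 11, DESCB. A scalar argument is encoded by its argument
* number alone, matching the INFO convention described above.)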
582 param_check( 17, 1 ) = descb(5)
583 param_check( 16, 1 ) = descb(4)
584 param_check( 15, 1 ) = descb(3)
585 param_check( 14, 1 ) = descb(2)
586 param_check( 13, 1 ) = descb(1)
587 param_check( 12, 1 ) = ib
588 param_check( 11, 1 ) = desca(5)
589 param_check( 10, 1 ) = desca(4)
590 param_check( 9, 1 ) = desca(3)
591 param_check( 8, 1 ) = desca(1)
592 param_check( 7, 1 ) = ja
593 param_check( 6, 1 ) = nrhs
594 param_check( 5, 1 ) = bw
595 param_check( 4, 1 ) = n
596 param_check( 3, 1 ) = idum3
597 param_check( 2, 1 ) = idum2
598 param_check( 1, 1 ) = idum1
599*
600 param_check( 17, 2 ) = 1105
601 param_check( 16, 2 ) = 1104
602 param_check( 15, 2 ) = 1103
603 param_check( 14, 2 ) = 1102
604 param_check( 13, 2 ) = 1101
605 param_check( 12, 2 ) = 10
606 param_check( 11, 2 ) = 805
607 param_check( 10, 2 ) = 804
608 param_check( 9, 2 ) = 803
609 param_check( 8, 2 ) = 801
610 param_check( 7, 2 ) = 7
611 param_check( 6, 2 ) = 5
612 param_check( 5, 2 ) = 4
613 param_check( 4, 2 ) = 3
614 param_check( 3, 2 ) = 14
615 param_check( 2, 2 ) = 2
616 param_check( 1, 2 ) = 1
617*
618* Want to find errors with MIN( ), so if no error, set it to a big
619* number. If there already is an error, multiply by the
620* descriptor multiplier.
621*
622 IF( info.GE.0 ) THEN
623 info = bignum
624 ELSE IF( info.LT.-descmult ) THEN
625 info = -info
626 ELSE
627 info = -info * descmult
628 END IF
629*
630* Check consistency across processors
631*
632 CALL globchk( ictxt, 17, param_check, 17,
633 $ param_check( 1, 3 ), info )
634*
635* Prepare output: set info = 0 if no error, and divide by DESCMULT
636* if error is not in a descriptor entry.
637*
638 IF( info.EQ.bignum ) THEN
639 info = 0
640 ELSE IF( mod( info, descmult ) .EQ. 0 ) THEN
641 info = -info / descmult
642 ELSE
643 info = -info
644 END IF
645*
646 IF( info.LT.0 ) THEN
647 CALL pxerbla( ictxt, 'PZPBTRSV', -info )
648 RETURN
649 END IF
650*
651* Quick return if possible
652*
653 IF( n.EQ.0 )
654 $ RETURN
655*
656 IF( nrhs.EQ.0 )
657 $ RETURN
658*
659*
660* Adjust addressing into matrix space to properly get into
661* the beginning part of the relevant data
662*
663 part_offset = nb*( (ja-1)/(npcol*nb) )
664*
665 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb ) THEN
666 part_offset = part_offset + nb
667 ENDIF
668*
669 IF ( mycol .LT. csrc ) THEN
670 part_offset = part_offset - nb
671 ENDIF
672*
673* Form a new BLACS grid (the "standard form" grid) with only procs
674* holding part of the matrix, of size 1xNP where NP is adjusted,
675* starting at csrc=0, with JA modified to reflect dropped procs.
676*
677* First processor to hold part of the matrix:
678*
679 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
680*
681* Calculate the new JA while dropping off unused processors.
682*
683 ja_new = mod( ja-1, nb ) + 1
684*
685* Save and compute new value of NP
686*
687 np_save = np
688 np = ( ja_new+n-2 )/nb + 1
689*
690* Call utility routine that forms "standard-form" grid
691*
692 CALL reshape( ictxt, int_one, ictxt_new, int_one,
693 $ first_proc, int_one, np )
694*
695* Use new context from standard grid as context.
696*
697 ictxt_save = ictxt
698 ictxt = ictxt_new
699 desca_1xp( 2 ) = ictxt_new
700 descb_px1( 2 ) = ictxt_new
701*
702* Get information about new grid.
703*
704 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
705*
706* Drop out processors that do not have part of the matrix.
707*
708 IF( myrow .LT. 0 ) THEN
709 GOTO 1234
710 ENDIF
711*
712* ********************************
713* Values reused throughout routine
714*
715* User-input value of partition size
716*
717 part_size = nb
718*
719* Number of columns in each processor
720*
721 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
722*
723* Offset in columns to beginning of main partition in each proc
724*
725 IF ( mycol .EQ. 0 ) THEN
726 part_offset = part_offset+mod( ja_new-1, part_size )
727 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
728 ENDIF
729*
730* Offset in elements
731*
732 ofst = part_offset*llda
733*
734* Size of main (or odd) partition in each processor
735*
736 odd_size = my_num_cols
737 IF ( mycol .LT. np-1 ) THEN
738 odd_size = odd_size - bw
739 ENDIF
740*
741*
742*
743* Begin main code
744*
745 IF ( lsame( uplo, 'L' ) ) THEN
746*
747 IF ( lsame( trans, 'N' ) ) THEN
748*
749* Frontsolve
750*
751*
752******************************************
753* Local computation phase
754******************************************
755*
756* Use main partition in each processor to solve locally
757*
758 CALL ztbtrs( uplo, 'N', 'N', odd_size,
759 $ bw, nrhs,
760 $ a( ofst+1 ), llda,
761 $ b( part_offset+1 ), lldb, info )
762*
763*
764 IF ( mycol .LT. np-1 ) THEN
765* Use factorization of odd-even connection block to modify
766* locally stored portion of right hand side(s)
767*
768*
769* First copy and multiply it into temporary storage,
770* then use it on RHS
771*
772 CALL zlamov( 'N', bw, nrhs,
773 $ b( part_offset+odd_size-bw+1), lldb,
774 $ work( 1 ), bw )
775*
776 CALL ztrmm( 'L', 'U', 'N', 'N', bw, nrhs, -cone,
777 $ a(( ofst+(bw+1)+(odd_size-bw)*llda )), llda-1,
778 $ work( 1 ), bw )
779*
780 CALL zmatadd( bw, nrhs, cone, work( 1 ), bw,
781 $ cone, b( part_offset+odd_size+1 ), lldb )
782*
783 ENDIF
784*
785*
786 IF ( mycol .NE. 0 ) THEN
787* Use the "spike" fillin to calculate contribution to previous
788* processor's righthand-side.
789*
790 CALL zgemm( 'C', 'N', bw, nrhs, odd_size, -cone, af( 1 ),
791 $ odd_size, b( part_offset+1 ), lldb, czero,
792 $ work( 1+bw-bw ), bw )
793 ENDIF
794*
795*
796************************************************
797* Formation and solution of reduced system
798************************************************
799*
800*
801* Send modifications to prior processor's right hand sides
802*
803 IF( mycol .GT. 0) THEN
804*
805 CALL zgesd2d( ictxt, bw, nrhs,
806 $ work( 1 ), bw,
807 $ 0, mycol - 1 )
808*
809 ENDIF
810*
811* Receive modifications to processor's right hand sides
812*
813 IF( mycol .LT. npcol-1) THEN
814*
815 CALL zgerv2d( ictxt, bw, nrhs,
816 $ work( 1 ), bw,
817 $ 0, mycol + 1 )
818*
819* Combine contribution to locally stored right hand sides
820*
821 CALL zmatadd( bw, nrhs, cone,
822 $ work( 1 ), bw, cone,
823 $ b( part_offset+odd_size + 1 ), lldb )
824*
825 ENDIF
826*
827*
828* The last processor does not participate in the solution of the
829* reduced system, having sent its contribution already.
830 IF( mycol .EQ. npcol-1 ) THEN
831 GOTO 14
832 ENDIF
833*
834*
835* *************************************
836* Modification Loop
837*
838* The distance for sending and receiving for each level starts
839* at 1 for the first level.
840 level_dist = 1
841*
842* Do until this proc is needed to modify other procs' equations
843*
844 12 CONTINUE
845 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 11
846*
847* Receive and add contribution to righthand sides from left
848*
849 IF( mycol-level_dist .GE. 0 ) THEN
850*
851 CALL zgerv2d( ictxt, bw, nrhs,
852 $ work( 1 ),
853 $ bw, 0, mycol-level_dist )
854*
855 CALL zmatadd( bw, nrhs, cone,
856 $ work( 1 ), bw, cone,
857 $ b( part_offset+odd_size + 1 ), lldb )
858*
859 ENDIF
860*
861* Receive and add contribution to righthand sides from right
862*
863 IF( mycol+level_dist .LT. npcol-1 ) THEN
864*
865 CALL zgerv2d( ictxt, bw, nrhs,
866 $ work( 1 ),
867 $ bw, 0, mycol+level_dist )
868*
869 CALL zmatadd( bw, nrhs, cone,
870 $ work( 1 ), bw, cone,
871 $ b( part_offset+odd_size + 1 ), lldb )
872*
873 ENDIF
874*
875 level_dist = level_dist*2
876*
877 GOTO 12
878 11 CONTINUE
879* [End of GOTO Loop]
880*
881*
882*
883* *********************************
884* Calculate and use this proc's blocks to modify other procs
885*
886* Solve with diagonal block
887*
888 CALL ztrtrs( 'L', 'N', 'N', bw, nrhs, af( odd_size*bw+mbw2+1 ),
889 $ bw, b( part_offset+odd_size+1 ), lldb, info )
890*
891 IF( info.NE.0 ) THEN
892 GO TO 1000
893 ENDIF
894*
895*
896*
897* *********
898 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
899*
900* Calculate contribution from this block to next diagonal block
901*
902 CALL zgemm( 'C', 'N', bw, nrhs, bw, -cone,
903 $ af( (odd_size)*bw+1 ),
904 $ bw,
905 $ b( part_offset+odd_size+1 ),
906 $ lldb, czero,
907 $ work( 1 ),
908 $ bw )
909*
910* Send contribution to diagonal block's owning processor.
911*
912 CALL zgesd2d( ictxt, bw, nrhs,
913 $ work( 1 ),
914 $ bw, 0, mycol+level_dist )
915*
916 ENDIF
917* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
918*
919* ************
920 IF( (mycol/level_dist .GT. 0 ).AND.
921 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
922*
923*
924* Use offdiagonal block to calculate modification to diag block
925* of processor to the left
926*
927 CALL zgemm( 'N', 'N', bw, nrhs, bw, -cone,
928 $ af( odd_size*bw+2*mbw2+1 ),
929 $ bw,
930 $ b( part_offset+odd_size+1 ),
931 $ lldb, czero,
932 $ work( 1 ),
933 $ bw )
934*
935* Send contribution to diagonal block's owning processor.
936*
937 CALL zgesd2d( ictxt, bw, nrhs,
938 $ work( 1 ),
939 $ bw, 0, mycol-level_dist )
940*
941 ENDIF
942* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
943*
944 14 CONTINUE
945*
946 ELSE
947*
948******************** BACKSOLVE *************************************
949*
950********************************************************************
951* .. Begin reduced system phase of algorithm ..
952********************************************************************
953*
954*
955*
956* The last processor does not participate in the solution of the
957* reduced system and just waits to receive its solution.
958 IF( mycol .EQ. npcol-1 ) THEN
959 GOTO 24
960 ENDIF
961*
962* Determine number of steps in tree loop
963*
964 level_dist = 1
965 27 CONTINUE
966 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 26
967*
968 level_dist = level_dist*2
969*
970 GOTO 27
971 26 CONTINUE
972*
973*
974 IF( (mycol/level_dist .GT. 0 ).AND.
975 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
976*
977* Receive solution from processor to left
978*
979 CALL zgerv2d( ictxt, bw, nrhs,
980 $ work( 1 ),
981 $ bw, 0, mycol-level_dist )
982*
983*
984* Use offdiagonal block to calculate modification to RHS stored
985* on this processor
986*
987 CALL zgemm( 'C', 'N', bw, nrhs, bw, -cone,
988 $ af( odd_size*bw+2*mbw2+1 ),
989 $ bw,
990 $ work( 1 ),
991 $ bw, cone,
992 $ b( part_offset+odd_size+1 ),
993 $ lldb )
994 ENDIF
995* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
996*
997*
998 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
999*
1000* Receive solution from processor to right
1001*
1002 CALL zgerv2d( ictxt, bw, nrhs,
1003 $ work( 1 ),
1004 $ bw, 0, mycol+level_dist )
1005*
1006* Calculate contribution from this block to next diagonal block
1007*
1008 CALL zgemm( 'N', 'N', bw, nrhs, bw, -cone,
1009 $ af( (odd_size)*bw+1 ),
1010 $ bw,
1011 $ work( 1 ),
1012 $ bw, cone,
1013 $ b( part_offset+odd_size+1 ),
1014 $ lldb )
1015*
1016 ENDIF
1017* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1018*
1019*
1020* Solve with diagonal block
1021*
1022 CALL ztrtrs( 'L', 'C', 'N', bw, nrhs, af( odd_size*bw+mbw2+1 ),
1023 $ bw, b( part_offset+odd_size+1 ), lldb, info )
1024*
1025 IF( info.NE.0 ) THEN
1026 GO TO 1000
1027 ENDIF
1028*
1029*
1030*
1031***Modification Loop *******
1032*
1033 22 CONTINUE
1034 IF( level_dist .EQ. 1 ) GOTO 21
1035*
1036 level_dist = level_dist/2
1037*
1038* Send solution to the right
1039*
1040 IF( mycol+level_dist .LT. npcol-1 ) THEN
1041*
1042 CALL zgesd2d( ictxt, bw, nrhs,
1043 $ b( part_offset+odd_size+1 ),
1044 $ lldb, 0, mycol+level_dist )
1045*
1046 ENDIF
1047*
1048* Send solution to left
1049*
1050 IF( mycol-level_dist .GE. 0 ) THEN
1051*
1052 CALL zgesd2d( ictxt, bw, nrhs,
1053 $ b( part_offset+odd_size+1 ),
1054 $ lldb, 0, mycol-level_dist )
1055*
1056 ENDIF
1057*
1058 GOTO 22
1059 21 CONTINUE
1060* [End of GOTO Loop]
1061*
1062 24 CONTINUE
1063* [Processor npcol - 1 jumped to here to await next stage]
1064*
1065*******************************
1066* Reduced system has been solved, communicate solutions to nearest
1067* neighbors in preparation for local computation phase.
1068*
1069*
1070* Send elements of solution to next proc
1071*
1072 IF( mycol .LT. npcol-1) THEN
1073*
1074 CALL zgesd2d( ictxt, bw, nrhs,
1075 $ b( part_offset+odd_size+1 ), lldb,
1076 $ 0, mycol +1 )
1077*
1078 ENDIF
1079*
1080* Receive modifications to processor's right hand sides
1081*
1082 IF( mycol .GT. 0) THEN
1083*
1084 CALL zgerv2d( ictxt, bw, nrhs,
1085 $ work( 1 ), bw,
1086 $ 0, mycol - 1 )
1087*
1088 ENDIF
1089*
1090*
1091*
1092**********************************************
1093* Local computation phase
1094**********************************************
1095*
1096 IF ( mycol .NE. 0 ) THEN
1097* Use the "spike" fillin to calculate contribution from previous
1098* processor's solution.
1099*
1100 CALL zgemm( 'N', 'N', odd_size, nrhs, bw, -cone, af( 1 ),
1101 $ odd_size, work( 1+bw-bw ), bw, cone,
1102 $ b( part_offset+1 ), lldb )
1103*
1104 ENDIF
1105*
1106*
1107 IF ( mycol .LT. np-1 ) THEN
1108* Use factorization of odd-even connection block to modify
1109* locally stored portion of right hand side(s)
1110*
1111*
1112* First copy and multiply it into temporary storage,
1113* then use it on RHS
1114*
1115 CALL zlamov( 'N', bw, nrhs, b( part_offset+odd_size+1), lldb,
1116 $ work( 1+bw-bw ), bw )
1117*
1118 CALL ztrmm( 'L', 'U', 'C', 'N', bw, nrhs, -cone,
1119 $ a(( ofst+(bw+1)+(odd_size-bw)*llda )), llda-1,
1120 $ work( 1+bw-bw ), bw )
1121*
1122 CALL zmatadd( bw, nrhs, cone, work( 1+bw-bw ), bw, cone,
1123 $ b( part_offset+odd_size-bw+1 ), lldb )
1124*
1125 ENDIF
1126*
1127* Use main partition in each processor to solve locally
1128*
1129 CALL ztbtrs( uplo, 'C', 'N', odd_size,
1130 $ bw, nrhs,
1131 $ a( ofst+1 ),
1132 $ llda, b( part_offset+1 ),
1133 $ lldb, info )
1134*
1135 ENDIF
1136* End of "IF( LSAME( TRANS, 'N' ) )"...
1137*
1138*
1139 ELSE
1140***************************************************************
1141* CASE UPLO = 'U' *
1142***************************************************************
1143 IF ( lsame( trans, 'C' ) ) THEN
1144*
1145* Frontsolve
1146*
1147*
1148******************************************
1149* Local computation phase
1150******************************************
1151*
1152* Use main partition in each processor to solve locally
1153*
1154 CALL ztbtrs( uplo, 'C', 'N', odd_size,
1155 $ bw, nrhs,
1156 $ a( ofst+1 ), llda,
1157 $ b( part_offset+1 ), lldb, info )
1158*
1159*
1160 IF ( mycol .LT. np-1 ) THEN
1161* Use factorization of odd-even connection block to modify
1162* locally stored portion of right hand side(s)
1163*
1164*
1165* First copy and multiply it into temporary storage,
1166* then use it on RHS
1167*
1168 CALL zlamov( 'N', bw, nrhs,
1169 $ b( part_offset+odd_size-bw+1), lldb,
1170 $ work( 1 ), bw )
1171*
1172 CALL ztrmm( 'L', 'L', 'C', 'N', bw, nrhs, -cone,
1173 $ a(( ofst+1+odd_size*llda )), llda-1, work( 1 ),
1174 $ bw )
1175*
1176 CALL zmatadd( bw, nrhs, cone, work( 1 ), bw,
1177 $ cone, b( part_offset+odd_size+1 ), lldb )
1178*
1179 ENDIF
1180*
1181*
1182 IF ( mycol .NE. 0 ) THEN
1183* Use the "spike" fillin to calculate contribution to previous
1184* processor's righthand-side.
1185*
1186 CALL zgemm( 'C', 'N', bw, nrhs, odd_size, -cone, af( 1 ),
1187 $ odd_size, b( part_offset+1 ), lldb, czero,
1188 $ work( 1+bw-bw ), bw )
1189 ENDIF
1190*
1191*
1192************************************************
1193* Formation and solution of reduced system
1194************************************************
1195*
1196*
1197* Send modifications to prior processor's right hand sides
1198*
1199 IF( mycol .GT. 0) THEN
1200*
1201 CALL zgesd2d( ictxt, bw, nrhs,
1202 $ work( 1 ), bw,
1203 $ 0, mycol - 1 )
1204*
1205 ENDIF
1206*
1207* Receive modifications to processor's right hand sides
1208*
1209 IF( mycol .LT. npcol-1) THEN
1210*
1211 CALL zgerv2d( ictxt, bw, nrhs,
1212 $ work( 1 ), bw,
1213 $ 0, mycol + 1 )
1214*
1215* Combine contribution to locally stored right hand sides
1216*
1217 CALL zmatadd( bw, nrhs, cone,
1218 $ work( 1 ), bw, cone,
1219 $ b( part_offset+odd_size + 1 ), lldb )
1220*
1221 ENDIF
1222*
1223*
1224* The last processor does not participate in the solution of the
1225* reduced system, having sent its contribution already.
1226 IF( mycol .EQ. npcol-1 ) THEN
1227 GOTO 44
1228 ENDIF
1229*
1230*
1231* *************************************
1232* Modification Loop
1233*
1234* The distance for sending and receiving for each level starts
1235* at 1 for the first level.
1236 level_dist = 1
1237*
1238* Do until this proc is needed to modify other procs' equations
1239*
1240 42 CONTINUE
1241 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 41
1242*
1243* Receive and add contribution to righthand sides from left
1244*
1245 IF( mycol-level_dist .GE. 0 ) THEN
1246*
1247 CALL zgerv2d( ictxt, bw, nrhs,
1248 $ work( 1 ),
1249 $ bw, 0, mycol-level_dist )
1250*
1251 CALL zmatadd( bw, nrhs, cone,
1252 $ work( 1 ), bw, cone,
1253 $ b( part_offset+odd_size + 1 ), lldb )
1254*
1255 ENDIF
1256*
1257* Receive and add contribution to righthand sides from right
1258*
1259 IF( mycol+level_dist .LT. npcol-1 ) THEN
1260*
1261 CALL zgerv2d( ictxt, bw, nrhs,
1262 $ work( 1 ),
1263 $ bw, 0, mycol+level_dist )
1264*
1265 CALL zmatadd( bw, nrhs, cone,
1266 $ work( 1 ), bw, cone,
1267 $ b( part_offset+odd_size + 1 ), lldb )
1268*
1269 ENDIF
1270*
1271 level_dist = level_dist*2
1272*
1273 GOTO 42
1274 41 CONTINUE
1275* [End of GOTO Loop]
1276*
1277*
1278*
1279* *********************************
1280* Calculate and use this proc's blocks to modify other procs
1281*
1282* Solve with diagonal block
1283*
1284 CALL ztrtrs( 'L', 'N', 'N', bw, nrhs, af( odd_size*bw+mbw2+1 ),
1285 $ bw, b( part_offset+odd_size+1 ), lldb, info )
1286*
1287 IF( info.NE.0 ) THEN
1288 GO TO 1000
1289 ENDIF
1290*
1291*
1292*
1293* *********
1294 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1295*
1296* Calculate contribution from this block to next diagonal block
1297*
1298 CALL zgemm( 'C', 'N', bw, nrhs, bw, -cone,
1299 $ af( (odd_size)*bw+1 ),
1300 $ bw,
1301 $ b( part_offset+odd_size+1 ),
1302 $ lldb, czero,
1303 $ work( 1 ),
1304 $ bw )
1305*
1306* Send contribution to diagonal block's owning processor.
1307*
1308 CALL zgesd2d( ictxt, bw, nrhs,
1309 $ work( 1 ),
1310 $ bw, 0, mycol+level_dist )
1311*
1312 ENDIF
1313* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1314*
1315* ************
1316 IF( (mycol/level_dist .GT. 0 ).AND.
1317 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1318*
1319*
1320* Use offdiagonal block to calculate modification to diag block
1321* of processor to the left
1322*
1323 CALL zgemm( 'N', 'N', bw, nrhs, bw, -cone,
1324 $ af( odd_size*bw+2*mbw2+1 ),
1325 $ bw,
1326 $ b( part_offset+odd_size+1 ),
1327 $ lldb, czero,
1328 $ work( 1 ),
1329 $ bw )
1330*
1331* Send contribution to diagonal block's owning processor.
1332*
1333 CALL zgesd2d( ictxt, bw, nrhs,
1334 $ work( 1 ),
1335 $ bw, 0, mycol-level_dist )
1336*
1337 ENDIF
1338* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1339*
1340 44 CONTINUE
1341*
1342 ELSE
1343*
1344******************** BACKSOLVE *************************************
1345*
1346********************************************************************
1347* .. Begin reduced system phase of algorithm ..
1348********************************************************************
1349*
1350*
1351*
1352* The last processor does not participate in the solution of the
1353* reduced system and just waits to receive its solution.
1354 IF( mycol .EQ. npcol-1 ) THEN
1355 GOTO 54
1356 ENDIF
1357*
1358* Determine number of steps in tree loop
1359*
1360 level_dist = 1
1361 57 CONTINUE
1362 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 56
1363*
1364 level_dist = level_dist*2
1365*
1366 GOTO 57
1367 56 CONTINUE
1368*
1369*
1370 IF( (mycol/level_dist .GT. 0 ).AND.
1371 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1372*
1373* Receive solution from processor to left
1374*
1375 CALL zgerv2d( ictxt, bw, nrhs,
1376 $ work( 1 ),
1377 $ bw, 0, mycol-level_dist )
1378*
1379*
1380* Use offdiagonal block to calculate modification to RHS stored
1381* on this processor
1382*
1383 CALL zgemm( 'C', 'N', bw, nrhs, bw, -cone,
1384 $ af( odd_size*bw+2*mbw2+1 ),
1385 $ bw,
1386 $ work( 1 ),
1387 $ bw, cone,
1388 $ b( part_offset+odd_size+1 ),
1389 $ lldb )
1390 ENDIF
1391* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1392*
1393*
1394 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1395*
1396* Receive solution from processor to right
1397*
1398 CALL zgerv2d( ictxt, bw, nrhs,
1399 $ work( 1 ),
1400 $ bw, 0, mycol+level_dist )
1401*
1402* Calculate contribution from this block to next diagonal block
1403*
1404 CALL zgemm( 'N', 'N', bw, nrhs, bw, -cone,
1405 $ af( (odd_size)*bw+1 ),
1406 $ bw,
1407 $ work( 1 ),
1408 $ bw, cone,
1409 $ b( part_offset+odd_size+1 ),
1410 $ lldb )
1411*
1412 ENDIF
1413* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1414*
1415*
1416* Solve with diagonal block
1417*
1418 CALL ztrtrs( 'L', 'C', 'N', bw, nrhs, af( odd_size*bw+mbw2+1 ),
1419 $ bw, b( part_offset+odd_size+1 ), lldb, info )
1420*
1421 IF( info.NE.0 ) THEN
1422 GO TO 1000
1423 ENDIF
1424*
1425*
1426*
1427***Modification Loop *******
1428*
1429 52 CONTINUE
1430 IF( level_dist .EQ. 1 ) GOTO 51
1431*
1432 level_dist = level_dist/2
1433*
1434* Send solution to the right
1435*
1436 IF( mycol+level_dist .LT. npcol-1 ) THEN
1437*
1438 CALL zgesd2d( ictxt, bw, nrhs,
1439 $ b( part_offset+odd_size+1 ),
1440 $ lldb, 0, mycol+level_dist )
1441*
1442 ENDIF
1443*
1444* Send solution to left
1445*
1446 IF( mycol-level_dist .GE. 0 ) THEN
1447*
1448 CALL zgesd2d( ictxt, bw, nrhs,
1449 $ b( part_offset+odd_size+1 ),
1450 $ lldb, 0, mycol-level_dist )
1451*
1452 ENDIF
1453*
1454 GOTO 52
1455 51 CONTINUE
1456* [End of GOTO Loop]
1457*
1458 54 CONTINUE
1459* [Processor npcol - 1 jumped to here to await next stage]
1460*
1461*******************************
1462* Reduced system has been solved, communicate solutions to nearest
1463* neighbors in preparation for local computation phase.
1464*
1465*
1466* Send elements of solution to next proc
1467*
1468 IF( mycol .LT. npcol-1) THEN
1469*
1470 CALL zgesd2d( ictxt, bw, nrhs,
1471 $ b( part_offset+odd_size+1 ), lldb,
1472 $ 0, mycol +1 )
1473*
1474 ENDIF
1475*
1476* Receive modifications to processor's right hand sides
1477*
1478 IF( mycol .GT. 0) THEN
1479*
1480 CALL zgerv2d( ictxt, bw, nrhs,
1481 $ work( 1 ), bw,
1482 $ 0, mycol - 1 )
1483*
1484 ENDIF
1485*
1486*
1487*
1488**********************************************
1489* Local computation phase
1490**********************************************
1491*
1492 IF ( mycol .NE. 0 ) THEN
1493* Use the "spike" fillin to calculate contribution from previous
1494* processor's solution.
1495*
1496 CALL zgemm( 'N', 'N', odd_size, nrhs, bw, -cone, af( 1 ),
1497 $ odd_size, work( 1+bw-bw ), bw, cone,
1498 $ b( part_offset+1 ), lldb )
1499*
1500 ENDIF
1501*
1502*
1503 IF ( mycol .LT. np-1 ) THEN
1504* Use factorization of odd-even connection block to modify
1505* locally stored portion of right hand side(s)
1506*
1507*
1508* First copy and multiply it into temporary storage,
1509* then use it on RHS
1510*
1511 CALL zlamov( 'N', bw, nrhs, b( part_offset+odd_size+1), lldb,
1512 $ work( 1+bw-bw ), bw )
1513*
1514 CALL ztrmm( 'L', 'L', 'N', 'N', bw, nrhs, -cone,
1515 $ a(( ofst+1+odd_size*llda )), llda-1,
1516 $ work( 1+bw-bw ), bw )
1517*
1518 CALL zmatadd( bw, nrhs, cone, work( 1+bw-bw ), bw, cone,
1519 $ b( part_offset+odd_size-bw+1 ), lldb )
1520*
1521 ENDIF
1522*
1523* Use main partition in each processor to solve locally
1524*
1525 CALL ztbtrs( uplo, 'N', 'N', odd_size,
1526 $ bw, nrhs,
1527 $ a( ofst+1 ),
1528 $ llda, b( part_offset+1 ),
1529 $ lldb, info )
1530*
1531 ENDIF
1532* End of "IF( LSAME( TRANS, 'N' ) )"...
1533*
1534*
1535 ENDIF
1536* End of "IF( LSAME( UPLO, 'L' ) )"...
1537 1000 CONTINUE
1538*
1539*
1540* Free BLACS space used to hold standard-form grid.
1541*
1542 IF( ictxt_save .NE. ictxt_new ) THEN
1543 CALL blacs_gridexit( ictxt_new )
1544 ENDIF
1545*
1546 1234 CONTINUE
1547*
1548* Restore saved input parameters
1549*
1550 ictxt = ictxt_save
1551 np = np_save
1552*
1553* Output minimum worksize
1554*
1555 work( 1 ) = work_size_min
1556*
1557*
1558 RETURN
1559*
1560* End of PZPBTRSV
1561*
1562 END