SCALAPACK 2.2.2
pzpttrsv.f
1 SUBROUTINE pzpttrsv( UPLO, TRANS, N, NRHS, D, E, JA, DESCA, B, IB,
2 $ DESCB, AF, LAF, WORK, LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER TRANS, UPLO
10 INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * ), DESCB( * )
14 COMPLEX*16 AF( * ), B( * ), E( * ), WORK( * )
15 DOUBLE PRECISION D( * )
16* ..
17*
18*
19* Purpose
20* =======
21*
22* PZPTTRSV solves a tridiagonal triangular system of linear equations
23*
24* A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
25* or
26* A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS)
27*
28* where A(1:N, JA:JA+N-1) is a tridiagonal
29* triangular matrix factor produced by the
30* Cholesky factorization code PZPTTRF
31* and is stored in A(1:N,JA:JA+N-1) and AF.
32* The matrix stored in A(1:N, JA:JA+N-1) is either
33* upper or lower triangular according to UPLO,
34* and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H
35* is dictated by the user by the parameter TRANS.
36*
37* Routine PZPTTRF MUST be called first.
38*
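*  As an illustrative sketch only (argument names as in this routine's
*  interface; the PZPTTRF call is assumed to follow its documented
*  argument list), a typical forward solve with the lower (UPLO = 'L')
*  factor is:
*
*     CALL PZPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO )
*     IF( INFO.EQ.0 )
*    $   CALL PZPTTRSV( 'L', 'N', N, NRHS, D, E, JA, DESCA, B, IB,
*    $                  DESCB, AF, LAF, WORK, LWORK, INFO )
*
*  A complete solve with the factorization typically also involves a
*  scaling by the diagonal and a second triangular solve with
*  TRANS = 'C', as is done by the solve driver PZPTTRS.
*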
39* =====================================================================
40*
41* Arguments
42* =========
43*
44* UPLO (global input) CHARACTER
45* = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored;
46* = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored.
47*
48* TRANS (global input) CHARACTER
49* = 'N': Solve with A(1:N, JA:JA+N-1);
50* = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) );
51*
52* N (global input) INTEGER
53* The number of rows and columns to be operated on, i.e. the
54* order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
55*
56* NRHS (global input) INTEGER
57* The number of right hand sides, i.e., the number of columns
58* of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
59* NRHS >= 0.
60*
61*  D       (local input/local output) DOUBLE PRECISION pointer to local
62*          part of global vector storing the main diagonal of the
63*          matrix.
64*          On entry, this array must contain the factors of the
65*          matrix, as computed by PZPTTRF.
66*          Must be of size >= DESCA( NB_ ).
67*
68*  E       (local input/local output) COMPLEX*16 pointer to local
69*          part of global vector storing the upper diagonal of the
70*          matrix. Globally, E( N ) is not referenced, and E must be
71*          aligned with D.
72*          On entry, this array must contain the factors of the
73*          matrix, as computed by PZPTTRF.
74* Must be of size >= DESCA( NB_ ).
75*
76* JA (global input) INTEGER
77* The index in the global array A that points to the start of
78* the matrix to be operated on (which may be either all of A
79* or a submatrix of A).
80*
81* DESCA (global and local input) INTEGER array of dimension DLEN.
82* if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
83* if 2D type (DTYPE_A=1), DLEN >= 9.
84* The array descriptor for the distributed matrix A.
85* Contains information of mapping of A to memory. Please
86* see NOTES below for full description and options.
87*
88* B (local input/local output) COMPLEX*16 pointer into
89* local memory to an array of local lead dimension lld_b>=NB.
90*          On entry, this array contains the
91*          local pieces of the right hand sides
92*          B(IB:IB+N-1, 1:NRHS).
93*          On exit, this contains the local piece of the solution
94*          distributed matrix X.
95*
96* IB (global input) INTEGER
97* The row index in the global array B that points to the first
98* row of the matrix to be operated on (which may be either
99* all of B or a submatrix of B).
100*
101* DESCB (global and local input) INTEGER array of dimension DLEN.
102* if 1D type (DTYPE_B=502), DLEN >=7;
103* if 2D type (DTYPE_B=1), DLEN >= 9.
104* The array descriptor for the distributed matrix B.
105* Contains information of mapping of B to memory. Please
106* see NOTES below for full description and options.
107*
108*  AF      (local input) COMPLEX*16 array, dimension LAF.
109* Auxiliary Fillin Space.
110* Fillin is created during the factorization routine
111* PZPTTRF and this is stored in AF. If a linear system
112* is to be solved using PZPTTRS after the factorization
113* routine, AF *must not be altered* after the factorization.
114*
115* LAF (local input) INTEGER
116* Size of user-input Auxiliary Fillin space AF. Must be >=
117* (NB+2)
118* If LAF is not large enough, an error code will be returned
119* and the minimum acceptable size will be returned in AF( 1 )
120*
121* WORK (local workspace/local output)
122* COMPLEX*16 temporary workspace. This space may
123* be overwritten in between calls to routines. WORK must be
124* the size given in LWORK.
125* On exit, WORK( 1 ) contains the minimal LWORK.
126*
127* LWORK (local input or global input) INTEGER
128* Size of user-input workspace WORK.
129* If LWORK is too small, the minimal acceptable size will be
130* returned in WORK(1) and an error code is returned. LWORK>=
131* (10+2*min(100,NRHS))*NPCOL+4*NRHS
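*
*          A workspace query appears to be supported (see the argument
*          checks in the executable code below): calling this routine
*          with LWORK = -1 returns the minimal LWORK in WORK( 1 )
*          without performing the solve. A sketch (not normative):
*
*             CALL PZPTTRSV( UPLO, TRANS, N, NRHS, D, E, JA, DESCA,
*    $                       B, IB, DESCB, AF, LAF, WORK, -1, INFO )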
132*
133* INFO (local output) INTEGER
134* = 0: successful exit
135*          < 0:  If the i-th argument is an array and the j-th entry
136*                had an illegal value, then INFO = -(i*100+j); if the
137*                i-th argument is a scalar and had an illegal value,
138*                then INFO = -i.
139*
140* =====================================================================
141*
142*
143* Restrictions
144* ============
145*
146* The following are restrictions on the input parameters. Some of these
147* are temporary and will be removed in future releases, while others
148* may reflect fundamental technical limitations.
149*
150* Non-cyclic restriction: VERY IMPORTANT!
151* P*NB>= mod(JA-1,NB)+N.
152* The mapping for matrices must be blocked, reflecting the nature
153* of the divide and conquer algorithm as a task-parallel algorithm.
154* This formula in words is: no processor may have more than one
155* chunk of the matrix.
156*
157* Blocksize cannot be too small:
158* If the matrix spans more than one processor, the following
159* restriction on NB, the size of each block on each processor,
160* must hold:
161* NB >= 2
162* The bulk of parallel computation is done on the matrix of size
163* O(NB) on each processor. If this is too small, divide and conquer
164* is a poor choice of algorithm.
165*
166* Submatrix reference:
167* JA = IB
168* Alignment restriction that prevents unnecessary communication.
169*
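*  As an illustrative sketch (not part of this routine's interface;
*  NPROCS below is an assumed variable holding the number of processes
*  in the one-dimensional grid), the non-cyclic restriction can be
*  checked on the calling side when a 1D (type 501/502) descriptor is
*  used:
*
*     NB = DESCA( 4 )
*     IF( NPROCS*NB .LT. MOD( JA-1, NB ) + N )
*    $   WRITE( *, * ) 'Invalid mapping: a process would own 2 chunks'
*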
170*
171* =====================================================================
172*
173*
174* Notes
175* =====
176*
177* If the factorization routine and the solve routine are to be called
178* separately (to solve various sets of righthand sides using the same
179* coefficient matrix), the auxiliary space AF *must not be altered*
180* between calls to the factorization routine and the solve routine.
181*
182* The best algorithm for solving banded and tridiagonal linear systems
183* depends on a variety of parameters, especially the bandwidth.
184* Currently, only algorithms designed for the case N/P >> bw are
185* implemented. These go by many names, including Divide and Conquer,
186* Partitioning, domain decomposition-type, etc.
187* For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
188* algorithms are the appropriate choice.
189*
190* Algorithm description: Divide and Conquer
191*
192*  The Divide and Conquer algorithm assumes the matrix is narrowly
193* banded compared with the number of equations. In this situation,
194* it is best to distribute the input matrix A one-dimensionally,
195* with columns atomic and rows divided amongst the processes.
196* The basic algorithm divides the tridiagonal matrix up into
197* P pieces with one stored on each processor,
198* and then proceeds in 2 phases for the factorization or 3 for the
199* solution of a linear system.
200* 1) Local Phase:
201* The individual pieces are factored independently and in
202* parallel. These factors are applied to the matrix creating
203* fillin, which is stored in a non-inspectable way in auxiliary
204* space AF. Mathematically, this is equivalent to reordering
205* the matrix A as P A P^T and then factoring the principal
206* leading submatrix of size equal to the sum of the sizes of
207* the matrices factored on each processor. The factors of
208* these submatrices overwrite the corresponding parts of A
209* in memory.
210* 2) Reduced System Phase:
211*        A small (P-1) by (P-1) system is formed representing the
212* interaction of the larger blocks, and is stored (as are its
213* factors) in the space AF. A parallel Block Cyclic Reduction
214* algorithm is used. For a linear system, a parallel front solve
215*        followed by an analogous backsolve, both using the structure
216* of the factored matrix, are performed.
217*     3) Backsubstitution Phase:
218* For a linear system, a local backsubstitution is performed on
219* each processor in parallel.
220*
221*
222* Descriptors
223* ===========
224*
225* Descriptors now have *types* and differ from ScaLAPACK 1.0.
226*
227* Note: tridiagonal codes can use either the old two dimensional
228* or new one-dimensional descriptors, though the processor grid in
229* both cases *must be one-dimensional*. We describe both types below.
230*
231* Each global data object is described by an associated description
232* vector. This vector stores the information required to establish
233* the mapping between an object element and its corresponding process
234* and memory location.
235*
236*  Let A be a generic term for any 2D block cyclically distributed array.
237* Such a global array has an associated description vector DESCA.
238* In the following comments, the character _ should be read as
239* "of the global array".
240*
241* NOTATION STORED IN EXPLANATION
242* --------------- -------------- --------------------------------------
243* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
244* DTYPE_A = 1.
245* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
246* the BLACS process grid A is distribu-
247* ted over. The context itself is glo-
248* bal, but the handle (the integer
249* value) may vary.
250* M_A (global) DESCA( M_ ) The number of rows in the global
251* array A.
252* N_A (global) DESCA( N_ ) The number of columns in the global
253* array A.
254* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
255* the rows of the array.
256* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
257* the columns of the array.
258* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
259* row of the array A is distributed.
260* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
261* first column of the array A is
262* distributed.
263* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
264* array. LLD_A >= MAX(1,LOCr(M_A)).
265*
266* Let K be the number of rows or columns of a distributed matrix,
267* and assume that its process grid has dimension p x q.
268* LOCr( K ) denotes the number of elements of K that a process
269* would receive if K were distributed over the p processes of its
270* process column.
271* Similarly, LOCc( K ) denotes the number of elements of K that a
272* process would receive if K were distributed over the q processes of
273* its process row.
274* The values of LOCr() and LOCc() may be determined via a call to the
275* ScaLAPACK tool function, NUMROC:
276* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
277* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
278* An upper bound for these quantities may be computed by:
279* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
280* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
281*
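*  For example (a sketch; NP_LOC and NQ_LOC are assumed local
*  variables), the local dimensions of a 2D block cyclically
*  distributed array A may be obtained as:
*
*     NP_LOC = NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW,
*    $                 DESCA( RSRC_ ), NPROW )
*     NQ_LOC = NUMROC( DESCA( N_ ), DESCA( NB_ ), MYCOL,
*    $                 DESCA( CSRC_ ), NPCOL )
*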
282*
283* One-dimensional descriptors:
284*
285* One-dimensional descriptors are a new addition to ScaLAPACK since
286* version 1.0. They simplify and shorten the descriptor for 1D
287* arrays.
288*
289* Since ScaLAPACK supports two-dimensional arrays as the fundamental
290* object, we allow 1D arrays to be distributed either over the
291* first dimension of the array (as if the grid were P-by-1) or the
292* 2nd dimension (as if the grid were 1-by-P). This choice is
293* indicated by the descriptor type (501 or 502)
294* as described below.
295* However, for tridiagonal matrices, since the objects being
296* distributed are the individual vectors storing the diagonals, we
297* have adopted the convention that both the P-by-1 descriptor and
298* the 1-by-P descriptor are allowed and are equivalent for
299* tridiagonal matrices. Thus, for tridiagonal matrices,
300* DTYPE_A = 501 or 502 can be used interchangeably
301* without any other change.
302* We require that the distributed vectors storing the diagonals of a
303* tridiagonal matrix be aligned with each other. Because of this, a
304* single descriptor, DESCA, serves to describe the distribution of
305*  all diagonals simultaneously.
306*
307* IMPORTANT NOTE: the actual BLACS grid represented by the
308* CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
309* irrespective of which one-dimensional descriptor type
310* (501 or 502) is input.
311* This routine will interpret the grid properly either way.
312* ScaLAPACK routines *do not support intercontext operations* so that
313* the grid passed to a single ScaLAPACK routine *must be the same*
314* for all array descriptors passed to that routine.
315*
316* NOTE: In all cases where 1D descriptors are used, 2D descriptors
317* may also be used, since a one-dimensional array is a special case
318* of a two-dimensional array with one dimension of size unity.
319* The two-dimensional array used in this case *must* be of the
320* proper orientation:
321*     If the appropriate one-dimensional descriptor is DTYPE_A=501
322* (1 by P type), then the two dimensional descriptor must
323* have a CTXT value that refers to a 1 by P BLACS grid;
324*     If the appropriate one-dimensional descriptor is DTYPE_A=502
325* (P by 1 type), then the two dimensional descriptor must
326* have a CTXT value that refers to a P by 1 BLACS grid.
327*
328*
329* Summary of allowed descriptors, types, and BLACS grids:
330* DTYPE 501 502 1 1
331* BLACS grid 1xP or Px1 1xP or Px1 1xP Px1
332* -----------------------------------------------------
333* A OK OK OK NO
334* B NO OK NO OK
335*
336* Note that a consequence of this chart is that it is not possible
337* for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
338* to opposite requirements for the orientation of the BLACS grid,
339* and as noted before, the *same* BLACS context must be used in
340* all descriptors in a single ScaLAPACK subroutine call.
341*
342*  Let A be a generic term for any 1D block cyclically distributed array.
343* Such a global array has an associated description vector DESCA.
344* In the following comments, the character _ should be read as
345* "of the global array".
346*
347* NOTATION STORED IN EXPLANATION
348* --------------- ---------- ------------------------------------------
349* DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
350* TYPE_A = 501: 1-by-P grid.
351* TYPE_A = 502: P-by-1 grid.
352* CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
353* the BLACS process grid A is distribu-
354* ted over. The context itself is glo-
355* bal, but the handle (the integer
356* value) may vary.
357* N_A (global) DESCA( 3 ) The size of the array dimension being
358* distributed.
359* NB_A (global) DESCA( 4 ) The blocking factor used to distribute
360* the distributed dimension of the array.
361* SRC_A (global) DESCA( 5 ) The process row or column over which the
362* first row or column of the array
363* is distributed.
364* Ignored DESCA( 6 ) Ignored for tridiagonal matrices.
365* Reserved DESCA( 7 ) Reserved for future use.
366*
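*  As a sketch (the values below are illustrative assumptions, with
*  ICTXT an existing BLACS context for the one-dimensional grid), a
*  type-501 descriptor for the vectors D and E of an order-N matrix
*  distributed with block size NB could be set up as:
*
*     DESCA( 1 ) = 501
*     DESCA( 2 ) = ICTXT
*     DESCA( 3 ) = N
*     DESCA( 4 ) = NB
*     DESCA( 5 ) = 0
*     DESCA( 6 ) = 0
*     DESCA( 7 ) = 0
*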
367*
368*
369* =====================================================================
370*
371* Code Developer: Andrew J. Cleary, University of Tennessee.
372* Current address: Lawrence Livermore National Labs.
373* This version released: August, 2001.
374*
375* =====================================================================
376*
377* ..
378* .. Parameters ..
379 DOUBLE PRECISION ONE, ZERO
380 parameter( one = 1.0d+0 )
381 parameter( zero = 0.0d+0 )
382 COMPLEX*16 CONE, CZERO
383 parameter( cone = ( 1.0d+0, 0.0d+0 ) )
384 parameter( czero = ( 0.0d+0, 0.0d+0 ) )
385 INTEGER INT_ONE
386 parameter( int_one = 1 )
387 INTEGER DESCMULT, BIGNUM
388 parameter(descmult = 100, bignum = descmult * descmult)
389 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
390 $ lld_, mb_, m_, nb_, n_, rsrc_
391 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
392 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
393 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
394* ..
395* .. Local Scalars ..
396 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
397 $ idum1, idum2, idum3, ja_new, level_dist, llda,
398 $ lldb, mycol, myrow, my_num_cols, nb, np, npcol,
399 $ nprow, np_save, odd_size, part_offset,
400 $ part_size, return_code, store_m_b, store_n_a,
401 $ temp, work_size_min
402* ..
403* .. Local Arrays ..
404 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
405 $ param_check( 16, 3 )
406* ..
407* .. External Subroutines ..
408       EXTERNAL blacs_get, blacs_gridexit, blacs_gridinfo,
409      $ desc_convert, globchk, pxerbla, reshape, zaxpy, zgemm,
410      $ zgerv2d, zgesd2d, zlamov, zmatadd, ztbtrs,
411      $ ztrmm, ztrtrs, zpttrsv
412* ..
413* .. External Functions ..
414 LOGICAL LSAME
415 INTEGER NUMROC
416 EXTERNAL lsame, numroc
417* ..
418* .. Intrinsic Functions ..
419 INTRINSIC ichar, min, mod
420* ..
421* .. Executable Statements ..
422*
423* Test the input parameters
424*
425 info = 0
426*
427* Convert descriptor into standard form for easy access to
428* parameters, check that grid is of right shape.
429*
430 desca_1xp( 1 ) = 501
431 descb_px1( 1 ) = 502
432*
433 temp = desca( dtype_ )
434 IF( temp .EQ. 502 ) THEN
435* Temporarily set the descriptor type to 1xP type
436 desca( dtype_ ) = 501
437 ENDIF
438*
439 CALL desc_convert( desca, desca_1xp, return_code )
440*
441 desca( dtype_ ) = temp
442*
443 IF( return_code .NE. 0) THEN
444 info = -( 8*100 + 2 )
445 ENDIF
446*
447 CALL desc_convert( descb, descb_px1, return_code )
448*
449 IF( return_code .NE. 0) THEN
450 info = -( 11*100 + 2 )
451 ENDIF
452*
453* Consistency checks for DESCA and DESCB.
454*
455* Context must be the same
456 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) ) THEN
457 info = -( 11*100 + 2 )
458 ENDIF
459*
460* These are alignment restrictions that may or may not be removed
461* in future releases. -Andy Cleary, April 14, 1996.
462*
463* Block sizes must be the same
464 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) ) THEN
465 info = -( 11*100 + 4 )
466 ENDIF
467*
468* Source processor must be the same
469*
470 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) ) THEN
471 info = -( 11*100 + 5 )
472 ENDIF
473*
474* Get values out of descriptor for use in code.
475*
476 ictxt = desca_1xp( 2 )
477 csrc = desca_1xp( 5 )
478 nb = desca_1xp( 4 )
479 llda = desca_1xp( 6 )
480 store_n_a = desca_1xp( 3 )
481 lldb = descb_px1( 6 )
482 store_m_b = descb_px1( 3 )
483*
484* Get grid parameters
485*
486*
487 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
488 np = nprow * npcol
489*
490*
491*
492 IF( lsame( uplo, 'U' ) ) THEN
493 idum1 = ichar( 'U' )
494 ELSE IF ( lsame( uplo, 'L' ) ) THEN
495 idum1 = ichar( 'L' )
496 ELSE
497 info = -1
498 END IF
499*
500 IF( lsame( trans, 'N' ) ) THEN
501 idum2 = ichar( 'N' )
502 ELSE IF ( lsame( trans, 'C' ) ) THEN
503 idum2 = ichar( 'C' )
504 ELSE
505 info = -2
506 END IF
507*
508 IF( lwork .LT. -1) THEN
509 info = -15
510 ELSE IF ( lwork .EQ. -1 ) THEN
511 idum3 = -1
512 ELSE
513 idum3 = 1
514 ENDIF
515*
516 IF( n .LT. 0 ) THEN
517 info = -3
518 ENDIF
519*
520 IF( n+ja-1 .GT. store_n_a ) THEN
521 info = -( 8*100 + 6 )
522 ENDIF
523*
524 IF( n+ib-1 .GT. store_m_b ) THEN
525 info = -( 11*100 + 3 )
526 ENDIF
527*
528 IF( lldb .LT. nb ) THEN
529 info = -( 11*100 + 6 )
530 ENDIF
531*
532 IF( nrhs .LT. 0 ) THEN
533 info = -4
534 ENDIF
535*
536* Current alignment restriction
537*
538 IF( ja .NE. ib) THEN
539 info = -7
540 ENDIF
541*
542* Argument checking that is specific to Divide & Conquer routine
543*
544 IF( nprow .NE. 1 ) THEN
545 info = -( 8*100+2 )
546 ENDIF
547*
548 IF( n .GT. np*nb-mod( ja-1, nb )) THEN
549 info = -( 3 )
550 CALL pxerbla( ictxt,
551 $ 'PZPTTRSV, D&C alg.: only 1 block per proc',
552 $ -info )
553 RETURN
554 ENDIF
555*
556 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*int_one )) THEN
557 info = -( 8*100+4 )
558 CALL pxerbla( ictxt,
559 $ 'PZPTTRSV, D&C alg.: NB too small',
560 $ -info )
561 RETURN
562 ENDIF
563*
564*
565 work_size_min =
566 $ int_one*nrhs
567*
568 work( 1 ) = work_size_min
569*
570 IF( lwork .LT. work_size_min ) THEN
571 IF( lwork .NE. -1 ) THEN
572 info = -15
573 CALL pxerbla( ictxt,
574 $ 'PZPTTRSV: worksize error',
575 $ -info )
576 ENDIF
577 RETURN
578 ENDIF
579*
580* Pack params and positions into arrays for global consistency check
581*
582 param_check( 16, 1 ) = descb(5)
583 param_check( 15, 1 ) = descb(4)
584 param_check( 14, 1 ) = descb(3)
585 param_check( 13, 1 ) = descb(2)
586 param_check( 12, 1 ) = descb(1)
587 param_check( 11, 1 ) = ib
588 param_check( 10, 1 ) = desca(5)
589 param_check( 9, 1 ) = desca(4)
590 param_check( 8, 1 ) = desca(3)
591 param_check( 7, 1 ) = desca(1)
592 param_check( 6, 1 ) = ja
593 param_check( 5, 1 ) = nrhs
594 param_check( 4, 1 ) = n
595 param_check( 3, 1 ) = idum3
596 param_check( 2, 1 ) = idum2
597 param_check( 1, 1 ) = idum1
598*
599 param_check( 16, 2 ) = 1105
600 param_check( 15, 2 ) = 1104
601 param_check( 14, 2 ) = 1103
602 param_check( 13, 2 ) = 1102
603 param_check( 12, 2 ) = 1101
604 param_check( 11, 2 ) = 10
605 param_check( 10, 2 ) = 805
606 param_check( 9, 2 ) = 804
607 param_check( 8, 2 ) = 803
608 param_check( 7, 2 ) = 801
609 param_check( 6, 2 ) = 7
610 param_check( 5, 2 ) = 4
611 param_check( 4, 2 ) = 3
612 param_check( 3, 2 ) = 14
613 param_check( 2, 2 ) = 2
614 param_check( 1, 2 ) = 1
615*
616* Want to find errors with MIN( ), so if no error, set it to a big
617*     number. If there already is an error, multiply by the
618* descriptor multiplier.
619*
620 IF( info.GE.0 ) THEN
621 info = bignum
622 ELSE IF( info.LT.-descmult ) THEN
623 info = -info
624 ELSE
625 info = -info * descmult
626 END IF
627*
628* Check consistency across processors
629*
630 CALL globchk( ictxt, 16, param_check, 16,
631 $ param_check( 1, 3 ), info )
632*
633* Prepare output: set info = 0 if no error, and divide by DESCMULT
634* if error is not in a descriptor entry.
635*
636 IF( info.EQ.bignum ) THEN
637 info = 0
638 ELSE IF( mod( info, descmult ) .EQ. 0 ) THEN
639 info = -info / descmult
640 ELSE
641 info = -info
642 END IF
643*
644 IF( info.LT.0 ) THEN
645 CALL pxerbla( ictxt, 'PZPTTRSV', -info )
646 RETURN
647 END IF
648*
649* Quick return if possible
650*
651 IF( n.EQ.0 )
652 $ RETURN
653*
654 IF( nrhs.EQ.0 )
655 $ RETURN
656*
657*
658* Adjust addressing into matrix space to properly get into
659* the beginning part of the relevant data
660*
661 part_offset = nb*( (ja-1)/(npcol*nb) )
662*
663 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb ) THEN
664 part_offset = part_offset + nb
665 ENDIF
666*
667 IF ( mycol .LT. csrc ) THEN
668 part_offset = part_offset - nb
669 ENDIF
670*
671* Form a new BLACS grid (the "standard form" grid) with only procs
672* holding part of the matrix, of size 1xNP where NP is adjusted,
673* starting at csrc=0, with JA modified to reflect dropped procs.
674*
675* First processor to hold part of the matrix:
676*
677 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
678*
679*     Calculate new JA while dropping off unused processors.
680*
681 ja_new = mod( ja-1, nb ) + 1
682*
683* Save and compute new value of NP
684*
685 np_save = np
686 np = ( ja_new+n-2 )/nb + 1
687*
688* Call utility routine that forms "standard-form" grid
689*
690 CALL reshape( ictxt, int_one, ictxt_new, int_one,
691 $ first_proc, int_one, np )
692*
693* Use new context from standard grid as context.
694*
695 ictxt_save = ictxt
696 ictxt = ictxt_new
697 desca_1xp( 2 ) = ictxt_new
698 descb_px1( 2 ) = ictxt_new
699*
700* Get information about new grid.
701*
702 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
703*
704* Drop out processors that do not have part of the matrix.
705*
706 IF( myrow .LT. 0 ) THEN
707 GOTO 1234
708 ENDIF
709*
710* ********************************
711* Values reused throughout routine
712*
713* User-input value of partition size
714*
715 part_size = nb
716*
717* Number of columns in each processor
718*
719 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
720*
721* Offset in columns to beginning of main partition in each proc
722*
723 IF ( mycol .EQ. 0 ) THEN
724 part_offset = part_offset+mod( ja_new-1, part_size )
725 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
726 ENDIF
727*
728* Size of main (or odd) partition in each processor
729*
730 odd_size = my_num_cols
731 IF ( mycol .LT. np-1 ) THEN
732 odd_size = odd_size - int_one
733 ENDIF
734*
735*
736*
737* Begin main code
738*
739 IF ( lsame( uplo, 'L' ) ) THEN
740*
741 IF ( lsame( trans, 'N' ) ) THEN
742*
743* Frontsolve
744*
745*
746******************************************
747* Local computation phase
748******************************************
749*
750* Use main partition in each processor to solve locally
751*
752 CALL zpttrsv( uplo, 'N', odd_size, nrhs, d( part_offset+1 ),
753 $ e( part_offset+1 ), b( part_offset+1 ), lldb,
754 $ info )
755*
756*
757 IF ( mycol .LT. np-1 ) THEN
758* Use factorization of odd-even connection block to modify
759* locally stored portion of right hand side(s)
760*
761 CALL zaxpy( nrhs, -e( part_offset+odd_size ),
762 $ b( part_offset+odd_size ), lldb,
763 $ b( part_offset+odd_size+1 ), lldb )
764*
765 ENDIF
766*
767*
768 IF ( mycol .NE. 0 ) THEN
769* Use the "spike" fillin to calculate contribution to previous
770* processor's righthand-side.
771*
772 CALL zgemm( 'C', 'N', 1, nrhs, odd_size, -cone, af( 1 ),
773 $ odd_size, b( part_offset+1 ), lldb, czero,
774 $ work( 1+int_one-1 ), int_one )
775 ENDIF
776*
777*
778************************************************
779* Formation and solution of reduced system
780************************************************
781*
782*
783* Send modifications to prior processor's right hand sides
784*
785 IF( mycol .GT. 0) THEN
786*
787 CALL zgesd2d( ictxt, int_one, nrhs,
788 $ work( 1 ), int_one,
789 $ 0, mycol - 1 )
790*
791 ENDIF
792*
793* Receive modifications to processor's right hand sides
794*
795 IF( mycol .LT. npcol-1) THEN
796*
797 CALL zgerv2d( ictxt, int_one, nrhs,
798 $ work( 1 ), int_one,
799 $ 0, mycol + 1 )
800*
801* Combine contribution to locally stored right hand sides
802*
803 CALL zmatadd( int_one, nrhs, cone,
804 $ work( 1 ), int_one, cone,
805 $ b( part_offset+odd_size + 1 ), lldb )
806*
807 ENDIF
808*
809*
810* The last processor does not participate in the solution of the
811* reduced system, having sent its contribution already.
812 IF( mycol .EQ. npcol-1 ) THEN
813 GOTO 14
814 ENDIF
815*
816*
817* *************************************
818* Modification Loop
819*
820* The distance for sending and receiving for each level starts
821* at 1 for the first level.
822 level_dist = 1
823*
824* Do until this proc is needed to modify other procs' equations
825*
826 12 CONTINUE
827 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 11
828*
829* Receive and add contribution to righthand sides from left
830*
831 IF( mycol-level_dist .GE. 0 ) THEN
832*
833 CALL zgerv2d( ictxt, int_one, nrhs,
834 $ work( 1 ),
835 $ int_one, 0, mycol-level_dist )
836*
837 CALL zmatadd( int_one, nrhs, cone,
838 $ work( 1 ), int_one, cone,
839 $ b( part_offset+odd_size + 1 ), lldb )
840*
841 ENDIF
842*
843* Receive and add contribution to righthand sides from right
844*
845 IF( mycol+level_dist .LT. npcol-1 ) THEN
846*
847 CALL zgerv2d( ictxt, int_one, nrhs,
848 $ work( 1 ),
849 $ int_one, 0, mycol+level_dist )
850*
851 CALL zmatadd( int_one, nrhs, cone,
852 $ work( 1 ), int_one, cone,
853 $ b( part_offset+odd_size + 1 ), lldb )
854*
855 ENDIF
856*
857 level_dist = level_dist*2
858*
859 GOTO 12
860 11 CONTINUE
861* [End of GOTO Loop]
862*
863*
864*
865* *********************************
866* Calculate and use this proc's blocks to modify other procs
867*
868* Solve with diagonal block
869*
870 CALL ztrtrs( 'L', 'N', 'U', int_one, nrhs, af( odd_size+2 ),
871 $ int_one, b( part_offset+odd_size+1 ), lldb, info )
872*
873 IF( info.NE.0 ) THEN
874 GO TO 1000
875 ENDIF
876*
877*
878*
879* *********
880 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
881*
882* Calculate contribution from this block to next diagonal block
883*
884 CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
885 $ af( (odd_size)*1+1 ),
886 $ int_one,
887 $ b( part_offset+odd_size+1 ),
888 $ lldb, czero,
889 $ work( 1 ),
890 $ int_one )
891*
892* Send contribution to diagonal block's owning processor.
893*
894 CALL zgesd2d( ictxt, int_one, nrhs,
895 $ work( 1 ),
896 $ int_one, 0, mycol+level_dist )
897*
898 ENDIF
899* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
900*
901* ************
902 IF( (mycol/level_dist .GT. 0 ).AND.
903 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
904*
905*
906* Use offdiagonal block to calculate modification to diag block
907* of processor to the left
908*
909 CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
910 $ af( odd_size*1+2+1 ),
911 $ int_one,
912 $ b( part_offset+odd_size+1 ),
913 $ lldb, czero,
914 $ work( 1 ),
915 $ int_one )
916*
917* Send contribution to diagonal block's owning processor.
918*
919 CALL zgesd2d( ictxt, int_one, nrhs,
920 $ work( 1 ),
921 $ int_one, 0, mycol-level_dist )
922*
923 ENDIF
924* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
925*
926 14 CONTINUE
927*
928 ELSE
929*
930******************** BACKSOLVE *************************************
931*
932********************************************************************
933* .. Begin reduced system phase of algorithm ..
934********************************************************************
935*
936*
937*
938* The last processor does not participate in the solution of the
939* reduced system and just waits to receive its solution.
940 IF( mycol .EQ. npcol-1 ) THEN
941 GOTO 24
942 ENDIF
943*
944* Determine number of steps in tree loop
945*
946 level_dist = 1
947 27 CONTINUE
948 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 26
949*
950 level_dist = level_dist*2
951*
952 GOTO 27
953 26 CONTINUE
954*
955*
956 IF( (mycol/level_dist .GT. 0 ).AND.
957 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
958*
959* Receive solution from processor to left
960*
961 CALL zgerv2d( ictxt, int_one, nrhs,
962 $ work( 1 ),
963 $ int_one, 0, mycol-level_dist )
964*
965*
966* Use offdiagonal block to calculate modification to RHS stored
967* on this processor
968*
969 CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
970 $ af( odd_size*1+2+1 ),
971 $ int_one,
972 $ work( 1 ),
973 $ int_one, cone,
974 $ b( part_offset+odd_size+1 ),
975 $ lldb )
976 ENDIF
977* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
978*
979*
980 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
981*
982* Receive solution from processor to right
983*
984 CALL zgerv2d( ictxt, int_one, nrhs,
985 $ work( 1 ),
986 $ int_one, 0, mycol+level_dist )
987*
988* Calculate contribution from this block to next diagonal block
989*
990 CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
991 $ af( (odd_size)*1+1 ),
992 $ int_one,
993 $ work( 1 ),
994 $ int_one, cone,
995 $ b( part_offset+odd_size+1 ),
996 $ lldb )
997*
998 ENDIF
999* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1000*
1001*
1002* Solve with diagonal block
1003*
1004 CALL ztrtrs( 'L', 'C', 'U', int_one, nrhs, af( odd_size+2 ),
1005 $ int_one, b( part_offset+odd_size+1 ), lldb, info )
1006*
1007 IF( info.NE.0 ) THEN
1008 GO TO 1000
1009 ENDIF
1010*
1011*
1012*
1013***Modification Loop *******
1014*
1015 22 CONTINUE
1016 IF( level_dist .EQ. 1 ) GOTO 21
1017*
1018 level_dist = level_dist/2
1019*
1020* Send solution to the right
1021*
1022 IF( mycol+level_dist .LT. npcol-1 ) THEN
1023*
1024 CALL zgesd2d( ictxt, int_one, nrhs,
1025 $ b( part_offset+odd_size+1 ),
1026 $ lldb, 0, mycol+level_dist )
1027*
1028 ENDIF
1029*
1030* Send solution to left
1031*
1032 IF( mycol-level_dist .GE. 0 ) THEN
1033*
1034 CALL zgesd2d( ictxt, int_one, nrhs,
1035 $ b( part_offset+odd_size+1 ),
1036 $ lldb, 0, mycol-level_dist )
1037*
1038 ENDIF
1039*
1040 GOTO 22
1041 21 CONTINUE
1042* [End of GOTO Loop]
1043*
1044 24 CONTINUE
1045* [Processor npcol - 1 jumped to here to await next stage]
1046*
1047*******************************
1048* Reduced system has been solved, communicate solutions to nearest
1049* neighbors in preparation for local computation phase.
1050*
1051*
1052* Send elements of solution to next proc
1053*
1054 IF( mycol .LT. npcol-1) THEN
1055*
1056 CALL zgesd2d( ictxt, int_one, nrhs,
1057 $ b( part_offset+odd_size+1 ), lldb,
1058 $ 0, mycol +1 )
1059*
1060 ENDIF
1061*
1062* Receive modifications to processor's right hand sides
1063*
1064 IF( mycol .GT. 0) THEN
1065*
1066 CALL zgerv2d( ictxt, int_one, nrhs,
1067 $ work( 1 ), int_one,
1068 $ 0, mycol - 1 )
1069*
1070 ENDIF
1071*
1072*
1073*
1074**********************************************
1075* Local computation phase
1076**********************************************
1077*
1078 IF ( mycol .NE. 0 ) THEN
1079* Use the "spike" fillin to calculate contribution from previous
1080* processor's solution.
1081*
1082 CALL zgemm( 'N', 'N', odd_size, nrhs, 1, -cone, af( 1 ),
1083 $ odd_size, work( 1+int_one-1 ), int_one, cone,
1084 $ b( part_offset+1 ), lldb )
1085*
1086 ENDIF
1087*
1088*
1089 IF ( mycol .LT. np-1 ) THEN
1090* Use factorization of odd-even connection block to modify
1091* locally stored portion of right hand side(s)
1092*
1093 CALL zaxpy( nrhs, -dconjg( e( part_offset+odd_size ) ),
1094 $ b( part_offset+odd_size+1 ), lldb,
1095 $ b( part_offset+odd_size ), lldb )
1096*
1097 ENDIF
1098*
1099* Use main partition in each processor to solve locally
1100*
1101 CALL zpttrsv( uplo, 'C', odd_size, nrhs, d( part_offset+1 ),
1102 $ e( part_offset+1 ), b( part_offset+1 ), lldb,
1103 $ info )
1104*
1105 ENDIF
1106* End of "IF( LSAME( TRANS, 'N' ) )"...
1107*
1108*
1109 ELSE
1110***************************************************************
1111* CASE UPLO = 'U' *
1112***************************************************************
1113 IF ( lsame( trans, 'C' ) ) THEN
1114*
1115* Frontsolve
1116*
1117*
1118******************************************
1119* Local computation phase
1120******************************************
1121*
1122* Use main partition in each processor to solve locally
1123*
1124 CALL zpttrsv( uplo, 'C', odd_size, nrhs, d( part_offset+1 ),
1125 $ e( part_offset+1 ), b( part_offset+1 ), lldb,
1126 $ info )
1127*
1128*
1129 IF ( mycol .LT. np-1 ) THEN
1130* Use factorization of odd-even connection block to modify
1131* locally stored portion of right hand side(s)
1132*
1133 CALL zaxpy( nrhs, -dconjg( e( part_offset+odd_size ) ),
1134 $ b( part_offset+odd_size ), lldb,
1135 $ b( part_offset+odd_size+1 ), lldb )
1136*
1137 ENDIF
1138*
1139*
1140 IF ( mycol .NE. 0 ) THEN
1141* Use the "spike" fillin to calculate contribution to previous
1142* processor's righthand-side.
1143*
1144 CALL zgemm( 'T', 'N', 1, nrhs, odd_size, -cone, af( 1 ),
1145 $ odd_size, b( part_offset+1 ), lldb, czero,
1146 $ work( 1+int_one-1 ), int_one )
1147 ENDIF
1148*
1149*
1150************************************************
1151* Formation and solution of reduced system
1152************************************************
1153*
1154*
1155* Send modifications to prior processor's right hand sides
1156*
1157 IF( mycol .GT. 0) THEN
1158*
1159 CALL zgesd2d( ictxt, int_one, nrhs,
1160 $ work( 1 ), int_one,
1161 $ 0, mycol - 1 )
1162*
1163 ENDIF
1164*
1165* Receive modifications to processor's right hand sides
1166*
1167 IF( mycol .LT. npcol-1) THEN
1168*
1169 CALL zgerv2d( ictxt, int_one, nrhs,
1170 $ work( 1 ), int_one,
1171 $ 0, mycol + 1 )
1172*
1173* Combine contribution to locally stored right hand sides
1174*
1175 CALL zmatadd( int_one, nrhs, cone,
1176 $ work( 1 ), int_one, cone,
1177 $ b( part_offset+odd_size + 1 ), lldb )
1178*
1179 ENDIF
1180*
1181*
1182* The last processor does not participate in the solution of the
1183* reduced system, having sent its contribution already.
1184 IF( mycol .EQ. npcol-1 ) THEN
1185 GOTO 44
1186 ENDIF
1187*
1188*
1189* *************************************
1190* Modification Loop
1191*
1192* The distance for sending and receiving for each level starts
1193* at 1 for the first level.
1194 level_dist = 1
1195*
1196* Do until this proc is needed to modify other procs' equations
1197*
1198 42 CONTINUE
1199 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 41
1200*
1201* Receive and add contribution to righthand sides from left
1202*
1203 IF( mycol-level_dist .GE. 0 ) THEN
1204*
1205 CALL zgerv2d( ictxt, int_one, nrhs,
1206 $ work( 1 ),
1207 $ int_one, 0, mycol-level_dist )
1208*
1209 CALL zmatadd( int_one, nrhs, cone,
1210 $ work( 1 ), int_one, cone,
1211 $ b( part_offset+odd_size + 1 ), lldb )
1212*
1213 ENDIF
1214*
1215* Receive and add contribution to righthand sides from right
1216*
1217 IF( mycol+level_dist .LT. npcol-1 ) THEN
1218*
1219 CALL zgerv2d( ictxt, int_one, nrhs,
1220 $ work( 1 ),
1221 $ int_one, 0, mycol+level_dist )
1222*
1223 CALL zmatadd( int_one, nrhs, cone,
1224 $ work( 1 ), int_one, cone,
1225 $ b( part_offset+odd_size + 1 ), lldb )
1226*
1227 ENDIF
1228*
1229 level_dist = level_dist*2
1230*
1231 GOTO 42
1232 41 CONTINUE
1233* [End of GOTO Loop]
1234*
1235*
1236*
1237* *********************************
1238* Calculate and use this proc's blocks to modify other procs
1239*
1240* Solve with diagonal block
1241*
1242 CALL ztrtrs( 'L', 'N', 'U', int_one, nrhs, af( odd_size+2 ),
1243 $ int_one, b( part_offset+odd_size+1 ), lldb, info )
1244*
1245 IF( info.NE.0 ) THEN
1246 GO TO 1000
1247 ENDIF
1248*
1249*
1250*
1251* *********
1252 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1253*
1254* Calculate contribution from this block to next diagonal block
1255*
1256 CALL zgemm( 'T', 'N', int_one, nrhs, int_one, -cone,
1257 $ af( (odd_size)*1+1 ),
1258 $ int_one,
1259 $ b( part_offset+odd_size+1 ),
1260 $ lldb, czero,
1261 $ work( 1 ),
1262 $ int_one )
1263*
1264* Send contribution to diagonal block's owning processor.
1265*
1266 CALL zgesd2d( ictxt, int_one, nrhs,
1267 $ work( 1 ),
1268 $ int_one, 0, mycol+level_dist )
1269*
1270 ENDIF
1271* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1272*
1273* ************
1274 IF( (mycol/level_dist .GT. 0 ).AND.
1275 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1276*
1277*
1278* Use offdiagonal block to calculate modification to diag block
1279* of processor to the left
1280*
1281 CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1282 $ af( odd_size*1+2+1 ),
1283 $ int_one,
1284 $ b( part_offset+odd_size+1 ),
1285 $ lldb, czero,
1286 $ work( 1 ),
1287 $ int_one )
1288*
1289* Send contribution to diagonal block's owning processor.
1290*
1291 CALL zgesd2d( ictxt, int_one, nrhs,
1292 $ work( 1 ),
1293 $ int_one, 0, mycol-level_dist )
1294*
1295 ENDIF
1296* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1297*
1298 44 CONTINUE
1299*
1300 ELSE
1301*
1302******************** BACKSOLVE *************************************
1303*
1304********************************************************************
1305* .. Begin reduced system phase of algorithm ..
1306********************************************************************
1307*
1308*
1309*
1310* The last processor does not participate in the solution of the
1311* reduced system and just waits to receive its solution.
1312 IF( mycol .EQ. npcol-1 ) THEN
1313 GOTO 54
1314 ENDIF
1315*
1316* Determine number of steps in tree loop
1317*
1318 level_dist = 1
1319 57 CONTINUE
1320 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 56
1321*
1322 level_dist = level_dist*2
1323*
1324 GOTO 57
1325 56 CONTINUE
1326*
1327*
1328 IF( (mycol/level_dist .GT. 0 ).AND.
1329 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1330*
1331* Receive solution from processor to left
1332*
1333 CALL zgerv2d( ictxt, int_one, nrhs,
1334 $ work( 1 ),
1335 $ int_one, 0, mycol-level_dist )
1336*
1337*
1338* Use offdiagonal block to calculate modification to RHS stored
1339* on this processor
1340*
1341 CALL zgemm( 'T', 'N', int_one, nrhs, int_one, -cone,
1342 $ af( odd_size*1+2+1 ),
1343 $ int_one,
1344 $ work( 1 ),
1345 $ int_one, cone,
1346 $ b( part_offset+odd_size+1 ),
1347 $ lldb )
1348 ENDIF
1349* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1350*
1351*
1352 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1353*
1354* Receive solution from processor to right
1355*
1356 CALL zgerv2d( ictxt, int_one, nrhs,
1357 $ work( 1 ),
1358 $ int_one, 0, mycol+level_dist )
1359*
1360* Calculate contribution from this block to next diagonal block
1361*
1362 CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1363 $ af( (odd_size)*1+1 ),
1364 $ int_one,
1365 $ work( 1 ),
1366 $ int_one, cone,
1367 $ b( part_offset+odd_size+1 ),
1368 $ lldb )
1369*
1370 ENDIF
1371* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1372*
1373*
1374* Solve with diagonal block
1375*
1376 CALL ztrtrs( 'L', 'C', 'U', int_one, nrhs, af( odd_size+2 ),
1377 $ int_one, b( part_offset+odd_size+1 ), lldb, info )
1378*
1379 IF( info.NE.0 ) THEN
1380 GO TO 1000
1381 ENDIF
1382*
1383*
1384*
1385***Modification Loop *******
1386*
1387 52 CONTINUE
1388 IF( level_dist .EQ. 1 ) GOTO 51
1389*
1390 level_dist = level_dist/2
1391*
1392* Send solution to the right
1393*
1394 IF( mycol+level_dist .LT. npcol-1 ) THEN
1395*
1396 CALL zgesd2d( ictxt, int_one, nrhs,
1397 $ b( part_offset+odd_size+1 ),
1398 $ lldb, 0, mycol+level_dist )
1399*
1400 ENDIF
1401*
1402* Send solution to left
1403*
1404 IF( mycol-level_dist .GE. 0 ) THEN
1405*
1406 CALL zgesd2d( ictxt, int_one, nrhs,
1407 $ b( part_offset+odd_size+1 ),
1408 $ lldb, 0, mycol-level_dist )
1409*
1410 ENDIF
1411*
1412 GOTO 52
1413 51 CONTINUE
1414* [End of GOTO Loop]
1415*
1416 54 CONTINUE
1417* [Processor npcol - 1 jumped to here to await next stage]
1418*
1419*******************************
1420* Reduced system has been solved, communicate solutions to nearest
1421* neighbors in preparation for local computation phase.
1422*
1423*
1424* Send elements of solution to next proc
1425*
1426 IF( mycol .LT. npcol-1) THEN
1427*
1428 CALL zgesd2d( ictxt, int_one, nrhs,
1429 $ b( part_offset+odd_size+1 ), lldb,
1430 $ 0, mycol +1 )
1431*
1432 ENDIF
1433*
1434* Receive modifications to processor's right hand sides
1435*
1436 IF( mycol .GT. 0) THEN
1437*
1438 CALL zgerv2d( ictxt, int_one, nrhs,
1439 $ work( 1 ), int_one,
1440 $ 0, mycol - 1 )
1441*
1442 ENDIF
1443*
1444*
1445*
1446**********************************************
1447* Local computation phase
1448**********************************************
1449*
1450 IF ( mycol .NE. 0 ) THEN
1451* Use the "spike" fillin to calculate contribution from previous
1452* processor's solution.
1453*
1454 CALL zgemm( 'C', 'N', odd_size, nrhs, 1, -cone, af( 1 ),
1455 $ int_one, work( 1 ), int_one, cone,
1456 $ b( part_offset+1 ), lldb )
1457*
1458 ENDIF
1459*
1460*
1461 IF ( mycol .LT. np-1 ) THEN
1462* Use factorization of odd-even connection block to modify
1463* locally stored portion of right hand side(s)
1464*
1465 CALL zaxpy( nrhs, -e( part_offset+odd_size ),
1466 $ b( part_offset+odd_size+1 ), lldb,
1467 $ b( part_offset+odd_size ), lldb )
1468*
1469 ENDIF
1470*
1471* Use main partition in each processor to solve locally
1472*
1473 CALL zpttrsv( uplo, 'N', odd_size, nrhs, d( part_offset+1 ),
1474 $ e( part_offset+1 ), b( part_offset+1 ), lldb,
1475 $ info )
1476*
1477 ENDIF
1478* End of "IF( LSAME( TRANS, 'N' ) )"...
1479*
1480*
1481 ENDIF
1482* End of "IF( LSAME( UPLO, 'L' ) )"...
1483 1000 CONTINUE
1484*
1485*
1486* Free BLACS space used to hold standard-form grid.
1487*
1488 IF( ictxt_save .NE. ictxt_new ) THEN
1489 CALL blacs_gridexit( ictxt_new )
1490 ENDIF
1491*
1492 1234 CONTINUE
1493*
1494* Restore saved input parameters
1495*
1496 ictxt = ictxt_save
1497 np = np_save
1498*
1499* Output minimum worksize
1500*
1501 work( 1 ) = work_size_min
1502*
1503*
1504 RETURN
1505*
1506* End of PZPTTRSV
1507*
1508 END
subroutine desc_convert(desc_in, desc_out, info)
Definition desc_convert.f:2
subroutine globchk(ictxt, n, x, ldx, iwork, info)
Definition pchkxmat.f:403
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
subroutine pzpttrsv(uplo, trans, n, nrhs, d, e, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pzpttrsv.f:3
void reshape(Int *context_in, Int *major_in, Int *context_out, Int *major_out, Int *first_proc, Int *nprow_new, Int *npcol_new)
Definition reshape.c:80
subroutine zmatadd(m, n, alpha, a, lda, beta, c, ldc)
Definition zmatadd.f:2
subroutine zpttrsv(uplo, trans, n, nrhs, d, e, b, ldb, info)
Definition zpttrsv.f:3