SCALAPACK 2.2.2
pzdttrsv.f
1 SUBROUTINE pzdttrsv( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA,
2 $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER TRANS, UPLO
10 INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * ), DESCB( * )
14 COMPLEX*16 AF( * ), B( * ), D( * ), DL( * ), DU( * ),
15 $ work( * )
16* ..
17*
18*
19* Purpose
20* =======
21*
22* PZDTTRSV solves a tridiagonal triangular system of linear equations
23*
24* A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
25* or
26* A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS)
27*
28* where A(1:N, JA:JA+N-1) is a tridiagonal
29* triangular matrix factor produced by the
30* Gaussian elimination code PZDTTRF
31* and is stored in A(1:N,JA:JA+N-1) and AF.
32* The matrix stored in A(1:N, JA:JA+N-1) is either
33* upper or lower triangular according to UPLO,
34* and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H
35* is dictated by the user by the parameter TRANS.
36*
37* Routine PZDTTRF MUST be called first.
38*
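*
*  Usage sketch (added illustration, hedged; the array and descriptor
*  names are placeholders): factor once with PZDTTRF, then apply this
*  triangular solve to the stored factors, e.g.
*
*        CALL PZDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK,
*     $                LWORK, INFO )
*        IF( INFO.EQ.0 )
*     $     CALL PZDTTRSV( 'L', 'N', N, NRHS, DL, D, DU, JA, DESCA,
*     $                    B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
*
*  The driver PZDTTRS can be used instead when a complete solve with
*  the computed factors is wanted.
*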
39* =====================================================================
40*
41* Arguments
42* =========
43*
44* UPLO (global input) CHARACTER
45* = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored;
46* = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored.
47*
48* TRANS (global input) CHARACTER
49* = 'N': Solve with A(1:N, JA:JA+N-1);
50* = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) );
51*
52* N (global input) INTEGER
53* The number of rows and columns to be operated on, i.e. the
54* order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
55*
56* NRHS (global input) INTEGER
57* The number of right hand sides, i.e., the number of columns
58* of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
59* NRHS >= 0.
60*
61* DL (local input/local output) COMPLEX*16 pointer to local
62* part of global vector storing the lower diagonal of the
63* matrix. Globally, DL(1) is not referenced, and DL must be
64* aligned with D.
65* Must be of size >= DESCA( NB_ ).
66* On exit, this array contains information on the
67* factors of the matrix.
68*
69* D (local input/local output) COMPLEX*16 pointer to local
70* part of global vector storing the main diagonal of the
71* matrix.
72* On exit, this array contains information on the
73* factors of the matrix.
74* Must be of size >= DESCA( NB_ ).
75*
76* DU (local input/local output) COMPLEX*16 pointer to local
77* part of global vector storing the upper diagonal of the
78* matrix. Globally, DU(n) is not referenced, and DU must be
79* aligned with D.
80* On exit, this array contains information on the
81* factors of the matrix.
82* Must be of size >= DESCA( NB_ ).
83*
84* JA (global input) INTEGER
85* The index in the global array A that points to the start of
86* the matrix to be operated on (which may be either all of A
87* or a submatrix of A).
88*
89* DESCA (global and local input) INTEGER array of dimension DLEN.
90* if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
91* if 2D type (DTYPE_A=1), DLEN >= 9.
92* The array descriptor for the distributed matrix A.
93* Contains information of mapping of A to memory. Please
94* see NOTES below for full description and options.
95*
96* B (local input/local output) COMPLEX*16 pointer into
97* local memory to an array of local lead dimension lld_b>=NB.
98* On entry, this array contains the
99* local pieces of the right hand sides
100* B(IB:IB+N-1, 1:NRHS).
101* On exit, this contains the local piece of the distributed
102* solution matrix X.
103*
104* IB (global input) INTEGER
105* The row index in the global array B that points to the first
106* row of the matrix to be operated on (which may be either
107* all of B or a submatrix of B).
108*
109* DESCB (global and local input) INTEGER array of dimension DLEN.
110* if 1D type (DTYPE_B=502), DLEN >=7;
111* if 2D type (DTYPE_B=1), DLEN >= 9.
112* The array descriptor for the distributed matrix B.
113* Contains information of mapping of B to memory. Please
114* see NOTES below for full description and options.
115*
116* AF (local output) COMPLEX*16 array, dimension LAF.
117* Auxiliary Fillin Space.
118* Fillin is created during the factorization routine
119* PZDTTRF and this is stored in AF. If a linear system
120* is to be solved using PZDTTRS after the factorization
121* routine, AF *must not be altered* after the factorization.
122*
123* LAF (local input) INTEGER
124* Size of user-input Auxiliary Fillin space AF. Must be >=
125* 2*(NB+2)
126* If LAF is not large enough, an error code will be returned
127* and the minimum acceptable size will be returned in AF( 1 )
128*
129* WORK (local workspace/local output)
130* COMPLEX*16 temporary workspace. This space may
131* be overwritten in between calls to routines. WORK must be
132* the size given in LWORK.
133* On exit, WORK( 1 ) contains the minimal LWORK.
134*
135* LWORK (local input or global input) INTEGER
136* Size of user-input workspace WORK.
137* If LWORK is too small, the minimal acceptable size will be
138* returned in WORK(1) and an error code is returned. LWORK>=
139* 10*NPCOL+4*NRHS
140*
141* INFO (local output) INTEGER
142* = 0: successful exit
143* < 0: If the i-th argument is an array and the j-entry had
144* an illegal value, then INFO = -(i*100+j), if the i-th
145* argument is a scalar and had an illegal value, then
146* INFO = -i.
147*
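*
*  Sizing sketch (illustrative, hedged): with NB the block size from
*  DESCA, the requirements stated above amount to
*
*     LAF   = 2*( NB+2 )
*     LWORK = 10*NPCOL + 4*NRHS
*
*  Judging from the argument checking below, a workspace query also
*  appears to be supported: calling with LWORK = -1 returns the
*  minimal workspace size in WORK( 1 ) without performing a solve.
*
*     LWORK = -1
*     CALL PZDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA,
*    $               B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
*     LWORK = INT( DBLE( WORK( 1 ) ) )
*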
148* =====================================================================
149*
150*
151* Restrictions
152* ============
153*
154* The following are restrictions on the input parameters. Some of these
155* are temporary and will be removed in future releases, while others
156* may reflect fundamental technical limitations.
157*
158* Non-cyclic restriction: VERY IMPORTANT!
159* P*NB>= mod(JA-1,NB)+N.
160* The mapping for matrices must be blocked, reflecting the nature
161* of the divide and conquer algorithm as a task-parallel algorithm.
162* This formula in words is: no processor may have more than one
163* chunk of the matrix.
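*
*  Illustrative check (hedged sketch; NPCOL here denotes the number
*  of processes over which the matrix is distributed): the
*  restriction above can be tested as
*
*     IF( NPCOL*NB .LT. MOD( JA-1, NB )+N )
*    $   WRITE( *, * ) 'some process would hold more than one block'
*
*  The routine itself rejects such inputs with INFO = -3.
*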
164*
165* Blocksize cannot be too small:
166* If the matrix spans more than one processor, the following
167* restriction on NB, the size of each block on each processor,
168* must hold:
169* NB >= 2
170* The bulk of parallel computation is done on the matrix of size
171* O(NB) on each processor. If this is too small, divide and conquer
172* is a poor choice of algorithm.
173*
174* Submatrix reference:
175* JA = IB
176* Alignment restriction that prevents unnecessary communication.
177*
178*
179* =====================================================================
180*
181*
182* Notes
183* =====
184*
185* If the factorization routine and the solve routine are to be called
186* separately (to solve various sets of righthand sides using the same
187* coefficient matrix), the auxiliary space AF *must not be altered*
188* between calls to the factorization routine and the solve routine.
189*
190* The best algorithm for solving banded and tridiagonal linear systems
191* depends on a variety of parameters, especially the bandwidth.
192* Currently, only algorithms designed for the case N/P >> bw are
193* implemented. These go by many names, including Divide and Conquer,
194* Partitioning, domain decomposition-type, etc.
195* For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
196* algorithms are the appropriate choice.
197*
198* Algorithm description: Divide and Conquer
199*
200* The Divide and Conquer algorithm assumes the matrix is narrowly
201* banded compared with the number of equations. In this situation,
202* it is best to distribute the input matrix A one-dimensionally,
203* with columns atomic and rows divided amongst the processes.
204* The basic algorithm divides the tridiagonal matrix up into
205* P pieces with one stored on each processor,
206* and then proceeds in 2 phases for the factorization or 3 for the
207* solution of a linear system.
208* 1) Local Phase:
209* The individual pieces are factored independently and in
210* parallel. These factors are applied to the matrix creating
211* fillin, which is stored in a non-inspectable way in auxiliary
212* space AF. Mathematically, this is equivalent to reordering
213* the matrix A as P A P^T and then factoring the principal
214* leading submatrix of size equal to the sum of the sizes of
215* the matrices factored on each processor. The factors of
216* these submatrices overwrite the corresponding parts of A
217* in memory.
218* 2) Reduced System Phase:
219* A small system of order (P-1) is formed representing
220* interaction of the larger blocks, and is stored (as are its
221* factors) in the space AF. A parallel Block Cyclic Reduction
222* algorithm is used. For a linear system, a parallel front solve
223* followed by an analogous backsolve, both using the structure
224* of the factored matrix, are performed.
225* 3) Backsubstitution Phase:
226* For a linear system, a local backsubstitution is performed on
227* each processor in parallel.
228*
229*
230* Descriptors
231* ===========
232*
233* Descriptors now have *types* and differ from ScaLAPACK 1.0.
234*
235* Note: tridiagonal codes can use either the old two dimensional
236* or new one-dimensional descriptors, though the processor grid in
237* both cases *must be one-dimensional*. We describe both types below.
238*
239* Each global data object is described by an associated description
240* vector. This vector stores the information required to establish
241* the mapping between an object element and its corresponding process
242* and memory location.
243*
244* Let A be a generic term for any 2D block cyclically distributed array.
245* Such a global array has an associated description vector DESCA.
246* In the following comments, the character _ should be read as
247* "of the global array".
248*
249* NOTATION        STORED IN      EXPLANATION
250* --------------- -------------- --------------------------------------
251* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
252*                                DTYPE_A = 1.
253* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
254*                                the BLACS process grid A is distribu-
255*                                ted over. The context itself is glo-
256*                                bal, but the handle (the integer
257*                                value) may vary.
258* M_A    (global) DESCA( M_ )    The number of rows in the global
259*                                array A.
260* N_A    (global) DESCA( N_ )    The number of columns in the global
261*                                array A.
262* MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
263*                                the rows of the array.
264* NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
265*                                the columns of the array.
266* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
267*                                row of the array A is distributed.
268* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
269*                                first column of the array A is
270*                                distributed.
271* LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
272*                                array.  LLD_A >= MAX(1,LOCr(M_A)).
273*
274* Let K be the number of rows or columns of a distributed matrix,
275* and assume that its process grid has dimension p x q.
276* LOCr( K ) denotes the number of elements of K that a process
277* would receive if K were distributed over the p processes of its
278* process column.
279* Similarly, LOCc( K ) denotes the number of elements of K that a
280* process would receive if K were distributed over the q processes of
281* its process row.
282* The values of LOCr() and LOCc() may be determined via a call to the
283* ScaLAPACK tool function, NUMROC:
284* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
285* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
286* An upper bound for these quantities may be computed by:
287* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
288* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
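*
*  Illustrative call (hedged; NP_D is a placeholder name): the number
*  of entries of the main diagonal D held by this process could be
*  obtained as
*
*     NP_D = NUMROC( N, DESCA( 4 ), MYCOL, DESCA( 5 ), NPCOL )
*
*  where, for a one-dimensional descriptor, DESCA( 4 ) holds the
*  blocking factor and DESCA( 5 ) the source process (see below).
*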
289*
290*
291* One-dimensional descriptors:
292*
293* One-dimensional descriptors are a new addition to ScaLAPACK since
294* version 1.0. They simplify and shorten the descriptor for 1D
295* arrays.
296*
297* Since ScaLAPACK supports two-dimensional arrays as the fundamental
298* object, we allow 1D arrays to be distributed either over the
299* first dimension of the array (as if the grid were P-by-1) or the
300* 2nd dimension (as if the grid were 1-by-P). This choice is
301* indicated by the descriptor type (501 or 502)
302* as described below.
303* However, for tridiagonal matrices, since the objects being
304* distributed are the individual vectors storing the diagonals, we
305* have adopted the convention that both the P-by-1 descriptor and
306* the 1-by-P descriptor are allowed and are equivalent for
307* tridiagonal matrices. Thus, for tridiagonal matrices,
308* DTYPE_A = 501 or 502 can be used interchangeably
309* without any other change.
310* We require that the distributed vectors storing the diagonals of a
311* tridiagonal matrix be aligned with each other. Because of this, a
312* single descriptor, DESCA, serves to describe the distribution of
313* all diagonals simultaneously.
314*
315* IMPORTANT NOTE: the actual BLACS grid represented by the
316* CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
317* irrespective of which one-dimensional descriptor type
318* (501 or 502) is input.
319* This routine will interpret the grid properly either way.
320* ScaLAPACK routines *do not support intercontext operations* so that
321* the grid passed to a single ScaLAPACK routine *must be the same*
322* for all array descriptors passed to that routine.
323*
324* NOTE: In all cases where 1D descriptors are used, 2D descriptors
325* may also be used, since a one-dimensional array is a special case
326* of a two-dimensional array with one dimension of size unity.
327* The two-dimensional array used in this case *must* be of the
328* proper orientation:
329* If the appropriate one-dimensional descriptor is DTYPE_A=501
330* (1 by P type), then the two dimensional descriptor must
331* have a CTXT value that refers to a 1 by P BLACS grid;
332* If the appropriate one-dimensional descriptor is DTYPE_A=502
333* (P by 1 type), then the two dimensional descriptor must
334* have a CTXT value that refers to a P by 1 BLACS grid.
335*
336*
337* Summary of allowed descriptors, types, and BLACS grids:
338* DTYPE        501         502         1         1
339* BLACS grid   1xP or Px1  1xP or Px1  1xP       Px1
340* -----------------------------------------------------
341* A            OK          OK          OK        NO
342* B            NO          OK          NO        OK
343*
344* Note that a consequence of this chart is that it is not possible
345* for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
346* to opposite requirements for the orientation of the BLACS grid,
347* and as noted before, the *same* BLACS context must be used in
348* all descriptors in a single ScaLAPACK subroutine call.
349*
350* Let A be a generic term for any 1D block cyclically distributed array.
351* Such a global array has an associated description vector DESCA.
352* In the following comments, the character _ should be read as
353* "of the global array".
354*
355* NOTATION        STORED IN  EXPLANATION
356* --------------- ---------- ------------------------------------------
357* DTYPE_A(global) DESCA( 1 ) The descriptor type.  For 1D grids,
358*                            TYPE_A = 501: 1-by-P grid.
359*                            TYPE_A = 502: P-by-1 grid.
360* CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
361*                            the BLACS process grid A is distribu-
362*                            ted over. The context itself is glo-
363*                            bal, but the handle (the integer
364*                            value) may vary.
365* N_A    (global) DESCA( 3 ) The size of the array dimension being
366*                            distributed.
367* NB_A   (global) DESCA( 4 ) The blocking factor used to distribute
368*                            the distributed dimension of the array.
369* SRC_A  (global) DESCA( 5 ) The process row or column over which the
370*                            first row or column of the array
371*                            is distributed.
372* Ignored         DESCA( 6 ) Ignored for tridiagonal matrices.
373* Reserved        DESCA( 7 ) Reserved for future use.
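*
*  Descriptor setup sketch (hedged; the values are placeholders for
*  illustration and assume IB = JA = 1): the matrix descriptor and
*  the right-hand-side descriptor might be filled as
*
*     DESCA( 1 ) = 501
*     DESCA( 2 ) = ICTXT
*     DESCA( 3 ) = N
*     DESCA( 4 ) = NB
*     DESCA( 5 ) = 0
*     DESCA( 6 ) = 0
*     DESCA( 7 ) = 0
*
*     DESCB( 1 ) = 502
*     DESCB( 2 ) = ICTXT
*     DESCB( 3 ) = N
*     DESCB( 4 ) = NB
*     DESCB( 5 ) = 0
*     DESCB( 6 ) = LLD_B
*     DESCB( 7 ) = 0
*
*  with LLD_B >= NB the local leading dimension of B, as described in
*  the argument list above.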
374*
375*
376*
377* =====================================================================
378*
379* Code Developer: Andrew J. Cleary, University of Tennessee.
380* Current address: Lawrence Livermore National Labs.
381* This version released: August, 2001.
382*
383* =====================================================================
384*
385* ..
386* .. Parameters ..
387 DOUBLE PRECISION ONE, ZERO
388 parameter( one = 1.0d+0 )
389 parameter( zero = 0.0d+0 )
390 COMPLEX*16 CONE, CZERO
391 parameter( cone = ( 1.0d+0, 0.0d+0 ) )
392 parameter( czero = ( 0.0d+0, 0.0d+0 ) )
393 INTEGER INT_ONE
394 parameter( int_one = 1 )
395 INTEGER DESCMULT, BIGNUM
396 parameter(descmult = 100, bignum = descmult * descmult)
397 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
398 $ lld_, mb_, m_, nb_, n_, rsrc_
399 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
400 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
401 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
402* ..
403* .. Local Scalars ..
404 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
405 $ idum1, idum2, idum3, ja_new, level_dist, llda,
406 $ lldb, mycol, myrow, my_num_cols, nb, np, npcol,
407 $ nprow, np_save, odd_size, part_offset,
408 $ part_size, return_code, store_m_b, store_n_a,
409 $ temp, work_size_min, work_u
410* ..
411* .. Local Arrays ..
412 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
413 $ param_check( 16, 3 )
414* ..
415* .. External Subroutines ..
416 EXTERNAL blacs_get, blacs_gridexit, blacs_gridinfo,
418 $ zgerv2d, zgesd2d, zlamov, zmatadd, ztbtrs,
419 $ ztrmm, ztrtrs
420* ..
421* .. External Functions ..
422 LOGICAL LSAME
423 INTEGER NUMROC
424 COMPLEX*16 ZDOTC
425 EXTERNAL lsame, numroc, zdotc
426* ..
427* .. Intrinsic Functions ..
428 INTRINSIC ichar, min, mod
429* ..
430* .. Executable Statements ..
431*
432* Test the input parameters
433*
434 info = 0
435*
436* Convert descriptor into standard form for easy access to
437* parameters, check that grid is of right shape.
438*
439 desca_1xp( 1 ) = 501
440 descb_px1( 1 ) = 502
441*
442 temp = desca( dtype_ )
443 IF( temp .EQ. 502 ) THEN
444* Temporarily set the descriptor type to 1xP type
445 desca( dtype_ ) = 501
446 ENDIF
447*
448 CALL desc_convert( desca, desca_1xp, return_code )
449*
450 desca( dtype_ ) = temp
451*
452 IF( return_code .NE. 0) THEN
453 info = -( 9*100 + 2 )
454 ENDIF
455*
456 CALL desc_convert( descb, descb_px1, return_code )
457*
458 IF( return_code .NE. 0) THEN
459 info = -( 12*100 + 2 )
460 ENDIF
461*
462* Consistency checks for DESCA and DESCB.
463*
464* Context must be the same
465 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) ) THEN
466 info = -( 12*100 + 2 )
467 ENDIF
468*
469* These are alignment restrictions that may or may not be removed
470* in future releases. -Andy Cleary, April 14, 1996.
471*
472* Block sizes must be the same
473 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) ) THEN
474 info = -( 12*100 + 4 )
475 ENDIF
476*
477* Source processor must be the same
478*
479 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) ) THEN
480 info = -( 12*100 + 5 )
481 ENDIF
482*
483* Get values out of descriptor for use in code.
484*
485 ictxt = desca_1xp( 2 )
486 csrc = desca_1xp( 5 )
487 nb = desca_1xp( 4 )
488 llda = desca_1xp( 6 )
489 store_n_a = desca_1xp( 3 )
490 lldb = descb_px1( 6 )
491 store_m_b = descb_px1( 3 )
492*
493* Get grid parameters
494*
495*
496 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
497 np = nprow * npcol
498*
499*
500*
501 IF( lsame( uplo, 'U' ) ) THEN
502 idum1 = ichar( 'U' )
503 ELSE IF ( lsame( uplo, 'L' ) ) THEN
504 idum1 = ichar( 'L' )
505 ELSE
506 info = -1
507 END IF
508*
509 IF( lsame( trans, 'N' ) ) THEN
510 idum2 = ichar( 'N' )
511 ELSE IF ( lsame( trans, 'C' ) ) THEN
512 idum2 = ichar( 'C' )
513 ELSE
514 info = -2
515 END IF
516*
517 IF( lwork .LT. -1) THEN
518 info = -16
519 ELSE IF ( lwork .EQ. -1 ) THEN
520 idum3 = -1
521 ELSE
522 idum3 = 1
523 ENDIF
524*
525 IF( n .LT. 0 ) THEN
526 info = -3
527 ENDIF
528*
529 IF( n+ja-1 .GT. store_n_a ) THEN
530 info = -( 9*100 + 6 )
531 ENDIF
532*
533 IF( n+ib-1 .GT. store_m_b ) THEN
534 info = -( 12*100 + 3 )
535 ENDIF
536*
537 IF( lldb .LT. nb ) THEN
538 info = -( 12*100 + 6 )
539 ENDIF
540*
541 IF( nrhs .LT. 0 ) THEN
542 info = -4
543 ENDIF
544*
545* Current alignment restriction
546*
547 IF( ja .NE. ib) THEN
548 info = -8
549 ENDIF
550*
551* Argument checking that is specific to Divide & Conquer routine
552*
553 IF( nprow .NE. 1 ) THEN
554 info = -( 9*100+2 )
555 ENDIF
556*
557 IF( n .GT. np*nb-mod( ja-1, nb )) THEN
558 info = -( 3 )
559 CALL pxerbla( ictxt,
560 $ 'PZDTTRSV, D&C alg.: only 1 block per proc',
561 $ -info )
562 RETURN
563 ENDIF
564*
565 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*int_one )) THEN
566 info = -( 9*100+4 )
567 CALL pxerbla( ictxt,
568 $ 'PZDTTRSV, D&C alg.: NB too small',
569 $ -info )
570 RETURN
571 ENDIF
572*
573*
574 work_size_min =
575 $ int_one*nrhs
576*
577 work( 1 ) = work_size_min
578*
579 IF( lwork .LT. work_size_min ) THEN
580 IF( lwork .NE. -1 ) THEN
581 info = -16
582 CALL pxerbla( ictxt,
583 $ 'PZDTTRSV: worksize error',
584 $ -info )
585 ENDIF
586 RETURN
587 ENDIF
588*
589* Pack params and positions into arrays for global consistency check
590*
591 param_check( 16, 1 ) = descb(5)
592 param_check( 15, 1 ) = descb(4)
593 param_check( 14, 1 ) = descb(3)
594 param_check( 13, 1 ) = descb(2)
595 param_check( 12, 1 ) = descb(1)
596 param_check( 11, 1 ) = ib
597 param_check( 10, 1 ) = desca(5)
598 param_check( 9, 1 ) = desca(4)
599 param_check( 8, 1 ) = desca(3)
600 param_check( 7, 1 ) = desca(1)
601 param_check( 6, 1 ) = ja
602 param_check( 5, 1 ) = nrhs
603 param_check( 4, 1 ) = n
604 param_check( 3, 1 ) = idum3
605 param_check( 2, 1 ) = idum2
606 param_check( 1, 1 ) = idum1
607*
608 param_check( 16, 2 ) = 1205
609 param_check( 15, 2 ) = 1204
610 param_check( 14, 2 ) = 1203
611 param_check( 13, 2 ) = 1202
612 param_check( 12, 2 ) = 1201
613 param_check( 11, 2 ) = 11
614 param_check( 10, 2 ) = 905
615 param_check( 9, 2 ) = 904
616 param_check( 8, 2 ) = 903
617 param_check( 7, 2 ) = 901
618 param_check( 6, 2 ) = 8
619 param_check( 5, 2 ) = 4
620 param_check( 4, 2 ) = 3
621 param_check( 3, 2 ) = 16
622 param_check( 2, 2 ) = 2
623 param_check( 1, 2 ) = 1
624*
625* Want to find errors with MIN( ), so if no error, set it to a big
626* number. If there already is an error, multiply by the
627* descriptor multiplier.
628*
629 IF( info.GE.0 ) THEN
630 info = bignum
631 ELSE IF( info.LT.-descmult ) THEN
632 info = -info
633 ELSE
634 info = -info * descmult
635 END IF
636*
637* Check consistency across processors
638*
639 CALL globchk( ictxt, 16, param_check, 16,
640 $ param_check( 1, 3 ), info )
641*
642* Prepare output: set info = 0 if no error, and divide by DESCMULT
643* if error is not in a descriptor entry.
644*
645 IF( info.EQ.bignum ) THEN
646 info = 0
647 ELSE IF( mod( info, descmult ) .EQ. 0 ) THEN
648 info = -info / descmult
649 ELSE
650 info = -info
651 END IF
652*
653 IF( info.LT.0 ) THEN
654 CALL pxerbla( ictxt, 'PZDTTRSV', -info )
655 RETURN
656 END IF
657*
658* Quick return if possible
659*
660 IF( n.EQ.0 )
661 $ RETURN
662*
663 IF( nrhs.EQ.0 )
664 $ RETURN
665*
666*
667* Adjust addressing into matrix space to properly get into
668* the beginning part of the relevant data
669*
670 part_offset = nb*( (ja-1)/(npcol*nb) )
671*
672 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb ) THEN
673 part_offset = part_offset + nb
674 ENDIF
675*
676 IF ( mycol .LT. csrc ) THEN
677 part_offset = part_offset - nb
678 ENDIF
679*
680* Form a new BLACS grid (the "standard form" grid) with only procs
681* holding part of the matrix, of size 1xNP where NP is adjusted,
682* starting at csrc=0, with JA modified to reflect dropped procs.
683*
684* First processor to hold part of the matrix:
685*
686 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
687*
688* Calculate the new JA while dropping off unused processors.
689*
690 ja_new = mod( ja-1, nb ) + 1
691*
692* Save and compute new value of NP
693*
694 np_save = np
695 np = ( ja_new+n-2 )/nb + 1
696*
697* Call utility routine that forms "standard-form" grid
698*
699 CALL reshape( ictxt, int_one, ictxt_new, int_one,
700 $ first_proc, int_one, np )
701*
702* Use new context from standard grid as context.
703*
704 ictxt_save = ictxt
705 ictxt = ictxt_new
706 desca_1xp( 2 ) = ictxt_new
707 descb_px1( 2 ) = ictxt_new
708*
709* Get information about new grid.
710*
711 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
712*
713* Drop out processors that do not have part of the matrix.
714*
715 IF( myrow .LT. 0 ) THEN
716 GOTO 1234
717 ENDIF
718*
719* ********************************
720* Values reused throughout routine
721*
722* User-input value of partition size
723*
724 part_size = nb
725*
726* Number of columns in each processor
727*
728 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
729*
730* Offset in columns to beginning of main partition in each proc
731*
732 IF ( mycol .EQ. 0 ) THEN
733 part_offset = part_offset+mod( ja_new-1, part_size )
734 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
735 ENDIF
736*
737* Size of main (or odd) partition in each processor
738*
739 odd_size = my_num_cols
740 IF ( mycol .LT. np-1 ) THEN
741 odd_size = odd_size - int_one
742 ENDIF
743*
744* Offset to workspace for Upper triangular factor
745*
746 work_u = int_one*odd_size + 3
747*
748*
749*
750* Begin main code
751*
752 IF ( lsame( uplo, 'L' ) ) THEN
753*
754 IF ( lsame( trans, 'N' ) ) THEN
755*
756* Frontsolve
757*
758*
759******************************************
760* Local computation phase
761******************************************
762*
763* Use main partition in each processor to solve locally
764*
765 CALL zdttrsv( uplo, 'N', odd_size, nrhs, dl( part_offset+2 ),
766 $ d( part_offset+1 ), du( part_offset+1 ),
767 $ b( part_offset+1 ), lldb, info )
768*
769*
770 IF ( mycol .LT. np-1 ) THEN
771* Use factorization of odd-even connection block to modify
772* locally stored portion of right hand side(s)
773*
774 CALL zaxpy( nrhs, -dl( part_offset+odd_size+1 ),
775 $ b( part_offset+odd_size ), lldb,
776 $ b( part_offset+odd_size+1 ), lldb )
777*
778 ENDIF
779*
780*
781 IF ( mycol .NE. 0 ) THEN
782* Use the "spike" fillin to calculate contribution to previous
783* processor's righthand-side.
784*
785 CALL zgemm( 'C', 'N', int_one, nrhs, odd_size, -cone,
786 $ af( 1 ), odd_size, b( part_offset+1 ), lldb,
787 $ czero, work( 1+int_one-int_one ), int_one )
788 ENDIF
789*
790*
791************************************************
792* Formation and solution of reduced system
793************************************************
794*
795*
796* Send modifications to prior processor's right hand sides
797*
798 IF( mycol .GT. 0) THEN
799*
800 CALL zgesd2d( ictxt, int_one, nrhs,
801 $ work( 1 ), int_one,
802 $ 0, mycol - 1 )
803*
804 ENDIF
805*
806* Receive modifications to processor's right hand sides
807*
808 IF( mycol .LT. npcol-1) THEN
809*
810 CALL zgerv2d( ictxt, int_one, nrhs,
811 $ work( 1 ), int_one,
812 $ 0, mycol + 1 )
813*
814* Combine contribution to locally stored right hand sides
815*
816 CALL zmatadd( int_one, nrhs, cone,
817 $ work( 1 ), int_one, cone,
818 $ b( part_offset+odd_size + 1 ), lldb )
819*
820 ENDIF
821*
822*
823* The last processor does not participate in the solution of the
824* reduced system, having sent its contribution already.
825 IF( mycol .EQ. npcol-1 ) THEN
826 GOTO 14
827 ENDIF
828*
829*
830* *************************************
831* Modification Loop
832*
833* The distance for sending and receiving for each level starts
834* at 1 for the first level.
835 level_dist = 1
836*
837* Do until this proc is needed to modify other procs' equations
838*
839 12 CONTINUE
840 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 11
841*
842* Receive and add contribution to righthand sides from left
843*
844 IF( mycol-level_dist .GE. 0 ) THEN
845*
846 CALL zgerv2d( ictxt, int_one, nrhs,
847 $ work( 1 ),
848 $ int_one, 0, mycol-level_dist )
849*
850 CALL zmatadd( int_one, nrhs, cone,
851 $ work( 1 ), int_one, cone,
852 $ b( part_offset+odd_size + 1 ), lldb )
853*
854 ENDIF
855*
856* Receive and add contribution to righthand sides from right
857*
858 IF( mycol+level_dist .LT. npcol-1 ) THEN
859*
860 CALL zgerv2d( ictxt, int_one, nrhs,
861 $ work( 1 ),
862 $ int_one, 0, mycol+level_dist )
863*
864 CALL zmatadd( int_one, nrhs, cone,
865 $ work( 1 ), int_one, cone,
866 $ b( part_offset+odd_size + 1 ), lldb )
867*
868 ENDIF
869*
870 level_dist = level_dist*2
871*
872 GOTO 12
873 11 CONTINUE
874* [End of GOTO Loop]
875*
876*
877*
878* *********************************
879* Calculate and use this proc's blocks to modify other procs
880*
881* Solve with diagonal block
882*
883 CALL ztbtrs( 'L', 'N', 'U', int_one, min( int_one, int_one-1 ),
884 $ nrhs, af( odd_size+2 ), int_one+1,
885 $ b( part_offset+odd_size+1 ), lldb, info )
886*
887 IF( info.NE.0 ) THEN
888 GO TO 1000
889 ENDIF
890*
891*
892*
893* *********
894 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
895*
896* Calculate contribution from this block to next diagonal block
897*
898 CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
899 $ af( (odd_size)*int_one+1 ),
900 $ int_one,
901 $ b( part_offset+odd_size+1 ),
902 $ lldb, czero,
903 $ work( 1 ),
904 $ int_one )
905*
906* Send contribution to diagonal block's owning processor.
907*
908 CALL zgesd2d( ictxt, int_one, nrhs,
909 $ work( 1 ),
910 $ int_one, 0, mycol+level_dist )
911*
912 ENDIF
913* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
914*
915* ************
916 IF( (mycol/level_dist .GT. 0 ).AND.
917 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
918*
919*
920* Use offdiagonal block to calculate modification to diag block
921* of processor to the left
922*
923 CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
924 $ af( odd_size*int_one+2+1 ),
925 $ int_one,
926 $ b( part_offset+odd_size+1 ),
927 $ lldb, czero,
928 $ work( 1 ),
929 $ int_one )
930*
931* Send contribution to diagonal block's owning processor.
932*
933 CALL zgesd2d( ictxt, int_one, nrhs,
934 $ work( 1 ),
935 $ int_one, 0, mycol-level_dist )
936*
937 ENDIF
938* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
939*
940 14 CONTINUE
941*
942 ELSE
943*
944******************** BACKSOLVE *************************************
945*
946********************************************************************
947* .. Begin reduced system phase of algorithm ..
948********************************************************************
949*
950*
951*
952* The last processor does not participate in the solution of the
953* reduced system and just waits to receive its solution.
954 IF( mycol .EQ. npcol-1 ) THEN
955 GOTO 24
956 ENDIF
957*
958* Determine number of steps in tree loop
959*
960 level_dist = 1
961 27 CONTINUE
962 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 26
963*
964 level_dist = level_dist*2
965*
966 GOTO 27
967 26 CONTINUE
968*
969*
970 IF( (mycol/level_dist .GT. 0 ).AND.
971 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
972*
973* Receive solution from processor to left
974*
975 CALL zgerv2d( ictxt, int_one, nrhs,
976 $ work( 1 ),
977 $ int_one, 0, mycol-level_dist )
978*
979*
980* Use offdiagonal block to calculate modification to RHS stored
981* on this processor
982*
983 CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
984 $ af( odd_size*int_one+2+1 ),
985 $ int_one,
986 $ work( 1 ),
987 $ int_one, cone,
988 $ b( part_offset+odd_size+1 ),
989 $ lldb )
990 ENDIF
991* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
992*
993*
994 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
995*
996* Receive solution from processor to right
997*
998 CALL zgerv2d( ictxt, int_one, nrhs,
999 $ work( 1 ),
1000 $ int_one, 0, mycol+level_dist )
1001*
1002* Calculate contribution from this block to next diagonal block
1003*
1004 CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
1005 $ af( (odd_size)*int_one+1 ),
1006 $ int_one,
1007 $ work( 1 ),
1008 $ int_one, cone,
1009 $ b( part_offset+odd_size+1 ),
1010 $ lldb )
1011*
1012 ENDIF
1013* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1014*
1015*
1016* Solve with diagonal block
1017*
1018 CALL ztbtrs( 'L', 'C', 'U', int_one, min( int_one, int_one-1 ),
1019 $ nrhs, af( odd_size+2 ), int_one+1,
1020 $ b( part_offset+odd_size+1 ), lldb, info )
1021*
1022 IF( info.NE.0 ) THEN
1023 GO TO 1000
1024 ENDIF
1025*
1026*
1027*
1028***Modification Loop *******
1029*
1030 22 CONTINUE
1031 IF( level_dist .EQ. 1 ) GOTO 21
1032*
1033 level_dist = level_dist/2
1034*
1035* Send solution to the right
1036*
1037 IF( mycol+level_dist .LT. npcol-1 ) THEN
1038*
1039 CALL zgesd2d( ictxt, int_one, nrhs,
1040 $ b( part_offset+odd_size+1 ),
1041 $ lldb, 0, mycol+level_dist )
1042*
1043 ENDIF
1044*
1045* Send solution to left
1046*
1047 IF( mycol-level_dist .GE. 0 ) THEN
1048*
1049 CALL zgesd2d( ictxt, int_one, nrhs,
1050 $ b( part_offset+odd_size+1 ),
1051 $ lldb, 0, mycol-level_dist )
1052*
1053 ENDIF
1054*
1055 GOTO 22
1056 21 CONTINUE
1057* [End of GOTO Loop]
1058*
1059 24 CONTINUE
1060* [Processor npcol - 1 jumped to here to await next stage]
1061*
1062*******************************
1063* Reduced system has been solved, communicate solutions to nearest
1064* neighbors in preparation for local computation phase.
1065*
1066*
1067* Send elements of solution to next proc
1068*
1069 IF( mycol .LT. npcol-1) THEN
1070*
1071 CALL zgesd2d( ictxt, int_one, nrhs,
1072 $ b( part_offset+odd_size+1 ), lldb,
1073 $ 0, mycol +1 )
1074*
1075 ENDIF
1076*
1077* Receive modifications to processor's right hand sides
1078*
1079 IF( mycol .GT. 0) THEN
1080*
1081 CALL zgerv2d( ictxt, int_one, nrhs,
1082 $ work( 1 ), int_one,
1083 $ 0, mycol - 1 )
1084*
1085 ENDIF
1086*
1087*
1088*
1089**********************************************
1090* Local computation phase
1091**********************************************
1092*
1093 IF ( mycol .NE. 0 ) THEN
1094* Use the "spike" fillin to calculate contribution from previous
1095* processor's solution.
1096*
1097 CALL zgemm( 'N', 'N', odd_size, nrhs, int_one, -cone, af( 1 ),
1098 $ odd_size, work( 1+int_one-int_one ), int_one,
1099 $ cone, b( part_offset+1 ), lldb )
1100*
1101 ENDIF
1102*
1103*
1104 IF ( mycol .LT. np-1 ) THEN
1105* Use factorization of odd-even connection block to modify
1106* locally stored portion of right hand side(s)
1107*
1108 CALL zaxpy( nrhs, -dconjg( dl( part_offset+odd_size+1 ) ),
1109 $ b( part_offset+odd_size+1 ), lldb,
1110 $ b( part_offset+odd_size ), lldb )
1111*
1112 ENDIF
1113*
1114* Use main partition in each processor to solve locally
1115*
1116 CALL zdttrsv( uplo, 'C', odd_size, nrhs, dl( part_offset+2 ),
1117 $ d( part_offset+1 ), du( part_offset+1 ),
1118 $ b( part_offset+1 ), lldb, info )
1119*
1120 ENDIF
1121* End of "IF( LSAME( TRANS, 'N' ) )"...
1122*
1123*
1124 ELSE
1125***************************************************************
1126* CASE UPLO = 'U' *
1127***************************************************************
1128 IF ( lsame( trans, 'C' ) ) THEN
1129*
1130* Frontsolve
1131*
1132*
1133******************************************
1134* Local computation phase
1135******************************************
1136*
1137* Use main partition in each processor to solve locally
1138*
1139 CALL zdttrsv( uplo, 'C', odd_size, nrhs, dl( part_offset+2 ),
1140 $ d( part_offset+1 ), du( part_offset+1 ),
1141 $ b( part_offset+1 ), lldb, info )
1142*
1143*
1144 IF ( mycol .LT. np-1 ) THEN
1145* Use factorization of odd-even connection block to modify
1146* locally stored portion of right hand side(s)
1147*
1148 CALL zaxpy( nrhs, -dconjg( du( part_offset+odd_size ) ),
1149 $ b( part_offset+odd_size ), lldb,
1150 $ b( part_offset+odd_size+1 ), lldb )
1151*
1152 ENDIF
1153*
1154*
1155 IF ( mycol .NE. 0 ) THEN
1156* Use the "spike" fillin to calculate contribution to previous
1157* processor's righthand-side.
1158*
1159 CALL zgemm( 'C', 'N', int_one, nrhs, odd_size, -cone,
1160 $ af( work_u+1 ), odd_size, b( part_offset+1 ),
1161 $ lldb, czero, work( 1+int_one-int_one ),
1162 $ int_one )
1163 ENDIF
1164*
1165*
1166************************************************
1167* Formation and solution of reduced system
1168************************************************
1169*
1170*
1171* Send modifications to prior processor's right hand sides
1172*
1173 IF( mycol .GT. 0) THEN
1174*
1175 CALL zgesd2d( ictxt, int_one, nrhs,
1176 $ work( 1 ), int_one,
1177 $ 0, mycol - 1 )
1178*
1179 ENDIF
1180*
1181* Receive modifications to processor's right hand sides
1182*
1183 IF( mycol .LT. npcol-1) THEN
1184*
1185 CALL zgerv2d( ictxt, int_one, nrhs,
1186 $ work( 1 ), int_one,
1187 $ 0, mycol + 1 )
1188*
1189* Combine contribution to locally stored right hand sides
1190*
1191 CALL zmatadd( int_one, nrhs, cone,
1192 $ work( 1 ), int_one, cone,
1193 $ b( part_offset+odd_size + 1 ), lldb )
1194*
1195 ENDIF
1196*
1197*
1198* The last processor does not participate in the solution of the
1199* reduced system, having sent its contribution already.
1200 IF( mycol .EQ. npcol-1 ) THEN
1201 GOTO 44
1202 ENDIF
1203*
1204*
1205* *************************************
1206* Modification Loop
1207*
1208* The distance for sending and receiving for each level starts
1209* at 1 for the first level.
1210 level_dist = 1
1211*
1212* Do until this proc is needed to modify other procs' equations
1213*
1214 42 CONTINUE
1215 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 41
1216*
1217* Receive and add contribution to righthand sides from left
1218*
1219 IF( mycol-level_dist .GE. 0 ) THEN
1220*
1221 CALL zgerv2d( ictxt, int_one, nrhs,
1222 $ work( 1 ),
1223 $ int_one, 0, mycol-level_dist )
1224*
1225 CALL zmatadd( int_one, nrhs, cone,
1226 $ work( 1 ), int_one, cone,
1227 $ b( part_offset+odd_size + 1 ), lldb )
1228*
1229 ENDIF
1230*
1231* Receive and add contribution to righthand sides from right
1232*
1233 IF( mycol+level_dist .LT. npcol-1 ) THEN
1234*
1235 CALL zgerv2d( ictxt, int_one, nrhs,
1236 $ work( 1 ),
1237 $ int_one, 0, mycol+level_dist )
1238*
1239 CALL zmatadd( int_one, nrhs, cone,
1240 $ work( 1 ), int_one, cone,
1241 $ b( part_offset+odd_size + 1 ), lldb )
1242*
1243 ENDIF
1244*
1245 level_dist = level_dist*2
1246*
1247 GOTO 42
1248 41 CONTINUE
1249* [End of GOTO Loop]
1250*
1251*
1252*
1253* *********************************
1254* Calculate and use this proc's blocks to modify other procs
1255*
1256* Solve with diagonal block
1257*
1258 CALL ztbtrs( 'U', 'C', 'N', int_one, min( int_one, int_one-1 ),
1259 $ nrhs, af( odd_size+2 ), int_one+1,
1260 $ b( part_offset+odd_size+1 ), lldb, info )
1261*
1262 IF( info.NE.0 ) THEN
1263 GO TO 1000
1264 ENDIF
1265*
1266*
1267*
1268* *********
1269 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1270*
1271* Calculate contribution from this block to next diagonal block
1272*
1273 CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1274 $ af( work_u+(odd_size)*int_one+1 ),
1275 $ int_one,
1276 $ b( part_offset+odd_size+1 ),
1277 $ lldb, czero,
1278 $ work( 1 ),
1279 $ int_one )
1280*
1281* Send contribution to diagonal block's owning processor.
1282*
1283 CALL zgesd2d( ictxt, int_one, nrhs,
1284 $ work( 1 ),
1285 $ int_one, 0, mycol+level_dist )
1286*
1287 ENDIF
1288* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1289*
1290* ************
1291 IF( (mycol/level_dist .GT. 0 ).AND.
1292 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1293*
1294*
1295* Use offdiagonal block to calculate modification to diag block
1296* of processor to the left
1297*
1298 CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
1299 $ af( work_u+odd_size*int_one+2+1 ),
1300 $ int_one,
1301 $ b( part_offset+odd_size+1 ),
1302 $ lldb, czero,
1303 $ work( 1 ),
1304 $ int_one )
1305*
1306* Send contribution to diagonal block's owning processor.
1307*
1308 CALL zgesd2d( ictxt, int_one, nrhs,
1309 $ work( 1 ),
1310 $ int_one, 0, mycol-level_dist )
1311*
1312 ENDIF
1313* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1314*
1315 44 CONTINUE
1316*
1317 ELSE
1318*
1319******************** BACKSOLVE *************************************
1320*
1321********************************************************************
1322* .. Begin reduced system phase of algorithm ..
1323********************************************************************
1324*
1325*
1326*
1327* The last processor does not participate in the solution of the
1328* reduced system and just waits to receive its solution.
1329 IF( mycol .EQ. npcol-1 ) THEN
1330 GOTO 54
1331 ENDIF
1332*
1333* Determine number of steps in tree loop
1334*
1335 level_dist = 1
1336 57 CONTINUE
1337 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 56
1338*
1339 level_dist = level_dist*2
1340*
1341 GOTO 57
1342 56 CONTINUE
1343*
1344*
1345 IF( (mycol/level_dist .GT. 0 ).AND.
1346 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1347*
1348* Receive solution from processor to left
1349*
1350 CALL zgerv2d( ictxt, int_one, nrhs,
1351 $ work( 1 ),
1352 $ int_one, 0, mycol-level_dist )
1353*
1354*
1355* Use offdiagonal block to calculate modification to RHS stored
1356* on this processor
1357*
1358 CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1359 $ af( work_u+odd_size*int_one+2+1 ),
1360 $ int_one,
1361 $ work( 1 ),
1362 $ int_one, cone,
1363 $ b( part_offset+odd_size+1 ),
1364 $ lldb )
1365 ENDIF
1366* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1367*
1368*
1369 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1370*
1371* Receive solution from processor to right
1372*
1373 CALL zgerv2d( ictxt, int_one, nrhs,
1374 $ work( 1 ),
1375 $ int_one, 0, mycol+level_dist )
1376*
1377* Calculate contribution from this block to next diagonal block
1378*
1379 CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
1380 $ af( work_u+(odd_size)*int_one+1 ),
1381 $ int_one,
1382 $ work( 1 ),
1383 $ int_one, cone,
1384 $ b( part_offset+odd_size+1 ),
1385 $ lldb )
1386*
1387 ENDIF
1388* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1389*
1390*
1391* Solve with diagonal block
1392*
1393 CALL ztbtrs( 'U', 'N', 'N', int_one, min( int_one, int_one-1 ),
1394 $ nrhs, af( odd_size+2 ), int_one+1,
1395 $ b( part_offset+odd_size+1 ), lldb, info )
1396*
1397 IF( info.NE.0 ) THEN
1398 GO TO 1000
1399 ENDIF
1400*
1401*
1402*
1403***Modification Loop *******
1404*
1405 52 CONTINUE
1406 IF( level_dist .EQ. 1 ) GOTO 51
1407*
1408 level_dist = level_dist/2
1409*
1410* Send solution to the right
1411*
1412 IF( mycol+level_dist .LT. npcol-1 ) THEN
1413*
1414 CALL zgesd2d( ictxt, int_one, nrhs,
1415 $ b( part_offset+odd_size+1 ),
1416 $ lldb, 0, mycol+level_dist )
1417*
1418 ENDIF
1419*
1420* Send solution to left
1421*
1422 IF( mycol-level_dist .GE. 0 ) THEN
1423*
1424 CALL zgesd2d( ictxt, int_one, nrhs,
1425 $ b( part_offset+odd_size+1 ),
1426 $ lldb, 0, mycol-level_dist )
1427*
1428 ENDIF
1429*
1430 GOTO 52
1431 51 CONTINUE
1432* [End of GOTO Loop]
1433*
1434 54 CONTINUE
1435* [Processor npcol - 1 jumped to here to await next stage]
1436*
1437*******************************
1438* Reduced system has been solved, communicate solutions to nearest
1439* neighbors in preparation for local computation phase.
1440*
1441*
1442* Send elements of solution to next proc
1443*
1444 IF( mycol .LT. npcol-1) THEN
1445*
1446 CALL zgesd2d( ictxt, int_one, nrhs,
1447 $ b( part_offset+odd_size+1 ), lldb,
1448 $ 0, mycol +1 )
1449*
1450 ENDIF
1451*
1452* Receive modifications to processor's right hand sides
1453*
1454 IF( mycol .GT. 0) THEN
1455*
1456 CALL zgerv2d( ictxt, int_one, nrhs,
1457 $ work( 1 ), int_one,
1458 $ 0, mycol - 1 )
1459*
1460 ENDIF
1461*
1462*
1463*
1464**********************************************
1465* Local computation phase
1466**********************************************
1467*
1468 IF ( mycol .NE. 0 ) THEN
1469* Use the "spike" fillin to calculate contribution from previous
1470* processor's solution.
1471*
1472 CALL zgemm( 'N', 'N', odd_size, nrhs, int_one, -cone,
1473 $ af( work_u+1 ), odd_size,
1474 $ work( 1+int_one-int_one ), int_one, cone,
1475 $ b( part_offset+1 ), lldb )
1476*
1477 ENDIF
1478*
1479*
1480 IF ( mycol .LT. np-1 ) THEN
1481* Use factorization of odd-even connection block to modify
1482* locally stored portion of right hand side(s)
1483*
1484 CALL zaxpy( nrhs, -( du( part_offset+odd_size ) ),
1485 $ b( part_offset+odd_size+1 ), lldb,
1486 $ b( part_offset+odd_size ), lldb )
1487*
1488 ENDIF
1489*
1490* Use main partition in each processor to solve locally
1491*
1492 CALL zdttrsv( uplo, 'N', odd_size, nrhs, du( part_offset+2 ),
1493 $ d( part_offset+1 ), du( part_offset+1 ),
1494 $ b( part_offset+1 ), lldb, info )
1495*
1496 ENDIF
1497* End of "IF( LSAME( TRANS, 'N' ) )"...
1498*
1499*
1500 ENDIF
1501* End of "IF( LSAME( UPLO, 'L' ) )"...
1502 1000 CONTINUE
1503*
1504*
1505* Free BLACS space used to hold standard-form grid.
1506*
1507 IF( ictxt_save .NE. ictxt_new ) THEN
1508 CALL blacs_gridexit( ictxt_new )
1509 ENDIF
1510*
1511 1234 CONTINUE
1512*
1513* Restore saved input parameters
1514*
1515 ictxt = ictxt_save
1516 np = np_save
1517*
1518* Output minimum worksize
1519*
1520 work( 1 ) = work_size_min
1521*
1522*
1523 RETURN
1524*
1525* End of PZDTTRSV
1526*
1527 END