ScaLAPACK 2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzdttrsv.f
1  SUBROUTINE pzdttrsv( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA,
2  $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 2.0.2) --
5 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6 * May 1 2012
7 *
8 * .. Scalar Arguments ..
9  CHARACTER TRANS, UPLO
10  INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * ), DESCB( * )
14  COMPLEX*16 AF( * ), B( * ), D( * ), DL( * ), DU( * ),
15  $ work( * )
16 * ..
17 *
18 *
19 * Purpose
20 * =======
21 *
22 * PZDTTRSV solves a tridiagonal triangular system of linear equations
23 *
24 * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
25 * or
26 * A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS)
27 *
28 * where A(1:N, JA:JA+N-1) is a tridiagonal
29 * triangular matrix factor produced by the
30 * Gaussian elimination code PZDTTRF
31 * and is stored in A(1:N,JA:JA+N-1) and AF.
32 * The matrix stored in A(1:N, JA:JA+N-1) is either
33 * upper or lower triangular according to UPLO,
34 * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H
35 * is dictated by the user by the parameter TRANS.
36 *
37 * Routine PZDTTRF MUST be called first.
38 *
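* A minimal sketch of the intended calling sequence (assuming that
* the BLACS grid, the descriptors DESCA and DESCB, and the sizes
* LAF and LWORK have already been set up as described under the
* Arguments and NOTES sections below; lower-triangular,
* no-transpose case shown):
*
*    CALL PZDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK,
*   $              INFO )
*    CALL PZDTTRSV( 'L', 'N', N, NRHS, DL, D, DU, JA, DESCA, B, IB,
*   $               DESCB, AF, LAF, WORK, LWORK, INFO )
*
* AF must be passed unchanged from PZDTTRF to this routine.
*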
39 * =====================================================================
40 *
41 * Arguments
42 * =========
43 *
44 * UPLO (global input) CHARACTER
45 * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored;
46 * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored.
47 *
48 * TRANS (global input) CHARACTER
49 * = 'N': Solve with A(1:N, JA:JA+N-1);
50 * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) );
51 *
52 * N (global input) INTEGER
53 * The number of rows and columns to be operated on, i.e. the
54 * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
55 *
56 * NRHS (global input) INTEGER
57 * The number of right hand sides, i.e., the number of columns
58 * of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
59 * NRHS >= 0.
60 *
61 * DL (local input/local output) COMPLEX*16 pointer to local
62 * part of global vector storing the lower diagonal of the
63 * matrix. Globally, DL(1) is not referenced, and DL must be
64 * aligned with D.
65 * Must be of size >= DESCA( NB_ ).
66 * On exit, this array contains information describing the
67 * factors of the matrix.
68 *
69 * D (local input/local output) COMPLEX*16 pointer to local
70 * part of global vector storing the main diagonal of the
71 * matrix.
72 * On exit, this array contains information describing the
73 * factors of the matrix.
74 * Must be of size >= DESCA( NB_ ).
75 *
76 * DU (local input/local output) COMPLEX*16 pointer to local
77 * part of global vector storing the upper diagonal of the
78 * matrix. Globally, DU(n) is not referenced, and DU must be
79 * aligned with D.
80 * On exit, this array contains information describing the
81 * factors of the matrix.
82 * Must be of size >= DESCA( NB_ ).
83 *
84 * JA (global input) INTEGER
85 * The index in the global array A that points to the start of
86 * the matrix to be operated on (which may be either all of A
87 * or a submatrix of A).
88 *
89 * DESCA (global and local input) INTEGER array of dimension DLEN.
90 * if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
91 * if 2D type (DTYPE_A=1), DLEN >= 9.
92 * The array descriptor for the distributed matrix A.
93 * Contains information of mapping of A to memory. Please
94 * see NOTES below for full description and options.
95 *
96 * B (local input/local output) COMPLEX*16 pointer into
97 * local memory to an array of local leading dimension
98 * LLD_B >= NB.
99 * On entry, this array contains the local pieces of the
100 * right hand sides B(IB:IB+N-1, 1:NRHS).
101 * On exit, this array contains the local pieces of the
102 * distributed solution matrix X.
103 *
104 * IB (global input) INTEGER
105 * The row index in the global array B that points to the first
106 * row of the matrix to be operated on (which may be either
107 * all of B or a submatrix of B).
108 *
109 * DESCB (global and local input) INTEGER array of dimension DLEN.
110 * if 1D type (DTYPE_B=502), DLEN >=7;
111 * if 2D type (DTYPE_B=1), DLEN >= 9.
112 * The array descriptor for the distributed matrix B.
113 * Contains information of mapping of B to memory. Please
114 * see NOTES below for full description and options.
115 *
116 * AF (local output) COMPLEX*16 array, dimension LAF.
117 * Auxiliary Fillin Space.
118 * Fillin is created during the factorization routine
119 * PZDTTRF and this is stored in AF. If a linear system
120 * is to be solved using PZDTTRS after the factorization
121 * routine, AF *must not be altered* after the factorization.
122 *
123 * LAF (local input) INTEGER
124 * Size of user-input Auxiliary Fillin space AF. Must be >=
125 * 2*(NB+2)
126 * If LAF is not large enough, an error code will be returned
127 * and the minimum acceptable size will be returned in AF( 1 )
128 *
129 * WORK (local workspace/local output)
130 * COMPLEX*16 temporary workspace. This space may
131 * be overwritten in between calls to routines. WORK must be
132 * the size given in LWORK.
133 * On exit, WORK( 1 ) contains the minimal LWORK.
134 *
135 * LWORK (local input or global input) INTEGER
136 * Size of user-input workspace WORK.
137 * If LWORK is too small, the minimal acceptable size will be
138 * returned in WORK(1) and an error code is returned. LWORK>=
139 * 10*NPCOL+4*NRHS
140 *
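* Although not documented above, the code below accepts LWORK = -1
* as a workspace query: the routine returns early with the minimal
* LWORK in WORK( 1 ) and does not treat LWORK = -1 as an error.
* A sketch of such a query (WORK declared with at least one entry):
*
*    CALL PZDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA,
*   $               B, IB, DESCB, AF, LAF, WORK, -1, INFO )
*    LWORK = INT( DBLE( WORK( 1 ) ) )
*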
141 * INFO (local output) INTEGER
142 * = 0: successful exit
143 * < 0: If the i-th argument is an array and the j-th entry
144 * had an illegal value, then INFO = -(i*100+j); if the i-th
145 * argument is a scalar and had an illegal value, then
146 * INFO = -i.
147 *
148 * =====================================================================
149 *
150 *
151 * Restrictions
152 * ============
153 *
154 * The following are restrictions on the input parameters. Some of these
155 * are temporary and will be removed in future releases, while others
156 * may reflect fundamental technical limitations.
157 *
158 * Non-cyclic restriction: VERY IMPORTANT!
159 * P*NB>= mod(JA-1,NB)+N.
160 * The mapping for matrices must be blocked, reflecting the nature
161 * of the divide and conquer algorithm as a task-parallel algorithm.
162 * This formula in words is: no processor may have more than one
163 * chunk of the matrix.
164 *
165 * Blocksize cannot be too small:
166 * If the matrix spans more than one processor, the following
167 * restriction on NB, the size of each block on each processor,
168 * must hold:
169 * NB >= 2
170 * The bulk of parallel computation is done on the matrix of size
171 * O(NB) on each processor. If this is too small, divide and conquer
172 * is a poor choice of algorithm.
173 *
174 * Submatrix reference:
175 * JA = IB
176 * Alignment restriction that prevents unnecessary communication.
177 *
178 *
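* The restrictions above can be verified by the caller before the
* call; a sketch (P = NPCOL from BLACS_GRIDINFO and NB = DESCA( 4 )
* for the one-dimensional descriptor types; the NB >= 2 condition
* is only needed when the matrix spans more than one process):
*
*    LOGICAL OK
*    OK = ( NPCOL*NB .GE. MOD( JA-1, NB ) + N )
*    OK = OK .AND. ( NB .GE. 2 )
*    OK = OK .AND. ( JA .EQ. IB )
*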
179 * =====================================================================
180 *
181 *
182 * Notes
183 * =====
184 *
185 * If the factorization routine and the solve routine are to be called
186 * separately (to solve various sets of righthand sides using the same
187 * coefficient matrix), the auxiliary space AF *must not be altered*
188 * between calls to the factorization routine and the solve routine.
189 *
190 * The best algorithm for solving banded and tridiagonal linear systems
191 * depends on a variety of parameters, especially the bandwidth.
192 * Currently, only algorithms designed for the case N/P >> bw are
193 * implemented. These go by many names, including Divide and Conquer,
194 * Partitioning, domain decomposition-type, etc.
195 * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
196 * algorithms are the appropriate choice.
197 *
198 * Algorithm description: Divide and Conquer
199 *
200 * The Divide and Conquer algorithm assumes the matrix is narrowly
201 * banded compared with the number of equations. In this situation,
202 * it is best to distribute the input matrix A one-dimensionally,
203 * with columns atomic and rows divided amongst the processes.
204 * The basic algorithm divides the tridiagonal matrix up into
205 * P pieces with one stored on each processor,
206 * and then proceeds in 2 phases for the factorization or 3 for the
207 * solution of a linear system.
208 * 1) Local Phase:
209 * The individual pieces are factored independently and in
210 * parallel. These factors are applied to the matrix creating
211 * fillin, which is stored in a non-inspectable way in auxiliary
212 * space AF. Mathematically, this is equivalent to reordering
213 * the matrix A as P A P^T and then factoring the principal
214 * leading submatrix of size equal to the sum of the sizes of
215 * the matrices factored on each processor. The factors of
216 * these submatrices overwrite the corresponding parts of A
217 * in memory.
218 * 2) Reduced System Phase:
219 * A small (P-1) system is formed representing the
220 * interaction of the larger blocks, and is stored (as are its
221 * factors) in the space AF. A parallel Block Cyclic Reduction
222 * algorithm is used. For a linear system, a parallel front solve
223 * followed by an analogous backsolve, both using the structure
224 * of the factored matrix, are performed.
225 * 3) Backsubstitution Phase:
226 * For a linear system, a local backsubstitution is performed on
227 * each processor in parallel.
228 *
229 *
230 * Descriptors
231 * ===========
232 *
233 * Descriptors now have *types* and differ from ScaLAPACK 1.0.
234 *
235 * Note: tridiagonal codes can use either the old two dimensional
236 * or new one-dimensional descriptors, though the processor grid in
237 * both cases *must be one-dimensional*. We describe both types below.
238 *
239 * Each global data object is described by an associated description
240 * vector. This vector stores the information required to establish
241 * the mapping between an object element and its corresponding process
242 * and memory location.
243 *
244 * Let A be a generic term for any 2D block cyclically distributed array.
245 * Such a global array has an associated description vector DESCA.
246 * In the following comments, the character _ should be read as
247 * "of the global array".
248 *
249 * NOTATION STORED IN EXPLANATION
250 * --------------- -------------- --------------------------------------
251 * DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
252 * DTYPE_A = 1.
253 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
254 * the BLACS process grid A is distribu-
255 * ted over. The context itself is glo-
256 * bal, but the handle (the integer
257 * value) may vary.
258 * M_A (global) DESCA( M_ ) The number of rows in the global
259 * array A.
260 * N_A (global) DESCA( N_ ) The number of columns in the global
261 * array A.
262 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
263 * the rows of the array.
264 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
265 * the columns of the array.
266 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
267 * row of the array A is distributed.
268 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
269 * first column of the array A is
270 * distributed.
271 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
272 * array. LLD_A >= MAX(1,LOCr(M_A)).
273 *
274 * Let K be the number of rows or columns of a distributed matrix,
275 * and assume that its process grid has dimension p x q.
276 * LOCr( K ) denotes the number of elements of K that a process
277 * would receive if K were distributed over the p processes of its
278 * process column.
279 * Similarly, LOCc( K ) denotes the number of elements of K that a
280 * process would receive if K were distributed over the q processes of
281 * its process row.
282 * The values of LOCr() and LOCc() may be determined via a call to the
283 * ScaLAPACK tool function, NUMROC:
284 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
285 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
286 * An upper bound for these quantities may be computed by:
287 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
288 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
289 *
290 *
291 * One-dimensional descriptors:
292 *
293 * One-dimensional descriptors are a new addition to ScaLAPACK since
294 * version 1.0. They simplify and shorten the descriptor for 1D
295 * arrays.
296 *
297 * Since ScaLAPACK supports two-dimensional arrays as the fundamental
298 * object, we allow 1D arrays to be distributed either over the
299 * first dimension of the array (as if the grid were P-by-1) or the
300 * 2nd dimension (as if the grid were 1-by-P). This choice is
301 * indicated by the descriptor type (501 or 502)
302 * as described below.
303 * However, for tridiagonal matrices, since the objects being
304 * distributed are the individual vectors storing the diagonals, we
305 * have adopted the convention that both the P-by-1 descriptor and
306 * the 1-by-P descriptor are allowed and are equivalent for
307 * tridiagonal matrices. Thus, for tridiagonal matrices,
308 * DTYPE_A = 501 or 502 can be used interchangeably
309 * without any other change.
310 * We require that the distributed vectors storing the diagonals of a
311 * tridiagonal matrix be aligned with each other. Because of this, a
312 * single descriptor, DESCA, serves to describe the distribution of
313 * all diagonals simultaneously.
314 *
315 * IMPORTANT NOTE: the actual BLACS grid represented by the
316 * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
317 * irrespective of which one-dimensional descriptor type
318 * (501 or 502) is input.
319 * This routine will interpret the grid properly either way.
320 * ScaLAPACK routines *do not support intercontext operations* so that
321 * the grid passed to a single ScaLAPACK routine *must be the same*
322 * for all array descriptors passed to that routine.
323 *
324 * NOTE: In all cases where 1D descriptors are used, 2D descriptors
325 * may also be used, since a one-dimensional array is a special case
326 * of a two-dimensional array with one dimension of size unity.
327 * The two-dimensional array used in this case *must* be of the
328 * proper orientation:
329 * If the appropriate one-dimensional descriptor is DTYPE_A=501
330 * (1 by P type), then the two dimensional descriptor must
331 * have a CTXT value that refers to a 1 by P BLACS grid;
332 * If the appropriate one-dimensional descriptor is DTYPE_A=502
333 * (P by 1 type), then the two dimensional descriptor must
334 * have a CTXT value that refers to a P by 1 BLACS grid.
335 *
336 *
337 * Summary of allowed descriptors, types, and BLACS grids:
338 * DTYPE        501         502        1      1
339 * BLACS grid   1xP or Px1  1xP or Px1 1xP    Px1
340 * -----------------------------------------------------
341 * A            OK          OK         OK     NO
342 * B            NO          OK         NO     OK
343 *
344 * Note that a consequence of this chart is that it is not possible
345 * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
346 * to opposite requirements for the orientation of the BLACS grid,
347 * and as noted before, the *same* BLACS context must be used in
348 * all descriptors in a single ScaLAPACK subroutine call.
349 *
350 * Let A be a generic term for any 1D block cyclically distributed array.
351 * Such a global array has an associated description vector DESCA.
352 * In the following comments, the character _ should be read as
353 * "of the global array".
354 *
355 * NOTATION STORED IN EXPLANATION
356 * --------------- ---------- ------------------------------------------
357 * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
358 * TYPE_A = 501: 1-by-P grid.
359 * TYPE_A = 502: P-by-1 grid.
360 * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
361 * the BLACS process grid A is distribu-
362 * ted over. The context itself is glo-
363 * bal, but the handle (the integer
364 * value) may vary.
365 * N_A (global) DESCA( 3 ) The size of the array dimension being
366 * distributed.
367 * NB_A (global) DESCA( 4 ) The blocking factor used to distribute
368 * the distributed dimension of the array.
369 * SRC_A (global) DESCA( 5 ) The process row or column over which the
370 * first row or column of the array
371 * is distributed.
372 * Ignored DESCA( 6 ) Ignored for tridiagonal matrices.
373 * Reserved DESCA( 7 ) Reserved for future use.
374 *
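* A minimal sketch of filling the one-dimensional descriptors for
* this routine (ICTXT, N, NB and the local leading dimension LLD_B
* of B are assumed to have been set by the caller; the source
* process is taken to be 0, and DESCA( 6 ) is ignored for
* tridiagonal matrices as noted above):
*
*    DESCA( 1 ) = 501
*    DESCA( 2 ) = ICTXT
*    DESCA( 3 ) = N
*    DESCA( 4 ) = NB
*    DESCA( 5 ) = 0
*    DESCA( 6 ) = 0
*    DESCA( 7 ) = 0
*
*    DESCB( 1 ) = 502
*    DESCB( 2 ) = ICTXT
*    DESCB( 3 ) = N
*    DESCB( 4 ) = NB
*    DESCB( 5 ) = 0
*    DESCB( 6 ) = LLD_B
*    DESCB( 7 ) = 0
*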
375 *
376 *
377 * =====================================================================
378 *
379 * Code Developer: Andrew J. Cleary, University of Tennessee.
380 * Current address: Lawrence Livermore National Labs.
381 * This version released: August, 2001.
382 *
383 * =====================================================================
384 *
385 * ..
386 * .. Parameters ..
387  DOUBLE PRECISION ONE, ZERO
388  parameter( one = 1.0d+0 )
389  parameter( zero = 0.0d+0 )
390  COMPLEX*16 CONE, CZERO
391  parameter( cone = ( 1.0d+0, 0.0d+0 ) )
392  parameter( czero = ( 0.0d+0, 0.0d+0 ) )
393  INTEGER INT_ONE
394  parameter( int_one = 1 )
395  INTEGER DESCMULT, BIGNUM
396  parameter(descmult = 100, bignum = descmult * descmult)
397  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
398  $ lld_, mb_, m_, nb_, n_, rsrc_
399  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
400  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
401  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
402 * ..
403 * .. Local Scalars ..
404  INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
405  $ idum1, idum2, idum3, ja_new, level_dist, llda,
406  $ lldb, mycol, myrow, my_num_cols, nb, np, npcol,
407  $ nprow, np_save, odd_size, part_offset,
408  $ part_size, return_code, store_m_b, store_n_a,
409  $ temp, work_size_min, work_u
410 * ..
411 * .. Local Arrays ..
412  INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
413  $ param_check( 16, 3 )
414 * ..
415 * .. External Subroutines ..
416  EXTERNAL blacs_get, blacs_gridexit, blacs_gridinfo,
417  $ desc_convert, globchk, pxerbla, reshape, zgemm,
418  $ zgerv2d, zgesd2d, zlamov, zmatadd, ztbtrs,
419  $ ztrmm, ztrtrs
420 * ..
421 * .. External Functions ..
422  LOGICAL LSAME
423  INTEGER NUMROC
424  COMPLEX*16 ZDOTC
425  EXTERNAL lsame, numroc, zdotc
426 * ..
427 * .. Intrinsic Functions ..
428  INTRINSIC ichar, min, mod
429 * ..
430 * .. Executable Statements ..
431 *
432 * Test the input parameters
433 *
434  info = 0
435 *
436 * Convert descriptor into standard form for easy access to
437 * parameters, check that grid is of right shape.
438 *
439  desca_1xp( 1 ) = 501
440  descb_px1( 1 ) = 502
441 *
442  temp = desca( dtype_ )
443  IF( temp .EQ. 502 ) THEN
444 * Temporarily set the descriptor type to 1xP type
445  desca( dtype_ ) = 501
446  ENDIF
447 *
448  CALL desc_convert( desca, desca_1xp, return_code )
449 *
450  desca( dtype_ ) = temp
451 *
452  IF( return_code .NE. 0) THEN
453  info = -( 9*100 + 2 )
454  ENDIF
455 *
456  CALL desc_convert( descb, descb_px1, return_code )
457 *
458  IF( return_code .NE. 0) THEN
459  info = -( 12*100 + 2 )
460  ENDIF
461 *
462 * Consistency checks for DESCA and DESCB.
463 *
464 * Context must be the same
465  IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) ) THEN
466  info = -( 12*100 + 2 )
467  ENDIF
468 *
469 * These are alignment restrictions that may or may not be removed
470 * in future releases. -Andy Cleary, April 14, 1996.
471 *
472 * Block sizes must be the same
473  IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) ) THEN
474  info = -( 12*100 + 4 )
475  ENDIF
476 *
477 * Source processor must be the same
478 *
479  IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) ) THEN
480  info = -( 12*100 + 5 )
481  ENDIF
482 *
483 * Get values out of descriptor for use in code.
484 *
485  ictxt = desca_1xp( 2 )
486  csrc = desca_1xp( 5 )
487  nb = desca_1xp( 4 )
488  llda = desca_1xp( 6 )
489  store_n_a = desca_1xp( 3 )
490  lldb = descb_px1( 6 )
491  store_m_b = descb_px1( 3 )
492 *
493 * Get grid parameters
494 *
495 *
496  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
497  np = nprow * npcol
498 *
499 *
500 *
501  IF( lsame( uplo, 'U' ) ) THEN
502  idum1 = ichar( 'U' )
503  ELSE IF ( lsame( uplo, 'L' ) ) THEN
504  idum1 = ichar( 'L' )
505  ELSE
506  info = -1
507  END IF
508 *
509  IF( lsame( trans, 'N' ) ) THEN
510  idum2 = ichar( 'N' )
511  ELSE IF ( lsame( trans, 'C' ) ) THEN
512  idum2 = ichar( 'C' )
513  ELSE
514  info = -2
515  END IF
516 *
517  IF( lwork .LT. -1) THEN
518  info = -16
519  ELSE IF ( lwork .EQ. -1 ) THEN
520  idum3 = -1
521  ELSE
522  idum3 = 1
523  ENDIF
524 *
525  IF( n .LT. 0 ) THEN
526  info = -3
527  ENDIF
528 *
529  IF( n+ja-1 .GT. store_n_a ) THEN
530  info = -( 9*100 + 6 )
531  ENDIF
532 *
533  IF( n+ib-1 .GT. store_m_b ) THEN
534  info = -( 12*100 + 3 )
535  ENDIF
536 *
537  IF( lldb .LT. nb ) THEN
538  info = -( 12*100 + 6 )
539  ENDIF
540 *
541  IF( nrhs .LT. 0 ) THEN
542  info = -4
543  ENDIF
544 *
545 * Current alignment restriction
546 *
547  IF( ja .NE. ib) THEN
548  info = -8
549  ENDIF
550 *
551 * Argument checking that is specific to Divide & Conquer routine
552 *
553  IF( nprow .NE. 1 ) THEN
554  info = -( 9*100+2 )
555  ENDIF
556 *
557  IF( n .GT. np*nb-mod( ja-1, nb )) THEN
558  info = -( 3 )
559  CALL pxerbla( ictxt,
560  $ 'PZDTTRSV, D&C alg.: only 1 block per proc',
561  $ -info )
562  RETURN
563  ENDIF
564 *
565  IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*int_one )) THEN
566  info = -( 9*100+4 )
567  CALL pxerbla( ictxt,
568  $ 'PZDTTRSV, D&C alg.: NB too small',
569  $ -info )
570  RETURN
571  ENDIF
572 *
573 *
574  work_size_min =
575  $ int_one*nrhs
576 *
577  work( 1 ) = work_size_min
578 *
579  IF( lwork .LT. work_size_min ) THEN
580  IF( lwork .NE. -1 ) THEN
581  info = -16
582  CALL pxerbla( ictxt,
583  $ 'PZDTTRSV: worksize error',
584  $ -info )
585  ENDIF
586  RETURN
587  ENDIF
588 *
589 * Pack params and positions into arrays for global consistency check
590 *
591  param_check( 16, 1 ) = descb(5)
592  param_check( 15, 1 ) = descb(4)
593  param_check( 14, 1 ) = descb(3)
594  param_check( 13, 1 ) = descb(2)
595  param_check( 12, 1 ) = descb(1)
596  param_check( 11, 1 ) = ib
597  param_check( 10, 1 ) = desca(5)
598  param_check( 9, 1 ) = desca(4)
599  param_check( 8, 1 ) = desca(3)
600  param_check( 7, 1 ) = desca(1)
601  param_check( 6, 1 ) = ja
602  param_check( 5, 1 ) = nrhs
603  param_check( 4, 1 ) = n
604  param_check( 3, 1 ) = idum3
605  param_check( 2, 1 ) = idum2
606  param_check( 1, 1 ) = idum1
607 *
608  param_check( 16, 2 ) = 1205
609  param_check( 15, 2 ) = 1204
610  param_check( 14, 2 ) = 1203
611  param_check( 13, 2 ) = 1202
612  param_check( 12, 2 ) = 1201
613  param_check( 11, 2 ) = 11
614  param_check( 10, 2 ) = 905
615  param_check( 9, 2 ) = 904
616  param_check( 8, 2 ) = 903
617  param_check( 7, 2 ) = 901
618  param_check( 6, 2 ) = 8
619  param_check( 5, 2 ) = 4
620  param_check( 4, 2 ) = 3
621  param_check( 3, 2 ) = 16
622  param_check( 2, 2 ) = 2
623  param_check( 1, 2 ) = 1
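* (The position codes in column 2 above follow the same convention
*  as INFO: e.g. 1205 denotes entry 5 of argument 12 (DESCB), and
*  11 denotes scalar argument 11 (IB).)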
624 *
625 * Want to find errors with MIN( ), so if no error, set it to a big
626 * number. If there already is an error, multiply by the
627 * descriptor multiplier.
628 *
629  IF( info.GE.0 ) THEN
630  info = bignum
631  ELSE IF( info.LT.-descmult ) THEN
632  info = -info
633  ELSE
634  info = -info * descmult
635  END IF
636 *
637 * Check consistency across processors
638 *
639  CALL globchk( ictxt, 16, param_check, 16,
640  $ param_check( 1, 3 ), info )
641 *
642 * Prepare output: set info = 0 if no error, and divide by DESCMULT
643 * if error is not in a descriptor entry.
644 *
645  IF( info.EQ.bignum ) THEN
646  info = 0
647  ELSE IF( mod( info, descmult ) .EQ. 0 ) THEN
648  info = -info / descmult
649  ELSE
650  info = -info
651  END IF
652 *
653  IF( info.LT.0 ) THEN
654  CALL pxerbla( ictxt, 'PZDTTRSV', -info )
655  RETURN
656  END IF
657 *
658 * Quick return if possible
659 *
660  IF( n.EQ.0 )
661  $ RETURN
662 *
663  IF( nrhs.EQ.0 )
664  $ RETURN
665 *
666 *
667 * Adjust addressing into matrix space to properly get into
668 * the beginning part of the relevant data
669 *
670  part_offset = nb*( (ja-1)/(npcol*nb) )
671 *
672  IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb ) THEN
673  part_offset = part_offset + nb
674  ENDIF
675 *
676  IF ( mycol .LT. csrc ) THEN
677  part_offset = part_offset - nb
678  ENDIF
679 *
680 * Form a new BLACS grid (the "standard form" grid) with only procs
681 * holding part of the matrix, of size 1xNP where NP is adjusted,
682 * starting at csrc=0, with JA modified to reflect dropped procs.
683 *
684 * First processor to hold part of the matrix:
685 *
686  first_proc = mod( ( ja-1 )/nb+csrc, npcol )
687 *
688 * Calculate new JA while dropping off unused processors.
689 *
690  ja_new = mod( ja-1, nb ) + 1
691 *
692 * Save and compute new value of NP
693 *
694  np_save = np
695  np = ( ja_new+n-2 )/nb + 1
696 *
697 * Call utility routine that forms "standard-form" grid
698 *
699  CALL reshape( ictxt, int_one, ictxt_new, int_one,
700  $ first_proc, int_one, np )
701 *
702 * Use new context from standard grid as context.
703 *
704  ictxt_save = ictxt
705  ictxt = ictxt_new
706  desca_1xp( 2 ) = ictxt_new
707  descb_px1( 2 ) = ictxt_new
708 *
709 * Get information about new grid.
710 *
711  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
712 *
713 * Drop out processors that do not have part of the matrix.
714 *
715  IF( myrow .LT. 0 ) THEN
716  GOTO 1234
717  ENDIF
718 *
719 * ********************************
720 * Values reused throughout routine
721 *
722 * User-input value of partition size
723 *
724  part_size = nb
725 *
726 * Number of columns in each processor
727 *
728  my_num_cols = numroc( n, part_size, mycol, 0, npcol )
729 *
730 * Offset in columns to beginning of main partition in each proc
731 *
732  IF ( mycol .EQ. 0 ) THEN
733  part_offset = part_offset+mod( ja_new-1, part_size )
734  my_num_cols = my_num_cols - mod(ja_new-1, part_size )
735  ENDIF
736 *
737 * Size of main (or odd) partition in each processor
738 *
739  odd_size = my_num_cols
740  IF ( mycol .LT. np-1 ) THEN
741  odd_size = odd_size - int_one
742  ENDIF
743 *
744 * Offset to workspace for Upper triangular factor
745 *
746  work_u = int_one*odd_size + 3
747 *
748 *
749 *
750 * Begin main code
751 *
752  IF ( lsame( uplo, 'L' ) ) THEN
753 *
754  IF ( lsame( trans, 'N' ) ) THEN
755 *
756 * Frontsolve
757 *
758 *
759 ******************************************
760 * Local computation phase
761 ******************************************
762 *
763 * Use main partition in each processor to solve locally
764 *
765  CALL zdttrsv( uplo, 'N', odd_size, nrhs, dl( part_offset+2 ),
766  $ d( part_offset+1 ), du( part_offset+1 ),
767  $ b( part_offset+1 ), lldb, info )
768 *
769 *
770  IF ( mycol .LT. np-1 ) THEN
771 * Use factorization of odd-even connection block to modify
772 * locally stored portion of right hand side(s)
773 *
774  CALL zaxpy( nrhs, -dl( part_offset+odd_size+1 ),
775  $ b( part_offset+odd_size ), lldb,
776  $ b( part_offset+odd_size+1 ), lldb )
777 *
778  ENDIF
779 *
780 *
781  IF ( mycol .NE. 0 ) THEN
782 * Use the "spike" fillin to calculate contribution to previous
783 * processor's righthand-side.
784 *
785  CALL zgemm( 'C', 'N', int_one, nrhs, odd_size, -cone,
786  $ af( 1 ), odd_size, b( part_offset+1 ), lldb,
787  $ czero, work( 1+int_one-int_one ), int_one )
788  ENDIF
789 *
790 *
791 ************************************************
792 * Formation and solution of reduced system
793 ************************************************
794 *
795 *
796 * Send modifications to prior processor's right hand sides
797 *
798  IF( mycol .GT. 0) THEN
799 *
800  CALL zgesd2d( ictxt, int_one, nrhs,
801  $ work( 1 ), int_one,
802  $ 0, mycol - 1 )
803 *
804  ENDIF
805 *
806 * Receive modifications to processor's right hand sides
807 *
808  IF( mycol .LT. npcol-1) THEN
809 *
810  CALL zgerv2d( ictxt, int_one, nrhs,
811  $ work( 1 ), int_one,
812  $ 0, mycol + 1 )
813 *
814 * Combine contribution to locally stored right hand sides
815 *
816  CALL zmatadd( int_one, nrhs, cone,
817  $ work( 1 ), int_one, cone,
818  $ b( part_offset+odd_size + 1 ), lldb )
819 *
820  ENDIF
821 *
822 *
823 * The last processor does not participate in the solution of the
824 * reduced system, having sent its contribution already.
825  IF( mycol .EQ. npcol-1 ) THEN
826  GOTO 14
827  ENDIF
828 *
829 *
830 * *************************************
831 * Modification Loop
832 *
833 * The distance for sending and receiving for each level starts
834 * at 1 for the first level.
835  level_dist = 1
836 *
837 * Do until this proc is needed to modify other procs' equations
838 *
839  12 CONTINUE
840  IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 11
841 *
842 * Receive and add contribution to righthand sides from left
843 *
844  IF( mycol-level_dist .GE. 0 ) THEN
845 *
846  CALL zgerv2d( ictxt, int_one, nrhs,
847  $ work( 1 ),
848  $ int_one, 0, mycol-level_dist )
849 *
850  CALL zmatadd( int_one, nrhs, cone,
851  $ work( 1 ), int_one, cone,
852  $ b( part_offset+odd_size + 1 ), lldb )
853 *
854  ENDIF
855 *
856 * Receive and add contribution to righthand sides from right
857 *
858  IF( mycol+level_dist .LT. npcol-1 ) THEN
859 *
860  CALL zgerv2d( ictxt, int_one, nrhs,
861  $ work( 1 ),
862  $ int_one, 0, mycol+level_dist )
863 *
864  CALL zmatadd( int_one, nrhs, cone,
865  $ work( 1 ), int_one, cone,
866  $ b( part_offset+odd_size + 1 ), lldb )
867 *
868  ENDIF
869 *
870  level_dist = level_dist*2
871 *
872  GOTO 12
873  11 CONTINUE
874 * [End of GOTO Loop]
875 *
876 *
877 *
878 * *********************************
879 * Calculate and use this proc's blocks to modify other procs
880 *
881 * Solve with diagonal block
882 *
883  CALL ztbtrs( 'L', 'N', 'U', int_one, min( int_one, int_one-1 ),
884  $ nrhs, af( odd_size+2 ), int_one+1,
885  $ b( part_offset+odd_size+1 ), lldb, info )
886 *
887  IF( info.NE.0 ) THEN
888  GO TO 1000
889  ENDIF
890 *
891 *
892 *
893 * *********
894  IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
895 *
896 * Calculate contribution from this block to next diagonal block
897 *
898  CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
899  $ af( (odd_size)*int_one+1 ),
900  $ int_one,
901  $ b( part_offset+odd_size+1 ),
902  $ lldb, czero,
903  $ work( 1 ),
904  $ int_one )
905 *
906 * Send contribution to diagonal block's owning processor.
907 *
908  CALL zgesd2d( ictxt, int_one, nrhs,
909  $ work( 1 ),
910  $ int_one, 0, mycol+level_dist )
911 *
912  ENDIF
913 * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
914 *
915 * ************
916  IF( (mycol/level_dist .GT. 0 ).AND.
917  $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
918 *
919 *
920 * Use offdiagonal block to calculate modification to diag block
921 * of processor to the left
922 *
923  CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
924  $ af( odd_size*int_one+2+1 ),
925  $ int_one,
926  $ b( part_offset+odd_size+1 ),
927  $ lldb, czero,
928  $ work( 1 ),
929  $ int_one )
930 *
931 * Send contribution to diagonal block's owning processor.
932 *
933  CALL zgesd2d( ictxt, int_one, nrhs,
934  $ work( 1 ),
935  $ int_one, 0, mycol-level_dist )
936 *
937  ENDIF
938 * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
939 *
940  14 CONTINUE
941 *
942  ELSE
943 *
944 ******************** BACKSOLVE *************************************
945 *
946 ********************************************************************
947 * .. Begin reduced system phase of algorithm ..
948 ********************************************************************
949 *
950 *
951 *
952 * The last processor does not participate in the solution of the
953 * reduced system and just waits to receive its solution.
954  IF( mycol .EQ. npcol-1 ) THEN
955  GOTO 24
956  ENDIF
957 *
958 * Determine number of steps in tree loop
959 *
960  level_dist = 1
961  27 CONTINUE
962  IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 26
963 *
964  level_dist = level_dist*2
965 *
966  GOTO 27
967  26 CONTINUE
968 *
969 *
970  IF( (mycol/level_dist .GT. 0 ).AND.
971  $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
972 *
973 * Receive solution from processor to left
974 *
975  CALL zgerv2d( ictxt, int_one, nrhs,
976  $ work( 1 ),
977  $ int_one, 0, mycol-level_dist )
978 *
979 *
980 * Use offdiagonal block to calculate modification to RHS stored
981 * on this processor
982 *
983  CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
984  $ af( odd_size*int_one+2+1 ),
985  $ int_one,
986  $ work( 1 ),
987  $ int_one, cone,
988  $ b( part_offset+odd_size+1 ),
989  $ lldb )
990  ENDIF
991 * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
992 *
993 *
994  IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
995 *
996 * Receive solution from processor to right
997 *
998  CALL zgerv2d( ictxt, int_one, nrhs,
999  $ work( 1 ),
1000  $ int_one, 0, mycol+level_dist )
1001 *
1002 * Calculate contribution from this block to next diagonal block
1003 *
1004  CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
1005  $ af( (odd_size)*int_one+1 ),
1006  $ int_one,
1007  $ work( 1 ),
1008  $ int_one, cone,
1009  $ b( part_offset+odd_size+1 ),
1010  $ lldb )
1011 *
1012  ENDIF
1013 * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1014 *
1015 *
1016 * Solve with diagonal block
1017 *
1018  CALL ztbtrs( 'L', 'C', 'U', int_one, min( int_one, int_one-1 ),
1019  $ nrhs, af( odd_size+2 ), int_one+1,
1020  $ b( part_offset+odd_size+1 ), lldb, info )
1021 *
1022  IF( info.NE.0 ) THEN
1023  GO TO 1000
1024  ENDIF
1025 *
1026 *
1027 *
1028 ***Modification Loop *******
1029 *
1030  22 CONTINUE
1031  IF( level_dist .EQ. 1 ) GOTO 21
1032 *
1033  level_dist = level_dist/2
1034 *
1035 * Send solution to the right
1036 *
1037  IF( mycol+level_dist .LT. npcol-1 ) THEN
1038 *
1039  CALL zgesd2d( ictxt, int_one, nrhs,
1040  $ b( part_offset+odd_size+1 ),
1041  $ lldb, 0, mycol+level_dist )
1042 *
1043  ENDIF
1044 *
1045 * Send solution to left
1046 *
1047  IF( mycol-level_dist .GE. 0 ) THEN
1048 *
1049  CALL zgesd2d( ictxt, int_one, nrhs,
1050  $ b( part_offset+odd_size+1 ),
1051  $ lldb, 0, mycol-level_dist )
1052 *
1053  ENDIF
1054 *
1055  GOTO 22
1056  21 CONTINUE
1057 * [End of GOTO Loop]
1058 *
1059  24 CONTINUE
1060 * [Processor npcol - 1 jumped to here to await next stage]
1061 *
1062 *******************************
1063 * Reduced system has been solved, communicate solutions to nearest
1064 * neighbors in preparation for local computation phase.
1065 *
1066 *
1067 * Send elements of solution to next proc
1068 *
1069  IF( mycol .LT. npcol-1) THEN
1070 *
1071  CALL zgesd2d( ictxt, int_one, nrhs,
1072  $ b( part_offset+odd_size+1 ), lldb,
1073  $ 0, mycol +1 )
1074 *
1075  ENDIF
1076 *
1077 * Receive modifications to processor's right hand sides
1078 *
1079  IF( mycol .GT. 0) THEN
1080 *
1081  CALL zgerv2d( ictxt, int_one, nrhs,
1082  $ work( 1 ), int_one,
1083  $ 0, mycol - 1 )
1084 *
1085  ENDIF
1086 *
1087 *
1088 *
1089 **********************************************
1090 * Local computation phase
1091 **********************************************
1092 *
1093  IF ( mycol .NE. 0 ) THEN
1094 * Use the "spike" fillin to calculate contribution from previous
1095 * processor's solution.
1096 *
1097  CALL zgemm( 'N', 'N', odd_size, nrhs, int_one, -cone, af( 1 ),
1098  $ odd_size, work( 1+int_one-int_one ), int_one,
1099  $ cone, b( part_offset+1 ), lldb )
1100 *
1101  ENDIF
1102 *
1103 *
1104  IF ( mycol .LT. np-1 ) THEN
1105 * Use factorization of odd-even connection block to modify
1106 * locally stored portion of right hand side(s)
1107 *
1108  CALL zaxpy( nrhs, -dconjg( dl( part_offset+odd_size+1 ) ),
1109  $ b( part_offset+odd_size+1 ), lldb,
1110  $ b( part_offset+odd_size ), lldb )
1111 *
1112  ENDIF
1113 *
1114 * Use main partition in each processor to solve locally
1115 *
1116  CALL zdttrsv( uplo, 'C', odd_size, nrhs, dl( part_offset+2 ),
1117  $ d( part_offset+1 ), du( part_offset+1 ),
1118  $ b( part_offset+1 ), lldb, info )
1119 *
1120  ENDIF
1121 * End of "IF( LSAME( TRANS, 'N' ) )"...
1122 *
1123 *
1124  ELSE
1125 ***************************************************************
1126 * CASE UPLO = 'U' *
1127 ***************************************************************
1128  IF ( lsame( trans, 'C' ) ) THEN
1129 *
1130 * Frontsolve
1131 *
1132 *
1133 ******************************************
1134 * Local computation phase
1135 ******************************************
1136 *
1137 * Use main partition in each processor to solve locally
1138 *
1139  CALL zdttrsv( uplo, 'C', odd_size, nrhs, dl( part_offset+2 ),
1140  $ d( part_offset+1 ), du( part_offset+1 ),
1141  $ b( part_offset+1 ), lldb, info )
1142 *
1143 *
1144  IF ( mycol .LT. np-1 ) THEN
1145 * Use factorization of odd-even connection block to modify
1146 * locally stored portion of right hand side(s)
1147 *
1148  CALL zaxpy( nrhs, -dconjg( du( part_offset+odd_size ) ),
1149  $ b( part_offset+odd_size ), lldb,
1150  $ b( part_offset+odd_size+1 ), lldb )
1151 *
1152  ENDIF
1153 *
1154 *
1155  IF ( mycol .NE. 0 ) THEN
1156 * Use the "spike" fillin to calculate contribution to previous
1157 * processor's righthand-side.
1158 *
1159  CALL zgemm( 'C', 'N', int_one, nrhs, odd_size, -cone,
1160  $ af( work_u+1 ), odd_size, b( part_offset+1 ),
1161  $ lldb, czero, work( 1+int_one-int_one ),
1162  $ int_one )
1163  ENDIF
1164 *
1165 *
1166 ************************************************
1167 * Formation and solution of reduced system
1168 ************************************************
1169 *
1170 *
1171 * Send modifications to prior processor's right hand sides
1172 *
1173  IF( mycol .GT. 0) THEN
1174 *
1175  CALL zgesd2d( ictxt, int_one, nrhs,
1176  $ work( 1 ), int_one,
1177  $ 0, mycol - 1 )
1178 *
1179  ENDIF
1180 *
1181 * Receive modifications to processor's right hand sides
1182 *
1183  IF( mycol .LT. npcol-1) THEN
1184 *
1185  CALL zgerv2d( ictxt, int_one, nrhs,
1186  $ work( 1 ), int_one,
1187  $ 0, mycol + 1 )
1188 *
1189 * Combine contribution to locally stored right hand sides
1190 *
1191  CALL zmatadd( int_one, nrhs, cone,
1192  $ work( 1 ), int_one, cone,
1193  $ b( part_offset+odd_size + 1 ), lldb )
1194 *
1195  ENDIF
1196 *
1197 *
1198 * The last processor does not participate in the solution of the
1199 * reduced system, having sent its contribution already.
1200  IF( mycol .EQ. npcol-1 ) THEN
1201  GOTO 44
1202  ENDIF
1203 *
1204 *
1205 * *************************************
1206 * Modification Loop
1207 *
1208 * The distance for sending and receiving for each level starts
1209 * at 1 for the first level.
1210  level_dist = 1
1211 *
1212 * Do until this proc is needed to modify other procs' equations
1213 *
1214  42 CONTINUE
1215  IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 41
1216 *
1217 * Receive and add contribution to righthand sides from left
1218 *
1219  IF( mycol-level_dist .GE. 0 ) THEN
1220 *
1221  CALL zgerv2d( ictxt, int_one, nrhs,
1222  $ work( 1 ),
1223  $ int_one, 0, mycol-level_dist )
1224 *
1225  CALL zmatadd( int_one, nrhs, cone,
1226  $ work( 1 ), int_one, cone,
1227  $ b( part_offset+odd_size + 1 ), lldb )
1228 *
1229  ENDIF
1230 *
1231 * Receive and add contribution to righthand sides from right
1232 *
1233  IF( mycol+level_dist .LT. npcol-1 ) THEN
1234 *
1235  CALL zgerv2d( ictxt, int_one, nrhs,
1236  $ work( 1 ),
1237  $ int_one, 0, mycol+level_dist )
1238 *
1239  CALL zmatadd( int_one, nrhs, cone,
1240  $ work( 1 ), int_one, cone,
1241  $ b( part_offset+odd_size + 1 ), lldb )
1242 *
1243  ENDIF
1244 *
1245  level_dist = level_dist*2
1246 *
1247  GOTO 42
1248  41 CONTINUE
1249 * [End of GOTO Loop]
1250 *
1251 *
1252 *
1253 * *********************************
1254 * Calculate and use this proc's blocks to modify other procs
1255 *
1256 * Solve with diagonal block
1257 *
1258  CALL ztbtrs( 'U', 'C', 'N', int_one, min( int_one, int_one-1 ),
1259  $ nrhs, af( odd_size+2 ), int_one+1,
1260  $ b( part_offset+odd_size+1 ), lldb, info )
1261 *
1262  IF( info.NE.0 ) THEN
1263  GO TO 1000
1264  ENDIF
1265 *
1266 *
1267 *
1268 * *********
1269  IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1270 *
1271 * Calculate contribution from this block to next diagonal block
1272 *
1273  CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1274  $ af( work_u+(odd_size)*int_one+1 ),
1275  $ int_one,
1276  $ b( part_offset+odd_size+1 ),
1277  $ lldb, czero,
1278  $ work( 1 ),
1279  $ int_one )
1280 *
1281 * Send contribution to diagonal block's owning processor.
1282 *
1283  CALL zgesd2d( ictxt, int_one, nrhs,
1284  $ work( 1 ),
1285  $ int_one, 0, mycol+level_dist )
1286 *
1287  ENDIF
1288 * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1289 *
1290 * ************
1291  IF( (mycol/level_dist .GT. 0 ).AND.
1292  $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1293 *
1294 *
1295 * Use offdiagonal block to calculate modification to diag block
1296 * of processor to the left
1297 *
1298  CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
1299  $ af( work_u+odd_size*int_one+2+1 ),
1300  $ int_one,
1301  $ b( part_offset+odd_size+1 ),
1302  $ lldb, czero,
1303  $ work( 1 ),
1304  $ int_one )
1305 *
1306 * Send contribution to diagonal block's owning processor.
1307 *
1308  CALL zgesd2d( ictxt, int_one, nrhs,
1309  $ work( 1 ),
1310  $ int_one, 0, mycol-level_dist )
1311 *
1312  ENDIF
1313 * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1314 *
1315  44 CONTINUE
1316 *
1317  ELSE
1318 *
1319 ******************** BACKSOLVE *************************************
1320 *
1321 ********************************************************************
1322 * .. Begin reduced system phase of algorithm ..
1323 ********************************************************************
1324 *
1325 *
1326 *
1327 * The last processor does not participate in the solution of the
1328 * reduced system and just waits to receive its solution.
1329  IF( mycol .EQ. npcol-1 ) THEN
1330  GOTO 54
1331  ENDIF
1332 *
1333 * Determine number of steps in tree loop
1334 *
1335  level_dist = 1
1336  57 CONTINUE
1337  IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 56
1338 *
1339  level_dist = level_dist*2
1340 *
1341  GOTO 57
1342  56 CONTINUE
1343 *
1344 *
1345  IF( (mycol/level_dist .GT. 0 ).AND.
1346  $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1347 *
1348 * Receive solution from processor to left
1349 *
1350  CALL zgerv2d( ictxt, int_one, nrhs,
1351  $ work( 1 ),
1352  $ int_one, 0, mycol-level_dist )
1353 *
1354 *
1355 * Use offdiagonal block to calculate modification to RHS stored
1356 * on this processor
1357 *
1358  CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1359  $ af( work_u+odd_size*int_one+2+1 ),
1360  $ int_one,
1361  $ work( 1 ),
1362  $ int_one, cone,
1363  $ b( part_offset+odd_size+1 ),
1364  $ lldb )
1365  ENDIF
1366 * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1367 *
1368 *
1369  IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1370 *
1371 * Receive solution from processor to right
1372 *
1373  CALL zgerv2d( ictxt, int_one, nrhs,
1374  $ work( 1 ),
1375  $ int_one, 0, mycol+level_dist )
1376 *
1377 * Calculate contribution from this block to next diagonal block
1378 *
1379  CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
1380  $ af( work_u+(odd_size)*int_one+1 ),
1381  $ int_one,
1382  $ work( 1 ),
1383  $ int_one, cone,
1384  $ b( part_offset+odd_size+1 ),
1385  $ lldb )
1386 *
1387  ENDIF
1388 * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1389 *
1390 *
1391 * Solve with diagonal block
1392 *
1393  CALL ztbtrs( 'U', 'N', 'N', int_one, min( int_one, int_one-1 ),
1394  $ nrhs, af( odd_size+2 ), int_one+1,
1395  $ b( part_offset+odd_size+1 ), lldb, info )
1396 *
1397  IF( info.NE.0 ) THEN
1398  GO TO 1000
1399  ENDIF
1400 *
1401 *
1402 *
1403 ***Modification Loop *******
1404 *
1405  52 CONTINUE
1406  IF( level_dist .EQ. 1 ) GOTO 51
1407 *
1408  level_dist = level_dist/2
1409 *
1410 * Send solution to the right
1411 *
1412  IF( mycol+level_dist .LT. npcol-1 ) THEN
1413 *
1414  CALL zgesd2d( ictxt, int_one, nrhs,
1415  $ b( part_offset+odd_size+1 ),
1416  $ lldb, 0, mycol+level_dist )
1417 *
1418  ENDIF
1419 *
1420 * Send solution to left
1421 *
1422  IF( mycol-level_dist .GE. 0 ) THEN
1423 *
1424  CALL zgesd2d( ictxt, int_one, nrhs,
1425  $ b( part_offset+odd_size+1 ),
1426  $ lldb, 0, mycol-level_dist )
1427 *
1428  ENDIF
1429 *
1430  GOTO 52
1431  51 CONTINUE
1432 * [End of GOTO Loop]
1433 *
1434  54 CONTINUE
1435 * [Processor npcol - 1 jumped to here to await next stage]
1436 *
1437 *******************************
1438 * Reduced system has been solved, communicate solutions to nearest
1439 * neighbors in preparation for local computation phase.
1440 *
1441 *
1442 * Send elements of solution to next proc
1443 *
1444  IF( mycol .LT. npcol-1) THEN
1445 *
1446  CALL zgesd2d( ictxt, int_one, nrhs,
1447  $ b( part_offset+odd_size+1 ), lldb,
1448  $ 0, mycol +1 )
1449 *
1450  ENDIF
1451 *
1452 * Receive modifications to processor's right hand sides
1453 *
1454  IF( mycol .GT. 0) THEN
1455 *
1456  CALL zgerv2d( ictxt, int_one, nrhs,
1457  $ work( 1 ), int_one,
1458  $ 0, mycol - 1 )
1459 *
1460  ENDIF
1461 *
1462 *
1463 *
1464 **********************************************
1465 * Local computation phase
1466 **********************************************
1467 *
1468  IF ( mycol .NE. 0 ) THEN
1469 * Use the "spike" fillin to calculate contribution from previous
1470 * processor's solution.
1471 *
1472  CALL zgemm( 'N', 'N', odd_size, nrhs, int_one, -cone,
1473  $ af( work_u+1 ), odd_size,
1474  $ work( 1+int_one-int_one ), int_one, cone,
1475  $ b( part_offset+1 ), lldb )
1476 *
1477  ENDIF
1478 *
1479 *
1480  IF ( mycol .LT. np-1 ) THEN
1481 * Use factorization of odd-even connection block to modify
1482 * locally stored portion of right hand side(s)
1483 *
1484  CALL zaxpy( nrhs, -( du( part_offset+odd_size ) ),
1485  $ b( part_offset+odd_size+1 ), lldb,
1486  $ b( part_offset+odd_size ), lldb )
1487 *
1488  ENDIF
1489 *
1490 * Use main partition in each processor to solve locally
1491 *
1492  CALL zdttrsv( uplo, 'N', odd_size, nrhs, du( part_offset+2 ),
1493  $ d( part_offset+1 ), du( part_offset+1 ),
1494  $ b( part_offset+1 ), lldb, info )
1495 *
1496  ENDIF
1497 * End of "IF( LSAME( TRANS, 'N' ) )"...
1498 *
1499 *
1500  ENDIF
1501 * End of "IF( LSAME( UPLO, 'L' ) )"...
1502  1000 CONTINUE
1503 *
1504 *
1505 * Free BLACS space used to hold standard-form grid.
1506 *
1507  IF( ictxt_save .NE. ictxt_new ) THEN
1508  CALL blacs_gridexit( ictxt_new )
1509  ENDIF
1510 *
1511  1234 CONTINUE
1512 *
1513 * Restore saved input parameters
1514 *
1515  ictxt = ictxt_save
1516  np = np_save
1517 *
1518 * Output minimum worksize
1519 *
1520  work( 1 ) = work_size_min
1521 *
1522 *
1523  RETURN
1524 *
1525 * End of PZDTTRSV
1526 *
1527  END