pzpttrsv.f
1  SUBROUTINE pzpttrsv( UPLO, TRANS, N, NRHS, D, E, JA, DESCA, B, IB,
2  $ DESCB, AF, LAF, WORK, LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 2.0.2) --
5 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6 * May 1 2012
7 *
8 * .. Scalar Arguments ..
9  CHARACTER TRANS, UPLO
10  INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * ), DESCB( * )
14  COMPLEX*16 AF( * ), B( * ), E( * ), WORK( * )
15  DOUBLE PRECISION D( * )
16 * ..
17 *
18 *
19 * Purpose
20 * =======
21 *
22 * PZPTTRSV solves a tridiagonal triangular system of linear equations
23 *
24 * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
25 * or
26 * A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS)
27 *
28 * where A(1:N, JA:JA+N-1) is a tridiagonal
29 * triangular matrix factor produced by the
30 * Cholesky factorization code PZPTTRF
31 * and is stored in A(1:N,JA:JA+N-1) and AF.
32 * The matrix stored in A(1:N, JA:JA+N-1) is either
33 * upper or lower triangular according to UPLO,
34 * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H
 35 * is dictated by the user through the parameter TRANS.
36 *
37 * Routine PZPTTRF MUST be called first.
38 *
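 *
 *  A minimal calling sketch (illustrative only; grid setup, descriptor
 *  contents and workspace sizes are assumed to have been prepared by
 *  the caller as described in the Notes below). PZPTTRF factors the
 *  matrix, after which this routine solves with one of the resulting
 *  triangular factors; the driver PZPTTRS performs the complete
 *  solution of A*X = B:
 *
 *     CALL PZPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO )
 *     IF( INFO.EQ.0 )
 *    $   CALL PZPTTRSV( UPLO, 'N', N, NRHS, D, E, JA, DESCA, B, IB,
 *    $                  DESCB, AF, LAF, WORK, LWORK, INFO )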
39 * =====================================================================
40 *
41 * Arguments
42 * =========
43 *
44 * UPLO (global input) CHARACTER
45 * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored;
46 * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored.
47 *
48 * TRANS (global input) CHARACTER
49 * = 'N': Solve with A(1:N, JA:JA+N-1);
50 * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) );
51 *
52 * N (global input) INTEGER
53 * The number of rows and columns to be operated on, i.e. the
54 * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
55 *
56 * NRHS (global input) INTEGER
57 * The number of right hand sides, i.e., the number of columns
58 * of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
59 * NRHS >= 0.
60 *
 61 * D (local input/local output) DOUBLE PRECISION pointer to local
62 * part of global vector storing the main diagonal of the
63 * matrix.
 64 * On exit, this array contains information on the
65 * factors of the matrix.
66 * Must be of size >= DESCA( NB_ ).
67 *
68 * E (local input/local output) COMPLEX*16 pointer to local
69 * part of global vector storing the upper diagonal of the
 70 * matrix. Globally, E( N ) is not referenced, and E must be
71 * aligned with D.
 72 * On exit, this array contains information on the
73 * factors of the matrix.
74 * Must be of size >= DESCA( NB_ ).
75 *
76 * JA (global input) INTEGER
77 * The index in the global array A that points to the start of
78 * the matrix to be operated on (which may be either all of A
79 * or a submatrix of A).
80 *
81 * DESCA (global and local input) INTEGER array of dimension DLEN.
82 * if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
83 * if 2D type (DTYPE_A=1), DLEN >= 9.
84 * The array descriptor for the distributed matrix A.
85 * Contains information of mapping of A to memory. Please
86 * see NOTES below for full description and options.
87 *
 88 * B (local input/local output) COMPLEX*16 pointer into
 89 * local memory to an array of local leading dimension LLD_B >= NB.
 90 * On entry, this array contains the
 91 * local pieces of the right hand sides
 92 * B(IB:IB+N-1, 1:NRHS).
 93 * On exit, this contains the local piece of the solution
 94 * distributed matrix X.
95 *
96 * IB (global input) INTEGER
97 * The row index in the global array B that points to the first
98 * row of the matrix to be operated on (which may be either
99 * all of B or a submatrix of B).
100 *
101 * DESCB (global and local input) INTEGER array of dimension DLEN.
102 * if 1D type (DTYPE_B=502), DLEN >=7;
103 * if 2D type (DTYPE_B=1), DLEN >= 9.
104 * The array descriptor for the distributed matrix B.
105 * Contains information of mapping of B to memory. Please
106 * see NOTES below for full description and options.
107 *
 108 * AF (local input) COMPLEX*16 array, dimension LAF.
109 * Auxiliary Fillin Space.
110 * Fillin is created during the factorization routine
111 * PZPTTRF and this is stored in AF. If a linear system
112 * is to be solved using PZPTTRS after the factorization
113 * routine, AF *must not be altered* after the factorization.
114 *
115 * LAF (local input) INTEGER
116 * Size of user-input Auxiliary Fillin space AF. Must be >=
 117 * (NB+2).
 118 * If LAF is not large enough, an error code will be returned
 119 * and the minimum acceptable size will be returned in AF( 1 ).
120 *
121 * WORK (local workspace/local output)
122 * COMPLEX*16 temporary workspace. This space may
123 * be overwritten in between calls to routines. WORK must be
124 * the size given in LWORK.
125 * On exit, WORK( 1 ) contains the minimal LWORK.
126 *
127 * LWORK (local input or global input) INTEGER
128 * Size of user-input workspace WORK.
129 * If LWORK is too small, the minimal acceptable size will be
130 * returned in WORK(1) and an error code is returned. LWORK>=
131 * (10+2*min(100,NRHS))*NPCOL+4*NRHS
132 *
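 *
 *  A workspace-query sketch (illustrative; LWORK_MIN denotes a
 *  caller-side INTEGER): calling the routine with LWORK = -1 makes it
 *  return immediately with the minimum workspace size in WORK( 1 ),
 *  without performing any computation:
 *
 *     CALL PZPTTRSV( UPLO, TRANS, N, NRHS, D, E, JA, DESCA, B, IB,
 *    $               DESCB, AF, LAF, WORK, -1, INFO )
 *     LWORK_MIN = INT( WORK( 1 ) )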
133 * INFO (local output) INTEGER
134 * = 0: successful exit
 135 * < 0: If the i-th argument is an array and the j-th entry
 136 * had an illegal value, then INFO = -(i*100+j); if the i-th
 137 * argument is a scalar and had an illegal value, then
 138 * INFO = -i.
139 *
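 *
 *  For example, INFO = -( 8*100 + 4 ) = -804 reports an illegal value
 *  in entry 4 of argument 8 (DESCA); the indices can be recovered as
 *  I = ( -INFO ) / 100 and J = MOD( -INFO, 100 ).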
140 * =====================================================================
141 *
142 *
143 * Restrictions
144 * ============
145 *
146 * The following are restrictions on the input parameters. Some of these
147 * are temporary and will be removed in future releases, while others
148 * may reflect fundamental technical limitations.
149 *
150 * Non-cyclic restriction: VERY IMPORTANT!
151 * P*NB>= mod(JA-1,NB)+N.
152 * The mapping for matrices must be blocked, reflecting the nature
153 * of the divide and conquer algorithm as a task-parallel algorithm.
154 * This formula in words is: no processor may have more than one
155 * chunk of the matrix.
156 *
157 * Blocksize cannot be too small:
158 * If the matrix spans more than one processor, the following
159 * restriction on NB, the size of each block on each processor,
160 * must hold:
161 * NB >= 2
162 * The bulk of parallel computation is done on the matrix of size
163 * O(NB) on each processor. If this is too small, divide and conquer
164 * is a poor choice of algorithm.
165 *
166 * Submatrix reference:
167 * JA = IB
168 * Alignment restriction that prevents unnecessary communication.
169 *
170 *
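 *
 *  A sketch of how a caller might pre-check these restrictions on a
 *  1 x P grid (illustrative; OK is a caller-side LOGICAL, NPCOL the
 *  number of process columns, NB = DESCA( NB_ )):
 *
 *     OK = ( NPCOL*NB .GE. MOD( JA-1, NB ) + N ) .AND.
 *    $     ( JA .EQ. IB ) .AND.
 *    $     ( ( JA+N-1 .LE. NB ) .OR. ( NB .GE. 2 ) )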
171 * =====================================================================
172 *
173 *
174 * Notes
175 * =====
176 *
177 * If the factorization routine and the solve routine are to be called
178 * separately (to solve various sets of righthand sides using the same
179 * coefficient matrix), the auxiliary space AF *must not be altered*
180 * between calls to the factorization routine and the solve routine.
181 *
182 * The best algorithm for solving banded and tridiagonal linear systems
183 * depends on a variety of parameters, especially the bandwidth.
184 * Currently, only algorithms designed for the case N/P >> bw are
185 * implemented. These go by many names, including Divide and Conquer,
186 * Partitioning, domain decomposition-type, etc.
187 * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
188 * algorithms are the appropriate choice.
189 *
190 * Algorithm description: Divide and Conquer
191 *
 192 * The Divide and Conquer algorithm assumes the matrix is narrowly
193 * banded compared with the number of equations. In this situation,
194 * it is best to distribute the input matrix A one-dimensionally,
195 * with columns atomic and rows divided amongst the processes.
196 * The basic algorithm divides the tridiagonal matrix up into
197 * P pieces with one stored on each processor,
198 * and then proceeds in 2 phases for the factorization or 3 for the
199 * solution of a linear system.
200 * 1) Local Phase:
201 * The individual pieces are factored independently and in
202 * parallel. These factors are applied to the matrix creating
203 * fillin, which is stored in a non-inspectable way in auxiliary
204 * space AF. Mathematically, this is equivalent to reordering
205 * the matrix A as P A P^T and then factoring the principal
206 * leading submatrix of size equal to the sum of the sizes of
207 * the matrices factored on each processor. The factors of
208 * these submatrices overwrite the corresponding parts of A
209 * in memory.
210 * 2) Reduced System Phase:
 211 * A small (P-1) system is formed representing
212 * interaction of the larger blocks, and is stored (as are its
213 * factors) in the space AF. A parallel Block Cyclic Reduction
214 * algorithm is used. For a linear system, a parallel front solve
 215 * followed by an analogous backsolve, both using the structure
216 * of the factored matrix, are performed.
 217 * 3) Backsubstitution Phase:
218 * For a linear system, a local backsubstitution is performed on
219 * each processor in parallel.
220 *
221 *
222 * Descriptors
223 * ===========
224 *
225 * Descriptors now have *types* and differ from ScaLAPACK 1.0.
226 *
227 * Note: tridiagonal codes can use either the old two dimensional
228 * or new one-dimensional descriptors, though the processor grid in
229 * both cases *must be one-dimensional*. We describe both types below.
230 *
231 * Each global data object is described by an associated description
232 * vector. This vector stores the information required to establish
233 * the mapping between an object element and its corresponding process
234 * and memory location.
235 *
 236 * Let A be a generic term for any 2D block cyclically distributed array.
237 * Such a global array has an associated description vector DESCA.
238 * In the following comments, the character _ should be read as
239 * "of the global array".
240 *
241 * NOTATION STORED IN EXPLANATION
242 * --------------- -------------- --------------------------------------
 243 * DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
244 * DTYPE_A = 1.
245 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
246 * the BLACS process grid A is distribu-
247 * ted over. The context itself is glo-
248 * bal, but the handle (the integer
249 * value) may vary.
250 * M_A (global) DESCA( M_ ) The number of rows in the global
251 * array A.
252 * N_A (global) DESCA( N_ ) The number of columns in the global
253 * array A.
254 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
255 * the rows of the array.
256 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
257 * the columns of the array.
258 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
259 * row of the array A is distributed.
260 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
261 * first column of the array A is
262 * distributed.
263 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
264 * array. LLD_A >= MAX(1,LOCr(M_A)).
265 *
266 * Let K be the number of rows or columns of a distributed matrix,
267 * and assume that its process grid has dimension p x q.
268 * LOCr( K ) denotes the number of elements of K that a process
269 * would receive if K were distributed over the p processes of its
270 * process column.
271 * Similarly, LOCc( K ) denotes the number of elements of K that a
272 * process would receive if K were distributed over the q processes of
273 * its process row.
274 * The values of LOCr() and LOCc() may be determined via a call to the
275 * ScaLAPACK tool function, NUMROC:
276 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
277 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
278 * An upper bound for these quantities may be computed by:
279 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
280 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
281 *
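 *
 *  In this routine the same tool function is used on the standard-form
 *  1 x P grid to obtain the number of columns stored locally, e.g.
 *  (illustrative):
 *
 *     MY_NUM_COLS = NUMROC( N, NB, MYCOL, 0, NPCOL )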
282 *
283 * One-dimensional descriptors:
284 *
285 * One-dimensional descriptors are a new addition to ScaLAPACK since
286 * version 1.0. They simplify and shorten the descriptor for 1D
287 * arrays.
288 *
289 * Since ScaLAPACK supports two-dimensional arrays as the fundamental
290 * object, we allow 1D arrays to be distributed either over the
291 * first dimension of the array (as if the grid were P-by-1) or the
292 * 2nd dimension (as if the grid were 1-by-P). This choice is
293 * indicated by the descriptor type (501 or 502)
294 * as described below.
295 * However, for tridiagonal matrices, since the objects being
296 * distributed are the individual vectors storing the diagonals, we
297 * have adopted the convention that both the P-by-1 descriptor and
298 * the 1-by-P descriptor are allowed and are equivalent for
299 * tridiagonal matrices. Thus, for tridiagonal matrices,
300 * DTYPE_A = 501 or 502 can be used interchangeably
301 * without any other change.
302 * We require that the distributed vectors storing the diagonals of a
303 * tridiagonal matrix be aligned with each other. Because of this, a
 304 * single descriptor, DESCA, serves to describe the distribution
 305 * of all diagonals simultaneously.
306 *
307 * IMPORTANT NOTE: the actual BLACS grid represented by the
308 * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
309 * irrespective of which one-dimensional descriptor type
310 * (501 or 502) is input.
311 * This routine will interpret the grid properly either way.
312 * ScaLAPACK routines *do not support intercontext operations* so that
313 * the grid passed to a single ScaLAPACK routine *must be the same*
314 * for all array descriptors passed to that routine.
315 *
316 * NOTE: In all cases where 1D descriptors are used, 2D descriptors
317 * may also be used, since a one-dimensional array is a special case
318 * of a two-dimensional array with one dimension of size unity.
319 * The two-dimensional array used in this case *must* be of the
320 * proper orientation:
 321 * If the appropriate one-dimensional descriptor is DTYPE_A=501
322 * (1 by P type), then the two dimensional descriptor must
323 * have a CTXT value that refers to a 1 by P BLACS grid;
 324 * If the appropriate one-dimensional descriptor is DTYPE_A=502
325 * (P by 1 type), then the two dimensional descriptor must
326 * have a CTXT value that refers to a P by 1 BLACS grid.
327 *
328 *
329 * Summary of allowed descriptors, types, and BLACS grids:
 330 * DTYPE        501         502         1        1
 331 * BLACS grid   1xP or Px1  1xP or Px1  1xP      Px1
 332 * -----------------------------------------------------
 333 * A            OK          OK          OK       NO
 334 * B            NO          OK          NO       OK
335 *
336 * Note that a consequence of this chart is that it is not possible
337 * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
338 * to opposite requirements for the orientation of the BLACS grid,
339 * and as noted before, the *same* BLACS context must be used in
340 * all descriptors in a single ScaLAPACK subroutine call.
341 *
 342 * Let A be a generic term for any 1D block cyclically distributed array.
343 * Such a global array has an associated description vector DESCA.
344 * In the following comments, the character _ should be read as
345 * "of the global array".
346 *
347 * NOTATION STORED IN EXPLANATION
348 * --------------- ---------- ------------------------------------------
349 * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
350 * TYPE_A = 501: 1-by-P grid.
351 * TYPE_A = 502: P-by-1 grid.
352 * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
353 * the BLACS process grid A is distribu-
354 * ted over. The context itself is glo-
355 * bal, but the handle (the integer
356 * value) may vary.
357 * N_A (global) DESCA( 3 ) The size of the array dimension being
358 * distributed.
359 * NB_A (global) DESCA( 4 ) The blocking factor used to distribute
360 * the distributed dimension of the array.
361 * SRC_A (global) DESCA( 5 ) The process row or column over which the
362 * first row or column of the array
363 * is distributed.
364 * Ignored DESCA( 6 ) Ignored for tridiagonal matrices.
365 * Reserved DESCA( 7 ) Reserved for future use.
366 *
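 *
 *  A sketch of filling a type-501 descriptor for the tridiagonal
 *  matrix (illustrative; ICTXT is a BLACS context handle for a
 *  one-dimensional grid, NB the chosen blocking factor; entry 5 = 0
 *  places the first block on process 0; entries 6 and 7 are
 *  ignored/reserved for tridiagonal matrices and are set to zero
 *  here):
 *
 *     DESCA( 1 ) = 501
 *     DESCA( 2 ) = ICTXT
 *     DESCA( 3 ) = N
 *     DESCA( 4 ) = NB
 *     DESCA( 5 ) = 0
 *     DESCA( 6 ) = 0
 *     DESCA( 7 ) = 0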
367 *
368 *
369 * =====================================================================
370 *
371 * Code Developer: Andrew J. Cleary, University of Tennessee.
372 * Current address: Lawrence Livermore National Labs.
373 * This version released: August, 2001.
374 *
375 * =====================================================================
376 *
377 * ..
378 * .. Parameters ..
379  DOUBLE PRECISION ONE, ZERO
380  parameter( one = 1.0d+0 )
381  parameter( zero = 0.0d+0 )
382  COMPLEX*16 CONE, CZERO
383  parameter( cone = ( 1.0d+0, 0.0d+0 ) )
384  parameter( czero = ( 0.0d+0, 0.0d+0 ) )
385  INTEGER INT_ONE
386  parameter( int_one = 1 )
387  INTEGER DESCMULT, BIGNUM
388  parameter(descmult = 100, bignum = descmult * descmult)
389  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
390  $ lld_, mb_, m_, nb_, n_, rsrc_
391  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
392  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
393  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
394 * ..
395 * .. Local Scalars ..
396  INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
397  $ idum1, idum2, idum3, ja_new, level_dist, llda,
398  $ lldb, mycol, myrow, my_num_cols, nb, np, npcol,
399  $ nprow, np_save, odd_size, part_offset,
400  $ part_size, return_code, store_m_b, store_n_a,
401  $ temp, work_size_min
402 * ..
403 * .. Local Arrays ..
404  INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
405  $ param_check( 16, 3 )
406 * ..
407 * .. External Subroutines ..
408  EXTERNAL blacs_get, blacs_gridexit, blacs_gridinfo,
409  $ desc_convert, globchk, pxerbla, reshape, zgemm,
410  $ zgerv2d, zgesd2d, zlamov, zmatadd, ztbtrs,
411  $ ztrmm, ztrtrs
412 * ..
413 * .. External Functions ..
414  LOGICAL LSAME
415  INTEGER NUMROC
416  EXTERNAL lsame, numroc
417 * ..
418 * .. Intrinsic Functions ..
419  INTRINSIC ichar, min, mod
420 * ..
421 * .. Executable Statements ..
422 *
423 * Test the input parameters
424 *
425  info = 0
426 *
427 * Convert descriptor into standard form for easy access to
428 * parameters, check that grid is of right shape.
429 *
430  desca_1xp( 1 ) = 501
431  descb_px1( 1 ) = 502
432 *
433  temp = desca( dtype_ )
434  IF( temp .EQ. 502 ) THEN
435 * Temporarily set the descriptor type to 1xP type
436  desca( dtype_ ) = 501
437  ENDIF
438 *
439  CALL desc_convert( desca, desca_1xp, return_code )
440 *
441  desca( dtype_ ) = temp
442 *
443  IF( return_code .NE. 0) THEN
444  info = -( 8*100 + 2 )
445  ENDIF
446 *
447  CALL desc_convert( descb, descb_px1, return_code )
448 *
449  IF( return_code .NE. 0) THEN
450  info = -( 11*100 + 2 )
451  ENDIF
452 *
453 * Consistency checks for DESCA and DESCB.
454 *
455 * Context must be the same
456  IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) ) THEN
457  info = -( 11*100 + 2 )
458  ENDIF
459 *
460 * These are alignment restrictions that may or may not be removed
461 * in future releases. -Andy Cleary, April 14, 1996.
462 *
463 * Block sizes must be the same
464  IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) ) THEN
465  info = -( 11*100 + 4 )
466  ENDIF
467 *
468 * Source processor must be the same
469 *
470  IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) ) THEN
471  info = -( 11*100 + 5 )
472  ENDIF
473 *
474 * Get values out of descriptor for use in code.
475 *
476  ictxt = desca_1xp( 2 )
477  csrc = desca_1xp( 5 )
478  nb = desca_1xp( 4 )
479  llda = desca_1xp( 6 )
480  store_n_a = desca_1xp( 3 )
481  lldb = descb_px1( 6 )
482  store_m_b = descb_px1( 3 )
483 *
484 * Get grid parameters
485 *
486 *
487  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
488  np = nprow * npcol
489 *
490 *
491 *
492  IF( lsame( uplo, 'U' ) ) THEN
493  idum1 = ichar( 'U' )
494  ELSE IF ( lsame( uplo, 'L' ) ) THEN
495  idum1 = ichar( 'L' )
496  ELSE
497  info = -1
498  END IF
499 *
500  IF( lsame( trans, 'N' ) ) THEN
501  idum2 = ichar( 'N' )
502  ELSE IF ( lsame( trans, 'C' ) ) THEN
503  idum2 = ichar( 'C' )
504  ELSE
505  info = -2
506  END IF
507 *
508  IF( lwork .LT. -1) THEN
509  info = -15
510  ELSE IF ( lwork .EQ. -1 ) THEN
511  idum3 = -1
512  ELSE
513  idum3 = 1
514  ENDIF
515 *
516  IF( n .LT. 0 ) THEN
517  info = -3
518  ENDIF
519 *
520  IF( n+ja-1 .GT. store_n_a ) THEN
521  info = -( 8*100 + 6 )
522  ENDIF
523 *
524  IF( n+ib-1 .GT. store_m_b ) THEN
525  info = -( 11*100 + 3 )
526  ENDIF
527 *
528  IF( lldb .LT. nb ) THEN
529  info = -( 11*100 + 6 )
530  ENDIF
531 *
532  IF( nrhs .LT. 0 ) THEN
533  info = -4
534  ENDIF
535 *
536 * Current alignment restriction
537 *
538  IF( ja .NE. ib) THEN
539  info = -7
540  ENDIF
541 *
542 * Argument checking that is specific to Divide & Conquer routine
543 *
544  IF( nprow .NE. 1 ) THEN
545  info = -( 8*100+2 )
546  ENDIF
547 *
548  IF( n .GT. np*nb-mod( ja-1, nb )) THEN
549  info = -( 3 )
550  CALL pxerbla( ictxt,
551  $ 'PZPTTRSV, D&C alg.: only 1 block per proc',
552  $ -info )
553  RETURN
554  ENDIF
555 *
556  IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*int_one )) THEN
557  info = -( 8*100+4 )
558  CALL pxerbla( ictxt,
559  $ 'PZPTTRSV, D&C alg.: NB too small',
560  $ -info )
561  RETURN
562  ENDIF
563 *
564 *
565  work_size_min =
566  $ int_one*nrhs
567 *
568  work( 1 ) = work_size_min
569 *
570  IF( lwork .LT. work_size_min ) THEN
571  IF( lwork .NE. -1 ) THEN
572  info = -15
573  CALL pxerbla( ictxt,
574  $ 'PZPTTRSV: worksize error',
575  $ -info )
576  ENDIF
577  RETURN
578  ENDIF
579 *
580 * Pack params and positions into arrays for global consistency check
581 *
582  param_check( 16, 1 ) = descb(5)
583  param_check( 15, 1 ) = descb(4)
584  param_check( 14, 1 ) = descb(3)
585  param_check( 13, 1 ) = descb(2)
586  param_check( 12, 1 ) = descb(1)
587  param_check( 11, 1 ) = ib
588  param_check( 10, 1 ) = desca(5)
589  param_check( 9, 1 ) = desca(4)
590  param_check( 8, 1 ) = desca(3)
591  param_check( 7, 1 ) = desca(1)
592  param_check( 6, 1 ) = ja
593  param_check( 5, 1 ) = nrhs
594  param_check( 4, 1 ) = n
595  param_check( 3, 1 ) = idum3
596  param_check( 2, 1 ) = idum2
597  param_check( 1, 1 ) = idum1
598 *
599  param_check( 16, 2 ) = 1105
600  param_check( 15, 2 ) = 1104
601  param_check( 14, 2 ) = 1103
602  param_check( 13, 2 ) = 1102
603  param_check( 12, 2 ) = 1101
604  param_check( 11, 2 ) = 10
605  param_check( 10, 2 ) = 805
606  param_check( 9, 2 ) = 804
607  param_check( 8, 2 ) = 803
608  param_check( 7, 2 ) = 801
609  param_check( 6, 2 ) = 7
610  param_check( 5, 2 ) = 4
611  param_check( 4, 2 ) = 3
612  param_check( 3, 2 ) = 14
613  param_check( 2, 2 ) = 2
614  param_check( 1, 2 ) = 1
615 *
616 * Want to find errors with MIN( ), so if no error, set it to a big
 617 * number. If there already is an error, multiply by the
618 * descriptor multiplier.
619 *
620  IF( info.GE.0 ) THEN
621  info = bignum
622  ELSE IF( info.LT.-descmult ) THEN
623  info = -info
624  ELSE
625  info = -info * descmult
626  END IF
627 *
628 * Check consistency across processors
629 *
630  CALL globchk( ictxt, 16, param_check, 16,
631  $ param_check( 1, 3 ), info )
632 *
633 * Prepare output: set info = 0 if no error, and divide by DESCMULT
634 * if error is not in a descriptor entry.
635 *
636  IF( info.EQ.bignum ) THEN
637  info = 0
638  ELSE IF( mod( info, descmult ) .EQ. 0 ) THEN
639  info = -info / descmult
640  ELSE
641  info = -info
642  END IF
643 *
644  IF( info.LT.0 ) THEN
645  CALL pxerbla( ictxt, 'PZPTTRSV', -info )
646  RETURN
647  END IF
648 *
649 * Quick return if possible
650 *
651  IF( n.EQ.0 )
652  $ RETURN
653 *
654  IF( nrhs.EQ.0 )
655  $ RETURN
656 *
657 *
658 * Adjust addressing into matrix space to properly get into
659 * the beginning part of the relevant data
660 *
661  part_offset = nb*( (ja-1)/(npcol*nb) )
662 *
663  IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb ) THEN
664  part_offset = part_offset + nb
665  ENDIF
666 *
667  IF ( mycol .LT. csrc ) THEN
668  part_offset = part_offset - nb
669  ENDIF
670 *
671 * Form a new BLACS grid (the "standard form" grid) with only procs
672 * holding part of the matrix, of size 1xNP where NP is adjusted,
673 * starting at csrc=0, with JA modified to reflect dropped procs.
674 *
675 * First processor to hold part of the matrix:
676 *
677  first_proc = mod( ( ja-1 )/nb+csrc, npcol )
678 *
 679 * Calculate new JA while dropping off unused processors.
680 *
681  ja_new = mod( ja-1, nb ) + 1
682 *
683 * Save and compute new value of NP
684 *
685  np_save = np
686  np = ( ja_new+n-2 )/nb + 1
687 *
688 * Call utility routine that forms "standard-form" grid
689 *
690  CALL reshape( ictxt, int_one, ictxt_new, int_one,
691  $ first_proc, int_one, np )
692 *
693 * Use new context from standard grid as context.
694 *
695  ictxt_save = ictxt
696  ictxt = ictxt_new
697  desca_1xp( 2 ) = ictxt_new
698  descb_px1( 2 ) = ictxt_new
699 *
700 * Get information about new grid.
701 *
702  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
703 *
704 * Drop out processors that do not have part of the matrix.
705 *
706  IF( myrow .LT. 0 ) THEN
707  GOTO 1234
708  ENDIF
709 *
710 * ********************************
711 * Values reused throughout routine
712 *
713 * User-input value of partition size
714 *
715  part_size = nb
716 *
717 * Number of columns in each processor
718 *
719  my_num_cols = numroc( n, part_size, mycol, 0, npcol )
720 *
721 * Offset in columns to beginning of main partition in each proc
722 *
723  IF ( mycol .EQ. 0 ) THEN
724  part_offset = part_offset+mod( ja_new-1, part_size )
725  my_num_cols = my_num_cols - mod(ja_new-1, part_size )
726  ENDIF
727 *
728 * Size of main (or odd) partition in each processor
729 *
730  odd_size = my_num_cols
731  IF ( mycol .LT. np-1 ) THEN
732  odd_size = odd_size - int_one
733  ENDIF
734 *
735 *
736 *
737 * Begin main code
738 *
739  IF ( lsame( uplo, 'L' ) ) THEN
740 *
741  IF ( lsame( trans, 'N' ) ) THEN
742 *
743 * Frontsolve
744 *
745 *
746 ******************************************
747 * Local computation phase
748 ******************************************
749 *
750 * Use main partition in each processor to solve locally
751 *
752  CALL zpttrsv( uplo, 'N', odd_size, nrhs, d( part_offset+1 ),
753  $ e( part_offset+1 ), b( part_offset+1 ), lldb,
754  $ info )
755 *
756 *
757  IF ( mycol .LT. np-1 ) THEN
758 * Use factorization of odd-even connection block to modify
759 * locally stored portion of right hand side(s)
760 *
761  CALL zaxpy( nrhs, -e( part_offset+odd_size ),
762  $ b( part_offset+odd_size ), lldb,
763  $ b( part_offset+odd_size+1 ), lldb )
764 *
765  ENDIF
766 *
767 *
768  IF ( mycol .NE. 0 ) THEN
769 * Use the "spike" fillin to calculate contribution to previous
770 * processor's righthand-side.
771 *
772  CALL zgemm( 'C', 'N', 1, nrhs, odd_size, -cone, af( 1 ),
773  $ odd_size, b( part_offset+1 ), lldb, czero,
774  $ work( 1+int_one-1 ), int_one )
775  ENDIF
776 *
777 *
778 ************************************************
779 * Formation and solution of reduced system
780 ************************************************
781 *
782 *
783 * Send modifications to prior processor's right hand sides
784 *
785  IF( mycol .GT. 0) THEN
786 *
787  CALL zgesd2d( ictxt, int_one, nrhs,
788  $ work( 1 ), int_one,
789  $ 0, mycol - 1 )
790 *
791  ENDIF
792 *
793 * Receive modifications to processor's right hand sides
794 *
795  IF( mycol .LT. npcol-1) THEN
796 *
797  CALL zgerv2d( ictxt, int_one, nrhs,
798  $ work( 1 ), int_one,
799  $ 0, mycol + 1 )
800 *
801 * Combine contribution to locally stored right hand sides
802 *
803  CALL zmatadd( int_one, nrhs, cone,
804  $ work( 1 ), int_one, cone,
805  $ b( part_offset+odd_size + 1 ), lldb )
806 *
807  ENDIF
808 *
809 *
810 * The last processor does not participate in the solution of the
811 * reduced system, having sent its contribution already.
812  IF( mycol .EQ. npcol-1 ) THEN
813  GOTO 14
814  ENDIF
815 *
816 *
817 * *************************************
818 * Modification Loop
819 *
820 * The distance for sending and receiving for each level starts
821 * at 1 for the first level.
822  level_dist = 1
823 *
824 * Do until this proc is needed to modify other procs' equations
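 * (At each pass, a processor for which MOD( (MYCOL+1)/LEVEL_DIST, 2 )
 *  is zero keeps receiving and accumulating contributions from the
 *  processors LEVEL_DIST columns to its left and right; the others
 *  drop out of the loop below and go on to modify their neighbors'
 *  equations. LEVEL_DIST doubles each pass, giving the binary-tree
 *  pattern of block cyclic reduction.)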
825 *
826  12 CONTINUE
827  IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 11
828 *
829 * Receive and add contribution to righthand sides from left
830 *
831  IF( mycol-level_dist .GE. 0 ) THEN
832 *
833  CALL zgerv2d( ictxt, int_one, nrhs,
834  $ work( 1 ),
835  $ int_one, 0, mycol-level_dist )
836 *
837  CALL zmatadd( int_one, nrhs, cone,
838  $ work( 1 ), int_one, cone,
839  $ b( part_offset+odd_size + 1 ), lldb )
840 *
841  ENDIF
842 *
843 * Receive and add contribution to righthand sides from right
844 *
845  IF( mycol+level_dist .LT. npcol-1 ) THEN
846 *
847  CALL zgerv2d( ictxt, int_one, nrhs,
848  $ work( 1 ),
849  $ int_one, 0, mycol+level_dist )
850 *
851  CALL zmatadd( int_one, nrhs, cone,
852  $ work( 1 ), int_one, cone,
853  $ b( part_offset+odd_size + 1 ), lldb )
854 *
855  ENDIF
856 *
857  level_dist = level_dist*2
858 *
859  GOTO 12
860  11 CONTINUE
861 * [End of GOTO Loop]
862 *
863 *
864 *
865 * *********************************
866 * Calculate and use this proc's blocks to modify other procs
867 *
868 * Solve with diagonal block
869 *
870  CALL ztrtrs( 'L', 'N', 'U', int_one, nrhs, af( odd_size+2 ),
871  $ int_one, b( part_offset+odd_size+1 ), lldb, info )
872 *
873  IF( info.NE.0 ) THEN
874  GO TO 1000
875  ENDIF
876 *
877 *
878 *
879 * *********
880  IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
881 *
882 * Calculate contribution from this block to next diagonal block
883 *
884  CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
885  $ af( (odd_size)*1+1 ),
886  $ int_one,
887  $ b( part_offset+odd_size+1 ),
888  $ lldb, czero,
889  $ work( 1 ),
890  $ int_one )
891 *
892 * Send contribution to diagonal block's owning processor.
893 *
894  CALL zgesd2d( ictxt, int_one, nrhs,
895  $ work( 1 ),
896  $ int_one, 0, mycol+level_dist )
897 *
898  ENDIF
899 * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
900 *
901 * ************
902  IF( (mycol/level_dist .GT. 0 ).AND.
903  $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
904 *
905 *
906 * Use offdiagonal block to calculate modification to diag block
907 * of processor to the left
908 *
909  CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
910  $ af( odd_size*1+2+1 ),
911  $ int_one,
912  $ b( part_offset+odd_size+1 ),
913  $ lldb, czero,
914  $ work( 1 ),
915  $ int_one )
916 *
917 * Send contribution to diagonal block's owning processor.
918 *
919  CALL zgesd2d( ictxt, int_one, nrhs,
920  $ work( 1 ),
921  $ int_one, 0, mycol-level_dist )
922 *
923  ENDIF
924 * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
925 *
926  14 CONTINUE
927 *
928  ELSE
929 *
930 ******************** BACKSOLVE *************************************
931 *
932 ********************************************************************
933 * .. Begin reduced system phase of algorithm ..
934 ********************************************************************
935 *
936 *
937 *
938 * The last processor does not participate in the solution of the
939 * reduced system and just waits to receive its solution.
940  IF( mycol .EQ. npcol-1 ) THEN
941  GOTO 24
942  ENDIF
943 *
944 * Determine number of steps in tree loop
945 *
946  level_dist = 1
947  27 CONTINUE
948  IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 26
949 *
950  level_dist = level_dist*2
951 *
952  GOTO 27
953  26 CONTINUE
954 *
955 *
956  IF( (mycol/level_dist .GT. 0 ).AND.
957  $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
958 *
959 * Receive solution from processor to left
960 *
961  CALL zgerv2d( ictxt, int_one, nrhs,
962  $ work( 1 ),
963  $ int_one, 0, mycol-level_dist )
964 *
965 *
966 * Use offdiagonal block to calculate modification to RHS stored
967 * on this processor
968 *
969  CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
970  $ af( odd_size*1+2+1 ),
971  $ int_one,
972  $ work( 1 ),
973  $ int_one, cone,
974  $ b( part_offset+odd_size+1 ),
975  $ lldb )
976  ENDIF
977 * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
978 *
979 *
980  IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
981 *
982 * Receive solution from processor to right
983 *
984  CALL zgerv2d( ictxt, int_one, nrhs,
985  $ work( 1 ),
986  $ int_one, 0, mycol+level_dist )
987 *
988 * Calculate contribution from this block to next diagonal block
989 *
990  CALL zgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
991  $ af( (odd_size)*1+1 ),
992  $ int_one,
993  $ work( 1 ),
994  $ int_one, cone,
995  $ b( part_offset+odd_size+1 ),
996  $ lldb )
997 *
998  ENDIF
999 * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1000 *
1001 *
1002 * Solve with diagonal block
1003 *
1004  CALL ztrtrs( 'L', 'C', 'U', int_one, nrhs, af( odd_size+2 ),
1005  $ int_one, b( part_offset+odd_size+1 ), lldb, info )
1006 *
1007  IF( info.NE.0 ) THEN
1008  GO TO 1000
1009  ENDIF
1010 *
1011 *
1012 *
1013 ***Modification Loop *******
1014 *
1015  22 CONTINUE
1016  IF( level_dist .EQ. 1 ) GOTO 21
1017 *
1018  level_dist = level_dist/2
1019 *
1020 * Send solution to the right
1021 *
1022  IF( mycol+level_dist .LT. npcol-1 ) THEN
1023 *
1024  CALL zgesd2d( ictxt, int_one, nrhs,
1025  $ b( part_offset+odd_size+1 ),
1026  $ lldb, 0, mycol+level_dist )
1027 *
1028  ENDIF
1029 *
1030 * Send solution to left
1031 *
1032  IF( mycol-level_dist .GE. 0 ) THEN
1033 *
1034  CALL zgesd2d( ictxt, int_one, nrhs,
1035  $ b( part_offset+odd_size+1 ),
1036  $ lldb, 0, mycol-level_dist )
1037 *
1038  ENDIF
1039 *
1040  GOTO 22
1041  21 CONTINUE
1042 * [End of GOTO Loop]
1043 *
1044  24 CONTINUE
1045 * [Processor npcol - 1 jumped to here to await next stage]
1046 *
1047 *******************************
1048 * Reduced system has been solved, communicate solutions to nearest
1049 * neighbors in preparation for local computation phase.
1050 *
1051 *
1052 * Send elements of solution to next proc
1053 *
1054  IF( mycol .LT. npcol-1) THEN
1055 *
1056  CALL zgesd2d( ictxt, int_one, nrhs,
1057  $ b( part_offset+odd_size+1 ), lldb,
1058  $ 0, mycol +1 )
1059 *
1060  ENDIF
1061 *
1062 * Receive modifications to processor's right hand sides
1063 *
1064  IF( mycol .GT. 0) THEN
1065 *
1066  CALL zgerv2d( ictxt, int_one, nrhs,
1067  $ work( 1 ), int_one,
1068  $ 0, mycol - 1 )
1069 *
1070  ENDIF
1071 *
1072 *
1073 *
1074 **********************************************
1075 * Local computation phase
1076 **********************************************
1077 *
1078  IF ( mycol .NE. 0 ) THEN
1079 * Use the "spike" fillin to calculate contribution from previous
1080 * processor's solution.
1081 *
1082  CALL zgemm( 'N', 'N', odd_size, nrhs, 1, -cone, af( 1 ),
1083  $ odd_size, work( 1+int_one-1 ), int_one, cone,
1084  $ b( part_offset+1 ), lldb )
1085 *
1086  ENDIF
1087 *
1088 *
1089  IF ( mycol .LT. np-1 ) THEN
1090 * Use factorization of odd-even connection block to modify
1091 * locally stored portion of right hand side(s)
1092 *
1093  CALL zaxpy( nrhs, -dconjg( e( part_offset+odd_size ) ),
1094  $ b( part_offset+odd_size+1 ), lldb,
1095  $ b( part_offset+odd_size ), lldb )
1096 *
1097  ENDIF
1098 *
1099 * Use main partition in each processor to solve locally
1100 *
1101  CALL zpttrsv( uplo, 'C', odd_size, nrhs, d( part_offset+1 ),
1102  $ e( part_offset+1 ), b( part_offset+1 ), lldb,
1103  $ info )
1104 *
1105  ENDIF
1106 * End of "IF( LSAME( TRANS, 'N' ) )"...
1107 *
1108 *
1109  ELSE
1110 ***************************************************************
1111 * CASE UPLO = 'U' *
1112 ***************************************************************
1113  IF ( lsame( trans, 'C' ) ) THEN
1114 *
1115 * Frontsolve
1116 *
1117 *
1118 ******************************************
1119 * Local computation phase
1120 ******************************************
1121 *
1122 * Use main partition in each processor to solve locally
1123 *
1124  CALL zpttrsv( uplo, 'C', odd_size, nrhs, d( part_offset+1 ),
1125  $ e( part_offset+1 ), b( part_offset+1 ), lldb,
1126  $ info )
1127 *
1128 *
1129  IF ( mycol .LT. np-1 ) THEN
1130 * Use factorization of odd-even connection block to modify
1131 * locally stored portion of right hand side(s)
1132 *
1133  CALL zaxpy( nrhs, -dconjg( e( part_offset+odd_size ) ),
1134  $ b( part_offset+odd_size ), lldb,
1135  $ b( part_offset+odd_size+1 ), lldb )
1136 *
1137  ENDIF
1138 *
1139 *
1140  IF ( mycol .NE. 0 ) THEN
1141 * Use the "spike" fillin to calculate contribution to previous
1142 * processor's righthand-side.
1143 *
1144  CALL zgemm( 'T', 'N', 1, nrhs, odd_size, -cone, af( 1 ),
1145  $ odd_size, b( part_offset+1 ), lldb, czero,
1146  $ work( 1+int_one-1 ), int_one )
1147  ENDIF
1148 *
1149 *
1150 ************************************************
1151 * Formation and solution of reduced system
1152 ************************************************
1153 *
1154 *
1155 * Send modifications to prior processor's right hand sides
1156 *
1157  IF( mycol .GT. 0) THEN
1158 *
1159  CALL zgesd2d( ictxt, int_one, nrhs,
1160  $ work( 1 ), int_one,
1161  $ 0, mycol - 1 )
1162 *
1163  ENDIF
1164 *
1165 * Receive modifications to processor's right hand sides
1166 *
1167  IF( mycol .LT. npcol-1) THEN
1168 *
1169  CALL zgerv2d( ictxt, int_one, nrhs,
1170  $ work( 1 ), int_one,
1171  $ 0, mycol + 1 )
1172 *
1173 * Combine contribution to locally stored right hand sides
1174 *
1175  CALL zmatadd( int_one, nrhs, cone,
1176  $ work( 1 ), int_one, cone,
1177  $ b( part_offset+odd_size + 1 ), lldb )
1178 *
1179  ENDIF
1180 *
1181 *
1182 * The last processor does not participate in the solution of the
1183 * reduced system, having sent its contribution already.
1184  IF( mycol .EQ. npcol-1 ) THEN
1185  GOTO 44
1186  ENDIF
1187 *
1188 *
1189 * *************************************
1190 * Modification Loop
1191 *
1192 * The distance for sending and receiving for each level starts
1193 * at 1 for the first level.
1194  level_dist = 1
1195 *
1196 * Do until this proc is needed to modify other procs' equations
1197 *
1198  42 CONTINUE
1199  IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 41
1200 *
1201 * Receive and add contribution to righthand sides from left
1202 *
1203  IF( mycol-level_dist .GE. 0 ) THEN
1204 *
1205  CALL zgerv2d( ictxt, int_one, nrhs,
1206  $ work( 1 ),
1207  $ int_one, 0, mycol-level_dist )
1208 *
1209  CALL zmatadd( int_one, nrhs, cone,
1210  $ work( 1 ), int_one, cone,
1211  $ b( part_offset+odd_size + 1 ), lldb )
1212 *
1213  ENDIF
1214 *
1215 * Receive and add contribution to righthand sides from right
1216 *
1217  IF( mycol+level_dist .LT. npcol-1 ) THEN
1218 *
1219  CALL zgerv2d( ictxt, int_one, nrhs,
1220  $ work( 1 ),
1221  $ int_one, 0, mycol+level_dist )
1222 *
1223  CALL zmatadd( int_one, nrhs, cone,
1224  $ work( 1 ), int_one, cone,
1225  $ b( part_offset+odd_size + 1 ), lldb )
1226 *
1227  ENDIF
1228 *
1229  level_dist = level_dist*2
1230 *
1231  GOTO 42
1232  41 CONTINUE
1233 * [End of GOTO Loop]
1234 *
1235 *
1236 *
1237 * *********************************
1238 * Calculate and use this proc's blocks to modify other procs
1239 *
1240 * Solve with diagonal block
1241 *
1242  CALL ztrtrs( 'L', 'N', 'U', int_one, nrhs, af( odd_size+2 ),
1243  $ int_one, b( part_offset+odd_size+1 ), lldb, info )
1244 *
1245  IF( info.NE.0 ) THEN
1246  GO TO 1000
1247  ENDIF
1248 *
1249 *
1250 *
1251 * *********
1252  IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1253 *
1254 * Calculate contribution from this block to next diagonal block
1255 *
1256  CALL zgemm( 'T', 'N', int_one, nrhs, int_one, -cone,
1257  $ af( (odd_size)*1+1 ),
1258  $ int_one,
1259  $ b( part_offset+odd_size+1 ),
1260  $ lldb, czero,
1261  $ work( 1 ),
1262  $ int_one )
1263 *
1264 * Send contribution to diagonal block's owning processor.
1265 *
1266  CALL zgesd2d( ictxt, int_one, nrhs,
1267  $ work( 1 ),
1268  $ int_one, 0, mycol+level_dist )
1269 *
1270  ENDIF
1271 * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1272 *
1273 * ************
1274  IF( (mycol/level_dist .GT. 0 ).AND.
1275  $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1276 *
1277 *
1278 * Use offdiagonal block to calculate modification to diag block
1279 * of processor to the left
1280 *
1281  CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1282  $ af( odd_size*1+2+1 ),
1283  $ int_one,
1284  $ b( part_offset+odd_size+1 ),
1285  $ lldb, czero,
1286  $ work( 1 ),
1287  $ int_one )
1288 *
1289 * Send contribution to diagonal block's owning processor.
1290 *
1291  CALL zgesd2d( ictxt, int_one, nrhs,
1292  $ work( 1 ),
1293  $ int_one, 0, mycol-level_dist )
1294 *
1295  ENDIF
1296 * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1297 *
1298  44 CONTINUE
1299 *
1300  ELSE
1301 *
1302 ******************** BACKSOLVE *************************************
1303 *
1304 ********************************************************************
1305 * .. Begin reduced system phase of algorithm ..
1306 ********************************************************************
1307 *
1308 *
1309 *
1310 * The last processor does not participate in the solution of the
1311 * reduced system and just waits to receive its solution.
1312  IF( mycol .EQ. npcol-1 ) THEN
1313  GOTO 54
1314  ENDIF
1315 *
1316 * Determine number of steps in tree loop
1317 *
1318  level_dist = 1
1319  57 CONTINUE
1320  IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 56
1321 *
1322  level_dist = level_dist*2
1323 *
1324  GOTO 57
1325  56 CONTINUE
1326 *
1327 *
1328  IF( (mycol/level_dist .GT. 0 ).AND.
1329  $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1330 *
1331 * Receive solution from processor to left
1332 *
1333  CALL zgerv2d( ictxt, int_one, nrhs,
1334  $ work( 1 ),
1335  $ int_one, 0, mycol-level_dist )
1336 *
1337 *
1338 * Use offdiagonal block to calculate modification to RHS stored
1339 * on this processor
1340 *
1341  CALL zgemm( 'T', 'N', int_one, nrhs, int_one, -cone,
1342  $ af( odd_size*1+2+1 ),
1343  $ int_one,
1344  $ work( 1 ),
1345  $ int_one, cone,
1346  $ b( part_offset+odd_size+1 ),
1347  $ lldb )
1348  ENDIF
1349 * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1350 *
1351 *
1352  IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1353 *
1354 * Receive solution from processor to right
1355 *
1356  CALL zgerv2d( ictxt, int_one, nrhs,
1357  $ work( 1 ),
1358  $ int_one, 0, mycol+level_dist )
1359 *
1360 * Calculate contribution from this block to next diagonal block
1361 *
1362  CALL zgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1363  $ af( (odd_size)*1+1 ),
1364  $ int_one,
1365  $ work( 1 ),
1366  $ int_one, cone,
1367  $ b( part_offset+odd_size+1 ),
1368  $ lldb )
1369 *
1370  ENDIF
1371 * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1372 *
1373 *
1374 * Solve with diagonal block
1375 *
1376  CALL ztrtrs( 'L', 'C', 'U', int_one, nrhs, af( odd_size+2 ),
1377  $ int_one, b( part_offset+odd_size+1 ), lldb, info )
1378 *
1379  IF( info.NE.0 ) THEN
1380  GO TO 1000
1381  ENDIF
1382 *
1383 *
1384 *
1385 ***Modification Loop *******
1386 *
1387  52 CONTINUE
1388  IF( level_dist .EQ. 1 ) GOTO 51
1389 *
1390  level_dist = level_dist/2
1391 *
1392 * Send solution to the right
1393 *
1394  IF( mycol+level_dist .LT. npcol-1 ) THEN
1395 *
1396  CALL zgesd2d( ictxt, int_one, nrhs,
1397  $ b( part_offset+odd_size+1 ),
1398  $ lldb, 0, mycol+level_dist )
1399 *
1400  ENDIF
1401 *
1402 * Send solution to left
1403 *
1404  IF( mycol-level_dist .GE. 0 ) THEN
1405 *
1406  CALL zgesd2d( ictxt, int_one, nrhs,
1407  $ b( part_offset+odd_size+1 ),
1408  $ lldb, 0, mycol-level_dist )
1409 *
1410  ENDIF
1411 *
1412  GOTO 52
1413  51 CONTINUE
1414 * [End of GOTO Loop]
1415 *
1416  54 CONTINUE
1417 * [Processor npcol - 1 jumped to here to await next stage]
1418 *
1419 *******************************
1420 * Reduced system has been solved, communicate solutions to nearest
1421 * neighbors in preparation for local computation phase.
1422 *
1423 *
1424 * Send elements of solution to next proc
1425 *
1426  IF( mycol .LT. npcol-1) THEN
1427 *
1428  CALL zgesd2d( ictxt, int_one, nrhs,
1429  $ b( part_offset+odd_size+1 ), lldb,
1430  $ 0, mycol +1 )
1431 *
1432  ENDIF
1433 *
1434 * Receive modifications to processor's right hand sides
1435 *
1436  IF( mycol .GT. 0) THEN
1437 *
1438  CALL zgerv2d( ictxt, int_one, nrhs,
1439  $ work( 1 ), int_one,
1440  $ 0, mycol - 1 )
1441 *
1442  ENDIF
1443 *
1444 *
1445 *
1446 **********************************************
1447 * Local computation phase
1448 **********************************************
1449 *
1450  IF ( mycol .NE. 0 ) THEN
1451 * Use the "spike" fillin to calculate contribution from previous
1452 * processor's solution.
1453 *
1454  CALL zgemm( 'C', 'N', odd_size, nrhs, 1, -cone, af( 1 ),
1455  $ int_one, work( 1 ), int_one, cone,
1456  $ b( part_offset+1 ), lldb )
1457 *
1458  ENDIF
1459 *
1460 *
1461  IF ( mycol .LT. np-1 ) THEN
1462 * Use factorization of odd-even connection block to modify
1463 * locally stored portion of right hand side(s)
1464 *
1465  CALL zaxpy( nrhs, -e( part_offset+odd_size ),
1466  $ b( part_offset+odd_size+1 ), lldb,
1467  $ b( part_offset+odd_size ), lldb )
1468 *
1469  ENDIF
1470 *
1471 * Use main partition in each processor to solve locally
1472 *
1473  CALL zpttrsv( uplo, 'N', odd_size, nrhs, d( part_offset+1 ),
1474  $ e( part_offset+1 ), b( part_offset+1 ), lldb,
1475  $ info )
1476 *
1477  ENDIF
1478 * End of "IF( LSAME( TRANS, 'N' ) )"...
1479 *
1480 *
1481  ENDIF
1482 * End of "IF( LSAME( UPLO, 'L' ) )"...
1483  1000 CONTINUE
1484 *
1485 *
1486 * Free BLACS space used to hold standard-form grid.
1487 *
1488  IF( ictxt_save .NE. ictxt_new ) THEN
1489  CALL blacs_gridexit( ictxt_new )
1490  ENDIF
1491 *
1492  1234 CONTINUE
1493 *
1494 * Restore saved input parameters
1495 *
1496  ictxt = ictxt_save
1497  np = np_save
1498 *
1499 * Output minimum worksize
1500 *
1501  work( 1 ) = work_size_min
1502 *
1503 *
1504  RETURN
1505 *
1506 * End of PZPTTRSV
1507 *
1508  END