pddttrs.f
      SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB,
     $                    DESCB, AF, LAF, WORK, LWORK, INFO )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     April 3, 2000
*
*     .. Scalar Arguments ..
      CHARACTER          TRANS
      INTEGER            IB, INFO, JA, LAF, LWORK, N, NRHS
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCB( * )
      DOUBLE PRECISION   AF( * ), B( * ), D( * ), DL( * ), DU( * ),
     $                   WORK( * )
*     ..
*
*
*  Purpose
*  =======
*
*  PDDTTRS solves a system of linear equations
*
*            A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
*                                 or
*            A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS)
*
*  where A(1:N, JA:JA+N-1) is the matrix used to produce the factors
*  stored in A(1:N,JA:JA+N-1) and AF by PDDTTRF.
*  A(1:N, JA:JA+N-1) is an N-by-N real
*  tridiagonal diagonally dominant-like distributed
*  matrix.
*
*  Routine PDDTTRF MUST be called first.
*
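*  A typical factor/solve sequence (a sketch only: the distributed
*  arrays, block size, and BLACS grid are assumed to have been set up
*  by the caller as described under Notes below):
*
*     CALL PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK,
*    $              INFO )
*     IF( INFO.EQ.0 ) THEN
*        CALL PDDTTRS( 'N', N, NRHS, DL, D, DU, JA, DESCA, B, IB,
*    $                 DESCB, AF, LAF, WORK, LWORK, INFO )
*     END IF
*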
*  =====================================================================
*
*  Arguments
*  =========
*
*
*  TRANS   (global input) CHARACTER
*          = 'N':  Solve with A(1:N, JA:JA+N-1);
*          = 'T' or 'C':  Solve with A(1:N, JA:JA+N-1)^T;
*
*  N       (global input) INTEGER
*          The number of rows and columns to be operated on, i.e. the
*          order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
*
*  NRHS    (global input) INTEGER
*          The number of right hand sides, i.e., the number of columns
*          of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
*          NRHS >= 0.
*
*  DL      (local input/local output) DOUBLE PRECISION pointer to local
*          part of global vector storing the lower diagonal of the
*          matrix. Globally, DL(1) is not referenced, and DL must be
*          aligned with D.
*          Must be of size >= DESCA( NB_ ).
*          On exit, this array contains information on the factors
*          of the matrix.
*
*  D       (local input/local output) DOUBLE PRECISION pointer to local
*          part of global vector storing the main diagonal of the
*          matrix.
*          On exit, this array contains information on the factors
*          of the matrix.
*          Must be of size >= DESCA( NB_ ).
*
*  DU      (local input/local output) DOUBLE PRECISION pointer to local
*          part of global vector storing the upper diagonal of the
*          matrix. Globally, DU(N) is not referenced, and DU must be
*          aligned with D.
*          On exit, this array contains information on the factors
*          of the matrix.
*          Must be of size >= DESCA( NB_ ).
*
*  JA      (global input) INTEGER
*          The index in the global array A that points to the start of
*          the matrix to be operated on (which may be either all of A
*          or a submatrix of A).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN.
*          if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
*          if 2D type (DTYPE_A=1), DLEN >= 9.
*          The array descriptor for the distributed matrix A.
*          Contains information on the mapping of A to memory. Please
*          see NOTES below for full description and options.
*
*  B       (local input/local output) DOUBLE PRECISION pointer into
*          local memory to an array of local leading dimension
*          LLD_B >= NB.
*          On entry, this array contains the
*          local pieces of the right hand sides
*          B(IB:IB+N-1, 1:NRHS).
*          On exit, this contains the local piece of the solution
*          distributed matrix X.
*
*  IB      (global input) INTEGER
*          The row index in the global array B that points to the first
*          row of the matrix to be operated on (which may be either
*          all of B or a submatrix of B).
*
*  DESCB   (global and local input) INTEGER array of dimension DLEN.
*          if 1D type (DTYPE_B=502), DLEN >= 7;
*          if 2D type (DTYPE_B=1), DLEN >= 9.
*          The array descriptor for the distributed matrix B.
*          Contains information on the mapping of B to memory. Please
*          see NOTES below for full description and options.
*
*  AF      (local output) DOUBLE PRECISION array, dimension LAF.
*          Auxiliary Fillin Space.
*          Fillin is created during the factorization routine
*          PDDTTRF and this is stored in AF. If a linear system
*          is to be solved using PDDTTRS after the factorization
*          routine, AF *must not be altered* after the factorization.
*
*  LAF     (local input) INTEGER
*          Size of user-input Auxiliary Fillin space AF. Must be >=
*          2*(NB+2).
*          If LAF is not large enough, an error code will be returned
*          and the minimum acceptable size will be returned in AF( 1 ).
*
*  WORK    (local workspace/local output)
*          DOUBLE PRECISION temporary workspace. This space may
*          be overwritten in between calls to routines. WORK must be
*          the size given in LWORK.
*          On exit, WORK( 1 ) contains the minimal LWORK.
*
*  LWORK   (local input or global input) INTEGER
*          Size of user-input workspace WORK.
*          If LWORK is too small, the minimal acceptable size will be
*          returned in WORK(1) and an error code is returned. LWORK >=
*          10*NPCOL+4*NRHS.
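*
*          For example (a sketch; names as above), the minimal LWORK
*          can be obtained with a workspace query: pass LWORK = -1 and
*          the routine returns the required size in WORK( 1 ) without
*          treating the small workspace as an error:
*
*             CALL PDDTTRS( 'N', N, NRHS, DL, D, DU, JA, DESCA, B,
*            $              IB, DESCB, AF, LAF, WORK, -1, INFO )
*             LWORK = INT( WORK( 1 ) )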
*
*  INFO    (local output) INTEGER
*          = 0:  successful exit
*          < 0:  If the i-th argument is an array and the j-th entry
*                had an illegal value, then INFO = -(i*100+j); if the
*                i-th argument is a scalar and had an illegal value,
*                then INFO = -i.
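*
*          For example, INFO = -804 = -(8*100+4) reports an illegal
*          value in entry 4 (the block size) of the 8th argument,
*          DESCA, while INFO = -2 reports an illegal value of the
*          2nd (scalar) argument, N.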
*
*  =====================================================================
*
*
*  Restrictions
*  ============
*
*  The following are restrictions on the input parameters. Some of these
*  are temporary and will be removed in future releases, while others
*  may reflect fundamental technical limitations.
*
*  Non-cyclic restriction: VERY IMPORTANT!
*    P*NB >= mod(JA-1,NB)+N.
*    The mapping for matrices must be blocked, reflecting the nature
*    of the divide and conquer algorithm as a task-parallel algorithm.
*    This formula in words is: no processor may have more than one
*    chunk of the matrix.
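*
*    For example, with P = 4 processes and block size NB = 100, this
*    restriction allows at most N = 400 when JA = 1; a larger N would
*    require some process to hold more than one chunk.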
*
*  Blocksize cannot be too small:
*    If the matrix spans more than one processor, the following
*    restriction on NB, the size of each block on each processor,
*    must hold:
*          NB >= 2
*    The bulk of parallel computation is done on the matrix of size
*    O(NB) on each processor. If this is too small, divide and conquer
*    is a poor choice of algorithm.
*
*  Submatrix reference:
*    JA = IB
*    Alignment restriction that prevents unnecessary communication.
*
*
*  =====================================================================
*
*
*  Notes
*  =====
*
*  If the factorization routine and the solve routine are to be called
*  separately (to solve various sets of right-hand sides using the same
*  coefficient matrix), the auxiliary space AF *must not be altered*
*  between calls to the factorization routine and the solve routine.
*
*  The best algorithm for solving banded and tridiagonal linear systems
*  depends on a variety of parameters, especially the bandwidth.
*  Currently, only algorithms designed for the case N/P >> bw are
*  implemented. These go by many names, including Divide and Conquer,
*  Partitioning, domain decomposition-type, etc.
*  For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
*  algorithms are the appropriate choice.
*
*  Algorithm description: Divide and Conquer
*
*  The Divide and Conquer algorithm assumes the matrix is narrowly
*  banded compared with the number of equations. In this situation,
*  it is best to distribute the input matrix A one-dimensionally,
*  with columns atomic and rows divided amongst the processes.
*  The basic algorithm divides the tridiagonal matrix up into
*  P pieces with one stored on each processor,
*  and then proceeds in 2 phases for the factorization or 3 for the
*  solution of a linear system.
*  1) Local Phase:
*     The individual pieces are factored independently and in
*     parallel. These factors are applied to the matrix creating
*     fillin, which is stored in a non-inspectable way in auxiliary
*     space AF. Mathematically, this is equivalent to reordering
*     the matrix A as P A P^T and then factoring the principal
*     leading submatrix of size equal to the sum of the sizes of
*     the matrices factored on each processor. The factors of
*     these submatrices overwrite the corresponding parts of A
*     in memory.
*  2) Reduced System Phase:
*     A small (P-1) system is formed representing the
*     interaction of the larger blocks, and is stored (as are its
*     factors) in the space AF. A parallel Block Cyclic Reduction
*     algorithm is used. For a linear system, a parallel front solve
*     followed by an analogous backsolve, both using the structure
*     of the factored matrix, are performed.
*  3) Backsubstitution Phase:
*     For a linear system, a local backsubstitution is performed on
*     each processor in parallel.
*
*
*  Descriptors
*  ===========
*
*  Descriptors now have *types* and differ from ScaLAPACK 1.0.
*
*  Note: tridiagonal codes can use either the old two-dimensional
*  or new one-dimensional descriptors, though the processor grid in
*  both cases *must be one-dimensional*. We describe both types below.
*
*  Each global data object is described by an associated description
*  vector. This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array. Such a global array has an associated description vector
*  DESCA. In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN       EXPLANATION
*  --------------- --------------- -------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
*                                  DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ )  The BLACS context handle, indicating
*                                  the BLACS process grid A is distribu-
*                                  ted over. The context itself is glo-
*                                  bal, but the handle (the integer
*                                  value) may vary.
*  M_A    (global) DESCA( M_ )     The number of rows in the global
*                                  array A.
*  N_A    (global) DESCA( N_ )     The number of columns in the global
*                                  array A.
*  MB_A   (global) DESCA( MB_ )    The blocking factor used to
*                                  distribute the rows of the array.
*  NB_A   (global) DESCA( NB_ )    The blocking factor used to
*                                  distribute the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ )  The process row over which the first
*                                  row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ )  The process column over which the
*                                  first column of the array A is
*                                  distributed.
*  LLD_A  (local)  DESCA( LLD_ )   The leading dimension of the local
*                                  array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
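*
*  As a worked example, take M = 9, MB_A = 2, RSRC_A = 0, NPROW = 2:
*  the nine rows split into blocks of sizes 2,2,2,2,1 dealt cyclically,
*  so process row 0 holds blocks 1, 3 and 5 and
*          LOCr( 9 ) = NUMROC( 9, 2, 0, 0, 2 ) = 5,
*  while process row 1 holds blocks 2 and 4 and LOCr( 9 ) = 4 there.
*  The bound above gives ceil( ceil(9/2)/2 )*2 = 6.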
*
*
*  One-dimensional descriptors:
*
*  One-dimensional descriptors are a new addition to ScaLAPACK since
*  version 1.0. They simplify and shorten the descriptor for 1D
*  arrays.
*
*  Since ScaLAPACK supports two-dimensional arrays as the fundamental
*  object, we allow 1D arrays to be distributed either over the
*  first dimension of the array (as if the grid were P-by-1) or the
*  2nd dimension (as if the grid were 1-by-P). This choice is
*  indicated by the descriptor type (501 or 502)
*  as described below.
*  However, for tridiagonal matrices, since the objects being
*  distributed are the individual vectors storing the diagonals, we
*  have adopted the convention that both the P-by-1 descriptor and
*  the 1-by-P descriptor are allowed and are equivalent for
*  tridiagonal matrices. Thus, for tridiagonal matrices,
*  DTYPE_A = 501 or 502 can be used interchangeably
*  without any other change.
*  We require that the distributed vectors storing the diagonals of a
*  tridiagonal matrix be aligned with each other. Because of this, a
*  single descriptor, DESCA, serves to describe the distribution
*  of all diagonals simultaneously.
*
*    IMPORTANT NOTE: the actual BLACS grid represented by the
*    CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
*    irrespective of which one-dimensional descriptor type
*    (501 or 502) is input.
*    This routine will interpret the grid properly either way.
*    ScaLAPACK routines *do not support intercontext operations* so that
*    the grid passed to a single ScaLAPACK routine *must be the same*
*    for all array descriptors passed to that routine.
*
*    NOTE: In all cases where 1D descriptors are used, 2D descriptors
*    may also be used, since a one-dimensional array is a special case
*    of a two-dimensional array with one dimension of size unity.
*    The two-dimensional array used in this case *must* be of the
*    proper orientation:
*      If the appropriate one-dimensional descriptor is DTYPE_A=501
*      (1 by P type), then the two-dimensional descriptor must
*      have a CTXT value that refers to a 1 by P BLACS grid;
*      If the appropriate one-dimensional descriptor is DTYPE_A=502
*      (P by 1 type), then the two-dimensional descriptor must
*      have a CTXT value that refers to a P by 1 BLACS grid.
*
*
*  Summary of allowed descriptors, types, and BLACS grids:
*  DTYPE          501         502         1           1
*  BLACS grid     1xP or Px1  1xP or Px1  1xP         Px1
*  -----------------------------------------------------
*  A              OK          OK          OK          NO
*  B              NO          OK          NO          OK
*
*  Note that a consequence of this chart is that it is not possible
*  for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
*  to opposite requirements for the orientation of the BLACS grid,
*  and as noted before, the *same* BLACS context must be used in
*  all descriptors in a single ScaLAPACK subroutine call.
*
*  Let A be a generic term for any 1D block cyclically distributed
*  array. Such a global array has an associated description vector
*  DESCA. In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN  EXPLANATION
*  --------------- ---------- ------------------------------------------
*  DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
*                             DTYPE_A = 501: 1-by-P grid.
*                             DTYPE_A = 502: P-by-1 grid.
*  CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
*                             the BLACS process grid A is distribu-
*                             ted over. The context itself is glo-
*                             bal, but the handle (the integer
*                             value) may vary.
*  N_A    (global) DESCA( 3 ) The size of the array dimension being
*                             distributed.
*  NB_A   (global) DESCA( 4 ) The blocking factor used to distribute
*                             the distributed dimension of the array.
*  SRC_A  (global) DESCA( 5 ) The process row or column over which the
*                             first row or column of the array
*                             is distributed.
*  Ignored         DESCA( 6 ) Ignored for tridiagonal matrices.
*  Reserved        DESCA( 7 ) Reserved for future use.
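*
*  For example (a sketch; ICTXT is assumed to hold a 1xP or Px1 BLACS
*  context and NB is the chosen block size), a valid 1D descriptor for
*  the tridiagonal matrix A could be filled in as:
*
*        DESCA( 1 ) = 501
*        DESCA( 2 ) = ICTXT
*        DESCA( 3 ) = N
*        DESCA( 4 ) = NB
*        DESCA( 5 ) = 0
*        DESCA( 6 ) = 0
*        DESCA( 7 ) = 0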
*
*
*
*  =====================================================================
*
*  Code Developer: Andrew J. Cleary, University of Tennessee.
*    Current address: Lawrence Livermore National Labs.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            INT_ONE
      PARAMETER          ( INT_ONE = 1 )
      INTEGER            DESCMULT, BIGNUM
      PARAMETER          ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT )
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      INTEGER            CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
     $                   IDUM2, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW,
     $                   MY_NUM_COLS, NB, NP, NPCOL, NPROW, NP_SAVE,
     $                   ODD_SIZE, PART_OFFSET, PART_SIZE, RETURN_CODE,
     $                   STORE_M_B, STORE_N_A, TEMP, WORK_SIZE_MIN
*     ..
*     .. Local Arrays ..
      INTEGER            DESCA_1XP( 7 ), DESCB_PX1( 7 ),
     $                   PARAM_CHECK( 15, 3 )
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT,
     $                   GLOBCHK, PDDTTRSV, PXERBLA, RESHAPE
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            NUMROC
      EXTERNAL           LSAME, NUMROC
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ICHAR, MOD
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters
*
      INFO = 0
*
*     Convert descriptor into standard form for easy access to
*     parameters, check that grid is of right shape.
*
      DESCA_1XP( 1 ) = 501
      DESCB_PX1( 1 ) = 502
*
      TEMP = DESCA( DTYPE_ )
      IF( TEMP.EQ.502 ) THEN
*        Temporarily set the descriptor type to 1xP type
         DESCA( DTYPE_ ) = 501
      END IF
*
      CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE )
*
      DESCA( DTYPE_ ) = TEMP
*
      IF( RETURN_CODE.NE.0 ) THEN
         INFO = -( 8*100+2 )
      END IF
*
      CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE )
*
      IF( RETURN_CODE.NE.0 ) THEN
         INFO = -( 11*100+2 )
      END IF
*
*     Consistency checks for DESCA and DESCB.
*
*     Context must be the same
      IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN
         INFO = -( 11*100+2 )
      END IF
*
*     These are alignment restrictions that may or may not be removed
*     in future releases. -Andy Cleary, April 14, 1996.
*
*     Block sizes must be the same
      IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN
         INFO = -( 11*100+4 )
      END IF
*
*     Source processor must be the same
*
      IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN
         INFO = -( 11*100+5 )
      END IF
*
*     Get values out of descriptor for use in code.
*
      ICTXT = DESCA_1XP( 2 )
      CSRC = DESCA_1XP( 5 )
      NB = DESCA_1XP( 4 )
      LLDA = DESCA_1XP( 6 )
      STORE_N_A = DESCA_1XP( 3 )
      LLDB = DESCB_PX1( 6 )
      STORE_M_B = DESCB_PX1( 3 )
*
*     Get grid parameters
*
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
      NP = NPROW*NPCOL
*
*
*
      IF( LSAME( TRANS, 'N' ) ) THEN
         IDUM2 = ICHAR( 'N' )
      ELSE IF( LSAME( TRANS, 'T' ) ) THEN
         IDUM2 = ICHAR( 'T' )
      ELSE IF( LSAME( TRANS, 'C' ) ) THEN
         IDUM2 = ICHAR( 'T' )
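*        A is real, so a conjugate-transpose solve reduces to a
*        transpose solve.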
      ELSE
         INFO = -1
      END IF
*
      IF( LWORK.LT.-1 ) THEN
         INFO = -15
      ELSE IF( LWORK.EQ.-1 ) THEN
         IDUM3 = -1
      ELSE
         IDUM3 = 1
      END IF
*
      IF( N.LT.0 ) THEN
         INFO = -2
      END IF
*
      IF( N+JA-1.GT.STORE_N_A ) THEN
         INFO = -( 8*100+6 )
      END IF
*
      IF( N+IB-1.GT.STORE_M_B ) THEN
         INFO = -( 11*100+3 )
      END IF
*
      IF( LLDB.LT.NB ) THEN
         INFO = -( 11*100+6 )
      END IF
*
      IF( NRHS.LT.0 ) THEN
         INFO = -3
      END IF
*
*     Current alignment restriction
*
      IF( JA.NE.IB ) THEN
         INFO = -7
      END IF
*
*     Argument checking that is specific to Divide & Conquer routine
*
      IF( NPROW.NE.1 ) THEN
         INFO = -( 8*100+2 )
      END IF
*
      IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN
         INFO = -( 2 )
         CALL PXERBLA( ICTXT,
     $        'PDDTTRS, D&C alg.: only 1 block per proc', -INFO )
         RETURN
      END IF
*
      IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN
         INFO = -( 8*100+4 )
         CALL PXERBLA( ICTXT, 'PDDTTRS, D&C alg.: NB too small',
     $        -INFO )
         RETURN
      END IF
*
*
      WORK_SIZE_MIN = 10*NPCOL + 4*NRHS
*
      WORK( 1 ) = WORK_SIZE_MIN
*
      IF( LWORK.LT.WORK_SIZE_MIN ) THEN
         IF( LWORK.NE.-1 ) THEN
            INFO = -15
            CALL PXERBLA( ICTXT, 'PDDTTRS: worksize error', -INFO )
         END IF
         RETURN
      END IF
*
*     Pack params and positions into arrays for global consistency check
*
      PARAM_CHECK( 15, 1 ) = DESCB( 5 )
      PARAM_CHECK( 14, 1 ) = DESCB( 4 )
      PARAM_CHECK( 13, 1 ) = DESCB( 3 )
      PARAM_CHECK( 12, 1 ) = DESCB( 2 )
      PARAM_CHECK( 11, 1 ) = DESCB( 1 )
      PARAM_CHECK( 10, 1 ) = IB
      PARAM_CHECK( 9, 1 ) = DESCA( 5 )
      PARAM_CHECK( 8, 1 ) = DESCA( 4 )
      PARAM_CHECK( 7, 1 ) = DESCA( 3 )
      PARAM_CHECK( 6, 1 ) = DESCA( 1 )
      PARAM_CHECK( 5, 1 ) = JA
      PARAM_CHECK( 4, 1 ) = NRHS
      PARAM_CHECK( 3, 1 ) = N
      PARAM_CHECK( 2, 1 ) = IDUM3
      PARAM_CHECK( 1, 1 ) = IDUM2
*
      PARAM_CHECK( 15, 2 ) = 1105
      PARAM_CHECK( 14, 2 ) = 1104
      PARAM_CHECK( 13, 2 ) = 1103
      PARAM_CHECK( 12, 2 ) = 1102
      PARAM_CHECK( 11, 2 ) = 1101
      PARAM_CHECK( 10, 2 ) = 10
      PARAM_CHECK( 9, 2 ) = 805
      PARAM_CHECK( 8, 2 ) = 804
      PARAM_CHECK( 7, 2 ) = 803
      PARAM_CHECK( 6, 2 ) = 801
      PARAM_CHECK( 5, 2 ) = 7
      PARAM_CHECK( 4, 2 ) = 3
      PARAM_CHECK( 3, 2 ) = 2
      PARAM_CHECK( 2, 2 ) = 15
      PARAM_CHECK( 1, 2 ) = 1
*
*     Want to find errors with MIN( ), so if no error, set it to a big
*     number. If there already is an error, multiply by the
*     descriptor multiplier.
*
      IF( INFO.GE.0 ) THEN
         INFO = BIGNUM
      ELSE IF( INFO.LT.-DESCMULT ) THEN
         INFO = -INFO
      ELSE
         INFO = -INFO*DESCMULT
      END IF
*
*     Check consistency across processors
*
      CALL GLOBCHK( ICTXT, 15, PARAM_CHECK, 15, PARAM_CHECK( 1, 3 ),
     $              INFO )
*
*     Prepare output: set info = 0 if no error, and divide by DESCMULT
*     if error is not in a descriptor entry.
*
      IF( INFO.EQ.BIGNUM ) THEN
         INFO = 0
      ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN
         INFO = -INFO / DESCMULT
      ELSE
         INFO = -INFO
      END IF
*
      IF( INFO.LT.0 ) THEN
         CALL PXERBLA( ICTXT, 'PDDTTRS', -INFO )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( N.EQ.0 )
     $   RETURN
*
      IF( NRHS.EQ.0 )
     $   RETURN
*
*
*     Adjust addressing into matrix space to properly get into
*     the beginning part of the relevant data
*
      PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) )
*
      IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN
         PART_OFFSET = PART_OFFSET + NB
      END IF
*
      IF( MYCOL.LT.CSRC ) THEN
         PART_OFFSET = PART_OFFSET - NB
      END IF
*
*     Form a new BLACS grid (the "standard form" grid) with only procs
*     holding part of the matrix, of size 1xNP where NP is adjusted,
*     starting at CSRC=0, with JA modified to reflect dropped procs.
*
*     First processor to hold part of the matrix:
*
      FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL )
*
*     Calculate the new value of JA while dropping off unused
*     processors.
*
      JA_NEW = MOD( JA-1, NB ) + 1
*
*     Save and compute new value of NP
*
      NP_SAVE = NP
      NP = ( JA_NEW+N-2 ) / NB + 1
*
*     Call utility routine that forms "standard-form" grid
*
      CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC,
     $              INT_ONE, NP )
*
*     Use new context from standard grid as context.
*
      ICTXT_SAVE = ICTXT
      ICTXT = ICTXT_NEW
      DESCA_1XP( 2 ) = ICTXT_NEW
      DESCB_PX1( 2 ) = ICTXT_NEW
*
*     Get information about new grid.
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Drop out processors that do not have part of the matrix.
*
      IF( MYROW.LT.0 ) THEN
         GO TO 20
      END IF
*
*     ********************************
*     Values reused throughout routine
*
*     User-input value of partition size
*
      PART_SIZE = NB
*
*     Number of columns in each processor
*
      MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL )
*
*     Offset in columns to beginning of main partition in each proc
*
      IF( MYCOL.EQ.0 ) THEN
         PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE )
         MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE )
      END IF
*
*     Size of main (or odd) partition in each processor
*
      ODD_SIZE = MY_NUM_COLS
      IF( MYCOL.LT.NP-1 ) THEN
         ODD_SIZE = ODD_SIZE - INT_ONE
      END IF
*
*
*
*     Begin main code
*
      INFO = 0
*
*     Call frontsolve routine
*
      IF( LSAME( TRANS, 'N' ) ) THEN
*
         CALL PDDTTRSV( 'L', 'N', N, NRHS, DL( PART_OFFSET+1 ),
     $        D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW,
     $        DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, LWORK,
     $        INFO )
*
      ELSE
*
         CALL PDDTTRSV( 'U', 'T', N, NRHS, DL( PART_OFFSET+1 ),
     $        D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW,
     $        DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, LWORK,
     $        INFO )
*
      END IF
*
*     Call backsolve routine
*
      IF( ( LSAME( TRANS, 'C' ) ) .OR. ( LSAME( TRANS, 'T' ) ) ) THEN
*
         CALL PDDTTRSV( 'L', 'T', N, NRHS, DL( PART_OFFSET+1 ),
     $        D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW,
     $        DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, LWORK,
     $        INFO )
*
      ELSE
*
         CALL PDDTTRSV( 'U', 'N', N, NRHS, DL( PART_OFFSET+1 ),
     $        D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW,
     $        DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, LWORK,
     $        INFO )
*
      END IF
   10 CONTINUE
*
*
*     Free BLACS space used to hold standard-form grid.
*
      IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN
         CALL BLACS_GRIDEXIT( ICTXT_NEW )
      END IF
*
   20 CONTINUE
*
*     Restore saved input parameters
*
      ICTXT = ICTXT_SAVE
      NP = NP_SAVE
*
*     Output minimum worksize
*
      WORK( 1 ) = WORK_SIZE_MIN
*
*
      RETURN
*
*     End of PDDTTRS
*
      END