ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pspbtrs.f
Go to the documentation of this file.
1  SUBROUTINE pspbtrs( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB,
2  $ AF, LAF, WORK, LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * April 3, 2000
8 *
9 * .. Scalar Arguments ..
10  CHARACTER UPLO
11  INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * ), DESCB( * )
15  REAL A( * ), AF( * ), B( * ), WORK( * )
16 * ..
17 *
18 *
19 * Purpose
20 * =======
21 *
22 * PSPBTRS solves a system of linear equations
23 *
24 * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
25 *
26 * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors
27 * stored in A(1:N,JA:JA+N-1) and AF by PSPBTRF.
28 * A(1:N, JA:JA+N-1) is an N-by-N real
29 * banded symmetric positive definite distributed
30 * matrix with bandwidth BW.
31 * Depending on the value of UPLO, A stores either U or L in the equn
32 * A(1:N, JA:JA+N-1) = U'*U or L*L' as computed by PSPBTRF.
33 *
34 * Routine PSPBTRF MUST be called first.
35 *
36 * =====================================================================
37 *
38 * Arguments
39 * =========
40 *
41 * UPLO (global input) CHARACTER
42 * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored;
43 * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored.
44 *
45 * N (global input) INTEGER
46 * The number of rows and columns to be operated on, i.e. the
47 * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
48 *
49 * BW (global input) INTEGER
50 * Number of subdiagonals in L or U. 0 <= BW <= N-1
51 *
52 * NRHS (global input) INTEGER
53 * The number of right hand sides, i.e., the number of columns
54 * of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
55 * NRHS >= 0.
56 *
57 * A (local input/local output) REAL pointer into
58 * local memory to an array with first dimension
59 * LLD_A >=(bw+1) (stored in DESCA).
60 * On entry, this array contains the local pieces of the
61 * N-by-N symmetric banded distributed Cholesky factor L or
62 * L^T A(1:N, JA:JA+N-1).
63 * This local portion is stored in the packed banded format
64 * used in LAPACK. Please see the Notes below and the
65 * ScaLAPACK manual for more detail on the format of
66 * distributed matrices.
67 *
68 * JA (global input) INTEGER
69 * The index in the global array A that points to the start of
70 * the matrix to be operated on (which may be either all of A
71 * or a submatrix of A).
72 *
73 * DESCA (global and local input) INTEGER array of dimension DLEN.
74 * if 1D type (DTYPE_A=501), DLEN >= 7;
75 * if 2D type (DTYPE_A=1), DLEN >= 9 .
76 * The array descriptor for the distributed matrix A.
77 * Contains information of mapping of A to memory. Please
78 * see NOTES below for full description and options.
79 *
80 * B (local input/local output) REAL pointer into
81 * local memory to an array of local lead dimension lld_b>=NB.
82 * On entry, this array contains the
83 * the local pieces of the right hand sides
84 * B(IB:IB+N-1, 1:NRHS).
85 * On exit, this contains the local piece of the solutions
86 * distributed matrix X.
87 *
88 * IB (global input) INTEGER
89 * The row index in the global array B that points to the first
90 * row of the matrix to be operated on (which may be either
91 * all of B or a submatrix of B).
92 *
93 * DESCB (global and local input) INTEGER array of dimension DLEN.
94 * if 1D type (DTYPE_B=502), DLEN >=7;
95 * if 2D type (DTYPE_B=1), DLEN >= 9.
96 * The array descriptor for the distributed matrix B.
97 * Contains information of mapping of B to memory. Please
98 * see NOTES below for full description and options.
99 *
100 * AF (local output) REAL array, dimension LAF.
101 * Auxiliary Fillin Space.
102 * Fillin is created during the factorization routine
103 * PSPBTRF and this is stored in AF. If a linear system
104 * is to be solved using PSPBTRS after the factorization
105 * routine, AF *must not be altered* after the factorization.
106 *
107 * LAF (local input) INTEGER
108 * Size of user-input Auxiliary Fillin space AF. Must be >=
109 * (NB+2*bw)*bw
110 * If LAF is not large enough, an error code will be returned
111 * and the minimum acceptable size will be returned in AF( 1 )
112 *
113 * WORK (local workspace/local output)
114 * REAL temporary workspace. This space may
115 * be overwritten in between calls to routines. WORK must be
116 * the size given in LWORK.
117 * On exit, WORK( 1 ) contains the minimal LWORK.
118 *
119 * LWORK (local input or global input) INTEGER
120 * Size of user-input workspace WORK.
121 * If LWORK is too small, the minimal acceptable size will be
122 * returned in WORK(1) and an error code is returned. LWORK>=
123 * (bw*NRHS)
124 *
125 * INFO (global output) INTEGER
126 * = 0: successful exit
127 * < 0: If the i-th argument is an array and the j-entry had
128 * an illegal value, then INFO = -(i*100+j), if the i-th
129 * argument is a scalar and had an illegal value, then
130 * INFO = -i.
131 *
132 * =====================================================================
133 *
134 *
135 * Restrictions
136 * ============
137 *
138 * The following are restrictions on the input parameters. Some of these
139 * are temporary and will be removed in future releases, while others
140 * may reflect fundamental technical limitations.
141 *
142 * Non-cyclic restriction: VERY IMPORTANT!
143 * P*NB>= mod(JA-1,NB)+N.
144 * The mapping for matrices must be blocked, reflecting the nature
145 * of the divide and conquer algorithm as a task-parallel algorithm.
146 * This formula in words is: no processor may have more than one
147 * chunk of the matrix.
148 *
149 * Blocksize cannot be too small:
150 * If the matrix spans more than one processor, the following
151 * restriction on NB, the size of each block on each processor,
152 * must hold:
153 * NB >= 2*BW
154 * The bulk of parallel computation is done on the matrix of size
155 * O(NB) on each processor. If this is too small, divide and conquer
156 * is a poor choice of algorithm.
157 *
158 * Submatrix reference:
159 * JA = IB
160 * Alignment restriction that prevents unnecessary communication.
161 *
162 *
163 * =====================================================================
164 *
165 *
166 * Notes
167 * =====
168 *
169 * If the factorization routine and the solve routine are to be called
170 * separately (to solve various sets of righthand sides using the same
171 * coefficient matrix), the auxiliary space AF *must not be altered*
172 * between calls to the factorization routine and the solve routine.
173 *
174 * The best algorithm for solving banded and tridiagonal linear systems
175 * depends on a variety of parameters, especially the bandwidth.
176 * Currently, only algorithms designed for the case N/P >> bw are
177 * implemented. These go by many names, including Divide and Conquer,
178 * Partitioning, domain decomposition-type, etc.
179 *
180 * Algorithm description: Divide and Conquer
181 *
182 * The Divide and Conqer algorithm assumes the matrix is narrowly
183 * banded compared with the number of equations. In this situation,
184 * it is best to distribute the input matrix A one-dimensionally,
185 * with columns atomic and rows divided amongst the processes.
186 * The basic algorithm divides the banded matrix up into
187 * P pieces with one stored on each processor,
188 * and then proceeds in 2 phases for the factorization or 3 for the
189 * solution of a linear system.
190 * 1) Local Phase:
191 * The individual pieces are factored independently and in
192 * parallel. These factors are applied to the matrix creating
193 * fillin, which is stored in a non-inspectable way in auxiliary
194 * space AF. Mathematically, this is equivalent to reordering
195 * the matrix A as P A P^T and then factoring the principal
196 * leading submatrix of size equal to the sum of the sizes of
197 * the matrices factored on each processor. The factors of
198 * these submatrices overwrite the corresponding parts of A
199 * in memory.
200 * 2) Reduced System Phase:
201 * A small (BW* (P-1)) system is formed representing
202 * interaction of the larger blocks, and is stored (as are its
203 * factors) in the space AF. A parallel Block Cyclic Reduction
204 * algorithm is used. For a linear system, a parallel front solve
205 * followed by an analagous backsolve, both using the structure
206 * of the factored matrix, are performed.
207 * 3) Backsubsitution Phase:
208 * For a linear system, a local backsubstitution is performed on
209 * each processor in parallel.
210 *
211 *
212 * Descriptors
213 * ===========
214 *
215 * Descriptors now have *types* and differ from ScaLAPACK 1.0.
216 *
217 * Note: banded codes can use either the old two dimensional
218 * or new one-dimensional descriptors, though the processor grid in
219 * both cases *must be one-dimensional*. We describe both types below.
220 *
221 * Each global data object is described by an associated description
222 * vector. This vector stores the information required to establish
223 * the mapping between an object element and its corresponding process
224 * and memory location.
225 *
226 * Let A be a generic term for any 2D block cyclicly distributed array.
227 * Such a global array has an associated description vector DESCA.
228 * In the following comments, the character _ should be read as
229 * "of the global array".
230 *
231 * NOTATION STORED IN EXPLANATION
232 * --------------- -------------- --------------------------------------
233 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
234 * DTYPE_A = 1.
235 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
236 * the BLACS process grid A is distribu-
237 * ted over. The context itself is glo-
238 * bal, but the handle (the integer
239 * value) may vary.
240 * M_A (global) DESCA( M_ ) The number of rows in the global
241 * array A.
242 * N_A (global) DESCA( N_ ) The number of columns in the global
243 * array A.
244 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
245 * the rows of the array.
246 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
247 * the columns of the array.
248 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
249 * row of the array A is distributed.
250 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
251 * first column of the array A is
252 * distributed.
253 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
254 * array. LLD_A >= MAX(1,LOCr(M_A)).
255 *
256 * Let K be the number of rows or columns of a distributed matrix,
257 * and assume that its process grid has dimension p x q.
258 * LOCr( K ) denotes the number of elements of K that a process
259 * would receive if K were distributed over the p processes of its
260 * process column.
261 * Similarly, LOCc( K ) denotes the number of elements of K that a
262 * process would receive if K were distributed over the q processes of
263 * its process row.
264 * The values of LOCr() and LOCc() may be determined via a call to the
265 * ScaLAPACK tool function, NUMROC:
266 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
267 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
268 * An upper bound for these quantities may be computed by:
269 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
270 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
271 *
272 *
273 * One-dimensional descriptors:
274 *
275 * One-dimensional descriptors are a new addition to ScaLAPACK since
276 * version 1.0. They simplify and shorten the descriptor for 1D
277 * arrays.
278 *
279 * Since ScaLAPACK supports two-dimensional arrays as the fundamental
280 * object, we allow 1D arrays to be distributed either over the
281 * first dimension of the array (as if the grid were P-by-1) or the
282 * 2nd dimension (as if the grid were 1-by-P). This choice is
283 * indicated by the descriptor type (501 or 502)
284 * as described below.
285 *
286 * IMPORTANT NOTE: the actual BLACS grid represented by the
287 * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
288 * irrespective of which one-dimensional descriptor type
289 * (501 or 502) is input.
290 * This routine will interpret the grid properly either way.
291 * ScaLAPACK routines *do not support intercontext operations* so that
292 * the grid passed to a single ScaLAPACK routine *must be the same*
293 * for all array descriptors passed to that routine.
294 *
295 * NOTE: In all cases where 1D descriptors are used, 2D descriptors
296 * may also be used, since a one-dimensional array is a special case
297 * of a two-dimensional array with one dimension of size unity.
298 * The two-dimensional array used in this case *must* be of the
299 * proper orientation:
300 * If the appropriate one-dimensional descriptor is DTYPEA=501
301 * (1 by P type), then the two dimensional descriptor must
302 * have a CTXT value that refers to a 1 by P BLACS grid;
303 * If the appropriate one-dimensional descriptor is DTYPEA=502
304 * (P by 1 type), then the two dimensional descriptor must
305 * have a CTXT value that refers to a P by 1 BLACS grid.
306 *
307 *
308 * Summary of allowed descriptors, types, and BLACS grids:
309 * DTYPE 501 502 1 1
310 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1
311 * -----------------------------------------------------
312 * A OK NO OK NO
313 * B NO OK NO OK
314 *
315 * Note that a consequence of this chart is that it is not possible
316 * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
317 * to opposite requirements for the orientation of the BLACS grid,
318 * and as noted before, the *same* BLACS context must be used in
319 * all descriptors in a single ScaLAPACK subroutine call.
320 *
321 * Let A be a generic term for any 1D block cyclicly distributed array.
322 * Such a global array has an associated description vector DESCA.
323 * In the following comments, the character _ should be read as
324 * "of the global array".
325 *
326 * NOTATION STORED IN EXPLANATION
327 * --------------- ---------- ------------------------------------------
328 * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
329 * TYPE_A = 501: 1-by-P grid.
330 * TYPE_A = 502: P-by-1 grid.
331 * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
332 * the BLACS process grid A is distribu-
333 * ted over. The context itself is glo-
334 * bal, but the handle (the integer
335 * value) may vary.
336 * N_A (global) DESCA( 3 ) The size of the array dimension being
337 * distributed.
338 * NB_A (global) DESCA( 4 ) The blocking factor used to distribute
339 * the distributed dimension of the array.
340 * SRC_A (global) DESCA( 5 ) The process row or column over which the
341 * first row or column of the array
342 * is distributed.
343 * LLD_A (local) DESCA( 6 ) The leading dimension of the local array
344 * storing the local blocks of the distri-
345 * buted array A. Minimum value of LLD_A
346 * depends on TYPE_A.
347 * TYPE_A = 501: LLD_A >=
348 * size of undistributed dimension, 1.
349 * TYPE_A = 502: LLD_A >=NB_A, 1.
350 * Reserved DESCA( 7 ) Reserved for future use.
351 *
352 *
353 *
354 * =====================================================================
355 *
356 * Code Developer: Andrew J. Cleary, University of Tennessee.
357 * Current address: Lawrence Livermore National Labs.
358 *
359 * =====================================================================
360 *
361 * .. Parameters ..
362  INTEGER INT_ONE
363  parameter( int_one = 1 )
364  INTEGER DESCMULT, BIGNUM
365  parameter( descmult = 100, bignum = descmult*descmult )
366  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
367  $ lld_, mb_, m_, nb_, n_, rsrc_
368  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
369  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
370  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
371 * ..
372 * .. Local Scalars ..
373  INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
374  $ idum1, idum3, ja_new, llda, lldb, mycol, myrow,
375  $ nb, np, npcol, nprow, np_save, part_offset,
376  $ return_code, store_m_b, store_n_a,
377  $ work_size_min
378 * ..
379 * .. Local Arrays ..
380  INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
381  $ param_check( 16, 3 )
382 * ..
383 * .. External Subroutines ..
384  EXTERNAL blacs_gridexit, blacs_gridinfo, desc_convert,
386 * ..
387 * .. External Functions ..
388  LOGICAL LSAME
389  EXTERNAL lsame
390 * ..
391 * .. Intrinsic Functions ..
392  INTRINSIC ichar, mod
393 * ..
394 * .. Executable Statements ..
395 *
396 * Test the input parameters
397 *
398  info = 0
399 *
400 * Convert descriptor into standard form for easy access to
401 * parameters, check that grid is of right shape.
402 *
403  desca_1xp( 1 ) = 501
404  descb_px1( 1 ) = 502
405 *
406  CALL desc_convert( desca, desca_1xp, return_code )
407 *
408  IF( return_code.NE.0 ) THEN
409  info = -( 7*100+2 )
410  END IF
411 *
412  CALL desc_convert( descb, descb_px1, return_code )
413 *
414  IF( return_code.NE.0 ) THEN
415  info = -( 10*100+2 )
416  END IF
417 *
418 * Consistency checks for DESCA and DESCB.
419 *
420 * Context must be the same
421  IF( desca_1xp( 2 ).NE.descb_px1( 2 ) ) THEN
422  info = -( 10*100+2 )
423  END IF
424 *
425 * These are alignment restrictions that may or may not be removed
426 * in future releases. -Andy Cleary, April 14, 1996.
427 *
428 * Block sizes must be the same
429  IF( desca_1xp( 4 ).NE.descb_px1( 4 ) ) THEN
430  info = -( 10*100+4 )
431  END IF
432 *
433 * Source processor must be the same
434 *
435  IF( desca_1xp( 5 ).NE.descb_px1( 5 ) ) THEN
436  info = -( 10*100+5 )
437  END IF
438 *
439 * Get values out of descriptor for use in code.
440 *
441  ictxt = desca_1xp( 2 )
442  csrc = desca_1xp( 5 )
443  nb = desca_1xp( 4 )
444  llda = desca_1xp( 6 )
445  store_n_a = desca_1xp( 3 )
446  lldb = descb_px1( 6 )
447  store_m_b = descb_px1( 3 )
448 *
449 * Get grid parameters
450 *
451 *
452  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
453  np = nprow*npcol
454 *
455 *
456 *
457  IF( lsame( uplo, 'U' ) ) THEN
458  idum1 = ichar( 'U' )
459  ELSE IF( lsame( uplo, 'L' ) ) THEN
460  idum1 = ichar( 'L' )
461  ELSE
462  info = -1
463  END IF
464 *
465  IF( lwork.LT.-1 ) THEN
466  info = -14
467  ELSE IF( lwork.EQ.-1 ) THEN
468  idum3 = -1
469  ELSE
470  idum3 = 1
471  END IF
472 *
473  IF( n.LT.0 ) THEN
474  info = -2
475  END IF
476 *
477  IF( n+ja-1.GT.store_n_a ) THEN
478  info = -( 7*100+6 )
479  END IF
480 *
481  IF( ( bw.GT.n-1 ) .OR. ( bw.LT.0 ) ) THEN
482  info = -3
483  END IF
484 *
485  IF( llda.LT.( bw+1 ) ) THEN
486  info = -( 7*100+6 )
487  END IF
488 *
489  IF( nb.LE.0 ) THEN
490  info = -( 7*100+4 )
491  END IF
492 *
493  IF( n+ib-1.GT.store_m_b ) THEN
494  info = -( 10*100+3 )
495  END IF
496 *
497  IF( lldb.LT.nb ) THEN
498  info = -( 10*100+6 )
499  END IF
500 *
501  IF( nrhs.LT.0 ) THEN
502  info = -3
503  END IF
504 *
505 * Current alignment restriction
506 *
507  IF( ja.NE.ib ) THEN
508  info = -6
509  END IF
510 *
511 * Argument checking that is specific to Divide & Conquer routine
512 *
513  IF( nprow.NE.1 ) THEN
514  info = -( 7*100+2 )
515  END IF
516 *
517  IF( n.GT.np*nb-mod( ja-1, nb ) ) THEN
518  info = -( 2 )
519  CALL pxerbla( ictxt, 'PSPBTRS, D&C alg.: only 1 block per proc'
520  $ , -info )
521  RETURN
522  END IF
523 *
524  IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*bw ) ) THEN
525  info = -( 7*100+4 )
526  CALL pxerbla( ictxt, 'PSPBTRS, D&C alg.: NB too small', -info )
527  RETURN
528  END IF
529 *
530 *
531  work_size_min = ( bw*nrhs )
532 *
533  work( 1 ) = work_size_min
534 *
535  IF( lwork.LT.work_size_min ) THEN
536  IF( lwork.NE.-1 ) THEN
537  info = -14
538  CALL pxerbla( ictxt, 'PSPBTRS: worksize error', -info )
539  END IF
540  RETURN
541  END IF
542 *
543 * Pack params and positions into arrays for global consistency check
544 *
545  param_check( 16, 1 ) = descb( 5 )
546  param_check( 15, 1 ) = descb( 4 )
547  param_check( 14, 1 ) = descb( 3 )
548  param_check( 13, 1 ) = descb( 2 )
549  param_check( 12, 1 ) = descb( 1 )
550  param_check( 11, 1 ) = ib
551  param_check( 10, 1 ) = desca( 5 )
552  param_check( 9, 1 ) = desca( 4 )
553  param_check( 8, 1 ) = desca( 3 )
554  param_check( 7, 1 ) = desca( 1 )
555  param_check( 6, 1 ) = ja
556  param_check( 5, 1 ) = nrhs
557  param_check( 4, 1 ) = bw
558  param_check( 3, 1 ) = n
559  param_check( 2, 1 ) = idum3
560  param_check( 1, 1 ) = idum1
561 *
562  param_check( 16, 2 ) = 1005
563  param_check( 15, 2 ) = 1004
564  param_check( 14, 2 ) = 1003
565  param_check( 13, 2 ) = 1002
566  param_check( 12, 2 ) = 1001
567  param_check( 11, 2 ) = 9
568  param_check( 10, 2 ) = 705
569  param_check( 9, 2 ) = 704
570  param_check( 8, 2 ) = 703
571  param_check( 7, 2 ) = 701
572  param_check( 6, 2 ) = 6
573  param_check( 5, 2 ) = 4
574  param_check( 4, 2 ) = 3
575  param_check( 3, 2 ) = 2
576  param_check( 2, 2 ) = 14
577  param_check( 1, 2 ) = 1
578 *
579 * Want to find errors with MIN( ), so if no error, set it to a big
580 * number. If there already is an error, multiply by the the
581 * descriptor multiplier.
582 *
583  IF( info.GE.0 ) THEN
584  info = bignum
585  ELSE IF( info.LT.-descmult ) THEN
586  info = -info
587  ELSE
588  info = -info*descmult
589  END IF
590 *
591 * Check consistency across processors
592 *
593  CALL globchk( ictxt, 16, param_check, 16, param_check( 1, 3 ),
594  $ info )
595 *
596 * Prepare output: set info = 0 if no error, and divide by DESCMULT
597 * if error is not in a descriptor entry.
598 *
599  IF( info.EQ.bignum ) THEN
600  info = 0
601  ELSE IF( mod( info, descmult ).EQ.0 ) THEN
602  info = -info / descmult
603  ELSE
604  info = -info
605  END IF
606 *
607  IF( info.LT.0 ) THEN
608  CALL pxerbla( ictxt, 'PSPBTRS', -info )
609  RETURN
610  END IF
611 *
612 * Quick return if possible
613 *
614  IF( n.EQ.0 )
615  $ RETURN
616 *
617  IF( nrhs.EQ.0 )
618  $ RETURN
619 *
620 *
621 * Adjust addressing into matrix space to properly get into
622 * the beginning part of the relevant data
623 *
624  part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
625 *
626  IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb ) THEN
627  part_offset = part_offset + nb
628  END IF
629 *
630  IF( mycol.LT.csrc ) THEN
631  part_offset = part_offset - nb
632  END IF
633 *
634 * Form a new BLACS grid (the "standard form" grid) with only procs
635 * holding part of the matrix, of size 1xNP where NP is adjusted,
636 * starting at csrc=0, with JA modified to reflect dropped procs.
637 *
638 * First processor to hold part of the matrix:
639 *
640  first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
641 *
642 * Calculate new JA one while dropping off unused processors.
643 *
644  ja_new = mod( ja-1, nb ) + 1
645 *
646 * Save and compute new value of NP
647 *
648  np_save = np
649  np = ( ja_new+n-2 ) / nb + 1
650 *
651 * Call utility routine that forms "standard-form" grid
652 *
653  CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
654  $ int_one, np )
655 *
656 * Use new context from standard grid as context.
657 *
658  ictxt_save = ictxt
659  ictxt = ictxt_new
660  desca_1xp( 2 ) = ictxt_new
661  descb_px1( 2 ) = ictxt_new
662 *
663 * Get information about new grid.
664 *
665  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
666 *
667 * Drop out processors that do not have part of the matrix.
668 *
669  IF( myrow.LT.0 ) THEN
670  GO TO 20
671  END IF
672 *
673 *
674 *
675 * Begin main code
676 *
677  info = 0
678 *
679 * Call frontsolve routine
680 *
681  IF( lsame( uplo, 'L' ) ) THEN
682 *
683  CALL pspbtrsv( 'L', 'N', n, bw, nrhs, a( part_offset+1 ),
684  $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
685  $ work, lwork, info )
686 *
687  ELSE
688 *
689  CALL pspbtrsv( 'U', 'T', n, bw, nrhs, a( part_offset+1 ),
690  $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
691  $ work, lwork, info )
692 *
693  END IF
694 *
695 * Call backsolve routine
696 *
697  IF( lsame( uplo, 'L' ) ) THEN
698 *
699  CALL pspbtrsv( 'L', 'T', n, bw, nrhs, a( part_offset+1 ),
700  $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
701  $ work, lwork, info )
702 *
703  ELSE
704 *
705  CALL pspbtrsv( 'U', 'N', n, bw, nrhs, a( part_offset+1 ),
706  $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
707  $ work, lwork, info )
708 *
709  END IF
710  10 CONTINUE
711 *
712 *
713 * Free BLACS space used to hold standard-form grid.
714 *
715  IF( ictxt_save.NE.ictxt_new ) THEN
716  CALL blacs_gridexit( ictxt_new )
717  END IF
718 *
719  20 CONTINUE
720 *
721 * Restore saved input parameters
722 *
723  ictxt = ictxt_save
724  np = np_save
725 *
726 * Output minimum worksize
727 *
728  work( 1 ) = work_size_min
729 *
730 *
731  RETURN
732 *
733 * End of PSPBTRS
734 *
735  END
globchk
subroutine globchk(ICTXT, N, X, LDX, IWORK, INFO)
Definition: pchkxmat.f:403
pspbtrs
subroutine pspbtrs(UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pspbtrs.f:3
reshape
void reshape(int *context_in, int *major_in, int *context_out, int *major_out, int *first_proc, int *nprow_new, int *npcol_new)
Definition: reshape.c:77
pspbtrsv
subroutine pspbtrsv(UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pspbtrsv.f:3
desc_convert
subroutine desc_convert(DESC_IN, DESC_OUT, INFO)
Definition: desc_convert.f:2
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2