ScaLAPACK 2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pspttrs.f
1  SUBROUTINE pspttrs( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF,
2  $ LAF, WORK, LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * April 3, 2000
8 *
9 * .. Scalar Arguments ..
10  INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * ), DESCB( * )
14  REAL AF( * ), B( * ), D( * ), E( * ), WORK( * )
15 * ..
16 *
17 *
18 * Purpose
19 * =======
20 *
21 * PSPTTRS solves a system of linear equations
22 *
23 * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
24 *
25 * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors
26 * stored in D, E and AF by PSPTTRF.
27 * A(1:N, JA:JA+N-1) is an N-by-N real
28 * tridiagonal symmetric positive definite distributed
29 * matrix.
30 *
31 * Routine PSPTTRF MUST be called first.
32 *
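*
* Example: a minimal factor/solve call sequence. This is a sketch
* only; the 1-by-NPCOL grid shape and the descriptor and workspace
* values are illustrative assumptions, not part of this routine:
*
*    CALL BLACS_GET( 0, 0, ICTXT )
*    CALL BLACS_GRIDINIT( ICTXT, 'R', 1, NPCOL )
*    CALL PSPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO )
*    IF( INFO.EQ.0 )
*   $   CALL PSPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF,
*   $                 LAF, WORK, LWORK, INFO )
*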
33 * =====================================================================
34 *
35 * Arguments
36 * =========
37 *
38 *
39 * N (global input) INTEGER
40 * The number of rows and columns to be operated on, i.e. the
41 * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
42 *
43 * NRHS (global input) INTEGER
44 * The number of right hand sides, i.e., the number of columns
45 * of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
46 * NRHS >= 0.
47 *
48 * D (local input/local output) REAL pointer to local
49 * part of global vector storing the main diagonal of the
50 * matrix.
51 * On exit, this array contains information on the
52 * factors of the matrix.
53 * Must be of size >= DESCA( NB_ ).
54 *
55 * E (local input/local output) REAL pointer to local
56 * part of global vector storing the upper diagonal of the
57 * matrix. Globally, E(n) is not referenced, and E must be
58 * aligned with D.
59 * On exit, this array contains information on the
60 * factors of the matrix.
61 * Must be of size >= DESCA( NB_ ).
62 *
63 * JA (global input) INTEGER
64 * The index in the global array A that points to the start of
65 * the matrix to be operated on (which may be either all of A
66 * or a submatrix of A).
67 *
68 * DESCA (global and local input) INTEGER array of dimension DLEN.
69 * if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
70 * if 2D type (DTYPE_A=1), DLEN >= 9.
71 * The array descriptor for the distributed matrix A.
72 * Contains information on the mapping of A to memory. Please
73 * see NOTES below for full description and options.
74 *
75 * B (local input/local output) REAL pointer into
76 * local memory to an array of local leading dimension LLD_B >= NB.
77 * On entry, this array contains the
78 * local pieces of the right hand sides
79 * B(IB:IB+N-1, 1:NRHS).
80 * On exit, it contains the local pieces of the solution
81 * distributed matrix X.
82 *
83 * IB (global input) INTEGER
84 * The row index in the global array B that points to the first
85 * row of the matrix to be operated on (which may be either
86 * all of B or a submatrix of B).
87 * IMPORTANT NOTE: The current version of this code supports
88 * only IB=JA
89 *
90 * DESCB (global and local input) INTEGER array of dimension DLEN.
91 * if 1D type (DTYPE_B=502), DLEN >=7;
92 * if 2D type (DTYPE_B=1), DLEN >= 9.
93 * The array descriptor for the distributed matrix B.
94 * Contains information on the mapping of B to memory. Please
95 * see NOTES below for full description and options.
96 *
97 * AF (local output) REAL array, dimension LAF.
98 * Auxiliary Fillin Space.
99 * Fillin is created during the factorization routine
100 * PSPTTRF and this is stored in AF. If a linear system
101 * is to be solved using PSPTTRS after the factorization
102 * routine, AF *must not be altered* after the factorization.
103 *
104 * LAF (local input) INTEGER
105 * Size of user-input Auxiliary Fillin space AF. Must be >=
106 * (NB+2)
107 * If LAF is not large enough, an error code will be returned
108 * and the minimum acceptable size will be returned in AF( 1 )
109 *
110 * WORK (local workspace/local output)
111 * REAL temporary workspace. This space may
112 * be overwritten in between calls to routines. WORK must be
113 * the size given in LWORK.
114 * On exit, WORK( 1 ) contains the minimal LWORK.
115 *
116 * LWORK (local input or global input) INTEGER
117 * Size of user-input workspace WORK.
118 * If LWORK is too small, the minimal acceptable size will be
119 * returned in WORK(1) and an error code is returned. LWORK>=
120 * (10+2*min(100,NRHS))*NPCOL+4*NRHS
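*          For example (illustrative values), NPCOL = 4 and NRHS = 2
*          give LWORK >= (10+2*min(100,2))*4 + 4*2 = 14*4 + 8 = 64.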
121 *
122 * INFO (local output) INTEGER
123 * = 0: successful exit
124 * < 0: If the i-th argument is an array and the j-th entry had
125 * an illegal value, then INFO = -(i*100+j); if the i-th
126 * argument is a scalar and had an illegal value, then
127 * INFO = -i.
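*          For example, an illegal value in entry 2 of the 5th
*          argument is reported as INFO = -(5*100+2) = -502.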
128 *
129 * =====================================================================
130 *
131 *
132 * Restrictions
133 * ============
134 *
135 * The following are restrictions on the input parameters. Some of these
136 * are temporary and will be removed in future releases, while others
137 * may reflect fundamental technical limitations.
138 *
139 * Non-cyclic restriction: VERY IMPORTANT!
140 * P*NB >= mod(JA-1,NB)+N.
141 * The mapping for matrices must be blocked, reflecting the nature
142 * of the divide and conquer algorithm as a task-parallel algorithm.
143 * This formula in words is: no processor may have more than one
144 * chunk of the matrix.
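*      For example (illustrative numbers): with P = 4 processors,
*      NB = 100 and JA = 1, the restriction reads 4*100 >= 0 + N,
*      so at most N = 400 equations can be handled on this grid.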
145 *
146 * Blocksize cannot be too small:
147 * If the matrix spans more than one processor, the following
148 * restriction on NB, the size of each block on each processor,
149 * must hold:
150 * NB >= 2
151 * The bulk of parallel computation is done on the matrix of size
152 * O(NB) on each processor. If this is too small, divide and conquer
153 * is a poor choice of algorithm.
154 *
155 * Submatrix reference:
156 * JA = IB
157 * Alignment restriction that prevents unnecessary communication.
158 *
159 *
160 * =====================================================================
161 *
162 *
163 * Notes
164 * =====
165 *
166 * If the factorization routine and the solve routine are to be called
167 * separately (to solve various sets of righthand sides using the same
168 * coefficient matrix), the auxiliary space AF *must not be altered*
169 * between calls to the factorization routine and the solve routine.
170 *
171 * The best algorithm for solving banded and tridiagonal linear systems
172 * depends on a variety of parameters, especially the bandwidth.
173 * Currently, only algorithms designed for the case N/P >> bw are
174 * implemented. These go by many names, including Divide and Conquer,
175 * Partitioning, domain decomposition-type, etc.
176 * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
177 * algorithms are the appropriate choice.
178 *
179 * Algorithm description: Divide and Conquer
180 *
181 * The Divide and Conquer algorithm assumes the matrix is narrowly
182 * banded compared with the number of equations. In this situation,
183 * it is best to distribute the input matrix A one-dimensionally,
184 * with columns atomic and rows divided amongst the processes.
185 * The basic algorithm divides the tridiagonal matrix up into
186 * P pieces with one stored on each processor,
187 * and then proceeds in 2 phases for the factorization or 3 for the
188 * solution of a linear system.
189 * 1) Local Phase:
190 * The individual pieces are factored independently and in
191 * parallel. These factors are applied to the matrix creating
192 * fillin, which is stored in a non-inspectable way in auxiliary
193 * space AF. Mathematically, this is equivalent to reordering
194 * the matrix A as P A P^T and then factoring the principal
195 * leading submatrix of size equal to the sum of the sizes of
196 * the matrices factored on each processor. The factors of
197 * these submatrices overwrite the corresponding parts of A
198 * in memory.
199 * 2) Reduced System Phase:
200 * A small (P-1)-by-(P-1) system is formed representing
201 * interaction of the larger blocks, and is stored (as are its
202 * factors) in the space AF. A parallel Block Cyclic Reduction
203 * algorithm is used. For a linear system, a parallel front solve
204 * followed by an analogous backsolve, both using the structure
205 * of the factored matrix, are performed.
206 * 3) Backsubstitution Phase:
207 * For a linear system, a local backsubstitution is performed on
208 * each processor in parallel.
209 *
210 *
211 * Descriptors
212 * ===========
213 *
214 * Descriptors now have *types* and differ from ScaLAPACK 1.0.
215 *
216 * Note: tridiagonal codes can use either the old two dimensional
217 * or new one-dimensional descriptors, though the processor grid in
218 * both cases *must be one-dimensional*. We describe both types below.
219 *
220 * Each global data object is described by an associated description
221 * vector. This vector stores the information required to establish
222 * the mapping between an object element and its corresponding process
223 * and memory location.
224 *
225 * Let A be a generic term for any 2D block cyclically distributed array.
226 * Such a global array has an associated description vector DESCA.
227 * In the following comments, the character _ should be read as
228 * "of the global array".
229 *
230 * NOTATION STORED IN EXPLANATION
231 * --------------- -------------- --------------------------------------
232 * DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
233 * DTYPE_A = 1.
234 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
235 * the BLACS process grid A is distribu-
236 * ted over. The context itself is glo-
237 * bal, but the handle (the integer
238 * value) may vary.
239 * M_A (global) DESCA( M_ ) The number of rows in the global
240 * array A.
241 * N_A (global) DESCA( N_ ) The number of columns in the global
242 * array A.
243 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
244 * the rows of the array.
245 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
246 * the columns of the array.
247 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
248 * row of the array A is distributed.
249 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
250 * first column of the array A is
251 * distributed.
252 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
253 * array. LLD_A >= MAX(1,LOCr(M_A)).
254 *
255 * Let K be the number of rows or columns of a distributed matrix,
256 * and assume that its process grid has dimension p x q.
257 * LOCr( K ) denotes the number of elements of K that a process
258 * would receive if K were distributed over the p processes of its
259 * process column.
260 * Similarly, LOCc( K ) denotes the number of elements of K that a
261 * process would receive if K were distributed over the q processes of
262 * its process row.
263 * The values of LOCr() and LOCc() may be determined via a call to the
264 * ScaLAPACK tool function, NUMROC:
265 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
266 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
267 * An upper bound for these quantities may be computed by:
268 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
269 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
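*
*  For example (illustrative values): M = 9, MB_A = 2, RSRC_A = 0 on
*  a grid with NPROW = 2 gives LOCr( 9 ) = NUMROC( 9, 2, 0, 0, 2 ) = 5
*  on process row 0 and NUMROC( 9, 2, 1, 0, 2 ) = 4 on process row 1,
*  while the upper bound gives ceil( ceil(9/2)/2 )*2 = 6.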
270 *
271 *
272 * One-dimensional descriptors:
273 *
274 * One-dimensional descriptors are a new addition to ScaLAPACK since
275 * version 1.0. They simplify and shorten the descriptor for 1D
276 * arrays.
277 *
278 * Since ScaLAPACK supports two-dimensional arrays as the fundamental
279 * object, we allow 1D arrays to be distributed either over the
280 * first dimension of the array (as if the grid were P-by-1) or the
281 * 2nd dimension (as if the grid were 1-by-P). This choice is
282 * indicated by the descriptor type (501 or 502)
283 * as described below.
284 * However, for tridiagonal matrices, since the objects being
285 * distributed are the individual vectors storing the diagonals, we
286 * have adopted the convention that both the P-by-1 descriptor and
287 * the 1-by-P descriptor are allowed and are equivalent for
288 * tridiagonal matrices. Thus, for tridiagonal matrices,
289 * DTYPE_A = 501 or 502 can be used interchangeably
290 * without any other change.
291 * We require that the distributed vectors storing the diagonals of a
292 * tridiagonal matrix be aligned with each other. Because of this, a
293 * single descriptor, DESCA, serves to describe the distribution
294 * of all diagonals simultaneously.
295 *
296 * IMPORTANT NOTE: the actual BLACS grid represented by the
297 * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
298 * irrespective of which one-dimensional descriptor type
299 * (501 or 502) is input.
300 * This routine will interpret the grid properly either way.
301 * ScaLAPACK routines *do not support intercontext operations* so that
302 * the grid passed to a single ScaLAPACK routine *must be the same*
303 * for all array descriptors passed to that routine.
304 *
305 * NOTE: In all cases where 1D descriptors are used, 2D descriptors
306 * may also be used, since a one-dimensional array is a special case
307 * of a two-dimensional array with one dimension of size unity.
308 * The two-dimensional array used in this case *must* be of the
309 * proper orientation:
310 * If the appropriate one-dimensional descriptor is DTYPE_A=501
311 * (1 by P type), then the two dimensional descriptor must
312 * have a CTXT value that refers to a 1 by P BLACS grid;
313 * If the appropriate one-dimensional descriptor is DTYPE_A=502
314 * (P by 1 type), then the two dimensional descriptor must
315 * have a CTXT value that refers to a P by 1 BLACS grid.
316 *
317 *
318 * Summary of allowed descriptors, types, and BLACS grids:
319 * DTYPE         501          502         1     1
320 * BLACS grid    1xP or Px1   1xP or Px1  1xP   Px1
321 * -----------------------------------------------------
322 * A             OK           OK          OK    NO
323 * B             NO           OK          NO    OK
324 *
325 * Note that a consequence of this chart is that it is not possible
326 * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
327 * to opposite requirements for the orientation of the BLACS grid,
328 * and as noted before, the *same* BLACS context must be used in
329 * all descriptors in a single ScaLAPACK subroutine call.
330 *
331 * Let A be a generic term for any 1D block cyclically distributed array.
332 * Such a global array has an associated description vector DESCA.
333 * In the following comments, the character _ should be read as
334 * "of the global array".
335 *
336 * NOTATION STORED IN EXPLANATION
337 * --------------- ---------- ------------------------------------------
338 * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
339 * DTYPE_A = 501: 1-by-P grid.
340 * DTYPE_A = 502: P-by-1 grid.
341 * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
342 * the BLACS process grid A is distribu-
343 * ted over. The context itself is glo-
344 * bal, but the handle (the integer
345 * value) may vary.
346 * N_A (global) DESCA( 3 ) The size of the array dimension being
347 * distributed.
348 * NB_A (global) DESCA( 4 ) The blocking factor used to distribute
349 * the distributed dimension of the array.
350 * SRC_A (global) DESCA( 5 ) The process row or column over which the
351 * first row or column of the array
352 * is distributed.
353 * Ignored DESCA( 6 ) Ignored for tridiagonal matrices.
354 * Reserved DESCA( 7 ) Reserved for future use.
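*
*  A sketch of filling the two one-dimensional descriptors by hand.
*  ICTXT, N, NB and LLD_B (the local leading dimension of B, >= NB)
*  are assumed to be set already; the source offsets of 0 are
*  illustrative choices:
*
*     DESCA( 1 ) = 501
*     DESCA( 2 ) = ICTXT
*     DESCA( 3 ) = N
*     DESCA( 4 ) = NB
*     DESCA( 5 ) = 0
*     DESCA( 6 ) = 0
*     DESCA( 7 ) = 0
*
*     DESCB( 1 ) = 502
*     DESCB( 2 ) = ICTXT
*     DESCB( 3 ) = N
*     DESCB( 4 ) = NB
*     DESCB( 5 ) = 0
*     DESCB( 6 ) = LLD_B
*     DESCB( 7 ) = 0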
355 *
356 *
357 *
358 * =====================================================================
359 *
360 * Code Developer: Andrew J. Cleary, University of Tennessee.
361 * Current address: Lawrence Livermore National Labs.
362 *
363 * =====================================================================
364 *
365 * .. Parameters ..
366  REAL ONE
367  parameter( one = 1.0e+0 )
368  INTEGER INT_ONE
369  parameter( int_one = 1 )
370  INTEGER DESCMULT, BIGNUM
371  parameter( descmult = 100, bignum = descmult*descmult )
372  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
373  $ lld_, mb_, m_, nb_, n_, rsrc_
374  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
375  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
376  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
377 * ..
378 * .. Local Scalars ..
379  INTEGER CSRC, FIRST_PROC, I, ICTXT, ICTXT_NEW,
380  $ ictxt_save, idum3, ja_new, llda, lldb, mycol,
381  $ myrow, my_num_cols, nb, np, npcol, nprow,
382  $ np_save, odd_size, part_offset, part_size,
383  $ return_code, store_m_b, store_n_a, temp,
384  $ work_size_min
385 * ..
386 * .. Local Arrays ..
387  INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
388  $ param_check( 14, 3 )
389 * ..
390 * .. External Subroutines ..
391  EXTERNAL blacs_gridexit, blacs_gridinfo, desc_convert,
392  $ globchk, pspttrsv, pxerbla, reshape, sscal
393 * ..
394 * .. External Functions ..
395  INTEGER NUMROC
396  EXTERNAL numroc
397 * ..
398 * .. Intrinsic Functions ..
399  INTRINSIC min, mod, real
400 * ..
401 * .. Executable Statements ..
402 *
403 * Test the input parameters
404 *
405  info = 0
406 *
407 * Convert descriptor into standard form for easy access to
408 * parameters, check that grid is of right shape.
409 *
410  desca_1xp( 1 ) = 501
411  descb_px1( 1 ) = 502
412 *
413  temp = desca( dtype_ )
414  IF( temp.EQ.502 ) THEN
415 * Temporarily set the descriptor type to 1xP type
416  desca( dtype_ ) = 501
417  END IF
418 *
419  CALL desc_convert( desca, desca_1xp, return_code )
420 *
421  desca( dtype_ ) = temp
422 *
423  IF( return_code.NE.0 ) THEN
424  info = -( 5*100+2 )
425  END IF
426 *
427  CALL desc_convert( descb, descb_px1, return_code )
428 *
429  IF( return_code.NE.0 ) THEN
430  info = -( 8*100+2 )
431  END IF
432 *
433 * Consistency checks for DESCA and DESCB.
434 *
435 * Context must be the same
436  IF( desca_1xp( 2 ).NE.descb_px1( 2 ) ) THEN
437  info = -( 8*100+2 )
438  END IF
439 *
440 * These are alignment restrictions that may or may not be removed
441 * in future releases. -Andy Cleary, April 14, 1996.
442 *
443 * Block sizes must be the same
444  IF( desca_1xp( 4 ).NE.descb_px1( 4 ) ) THEN
445  info = -( 8*100+4 )
446  END IF
447 *
448 * Source processor must be the same
449 *
450  IF( desca_1xp( 5 ).NE.descb_px1( 5 ) ) THEN
451  info = -( 8*100+5 )
452  END IF
453 *
454 * Get values out of descriptor for use in code.
455 *
456  ictxt = desca_1xp( 2 )
457  csrc = desca_1xp( 5 )
458  nb = desca_1xp( 4 )
459  llda = desca_1xp( 6 )
460  store_n_a = desca_1xp( 3 )
461  lldb = descb_px1( 6 )
462  store_m_b = descb_px1( 3 )
463 *
464 * Get grid parameters
465 *
466 *
467  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
468  np = nprow*npcol
469 *
470 *
471 *
472  IF( lwork.LT.-1 ) THEN
473  info = -12
474  ELSE IF( lwork.EQ.-1 ) THEN
475  idum3 = -1
476  ELSE
477  idum3 = 1
478  END IF
479 *
480  IF( n.LT.0 ) THEN
481  info = -1
482  END IF
483 *
484  IF( n+ja-1.GT.store_n_a ) THEN
485  info = -( 5*100+6 )
486  END IF
487 *
488  IF( n+ib-1.GT.store_m_b ) THEN
489  info = -( 8*100+3 )
490  END IF
491 *
492  IF( lldb.LT.nb ) THEN
493  info = -( 8*100+6 )
494  END IF
495 *
496  IF( nrhs.LT.0 ) THEN
497  info = -2
498  END IF
499 *
500 * Current alignment restriction
501 *
502  IF( ja.NE.ib ) THEN
503  info = -4
504  END IF
505 *
506 * Argument checking that is specific to Divide & Conquer routine
507 *
508  IF( nprow.NE.1 ) THEN
509  info = -( 5*100+2 )
510  END IF
511 *
512  IF( n.GT.np*nb-mod( ja-1, nb ) ) THEN
513  info = -( 1 )
514  CALL pxerbla( ictxt, 'PSPTTRS, D&C alg.: only 1 block per proc'
515  $ , -info )
516  RETURN
517  END IF
518 *
519  IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*int_one ) ) THEN
520  info = -( 5*100+4 )
521  CALL pxerbla( ictxt, 'PSPTTRS, D&C alg.: NB too small', -info )
522  RETURN
523  END IF
524 *
525 *
526  work_size_min = ( 10+2*min( 100, nrhs ) )*npcol + 4*nrhs
527 *
528  work( 1 ) = work_size_min
529 *
530  IF( lwork.LT.work_size_min ) THEN
531  IF( lwork.NE.-1 ) THEN
532  info = -12
533  CALL pxerbla( ictxt, 'PSPTTRS: worksize error', -info )
534  END IF
535  RETURN
536  END IF
537 *
538 * Pack params and positions into arrays for global consistency check
539 *
540  param_check( 14, 1 ) = descb( 5 )
541  param_check( 13, 1 ) = descb( 4 )
542  param_check( 12, 1 ) = descb( 3 )
543  param_check( 11, 1 ) = descb( 2 )
544  param_check( 10, 1 ) = descb( 1 )
545  param_check( 9, 1 ) = ib
546  param_check( 8, 1 ) = desca( 5 )
547  param_check( 7, 1 ) = desca( 4 )
548  param_check( 6, 1 ) = desca( 3 )
549  param_check( 5, 1 ) = desca( 1 )
550  param_check( 4, 1 ) = ja
551  param_check( 3, 1 ) = nrhs
552  param_check( 2, 1 ) = n
553  param_check( 1, 1 ) = idum3
554 *
555  param_check( 14, 2 ) = 905
556  param_check( 13, 2 ) = 904
557  param_check( 12, 2 ) = 903
558  param_check( 11, 2 ) = 902
559  param_check( 10, 2 ) = 901
560  param_check( 9, 2 ) = 8
561  param_check( 8, 2 ) = 505
562  param_check( 7, 2 ) = 504
563  param_check( 6, 2 ) = 503
564  param_check( 5, 2 ) = 501
565  param_check( 4, 2 ) = 4
566  param_check( 3, 2 ) = 2
567  param_check( 2, 2 ) = 1
568  param_check( 1, 2 ) = 12
569 *
570 * Want to find errors with MIN( ), so if no error, set it to a big
571 * number. If there already is an error, multiply by the
572 * descriptor multiplier.
573 *
574  IF( info.GE.0 ) THEN
575  info = bignum
576  ELSE IF( info.LT.-descmult ) THEN
577  info = -info
578  ELSE
579  info = -info*descmult
580  END IF
581 *
582 * Check consistency across processors
583 *
584  CALL globchk( ictxt, 14, param_check, 14, param_check( 1, 3 ),
585  $ info )
586 *
587 * Prepare output: set info = 0 if no error, and divide by DESCMULT
588 * if error is not in a descriptor entry.
589 *
590  IF( info.EQ.bignum ) THEN
591  info = 0
592  ELSE IF( mod( info, descmult ).EQ.0 ) THEN
593  info = -info / descmult
594  ELSE
595  info = -info
596  END IF
597 *
598  IF( info.LT.0 ) THEN
599  CALL pxerbla( ictxt, 'PSPTTRS', -info )
600  RETURN
601  END IF
602 *
603 * Quick return if possible
604 *
605  IF( n.EQ.0 )
606  $ RETURN
607 *
608  IF( nrhs.EQ.0 )
609  $ RETURN
610 *
611 *
612 * Adjust addressing into matrix space to properly get into
613 * the beginning part of the relevant data
614 *
615  part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
616 *
617  IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb ) THEN
618  part_offset = part_offset + nb
619  END IF
620 *
621  IF( mycol.LT.csrc ) THEN
622  part_offset = part_offset - nb
623  END IF
624 *
625 * Form a new BLACS grid (the "standard form" grid) with only procs
626 * holding part of the matrix, of size 1xNP where NP is adjusted,
627 * starting at csrc=0, with JA modified to reflect dropped procs.
628 *
629 * First processor to hold part of the matrix:
630 *
631  first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
632 *
633 * Calculate the new JA while dropping off unused processors.
634 *
635  ja_new = mod( ja-1, nb ) + 1
636 *
637 * Save and compute new value of NP
638 *
639  np_save = np
640  np = ( ja_new+n-2 ) / nb + 1
641 *
642 * Call utility routine that forms "standard-form" grid
643 *
644  CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
645  $ int_one, np )
646 *
647 * Use new context from standard grid as context.
648 *
649  ictxt_save = ictxt
650  ictxt = ictxt_new
651  desca_1xp( 2 ) = ictxt_new
652  descb_px1( 2 ) = ictxt_new
653 *
654 * Get information about new grid.
655 *
656  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
657 *
658 * Drop out processors that do not have part of the matrix.
659 *
660  IF( myrow.LT.0 ) THEN
661  GO TO 30
662  END IF
663 *
664 * ********************************
665 * Values reused throughout routine
666 *
667 * User-input value of partition size
668 *
669  part_size = nb
670 *
671 * Number of columns in each processor
672 *
673  my_num_cols = numroc( n, part_size, mycol, 0, npcol )
674 *
675 * Offset in columns to beginning of main partition in each proc
676 *
677  IF( mycol.EQ.0 ) THEN
678  part_offset = part_offset + mod( ja_new-1, part_size )
679  my_num_cols = my_num_cols - mod( ja_new-1, part_size )
680  END IF
681 *
682 * Size of main (or odd) partition in each processor
683 *
684  odd_size = my_num_cols
685  IF( mycol.LT.np-1 ) THEN
686  odd_size = odd_size - int_one
687  END IF
688 *
689 *
690 *
691 * Begin main code
692 *
693  info = 0
694 *
695 * Call frontsolve routine
696 *
697 *
698  CALL pspttrsv( 'L', n, nrhs, d( part_offset+1 ),
699  $ e( part_offset+1 ), ja_new, desca_1xp, b, ib,
700  $ descb_px1, af, laf, work, lwork, info )
701 *
702 * Divide by the main diagonal: B <- D^{-1} B
703 *
704 * The main partition is first
705 *
706  DO 10 i = part_offset + 1, part_offset + odd_size
707  CALL sscal( nrhs, real( one / d( i ) ), b( i ), lldb )
708  10 CONTINUE
709 *
710 * Reduced system is next
711 *
712  IF( mycol.LT.npcol-1 ) THEN
713  i = part_offset + odd_size + 1
714  CALL sscal( nrhs, one / af( odd_size+2 ), b( i ), lldb )
715  END IF
716 *
717 * Call backsolve routine
718 *
719 *
720  CALL pspttrsv( 'U', n, nrhs, d( part_offset+1 ),
721  $ e( part_offset+1 ), ja_new, desca_1xp, b, ib,
722  $ descb_px1, af, laf, work, lwork, info )
723  20 CONTINUE
724 *
725 *
726 * Free BLACS space used to hold standard-form grid.
727 *
728  IF( ictxt_save.NE.ictxt_new ) THEN
729  CALL blacs_gridexit( ictxt_new )
730  END IF
731 *
732  30 CONTINUE
733 *
734 * Restore saved input parameters
735 *
736  ictxt = ictxt_save
737  np = np_save
738 *
739 * Output minimum worksize
740 *
741  work( 1 ) = work_size_min
742 *
743 *
744  RETURN
745 *
746 * End of PSPTTRS
747 *
748  END