SCALAPACK 2.2.2
psdbtrs.f
1 SUBROUTINE psdbtrs( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB,
2 $ DESCB, AF, LAF, WORK, LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* April 3, 2000
8*
9* .. Scalar Arguments ..
10 CHARACTER TRANS
11 INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS
12* ..
13* .. Array Arguments ..
14 INTEGER DESCA( * ), DESCB( * )
15 REAL A( * ), AF( * ), B( * ), WORK( * )
16* ..
17*
18*
19* Purpose
20* =======
21*
22* PSDBTRS solves a system of linear equations
23*
24* A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
25* or
26* A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS)
27*
28* where A(1:N, JA:JA+N-1) is the matrix used to produce the factors
29* stored in A(1:N,JA:JA+N-1) and AF by PSDBTRF.
30* A(1:N, JA:JA+N-1) is an N-by-N real
31* banded diagonally dominant-like distributed
32* matrix with bandwidth BWL, BWU.
33*
34* Routine PSDBTRF MUST be called first.
35*
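*  As an illustration (a sketch only, not part of this routine), a
*  typical factor/solve sequence is:
*
*     CALL PSDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, LWORK,
*    $              INFO )
*     IF( INFO.EQ.0 )
*    $   CALL PSDBTRS( 'N', N, BWL, BWU, NRHS, A, JA, DESCA, B, IB,
*    $                 DESCB, AF, LAF, WORK, LWORK, INFO )
*
*  where the matrix, right hand sides, descriptors, fill-in space AF
*  and workspace WORK are assumed to have been set up as described
*  under Arguments below.
*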
36* =====================================================================
37*
38* Arguments
39* =========
40*
41*
42* TRANS (global input) CHARACTER
43* = 'N': Solve with A(1:N, JA:JA+N-1);
44* = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T;
45*
46* N (global input) INTEGER
47* The number of rows and columns to be operated on, i.e. the
48* order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
49*
50* BWL (global input) INTEGER
51* Number of subdiagonals. 0 <= BWL <= N-1
52*
53* BWU (global input) INTEGER
54* Number of superdiagonals. 0 <= BWU <= N-1
55*
56* NRHS (global input) INTEGER
57* The number of right hand sides, i.e., the number of columns
58* of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
59* NRHS >= 0.
60*
61* A (local input/local output) REAL pointer into
62* local memory to an array with first dimension
63* LLD_A >=(bwl+bwu+1) (stored in DESCA).
 64*          On entry, this array contains the local pieces of the
 65*          factored form of the N-by-N unsymmetric banded
 66*          distributed matrix A(1:N, JA:JA+N-1), as computed by PSDBTRF.
67* This local portion is stored in the packed banded format
68* used in LAPACK. Please see the Notes below and the
69* ScaLAPACK manual for more detail on the format of
70* distributed matrices.
71*
72* JA (global input) INTEGER
73* The index in the global array A that points to the start of
74* the matrix to be operated on (which may be either all of A
75* or a submatrix of A).
76*
77* DESCA (global and local input) INTEGER array of dimension DLEN.
78* if 1D type (DTYPE_A=501), DLEN >= 7;
79* if 2D type (DTYPE_A=1), DLEN >= 9 .
80* The array descriptor for the distributed matrix A.
81* Contains information of mapping of A to memory. Please
82* see NOTES below for full description and options.
83*
84* B (local input/local output) REAL pointer into
85* local memory to an array of local lead dimension lld_b>=NB.
 86*          On entry, this array contains the
 87*          local pieces of the right hand sides
 88*          B(IB:IB+N-1, 1:NRHS).
 89*          On exit, this contains the local piece of the
 90*          distributed solution matrix X.
91*
92* IB (global input) INTEGER
93* The row index in the global array B that points to the first
94* row of the matrix to be operated on (which may be either
95* all of B or a submatrix of B).
96*
97* DESCB (global and local input) INTEGER array of dimension DLEN.
98* if 1D type (DTYPE_B=502), DLEN >=7;
99* if 2D type (DTYPE_B=1), DLEN >= 9.
100* The array descriptor for the distributed matrix B.
101* Contains information of mapping of B to memory. Please
102* see NOTES below for full description and options.
103*
104* AF (local output) REAL array, dimension LAF.
105* Auxiliary Fillin Space.
106* Fillin is created during the factorization routine
107* PSDBTRF and this is stored in AF. If a linear system
108* is to be solved using PSDBTRS after the factorization
109* routine, AF *must not be altered* after the factorization.
110*
111* LAF (local input) INTEGER
112* Size of user-input Auxiliary Fillin space AF. Must be >=
113* NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu)
114* If LAF is not large enough, an error code will be returned
115* and the minimum acceptable size will be returned in AF( 1 )
116*
117* WORK (local workspace/local output)
118* REAL temporary workspace. This space may
119* be overwritten in between calls to routines. WORK must be
120* the size given in LWORK.
121* On exit, WORK( 1 ) contains the minimal LWORK.
122*
123* LWORK (local input or global input) INTEGER
124* Size of user-input workspace WORK.
125* If LWORK is too small, the minimal acceptable size will be
126* returned in WORK(1) and an error code is returned. LWORK>=
 127*          (max(bwl,bwu)*NRHS); see the sketch after this argument list.
128*
129* INFO (global output) INTEGER
130* = 0: successful exit
 131*          < 0:  If the i-th argument is an array and the j-th entry
 132*                had an illegal value, then INFO = -(i*100+j); if the i-th
133* argument is a scalar and had an illegal value, then
134* INFO = -i.
135*
136* =====================================================================
137*
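*  The minimum workspace sizes quoted above for LAF and LWORK, and the
*  decoding of a negative INFO value, can be computed as sketched below
*  (illustrative only; LAF_MIN, LWORK_MIN, IARG and IENTRY are
*  hypothetical local variables, not arguments of this routine):
*
*     LAF_MIN   = NB*( BWL+BWU ) + 6*MAX( BWL, BWU )*MAX( BWL, BWU )
*     LWORK_MIN = MAX( BWL, BWU )*NRHS
*
*     IF( INFO.LT.0 ) THEN
*        IF( -INFO.GE.100 ) THEN
*           IARG   = ( -INFO ) / 100
*           IENTRY = MOD( -INFO, 100 )
*        ELSE
*           IARG   = -INFO
*        END IF
*     END IF
*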
138*
139* Restrictions
140* ============
141*
142* The following are restrictions on the input parameters. Some of these
143* are temporary and will be removed in future releases, while others
144* may reflect fundamental technical limitations.
145*
146* Non-cyclic restriction: VERY IMPORTANT!
147* P*NB>= mod(JA-1,NB)+N.
148* The mapping for matrices must be blocked, reflecting the nature
149* of the divide and conquer algorithm as a task-parallel algorithm.
150* This formula in words is: no processor may have more than one
151* chunk of the matrix.
152*
153* Blocksize cannot be too small:
154* If the matrix spans more than one processor, the following
155* restriction on NB, the size of each block on each processor,
156* must hold:
157* NB >= 2*MAX(BWL,BWU)
158* The bulk of parallel computation is done on the matrix of size
159* O(NB) on each processor. If this is too small, divide and conquer
160* is a poor choice of algorithm.
161*
162* Submatrix reference:
163* JA = IB
164* Alignment restriction that prevents unnecessary communication.
165*
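*  Expressed as conditions on the inputs (where NP denotes the number
*  of processes holding part of the matrix), the checks performed later
*  in this routine require:
*
*     N  .LE. NP*NB - MOD( JA-1, NB )
*     NB .GE. 2*MAX( BWL, BWU )        whenever JA+N-1 .GT. NB
*     JA .EQ. IB
*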
166*
167* =====================================================================
168*
169*
170* Notes
171* =====
172*
173* If the factorization routine and the solve routine are to be called
174* separately (to solve various sets of righthand sides using the same
175* coefficient matrix), the auxiliary space AF *must not be altered*
176* between calls to the factorization routine and the solve routine.
177*
178* The best algorithm for solving banded and tridiagonal linear systems
179* depends on a variety of parameters, especially the bandwidth.
180* Currently, only algorithms designed for the case N/P >> bw are
181* implemented. These go by many names, including Divide and Conquer,
182* Partitioning, domain decomposition-type, etc.
183*
184* Algorithm description: Divide and Conquer
185*
 186*  The Divide and Conquer algorithm assumes the matrix is narrowly
187* banded compared with the number of equations. In this situation,
188* it is best to distribute the input matrix A one-dimensionally,
189* with columns atomic and rows divided amongst the processes.
190* The basic algorithm divides the banded matrix up into
191* P pieces with one stored on each processor,
192* and then proceeds in 2 phases for the factorization or 3 for the
193* solution of a linear system.
194* 1) Local Phase:
195* The individual pieces are factored independently and in
196* parallel. These factors are applied to the matrix creating
197* fillin, which is stored in a non-inspectable way in auxiliary
198* space AF. Mathematically, this is equivalent to reordering
199* the matrix A as P A P^T and then factoring the principal
200* leading submatrix of size equal to the sum of the sizes of
201* the matrices factored on each processor. The factors of
202* these submatrices overwrite the corresponding parts of A
203* in memory.
204* 2) Reduced System Phase:
205* A small (max(bwl,bwu)* (P-1)) system is formed representing
206* interaction of the larger blocks, and is stored (as are its
207* factors) in the space AF. A parallel Block Cyclic Reduction
208* algorithm is used. For a linear system, a parallel front solve
 209*     followed by an analogous backsolve, both using the structure
210* of the factored matrix, are performed.
 211*    3) Backsubstitution Phase:
212* For a linear system, a local backsubstitution is performed on
213* each processor in parallel.
214*
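*  In this routine the solve itself is carried out by two calls to
*  PSDBTRSV, a frontsolve sweep followed by a backsolve sweep (sketched
*  here for the TRANS = 'N' case; the full argument lists appear in the
*  executable code below):
*
*     CALL PSDBTRSV( 'L', 'N', ... )   frontsolve
*     CALL PSDBTRSV( 'U', 'N', ... )   backsolve
*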
215*
216* Descriptors
217* ===========
218*
219* Descriptors now have *types* and differ from ScaLAPACK 1.0.
220*
221* Note: banded codes can use either the old two dimensional
222* or new one-dimensional descriptors, though the processor grid in
223* both cases *must be one-dimensional*. We describe both types below.
224*
225* Each global data object is described by an associated description
226* vector. This vector stores the information required to establish
227* the mapping between an object element and its corresponding process
228* and memory location.
229*
 230*  Let A be a generic term for any 2D block cyclically distributed array.
231* Such a global array has an associated description vector DESCA.
232* In the following comments, the character _ should be read as
233* "of the global array".
234*
235* NOTATION STORED IN EXPLANATION
236* --------------- -------------- --------------------------------------
237* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
238* DTYPE_A = 1.
239* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
240* the BLACS process grid A is distribu-
241* ted over. The context itself is glo-
242* bal, but the handle (the integer
243* value) may vary.
244* M_A (global) DESCA( M_ ) The number of rows in the global
245* array A.
246* N_A (global) DESCA( N_ ) The number of columns in the global
247* array A.
248* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
249* the rows of the array.
250* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
251* the columns of the array.
252* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
253* row of the array A is distributed.
254* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
255* first column of the array A is
256* distributed.
257* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
258* array. LLD_A >= MAX(1,LOCr(M_A)).
259*
260* Let K be the number of rows or columns of a distributed matrix,
261* and assume that its process grid has dimension p x q.
262* LOCr( K ) denotes the number of elements of K that a process
263* would receive if K were distributed over the p processes of its
264* process column.
265* Similarly, LOCc( K ) denotes the number of elements of K that a
266* process would receive if K were distributed over the q processes of
267* its process row.
268* The values of LOCr() and LOCc() may be determined via a call to the
269* ScaLAPACK tool function, NUMROC:
270* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
271* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
272* An upper bound for these quantities may be computed by:
273* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
274* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
275*
276*
277* One-dimensional descriptors:
278*
279* One-dimensional descriptors are a new addition to ScaLAPACK since
280* version 1.0. They simplify and shorten the descriptor for 1D
281* arrays.
282*
283* Since ScaLAPACK supports two-dimensional arrays as the fundamental
284* object, we allow 1D arrays to be distributed either over the
285* first dimension of the array (as if the grid were P-by-1) or the
286* 2nd dimension (as if the grid were 1-by-P). This choice is
287* indicated by the descriptor type (501 or 502)
288* as described below.
289*
290* IMPORTANT NOTE: the actual BLACS grid represented by the
291* CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
292* irrespective of which one-dimensional descriptor type
293* (501 or 502) is input.
294* This routine will interpret the grid properly either way.
295* ScaLAPACK routines *do not support intercontext operations* so that
296* the grid passed to a single ScaLAPACK routine *must be the same*
297* for all array descriptors passed to that routine.
298*
299* NOTE: In all cases where 1D descriptors are used, 2D descriptors
300* may also be used, since a one-dimensional array is a special case
301* of a two-dimensional array with one dimension of size unity.
302* The two-dimensional array used in this case *must* be of the
303* proper orientation:
304* If the appropriate one-dimensional descriptor is DTYPEA=501
305* (1 by P type), then the two dimensional descriptor must
306* have a CTXT value that refers to a 1 by P BLACS grid;
307* If the appropriate one-dimensional descriptor is DTYPEA=502
308* (P by 1 type), then the two dimensional descriptor must
309* have a CTXT value that refers to a P by 1 BLACS grid.
310*
311*
312* Summary of allowed descriptors, types, and BLACS grids:
313* DTYPE 501 502 1 1
314* BLACS grid 1xP or Px1 1xP or Px1 1xP Px1
315* -----------------------------------------------------
316* A OK NO OK NO
317* B NO OK NO OK
318*
319* Note that a consequence of this chart is that it is not possible
320* for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
321* to opposite requirements for the orientation of the BLACS grid,
322* and as noted before, the *same* BLACS context must be used in
323* all descriptors in a single ScaLAPACK subroutine call.
324*
 325*  Let A be a generic term for any 1D block cyclically distributed array.
326* Such a global array has an associated description vector DESCA.
327* In the following comments, the character _ should be read as
328* "of the global array".
329*
330* NOTATION STORED IN EXPLANATION
331* --------------- ---------- ------------------------------------------
332* DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
333* TYPE_A = 501: 1-by-P grid.
334* TYPE_A = 502: P-by-1 grid.
335* CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
336* the BLACS process grid A is distribu-
337* ted over. The context itself is glo-
338* bal, but the handle (the integer
339* value) may vary.
340* N_A (global) DESCA( 3 ) The size of the array dimension being
341* distributed.
342* NB_A (global) DESCA( 4 ) The blocking factor used to distribute
343* the distributed dimension of the array.
344* SRC_A (global) DESCA( 5 ) The process row or column over which the
345* first row or column of the array
346* is distributed.
347* LLD_A (local) DESCA( 6 ) The leading dimension of the local array
348* storing the local blocks of the distri-
349* buted array A. Minimum value of LLD_A
350* depends on TYPE_A.
 351*                                  TYPE_A = 501: LLD_A >=
 352*                                     max( size of undistributed dimension, 1 ).
 353*                                  TYPE_A = 502: LLD_A >= max( NB_A, 1 ).
354* Reserved DESCA( 7 ) Reserved for future use.
355*
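*  For illustration, a matching pair of one-dimensional descriptors for
*  this routine could be filled in as sketched below (the values shown
*  are examples assuming JA = IB = 1; ICTXT is a BLACS context handle
*  for a one-dimensional process grid, and LLDA, LLDB are local leading
*  dimensions chosen by the caller with LLDA >= BWL+BWU+1 and
*  LLDB >= NB):
*
*     DESCA( 1 ) = 501
*     DESCA( 2 ) = ICTXT
*     DESCA( 3 ) = N
*     DESCA( 4 ) = NB
*     DESCA( 5 ) = 0
*     DESCA( 6 ) = LLDA
*     DESCA( 7 ) = 0
*
*     DESCB( 1 ) = 502
*     DESCB( 2 ) = ICTXT
*     DESCB( 3 ) = N
*     DESCB( 4 ) = NB
*     DESCB( 5 ) = 0
*     DESCB( 6 ) = LLDB
*     DESCB( 7 ) = 0
*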
356*
357*
358* =====================================================================
359*
360* Code Developer: Andrew J. Cleary, University of Tennessee.
361* Current address: Lawrence Livermore National Labs.
362*
363* =====================================================================
364*
365* .. Parameters ..
366 INTEGER INT_ONE
367 parameter( int_one = 1 )
368 INTEGER DESCMULT, BIGNUM
369 parameter( descmult = 100, bignum = descmult*descmult )
370 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
371 $ lld_, mb_, m_, nb_, n_, rsrc_
372 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
373 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
374 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
375* ..
376* .. Local Scalars ..
377 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
378 $ idum2, idum3, ja_new, llda, lldb, mycol, myrow,
379 $ nb, np, npcol, nprow, np_save, part_offset,
380 $ return_code, store_m_b, store_n_a,
381 $ work_size_min
382* ..
383* .. Local Arrays ..
384 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
385 $ param_check( 17, 3 )
386* ..
387* .. External Subroutines ..
 388       EXTERNAL           blacs_gridexit, blacs_gridinfo, desc_convert,
 389      $                   globchk, psdbtrsv, pxerbla, reshape
390* ..
391* .. External Functions ..
392 LOGICAL LSAME
393 EXTERNAL lsame
394* ..
395* .. Intrinsic Functions ..
396 INTRINSIC ichar, max, mod
397* ..
398* .. Executable Statements ..
399*
400* Test the input parameters
401*
402 info = 0
403*
404* Convert descriptor into standard form for easy access to
405* parameters, check that grid is of right shape.
406*
407 desca_1xp( 1 ) = 501
408 descb_px1( 1 ) = 502
409*
410 CALL desc_convert( desca, desca_1xp, return_code )
411*
412 IF( return_code.NE.0 ) THEN
413 info = -( 8*100+2 )
414 END IF
415*
416 CALL desc_convert( descb, descb_px1, return_code )
417*
418 IF( return_code.NE.0 ) THEN
419 info = -( 11*100+2 )
420 END IF
421*
422* Consistency checks for DESCA and DESCB.
423*
424* Context must be the same
425 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) ) THEN
426 info = -( 11*100+2 )
427 END IF
428*
429* These are alignment restrictions that may or may not be removed
430* in future releases. -Andy Cleary, April 14, 1996.
431*
432* Block sizes must be the same
433 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) ) THEN
434 info = -( 11*100+4 )
435 END IF
436*
437* Source processor must be the same
438*
439 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) ) THEN
440 info = -( 11*100+5 )
441 END IF
442*
443* Get values out of descriptor for use in code.
444*
445 ictxt = desca_1xp( 2 )
446 csrc = desca_1xp( 5 )
447 nb = desca_1xp( 4 )
448 llda = desca_1xp( 6 )
449 store_n_a = desca_1xp( 3 )
450 lldb = descb_px1( 6 )
451 store_m_b = descb_px1( 3 )
452*
453* Get grid parameters
454*
455*
456 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
457 np = nprow*npcol
458*
459*
460*
461 IF( lsame( trans, 'N' ) ) THEN
462 idum2 = ichar( 'N' )
463 ELSE IF( lsame( trans, 'T' ) ) THEN
464 idum2 = ichar( 'T' )
465 ELSE IF( lsame( trans, 'C' ) ) THEN
466 idum2 = ichar( 'T' )
467 ELSE
468 info = -1
469 END IF
470*
471 IF( lwork.LT.-1 ) THEN
472 info = -15
473 ELSE IF( lwork.EQ.-1 ) THEN
474 idum3 = -1
475 ELSE
476 idum3 = 1
477 END IF
478*
479 IF( n.LT.0 ) THEN
480 info = -2
481 END IF
482*
483 IF( n+ja-1.GT.store_n_a ) THEN
484 info = -( 8*100+6 )
485 END IF
486*
487 IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) ) THEN
488 info = -3
489 END IF
490*
491 IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) ) THEN
492 info = -4
493 END IF
494*
495 IF( llda.LT.( bwl+bwu+1 ) ) THEN
496 info = -( 8*100+6 )
497 END IF
498*
499 IF( nb.LE.0 ) THEN
500 info = -( 8*100+4 )
501 END IF
502*
503 IF( n+ib-1.GT.store_m_b ) THEN
504 info = -( 11*100+3 )
505 END IF
506*
507 IF( lldb.LT.nb ) THEN
508 info = -( 11*100+6 )
509 END IF
510*
511 IF( nrhs.LT.0 ) THEN
512 info = -5
513 END IF
514*
515* Current alignment restriction
516*
517 IF( ja.NE.ib ) THEN
518 info = -7
519 END IF
520*
521* Argument checking that is specific to Divide & Conquer routine
522*
523 IF( nprow.NE.1 ) THEN
524 info = -( 8*100+2 )
525 END IF
526*
527 IF( n.GT.np*nb-mod( ja-1, nb ) ) THEN
528 info = -( 2 )
529 CALL pxerbla( ictxt, 'PSDBTRS, D&C alg.: only 1 block per proc'
530 $ , -info )
531 RETURN
532 END IF
533*
534 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*max( bwl, bwu ) ) ) THEN
535 info = -( 8*100+4 )
536 CALL pxerbla( ictxt, 'PSDBTRS, D&C alg.: NB too small', -info )
537 RETURN
538 END IF
539*
540*
541 work_size_min = ( max( bwl, bwu )*nrhs )
542*
543 work( 1 ) = work_size_min
544*
545 IF( lwork.LT.work_size_min ) THEN
546 IF( lwork.NE.-1 ) THEN
547 info = -15
548 CALL pxerbla( ictxt, 'PSDBTRS: worksize error', -info )
549 END IF
550 RETURN
551 END IF
552*
553* Pack params and positions into arrays for global consistency check
554*
555 param_check( 17, 1 ) = descb( 5 )
556 param_check( 16, 1 ) = descb( 4 )
557 param_check( 15, 1 ) = descb( 3 )
558 param_check( 14, 1 ) = descb( 2 )
559 param_check( 13, 1 ) = descb( 1 )
560 param_check( 12, 1 ) = ib
561 param_check( 11, 1 ) = desca( 5 )
562 param_check( 10, 1 ) = desca( 4 )
563 param_check( 9, 1 ) = desca( 3 )
564 param_check( 8, 1 ) = desca( 1 )
565 param_check( 7, 1 ) = ja
566 param_check( 6, 1 ) = nrhs
567 param_check( 5, 1 ) = bwu
568 param_check( 4, 1 ) = bwl
569 param_check( 3, 1 ) = n
570 param_check( 2, 1 ) = idum3
571 param_check( 1, 1 ) = idum2
572*
573 param_check( 17, 2 ) = 1105
574 param_check( 16, 2 ) = 1104
575 param_check( 15, 2 ) = 1103
576 param_check( 14, 2 ) = 1102
577 param_check( 13, 2 ) = 1101
578 param_check( 12, 2 ) = 10
579 param_check( 11, 2 ) = 805
580 param_check( 10, 2 ) = 804
581 param_check( 9, 2 ) = 803
582 param_check( 8, 2 ) = 801
583 param_check( 7, 2 ) = 7
584 param_check( 6, 2 ) = 5
585 param_check( 5, 2 ) = 4
586 param_check( 4, 2 ) = 3
587 param_check( 3, 2 ) = 2
588 param_check( 2, 2 ) = 15
589 param_check( 1, 2 ) = 1
590*
591* Want to find errors with MIN( ), so if no error, set it to a big
 592*     number. If there already is an error, multiply by the
593* descriptor multiplier.
594*
595 IF( info.GE.0 ) THEN
596 info = bignum
597 ELSE IF( info.LT.-descmult ) THEN
598 info = -info
599 ELSE
600 info = -info*descmult
601 END IF
602*
603* Check consistency across processors
604*
605 CALL globchk( ictxt, 17, param_check, 17, param_check( 1, 3 ),
606 $ info )
607*
608* Prepare output: set info = 0 if no error, and divide by DESCMULT
609* if error is not in a descriptor entry.
610*
611 IF( info.EQ.bignum ) THEN
612 info = 0
613 ELSE IF( mod( info, descmult ).EQ.0 ) THEN
614 info = -info / descmult
615 ELSE
616 info = -info
617 END IF
618*
619 IF( info.LT.0 ) THEN
620 CALL pxerbla( ictxt, 'PSDBTRS', -info )
621 RETURN
622 END IF
623*
624* Quick return if possible
625*
626 IF( n.EQ.0 )
627 $ RETURN
628*
629 IF( nrhs.EQ.0 )
630 $ RETURN
631*
632*
633* Adjust addressing into matrix space to properly get into
634* the beginning part of the relevant data
635*
636 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
637*
638 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb ) THEN
639 part_offset = part_offset + nb
640 END IF
641*
642 IF( mycol.LT.csrc ) THEN
643 part_offset = part_offset - nb
644 END IF
645*
646* Form a new BLACS grid (the "standard form" grid) with only procs
647* holding part of the matrix, of size 1xNP where NP is adjusted,
648* starting at csrc=0, with JA modified to reflect dropped procs.
649*
650* First processor to hold part of the matrix:
651*
652 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
653*
 654*     Calculate the new JA while dropping off unused processors.
655*
656 ja_new = mod( ja-1, nb ) + 1
657*
658* Save and compute new value of NP
659*
660 np_save = np
661 np = ( ja_new+n-2 ) / nb + 1
662*
663* Call utility routine that forms "standard-form" grid
664*
665 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
666 $ int_one, np )
667*
668* Use new context from standard grid as context.
669*
670 ictxt_save = ictxt
671 ictxt = ictxt_new
672 desca_1xp( 2 ) = ictxt_new
673 descb_px1( 2 ) = ictxt_new
674*
675* Get information about new grid.
676*
677 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
678*
679* Drop out processors that do not have part of the matrix.
680*
681 IF( myrow.LT.0 ) THEN
682 GO TO 20
683 END IF
684*
685*
686*
687* Begin main code
688*
689 info = 0
690*
691* Call frontsolve routine
692*
693 IF( lsame( trans, 'N' ) ) THEN
694*
695 CALL psdbtrsv( 'L', 'N', n, bwl, bwu, nrhs, a( part_offset+1 ),
696 $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
697 $ work, lwork, info )
698*
699 ELSE
700*
701 CALL psdbtrsv( 'U', 'T', n, bwl, bwu, nrhs, a( part_offset+1 ),
702 $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
703 $ work, lwork, info )
704*
705 END IF
706*
707* Call backsolve routine
708*
709 IF( ( lsame( trans, 'C' ) ) .OR. ( lsame( trans, 'T' ) ) ) THEN
710*
711 CALL psdbtrsv( 'L', 'T', n, bwl, bwu, nrhs, a( part_offset+1 ),
712 $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
713 $ work, lwork, info )
714*
715 ELSE
716*
717 CALL psdbtrsv( 'U', 'N', n, bwl, bwu, nrhs, a( part_offset+1 ),
718 $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
719 $ work, lwork, info )
720*
721 END IF
722 10 CONTINUE
723*
724*
725* Free BLACS space used to hold standard-form grid.
726*
727 IF( ictxt_save.NE.ictxt_new ) THEN
728 CALL blacs_gridexit( ictxt_new )
729 END IF
730*
731 20 CONTINUE
732*
733* Restore saved input parameters
734*
735 ictxt = ictxt_save
736 np = np_save
737*
738* Output minimum worksize
739*
740 work( 1 ) = work_size_min
741*
742*
743 RETURN
744*
745* End of PSDBTRS
746*
747 END