SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pspbtrs.f
Go to the documentation of this file.
1 SUBROUTINE pspbtrs( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB,
2 $ AF, LAF, WORK, LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* April 3, 2000
8*
9* .. Scalar Arguments ..
10 CHARACTER UPLO
11 INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS
12* ..
13* .. Array Arguments ..
14 INTEGER DESCA( * ), DESCB( * )
15 REAL A( * ), AF( * ), B( * ), WORK( * )
16* ..
17*
18*
19* Purpose
20* =======
21*
22* PSPBTRS solves a system of linear equations
23*
24* A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
25*
26* where A(1:N, JA:JA+N-1) is the matrix used to produce the factors
27* stored in A(1:N,JA:JA+N-1) and AF by PSPBTRF.
28* A(1:N, JA:JA+N-1) is an N-by-N real
29* banded symmetric positive definite distributed
30* matrix with bandwidth BW.
31* Depending on the value of UPLO, A stores either U or L in the equn
32* A(1:N, JA:JA+N-1) = U'*U or L*L' as computed by PSPBTRF.
33*
34* Routine PSPBTRF MUST be called first.
35*
36* =====================================================================
37*
38* Arguments
39* =========
40*
41* UPLO (global input) CHARACTER
42* = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored;
43* = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored.
44*
45* N (global input) INTEGER
46* The number of rows and columns to be operated on, i.e. the
47* order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
48*
49* BW (global input) INTEGER
50* Number of subdiagonals in L or U. 0 <= BW <= N-1
51*
52* NRHS (global input) INTEGER
53* The number of right hand sides, i.e., the number of columns
54* of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
55* NRHS >= 0.
56*
57* A (local input/local output) REAL pointer into
58* local memory to an array with first dimension
59* LLD_A >=(bw+1) (stored in DESCA).
60* On entry, this array contains the local pieces of the
61* N-by-N symmetric banded distributed Cholesky factor L or
62* L^T A(1:N, JA:JA+N-1).
63* This local portion is stored in the packed banded format
64* used in LAPACK. Please see the Notes below and the
65* ScaLAPACK manual for more detail on the format of
66* distributed matrices.
67*
68* JA (global input) INTEGER
69* The index in the global array A that points to the start of
70* the matrix to be operated on (which may be either all of A
71* or a submatrix of A).
72*
73* DESCA (global and local input) INTEGER array of dimension DLEN.
74* if 1D type (DTYPE_A=501), DLEN >= 7;
75* if 2D type (DTYPE_A=1), DLEN >= 9 .
76* The array descriptor for the distributed matrix A.
77* Contains information of mapping of A to memory. Please
78* see NOTES below for full description and options.
79*
80* B (local input/local output) REAL pointer into
81* local memory to an array of local lead dimension lld_b>=NB.
82* On entry, this array contains the
83* the local pieces of the right hand sides
84* B(IB:IB+N-1, 1:NRHS).
85* On exit, this contains the local piece of the solutions
86* distributed matrix X.
87*
88* IB (global input) INTEGER
89* The row index in the global array B that points to the first
90* row of the matrix to be operated on (which may be either
91* all of B or a submatrix of B).
92*
93* DESCB (global and local input) INTEGER array of dimension DLEN.
94* if 1D type (DTYPE_B=502), DLEN >=7;
95* if 2D type (DTYPE_B=1), DLEN >= 9.
96* The array descriptor for the distributed matrix B.
97* Contains information of mapping of B to memory. Please
98* see NOTES below for full description and options.
99*
100* AF (local output) REAL array, dimension LAF.
101* Auxiliary Fillin Space.
102* Fillin is created during the factorization routine
103* PSPBTRF and this is stored in AF. If a linear system
104* is to be solved using PSPBTRS after the factorization
105* routine, AF *must not be altered* after the factorization.
106*
107* LAF (local input) INTEGER
108* Size of user-input Auxiliary Fillin space AF. Must be >=
109* (NB+2*bw)*bw
110* If LAF is not large enough, an error code will be returned
111* and the minimum acceptable size will be returned in AF( 1 )
112*
113* WORK (local workspace/local output)
114* REAL temporary workspace. This space may
115* be overwritten in between calls to routines. WORK must be
116* the size given in LWORK.
117* On exit, WORK( 1 ) contains the minimal LWORK.
118*
119* LWORK (local input or global input) INTEGER
120* Size of user-input workspace WORK.
121* If LWORK is too small, the minimal acceptable size will be
122* returned in WORK(1) and an error code is returned. LWORK>=
123* (bw*NRHS)
124*
125* INFO (global output) INTEGER
126* = 0: successful exit
127* < 0: If the i-th argument is an array and the j-entry had
128* an illegal value, then INFO = -(i*100+j), if the i-th
129* argument is a scalar and had an illegal value, then
130* INFO = -i.
131*
132* =====================================================================
133*
134*
135* Restrictions
136* ============
137*
138* The following are restrictions on the input parameters. Some of these
139* are temporary and will be removed in future releases, while others
140* may reflect fundamental technical limitations.
141*
142* Non-cyclic restriction: VERY IMPORTANT!
143* P*NB>= mod(JA-1,NB)+N.
144* The mapping for matrices must be blocked, reflecting the nature
145* of the divide and conquer algorithm as a task-parallel algorithm.
146* This formula in words is: no processor may have more than one
147* chunk of the matrix.
148*
149* Blocksize cannot be too small:
150* If the matrix spans more than one processor, the following
151* restriction on NB, the size of each block on each processor,
152* must hold:
153* NB >= 2*BW
154* The bulk of parallel computation is done on the matrix of size
155* O(NB) on each processor. If this is too small, divide and conquer
156* is a poor choice of algorithm.
157*
158* Submatrix reference:
159* JA = IB
160* Alignment restriction that prevents unnecessary communication.
161*
162*
163* =====================================================================
164*
165*
166* Notes
167* =====
168*
169* If the factorization routine and the solve routine are to be called
170* separately (to solve various sets of righthand sides using the same
171* coefficient matrix), the auxiliary space AF *must not be altered*
172* between calls to the factorization routine and the solve routine.
173*
174* The best algorithm for solving banded and tridiagonal linear systems
175* depends on a variety of parameters, especially the bandwidth.
176* Currently, only algorithms designed for the case N/P >> bw are
177* implemented. These go by many names, including Divide and Conquer,
178* Partitioning, domain decomposition-type, etc.
179*
180* Algorithm description: Divide and Conquer
181*
182* The Divide and Conqer algorithm assumes the matrix is narrowly
183* banded compared with the number of equations. In this situation,
184* it is best to distribute the input matrix A one-dimensionally,
185* with columns atomic and rows divided amongst the processes.
186* The basic algorithm divides the banded matrix up into
187* P pieces with one stored on each processor,
188* and then proceeds in 2 phases for the factorization or 3 for the
189* solution of a linear system.
190* 1) Local Phase:
191* The individual pieces are factored independently and in
192* parallel. These factors are applied to the matrix creating
193* fillin, which is stored in a non-inspectable way in auxiliary
194* space AF. Mathematically, this is equivalent to reordering
195* the matrix A as P A P^T and then factoring the principal
196* leading submatrix of size equal to the sum of the sizes of
197* the matrices factored on each processor. The factors of
198* these submatrices overwrite the corresponding parts of A
199* in memory.
200* 2) Reduced System Phase:
201* A small (BW* (P-1)) system is formed representing
202* interaction of the larger blocks, and is stored (as are its
203* factors) in the space AF. A parallel Block Cyclic Reduction
204* algorithm is used. For a linear system, a parallel front solve
205* followed by an analagous backsolve, both using the structure
206* of the factored matrix, are performed.
207* 3) Backsubsitution Phase:
208* For a linear system, a local backsubstitution is performed on
209* each processor in parallel.
210*
211*
212* Descriptors
213* ===========
214*
215* Descriptors now have *types* and differ from ScaLAPACK 1.0.
216*
217* Note: banded codes can use either the old two dimensional
218* or new one-dimensional descriptors, though the processor grid in
219* both cases *must be one-dimensional*. We describe both types below.
220*
221* Each global data object is described by an associated description
222* vector. This vector stores the information required to establish
223* the mapping between an object element and its corresponding process
224* and memory location.
225*
226* Let A be a generic term for any 2D block cyclicly distributed array.
227* Such a global array has an associated description vector DESCA.
228* In the following comments, the character _ should be read as
229* "of the global array".
230*
231* NOTATION STORED IN EXPLANATION
232* --------------- -------------- --------------------------------------
233* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
234* DTYPE_A = 1.
235* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
236* the BLACS process grid A is distribu-
237* ted over. The context itself is glo-
238* bal, but the handle (the integer
239* value) may vary.
240* M_A (global) DESCA( M_ ) The number of rows in the global
241* array A.
242* N_A (global) DESCA( N_ ) The number of columns in the global
243* array A.
244* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
245* the rows of the array.
246* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
247* the columns of the array.
248* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
249* row of the array A is distributed.
250* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
251* first column of the array A is
252* distributed.
253* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
254* array. LLD_A >= MAX(1,LOCr(M_A)).
255*
256* Let K be the number of rows or columns of a distributed matrix,
257* and assume that its process grid has dimension p x q.
258* LOCr( K ) denotes the number of elements of K that a process
259* would receive if K were distributed over the p processes of its
260* process column.
261* Similarly, LOCc( K ) denotes the number of elements of K that a
262* process would receive if K were distributed over the q processes of
263* its process row.
264* The values of LOCr() and LOCc() may be determined via a call to the
265* ScaLAPACK tool function, NUMROC:
266* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
267* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
268* An upper bound for these quantities may be computed by:
269* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
270* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
271*
272*
273* One-dimensional descriptors:
274*
275* One-dimensional descriptors are a new addition to ScaLAPACK since
276* version 1.0. They simplify and shorten the descriptor for 1D
277* arrays.
278*
279* Since ScaLAPACK supports two-dimensional arrays as the fundamental
280* object, we allow 1D arrays to be distributed either over the
281* first dimension of the array (as if the grid were P-by-1) or the
282* 2nd dimension (as if the grid were 1-by-P). This choice is
283* indicated by the descriptor type (501 or 502)
284* as described below.
285*
286* IMPORTANT NOTE: the actual BLACS grid represented by the
287* CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
288* irrespective of which one-dimensional descriptor type
289* (501 or 502) is input.
290* This routine will interpret the grid properly either way.
291* ScaLAPACK routines *do not support intercontext operations* so that
292* the grid passed to a single ScaLAPACK routine *must be the same*
293* for all array descriptors passed to that routine.
294*
295* NOTE: In all cases where 1D descriptors are used, 2D descriptors
296* may also be used, since a one-dimensional array is a special case
297* of a two-dimensional array with one dimension of size unity.
298* The two-dimensional array used in this case *must* be of the
299* proper orientation:
300* If the appropriate one-dimensional descriptor is DTYPEA=501
301* (1 by P type), then the two dimensional descriptor must
302* have a CTXT value that refers to a 1 by P BLACS grid;
303* If the appropriate one-dimensional descriptor is DTYPEA=502
304* (P by 1 type), then the two dimensional descriptor must
305* have a CTXT value that refers to a P by 1 BLACS grid.
306*
307*
308* Summary of allowed descriptors, types, and BLACS grids:
309* DTYPE 501 502 1 1
310* BLACS grid 1xP or Px1 1xP or Px1 1xP Px1
311* -----------------------------------------------------
312* A OK NO OK NO
313* B NO OK NO OK
314*
315* Note that a consequence of this chart is that it is not possible
316* for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
317* to opposite requirements for the orientation of the BLACS grid,
318* and as noted before, the *same* BLACS context must be used in
319* all descriptors in a single ScaLAPACK subroutine call.
320*
321* Let A be a generic term for any 1D block cyclicly distributed array.
322* Such a global array has an associated description vector DESCA.
323* In the following comments, the character _ should be read as
324* "of the global array".
325*
326* NOTATION STORED IN EXPLANATION
327* --------------- ---------- ------------------------------------------
328* DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
329* TYPE_A = 501: 1-by-P grid.
330* TYPE_A = 502: P-by-1 grid.
331* CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
332* the BLACS process grid A is distribu-
333* ted over. The context itself is glo-
334* bal, but the handle (the integer
335* value) may vary.
336* N_A (global) DESCA( 3 ) The size of the array dimension being
337* distributed.
338* NB_A (global) DESCA( 4 ) The blocking factor used to distribute
339* the distributed dimension of the array.
340* SRC_A (global) DESCA( 5 ) The process row or column over which the
341* first row or column of the array
342* is distributed.
343* LLD_A (local) DESCA( 6 ) The leading dimension of the local array
344* storing the local blocks of the distri-
345* buted array A. Minimum value of LLD_A
346* depends on TYPE_A.
347* TYPE_A = 501: LLD_A >=
348* size of undistributed dimension, 1.
349* TYPE_A = 502: LLD_A >=NB_A, 1.
350* Reserved DESCA( 7 ) Reserved for future use.
351*
352*
353*
354* =====================================================================
355*
356* Code Developer: Andrew J. Cleary, University of Tennessee.
357* Current address: Lawrence Livermore National Labs.
358*
359* =====================================================================
360*
361* .. Parameters ..
362 INTEGER INT_ONE
363 parameter( int_one = 1 )
364 INTEGER DESCMULT, BIGNUM
365 parameter( descmult = 100, bignum = descmult*descmult )
366 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
367 $ lld_, mb_, m_, nb_, n_, rsrc_
368 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
369 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
370 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
371* ..
372* .. Local Scalars ..
373 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
374 $ idum1, idum3, ja_new, llda, lldb, mycol, myrow,
375 $ nb, np, npcol, nprow, np_save, part_offset,
376 $ return_code, store_m_b, store_n_a,
377 $ work_size_min
378* ..
379* .. Local Arrays ..
380 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
381 $ param_check( 16, 3 )
382* ..
383* .. External Subroutines ..
384 EXTERNAL blacs_gridexit, blacs_gridinfo, desc_convert,
386* ..
387* .. External Functions ..
388 LOGICAL LSAME
389 EXTERNAL lsame
390* ..
391* .. Intrinsic Functions ..
392 INTRINSIC ichar, mod
393* ..
394* .. Executable Statements ..
395*
396* Test the input parameters
397*
398 info = 0
399*
400* Convert descriptor into standard form for easy access to
401* parameters, check that grid is of right shape.
402*
403 desca_1xp( 1 ) = 501
404 descb_px1( 1 ) = 502
405*
406 CALL desc_convert( desca, desca_1xp, return_code )
407*
408 IF( return_code.NE.0 ) THEN
409 info = -( 7*100+2 )
410 END IF
411*
412 CALL desc_convert( descb, descb_px1, return_code )
413*
414 IF( return_code.NE.0 ) THEN
415 info = -( 10*100+2 )
416 END IF
417*
418* Consistency checks for DESCA and DESCB.
419*
420* Context must be the same
421 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) ) THEN
422 info = -( 10*100+2 )
423 END IF
424*
425* These are alignment restrictions that may or may not be removed
426* in future releases. -Andy Cleary, April 14, 1996.
427*
428* Block sizes must be the same
429 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) ) THEN
430 info = -( 10*100+4 )
431 END IF
432*
433* Source processor must be the same
434*
435 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) ) THEN
436 info = -( 10*100+5 )
437 END IF
438*
439* Get values out of descriptor for use in code.
440*
441 ictxt = desca_1xp( 2 )
442 csrc = desca_1xp( 5 )
443 nb = desca_1xp( 4 )
444 llda = desca_1xp( 6 )
445 store_n_a = desca_1xp( 3 )
446 lldb = descb_px1( 6 )
447 store_m_b = descb_px1( 3 )
448*
449* Get grid parameters
450*
451*
452 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
453 np = nprow*npcol
454*
455*
456*
457 IF( lsame( uplo, 'U' ) ) THEN
458 idum1 = ichar( 'U' )
459 ELSE IF( lsame( uplo, 'L' ) ) THEN
460 idum1 = ichar( 'L' )
461 ELSE
462 info = -1
463 END IF
464*
465 IF( lwork.LT.-1 ) THEN
466 info = -14
467 ELSE IF( lwork.EQ.-1 ) THEN
468 idum3 = -1
469 ELSE
470 idum3 = 1
471 END IF
472*
473 IF( n.LT.0 ) THEN
474 info = -2
475 END IF
476*
477 IF( n+ja-1.GT.store_n_a ) THEN
478 info = -( 7*100+6 )
479 END IF
480*
481 IF( ( bw.GT.n-1 ) .OR. ( bw.LT.0 ) ) THEN
482 info = -3
483 END IF
484*
485 IF( llda.LT.( bw+1 ) ) THEN
486 info = -( 7*100+6 )
487 END IF
488*
489 IF( nb.LE.0 ) THEN
490 info = -( 7*100+4 )
491 END IF
492*
493 IF( n+ib-1.GT.store_m_b ) THEN
494 info = -( 10*100+3 )
495 END IF
496*
497 IF( lldb.LT.nb ) THEN
498 info = -( 10*100+6 )
499 END IF
500*
501 IF( nrhs.LT.0 ) THEN
502 info = -3
503 END IF
504*
505* Current alignment restriction
506*
507 IF( ja.NE.ib ) THEN
508 info = -6
509 END IF
510*
511* Argument checking that is specific to Divide & Conquer routine
512*
513 IF( nprow.NE.1 ) THEN
514 info = -( 7*100+2 )
515 END IF
516*
517 IF( n.GT.np*nb-mod( ja-1, nb ) ) THEN
518 info = -( 2 )
519 CALL pxerbla( ictxt, 'PSPBTRS, D&C alg.: only 1 block per proc'
520 $ , -info )
521 RETURN
522 END IF
523*
524 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*bw ) ) THEN
525 info = -( 7*100+4 )
526 CALL pxerbla( ictxt, 'PSPBTRS, D&C alg.: NB too small', -info )
527 RETURN
528 END IF
529*
530*
531 work_size_min = ( bw*nrhs )
532*
533 work( 1 ) = work_size_min
534*
535 IF( lwork.LT.work_size_min ) THEN
536 IF( lwork.NE.-1 ) THEN
537 info = -14
538 CALL pxerbla( ictxt, 'PSPBTRS: worksize error', -info )
539 END IF
540 RETURN
541 END IF
542*
543* Pack params and positions into arrays for global consistency check
544*
545 param_check( 16, 1 ) = descb( 5 )
546 param_check( 15, 1 ) = descb( 4 )
547 param_check( 14, 1 ) = descb( 3 )
548 param_check( 13, 1 ) = descb( 2 )
549 param_check( 12, 1 ) = descb( 1 )
550 param_check( 11, 1 ) = ib
551 param_check( 10, 1 ) = desca( 5 )
552 param_check( 9, 1 ) = desca( 4 )
553 param_check( 8, 1 ) = desca( 3 )
554 param_check( 7, 1 ) = desca( 1 )
555 param_check( 6, 1 ) = ja
556 param_check( 5, 1 ) = nrhs
557 param_check( 4, 1 ) = bw
558 param_check( 3, 1 ) = n
559 param_check( 2, 1 ) = idum3
560 param_check( 1, 1 ) = idum1
561*
562 param_check( 16, 2 ) = 1005
563 param_check( 15, 2 ) = 1004
564 param_check( 14, 2 ) = 1003
565 param_check( 13, 2 ) = 1002
566 param_check( 12, 2 ) = 1001
567 param_check( 11, 2 ) = 9
568 param_check( 10, 2 ) = 705
569 param_check( 9, 2 ) = 704
570 param_check( 8, 2 ) = 703
571 param_check( 7, 2 ) = 701
572 param_check( 6, 2 ) = 6
573 param_check( 5, 2 ) = 4
574 param_check( 4, 2 ) = 3
575 param_check( 3, 2 ) = 2
576 param_check( 2, 2 ) = 14
577 param_check( 1, 2 ) = 1
578*
579* Want to find errors with MIN( ), so if no error, set it to a big
580* number. If there already is an error, multiply by the the
581* descriptor multiplier.
582*
583 IF( info.GE.0 ) THEN
584 info = bignum
585 ELSE IF( info.LT.-descmult ) THEN
586 info = -info
587 ELSE
588 info = -info*descmult
589 END IF
590*
591* Check consistency across processors
592*
593 CALL globchk( ictxt, 16, param_check, 16, param_check( 1, 3 ),
594 $ info )
595*
596* Prepare output: set info = 0 if no error, and divide by DESCMULT
597* if error is not in a descriptor entry.
598*
599 IF( info.EQ.bignum ) THEN
600 info = 0
601 ELSE IF( mod( info, descmult ).EQ.0 ) THEN
602 info = -info / descmult
603 ELSE
604 info = -info
605 END IF
606*
607 IF( info.LT.0 ) THEN
608 CALL pxerbla( ictxt, 'PSPBTRS', -info )
609 RETURN
610 END IF
611*
612* Quick return if possible
613*
614 IF( n.EQ.0 )
615 $ RETURN
616*
617 IF( nrhs.EQ.0 )
618 $ RETURN
619*
620*
621* Adjust addressing into matrix space to properly get into
622* the beginning part of the relevant data
623*
624 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
625*
626 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb ) THEN
627 part_offset = part_offset + nb
628 END IF
629*
630 IF( mycol.LT.csrc ) THEN
631 part_offset = part_offset - nb
632 END IF
633*
634* Form a new BLACS grid (the "standard form" grid) with only procs
635* holding part of the matrix, of size 1xNP where NP is adjusted,
636* starting at csrc=0, with JA modified to reflect dropped procs.
637*
638* First processor to hold part of the matrix:
639*
640 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
641*
642* Calculate new JA one while dropping off unused processors.
643*
644 ja_new = mod( ja-1, nb ) + 1
645*
646* Save and compute new value of NP
647*
648 np_save = np
649 np = ( ja_new+n-2 ) / nb + 1
650*
651* Call utility routine that forms "standard-form" grid
652*
653 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
654 $ int_one, np )
655*
656* Use new context from standard grid as context.
657*
658 ictxt_save = ictxt
659 ictxt = ictxt_new
660 desca_1xp( 2 ) = ictxt_new
661 descb_px1( 2 ) = ictxt_new
662*
663* Get information about new grid.
664*
665 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
666*
667* Drop out processors that do not have part of the matrix.
668*
669 IF( myrow.LT.0 ) THEN
670 GO TO 20
671 END IF
672*
673*
674*
675* Begin main code
676*
677 info = 0
678*
679* Call frontsolve routine
680*
681 IF( lsame( uplo, 'L' ) ) THEN
682*
683 CALL pspbtrsv( 'L', 'N', n, bw, nrhs, a( part_offset+1 ),
684 $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
685 $ work, lwork, info )
686*
687 ELSE
688*
689 CALL pspbtrsv( 'U', 'T', n, bw, nrhs, a( part_offset+1 ),
690 $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
691 $ work, lwork, info )
692*
693 END IF
694*
695* Call backsolve routine
696*
697 IF( lsame( uplo, 'L' ) ) THEN
698*
699 CALL pspbtrsv( 'L', 'T', n, bw, nrhs, a( part_offset+1 ),
700 $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
701 $ work, lwork, info )
702*
703 ELSE
704*
705 CALL pspbtrsv( 'U', 'N', n, bw, nrhs, a( part_offset+1 ),
706 $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
707 $ work, lwork, info )
708*
709 END IF
710 10 CONTINUE
711*
712*
713* Free BLACS space used to hold standard-form grid.
714*
715 IF( ictxt_save.NE.ictxt_new ) THEN
716 CALL blacs_gridexit( ictxt_new )
717 END IF
718*
719 20 CONTINUE
720*
721* Restore saved input parameters
722*
723 ictxt = ictxt_save
724 np = np_save
725*
726* Output minimum worksize
727*
728 work( 1 ) = work_size_min
729*
730*
731 RETURN
732*
733* End of PSPBTRS
734*
735 END
subroutine desc_convert(desc_in, desc_out, info)
Definition desc_convert.f:2
subroutine globchk(ictxt, n, x, ldx, iwork, info)
Definition pchkxmat.f:403
subroutine pspbtrs(uplo, n, bw, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pspbtrs.f:3
subroutine pspbtrsv(uplo, trans, n, bw, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pspbtrsv.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
void reshape(Int *context_in, Int *major_in, Int *context_out, Int *major_out, Int *first_proc, Int *nprow_new, Int *npcol_new)
Definition reshape.c:80