pzpbtrs.f
      SUBROUTINE pzpbtrs( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB,
     $                    AF, LAF, WORK, LWORK, INFO )
*
*
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     August 7, 2001
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            BW, IB, INFO, JA, LAF, LWORK, N, NRHS
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCB( * )
      COMPLEX*16         A( * ), AF( * ), B( * ), WORK( * )
*     ..
*
*
*  Purpose
*  =======
*
*  PZPBTRS solves a system of linear equations
*
*     A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
*
*  where A(1:N, JA:JA+N-1) is the matrix used to produce the factors
*  stored in A(1:N, JA:JA+N-1) and AF by PZPBTRF.
*  A(1:N, JA:JA+N-1) is an N-by-N complex banded Hermitian positive
*  definite distributed matrix with bandwidth BW.
*  Depending on the value of UPLO, A stores either U or L from the
*  factorization A(1:N, JA:JA+N-1) = U'*U or L*L' computed by PZPBTRF.
*
*  Routine PZPBTRF MUST be called first.
*
*  =====================================================================
*
*  Arguments
*  =========
*
*  UPLO    (global input) CHARACTER
*          = 'U':  Upper triangle of A(1:N, JA:JA+N-1) is stored;
*          = 'L':  Lower triangle of A(1:N, JA:JA+N-1) is stored.
*
*  N       (global input) INTEGER
*          The number of rows and columns to be operated on, i.e. the
*          order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
*
*  BW      (global input) INTEGER
*          Number of subdiagonals in L or U. 0 <= BW <= N-1.
*
*  NRHS    (global input) INTEGER
*          The number of right hand sides, i.e., the number of columns
*          of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
*          NRHS >= 0.
*
*  A       (local input/local output) COMPLEX*16 pointer into
*          local memory to an array with first dimension
*          LLD_A >= (BW+1) (stored in DESCA).
*          On entry, this array contains the local pieces of the
*          Cholesky factor U or L of the N-by-N Hermitian banded
*          distributed matrix A(1:N, JA:JA+N-1), as computed by
*          PZPBTRF.
*          This local portion is stored in the packed banded format
*          used in LAPACK. Please see the Notes below and the
*          ScaLAPACK manual for more detail on the format of
*          distributed matrices.
*
*  JA      (global input) INTEGER
*          The index in the global array A that points to the start of
*          the matrix to be operated on (which may be either all of A
*          or a submatrix of A).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN.
*          if 1D type (DTYPE_A=501), DLEN >= 7;
*          if 2D type (DTYPE_A=1), DLEN >= 9.
*          The array descriptor for the distributed matrix A.
*          Contains information of mapping of A to memory. Please
*          see NOTES below for full description and options.
*
*  B       (local input/local output) COMPLEX*16 pointer into
*          local memory to an array of local lead dimension
*          LLD_B >= NB.
*          On entry, this array contains the local pieces of the
*          right hand sides B(IB:IB+N-1, 1:NRHS).
*          On exit, this contains the local piece of the solution
*          distributed matrix X.
*
*  IB      (global input) INTEGER
*          The row index in the global array B that points to the first
*          row of the matrix to be operated on (which may be either
*          all of B or a submatrix of B).
*
*  DESCB   (global and local input) INTEGER array of dimension DLEN.
*          if 1D type (DTYPE_B=502), DLEN >= 7;
*          if 2D type (DTYPE_B=1), DLEN >= 9.
*          The array descriptor for the distributed matrix B.
*          Contains information of mapping of B to memory. Please
*          see NOTES below for full description and options.
*
*  AF      (local output) COMPLEX*16 array, dimension LAF.
*          Auxiliary Fillin Space.
*          Fillin is created during the factorization routine
*          PZPBTRF and this is stored in AF. If a linear system
*          is to be solved using PZPBTRS after the factorization
*          routine, AF *must not be altered* after the factorization.
*
*  LAF     (local input) INTEGER
*          Size of user-input Auxiliary Fillin space AF. Must be >=
*          (NB+2*BW)*BW.
*          If LAF is not large enough, an error code will be returned
*          and the minimum acceptable size will be returned in AF( 1 ).
*
*  WORK    (local workspace/local output)
*          COMPLEX*16 temporary workspace. This space may
*          be overwritten in between calls to routines. WORK must be
*          the size given in LWORK.
*          On exit, WORK( 1 ) contains the minimal LWORK.
*
*  LWORK   (local input or global input) INTEGER
*          Size of user-input workspace WORK.
*          If LWORK is too small, the minimal acceptable size will be
*          returned in WORK( 1 ) and an error code is returned.
*          LWORK >= BW*NRHS.
*
*  INFO    (global output) INTEGER
*          = 0:  successful exit
*          < 0:  If the i-th argument is an array and the j-th entry
*                had an illegal value, then INFO = -(i*100+j); if the
*                i-th argument is a scalar and had an illegal value,
*                then INFO = -i.
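*                For example, INFO = -(7*100+4) would indicate that
*                entry 4 (the blocking factor NB) of the descriptor
*                DESCA (argument 7) had an illegal value.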
*
*  =====================================================================
*
*
*  Restrictions
*  ============
*
*  The following are restrictions on the input parameters. Some of these
*  are temporary and will be removed in future releases, while others
*  may reflect fundamental technical limitations.
*
*  Non-cyclic restriction: VERY IMPORTANT!
*    P*NB >= mod(JA-1,NB)+N.
*    The mapping for matrices must be blocked, reflecting the nature
*    of the divide and conquer algorithm as a task-parallel algorithm.
*    In words, this formula says: no processor may have more than one
*    chunk of the matrix.
*
*  Blocksize cannot be too small:
*    If the matrix spans more than one processor, the following
*    restriction on NB, the size of each block on each processor,
*    must hold:
*       NB >= 2*BW
*    The bulk of parallel computation is done on the matrix of size
*    O(NB) on each processor. If this is too small, divide and conquer
*    is a poor choice of algorithm.
*
*  Submatrix reference:
*    JA = IB
*    Alignment restriction that prevents unnecessary communication.
*
*  =====================================================================
*
*
*  Notes
*  =====
*
*  If the factorization routine and the solve routine are to be called
*  separately (to solve various sets of right hand sides using the same
*  coefficient matrix), the auxiliary space AF *must not be altered*
*  between calls to the factorization routine and the solve routine.
*
*  The best algorithm for solving banded and tridiagonal linear systems
*  depends on a variety of parameters, especially the bandwidth.
*  Currently, only algorithms designed for the case N/P >> BW are
*  implemented. These go by many names, including Divide and Conquer,
*  Partitioning, domain decomposition-type, etc.
*
*  Algorithm description: Divide and Conquer
*
*  The Divide and Conquer algorithm assumes the matrix is narrowly
*  banded compared with the number of equations. In this situation,
*  it is best to distribute the input matrix A one-dimensionally,
*  with columns atomic and rows divided amongst the processes.
*  The basic algorithm divides the banded matrix up into
*  P pieces with one stored on each processor,
*  and then proceeds in 2 phases for the factorization or 3 for the
*  solution of a linear system.
*  1) Local Phase:
*     The individual pieces are factored independently and in
*     parallel. These factors are applied to the matrix creating
*     fillin, which is stored in a non-inspectable way in auxiliary
*     space AF. Mathematically, this is equivalent to reordering
*     the matrix A as P A P^T and then factoring the principal
*     leading submatrix of size equal to the sum of the sizes of
*     the matrices factored on each processor. The factors of
*     these submatrices overwrite the corresponding parts of A
*     in memory.
*  2) Reduced System Phase:
*     A small (BW*(P-1)) system is formed representing
*     interaction of the larger blocks, and is stored (as are its
*     factors) in the space AF. A parallel Block Cyclic Reduction
*     algorithm is used. For a linear system, a parallel front solve
*     followed by an analogous backsolve, both using the structure
*     of the factored matrix, are performed.
*  3) Backsubstitution Phase:
*     For a linear system, a local backsubstitution is performed on
*     each processor in parallel.
*
*
*  Descriptors
*  ===========
*
*  Descriptors now have *types* and differ from ScaLAPACK 1.0.
*
*  Note: banded codes can use either the old two dimensional
*  or new one-dimensional descriptors, though the processor grid in
*  both cases *must be one-dimensional*. We describe both types below.
*
*  Each global data object is described by an associated description
*  vector. This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array. Such a global array has an associated description vector
*  DESCA. In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN       EXPLANATION
*  --------------- --------------- -------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
*                                  DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ )  The BLACS context handle, indicating
*                                  the BLACS process grid A is distribu-
*                                  ted over. The context itself is glo-
*                                  bal, but the handle (the integer
*                                  value) may vary.
*  M_A    (global) DESCA( M_ )     The number of rows in the global
*                                  array A.
*  N_A    (global) DESCA( N_ )     The number of columns in the global
*                                  array A.
*  MB_A   (global) DESCA( MB_ )    The blocking factor used to
*                                  distribute the rows of the array.
*  NB_A   (global) DESCA( NB_ )    The blocking factor used to
*                                  distribute the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ )  The process row over which the first
*                                  row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ )  The process column over which the
*                                  first column of the array A is
*                                  distributed.
*  LLD_A  (local)  DESCA( LLD_ )   The leading dimension of the local
*                                  array. LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
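*
*  As a worked example (values illustrative only): with M = 1000,
*  MB_A = 64, RSRC_A = 0 and NPROW = 4, process row 0 owns blocks
*  0, 4, 8 and 12, so LOCr( 1000 ) = NUMROC( 1000, 64, 0, 0, 4 ) =
*  4*64 = 256, while process row 3 owns three full blocks plus the
*  short trailing block of 40 rows, giving 3*64 + 40 = 232.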
*
*
*  One-dimensional descriptors:
*
*  One-dimensional descriptors are a new addition to ScaLAPACK since
*  version 1.0. They simplify and shorten the descriptor for 1D
*  arrays.
*
*  Since ScaLAPACK supports two-dimensional arrays as the fundamental
*  object, we allow 1D arrays to be distributed either over the
*  first dimension of the array (as if the grid were P-by-1) or the
*  second dimension (as if the grid were 1-by-P). This choice is
*  indicated by the descriptor type (501 or 502)
*  as described below.
*
*  IMPORTANT NOTE: the actual BLACS grid represented by the
*  CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
*  irrespective of which one-dimensional descriptor type
*  (501 or 502) is input.
*  This routine will interpret the grid properly either way.
*  ScaLAPACK routines *do not support intercontext operations* so that
*  the grid passed to a single ScaLAPACK routine *must be the same*
*  for all array descriptors passed to that routine.
*
*  NOTE: In all cases where 1D descriptors are used, 2D descriptors
*  may also be used, since a one-dimensional array is a special case
*  of a two-dimensional array with one dimension of size unity.
*  The two-dimensional array used in this case *must* be of the
*  proper orientation:
*    If the appropriate one-dimensional descriptor is DTYPE_A=501
*    (1 by P type), then the two dimensional descriptor must
*    have a CTXT value that refers to a 1 by P BLACS grid;
*    If the appropriate one-dimensional descriptor is DTYPE_A=502
*    (P by 1 type), then the two dimensional descriptor must
*    have a CTXT value that refers to a P by 1 BLACS grid.
*
*
*  Summary of allowed descriptors, types, and BLACS grids:
*  DTYPE        501         502         1          1
*  BLACS grid   1xP or Px1  1xP or Px1  1xP        Px1
*  -----------------------------------------------------
*  A            OK          NO          OK         NO
*  B            NO          OK          NO         OK
*
*  Note that a consequence of this chart is that it is not possible
*  for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
*  to opposite requirements for the orientation of the BLACS grid,
*  and as noted before, the *same* BLACS context must be used in
*  all descriptors in a single ScaLAPACK subroutine call.
*
*  Let A be a generic term for any 1D block cyclically distributed
*  array. Such a global array has an associated description vector
*  DESCA. In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN  EXPLANATION
*  --------------- ---------- ------------------------------------------
*  DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
*                             TYPE_A = 501: 1-by-P grid.
*                             TYPE_A = 502: P-by-1 grid.
*  CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
*                             the BLACS process grid A is distribu-
*                             ted over. The context itself is glo-
*                             bal, but the handle (the integer
*                             value) may vary.
*  N_A    (global) DESCA( 3 ) The size of the array dimension being
*                             distributed.
*  NB_A   (global) DESCA( 4 ) The blocking factor used to distribute
*                             the distributed dimension of the array.
*  SRC_A  (global) DESCA( 5 ) The process row or column over which the
*                             first row or column of the array
*                             is distributed.
*  LLD_A  (local)  DESCA( 6 ) The leading dimension of the local array
*                             storing the local blocks of the distri-
*                             buted array A. Minimum value of LLD_A
*                             depends on TYPE_A.
*                             TYPE_A = 501: LLD_A >= MAX( size of
*                                undistributed dimension, 1 ).
*                             TYPE_A = 502: LLD_A >= MAX( NB_A, 1 ).
*  Reserved        DESCA( 7 ) Reserved for future use.
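*
*  As a hedged, illustrative example (ICTXT, N, NB and BW are the
*  caller's own values; SRC_A = 0 and JA = 1 are assumed), a type-501
*  descriptor for the banded matrix A of this routine could be filled
*  as:
*
*     DESCA( 1 ) = 501
*     DESCA( 2 ) = ICTXT
*     DESCA( 3 ) = N
*     DESCA( 4 ) = NB
*     DESCA( 5 ) = 0
*     DESCA( 6 ) = BW+1
*     DESCA( 7 ) = 0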
*
*
*
*  =====================================================================
*
*  Code Developer: Andrew J. Cleary, University of Tennessee.
*    Current address: Lawrence Livermore National Labs.
*  This version released: August, 2001.
*
*  =====================================================================
*
*     ..
*     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      parameter( one = 1.0d+0 )
      parameter( zero = 0.0d+0 )
      COMPLEX*16         CONE, CZERO
      parameter( cone = ( 1.0d+0, 0.0d+0 ) )
      parameter( czero = ( 0.0d+0, 0.0d+0 ) )
      INTEGER            INT_ONE
      parameter( int_one = 1 )
      INTEGER            DESCMULT, BIGNUM
      parameter( descmult = 100, bignum = descmult * descmult )
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   lld_, mb_, m_, nb_, n_, rsrc_
      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
     $           ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
     $           rsrc_ = 7, csrc_ = 8, lld_ = 9 )
*     ..
*     .. Local Scalars ..
      INTEGER            CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
     $                   idum1, idum3, ja_new, llda, lldb, mycol, myrow,
     $                   nb, np, npcol, nprow, np_save, part_offset,
     $                   return_code, store_m_b, store_n_a,
     $                   work_size_min
*     ..
*     .. Local Arrays ..
      INTEGER            DESCA_1XP( 7 ), DESCB_PX1( 7 ),
     $                   param_check( 16, 3 )
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_gridinfo, desc_convert, globchk, pxerbla,
     $                   blacs_gridexit, pzpbtrsv, reshape
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            NUMROC
      EXTERNAL           lsame, numroc
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ichar, min, mod
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters
*
      info = 0
*
*     Convert descriptor into standard form for easy access to
*     parameters, check that grid is of right shape.
*
      desca_1xp( 1 ) = 501
      descb_px1( 1 ) = 502
*
      CALL desc_convert( desca, desca_1xp, return_code )
*
      IF( return_code .NE. 0 ) THEN
         info = -( 7*100 + 2 )
      ENDIF
*
      CALL desc_convert( descb, descb_px1, return_code )
*
      IF( return_code .NE. 0 ) THEN
         info = -( 10*100 + 2 )
      ENDIF
*
*     Consistency checks for DESCA and DESCB.
*
*     Context must be the same
      IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) ) THEN
         info = -( 10*100 + 2 )
      ENDIF
*
*     These are alignment restrictions that may or may not be removed
*     in future releases. -Andy Cleary, April 14, 1996.
*
*     Block sizes must be the same
      IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) ) THEN
         info = -( 10*100 + 4 )
      ENDIF
*
*     Source processor must be the same
*
      IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) ) THEN
         info = -( 10*100 + 5 )
      ENDIF
*
*     Get values out of descriptor for use in code.
*
      ictxt = desca_1xp( 2 )
      csrc = desca_1xp( 5 )
      nb = desca_1xp( 4 )
      llda = desca_1xp( 6 )
      store_n_a = desca_1xp( 3 )
      lldb = descb_px1( 6 )
      store_m_b = descb_px1( 3 )
*
*     Get grid parameters
*
*
      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
      np = nprow * npcol
*
*
*
      IF( lsame( uplo, 'U' ) ) THEN
         idum1 = ichar( 'U' )
      ELSE IF ( lsame( uplo, 'L' ) ) THEN
         idum1 = ichar( 'L' )
      ELSE
         info = -1
      END IF
*
      IF( lwork .LT. -1 ) THEN
         info = -14
      ELSE IF ( lwork .EQ. -1 ) THEN
         idum3 = -1
      ELSE
         idum3 = 1
      ENDIF
*
      IF( n .LT. 0 ) THEN
         info = -2
      ENDIF
*
      IF( n+ja-1 .GT. store_n_a ) THEN
         info = -( 7*100 + 6 )
      ENDIF
*
      IF( ( bw .GT. n-1 ) .OR.
     $    ( bw .LT. 0 ) ) THEN
         info = -3
      ENDIF
*
      IF( llda .LT. (bw+1) ) THEN
         info = -( 7*100 + 6 )
      ENDIF
*
      IF( nb .LE. 0 ) THEN
         info = -( 7*100 + 4 )
      ENDIF
*
      IF( n+ib-1 .GT. store_m_b ) THEN
         info = -( 10*100 + 3 )
      ENDIF
*
      IF( lldb .LT. nb ) THEN
         info = -( 10*100 + 6 )
      ENDIF
*
*     NRHS is argument 4, so an illegal value maps to INFO = -4
      IF( nrhs .LT. 0 ) THEN
         info = -4
      ENDIF
*
*     Current alignment restriction
*
      IF( ja .NE. ib ) THEN
         info = -6
      ENDIF
*
*     Argument checking that is specific to Divide & Conquer routine
*
      IF( nprow .NE. 1 ) THEN
         info = -( 7*100+2 )
      ENDIF
*
      IF( n .GT. np*nb-mod( ja-1, nb ) ) THEN
         info = -( 2 )
         CALL pxerbla( ictxt,
     $                 'PZPBTRS, D&C alg.: only 1 block per proc',
     $                 -info )
         RETURN
      ENDIF
*
      IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*bw ) ) THEN
         info = -( 7*100+4 )
         CALL pxerbla( ictxt,
     $                 'PZPBTRS, D&C alg.: NB too small',
     $                 -info )
         RETURN
      ENDIF
*
*
      work_size_min = bw*nrhs
*
      work( 1 ) = work_size_min
*
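*     Note that LWORK = -1 acts as a workspace query: the minimal size
*     is returned in WORK( 1 ) and no error exit is taken.
*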
      IF( lwork .LT. work_size_min ) THEN
         IF( lwork .NE. -1 ) THEN
            info = -14
            CALL pxerbla( ictxt,
     $                    'PZPBTRS: worksize error',
     $                    -info )
         ENDIF
         RETURN
      ENDIF
*
*     Pack params and positions into arrays for global consistency check
*
      param_check( 16, 1 ) = descb(5)
      param_check( 15, 1 ) = descb(4)
      param_check( 14, 1 ) = descb(3)
      param_check( 13, 1 ) = descb(2)
      param_check( 12, 1 ) = descb(1)
      param_check( 11, 1 ) = ib
      param_check( 10, 1 ) = desca(5)
      param_check( 9, 1 ) = desca(4)
      param_check( 8, 1 ) = desca(3)
      param_check( 7, 1 ) = desca(1)
      param_check( 6, 1 ) = ja
      param_check( 5, 1 ) = nrhs
      param_check( 4, 1 ) = bw
      param_check( 3, 1 ) = n
      param_check( 2, 1 ) = idum3
      param_check( 1, 1 ) = idum1
*
      param_check( 16, 2 ) = 1005
      param_check( 15, 2 ) = 1004
      param_check( 14, 2 ) = 1003
      param_check( 13, 2 ) = 1002
      param_check( 12, 2 ) = 1001
      param_check( 11, 2 ) = 9
      param_check( 10, 2 ) = 705
      param_check( 9, 2 ) = 704
      param_check( 8, 2 ) = 703
      param_check( 7, 2 ) = 701
      param_check( 6, 2 ) = 6
      param_check( 5, 2 ) = 4
      param_check( 4, 2 ) = 3
      param_check( 3, 2 ) = 2
      param_check( 2, 2 ) = 14
      param_check( 1, 2 ) = 1
*
*     Want to find errors with MIN( ), so if no error, set it to a big
*     number. If there already is an error, multiply by the
*     descriptor multiplier.
*
      IF( info.GE.0 ) THEN
         info = bignum
      ELSE IF( info.LT.-descmult ) THEN
         info = -info
      ELSE
         info = -info * descmult
      END IF
*
*     Check consistency across processors
*
      CALL globchk( ictxt, 16, param_check, 16,
     $              param_check( 1, 3 ), info )
*
*     Prepare output: set info = 0 if no error, and divide by DESCMULT
*     if error is not in a descriptor entry.
*
      IF( info.EQ.bignum ) THEN
         info = 0
      ELSE IF( mod( info, descmult ) .EQ. 0 ) THEN
         info = -info / descmult
      ELSE
         info = -info
      END IF
*
      IF( info.LT.0 ) THEN
         CALL pxerbla( ictxt, 'PZPBTRS', -info )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( n.EQ.0 )
     $   RETURN
*
      IF( nrhs.EQ.0 )
     $   RETURN
*
*
*     Adjust addressing into matrix space to properly get into
*     the beginning part of the relevant data
*
      part_offset = nb*( (ja-1)/(npcol*nb) )
*
      IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb ) THEN
         part_offset = part_offset + nb
      ENDIF
*
      IF ( mycol .LT. csrc ) THEN
         part_offset = part_offset - nb
      ENDIF
*
*     Form a new BLACS grid (the "standard form" grid) with only procs
*     holding part of the matrix, of size 1xNP where NP is adjusted,
*     starting at csrc=0, with JA modified to reflect dropped procs.
*
*     First processor to hold part of the matrix:
*
      first_proc = mod( ( ja-1 )/nb+csrc, npcol )
*
*     Calculate the new JA, dropping off the unused processors.
*
      ja_new = mod( ja-1, nb ) + 1
*
*     Save and compute new value of NP
*
      np_save = np
      np = ( ja_new+n-2 )/nb + 1
*
*     Call utility routine that forms "standard-form" grid
*
      CALL reshape( ictxt, int_one, ictxt_new, int_one,
     $              first_proc, int_one, np )
*
*     Use new context from standard grid as context.
*
      ictxt_save = ictxt
      ictxt = ictxt_new
      desca_1xp( 2 ) = ictxt_new
      descb_px1( 2 ) = ictxt_new
*
*     Get information about new grid.
*
      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
*
*     Drop out processors that do not have part of the matrix.
*
      IF( myrow .LT. 0 ) THEN
         GOTO 1234
      ENDIF
*
*
*
*     Begin main code
*
      info = 0
*
*     Call frontsolve routine
*
      IF( lsame( uplo, 'L' ) ) THEN
*
         CALL pzpbtrsv( 'L', 'N', n, bw, nrhs, a( part_offset+1 ),
     $                  ja_new, desca_1xp, b, ib, descb_px1, af, laf,
     $                  work, lwork, info )
*
      ELSE
*
         CALL pzpbtrsv( 'U', 'C', n, bw, nrhs, a( part_offset+1 ),
     $                  ja_new, desca_1xp, b, ib, descb_px1, af, laf,
     $                  work, lwork, info )
*
      ENDIF
*
*     Call backsolve routine
*
      IF( lsame( uplo, 'L' ) ) THEN
*
         CALL pzpbtrsv( 'L', 'C', n, bw, nrhs, a( part_offset+1 ),
     $                  ja_new, desca_1xp, b, ib, descb_px1, af, laf,
     $                  work, lwork, info )
*
      ELSE
*
         CALL pzpbtrsv( 'U', 'N', n, bw, nrhs, a( part_offset+1 ),
     $                  ja_new, desca_1xp, b, ib, descb_px1, af, laf,
     $                  work, lwork, info )
*
      ENDIF
 1000 CONTINUE
*
*
*     Free BLACS space used to hold standard-form grid.
*
      IF( ictxt_save .NE. ictxt_new ) THEN
         CALL blacs_gridexit( ictxt_new )
      ENDIF
*
 1234 CONTINUE
*
*     Restore saved input parameters
*
      ictxt = ictxt_save
      np = np_save
*
*     Output minimum worksize
*
      work( 1 ) = work_size_min
*
*
      RETURN
*
*     End of PZPBTRS
*
      END