SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzdttrs.f
Go to the documentation of this file.
1 SUBROUTINE pzdttrs( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB,
2 $ DESCB, AF, LAF, WORK, LWORK, INFO )
3*
4*
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* August 7, 2001
10*
11* .. Scalar Arguments ..
12 CHARACTER TRANS
13 INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
14* ..
15* .. Array Arguments ..
16 INTEGER DESCA( * ), DESCB( * )
17 COMPLEX*16 AF( * ), B( * ), D( * ), DL( * ), DU( * ),
18 $ work( * )
19* ..
20*
21*
22* Purpose
23* =======
24*
25* PZDTTRS solves a system of linear equations
26*
27* A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
28* or
29* A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS)
30*
31* where A(1:N, JA:JA+N-1) is the matrix used to produce the factors
32* stored in A(1:N,JA:JA+N-1) and AF by PZDTTRF.
33* A(1:N, JA:JA+N-1) is an N-by-N complex
34* tridiagonal diagonally dominant-like distributed
35* matrix.
36*
37* Routine PZDTTRF MUST be called first.
38*
39* =====================================================================
40*
41* Arguments
42* =========
43*
44*
45* TRANS (global input) CHARACTER
46* = 'N': Solve with A(1:N, JA:JA+N-1);
47* = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) );
48*
49* N (global input) INTEGER
50* The number of rows and columns to be operated on, i.e. the
51* order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
52*
53* NRHS (global input) INTEGER
54* The number of right hand sides, i.e., the number of columns
55* of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
56* NRHS >= 0.
57*
58* DL (local input/local output) COMPLEX*16 pointer to local
59* part of global vector storing the lower diagonal of the
60* matrix. Globally, DL(1) is not referenced, and DL must be
61* aligned with D.
62* Must be of size >= DESCA( NB_ ).
63* On exit, this array contains information containing the
64* factors of the matrix.
65*
66* D (local input/local output) COMPLEX*16 pointer to local
67* part of global vector storing the main diagonal of the
68* matrix.
69* On exit, this array contains information containing the
70* factors of the matrix.
71* Must be of size >= DESCA( NB_ ).
72*
73* DU (local input/local output) COMPLEX*16 pointer to local
74* part of global vector storing the upper diagonal of the
75* matrix. Globally, DU(n) is not referenced, and DU must be
76* aligned with D.
77* On exit, this array contains information containing the
78* factors of the matrix.
79* Must be of size >= DESCA( NB_ ).
80*
81* JA (global input) INTEGER
82* The index in the global array A that points to the start of
83* the matrix to be operated on (which may be either all of A
84* or a submatrix of A).
85*
86* DESCA (global and local input) INTEGER array of dimension DLEN.
87* if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
88* if 2D type (DTYPE_A=1), DLEN >= 9.
89* The array descriptor for the distributed matrix A.
90* Contains information of mapping of A to memory. Please
91* see NOTES below for full description and options.
92*
93* B (local input/local output) COMPLEX*16 pointer into
94* local memory to an array of local lead dimension lld_b>=NB.
95* On entry, this array contains the
96* the local pieces of the right hand sides
97* B(IB:IB+N-1, 1:NRHS).
98* On exit, this contains the local piece of the solutions
99* distributed matrix X.
100*
101* IB (global input) INTEGER
102* The row index in the global array B that points to the first
103* row of the matrix to be operated on (which may be either
104* all of B or a submatrix of B).
105*
106* DESCB (global and local input) INTEGER array of dimension DLEN.
107* if 1D type (DTYPE_B=502), DLEN >=7;
108* if 2D type (DTYPE_B=1), DLEN >= 9.
109* The array descriptor for the distributed matrix B.
110* Contains information of mapping of B to memory. Please
111* see NOTES below for full description and options.
112*
113* AF (local output) COMPLEX*16 array, dimension LAF.
114* Auxiliary Fillin Space.
115* Fillin is created during the factorization routine
116* PZDTTRF and this is stored in AF. If a linear system
117* is to be solved using PZDTTRS after the factorization
118* routine, AF *must not be altered* after the factorization.
119*
120* LAF (local input) INTEGER
121* Size of user-input Auxiliary Fillin space AF. Must be >=
122* 2*(NB+2)
123* If LAF is not large enough, an error code will be returned
124* and the minimum acceptable size will be returned in AF( 1 )
125*
126* WORK (local workspace/local output)
127* COMPLEX*16 temporary workspace. This space may
128* be overwritten in between calls to routines. WORK must be
129* the size given in LWORK.
130* On exit, WORK( 1 ) contains the minimal LWORK.
131*
132* LWORK (local input or global input) INTEGER
133* Size of user-input workspace WORK.
134* If LWORK is too small, the minimal acceptable size will be
135* returned in WORK(1) and an error code is returned. LWORK>=
136* 10*NPCOL+4*NRHS
137*
138* INFO (local output) INTEGER
139* = 0: successful exit
140* < 0: If the i-th argument is an array and the j-entry had
141* an illegal value, then INFO = -(i*100+j), if the i-th
142* argument is a scalar and had an illegal value, then
143* INFO = -i.
144*
145* =====================================================================
146*
147*
148* Restrictions
149* ============
150*
151* The following are restrictions on the input parameters. Some of these
152* are temporary and will be removed in future releases, while others
153* may reflect fundamental technical limitations.
154*
155* Non-cyclic restriction: VERY IMPORTANT!
156* P*NB>= mod(JA-1,NB)+N.
157* The mapping for matrices must be blocked, reflecting the nature
158* of the divide and conquer algorithm as a task-parallel algorithm.
159* This formula in words is: no processor may have more than one
160* chunk of the matrix.
161*
162* Blocksize cannot be too small:
163* If the matrix spans more than one processor, the following
164* restriction on NB, the size of each block on each processor,
165* must hold:
166* NB >= 2
167* The bulk of parallel computation is done on the matrix of size
168* O(NB) on each processor. If this is too small, divide and conquer
169* is a poor choice of algorithm.
170*
171* Submatrix reference:
172* JA = IB
173* Alignment restriction that prevents unnecessary communication.
174*
175*
176* =====================================================================
177*
178*
179* Notes
180* =====
181*
182* If the factorization routine and the solve routine are to be called
183* separately (to solve various sets of righthand sides using the same
184* coefficient matrix), the auxiliary space AF *must not be altered*
185* between calls to the factorization routine and the solve routine.
186*
187* The best algorithm for solving banded and tridiagonal linear systems
188* depends on a variety of parameters, especially the bandwidth.
189* Currently, only algorithms designed for the case N/P >> bw are
190* implemented. These go by many names, including Divide and Conquer,
191* Partitioning, domain decomposition-type, etc.
192* For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
193* algorithms are the appropriate choice.
194*
195* Algorithm description: Divide and Conquer
196*
197* The Divide and Conqer algorithm assumes the matrix is narrowly
198* banded compared with the number of equations. In this situation,
199* it is best to distribute the input matrix A one-dimensionally,
200* with columns atomic and rows divided amongst the processes.
201* The basic algorithm divides the tridiagonal matrix up into
202* P pieces with one stored on each processor,
203* and then proceeds in 2 phases for the factorization or 3 for the
204* solution of a linear system.
205* 1) Local Phase:
206* The individual pieces are factored independently and in
207* parallel. These factors are applied to the matrix creating
208* fillin, which is stored in a non-inspectable way in auxiliary
209* space AF. Mathematically, this is equivalent to reordering
210* the matrix A as P A P^T and then factoring the principal
211* leading submatrix of size equal to the sum of the sizes of
212* the matrices factored on each processor. The factors of
213* these submatrices overwrite the corresponding parts of A
214* in memory.
215* 2) Reduced System Phase:
216* A small ((P-1)) system is formed representing
217* interaction of the larger blocks, and is stored (as are its
218* factors) in the space AF. A parallel Block Cyclic Reduction
219* algorithm is used. For a linear system, a parallel front solve
220* followed by an analagous backsolve, both using the structure
221* of the factored matrix, are performed.
222* 3) Backsubsitution Phase:
223* For a linear system, a local backsubstitution is performed on
224* each processor in parallel.
225*
226*
227* Descriptors
228* ===========
229*
230* Descriptors now have *types* and differ from ScaLAPACK 1.0.
231*
232* Note: tridiagonal codes can use either the old two dimensional
233* or new one-dimensional descriptors, though the processor grid in
234* both cases *must be one-dimensional*. We describe both types below.
235*
236* Each global data object is described by an associated description
237* vector. This vector stores the information required to establish
238* the mapping between an object element and its corresponding process
239* and memory location.
240*
241* Let A be a generic term for any 2D block cyclicly distributed array.
242* Such a global array has an associated description vector DESCA.
243* In the following comments, the character _ should be read as
244* "of the global array".
245*
246* NOTATION STORED IN EXPLANATION
247* --------------- -------------- --------------------------------------
248* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
249* DTYPE_A = 1.
250* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
251* the BLACS process grid A is distribu-
252* ted over. The context itself is glo-
253* bal, but the handle (the integer
254* value) may vary.
255* M_A (global) DESCA( M_ ) The number of rows in the global
256* array A.
257* N_A (global) DESCA( N_ ) The number of columns in the global
258* array A.
259* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
260* the rows of the array.
261* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
262* the columns of the array.
263* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
264* row of the array A is distributed.
265* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
266* first column of the array A is
267* distributed.
268* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
269* array. LLD_A >= MAX(1,LOCr(M_A)).
270*
271* Let K be the number of rows or columns of a distributed matrix,
272* and assume that its process grid has dimension p x q.
273* LOCr( K ) denotes the number of elements of K that a process
274* would receive if K were distributed over the p processes of its
275* process column.
276* Similarly, LOCc( K ) denotes the number of elements of K that a
277* process would receive if K were distributed over the q processes of
278* its process row.
279* The values of LOCr() and LOCc() may be determined via a call to the
280* ScaLAPACK tool function, NUMROC:
281* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
282* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
283* An upper bound for these quantities may be computed by:
284* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
285* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
286*
287*
288* One-dimensional descriptors:
289*
290* One-dimensional descriptors are a new addition to ScaLAPACK since
291* version 1.0. They simplify and shorten the descriptor for 1D
292* arrays.
293*
294* Since ScaLAPACK supports two-dimensional arrays as the fundamental
295* object, we allow 1D arrays to be distributed either over the
296* first dimension of the array (as if the grid were P-by-1) or the
297* 2nd dimension (as if the grid were 1-by-P). This choice is
298* indicated by the descriptor type (501 or 502)
299* as described below.
300* However, for tridiagonal matrices, since the objects being
301* distributed are the individual vectors storing the diagonals, we
302* have adopted the convention that both the P-by-1 descriptor and
303* the 1-by-P descriptor are allowed and are equivalent for
304* tridiagonal matrices. Thus, for tridiagonal matrices,
305* DTYPE_A = 501 or 502 can be used interchangeably
306* without any other change.
307* We require that the distributed vectors storing the diagonals of a
308* tridiagonal matrix be aligned with each other. Because of this, a
309* single descriptor, DESCA, serves to describe the distribution of
310* of all diagonals simultaneously.
311*
312* IMPORTANT NOTE: the actual BLACS grid represented by the
313* CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
314* irrespective of which one-dimensional descriptor type
315* (501 or 502) is input.
316* This routine will interpret the grid properly either way.
317* ScaLAPACK routines *do not support intercontext operations* so that
318* the grid passed to a single ScaLAPACK routine *must be the same*
319* for all array descriptors passed to that routine.
320*
321* NOTE: In all cases where 1D descriptors are used, 2D descriptors
322* may also be used, since a one-dimensional array is a special case
323* of a two-dimensional array with one dimension of size unity.
324* The two-dimensional array used in this case *must* be of the
325* proper orientation:
326* If the appropriate one-dimensional descriptor is DTYPEA=501
327* (1 by P type), then the two dimensional descriptor must
328* have a CTXT value that refers to a 1 by P BLACS grid;
329* If the appropriate one-dimensional descriptor is DTYPEA=502
330* (P by 1 type), then the two dimensional descriptor must
331* have a CTXT value that refers to a P by 1 BLACS grid.
332*
333*
334* Summary of allowed descriptors, types, and BLACS grids:
335* DTYPE 501 502 1 1
336* BLACS grid 1xP or Px1 1xP or Px1 1xP Px1
337* -----------------------------------------------------
338* A OK OK OK NO
339* B NO OK NO OK
340*
341* Note that a consequence of this chart is that it is not possible
342* for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
343* to opposite requirements for the orientation of the BLACS grid,
344* and as noted before, the *same* BLACS context must be used in
345* all descriptors in a single ScaLAPACK subroutine call.
346*
347* Let A be a generic term for any 1D block cyclicly distributed array.
348* Such a global array has an associated description vector DESCA.
349* In the following comments, the character _ should be read as
350* "of the global array".
351*
352* NOTATION STORED IN EXPLANATION
353* --------------- ---------- ------------------------------------------
354* DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
355* TYPE_A = 501: 1-by-P grid.
356* TYPE_A = 502: P-by-1 grid.
357* CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
358* the BLACS process grid A is distribu-
359* ted over. The context itself is glo-
360* bal, but the handle (the integer
361* value) may vary.
362* N_A (global) DESCA( 3 ) The size of the array dimension being
363* distributed.
364* NB_A (global) DESCA( 4 ) The blocking factor used to distribute
365* the distributed dimension of the array.
366* SRC_A (global) DESCA( 5 ) The process row or column over which the
367* first row or column of the array
368* is distributed.
369* Ignored DESCA( 6 ) Ignored for tridiagonal matrices.
370* Reserved DESCA( 7 ) Reserved for future use.
371*
372*
373*
374* =====================================================================
375*
376* Code Developer: Andrew J. Cleary, University of Tennessee.
377* Current address: Lawrence Livermore National Labs.
378* This version released: August, 2001.
379*
380* =====================================================================
381*
382* ..
383* .. Parameters ..
384 DOUBLE PRECISION ONE, ZERO
385 parameter( one = 1.0d+0 )
386 parameter( zero = 0.0d+0 )
387 COMPLEX*16 CONE, CZERO
388 parameter( cone = ( 1.0d+0, 0.0d+0 ) )
389 parameter( czero = ( 0.0d+0, 0.0d+0 ) )
390 INTEGER INT_ONE
391 parameter( int_one = 1 )
392 INTEGER DESCMULT, BIGNUM
393 parameter(descmult = 100, bignum = descmult * descmult)
394 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
395 $ lld_, mb_, m_, nb_, n_, rsrc_
396 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
397 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
398 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
399* ..
400* .. Local Scalars ..
401 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
402 $ idum2, idum3, ja_new, llda, lldb, mycol, myrow,
403 $ my_num_cols, nb, np, npcol, nprow, np_save,
404 $ odd_size, part_offset, part_size,
405 $ return_code, store_m_b, store_n_a, temp,
406 $ work_size_min
407* ..
408* .. Local Arrays ..
409 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
410 $ param_check( 15, 3 )
411* ..
412* .. External Subroutines ..
413 EXTERNAL blacs_gridinfo, desc_convert, globchk, pxerbla,
415* ..
416* .. External Functions ..
417 LOGICAL LSAME
418 INTEGER NUMROC
419 COMPLEX*16 ZDOTC
420 EXTERNAL lsame, numroc, zdotc
421* ..
422* .. Intrinsic Functions ..
423 INTRINSIC ichar, min, mod
424* ..
425* .. Executable Statements ..
426*
427* Test the input parameters
428*
429 info = 0
430*
431* Convert descriptor into standard form for easy access to
432* parameters, check that grid is of right shape.
433*
434 desca_1xp( 1 ) = 501
435 descb_px1( 1 ) = 502
436*
437 temp = desca( dtype_ )
438 IF( temp .EQ. 502 ) THEN
439* Temporarily set the descriptor type to 1xP type
440 desca( dtype_ ) = 501
441 ENDIF
442*
443 CALL desc_convert( desca, desca_1xp, return_code )
444*
445 desca( dtype_ ) = temp
446*
447 IF( return_code .NE. 0) THEN
448 info = -( 8*100 + 2 )
449 ENDIF
450*
451 CALL desc_convert( descb, descb_px1, return_code )
452*
453 IF( return_code .NE. 0) THEN
454 info = -( 11*100 + 2 )
455 ENDIF
456*
457* Consistency checks for DESCA and DESCB.
458*
459* Context must be the same
460 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) ) THEN
461 info = -( 11*100 + 2 )
462 ENDIF
463*
464* These are alignment restrictions that may or may not be removed
465* in future releases. -Andy Cleary, April 14, 1996.
466*
467* Block sizes must be the same
468 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) ) THEN
469 info = -( 11*100 + 4 )
470 ENDIF
471*
472* Source processor must be the same
473*
474 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) ) THEN
475 info = -( 11*100 + 5 )
476 ENDIF
477*
478* Get values out of descriptor for use in code.
479*
480 ictxt = desca_1xp( 2 )
481 csrc = desca_1xp( 5 )
482 nb = desca_1xp( 4 )
483 llda = desca_1xp( 6 )
484 store_n_a = desca_1xp( 3 )
485 lldb = descb_px1( 6 )
486 store_m_b = descb_px1( 3 )
487*
488* Get grid parameters
489*
490*
491 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
492 np = nprow * npcol
493*
494*
495*
496 IF( lsame( trans, 'N' ) ) THEN
497 idum2 = ichar( 'N' )
498 ELSE IF ( lsame( trans, 'C' ) ) THEN
499 idum2 = ichar( 'C' )
500 ELSE
501 info = -1
502 END IF
503*
504 IF( lwork .LT. -1) THEN
505 info = -15
506 ELSE IF ( lwork .EQ. -1 ) THEN
507 idum3 = -1
508 ELSE
509 idum3 = 1
510 ENDIF
511*
512 IF( n .LT. 0 ) THEN
513 info = -2
514 ENDIF
515*
516 IF( n+ja-1 .GT. store_n_a ) THEN
517 info = -( 8*100 + 6 )
518 ENDIF
519*
520 IF( n+ib-1 .GT. store_m_b ) THEN
521 info = -( 11*100 + 3 )
522 ENDIF
523*
524 IF( lldb .LT. nb ) THEN
525 info = -( 11*100 + 6 )
526 ENDIF
527*
528 IF( nrhs .LT. 0 ) THEN
529 info = -3
530 ENDIF
531*
532* Current alignment restriction
533*
534 IF( ja .NE. ib) THEN
535 info = -7
536 ENDIF
537*
538* Argument checking that is specific to Divide & Conquer routine
539*
540 IF( nprow .NE. 1 ) THEN
541 info = -( 8*100+2 )
542 ENDIF
543*
544 IF( n .GT. np*nb-mod( ja-1, nb )) THEN
545 info = -( 2 )
546 CALL pxerbla( ictxt,
547 $ 'PZDTTRS, D&C alg.: only 1 block per proc',
548 $ -info )
549 RETURN
550 ENDIF
551*
552 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*int_one )) THEN
553 info = -( 8*100+4 )
554 CALL pxerbla( ictxt,
555 $ 'PZDTTRS, D&C alg.: NB too small',
556 $ -info )
557 RETURN
558 ENDIF
559*
560*
561 work_size_min =
562 $ 10*npcol+4*nrhs
563*
564 work( 1 ) = work_size_min
565*
566 IF( lwork .LT. work_size_min ) THEN
567 IF( lwork .NE. -1 ) THEN
568 info = -15
569 CALL pxerbla( ictxt,
570 $ 'PZDTTRS: worksize error',
571 $ -info )
572 ENDIF
573 RETURN
574 ENDIF
575*
576* Pack params and positions into arrays for global consistency check
577*
578 param_check( 15, 1 ) = descb(5)
579 param_check( 14, 1 ) = descb(4)
580 param_check( 13, 1 ) = descb(3)
581 param_check( 12, 1 ) = descb(2)
582 param_check( 11, 1 ) = descb(1)
583 param_check( 10, 1 ) = ib
584 param_check( 9, 1 ) = desca(5)
585 param_check( 8, 1 ) = desca(4)
586 param_check( 7, 1 ) = desca(3)
587 param_check( 6, 1 ) = desca(1)
588 param_check( 5, 1 ) = ja
589 param_check( 4, 1 ) = nrhs
590 param_check( 3, 1 ) = n
591 param_check( 2, 1 ) = idum3
592 param_check( 1, 1 ) = idum2
593*
594 param_check( 15, 2 ) = 1105
595 param_check( 14, 2 ) = 1104
596 param_check( 13, 2 ) = 1103
597 param_check( 12, 2 ) = 1102
598 param_check( 11, 2 ) = 1101
599 param_check( 10, 2 ) = 10
600 param_check( 9, 2 ) = 805
601 param_check( 8, 2 ) = 804
602 param_check( 7, 2 ) = 803
603 param_check( 6, 2 ) = 801
604 param_check( 5, 2 ) = 7
605 param_check( 4, 2 ) = 3
606 param_check( 3, 2 ) = 2
607 param_check( 2, 2 ) = 15
608 param_check( 1, 2 ) = 1
609*
610* Want to find errors with MIN( ), so if no error, set it to a big
611* number. If there already is an error, multiply by the the
612* descriptor multiplier.
613*
614 IF( info.GE.0 ) THEN
615 info = bignum
616 ELSE IF( info.LT.-descmult ) THEN
617 info = -info
618 ELSE
619 info = -info * descmult
620 END IF
621*
622* Check consistency across processors
623*
624 CALL globchk( ictxt, 15, param_check, 15,
625 $ param_check( 1, 3 ), info )
626*
627* Prepare output: set info = 0 if no error, and divide by DESCMULT
628* if error is not in a descriptor entry.
629*
630 IF( info.EQ.bignum ) THEN
631 info = 0
632 ELSE IF( mod( info, descmult ) .EQ. 0 ) THEN
633 info = -info / descmult
634 ELSE
635 info = -info
636 END IF
637*
638 IF( info.LT.0 ) THEN
639 CALL pxerbla( ictxt, 'PZDTTRS', -info )
640 RETURN
641 END IF
642*
643* Quick return if possible
644*
645 IF( n.EQ.0 )
646 $ RETURN
647*
648 IF( nrhs.EQ.0 )
649 $ RETURN
650*
651*
652* Adjust addressing into matrix space to properly get into
653* the beginning part of the relevant data
654*
655 part_offset = nb*( (ja-1)/(npcol*nb) )
656*
657 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb ) THEN
658 part_offset = part_offset + nb
659 ENDIF
660*
661 IF ( mycol .LT. csrc ) THEN
662 part_offset = part_offset - nb
663 ENDIF
664*
665* Form a new BLACS grid (the "standard form" grid) with only procs
666* holding part of the matrix, of size 1xNP where NP is adjusted,
667* starting at csrc=0, with JA modified to reflect dropped procs.
668*
669* First processor to hold part of the matrix:
670*
671 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
672*
673* Calculate new JA one while dropping off unused processors.
674*
675 ja_new = mod( ja-1, nb ) + 1
676*
677* Save and compute new value of NP
678*
679 np_save = np
680 np = ( ja_new+n-2 )/nb + 1
681*
682* Call utility routine that forms "standard-form" grid
683*
684 CALL reshape( ictxt, int_one, ictxt_new, int_one,
685 $ first_proc, int_one, np )
686*
687* Use new context from standard grid as context.
688*
689 ictxt_save = ictxt
690 ictxt = ictxt_new
691 desca_1xp( 2 ) = ictxt_new
692 descb_px1( 2 ) = ictxt_new
693*
694* Get information about new grid.
695*
696 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
697*
698* Drop out processors that do not have part of the matrix.
699*
700 IF( myrow .LT. 0 ) THEN
701 GOTO 1234
702 ENDIF
703*
704* ********************************
705* Values reused throughout routine
706*
707* User-input value of partition size
708*
709 part_size = nb
710*
711* Number of columns in each processor
712*
713 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
714*
715* Offset in columns to beginning of main partition in each proc
716*
717 IF ( mycol .EQ. 0 ) THEN
718 part_offset = part_offset+mod( ja_new-1, part_size )
719 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
720 ENDIF
721*
722* Size of main (or odd) partition in each processor
723*
724 odd_size = my_num_cols
725 IF ( mycol .LT. np-1 ) THEN
726 odd_size = odd_size - int_one
727 ENDIF
728*
729*
730*
731* Begin main code
732*
733 info = 0
734*
735* Call frontsolve routine
736*
737 IF( lsame( trans, 'N' ) ) THEN
738*
739 CALL pzdttrsv( 'L', 'N', n, nrhs, dl( part_offset+1 ),
740 $ d( part_offset+1 ), du( part_offset+1 ), ja_new,
741 $ desca_1xp, b, ib, descb_px1, af, laf, work,
742 $ lwork, info )
743*
744 ELSE
745*
746 CALL pzdttrsv( 'U', 'C', n, nrhs, dl( part_offset+1 ),
747 $ d( part_offset+1 ), du( part_offset+1 ), ja_new,
748 $ desca_1xp, b, ib, descb_px1, af, laf, work,
749 $ lwork, info )
750*
751 ENDIF
752*
753* Call backsolve routine
754*
755 IF( lsame( trans, 'C' ) ) THEN
756*
757 CALL pzdttrsv( 'L', 'C', n, nrhs, dl( part_offset+1 ),
758 $ d( part_offset+1 ), du( part_offset+1 ), ja_new,
759 $ desca_1xp, b, ib, descb_px1, af, laf, work,
760 $ lwork, info )
761*
762 ELSE
763*
764 CALL pzdttrsv( 'U', 'N', n, nrhs, dl( part_offset+1 ),
765 $ d( part_offset+1 ), du( part_offset+1 ), ja_new,
766 $ desca_1xp, b, ib, descb_px1, af, laf, work,
767 $ lwork, info )
768*
769 ENDIF
770 1000 CONTINUE
771*
772*
773* Free BLACS space used to hold standard-form grid.
774*
775 IF( ictxt_save .NE. ictxt_new ) THEN
776 CALL blacs_gridexit( ictxt_new )
777 ENDIF
778*
779 1234 CONTINUE
780*
781* Restore saved input parameters
782*
783 ictxt = ictxt_save
784 np = np_save
785*
786* Output minimum worksize
787*
788 work( 1 ) = work_size_min
789*
790*
791 RETURN
792*
793* End of PZDTTRS
794*
795 END
subroutine desc_convert(desc_in, desc_out, info)
Definition desc_convert.f:2
#define min(A, B)
Definition pcgemr.c:181
subroutine globchk(ictxt, n, x, ldx, iwork, info)
Definition pchkxmat.f:403
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
subroutine pzdttrs(trans, n, nrhs, dl, d, du, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pzdttrs.f:3
subroutine pzdttrsv(uplo, trans, n, nrhs, dl, d, du, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pzdttrsv.f:3
void reshape(Int *context_in, Int *major_in, Int *context_out, Int *major_out, Int *first_proc, Int *nprow_new, Int *npcol_new)
Definition reshape.c:80