ScaLAPACK 2.2.2
pspttrs.f
      SUBROUTINE pspttrs( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF,
     $                    LAF, WORK, LWORK, INFO )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     April 3, 2000
*
*     .. Scalar Arguments ..
      INTEGER            IB, INFO, JA, LAF, LWORK, N, NRHS
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCB( * )
      REAL               AF( * ), B( * ), D( * ), E( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PSPTTRS solves a system of linear equations
*
*            A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
*
*  where A(1:N, JA:JA+N-1) is the matrix used to produce the factors
*  stored in A(1:N, JA:JA+N-1) and AF by PSPTTRF.
*  A(1:N, JA:JA+N-1) is an N-by-N real tridiagonal symmetric positive
*  definite distributed matrix.
*
*  Routine PSPTTRF MUST be called first.
*
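*  A minimal sketch of the intended factor/solve sequence (assuming
*  the distributed vectors D and E, the right-hand sides B, and the
*  spaces AF and WORK have already been set up as described under
*  Arguments below; the surrounding control flow is illustrative,
*  not part of this routine):
*
*     CALL PSPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO )
*     IF( INFO.EQ.0 ) THEN
*        CALL PSPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF,
*    $                 LAF, WORK, LWORK, INFO )
*     END IF
*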
*  =====================================================================
*
*  Arguments
*  =========
*
*  N       (global input) INTEGER
*          The number of rows and columns to be operated on, i.e. the
*          order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
*
*  NRHS    (global input) INTEGER
*          The number of right hand sides, i.e., the number of columns
*          of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
*          NRHS >= 0.
*
*  D       (local input/local output) REAL pointer to local
*          part of global vector storing the main diagonal of the
*          matrix.
*          On exit, this array contains information on the factors
*          of the matrix.
*          Must be of size >= DESCA( NB_ ).
*
*  E       (local input/local output) REAL pointer to local
*          part of global vector storing the upper diagonal of the
*          matrix. Globally, E(n) is not referenced, and E must be
*          aligned with D.
*          On exit, this array contains information on the factors
*          of the matrix.
*          Must be of size >= DESCA( NB_ ).
*
*  JA      (global input) INTEGER
*          The index in the global array A that points to the start of
*          the matrix to be operated on (which may be either all of A
*          or a submatrix of A).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN.
*          if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
*          if 2D type (DTYPE_A=1), DLEN >= 9.
*          The array descriptor for the distributed matrix A.
*          Contains information on the mapping of A to memory. Please
*          see NOTES below for full description and options.
*
*  B       (local input/local output) REAL pointer into
*          local memory to an array of local lead dimension lld_b>=NB.
*          On entry, this array contains the local pieces of the
*          right hand sides B(IB:IB+N-1, 1:NRHS).
*          On exit, this contains the local piece of the solution
*          distributed matrix X.
*
*  IB      (global input) INTEGER
*          The row index in the global array B that points to the first
*          row of the matrix to be operated on (which may be either
*          all of B or a submatrix of B).
*          IMPORTANT NOTE: The current version of this code supports
*          only IB=JA.
*
*  DESCB   (global and local input) INTEGER array of dimension DLEN.
*          if 1D type (DTYPE_B=502), DLEN >= 7;
*          if 2D type (DTYPE_B=1), DLEN >= 9.
*          The array descriptor for the distributed matrix B.
*          Contains information on the mapping of B to memory. Please
*          see NOTES below for full description and options.
*
*  AF      (local output) REAL array, dimension LAF.
*          Auxiliary Fillin Space.
*          Fillin is created during the factorization routine
*          PSPTTRF and this is stored in AF. If a linear system
*          is to be solved using PSPTTRS after the factorization
*          routine, AF *must not be altered* after the factorization.
*
*  LAF     (local input) INTEGER
*          Size of user-input Auxiliary Fillin space AF. Must be >=
*          (NB+2).
*          If LAF is not large enough, an error code will be returned
*          and the minimum acceptable size will be returned in AF( 1 ).
*
*  WORK    (local workspace/local output)
*          REAL temporary workspace. This space may
*          be overwritten in between calls to routines. WORK must be
*          the size given in LWORK.
*          On exit, WORK( 1 ) contains the minimal LWORK.
*
*  LWORK   (local input or global input) INTEGER
*          Size of user-input workspace WORK.
*          If LWORK is too small, the minimal acceptable size will be
*          returned in WORK(1) and an error code is returned. LWORK >=
*          (10+2*min(100,NRHS))*NPCOL+4*NRHS
*
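*          As a sketch, the minimal workspace can be obtained with a
*          workspace query before allocating WORK: passing LWORK = -1
*          makes the routine return the minimal size in WORK( 1 )
*          without solving (surrounding names are illustrative):
*
*             CALL PSPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB,
*    $                      AF, LAF, WORK, -1, INFO )
*             LWORK_MIN = INT( WORK( 1 ) )
*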
*  INFO    (local output) INTEGER
*          = 0:  successful exit
*          < 0:  If the i-th argument is an array and the j-th entry
*                had an illegal value, then INFO = -(i*100+j); if the
*                i-th argument is a scalar and had an illegal value,
*                then INFO = -i.
*
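*          As an illustration of this encoding: INFO = -(5*100+2) =
*          -502 flags entry 2 of argument 5 (DESCA). A caller can
*          decode a negative INFO as follows (names illustrative):
*
*             IF( INFO.LT.-100 ) THEN
*                IARG = -INFO / 100
*                IENT = MOD( -INFO, 100 )
*             END IF
*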
*  =====================================================================
*
*
*  Restrictions
*  ============
*
*  The following are restrictions on the input parameters. Some of these
*  are temporary and will be removed in future releases, while others
*  may reflect fundamental technical limitations.
*
*  Non-cyclic restriction: VERY IMPORTANT!
*    P*NB >= mod(JA-1,NB)+N.
*    The mapping for matrices must be blocked, reflecting the nature
*    of the divide and conquer algorithm as a task-parallel algorithm.
*    This formula in words is: no processor may have more than one
*    chunk of the matrix.
*
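*    As a sketch, a caller can verify this restriction before the
*    call (P, NB, JA, N as described in this file; the check itself
*    is illustrative, since the routine also tests it internally):
*
*       IF( P*NB .LT. MOD( JA-1, NB ) + N ) THEN
*          INFO = -1
*       END IF
*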
*  Blocksize cannot be too small:
*    If the matrix spans more than one processor, the following
*    restriction on NB, the size of each block on each processor,
*    must hold:
*            NB >= 2
*    The bulk of parallel computation is done on the matrix of size
*    O(NB) on each processor. If this is too small, divide and conquer
*    is a poor choice of algorithm.
*
*  Submatrix reference:
*    JA = IB
*    Alignment restriction that prevents unnecessary communication.
*
*
*  =====================================================================
*
*
*  Notes
*  =====
*
*  If the factorization routine and the solve routine are to be called
*  separately (to solve various sets of right-hand sides using the same
*  coefficient matrix), the auxiliary space AF *must not be altered*
*  between calls to the factorization routine and the solve routine.
*
*  The best algorithm for solving banded and tridiagonal linear systems
*  depends on a variety of parameters, especially the bandwidth.
*  Currently, only algorithms designed for the case N/P >> bw are
*  implemented. These go by many names, including Divide and Conquer,
*  Partitioning, domain decomposition-type, etc.
*  For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
*  algorithms are the appropriate choice.
*
*  Algorithm description: Divide and Conquer
*
*  The Divide and Conquer algorithm assumes the matrix is narrowly
*  banded compared with the number of equations. In this situation,
*  it is best to distribute the input matrix A one-dimensionally,
*  with columns atomic and rows divided amongst the processes.
*  The basic algorithm divides the tridiagonal matrix up into
*  P pieces with one stored on each processor,
*  and then proceeds in 2 phases for the factorization or 3 for the
*  solution of a linear system.
*  1) Local Phase:
*     The individual pieces are factored independently and in
*     parallel. These factors are applied to the matrix creating
*     fillin, which is stored in a non-inspectable way in auxiliary
*     space AF. Mathematically, this is equivalent to reordering
*     the matrix A as P A P^T and then factoring the principal
*     leading submatrix of size equal to the sum of the sizes of
*     the matrices factored on each processor. The factors of
*     these submatrices overwrite the corresponding parts of A
*     in memory.
*  2) Reduced System Phase:
*     A small (P-1) x (P-1) system is formed representing the
*     interaction of the larger blocks, and is stored (as are its
*     factors) in the space AF. A parallel Block Cyclic Reduction
*     algorithm is used. For a linear system, a parallel front solve
*     followed by an analogous backsolve, both using the structure
*     of the factored matrix, are performed.
*  3) Backsubstitution Phase:
*     For a linear system, a local backsubstitution is performed on
*     each processor in parallel.
*
*  Descriptors
*  ===========
*
*  Descriptors now have *types* and differ from ScaLAPACK 1.0.
*
*  Note: tridiagonal codes can use either the old two-dimensional
*  or new one-dimensional descriptors, though the processor grid in
*  both cases *must be one-dimensional*. We describe both types below.
*
*  Each global data object is described by an associated description
*  vector. This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array. Such a global array has an associated description vector
*  DESCA. In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
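*  As a sketch, the number of local columns held by this process for
*  the tridiagonal vectors can be obtained as follows (ICTXT, NB and
*  CSRC as in the one-dimensional descriptor described below; names
*  are illustrative):
*
*     CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*     NP_COLS = NUMROC( N, NB, MYCOL, CSRC, NPCOL )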
*
*  One-dimensional descriptors:
*
*  One-dimensional descriptors are a new addition to ScaLAPACK since
*  version 1.0. They simplify and shorten the descriptor for 1D
*  arrays.
*
*  Since ScaLAPACK supports two-dimensional arrays as the fundamental
*  object, we allow 1D arrays to be distributed either over the
*  first dimension of the array (as if the grid were P-by-1) or the
*  2nd dimension (as if the grid were 1-by-P). This choice is
*  indicated by the descriptor type (501 or 502)
*  as described below.
*  However, for tridiagonal matrices, since the objects being
*  distributed are the individual vectors storing the diagonals, we
*  have adopted the convention that both the P-by-1 descriptor and
*  the 1-by-P descriptor are allowed and are equivalent for
*  tridiagonal matrices. Thus, for tridiagonal matrices,
*  DTYPE_A = 501 or 502 can be used interchangeably
*  without any other change.
*  We require that the distributed vectors storing the diagonals of a
*  tridiagonal matrix be aligned with each other. Because of this, a
*  single descriptor, DESCA, serves to describe the distribution
*  of all diagonals simultaneously.
*
*    IMPORTANT NOTE: the actual BLACS grid represented by the
*    CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
*    irrespective of which one-dimensional descriptor type
*    (501 or 502) is input.
*    This routine will interpret the grid properly either way.
*    ScaLAPACK routines *do not support intercontext operations* so that
*    the grid passed to a single ScaLAPACK routine *must be the same*
*    for all array descriptors passed to that routine.
*
*    NOTE: In all cases where 1D descriptors are used, 2D descriptors
*    may also be used, since a one-dimensional array is a special case
*    of a two-dimensional array with one dimension of size unity.
*    The two-dimensional array used in this case *must* be of the
*    proper orientation:
*      If the appropriate one-dimensional descriptor is DTYPE_A=501
*      (1 by P type), then the two-dimensional descriptor must
*      have a CTXT value that refers to a 1 by P BLACS grid;
*      If the appropriate one-dimensional descriptor is DTYPE_A=502
*      (P by 1 type), then the two-dimensional descriptor must
*      have a CTXT value that refers to a P by 1 BLACS grid.
*
*
*  Summary of allowed descriptors, types, and BLACS grids:
*    DTYPE        501         502         1          1
*    BLACS grid   1xP or Px1  1xP or Px1  1xP        Px1
*    -----------------------------------------------------
*    A            OK          OK          OK         NO
*    B            NO          OK          NO         OK
*
*  Note that a consequence of this chart is that it is not possible
*  for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
*  to opposite requirements for the orientation of the BLACS grid,
*  and as noted before, the *same* BLACS context must be used in
*  all descriptors in a single ScaLAPACK subroutine call.
*
*  Let A be a generic term for any 1D block cyclically distributed
*  array. Such a global array has an associated description vector
*  DESCA. In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN  EXPLANATION
*  --------------- ---------- ------------------------------------------
*  DTYPE_A(global) DESCA( 1 ) The descriptor type.  For 1D grids,
*                             DTYPE_A = 501: 1-by-P grid.
*                             DTYPE_A = 502: P-by-1 grid.
*  CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
*                             the BLACS process grid A is distribu-
*                             ted over. The context itself is glo-
*                             bal, but the handle (the integer
*                             value) may vary.
*  N_A    (global) DESCA( 3 ) The size of the array dimension being
*                             distributed.
*  NB_A   (global) DESCA( 4 ) The blocking factor used to distribute
*                             the distributed dimension of the array.
*  SRC_A  (global) DESCA( 5 ) The process row or column over which the
*                             first row or column of the array
*                             is distributed.
*  Ignored         DESCA( 6 ) Ignored for tridiagonal matrices.
*  Reserved        DESCA( 7 ) Reserved for future use.
*
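*  A sketch of filling a type-502 (P-by-1) descriptor by hand for the
*  tridiagonal vectors (ICTXT obtained from the BLACS; zero-filling
*  the ignored/reserved entries is an assumption for tidiness, not a
*  requirement stated by this routine):
*
*     DESCA( 1 ) = 502
*     DESCA( 2 ) = ICTXT
*     DESCA( 3 ) = N
*     DESCA( 4 ) = NB
*     DESCA( 5 ) = 0
*     DESCA( 6 ) = 0
*     DESCA( 7 ) = 0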
*
*  =====================================================================
*
*  Code Developer: Andrew J. Cleary, University of Tennessee.
*    Current address: Lawrence Livermore National Labs.
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               ONE
      parameter( one = 1.0e+0 )
      INTEGER            INT_ONE
      parameter( int_one = 1 )
      INTEGER            DESCMULT, BIGNUM
      parameter( descmult = 100, bignum = descmult*descmult )
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   lld_, mb_, m_, nb_, n_, rsrc_
      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
     $           ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
     $           rsrc_ = 7, csrc_ = 8, lld_ = 9 )
*     ..
*     .. Local Scalars ..
      INTEGER            CSRC, FIRST_PROC, I, ICTXT, ICTXT_NEW,
     $                   ictxt_save, idum3, ja_new, llda, lldb, mycol,
     $                   myrow, my_num_cols, nb, np, npcol, nprow,
     $                   np_save, odd_size, part_offset, part_size,
     $                   return_code, store_m_b, store_n_a, temp,
     $                   work_size_min
*     ..
*     .. Local Arrays ..
      INTEGER            DESCA_1XP( 7 ), DESCB_PX1( 7 ),
     $                   param_check( 14, 3 )
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_gridexit, blacs_gridinfo, desc_convert,
     $                   globchk, pspttrsv, pxerbla, reshape, sscal
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           numroc
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          min, mod, real
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters
*
      info = 0
*
*     Convert descriptor into standard form for easy access to
*     parameters, check that grid is of right shape.
*
      desca_1xp( 1 ) = 501
      descb_px1( 1 ) = 502
*
      temp = desca( dtype_ )
      IF( temp.EQ.502 ) THEN
*        Temporarily set the descriptor type to 1xP type
         desca( dtype_ ) = 501
      END IF
*
      CALL desc_convert( desca, desca_1xp, return_code )
*
      desca( dtype_ ) = temp
*
      IF( return_code.NE.0 ) THEN
         info = -( 5*100+2 )
      END IF
*
      CALL desc_convert( descb, descb_px1, return_code )
*
      IF( return_code.NE.0 ) THEN
         info = -( 8*100+2 )
      END IF
*
*     Consistency checks for DESCA and DESCB.
*
*     Context must be the same
      IF( desca_1xp( 2 ).NE.descb_px1( 2 ) ) THEN
         info = -( 8*100+2 )
      END IF
*
*     These are alignment restrictions that may or may not be removed
*     in future releases. -Andy Cleary, April 14, 1996.
*
*     Block sizes must be the same
      IF( desca_1xp( 4 ).NE.descb_px1( 4 ) ) THEN
         info = -( 8*100+4 )
      END IF
*
*     Source processor must be the same
*
      IF( desca_1xp( 5 ).NE.descb_px1( 5 ) ) THEN
         info = -( 8*100+5 )
      END IF
*
*     Get values out of descriptor for use in code.
*
      ictxt = desca_1xp( 2 )
      csrc = desca_1xp( 5 )
      nb = desca_1xp( 4 )
      llda = desca_1xp( 6 )
      store_n_a = desca_1xp( 3 )
      lldb = descb_px1( 6 )
      store_m_b = descb_px1( 3 )
*
*     Get grid parameters
*
      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
      np = nprow*npcol
*
      IF( lwork.LT.-1 ) THEN
         info = -12
      ELSE IF( lwork.EQ.-1 ) THEN
         idum3 = -1
      ELSE
         idum3 = 1
      END IF
*
      IF( n.LT.0 ) THEN
         info = -1
      END IF
*
      IF( n+ja-1.GT.store_n_a ) THEN
         info = -( 5*100+6 )
      END IF
*
      IF( n+ib-1.GT.store_m_b ) THEN
         info = -( 8*100+3 )
      END IF
*
      IF( lldb.LT.nb ) THEN
         info = -( 8*100+6 )
      END IF
*
      IF( nrhs.LT.0 ) THEN
         info = -2
      END IF
*
*     Current alignment restriction
*
      IF( ja.NE.ib ) THEN
         info = -4
      END IF
*
*     Argument checking that is specific to Divide & Conquer routine
*
      IF( nprow.NE.1 ) THEN
         info = -( 5*100+2 )
      END IF
*
      IF( n.GT.np*nb-mod( ja-1, nb ) ) THEN
         info = -( 1 )
         CALL pxerbla( ictxt,
     $                 'PSPTTRS, D&C alg.: only 1 block per proc',
     $                 -info )
         RETURN
      END IF
*
      IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*int_one ) ) THEN
         info = -( 5*100+4 )
         CALL pxerbla( ictxt, 'PSPTTRS, D&C alg.: NB too small',
     $                 -info )
         RETURN
      END IF
*
      work_size_min = ( 10+2*min( 100, nrhs ) )*npcol + 4*nrhs
*
      work( 1 ) = work_size_min
*
      IF( lwork.LT.work_size_min ) THEN
         IF( lwork.NE.-1 ) THEN
            info = -12
            CALL pxerbla( ictxt, 'PSPTTRS: worksize error', -info )
         END IF
         RETURN
      END IF
*
*     Pack params and positions into arrays for global consistency check
*
      param_check( 14, 1 ) = descb( 5 )
      param_check( 13, 1 ) = descb( 4 )
      param_check( 12, 1 ) = descb( 3 )
      param_check( 11, 1 ) = descb( 2 )
      param_check( 10, 1 ) = descb( 1 )
      param_check( 9, 1 ) = ib
      param_check( 8, 1 ) = desca( 5 )
      param_check( 7, 1 ) = desca( 4 )
      param_check( 6, 1 ) = desca( 3 )
      param_check( 5, 1 ) = desca( 1 )
      param_check( 4, 1 ) = ja
      param_check( 3, 1 ) = nrhs
      param_check( 2, 1 ) = n
      param_check( 1, 1 ) = idum3
*
      param_check( 14, 2 ) = 905
      param_check( 13, 2 ) = 904
      param_check( 12, 2 ) = 903
      param_check( 11, 2 ) = 902
      param_check( 10, 2 ) = 901
      param_check( 9, 2 ) = 8
      param_check( 8, 2 ) = 505
      param_check( 7, 2 ) = 504
      param_check( 6, 2 ) = 503
      param_check( 5, 2 ) = 501
      param_check( 4, 2 ) = 4
      param_check( 3, 2 ) = 2
      param_check( 2, 2 ) = 1
      param_check( 1, 2 ) = 12
*
*     Want to find errors with MIN( ), so if no error, set it to a big
*     number. If there already is an error, multiply by the
*     descriptor multiplier.
*
      IF( info.GE.0 ) THEN
         info = bignum
      ELSE IF( info.LT.-descmult ) THEN
         info = -info
      ELSE
         info = -info*descmult
      END IF
*
*     Check consistency across processors
*
      CALL globchk( ictxt, 14, param_check, 14, param_check( 1, 3 ),
     $              info )
*
*     Prepare output: set info = 0 if no error, and divide by DESCMULT
*     if error is not in a descriptor entry.
*
      IF( info.EQ.bignum ) THEN
         info = 0
      ELSE IF( mod( info, descmult ).EQ.0 ) THEN
         info = -info / descmult
      ELSE
         info = -info
      END IF
*
      IF( info.LT.0 ) THEN
         CALL pxerbla( ictxt, 'PSPTTRS', -info )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( n.EQ.0 )
     $   RETURN
*
      IF( nrhs.EQ.0 )
     $   RETURN
*
*     Adjust addressing into matrix space to properly get into
*     the beginning part of the relevant data
*
      part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
*
      IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb ) THEN
         part_offset = part_offset + nb
      END IF
*
      IF( mycol.LT.csrc ) THEN
         part_offset = part_offset - nb
      END IF
*
*     Form a new BLACS grid (the "standard form" grid) with only procs
*     holding part of the matrix, of size 1xNP where NP is adjusted,
*     starting at csrc=0, with JA modified to reflect dropped procs.
*
*     First processor to hold part of the matrix:
*
      first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
*
*     Calculate new JA while dropping off unused processors.
*
      ja_new = mod( ja-1, nb ) + 1
*
*     Save and compute new value of NP
*
      np_save = np
      np = ( ja_new+n-2 ) / nb + 1
*
*     Call utility routine that forms "standard-form" grid
*
      CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
     $              int_one, np )
*
*     Use new context from standard grid as context.
*
      ictxt_save = ictxt
      ictxt = ictxt_new
      desca_1xp( 2 ) = ictxt_new
      descb_px1( 2 ) = ictxt_new
*
*     Get information about new grid.
*
      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
*
*     Drop out processors that do not have part of the matrix.
*
      IF( myrow.LT.0 ) THEN
         GO TO 30
      END IF
*
*     ********************************
*     Values reused throughout routine
*
*     User-input value of partition size
*
      part_size = nb
*
*     Number of columns in each processor
*
      my_num_cols = numroc( n, part_size, mycol, 0, npcol )
*
*     Offset in columns to beginning of main partition in each proc
*
      IF( mycol.EQ.0 ) THEN
         part_offset = part_offset + mod( ja_new-1, part_size )
         my_num_cols = my_num_cols - mod( ja_new-1, part_size )
      END IF
*
*     Size of main (or odd) partition in each processor
*
      odd_size = my_num_cols
      IF( mycol.LT.np-1 ) THEN
         odd_size = odd_size - int_one
      END IF
*
*     Begin main code
*
      info = 0
*
*     Call frontsolve routine
*
      CALL pspttrsv( 'L', n, nrhs, d( part_offset+1 ),
     $               e( part_offset+1 ), ja_new, desca_1xp, b, ib,
     $               descb_px1, af, laf, work, lwork, info )
*
*     Divide by the main diagonal: B <- D^{-1} B
*
*     The main partition is first
*
      DO 10 i = part_offset + 1, part_offset + odd_size
         CALL sscal( nrhs, real( one / d( i ) ), b( i ), lldb )
   10 CONTINUE
*
*     Reduced system is next
*
      IF( mycol.LT.npcol-1 ) THEN
         i = part_offset + odd_size + 1
         CALL sscal( nrhs, one / af( odd_size+2 ), b( i ), lldb )
      END IF
*
*     Call backsolve routine
*
      CALL pspttrsv( 'U', n, nrhs, d( part_offset+1 ),
     $               e( part_offset+1 ), ja_new, desca_1xp, b, ib,
     $               descb_px1, af, laf, work, lwork, info )
   20 CONTINUE
*
*     Free BLACS space used to hold standard-form grid.
*
      IF( ictxt_save.NE.ictxt_new ) THEN
         CALL blacs_gridexit( ictxt_new )
      END IF
*
   30 CONTINUE
*
*     Restore saved input parameters
*
      ictxt = ictxt_save
      np = np_save
*
*     Output minimum worksize
*
      work( 1 ) = work_size_min
*
      RETURN
*
*     End of PSPTTRS
*
      END