ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlascl.f
Go to the documentation of this file.
1  SUBROUTINE pdlascl( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA,
2  $ INFO )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER TYPE
11  INTEGER IA, INFO, JA, M, N
12  DOUBLE PRECISION CFROM, CTO
13 * ..
14 * .. Array Arguments ..
15  INTEGER DESCA( * )
16  DOUBLE PRECISION A( * )
17 * ..
18 *
19 * Purpose
20 * =======
21 *
22 * PDLASCL multiplies the M-by-N real distributed matrix sub( A )
23 * denoting A(IA:IA+M-1,JA:JA+N-1) by the real scalar CTO/CFROM. This
24 * is done without over/underflow as long as the final result
25 * CTO * A(I,J) / CFROM does not over/underflow. TYPE specifies that
26 * sub( A ) may be full, upper triangular, lower triangular or upper
27 * Hessenberg.
28 *
29 * Notes
30 * =====
31 *
32 * Each global data object is described by an associated description
33 * vector. This vector stores the information required to establish
34 * the mapping between an object element and its corresponding process
35 * and memory location.
36 *
37 * Let A be a generic term for any 2D block cyclicly distributed array.
38 * Such a global array has an associated description vector DESCA.
39 * In the following comments, the character _ should be read as
40 * "of the global array".
41 *
42 * NOTATION STORED IN EXPLANATION
43 * --------------- -------------- --------------------------------------
44 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
45 * DTYPE_A = 1.
46 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
47 * the BLACS process grid A is distribu-
48 * ted over. The context itself is glo-
49 * bal, but the handle (the integer
50 * value) may vary.
51 * M_A (global) DESCA( M_ ) The number of rows in the global
52 * array A.
53 * N_A (global) DESCA( N_ ) The number of columns in the global
54 * array A.
55 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
56 * the rows of the array.
57 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
58 * the columns of the array.
59 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60 * row of the array A is distributed.
61 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
62 * first column of the array A is
63 * distributed.
64 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
65 * array. LLD_A >= MAX(1,LOCr(M_A)).
66 *
67 * Let K be the number of rows or columns of a distributed matrix,
68 * and assume that its process grid has dimension p x q.
69 * LOCr( K ) denotes the number of elements of K that a process
70 * would receive if K were distributed over the p processes of its
71 * process column.
72 * Similarly, LOCc( K ) denotes the number of elements of K that a
73 * process would receive if K were distributed over the q processes of
74 * its process row.
75 * The values of LOCr() and LOCc() may be determined via a call to the
76 * ScaLAPACK tool function, NUMROC:
77 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
78 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
79 * An upper bound for these quantities may be computed by:
80 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
81 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
82 *
83 * Arguments
84 * =========
85 *
86 * TYPE (global input) CHARACTER
87 * TYPE indices the storage type of the input distributed
88 * matrix.
89 * = 'G': sub( A ) is a full matrix,
90 * = 'L': sub( A ) is a lower triangular matrix,
91 * = 'U': sub( A ) is an upper triangular matrix,
92 * = 'H': sub( A ) is an upper Hessenberg matrix.
93 *
94 * CFROM (global input) DOUBLE PRECISION
95 * CTO (global input) DOUBLE PRECISION
96 * The distributed matrix sub( A ) is multiplied by CTO/CFROM.
97 * A(I,J) is computed without over/underflow if the final
98 * result CTO * A(I,J) / CFROM can be represented without
99 * over/underflow. CFROM must be nonzero.
100 *
101 * M (global input) INTEGER
102 * The number of rows to be operated on i.e the number of rows
103 * of the distributed submatrix sub( A ). M >= 0.
104 *
105 * N (global input) INTEGER
106 * The number of columns to be operated on i.e the number of
107 * columns of the distributed submatrix sub( A ). N >= 0.
108 *
109 * A (local input/local output) DOUBLE PRECISION pointer into the
110 * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
111 * This array contains the local pieces of the distributed
112 * matrix sub( A ). On exit, this array contains the local
113 * pieces of the distributed matrix multiplied by CTO/CFROM.
114 *
115 * IA (global input) INTEGER
116 * The row index in the global array A indicating the first
117 * row of sub( A ).
118 *
119 * JA (global input) INTEGER
120 * The column index in the global array A indicating the
121 * first column of sub( A ).
122 *
123 * DESCA (global and local input) INTEGER array of dimension DLEN_.
124 * The array descriptor for the distributed matrix A.
125 *
126 * INFO (local output) INTEGER
127 * = 0: successful exit
128 * < 0: If the i-th argument is an array and the j-entry had
129 * an illegal value, then INFO = -(i*100+j), if the i-th
130 * argument is a scalar and had an illegal value, then
131 * INFO = -i.
132 *
133 * =====================================================================
134 *
135 * .. Parameters ..
136  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
137  $ lld_, mb_, m_, nb_, n_, rsrc_
138  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
139  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
140  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
141  DOUBLE PRECISION ONE, ZERO
142  parameter( zero = 0.0d0, one = 1.0d0 )
143 * ..
144 * .. Local Scalars ..
145  LOGICAL DONE
146  INTEGER IACOL, IAROW, ICOFFA, ICTXT, ICURCOL, ICURROW,
147  $ iia, ii, inxtrow, ioffa, iroffa, itype, j, jb,
148  $ jja, jj, jn, kk, lda, ll, mycol, myrow, mp,
149  $ npcol, nprow, nq
150  DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM
151 * ..
152 * .. External Subroutines ..
153  EXTERNAL blacs_gridinfo, chk1mat, infog2l, pxerbla
154 * ..
155 * .. External Functions ..
156  LOGICAL LSAME, DISNAN
157  INTEGER ICEIL, NUMROC
158  DOUBLE PRECISION PDLAMCH
159  EXTERNAL disnan, iceil, lsame, numroc, pdlamch
160 * ..
161 * .. Intrinsic Functions ..
162  INTRINSIC abs, min, mod
163 * ..
164 * .. Executable Statements ..
165 *
166 * Get grid parameters
167 *
168  ictxt = desca( ctxt_ )
169  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
170 *
171 * Test the input parameters
172 *
173  IF( nprow.EQ.-1 ) THEN
174  info = -907
175  ELSE
176  info = 0
177  CALL chk1mat( m, 4, n, 6, ia, ja, desca, 9, info )
178  IF( info.EQ.0 ) THEN
179  IF( lsame( TYPE, 'G' ) ) then
180  itype = 0
181  ELSE IF( lsame( TYPE, 'L' ) ) then
182  itype = 1
183  ELSE IF( lsame( TYPE, 'U' ) ) then
184  itype = 2
185  ELSE IF( lsame( TYPE, 'H' ) ) then
186  itype = 3
187  ELSE
188  itype = -1
189  END IF
190  IF( itype.EQ.-1 ) THEN
191  info = -1
192  ELSE IF( cfrom.EQ.zero .OR. disnan(cfrom) ) THEN
193  info = -4
194  ELSE IF( disnan(cto) ) THEN
195  info = -5
196  END IF
197  END IF
198  END IF
199 *
200  IF( info.NE.0 ) THEN
201  CALL pxerbla( ictxt, 'PDLASCL', -info )
202  RETURN
203  END IF
204 *
205 * Quick return if possible
206 *
207  IF( n.EQ.0 .OR. m.EQ.0 )
208  $ RETURN
209 *
210 * Get machine parameters
211 *
212  smlnum = pdlamch( ictxt, 'S' )
213  bignum = one / smlnum
214 *
215  cfromc = cfrom
216  ctoc = cto
217 *
218 * Compute local indexes
219 *
220  lda = desca( lld_ )
221  iroffa = mod( ia-1, desca( mb_ ) )
222  icoffa = mod( ja-1, desca( nb_ ) )
223  jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
224  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
225  $ iarow, iacol )
226  mp = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
227  IF( myrow.EQ.iarow )
228  $ mp = mp - iroffa
229  nq = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
230  IF( mycol.EQ.iacol )
231  $ nq = nq - icoffa
232 *
233  10 CONTINUE
234  cfrom1 = cfromc*smlnum
235  IF( cfrom1.EQ.cfromc ) THEN
236 ! CFROMC is an inf. Multiply by a correctly signed zero for
237 ! finite CTOC, or a NaN if CTOC is infinite.
238  mul = ctoc / cfromc
239  done = .true.
240  cto1 = ctoc
241  ELSE
242  cto1 = ctoc / bignum
243  IF( cto1.EQ.ctoc ) THEN
244 ! CTOC is either 0 or an inf. In both cases, CTOC itself
245 ! serves as the correct multiplication factor.
246  mul = ctoc
247  done = .true.
248  cfromc = one
249  ELSE IF( abs( cfrom1 ).GT.abs( ctoc ) .AND. ctoc.NE.zero ) THEN
250  mul = smlnum
251  done = .false.
252  cfromc = cfrom1
253  ELSE IF( abs( cto1 ).GT.abs( cfromc ) ) THEN
254  mul = bignum
255  done = .false.
256  ctoc = cto1
257  ELSE
258  mul = ctoc / cfromc
259  done = .true.
260  END IF
261  END IF
262 *
263  ioffa = ( jja - 1 ) * lda
264  icurrow = iarow
265  icurcol = iacol
266 *
267  IF( itype.EQ.0 ) THEN
268 *
269 * Full matrix
270 *
271  DO 30 jj = jja, jja+nq-1
272  DO 20 ii = iia, iia+mp-1
273  a( ioffa+ii ) = a( ioffa+ii ) * mul
274  20 CONTINUE
275  ioffa = ioffa + lda
276  30 CONTINUE
277 *
278  ELSE IF( itype.EQ.1 ) THEN
279 *
280 * Lower triangular matrix
281 *
282  ii = iia
283  jj = jja
284  jb = jn-ja+1
285 *
286  IF( mycol.EQ.icurcol ) THEN
287  IF( myrow.EQ.icurrow ) THEN
288  DO 50 ll = jj, jj + jb -1
289  DO 40 kk = ii+ll-jj, iia+mp-1
290  a( ioffa+kk ) = a( ioffa+kk ) * mul
291  40 CONTINUE
292  ioffa = ioffa + lda
293  50 CONTINUE
294  ELSE
295  DO 70 ll = jj, jj + jb -1
296  DO 60 kk = ii, iia+mp-1
297  a( ioffa+kk ) = a( ioffa+kk ) * mul
298  60 CONTINUE
299  ioffa = ioffa + lda
300  70 CONTINUE
301  END IF
302  jj = jj + jb
303  END IF
304 *
305  IF( myrow.EQ.icurrow )
306  $ ii = ii + jb
307  icurrow = mod( icurrow+1, nprow )
308  icurcol = mod( icurcol+1, npcol )
309 *
310 * Loop over remaining block of columns
311 *
312  DO 120 j = jn+1, ja+n-1, desca( nb_ )
313  jb = min( ja+n-j, desca( nb_ ) )
314 *
315  IF( mycol.EQ.icurcol ) THEN
316  IF( myrow.EQ.icurrow ) THEN
317  DO 90 ll = jj, jj + jb -1
318  DO 80 kk = ii+ll-jj, iia+mp-1
319  a( ioffa+kk ) = a( ioffa+kk ) * mul
320  80 CONTINUE
321  ioffa = ioffa + lda
322  90 CONTINUE
323  ELSE
324  DO 110 ll = jj, jj + jb -1
325  DO 100 kk = ii, iia+mp-1
326  a( ioffa+kk ) = a( ioffa+kk ) * mul
327  100 CONTINUE
328  ioffa = ioffa + lda
329  110 CONTINUE
330  END IF
331  jj = jj + jb
332  END IF
333 *
334  IF( myrow.EQ.icurrow )
335  $ ii = ii + jb
336  icurrow = mod( icurrow+1, nprow )
337  icurcol = mod( icurcol+1, npcol )
338 *
339  120 CONTINUE
340 *
341  ELSE IF( itype.EQ.2 ) THEN
342 *
343 * Upper triangular matrix
344 *
345  ii = iia
346  jj = jja
347  jb = jn-ja+1
348 *
349  IF( mycol.EQ.icurcol ) THEN
350  IF( myrow.EQ.icurrow ) THEN
351  DO 140 ll = jj, jj + jb -1
352  DO 130 kk = iia, min(ii+ll-jj,iia+mp-1)
353  a( ioffa+kk ) = a( ioffa+kk ) * mul
354  130 CONTINUE
355  ioffa = ioffa + lda
356  140 CONTINUE
357  ELSE
358  DO 160 ll = jj, jj + jb -1
359  DO 150 kk = iia, min(ii-1,iia+mp-1)
360  a( ioffa+kk ) = a( ioffa+kk ) * mul
361  150 CONTINUE
362  ioffa = ioffa + lda
363  160 CONTINUE
364  END IF
365  jj = jj + jb
366  END IF
367 *
368  IF( myrow.EQ.icurrow )
369  $ ii = ii + jb
370  icurrow = mod( icurrow+1, nprow )
371  icurcol = mod( icurcol+1, npcol )
372 *
373 * Loop over remaining block of columns
374 *
375  DO 210 j = jn+1, ja+n-1, desca( nb_ )
376  jb = min( ja+n-j, desca( nb_ ) )
377 *
378  IF( mycol.EQ.icurcol ) THEN
379  IF( myrow.EQ.icurrow ) THEN
380  DO 180 ll = jj, jj + jb -1
381  DO 170 kk = iia, min(ii+ll-jj,iia+mp-1)
382  a( ioffa+kk ) = a( ioffa+kk )*mul
383  170 CONTINUE
384  ioffa = ioffa + lda
385  180 CONTINUE
386  ELSE
387  DO 200 ll = jj, jj + jb -1
388  DO 190 kk = iia, min(ii-1,iia+mp-1)
389  a( ioffa+kk ) = a( ioffa+kk ) * mul
390  190 CONTINUE
391  ioffa = ioffa + lda
392  200 CONTINUE
393  END IF
394  jj = jj + jb
395  END IF
396 *
397  IF( myrow.EQ.icurrow )
398  $ ii = ii + jb
399  icurrow = mod( icurrow+1, nprow )
400  icurcol = mod( icurcol+1, npcol )
401 *
402  210 CONTINUE
403 *
404  ELSE IF( itype.EQ.3 ) THEN
405 *
406 * Upper Hessenberg matrix
407 *
408  ii = iia
409  jj = jja
410  jb = jn-ja+1
411 *
412 * Only one process row
413 *
414  IF( nprow.EQ.1 ) THEN
415 *
416 * Handle first block of columns separately
417 *
418  IF( mycol.EQ.icurcol ) THEN
419  DO 230 ll = jj, jj+jb-1
420  DO 220 kk = iia, min( ii+ll-jj+1, iia+mp-1 )
421  a( ioffa+kk ) = a( ioffa+kk )*mul
422  220 CONTINUE
423  ioffa = ioffa + lda
424  230 CONTINUE
425  jj = jj + jb
426  END IF
427 *
428  icurcol = mod( icurcol+1, npcol )
429 *
430 * Loop over remaining block of columns
431 *
432  DO 260 j = jn+1, ja+n-1, desca( nb_ )
433  jb = min( ja+n-j, desca( nb_ ) )
434 *
435  IF( mycol.EQ.icurcol ) THEN
436  DO 250 ll = jj, jj+jb-1
437  DO 240 kk = iia, min( ii+ll-jj+1, iia+mp-1 )
438  a( ioffa+kk ) = a( ioffa+kk )*mul
439  240 CONTINUE
440  ioffa = ioffa + lda
441  250 CONTINUE
442  jj = jj + jb
443  END IF
444 *
445  ii = ii + jb
446  icurcol = mod( icurcol+1, npcol )
447 *
448  260 CONTINUE
449 *
450  ELSE
451 *
452 * Handle first block of columns separately
453 *
454  inxtrow = mod( icurrow+1, nprow )
455  IF( mycol.EQ.icurcol ) THEN
456  IF( myrow.EQ.icurrow ) THEN
457  DO 280 ll = jj, jj + jb -1
458  DO 270 kk = iia, min(ii+ll-jj+1,iia+mp-1)
459  a( ioffa+kk ) = a( ioffa+kk ) * mul
460  270 CONTINUE
461  ioffa = ioffa + lda
462  280 CONTINUE
463  ELSE
464  DO 300 ll = jj, jj + jb -1
465  DO 290 kk = iia, min(ii-1,iia+mp-1)
466  a( ioffa+kk ) = a( ioffa+kk ) * mul
467  290 CONTINUE
468  ioffa = ioffa + lda
469  300 CONTINUE
470  IF( myrow.EQ.inxtrow .AND. ii.LE.iia+mp-1 )
471  $ a( ii+(jj+jb-2)*lda ) = a( ii+(jj+jb-2)*lda ) * mul
472  END IF
473  jj = jj + jb
474  END IF
475 *
476  IF( myrow.EQ.icurrow )
477  $ ii = ii + jb
478  icurrow = inxtrow
479  icurrow = mod( icurrow+1, nprow )
480  icurcol = mod( icurcol+1, npcol )
481 *
482 * Loop over remaining block of columns
483 *
484  DO 350 j = jn+1, ja+n-1, desca( nb_ )
485  jb = min( ja+n-j, desca( nb_ ) )
486 *
487  IF( mycol.EQ.icurcol ) THEN
488  IF( myrow.EQ.icurrow ) THEN
489  DO 320 ll = jj, jj + jb -1
490  DO 310 kk = iia, min( ii+ll-jj+1, iia+mp-1 )
491  a( ioffa+kk ) = a( ioffa+kk ) * mul
492  310 CONTINUE
493  ioffa = ioffa + lda
494  320 CONTINUE
495  ELSE
496  DO 340 ll = jj, jj + jb -1
497  DO 330 kk = iia, min( ii-1, iia+mp-1 )
498  a( ioffa+kk ) = a( ioffa+kk ) * mul
499  330 CONTINUE
500  ioffa = ioffa + lda
501  340 CONTINUE
502  IF( myrow.EQ.inxtrow .AND. ii.LE.iia+mp-1 )
503  $ a( ii+(jj+jb-2)*lda ) = a( ii+(jj+jb-2)*lda ) *
504  $ mul
505  END IF
506  jj = jj + jb
507  END IF
508 *
509  IF( myrow.EQ.icurrow )
510  $ ii = ii + jb
511  icurrow = inxtrow
512  icurrow = mod( icurrow+1, nprow )
513  icurcol = mod( icurcol+1, npcol )
514 *
515  350 CONTINUE
516 *
517  END IF
518 *
519  END IF
520 *
521  IF( .NOT.done )
522  $ GO TO 10
523 *
524  RETURN
525 *
526 * End of PDLASCL
527 *
528  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pdlascl
subroutine pdlascl(TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, INFO)
Definition: pdlascl.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181