ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdblastst.f
Go to the documentation of this file.
1  SUBROUTINE pdoptee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
2 *
3 * -- PBLAS test routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * .. Scalar Arguments ..
9  INTEGER ICTXT, NOUT, SCODE
10 * ..
11 * .. Array Arguments ..
12  CHARACTER*(*) SNAME
13 * ..
14 * .. Subroutine Arguments ..
15  EXTERNAL subptr
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PDOPTEE tests whether the PBLAS respond correctly to a bad option
22 * argument.
23 *
24 * Notes
25 * =====
26 *
27 * A description vector is associated with each 2D block-cyclicly dis-
28 * tributed matrix. This vector stores the information required to
29 * establish the mapping between a matrix entry and its corresponding
30 * process and memory location.
31 *
32 * In the following comments, the character _ should be read as
33 * "of the distributed matrix". Let A be a generic term for any 2D
34 * block cyclicly distributed matrix. Its description vector is DESCA:
35 *
36 * NOTATION STORED IN EXPLANATION
37 * ---------------- --------------- ------------------------------------
38 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
39 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
40 * the NPROW x NPCOL BLACS process grid
41 * A is distributed over. The context
42 * itself is global, but the handle
43 * (the integer value) may vary.
44 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
45 * ted matrix A, M_A >= 0.
46 * N_A (global) DESCA( N_ ) The number of columns in the distri-
47 * buted matrix A, N_A >= 0.
48 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
49 * block of the matrix A, IMB_A > 0.
50 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
51 * left block of the matrix A,
52 * INB_A > 0.
53 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
54 * bute the last M_A-IMB_A rows of A,
55 * MB_A > 0.
56 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
57 * bute the last N_A-INB_A columns of
58 * A, NB_A > 0.
59 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60 * row of the matrix A is distributed,
61 * NPROW > RSRC_A >= 0.
62 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63 * first column of A is distributed.
64 * NPCOL > CSRC_A >= 0.
65 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66 * array storing the local blocks of
67 * the distributed matrix A,
68 * IF( Lc( 1, N_A ) > 0 )
69 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
70 * ELSE
71 * LLD_A >= 1.
72 *
73 * Let K be the number of rows of a matrix A starting at the global in-
74 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
75 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
76 * receive if these K rows were distributed over NPROW processes. If K
77 * is the number of columns of a matrix A starting at the global index
78 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
79 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
80 * these K columns were distributed over NPCOL processes.
81 *
82 * The values of Lr() and Lc() may be determined via a call to the func-
83 * tion PB_NUMROC:
84 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
85 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
86 *
87 * Arguments
88 * =========
89 *
90 * ICTXT (local input) INTEGER
91 * On entry, ICTXT specifies the BLACS context handle, indica-
92 * ting the global context of the operation. The context itself
93 * is global, but the value of ICTXT is local.
94 *
95 * NOUT (global input) INTEGER
96 * On entry, NOUT specifies the unit number for the output file.
97 * When NOUT is 6, output to screen, when NOUT is 0, output to
98 * stderr. NOUT is only defined for process 0.
99 *
100 * SUBPTR (global input) SUBROUTINE
101 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
102 * EXTERNAL in the calling subroutine.
103 *
104 * SCODE (global input) INTEGER
105 * On entry, SCODE specifies the calling sequence code.
106 *
107 * SNAME (global input) CHARACTER*(*)
108 * On entry, SNAME specifies the subroutine name calling this
109 * subprogram.
110 *
111 * Calling sequence encodings
112 * ==========================
113 *
114 * code Formal argument list Examples
115 *
116 * 11 (n, v1,v2) _SWAP, _COPY
117 * 12 (n,s1, v1 ) _SCAL, _SCAL
118 * 13 (n,s1, v1,v2) _AXPY, _DOT_
119 * 14 (n,s1,i1,v1 ) _AMAX
120 * 15 (n,u1, v1 ) _ASUM, _NRM2
121 *
122 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
123 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
124 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
125 * 24 ( m,n,s1,v1,v2,m1) _GER_
126 * 25 (uplo, n,s1,v1, m1) _SYR
127 * 26 (uplo, n,u1,v1, m1) _HER
128 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
129 *
130 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
131 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
132 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
133 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
134 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
135 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
136 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
137 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
138 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
139 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
140 *
141 * -- Written on April 1, 1998 by
142 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
143 *
144 * =====================================================================
145 *
146 * .. Local Scalars ..
147  INTEGER APOS
148 * ..
149 * .. External Subroutines ..
150  EXTERNAL pdchkopt
151 * ..
152 * .. Executable Statements ..
153 *
154 * Level 2 PBLAS
155 *
156  IF( scode.EQ.21 ) THEN
157 *
158 * Check 1st (and only) option
159 *
160  apos = 1
161  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
162 *
163  ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
164  $ scode.EQ.27 ) THEN
165 *
166 * Check 1st (and only) option
167 *
168  apos = 1
169  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
170 *
171  ELSE IF( scode.EQ.23 ) THEN
172 *
173 * Check 1st option
174 *
175  apos = 1
176  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
177 *
178 * Check 2nd option
179 *
180  apos = 2
181  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
182 *
183 * Check 3rd option
184 *
185  apos = 3
186  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
187 *
188 * Level 3 PBLAS
189 *
190  ELSE IF( scode.EQ.31 ) THEN
191 *
192 * Check 1st option
193 *
194  apos = 1
195  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
196 *
197 * Check 2'nd option
198 *
199  apos = 2
200  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'B', apos )
201 *
202  ELSE IF( scode.EQ.32 ) THEN
203 *
204 * Check 1st option
205 *
206  apos = 1
207  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
208 *
209 * Check 2nd option
210 *
211  apos = 2
212  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
213 *
214  ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
215  $ scode.EQ.36 .OR. scode.EQ.40 ) THEN
216 *
217 * Check 1st option
218 *
219  apos = 1
220  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
221 *
222 * Check 2'nd option
223 *
224  apos = 2
225  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
226 *
227  ELSE IF( scode.EQ.38 ) THEN
228 *
229 * Check 1st option
230 *
231  apos = 1
232  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
233 *
234 * Check 2nd option
235 *
236  apos = 2
237  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
238 *
239 * Check 3rd option
240 *
241  apos = 3
242  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
243 *
244 * Check 4th option
245 *
246  apos = 4
247  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
248 *
249 *
250  ELSE IF( scode.EQ.39 ) THEN
251 *
252 * Check 1st option
253 *
254  apos = 1
255  CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
256 *
257  END IF
258 *
259  RETURN
260 *
261 * End of PDOPTEE
262 *
263  END
264  SUBROUTINE pdchkopt( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
265  $ ARGPOS )
266 *
267 * -- PBLAS test routine (version 2.0) --
268 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
269 * and University of California, Berkeley.
270 * April 1, 1998
271 *
272 * .. Scalar Arguments ..
273  CHARACTER*1 ARGNAM
274  INTEGER ARGPOS, ICTXT, NOUT, SCODE
275 * ..
276 * .. Array Arguments ..
277  CHARACTER*(*) SNAME
278 * ..
279 * .. Subroutine Arguments ..
280  EXTERNAL subptr
281 * ..
282 *
283 * Purpose
284 * =======
285 *
286 * PDCHKOPT tests the option ARGNAM in any PBLAS routine.
287 *
288 * Notes
289 * =====
290 *
291 * A description vector is associated with each 2D block-cyclicly dis-
292 * tributed matrix. This vector stores the information required to
293 * establish the mapping between a matrix entry and its corresponding
294 * process and memory location.
295 *
296 * In the following comments, the character _ should be read as
297 * "of the distributed matrix". Let A be a generic term for any 2D
298 * block cyclicly distributed matrix. Its description vector is DESCA:
299 *
300 * NOTATION STORED IN EXPLANATION
301 * ---------------- --------------- ------------------------------------
302 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
303 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
304 * the NPROW x NPCOL BLACS process grid
305 * A is distributed over. The context
306 * itself is global, but the handle
307 * (the integer value) may vary.
308 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
309 * ted matrix A, M_A >= 0.
310 * N_A (global) DESCA( N_ ) The number of columns in the distri-
311 * buted matrix A, N_A >= 0.
312 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
313 * block of the matrix A, IMB_A > 0.
314 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
315 * left block of the matrix A,
316 * INB_A > 0.
317 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
318 * bute the last M_A-IMB_A rows of A,
319 * MB_A > 0.
320 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
321 * bute the last N_A-INB_A columns of
322 * A, NB_A > 0.
323 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
324 * row of the matrix A is distributed,
325 * NPROW > RSRC_A >= 0.
326 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
327 * first column of A is distributed.
328 * NPCOL > CSRC_A >= 0.
329 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
330 * array storing the local blocks of
331 * the distributed matrix A,
332 * IF( Lc( 1, N_A ) > 0 )
333 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
334 * ELSE
335 * LLD_A >= 1.
336 *
337 * Let K be the number of rows of a matrix A starting at the global in-
338 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
339 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
340 * receive if these K rows were distributed over NPROW processes. If K
341 * is the number of columns of a matrix A starting at the global index
342 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
343 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
344 * these K columns were distributed over NPCOL processes.
345 *
346 * The values of Lr() and Lc() may be determined via a call to the func-
347 * tion PB_NUMROC:
348 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
349 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
350 *
351 * Arguments
352 * =========
353 *
354 * ICTXT (local input) INTEGER
355 * On entry, ICTXT specifies the BLACS context handle, indica-
356 * ting the global context of the operation. The context itself
357 * is global, but the value of ICTXT is local.
358 *
359 * NOUT (global input) INTEGER
360 * On entry, NOUT specifies the unit number for the output file.
361 * When NOUT is 6, output to screen, when NOUT is 0, output to
362 * stderr. NOUT is only defined for process 0.
363 *
364 * SUBPTR (global input) SUBROUTINE
365 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
366 * EXTERNAL in the calling subroutine.
367 *
368 * SCODE (global input) INTEGER
369 * On entry, SCODE specifies the calling sequence code.
370 *
371 * SNAME (global input) CHARACTER*(*)
372 * On entry, SNAME specifies the subroutine name calling this
373 * subprogram.
374 *
375 * ARGNAM (global input) CHARACTER*(*)
376 * On entry, ARGNAM specifies the name of the option to be
377 * checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'.
378 *
379 * ARGPOS (global input) INTEGER
380 * On entry, ARGPOS indicates the position of the option ARGNAM
381 * to be tested.
382 *
383 * -- Written on April 1, 1998 by
384 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
385 *
386 * =====================================================================
387 *
388 * .. Local Scalars ..
389  INTEGER INFOT
390 * ..
391 * .. External Subroutines ..
392  EXTERNAL pchkpbe, pdcallsub, pdsetpblas
393 * ..
394 * .. External Functions ..
395  LOGICAL LSAME
396  EXTERNAL lsame
397 * ..
398 * .. Common Blocks ..
399  CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO
400  COMMON /pblasc/diag, side, transa, transb, uplo
401 * ..
402 * .. Executable Statements ..
403 *
404 * Reiniatilize the dummy arguments to correct values
405 *
406  CALL pdsetpblas( ictxt )
407 *
408  IF( lsame( argnam, 'D' ) ) THEN
409 *
410 * Generate bad DIAG option
411 *
412  diag = '/'
413 *
414  ELSE IF( lsame( argnam, 'S' ) ) THEN
415 *
416 * Generate bad SIDE option
417 *
418  side = '/'
419 *
420  ELSE IF( lsame( argnam, 'A' ) ) THEN
421 *
422 * Generate bad TRANSA option
423 *
424  transa = '/'
425 *
426  ELSE IF( lsame( argnam, 'B' ) ) THEN
427 *
428 * Generate bad TRANSB option
429 *
430  transb = '/'
431 *
432  ELSE IF( lsame( argnam, 'U' ) ) THEN
433 *
434 * Generate bad UPLO option
435 *
436  uplo = '/'
437 *
438  END IF
439 *
440 * Set INFOT to the position of the bad dimension argument
441 *
442  infot = argpos
443 *
444 * Call the PBLAS routine
445 *
446  CALL pdcallsub( subptr, scode )
447  CALL pchkpbe( ictxt, nout, sname, infot )
448 *
449  RETURN
450 *
451 * End of PDCHKOPT
452 *
453  END
454  SUBROUTINE pddimee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
455 *
456 * -- PBLAS test routine (version 2.0) --
457 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
458 * and University of California, Berkeley.
459 * April 1, 1998
460 *
461 * .. Scalar Arguments ..
462  INTEGER ICTXT, NOUT, SCODE
463 * ..
464 * .. Array Arguments ..
465  CHARACTER*(*) SNAME
466 * ..
467 * .. Subroutine Arguments ..
468  EXTERNAL subptr
469 * ..
470 *
471 * Purpose
472 * =======
473 *
474 * PDDIMEE tests whether the PBLAS respond correctly to a bad dimension
475 * argument.
476 *
477 * Notes
478 * =====
479 *
480 * A description vector is associated with each 2D block-cyclicly dis-
481 * tributed matrix. This vector stores the information required to
482 * establish the mapping between a matrix entry and its corresponding
483 * process and memory location.
484 *
485 * In the following comments, the character _ should be read as
486 * "of the distributed matrix". Let A be a generic term for any 2D
487 * block cyclicly distributed matrix. Its description vector is DESCA:
488 *
489 * NOTATION STORED IN EXPLANATION
490 * ---------------- --------------- ------------------------------------
491 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
492 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
493 * the NPROW x NPCOL BLACS process grid
494 * A is distributed over. The context
495 * itself is global, but the handle
496 * (the integer value) may vary.
497 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
498 * ted matrix A, M_A >= 0.
499 * N_A (global) DESCA( N_ ) The number of columns in the distri-
500 * buted matrix A, N_A >= 0.
501 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
502 * block of the matrix A, IMB_A > 0.
503 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
504 * left block of the matrix A,
505 * INB_A > 0.
506 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
507 * bute the last M_A-IMB_A rows of A,
508 * MB_A > 0.
509 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
510 * bute the last N_A-INB_A columns of
511 * A, NB_A > 0.
512 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
513 * row of the matrix A is distributed,
514 * NPROW > RSRC_A >= 0.
515 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
516 * first column of A is distributed.
517 * NPCOL > CSRC_A >= 0.
518 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
519 * array storing the local blocks of
520 * the distributed matrix A,
521 * IF( Lc( 1, N_A ) > 0 )
522 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
523 * ELSE
524 * LLD_A >= 1.
525 *
526 * Let K be the number of rows of a matrix A starting at the global in-
527 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
528 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
529 * receive if these K rows were distributed over NPROW processes. If K
530 * is the number of columns of a matrix A starting at the global index
531 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
532 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
533 * these K columns were distributed over NPCOL processes.
534 *
535 * The values of Lr() and Lc() may be determined via a call to the func-
536 * tion PB_NUMROC:
537 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
538 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
539 *
540 * Arguments
541 * =========
542 *
543 * ICTXT (local input) INTEGER
544 * On entry, ICTXT specifies the BLACS context handle, indica-
545 * ting the global context of the operation. The context itself
546 * is global, but the value of ICTXT is local.
547 *
548 * NOUT (global input) INTEGER
549 * On entry, NOUT specifies the unit number for the output file.
550 * When NOUT is 6, output to screen, when NOUT is 0, output to
551 * stderr. NOUT is only defined for process 0.
552 *
553 * SUBPTR (global input) SUBROUTINE
554 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
555 * EXTERNAL in the calling subroutine.
556 *
557 * SCODE (global input) INTEGER
558 * On entry, SCODE specifies the calling sequence code.
559 *
560 * SNAME (global input) CHARACTER*(*)
561 * On entry, SNAME specifies the subroutine name calling this
562 * subprogram.
563 *
564 * Calling sequence encodings
565 * ==========================
566 *
567 * code Formal argument list Examples
568 *
569 * 11 (n, v1,v2) _SWAP, _COPY
570 * 12 (n,s1, v1 ) _SCAL, _SCAL
571 * 13 (n,s1, v1,v2) _AXPY, _DOT_
572 * 14 (n,s1,i1,v1 ) _AMAX
573 * 15 (n,u1, v1 ) _ASUM, _NRM2
574 *
575 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
576 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
577 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
578 * 24 ( m,n,s1,v1,v2,m1) _GER_
579 * 25 (uplo, n,s1,v1, m1) _SYR
580 * 26 (uplo, n,u1,v1, m1) _HER
581 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
582 *
583 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
584 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
585 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
586 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
587 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
588 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
589 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
590 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
591 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
592 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
593 *
594 * -- Written on April 1, 1998 by
595 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
596 *
597 * =====================================================================
598 *
599 * .. Local Scalars ..
600  INTEGER APOS
601 * ..
602 * .. External Subroutines ..
603  EXTERNAL pdchkdim
604 * ..
605 * .. Executable Statements ..
606 *
607 * Level 1 PBLAS
608 *
609  IF( scode.EQ.11 .OR. scode.EQ.12 .OR. scode.EQ.13 .OR.
610  $ scode.EQ.14 .OR. scode.EQ.15 ) THEN
611 *
612 * Check 1st (and only) dimension
613 *
614  apos = 1
615  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
616 *
617 * Level 2 PBLAS
618 *
619  ELSE IF( scode.EQ.21 ) THEN
620 *
621 * Check 1st dimension
622 *
623  apos = 2
624  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
625 *
626 * Check 2nd dimension
627 *
628  apos = 3
629  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
630 *
631  ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
632  $ scode.EQ.27 ) THEN
633 *
634 * Check 1st (and only) dimension
635 *
636  apos = 2
637  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
638 *
639  ELSE IF( scode.EQ.23 ) THEN
640 *
641 * Check 1st (and only) dimension
642 *
643  apos = 4
644  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
645 *
646  ELSE IF( scode.EQ.24 ) THEN
647 *
648 * Check 1st dimension
649 *
650  apos = 1
651  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
652 *
653 * Check 2nd dimension
654 *
655  apos = 2
656  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
657 *
658 * Level 3 PBLAS
659 *
660  ELSE IF( scode.EQ.31 ) THEN
661 *
662 * Check 1st dimension
663 *
664  apos = 3
665  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
666 *
667 * Check 2nd dimension
668 *
669  apos = 4
670  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
671 *
672 * Check 3rd dimension
673 *
674  apos = 5
675  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
676 *
677  ELSE IF( scode.EQ.32 ) THEN
678 *
679 * Check 1st dimension
680 *
681  apos = 3
682  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
683 *
684 * Check 2nd dimension
685 *
686  apos = 4
687  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
688 *
689  ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
690  $ scode.EQ.36 ) THEN
691 *
692 * Check 1st dimension
693 *
694  apos = 3
695  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
696 *
697 * Check 2nd dimension
698 *
699  apos = 4
700  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
701 *
702  ELSE IF( scode.EQ.37 ) THEN
703 *
704 * Check 1st dimension
705 *
706  apos = 1
707  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
708 *
709 * Check 2nd dimension
710 *
711  apos = 2
712  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
713 *
714  ELSE IF( scode.EQ.38 ) THEN
715 *
716 * Check 1st dimension
717 *
718  apos = 5
719  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
720 *
721 * Check 2nd dimension
722 *
723  apos = 6
724  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
725 *
726  ELSE IF( scode.EQ.39 ) THEN
727 *
728 * Check 1st dimension
729 *
730  apos = 2
731  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
732 *
733 * Check 2nd dimension
734 *
735  apos = 3
736  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
737 *
738  ELSE IF( scode.EQ.40 ) THEN
739 *
740 * Check 1st dimension
741 *
742  apos = 3
743  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
744 *
745 * Check 2nd dimension
746 *
747  apos = 4
748  CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
749 *
750  END IF
751 *
752  RETURN
753 *
754 * End of PDDIMEE
755 *
756  END
757  SUBROUTINE pdchkdim( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
758  $ ARGPOS )
759 *
760 * -- PBLAS test routine (version 2.0) --
761 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
762 * and University of California, Berkeley.
763 * April 1, 1998
764 *
765 * .. Scalar Arguments ..
766  CHARACTER*1 ARGNAM
767  INTEGER ARGPOS, ICTXT, NOUT, SCODE
768 * ..
769 * .. Array Arguments ..
770  CHARACTER*(*) SNAME
771 * ..
772 * .. Subroutine Arguments ..
773  EXTERNAL subptr
774 * ..
775 *
776 * Purpose
777 * =======
778 *
779 * PDCHKDIM tests the dimension ARGNAM in any PBLAS routine.
780 *
781 * Notes
782 * =====
783 *
784 * A description vector is associated with each 2D block-cyclicly dis-
785 * tributed matrix. This vector stores the information required to
786 * establish the mapping between a matrix entry and its corresponding
787 * process and memory location.
788 *
789 * In the following comments, the character _ should be read as
790 * "of the distributed matrix". Let A be a generic term for any 2D
791 * block cyclicly distributed matrix. Its description vector is DESCA:
792 *
793 * NOTATION STORED IN EXPLANATION
794 * ---------------- --------------- ------------------------------------
795 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
796 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
797 * the NPROW x NPCOL BLACS process grid
798 * A is distributed over. The context
799 * itself is global, but the handle
800 * (the integer value) may vary.
801 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
802 * ted matrix A, M_A >= 0.
803 * N_A (global) DESCA( N_ ) The number of columns in the distri-
804 * buted matrix A, N_A >= 0.
805 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
806 * block of the matrix A, IMB_A > 0.
807 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
808 * left block of the matrix A,
809 * INB_A > 0.
810 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
811 * bute the last M_A-IMB_A rows of A,
812 * MB_A > 0.
813 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
814 * bute the last N_A-INB_A columns of
815 * A, NB_A > 0.
816 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
817 * row of the matrix A is distributed,
818 * NPROW > RSRC_A >= 0.
819 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
820 * first column of A is distributed.
821 * NPCOL > CSRC_A >= 0.
822 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
823 * array storing the local blocks of
824 * the distributed matrix A,
825 * IF( Lc( 1, N_A ) > 0 )
826 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
827 * ELSE
828 * LLD_A >= 1.
829 *
830 * Let K be the number of rows of a matrix A starting at the global in-
831 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
832 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
833 * receive if these K rows were distributed over NPROW processes. If K
834 * is the number of columns of a matrix A starting at the global index
835 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
836 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
837 * these K columns were distributed over NPCOL processes.
838 *
839 * The values of Lr() and Lc() may be determined via a call to the func-
840 * tion PB_NUMROC:
841 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
842 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
843 *
844 * Arguments
845 * =========
846 *
847 * ICTXT (local input) INTEGER
848 * On entry, ICTXT specifies the BLACS context handle, indica-
849 * ting the global context of the operation. The context itself
850 * is global, but the value of ICTXT is local.
851 *
852 * NOUT (global input) INTEGER
853 * On entry, NOUT specifies the unit number for the output file.
854 * When NOUT is 6, output to screen, when NOUT is 0, output to
855 * stderr. NOUT is only defined for process 0.
856 *
857 * SUBPTR (global input) SUBROUTINE
858 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
859 * EXTERNAL in the calling subroutine.
860 *
861 * SCODE (global input) INTEGER
862 * On entry, SCODE specifies the calling sequence code.
863 *
864 * SNAME (global input) CHARACTER*(*)
865 * On entry, SNAME specifies the subroutine name calling this
866 * subprogram.
867 *
868 * ARGNAM (global input) CHARACTER*(*)
869 * On entry, ARGNAM specifies the name of the dimension to be
870 * checked. ARGNAM can either be 'M', 'N' or 'K'.
871 *
872 * ARGPOS (global input) INTEGER
873 * On entry, ARGPOS indicates the position of the option ARGNAM
874 * to be tested.
875 *
876 * -- Written on April 1, 1998 by
877 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
878 *
879 * =====================================================================
880 *
881 * .. Local Scalars ..
882  INTEGER INFOT
883 * ..
884 * .. External Subroutines ..
885  EXTERNAL pchkpbe, pdcallsub, pdsetpblas
886 * ..
887 * .. External Functions ..
888  LOGICAL LSAME
889  EXTERNAL LSAME
890 * ..
891 * .. Common Blocks ..
892  INTEGER KDIM, MDIM, NDIM
893  COMMON /PBLASN/KDIM, MDIM, NDIM
894 * ..
895 * .. Executable Statements ..
896 *
897 * Reiniatilize the dummy arguments to correct values
898 *
899  CALL pdsetpblas( ictxt )
900 *
901  IF( lsame( argnam, 'M' ) ) THEN
902 *
903 * Generate bad MDIM
904 *
905  mdim = -1
906 *
907  ELSE IF( lsame( argnam, 'N' ) ) THEN
908 *
909 * Generate bad NDIM
910 *
911  ndim = -1
912 *
913  ELSE
914 *
915 * Generate bad KDIM
916 *
917  kdim = -1
918 *
919  END IF
920 *
921 * Set INFOT to the position of the bad dimension argument
922 *
923  infot = argpos
924 *
925 * Call the PBLAS routine
926 *
927  CALL pdcallsub( subptr, scode )
928  CALL pchkpbe( ictxt, nout, sname, infot )
929 *
930  RETURN
931 *
932 * End of PDCHKDIM
933 *
934  END
935  SUBROUTINE pdvecee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
936 *
937 * -- PBLAS test routine (version 2.0) --
938 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
939 * and University of California, Berkeley.
940 * April 1, 1998
941 *
942 * .. Scalar Arguments ..
943  INTEGER ICTXT, NOUT, SCODE
944 * ..
945 * .. Array Arguments ..
946  CHARACTER*7 SNAME
947 * ..
948 * .. Subroutine Arguments ..
949  EXTERNAL subptr
950 * ..
951 *
952 * Purpose
953 * =======
954 *
955 * PDVECEE tests whether the PBLAS respond correctly to a bad vector
956 * argument. Each vector <vec> is described by: <vec>, I<vec>, J<vec>,
957 * DESC<vec>, INC<vec>. Out of all these, only I<vec>, J<vec>,
958 * DESC<vec>, and INC<vec> can be tested.
959 *
960 * Notes
961 * =====
962 *
963 * A description vector is associated with each 2D block-cyclicly dis-
964 * tributed matrix. This vector stores the information required to
965 * establish the mapping between a matrix entry and its corresponding
966 * process and memory location.
967 *
968 * In the following comments, the character _ should be read as
969 * "of the distributed matrix". Let A be a generic term for any 2D
970 * block cyclicly distributed matrix. Its description vector is DESCA:
971 *
972 * NOTATION STORED IN EXPLANATION
973 * ---------------- --------------- ------------------------------------
974 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
975 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
976 * the NPROW x NPCOL BLACS process grid
977 * A is distributed over. The context
978 * itself is global, but the handle
979 * (the integer value) may vary.
980 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
981 * ted matrix A, M_A >= 0.
982 * N_A (global) DESCA( N_ ) The number of columns in the distri-
983 * buted matrix A, N_A >= 0.
984 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
985 * block of the matrix A, IMB_A > 0.
986 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
987 * left block of the matrix A,
988 * INB_A > 0.
989 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
990 * bute the last M_A-IMB_A rows of A,
991 * MB_A > 0.
992 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
993 * bute the last N_A-INB_A columns of
994 * A, NB_A > 0.
995 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
996 * row of the matrix A is distributed,
997 * NPROW > RSRC_A >= 0.
998 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
999 * first column of A is distributed.
1000 * NPCOL > CSRC_A >= 0.
1001 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1002 * array storing the local blocks of
1003 * the distributed matrix A,
1004 * IF( Lc( 1, N_A ) > 0 )
1005 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1006 * ELSE
1007 * LLD_A >= 1.
1008 *
1009 * Let K be the number of rows of a matrix A starting at the global in-
1010 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1011 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1012 * receive if these K rows were distributed over NPROW processes. If K
1013 * is the number of columns of a matrix A starting at the global index
1014 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1015 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1016 * these K columns were distributed over NPCOL processes.
1017 *
1018 * The values of Lr() and Lc() may be determined via a call to the func-
1019 * tion PB_NUMROC:
1020 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1021 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1022 *
1023 * Arguments
1024 * =========
1025 *
1026 * ICTXT (local input) INTEGER
1027 * On entry, ICTXT specifies the BLACS context handle, indica-
1028 * ting the global context of the operation. The context itself
1029 * is global, but the value of ICTXT is local.
1030 *
1031 * NOUT (global input) INTEGER
1032 * On entry, NOUT specifies the unit number for the output file.
1033 * When NOUT is 6, output to screen, when NOUT is 0, output to
1034 * stderr. NOUT is only defined for process 0.
1035 *
1036 * SUBPTR (global input) SUBROUTINE
1037 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
1038 * EXTERNAL in the calling subroutine.
1039 *
1040 * SCODE (global input) INTEGER
1041 * On entry, SCODE specifies the calling sequence code.
1042 *
1043 * SNAME (global input) CHARACTER*(*)
1044 * On entry, SNAME specifies the subroutine name calling this
1045 * subprogram.
1046 *
1047 * Calling sequence encodings
1048 * ==========================
1049 *
1050 * code Formal argument list Examples
1051 *
1052 * 11 (n, v1,v2) _SWAP, _COPY
1053 * 12 (n,s1, v1 ) _SCAL, _SCAL
1054 * 13 (n,s1, v1,v2) _AXPY, _DOT_
1055 * 14 (n,s1,i1,v1 ) _AMAX
1056 * 15 (n,u1, v1 ) _ASUM, _NRM2
1057 *
1058 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1059 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1060 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1061 * 24 ( m,n,s1,v1,v2,m1) _GER_
1062 * 25 (uplo, n,s1,v1, m1) _SYR
1063 * 26 (uplo, n,u1,v1, m1) _HER
1064 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1065 *
1066 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1067 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1068 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1069 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1070 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1071 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1072 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
1073 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1074 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1075 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1076 *
1077 * -- Written on April 1, 1998 by
1078 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1079 *
1080 * =====================================================================
1081 *
1082 * .. Local Scalars ..
1083  INTEGER APOS
1084 * ..
1085 * .. External Subroutines ..
1086  EXTERNAL pdchkmat
1087 * ..
1088 * .. Executable Statements ..
1089 *
1090 * Level 1 PBLAS
1091 *
1092  IF( scode.EQ.11 ) THEN
1093 *
1094 * Check 1st vector
1095 *
1096  apos = 2
1097  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1098 *
1099 * Check 2nd vector
1100 *
1101  apos = 7
1102  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1103 *
1104  ELSE IF( scode.EQ.12 .OR. scode.EQ.15 ) THEN
1105 *
1106 * Check 1st (and only) vector
1107 *
1108  apos = 3
1109  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1110 *
1111  ELSE IF( scode.EQ.13 ) THEN
1112 *
1113 * Check 1st vector
1114 *
1115  apos = 3
1116  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1117 *
1118 * Check 2nd vector
1119 *
1120  apos = 8
1121  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1122 *
1123  ELSE IF( scode.EQ.14 ) THEN
1124 *
1125 * Check 1st (and only) vector
1126 *
1127  apos = 4
1128  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1129 *
1130 * Level 2 PBLAS
1131 *
1132  ELSE IF( scode.EQ.21 ) THEN
1133 *
1134 * Check 1st vector
1135 *
1136  apos = 9
1137  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1138 *
1139 * Check 2nd vector
1140 *
1141  apos = 15
1142  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1143 *
1144  ELSE IF( scode.EQ.22 ) THEN
1145 *
1146 * Check 1st vector
1147 *
1148  apos = 8
1149  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1150 *
1151 * Check 2nd vector
1152 *
1153  apos = 14
1154  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1155 *
1156  ELSE IF( scode.EQ.23 ) THEN
1157 *
1158 * Check 1st (and only) vector
1159 *
1160  apos = 9
1161  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1162 *
1163  ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1164 *
1165 * Check 1st vector
1166 *
1167  apos = 4
1168  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1169 *
1170 * Check 2nd vector
1171 *
1172  apos = 9
1173  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1174 *
1175  ELSE IF( scode.EQ.26 .OR. scode.EQ.27 ) THEN
1176 *
1177 * Check 1'st (and only) vector
1178 *
1179  apos = 4
1180  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1181 *
1182  END IF
1183 *
1184  RETURN
1185 *
1186 * End of PDVECEE
1187 *
1188  END
1189  SUBROUTINE pdmatee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
1191 * -- PBLAS test routine (version 2.0) --
1192 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1193 * and University of California, Berkeley.
1194 * April 1, 1998
1195 *
1196 * .. Scalar Arguments ..
1197  INTEGER ICTXT, NOUT, SCODE
1198 * ..
1199 * .. Array Arguments ..
1200  CHARACTER*7 SNAME
1201 * ..
1202 * .. Subroutine Arguments ..
1203  EXTERNAL subptr
1204 * ..
1205 *
1206 * Purpose
1207 * =======
1208 *
1209 * PDMATEE tests whether the PBLAS respond correctly to a bad matrix
1210 * argument. Each matrix <mat> is described by: <mat>, I<mat>, J<mat>,
1211 * and DESC<mat>. Out of all these, only I<vec>, J<vec> and DESC<mat>
1212 * can be tested.
1213 *
1214 * Notes
1215 * =====
1216 *
1217 * A description vector is associated with each 2D block-cyclicly dis-
1218 * tributed matrix. This vector stores the information required to
1219 * establish the mapping between a matrix entry and its corresponding
1220 * process and memory location.
1221 *
1222 * In the following comments, the character _ should be read as
1223 * "of the distributed matrix". Let A be a generic term for any 2D
1224 * block cyclicly distributed matrix. Its description vector is DESCA:
1225 *
1226 * NOTATION STORED IN EXPLANATION
1227 * ---------------- --------------- ------------------------------------
1228 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1229 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1230 * the NPROW x NPCOL BLACS process grid
1231 * A is distributed over. The context
1232 * itself is global, but the handle
1233 * (the integer value) may vary.
1234 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
1235 * ted matrix A, M_A >= 0.
1236 * N_A (global) DESCA( N_ ) The number of columns in the distri-
1237 * buted matrix A, N_A >= 0.
1238 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1239 * block of the matrix A, IMB_A > 0.
1240 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
1241 * left block of the matrix A,
1242 * INB_A > 0.
1243 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1244 * bute the last M_A-IMB_A rows of A,
1245 * MB_A > 0.
1246 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1247 * bute the last N_A-INB_A columns of
1248 * A, NB_A > 0.
1249 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1250 * row of the matrix A is distributed,
1251 * NPROW > RSRC_A >= 0.
1252 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1253 * first column of A is distributed.
1254 * NPCOL > CSRC_A >= 0.
1255 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1256 * array storing the local blocks of
1257 * the distributed matrix A,
1258 * IF( Lc( 1, N_A ) > 0 )
1259 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1260 * ELSE
1261 * LLD_A >= 1.
1262 *
1263 * Let K be the number of rows of a matrix A starting at the global in-
1264 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1265 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1266 * receive if these K rows were distributed over NPROW processes. If K
1267 * is the number of columns of a matrix A starting at the global index
1268 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1269 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1270 * these K columns were distributed over NPCOL processes.
1271 *
1272 * The values of Lr() and Lc() may be determined via a call to the func-
1273 * tion PB_NUMROC:
1274 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1275 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1276 *
1277 * Arguments
1278 * =========
1279 *
1280 * ICTXT (local input) INTEGER
1281 * On entry, ICTXT specifies the BLACS context handle, indica-
1282 * ting the global context of the operation. The context itself
1283 * is global, but the value of ICTXT is local.
1284 *
1285 * NOUT (global input) INTEGER
1286 * On entry, NOUT specifies the unit number for the output file.
1287 * When NOUT is 6, output to screen, when NOUT is 0, output to
1288 * stderr. NOUT is only defined for process 0.
1289 *
1290 * SUBPTR (global input) SUBROUTINE
1291 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
1292 * EXTERNAL in the calling subroutine.
1293 *
1294 * SCODE (global input) INTEGER
1295 * On entry, SCODE specifies the calling sequence code.
1296 *
1297 * SNAME (global input) CHARACTER*(*)
1298 * On entry, SNAME specifies the subroutine name calling this
1299 * subprogram.
1300 *
1301 * Calling sequence encodings
1302 * ==========================
1303 *
1304 * code Formal argument list Examples
1305 *
1306 * 11 (n, v1,v2) _SWAP, _COPY
1307 * 12 (n,s1, v1 ) _SCAL, _SCAL
1308 * 13 (n,s1, v1,v2) _AXPY, _DOT_
1309 * 14 (n,s1,i1,v1 ) _AMAX
1310 * 15 (n,u1, v1 ) _ASUM, _NRM2
1311 *
1312 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1313 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1314 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1315 * 24 ( m,n,s1,v1,v2,m1) _GER_
1316 * 25 (uplo, n,s1,v1, m1) _SYR
1317 * 26 (uplo, n,u1,v1, m1) _HER
1318 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1319 *
1320 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1321 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1322 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1323 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1324 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1325 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1326 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
1327 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1328 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1329 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1330 *
1331 * -- Written on April 1, 1998 by
1332 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1333 *
1334 * =====================================================================
1335 *
1336 * .. Local Scalars ..
1337  INTEGER APOS
1338 * ..
1339 * .. External Subroutines ..
1340  EXTERNAL pdchkmat
1341 * ..
1342 * .. Executable Statements ..
1343 *
1344 * Level 2 PBLAS
1345 *
1346  IF( scode.EQ.21 .OR. scode.EQ.23 ) THEN
1347 *
1348 * Check 1st (and only) matrix
1349 *
1350  apos = 5
1351  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1352 *
1353  ELSE IF( scode.EQ.22 ) THEN
1354 *
1355 * Check 1st (and only) matrix
1356 *
1357  apos = 4
1358  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1359 *
1360  ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1361 *
1362 * Check 1st (and only) matrix
1363 *
1364  apos = 14
1365  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1366 *
1367  ELSE IF( scode.EQ.25 .OR. scode.EQ.26 ) THEN
1368 *
1369 * Check 1st (and only) matrix
1370 *
1371  apos = 9
1372  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1373 *
1374 * Level 3 PBLAS
1375 *
1376  ELSE IF( scode.EQ.31 ) THEN
1377 *
1378 * Check 1st matrix
1379 *
1380  apos = 7
1381  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1382 *
1383 * Check 2nd matrix
1384 *
1385  apos = 11
1386  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1387 *
1388 * Check 3nd matrix
1389 *
1390  apos = 16
1391  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1392 *
1393  ELSE IF( scode.EQ.32 .OR. scode.EQ.35 .OR. scode.EQ.36 ) THEN
1394 *
1395 * Check 1st matrix
1396 *
1397  apos = 6
1398  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1399 *
1400 * Check 2nd matrix
1401 *
1402  apos = 10
1403  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1404 *
1405 * Check 3nd matrix
1406 *
1407  apos = 15
1408  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1409 *
1410  ELSE IF( scode.EQ.33 .OR. scode.EQ.34 ) THEN
1411 *
1412 * Check 1st matrix
1413 *
1414  apos = 6
1415  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1416 *
1417 * Check 2nd matrix
1418 *
1419  apos = 11
1420  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1421 *
1422  ELSE IF( scode.EQ.37 ) THEN
1423 *
1424 * Check 1st matrix
1425 *
1426  apos = 4
1427  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1428 *
1429 * Check 2nd matrix
1430 *
1431  apos = 9
1432  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1433 *
1434  ELSE IF( scode.EQ.38 ) THEN
1435 *
1436 * Check 1st matrix
1437 *
1438  apos = 8
1439  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1440 *
1441 * Check 2nd matrix
1442 *
1443  apos = 12
1444  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1445 *
1446  ELSE IF( scode.EQ.39 ) THEN
1447 *
1448 * Check 1st matrix
1449 *
1450  apos = 5
1451  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1452 *
1453 * Check 2nd matrix
1454 *
1455  apos = 10
1456  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1457 *
1458  ELSE IF( scode.EQ.40 ) THEN
1459 *
1460 * Check 1st matrix
1461 *
1462  apos = 6
1463  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1464 *
1465 * Check 2nd matrix
1466 *
1467  apos = 11
1468  CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1469 *
1470  END IF
1471 *
1472  RETURN
1473 *
1474 * End of PDMATEE
1475 *
1476  END
1477  SUBROUTINE pdsetpblas( ICTXT )
1479 * -- PBLAS test routine (version 2.0) --
1480 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1481 * and University of California, Berkeley.
1482 * April 1, 1998
1483 *
1484 * .. Scalar Arguments ..
1485  INTEGER ICTXT
1486 * ..
1487 *
1488 * Purpose
1489 * =======
1490 *
1491 * PDSETPBLAS initializes *all* the dummy arguments to correct values.
1492 *
1493 * Notes
1494 * =====
1495 *
1496 * A description vector is associated with each 2D block-cyclicly dis-
1497 * tributed matrix. This vector stores the information required to
1498 * establish the mapping between a matrix entry and its corresponding
1499 * process and memory location.
1500 *
1501 * In the following comments, the character _ should be read as
1502 * "of the distributed matrix". Let A be a generic term for any 2D
1503 * block cyclicly distributed matrix. Its description vector is DESCA:
1504 *
1505 * NOTATION STORED IN EXPLANATION
1506 * ---------------- --------------- ------------------------------------
1507 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1508 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1509 * the NPROW x NPCOL BLACS process grid
1510 * A is distributed over. The context
1511 * itself is global, but the handle
1512 * (the integer value) may vary.
1513 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
1514 * ted matrix A, M_A >= 0.
1515 * N_A (global) DESCA( N_ ) The number of columns in the distri-
1516 * buted matrix A, N_A >= 0.
1517 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1518 * block of the matrix A, IMB_A > 0.
1519 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
1520 * left block of the matrix A,
1521 * INB_A > 0.
1522 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1523 * bute the last M_A-IMB_A rows of A,
1524 * MB_A > 0.
1525 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1526 * bute the last N_A-INB_A columns of
1527 * A, NB_A > 0.
1528 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1529 * row of the matrix A is distributed,
1530 * NPROW > RSRC_A >= 0.
1531 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1532 * first column of A is distributed.
1533 * NPCOL > CSRC_A >= 0.
1534 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1535 * array storing the local blocks of
1536 * the distributed matrix A,
1537 * IF( Lc( 1, N_A ) > 0 )
1538 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1539 * ELSE
1540 * LLD_A >= 1.
1541 *
1542 * Let K be the number of rows of a matrix A starting at the global in-
1543 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1544 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1545 * receive if these K rows were distributed over NPROW processes. If K
1546 * is the number of columns of a matrix A starting at the global index
1547 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1548 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1549 * these K columns were distributed over NPCOL processes.
1550 *
1551 * The values of Lr() and Lc() may be determined via a call to the func-
1552 * tion PB_NUMROC:
1553 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1554 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1555 *
1556 * Arguments
1557 * =========
1558 *
1559 * ICTXT (local input) INTEGER
1560 * On entry, ICTXT specifies the BLACS context handle, indica-
1561 * ting the global context of the operation. The context itself
1562 * is global, but the value of ICTXT is local.
1563 *
1564 * -- Written on April 1, 1998 by
1565 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1566 *
1567 * =====================================================================
1568 *
1569 * .. Parameters ..
1570  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1571  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1572  $ rsrc_
1573  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1574  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1575  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1576  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1577  DOUBLE PRECISION ONE
1578  PARAMETER ( ONE = 1.0d+0 )
1579 * ..
1580 * .. External Subroutines ..
1581  EXTERNAL pb_descset2
1582 * ..
1583 * .. Common Blocks ..
1584  CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
1585  INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1586  $ jc, jx, jy, kdim, mdim, ndim
1587  DOUBLE PRECISION USCLR, SCLR
1588  INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1589  $ descx( dlen_ ), descy( dlen_ )
1590  DOUBLE PRECISION A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
1591  COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO
1592  COMMON /pblasd/desca, descb, descc, descx, descy
1593  COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1594  $ ja, jb, jc, jx, jy
1595  COMMON /pblasm/a, b, c
1596  COMMON /pblasn/kdim, mdim, ndim
1597  COMMON /pblass/sclr, usclr
1598  COMMON /pblasv/x, y
1599 * ..
1600 * .. Executable Statements ..
1601 *
1602 * Set default values for options
1603 *
1604  diag = 'N'
1605  side = 'L'
1606  transa = 'N'
1607  transb = 'N'
1608  uplo = 'U'
1609 *
1610 * Set default values for scalars
1611 *
1612  kdim = 1
1613  mdim = 1
1614  ndim = 1
1615  isclr = 1
1616  sclr = one
1617  usclr = one
1618 *
1619 * Set default values for distributed matrix A
1620 *
1621  a( 1, 1 ) = one
1622  a( 2, 1 ) = one
1623  a( 1, 2 ) = one
1624  a( 2, 2 ) = one
1625  ia = 1
1626  ja = 1
1627  CALL pb_descset2( desca, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1628 *
1629 * Set default values for distributed matrix B
1630 *
1631  b( 1, 1 ) = one
1632  b( 2, 1 ) = one
1633  b( 1, 2 ) = one
1634  b( 2, 2 ) = one
1635  ib = 1
1636  jb = 1
1637  CALL pb_descset2( descb, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1638 *
1639 * Set default values for distributed matrix C
1640 *
1641  c( 1, 1 ) = one
1642  c( 2, 1 ) = one
1643  c( 1, 2 ) = one
1644  c( 2, 2 ) = one
1645  ic = 1
1646  jc = 1
1647  CALL pb_descset2( descc, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1648 *
1649 * Set default values for distributed matrix X
1650 *
1651  x( 1 ) = one
1652  x( 2 ) = one
1653  ix = 1
1654  jx = 1
1655  CALL pb_descset2( descx, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1656  incx = 1
1657 *
1658 * Set default values for distributed matrix Y
1659 *
1660  y( 1 ) = one
1661  y( 2 ) = one
1662  iy = 1
1663  jy = 1
1664  CALL pb_descset2( descy, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1665  incy = 1
1666 *
1667  RETURN
1668 *
1669 * End of PDSETPBLAS
1670 *
1671  END
1672  SUBROUTINE pdchkmat( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
1673  $ ARGPOS )
1675 * -- PBLAS test routine (version 2.0) --
1676 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1677 * and University of California, Berkeley.
1678 * April 1, 1998
1679 *
1680 * .. Scalar Arguments ..
1681  CHARACTER*1 ARGNAM
1682  INTEGER ARGPOS, ICTXT, NOUT, SCODE
1683 * ..
1684 * .. Array Arguments ..
1685  CHARACTER*(*) SNAME
1686 * ..
1687 * .. Subroutine Arguments ..
1688  EXTERNAL subptr
1689 * ..
1690 *
1691 * Purpose
1692 * =======
1693 *
1694 * PDCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine.
1695 *
1696 * Notes
1697 * =====
1698 *
1699 * A description vector is associated with each 2D block-cyclicly dis-
1700 * tributed matrix. This vector stores the information required to
1701 * establish the mapping between a matrix entry and its corresponding
1702 * process and memory location.
1703 *
1704 * In the following comments, the character _ should be read as
1705 * "of the distributed matrix". Let A be a generic term for any 2D
1706 * block cyclicly distributed matrix. Its description vector is DESCA:
1707 *
1708 * NOTATION STORED IN EXPLANATION
1709 * ---------------- --------------- ------------------------------------
1710 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1711 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1712 * the NPROW x NPCOL BLACS process grid
1713 * A is distributed over. The context
1714 * itself is global, but the handle
1715 * (the integer value) may vary.
1716 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
1717 * ted matrix A, M_A >= 0.
1718 * N_A (global) DESCA( N_ ) The number of columns in the distri-
1719 * buted matrix A, N_A >= 0.
1720 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1721 * block of the matrix A, IMB_A > 0.
1722 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
1723 * left block of the matrix A,
1724 * INB_A > 0.
1725 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1726 * bute the last M_A-IMB_A rows of A,
1727 * MB_A > 0.
1728 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1729 * bute the last N_A-INB_A columns of
1730 * A, NB_A > 0.
1731 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1732 * row of the matrix A is distributed,
1733 * NPROW > RSRC_A >= 0.
1734 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1735 * first column of A is distributed.
1736 * NPCOL > CSRC_A >= 0.
1737 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1738 * array storing the local blocks of
1739 * the distributed matrix A,
1740 * IF( Lc( 1, N_A ) > 0 )
1741 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
1742 * ELSE
1743 * LLD_A >= 1.
1744 *
1745 * Let K be the number of rows of a matrix A starting at the global in-
1746 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1747 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1748 * receive if these K rows were distributed over NPROW processes. If K
1749 * is the number of columns of a matrix A starting at the global index
1750 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1751 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1752 * these K columns were distributed over NPCOL processes.
1753 *
1754 * The values of Lr() and Lc() may be determined via a call to the func-
1755 * tion PB_NUMROC:
1756 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1757 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1758 *
1759 * Arguments
1760 * =========
1761 *
1762 * ICTXT (local input) INTEGER
1763 * On entry, ICTXT specifies the BLACS context handle, indica-
1764 * ting the global context of the operation. The context itself
1765 * is global, but the value of ICTXT is local.
1766 *
1767 * NOUT (global input) INTEGER
1768 * On entry, NOUT specifies the unit number for the output file.
1769 * When NOUT is 6, output to screen, when NOUT is 0, output to
1770 * stderr. NOUT is only defined for process 0.
1771 *
1772 * SUBPTR (global input) SUBROUTINE
1773 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
1774 * EXTERNAL in the calling subroutine.
1775 *
1776 * SCODE (global input) INTEGER
1777 * On entry, SCODE specifies the calling sequence code.
1778 *
1779 * SNAME (global input) CHARACTER*(*)
1780 * On entry, SNAME specifies the subroutine name calling this
1781 * subprogram.
1782 *
1783 * ARGNAM (global input) CHARACTER*(*)
1784 * On entry, ARGNAM specifies the name of the matrix or vector
1785 * to be checked. ARGNAM can either be 'A', 'B' or 'C' when one
1786 * wants to check a matrix, and 'X' or 'Y' for a vector.
1787 *
1788 * ARGPOS (global input) INTEGER
1789 * On entry, ARGPOS indicates the position of the first argument
1790 * of the matrix (or vector) ARGNAM.
1791 *
1792 * -- Written on April 1, 1998 by
1793 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1794 *
1795 * =====================================================================
1796 *
1797 * .. Parameters ..
1798  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1799  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1800  $ RSRC_
1801  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1802  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1803  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1804  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1805  INTEGER DESCMULT
1806  PARAMETER ( DESCMULT = 100 )
1807 * ..
1808 * .. Local Scalars ..
1809  INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL
1810 * ..
1811 * .. External Subroutines ..
1812  EXTERNAL blacs_gridinfo, pchkpbe, pdcallsub, pdsetpblas
1813 * ..
1814 * .. External Functions ..
1815  LOGICAL LSAME
1816  EXTERNAL LSAME
1817 * ..
1818 * .. Common Blocks ..
1819  INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1820  $ JC, JX, JY
1821  INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1822  $ descx( dlen_ ), descy( dlen_ )
1823  COMMON /pblasd/desca, descb, descc, descx, descy
1824  COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1825  $ ja, jb, jc, jx, jy
1826 * ..
1827 * .. Executable Statements ..
1828 *
1829  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1830 *
1831  IF( lsame( argnam, 'A' ) ) THEN
1832 *
1833 * Check IA. Set all other OK, bad IA
1834 *
1835  CALL pdsetpblas( ictxt )
1836  ia = -1
1837  infot = argpos + 1
1838  CALL pdcallsub( subptr, scode )
1839  CALL pchkpbe( ictxt, nout, sname, infot )
1840 *
1841 * Check JA. Set all other OK, bad JA
1842 *
1843  CALL pdsetpblas( ictxt )
1844  ja = -1
1845  infot = argpos + 2
1846  CALL pdcallsub( subptr, scode )
1847  CALL pchkpbe( ictxt, nout, sname, infot )
1848 *
1849 * Check DESCA. Set all other OK, bad DESCA
1850 *
1851  DO 10 i = 1, dlen_
1852 *
1853 * Set I'th entry of DESCA to incorrect value, rest ok.
1854 *
1855  CALL pdsetpblas( ictxt )
1856  desca( i ) = -2
1857  infot = ( ( argpos + 3 ) * descmult ) + i
1858  CALL pdcallsub( subptr, scode )
1859  CALL pchkpbe( ictxt, nout, sname, infot )
1860 *
1861 * Extra tests for RSRCA, CSRCA, LDA
1862 *
1863  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1864  $ ( i.EQ.lld_ ) ) THEN
1865 *
1866  CALL pdsetpblas( ictxt )
1867 *
1868 * Test RSRCA >= NPROW
1869 *
1870  IF( i.EQ.rsrc_ )
1871  $ desca( i ) = nprow
1872 *
1873 * Test CSRCA >= NPCOL
1874 *
1875  IF( i.EQ.csrc_ )
1876  $ desca( i ) = npcol
1877 *
1878 * Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1879 *
1880  IF( i.EQ.lld_ ) THEN
1881  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1882  desca( i ) = 1
1883  ELSE
1884  desca( i ) = 0
1885  END IF
1886  END IF
1887 *
1888  infot = ( ( argpos + 3 ) * descmult ) + i
1889  CALL pdcallsub( subptr, scode )
1890  CALL pchkpbe( ictxt, nout, sname, infot )
1891 *
1892  END IF
1893 *
1894  10 CONTINUE
1895 *
1896  ELSE IF( lsame( argnam, 'B' ) ) THEN
1897 *
1898 * Check IB. Set all other OK, bad IB
1899 *
1900  CALL pdsetpblas( ictxt )
1901  ib = -1
1902  infot = argpos + 1
1903  CALL pdcallsub( subptr, scode )
1904  CALL pchkpbe( ictxt, nout, sname, infot )
1905 *
1906 * Check JB. Set all other OK, bad JB
1907 *
1908  CALL pdsetpblas( ictxt )
1909  jb = -1
1910  infot = argpos + 2
1911  CALL pdcallsub( subptr, scode )
1912  CALL pchkpbe( ictxt, nout, sname, infot )
1913 *
1914 * Check DESCB. Set all other OK, bad DESCB
1915 *
1916  DO 20 i = 1, dlen_
1917 *
1918 * Set I'th entry of DESCB to incorrect value, rest ok.
1919 *
1920  CALL pdsetpblas( ictxt )
1921  descb( i ) = -2
1922  infot = ( ( argpos + 3 ) * descmult ) + i
1923  CALL pdcallsub( subptr, scode )
1924  CALL pchkpbe( ictxt, nout, sname, infot )
1925 *
1926 * Extra tests for RSRCB, CSRCB, LDB
1927 *
1928  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1929  $ ( i.EQ.lld_ ) ) THEN
1930 *
1931  CALL pdsetpblas( ictxt )
1932 *
1933 * Test RSRCB >= NPROW
1934 *
1935  IF( i.EQ.rsrc_ )
1936  $ descb( i ) = nprow
1937 *
1938 * Test CSRCB >= NPCOL
1939 *
1940  IF( i.EQ.csrc_ )
1941  $ descb( i ) = npcol
1942 *
1943 * Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1944 *
1945  IF( i.EQ.lld_ ) THEN
1946  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1947  descb( i ) = 1
1948  ELSE
1949  descb( i ) = 0
1950  END IF
1951  END IF
1952 *
1953  infot = ( ( argpos + 3 ) * descmult ) + i
1954  CALL pdcallsub( subptr, scode )
1955  CALL pchkpbe( ictxt, nout, sname, infot )
1956 *
1957  END IF
1958 *
1959  20 CONTINUE
1960 *
1961  ELSE IF( lsame( argnam, 'C' ) ) THEN
1962 *
1963 * Check IC. Set all other OK, bad IC
1964 *
1965  CALL pdsetpblas( ictxt )
1966  ic = -1
1967  infot = argpos + 1
1968  CALL pdcallsub( subptr, scode )
1969  CALL pchkpbe( ictxt, nout, sname, infot )
1970 *
1971 * Check JC. Set all other OK, bad JC
1972 *
1973  CALL pdsetpblas( ictxt )
1974  jc = -1
1975  infot = argpos + 2
1976  CALL pdcallsub( subptr, scode )
1977  CALL pchkpbe( ictxt, nout, sname, infot )
1978 *
1979 * Check DESCC. Set all other OK, bad DESCC
1980 *
1981  DO 30 i = 1, dlen_
1982 *
1983 * Set I'th entry of DESCC to incorrect value, rest ok.
1984 *
1985  CALL pdsetpblas( ictxt )
1986  descc( i ) = -2
1987  infot = ( ( argpos + 3 ) * descmult ) + i
1988  CALL pdcallsub( subptr, scode )
1989  CALL pchkpbe( ictxt, nout, sname, infot )
1990 *
1991 * Extra tests for RSRCC, CSRCC, LDC
1992 *
1993  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1994  $ ( i.EQ.lld_ ) ) THEN
1995 *
1996  CALL pdsetpblas( ictxt )
1997 *
1998 * Test RSRCC >= NPROW
1999 *
2000  IF( i.EQ.rsrc_ )
2001  $ descc( i ) = nprow
2002 *
2003 * Test CSRCC >= NPCOL
2004 *
2005  IF( i.EQ.csrc_ )
2006  $ descc( i ) = npcol
2007 *
2008 * Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2009 *
2010  IF( i.EQ.lld_ ) THEN
2011  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2012  descc( i ) = 1
2013  ELSE
2014  descc( i ) = 0
2015  END IF
2016  END IF
2017 *
2018  infot = ( ( argpos + 3 ) * descmult ) + i
2019  CALL pdcallsub( subptr, scode )
2020  CALL pchkpbe( ictxt, nout, sname, infot )
2021 *
2022  END IF
2023 *
2024  30 CONTINUE
2025 *
2026  ELSE IF( lsame( argnam, 'X' ) ) THEN
2027 *
2028 * Check IX. Set all other OK, bad IX
2029 *
2030  CALL pdsetpblas( ictxt )
2031  ix = -1
2032  infot = argpos + 1
2033  CALL pdcallsub( subptr, scode )
2034  CALL pchkpbe( ictxt, nout, sname, infot )
2035 *
2036 * Check JX. Set all other OK, bad JX
2037 *
2038  CALL pdsetpblas( ictxt )
2039  jx = -1
2040  infot = argpos + 2
2041  CALL pdcallsub( subptr, scode )
2042  CALL pchkpbe( ictxt, nout, sname, infot )
2043 *
2044 * Check DESCX. Set all other OK, bad DESCX
2045 *
2046  DO 40 i = 1, dlen_
2047 *
2048 * Set I'th entry of DESCX to incorrect value, rest ok.
2049 *
2050  CALL pdsetpblas( ictxt )
2051  descx( i ) = -2
2052  infot = ( ( argpos + 3 ) * descmult ) + i
2053  CALL pdcallsub( subptr, scode )
2054  CALL pchkpbe( ictxt, nout, sname, infot )
2055 *
2056 * Extra tests for RSRCX, CSRCX, LDX
2057 *
2058  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2059  $ ( i.EQ.lld_ ) ) THEN
2060 *
2061  CALL pdsetpblas( ictxt )
2062 *
2063 * Test RSRCX >= NPROW
2064 *
2065  IF( i.EQ.rsrc_ )
2066  $ descx( i ) = nprow
2067 *
2068 * Test CSRCX >= NPCOL
2069 *
2070  IF( i.EQ.csrc_ )
2071  $ descx( i ) = npcol
2072 *
2073 * Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2074 *
2075  IF( i.EQ.lld_ ) THEN
2076  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2077  descx( i ) = 1
2078  ELSE
2079  descx( i ) = 0
2080  END IF
2081  END IF
2082 *
2083  infot = ( ( argpos + 3 ) * descmult ) + i
2084  CALL pdcallsub( subptr, scode )
2085  CALL pchkpbe( ictxt, nout, sname, infot )
2086 *
2087  END IF
2088 *
2089  40 CONTINUE
2090 *
2091 * Check INCX. Set all other OK, bad INCX
2092 *
2093  CALL pdsetpblas( ictxt )
2094  incx = -1
2095  infot = argpos + 4
2096  CALL pdcallsub( subptr, scode )
2097  CALL pchkpbe( ictxt, nout, sname, infot )
2098 *
2099  ELSE
2100 *
2101 * Check IY. Set all other OK, bad IY
2102 *
2103  CALL pdsetpblas( ictxt )
2104  iy = -1
2105  infot = argpos + 1
2106  CALL pdcallsub( subptr, scode )
2107  CALL pchkpbe( ictxt, nout, sname, infot )
2108 *
2109 * Check JY. Set all other OK, bad JY
2110 *
2111  CALL pdsetpblas( ictxt )
2112  jy = -1
2113  infot = argpos + 2
2114  CALL pdcallsub( subptr, scode )
2115  CALL pchkpbe( ictxt, nout, sname, infot )
2116 *
2117 * Check DESCY. Set all other OK, bad DESCY
2118 *
2119  DO 50 i = 1, dlen_
2120 *
2121 * Set I'th entry of DESCY to incorrect value, rest ok.
2122 *
2123  CALL pdsetpblas( ictxt )
2124  descy( i ) = -2
2125  infot = ( ( argpos + 3 ) * descmult ) + i
2126  CALL pdcallsub( subptr, scode )
2127  CALL pchkpbe( ictxt, nout, sname, infot )
2128 *
2129 * Extra tests for RSRCY, CSRCY, LDY
2130 *
2131  IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2132  $ ( i.EQ.lld_ ) ) THEN
2133 *
2134  CALL pdsetpblas( ictxt )
2135 *
2136 * Test RSRCY >= NPROW
2137 *
2138  IF( i.EQ.rsrc_ )
2139  $ descy( i ) = nprow
2140 *
2141 * Test CSRCY >= NPCOL
2142 *
2143  IF( i.EQ.csrc_ )
2144  $ descy( i ) = npcol
2145 *
2146 * Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2147 *
2148  IF( i.EQ.lld_ ) THEN
2149  IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2150  descy( i ) = 1
2151  ELSE
2152  descy( i ) = 0
2153  END IF
2154  END IF
2155 *
2156  infot = ( ( argpos + 3 ) * descmult ) + i
2157  CALL pdcallsub( subptr, scode )
2158  CALL pchkpbe( ictxt, nout, sname, infot )
2159 *
2160  END IF
2161 *
2162  50 CONTINUE
2163 *
2164 * Check INCY. Set all other OK, bad INCY
2165 *
2166  CALL pdsetpblas( ictxt )
2167  incy = -1
2168  infot = argpos + 4
2169  CALL pdcallsub( subptr, scode )
2170  CALL pchkpbe( ictxt, nout, sname, infot )
2171 *
2172  END IF
2173 *
2174  RETURN
2175 *
2176 * End of PDCHKMAT
2177 *
2178  END
2179  SUBROUTINE pdcallsub( SUBPTR, SCODE )
2181 * -- PBLAS test routine (version 2.0) --
2182 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2183 * and University of California, Berkeley.
2184 * April 1, 1998
2185 *
2186 * .. Scalar Arguments ..
2187  INTEGER SCODE
2188 * ..
2189 * .. Subroutine Arguments ..
2190  EXTERNAL subptr
2191 * ..
2192 *
2193 * Purpose
2194 * =======
2195 *
2196 * PDCALLSUB calls the subroutine SUBPTR with the calling sequence iden-
2197 * tified by SCODE.
2198 *
2199 * Notes
2200 * =====
2201 *
2202 * A description vector is associated with each 2D block-cyclicly dis-
2203 * tributed matrix. This vector stores the information required to
2204 * establish the mapping between a matrix entry and its corresponding
2205 * process and memory location.
2206 *
2207 * In the following comments, the character _ should be read as
2208 * "of the distributed matrix". Let A be a generic term for any 2D
2209 * block cyclicly distributed matrix. Its description vector is DESCA:
2210 *
2211 * NOTATION STORED IN EXPLANATION
2212 * ---------------- --------------- ------------------------------------
2213 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2214 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2215 * the NPROW x NPCOL BLACS process grid
2216 * A is distributed over. The context
2217 * itself is global, but the handle
2218 * (the integer value) may vary.
2219 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2220 * ted matrix A, M_A >= 0.
2221 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2222 * buted matrix A, N_A >= 0.
2223 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2224 * block of the matrix A, IMB_A > 0.
2225 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2226 * left block of the matrix A,
2227 * INB_A > 0.
2228 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2229 * bute the last M_A-IMB_A rows of A,
2230 * MB_A > 0.
2231 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2232 * bute the last N_A-INB_A columns of
2233 * A, NB_A > 0.
2234 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2235 * row of the matrix A is distributed,
2236 * NPROW > RSRC_A >= 0.
2237 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2238 * first column of A is distributed.
2239 * NPCOL > CSRC_A >= 0.
2240 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2241 * array storing the local blocks of
2242 * the distributed matrix A,
2243 * IF( Lc( 1, N_A ) > 0 )
2244 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2245 * ELSE
2246 * LLD_A >= 1.
2247 *
2248 * Let K be the number of rows of a matrix A starting at the global in-
2249 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2250 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2251 * receive if these K rows were distributed over NPROW processes. If K
2252 * is the number of columns of a matrix A starting at the global index
2253 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2254 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2255 * these K columns were distributed over NPCOL processes.
2256 *
2257 * The values of Lr() and Lc() may be determined via a call to the func-
2258 * tion PB_NUMROC:
2259 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2260 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2261 *
2262 * Arguments
2263 * =========
2264 *
2265 * SUBPTR (global input) SUBROUTINE
2266 * On entry, SUBPTR is a subroutine. SUBPTR must be declared
2267 * EXTERNAL in the calling subroutine.
2268 *
2269 * SCODE (global input) INTEGER
2270 * On entry, SCODE specifies the calling sequence code.
2271 *
2272 * Calling sequence encodings
2273 * ==========================
2274 *
2275 * code Formal argument list Examples
2276 *
2277 * 11 (n, v1,v2) _SWAP, _COPY
2278 * 12 (n,s1, v1 ) _SCAL, _SCAL
2279 * 13 (n,s1, v1,v2) _AXPY, _DOT_
2280 * 14 (n,s1,i1,v1 ) _AMAX
2281 * 15 (n,u1, v1 ) _ASUM, _NRM2
2282 *
2283 * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
2284 * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
2285 * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
2286 * 24 ( m,n,s1,v1,v2,m1) _GER_
2287 * 25 (uplo, n,s1,v1, m1) _SYR
2288 * 26 (uplo, n,u1,v1, m1) _HER
2289 * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
2290 *
2291 * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
2292 * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
2293 * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
2294 * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
2295 * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
2296 * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
2297 * 37 ( m,n, s1,m1, s2,m3) _TRAN_
2298 * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
2299 * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
2300 * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
2301 *
2302 * -- Written on April 1, 1998 by
2303 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2304 *
2305 * =====================================================================
2306 *
2307 * .. Parameters ..
2308  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2309  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2310  $ RSRC_
2311  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
2312  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2313  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2314  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2315 * ..
2316 * .. Common Blocks ..
2317  CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
2318  INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
2319  $ JC, JX, JY, KDIM, MDIM, NDIM
2320  DOUBLE PRECISION USCLR, SCLR
2321  INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
2322  $ DESCX( DLEN_ ), DESCY( DLEN_ )
2323  DOUBLE PRECISION A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
2324  COMMON /pblasc/diag, side, transa, transb, uplo
2325  COMMON /pblasd/desca, descb, descc, descx, descy
2326  COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
2327  $ ja, jb, jc, jx, jy
2328  COMMON /pblasm/a, b, c
2329  COMMON /pblasn/kdim, mdim, ndim
2330  COMMON /pblass/sclr, usclr
2331  COMMON /pblasv/x, y
2332 * ..
2333 * .. Executable Statements ..
2334 *
2335 * Level 1 PBLAS
2336 *
2337  IF( scode.EQ.11 ) THEN
2338 *
2339  CALL subptr( ndim, x, ix, jx, descx, incx, y, iy, jy, descy,
2340  $ incy )
2341 *
2342  ELSE IF( scode.EQ.12 ) THEN
2343 *
2344  CALL subptr( ndim, sclr, x, ix, jx, descx, incx )
2345 *
2346  ELSE IF( scode.EQ.13 ) THEN
2347 *
2348  CALL subptr( ndim, sclr, x, ix, jx, descx, incx, y, iy, jy,
2349  $ descy, incy )
2350 *
2351  ELSE IF( scode.EQ.14 ) THEN
2352 *
2353  CALL subptr( ndim, sclr, isclr, x, ix, jx, descx, incx )
2354 *
2355  ELSE IF( scode.EQ.15 ) THEN
2356 *
2357  CALL subptr( ndim, usclr, x, ix, jx, descx, incx )
2358 *
2359 * Level 2 PBLAS
2360 *
2361  ELSE IF( scode.EQ.21 ) THEN
2362 *
2363  CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, x, ix,
2364  $ jx, descx, incx, sclr, y, iy, jy, descy, incy )
2365 *
2366  ELSE IF( scode.EQ.22 ) THEN
2367 *
2368  CALL subptr( uplo, ndim, sclr, a, ia, ja, desca, x, ix, jx,
2369  $ descx, incx, sclr, y, iy, jy, descy, incy )
2370 *
2371  ELSE IF( scode.EQ.23 ) THEN
2372 *
2373  CALL subptr( uplo, transa, diag, ndim, a, ia, ja, desca, x, ix,
2374  $ jx, descx, incx )
2375 *
2376  ELSE IF( scode.EQ.24 ) THEN
2377 *
2378  CALL subptr( mdim, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2379  $ jy, descy, incy, a, ia, ja, desca )
2380 *
2381  ELSE IF( scode.EQ.25 ) THEN
2382 *
2383  CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, a, ia,
2384  $ ja, desca )
2385 *
2386  ELSE IF( scode.EQ.26 ) THEN
2387 *
2388  CALL subptr( uplo, ndim, usclr, x, ix, jx, descx, incx, a, ia,
2389  $ ja, desca )
2390 *
2391  ELSE IF( scode.EQ.27 ) THEN
2392 *
2393  CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2394  $ jy, descy, incy, a, ia, ja, desca )
2395 *
2396 * Level 3 PBLAS
2397 *
2398  ELSE IF( scode.EQ.31 ) THEN
2399 *
2400  CALL subptr( transa, transb, mdim, ndim, kdim, sclr, a, ia, ja,
2401  $ desca, b, ib, jb, descb, sclr, c, ic, jc, descc )
2402 *
2403  ELSE IF( scode.EQ.32 ) THEN
2404 *
2405  CALL subptr( side, uplo, mdim, ndim, sclr, a, ia, ja, desca, b,
2406  $ ib, jb, descb, sclr, c, ic, jc, descc )
2407 *
2408  ELSE IF( scode.EQ.33 ) THEN
2409 *
2410  CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2411  $ sclr, c, ic, jc, descc )
2412 *
2413  ELSE IF( scode.EQ.34 ) THEN
2414 *
2415  CALL subptr( uplo, transa, ndim, kdim, usclr, a, ia, ja, desca,
2416  $ usclr, c, ic, jc, descc )
2417 *
2418  ELSE IF( scode.EQ.35 ) THEN
2419 *
2420  CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2421  $ b, ib, jb, descb, sclr, c, ic, jc, descc )
2422 *
2423  ELSE IF( scode.EQ.36 ) THEN
2424 *
2425  CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2426  $ b, ib, jb, descb, usclr, c, ic, jc, descc )
2427 *
2428  ELSE IF( scode.EQ.37 ) THEN
2429 *
2430  CALL subptr( mdim, ndim, sclr, a, ia, ja, desca, sclr, c, ic,
2431  $ jc, descc )
2432 *
2433  ELSE IF( scode.EQ.38 ) THEN
2434 *
2435  CALL subptr( side, uplo, transa, diag, mdim, ndim, sclr, a, ia,
2436  $ ja, desca, b, ib, jb, descb )
2437 *
2438  ELSE IF( scode.EQ.39 ) THEN
2439 *
2440  CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, sclr,
2441  $ c, ic, jc, descc )
2442 *
2443  ELSE IF( scode.EQ.40 ) THEN
2444 *
2445  CALL subptr( uplo, transa, mdim, ndim, sclr, a, ia, ja, desca,
2446  $ sclr, c, ic, jc, descc )
2447 *
2448  END IF
2449 *
2450  RETURN
2451 *
2452 * End of PDCALLSUB
2453 *
2454  END
2455  SUBROUTINE pderrset( ERR, ERRMAX, XTRUE, X )
2457 * -- PBLAS test routine (version 2.0) --
2458 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2459 * and University of California, Berkeley.
2460 * April 1, 1998
2461 *
2462 * .. Scalar Arguments ..
2463  DOUBLE PRECISION ERR, ERRMAX, X, XTRUE
2464 * ..
2465 *
2466 * Purpose
2467 * =======
2468 *
2469 * PDERRSET computes the absolute difference ERR = |XTRUE - X| and com-
2470 * pares it with zero. ERRMAX accumulates the absolute error difference.
2471 *
2472 * Notes
2473 * =====
2474 *
2475 * A description vector is associated with each 2D block-cyclicly dis-
2476 * tributed matrix. This vector stores the information required to
2477 * establish the mapping between a matrix entry and its corresponding
2478 * process and memory location.
2479 *
2480 * In the following comments, the character _ should be read as
2481 * "of the distributed matrix". Let A be a generic term for any 2D
2482 * block cyclicly distributed matrix. Its description vector is DESCA:
2483 *
2484 * NOTATION STORED IN EXPLANATION
2485 * ---------------- --------------- ------------------------------------
2486 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2487 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2488 * the NPROW x NPCOL BLACS process grid
2489 * A is distributed over. The context
2490 * itself is global, but the handle
2491 * (the integer value) may vary.
2492 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2493 * ted matrix A, M_A >= 0.
2494 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2495 * buted matrix A, N_A >= 0.
2496 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2497 * block of the matrix A, IMB_A > 0.
2498 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2499 * left block of the matrix A,
2500 * INB_A > 0.
2501 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2502 * bute the last M_A-IMB_A rows of A,
2503 * MB_A > 0.
2504 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2505 * bute the last N_A-INB_A columns of
2506 * A, NB_A > 0.
2507 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2508 * row of the matrix A is distributed,
2509 * NPROW > RSRC_A >= 0.
2510 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2511 * first column of A is distributed.
2512 * NPCOL > CSRC_A >= 0.
2513 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2514 * array storing the local blocks of
2515 * the distributed matrix A,
2516 * IF( Lc( 1, N_A ) > 0 )
2517 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2518 * ELSE
2519 * LLD_A >= 1.
2520 *
2521 * Let K be the number of rows of a matrix A starting at the global in-
2522 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2523 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2524 * receive if these K rows were distributed over NPROW processes. If K
2525 * is the number of columns of a matrix A starting at the global index
2526 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2527 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2528 * these K columns were distributed over NPCOL processes.
2529 *
2530 * The values of Lr() and Lc() may be determined via a call to the func-
2531 * tion PB_NUMROC:
2532 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2533 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2534 *
2535 * Arguments
2536 * =========
2537 *
2538 * ERR (local output) DOUBLE PRECISION
2539 * On exit, ERR specifies the absolute difference |XTRUE - X|.
2540 *
2541 * ERRMAX (local input/local output) DOUBLE PRECISION
2542 * On entry, ERRMAX specifies a previously computed error. On
2543 * exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ).
2544 *
2545 * XTRUE (local input) DOUBLE PRECISION
2546 * On entry, XTRUE specifies the true value.
2547 *
2548 * X (local input) DOUBLE PRECISION
2549 * On entry, X specifies the value to be compared to XTRUE.
2550 *
2551 * -- Written on April 1, 1998 by
2552 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2553 *
2554 * =====================================================================
2555 *
2556 * .. External Functions ..
2557  DOUBLE PRECISION PDDIFF
2558  EXTERNAL PDDIFF
2559 * ..
2560 * .. Intrinsic Functions ..
2561  INTRINSIC abs, max
2562 * ..
2563 * .. Executable Statements ..
2564 *
2565  err = abs( pddiff( xtrue, x ) )
2566 *
2567  errmax = max( errmax, err )
2568 *
2569  RETURN
2570 *
2571 * End of PDERRSET
2572 *
2573  END
2574  SUBROUTINE pdchkvin( ERRMAX, N, X, PX, IX, JX, DESCX, INCX,
2575  $ INFO )
2577 * -- PBLAS test routine (version 2.0) --
2578 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2579 * and University of California, Berkeley.
2580 * April 1, 1998
2581 *
2582 * .. Scalar Arguments ..
2583  INTEGER INCX, INFO, IX, JX, N
2584  DOUBLE PRECISION ERRMAX
2585 * ..
2586 * .. Array Arguments ..
2587  INTEGER DESCX( * )
2588  DOUBLE PRECISION PX( * ), X( * )
2589 * ..
2590 *
2591 * Purpose
2592 * =======
2593 *
2594 * PDCHKVIN checks that the submatrix sub( PX ) remained unchanged. The
2595 * local array entries are compared element by element, and their dif-
2596 * ference is tested against 0.0 as well as the epsilon machine. Notice
2597 * that this difference should be numerically exactly the zero machine,
2598 * but because of the possible fluctuation of some of the data we flag-
2599 * ged differently a difference less than twice the epsilon machine. The
2600 * largest error is also returned.
2601 *
2602 * Notes
2603 * =====
2604 *
2605 * A description vector is associated with each 2D block-cyclicly dis-
2606 * tributed matrix. This vector stores the information required to
2607 * establish the mapping between a matrix entry and its corresponding
2608 * process and memory location.
2609 *
2610 * In the following comments, the character _ should be read as
2611 * "of the distributed matrix". Let A be a generic term for any 2D
2612 * block cyclicly distributed matrix. Its description vector is DESCA:
2613 *
2614 * NOTATION STORED IN EXPLANATION
2615 * ---------------- --------------- ------------------------------------
2616 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2617 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2618 * the NPROW x NPCOL BLACS process grid
2619 * A is distributed over. The context
2620 * itself is global, but the handle
2621 * (the integer value) may vary.
2622 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2623 * ted matrix A, M_A >= 0.
2624 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2625 * buted matrix A, N_A >= 0.
2626 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2627 * block of the matrix A, IMB_A > 0.
2628 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2629 * left block of the matrix A,
2630 * INB_A > 0.
2631 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2632 * bute the last M_A-IMB_A rows of A,
2633 * MB_A > 0.
2634 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2635 * bute the last N_A-INB_A columns of
2636 * A, NB_A > 0.
2637 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2638 * row of the matrix A is distributed,
2639 * NPROW > RSRC_A >= 0.
2640 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2641 * first column of A is distributed.
2642 * NPCOL > CSRC_A >= 0.
2643 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2644 * array storing the local blocks of
2645 * the distributed matrix A,
2646 * IF( Lc( 1, N_A ) > 0 )
2647 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2648 * ELSE
2649 * LLD_A >= 1.
2650 *
2651 * Let K be the number of rows of a matrix A starting at the global in-
2652 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2653 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2654 * receive if these K rows were distributed over NPROW processes. If K
2655 * is the number of columns of a matrix A starting at the global index
2656 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2657 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2658 * these K columns were distributed over NPCOL processes.
2659 *
2660 * The values of Lr() and Lc() may be determined via a call to the func-
2661 * tion PB_NUMROC:
2662 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2663 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2664 *
2665 * Arguments
2666 * =========
2667 *
2668 * ERRMAX (global output) DOUBLE PRECISION
2669 * On exit, ERRMAX specifies the largest absolute element-wise
2670 * difference between sub( X ) and sub( PX ).
2671 *
2672 * N (global input) INTEGER
2673 * On entry, N specifies the length of the subvector operand
2674 * sub( X ). N must be at least zero.
2675 *
2676 * X (local input) DOUBLE PRECISION array
2677 * On entry, X is an array of dimension (DESCX( M_ ),*). This
2678 * array contains a local copy of the initial entire matrix PX.
2679 *
2680 * PX (local input) DOUBLE PRECISION array
2681 * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2682 * array contains the local entries of the matrix PX.
2683 *
2684 * IX (global input) INTEGER
2685 * On entry, IX specifies X's global row index, which points to
2686 * the beginning of the submatrix sub( X ).
2687 *
2688 * JX (global input) INTEGER
2689 * On entry, JX specifies X's global column index, which points
2690 * to the beginning of the submatrix sub( X ).
2691 *
2692 * DESCX (global and local input) INTEGER array
2693 * On entry, DESCX is an integer array of dimension DLEN_. This
2694 * is the array descriptor for the matrix X.
2695 *
2696 * INCX (global input) INTEGER
2697 * On entry, INCX specifies the global increment for the
2698 * elements of X. Only two values of INCX are supported in
2699 * this version, namely 1 and M_X. INCX must not be zero.
2700 *
2701 * INFO (global output) INTEGER
2702 * On exit, if INFO = 0, no error has been found,
2703 * If INFO > 0, the maximum abolute error found is in (0,eps],
2704 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
2705 *
2706 * -- Written on April 1, 1998 by
2707 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2708 *
2709 * =====================================================================
2710 *
2711 * .. Parameters ..
2712  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2713  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2714  $ RSRC_
2715  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
2716  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2717  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2718  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2719  DOUBLE PRECISION ZERO
2720  PARAMETER ( ZERO = 0.0d+0 )
2721 * ..
2722 * .. Local Scalars ..
2723  LOGICAL COLREP, ROWREP
2724  INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL,
2725  $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL,
2726  $ MYCOL, MYROW, NPCOL, NPROW
2727  DOUBLE PRECISION ERR, EPS
2728 * ..
2729 * .. External Subroutines ..
2730  EXTERNAL blacs_gridinfo, dgamx2d, pb_infog2l, pderrset
2731 * ..
2732 * .. External Functions ..
2733  DOUBLE PRECISION PDLAMCH
2734  EXTERNAL pdlamch
2735 * ..
2736 * .. Intrinsic Functions ..
2737  INTRINSIC abs, max, min, mod
2738 * ..
2739 * .. Executable Statements ..
2740 *
2741  info = 0
2742  errmax = zero
2743 *
2744 * Quick return if possible
2745 *
2746  IF( n.LE.0 )
2747  $ RETURN
2748 *
2749  ictxt = descx( ctxt_ )
2750  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
2751 *
2752  eps = pdlamch( ictxt, 'eps' )
2753 *
2754  CALL pb_infog2l( ix, jx, descx, nprow, npcol, myrow, mycol, iix,
2755  $ jjx, ixrow, ixcol )
2756 *
2757  ldx = descx( m_ )
2758  ldpx = descx( lld_ )
2759  rowrep = ( ixrow.EQ.-1 )
2760  colrep = ( ixcol.EQ.-1 )
2761 *
2762  IF( n.EQ.1 ) THEN
2763 *
2764  IF( ( myrow.EQ.ixrow .OR. rowrep ) .AND.
2765  $ ( mycol.EQ.ixcol .OR. colrep ) )
2766  $ CALL pderrset( err, errmax, x( ix+(jx-1)*ldx ),
2767  $ px( iix+(jjx-1)*ldpx ) )
2768 *
2769  ELSE IF( incx.EQ.descx( m_ ) ) THEN
2770 *
2771 * sub( X ) is a row vector
2772 *
2773  jb = descx( inb_ ) - jx + 1
2774  IF( jb.LE.0 )
2775  $ jb = ( ( -jb ) / descx( nb_ ) + 1 ) * descx( nb_ ) + jb
2776  jb = min( jb, n )
2777  jn = jx + jb - 1
2778 *
2779  IF( myrow.EQ.ixrow .OR. rowrep ) THEN
2780 *
2781  icurcol = ixcol
2782  IF( mycol.EQ.icurcol .OR. colrep ) THEN
2783  DO 10 j = jx, jn
2784  CALL pderrset( err, errmax, x( ix+(j-1)*ldx ),
2785  $ px( iix+(jjx-1)*ldpx ) )
2786  jjx = jjx + 1
2787  10 CONTINUE
2788  END IF
2789  icurcol = mod( icurcol+1, npcol )
2790 *
2791  DO 30 j = jn+1, jx+n-1, descx( nb_ )
2792  jb = min( jx+n-j, descx( nb_ ) )
2793 *
2794  IF( mycol.EQ.icurcol .OR. colrep ) THEN
2795 *
2796  DO 20 kk = 0, jb-1
2797  CALL pderrset( err, errmax, x( ix+(j+kk-1)*ldx ),
2798  $ px( iix+(jjx+kk-1)*ldpx ) )
2799  20 CONTINUE
2800 *
2801  jjx = jjx + jb
2802 *
2803  END IF
2804 *
2805  icurcol = mod( icurcol+1, npcol )
2806 *
2807  30 CONTINUE
2808 *
2809  END IF
2810 *
2811  ELSE
2812 *
2813 * sub( X ) is a column vector
2814 *
2815  ib = descx( imb_ ) - ix + 1
2816  IF( ib.LE.0 )
2817  $ ib = ( ( -ib ) / descx( mb_ ) + 1 ) * descx( mb_ ) + ib
2818  ib = min( ib, n )
2819  in = ix + ib - 1
2820 *
2821  IF( mycol.EQ.ixcol .OR. colrep ) THEN
2822 *
2823  icurrow = ixrow
2824  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2825  DO 40 i = ix, in
2826  CALL pderrset( err, errmax, x( i+(jx-1)*ldx ),
2827  $ px( iix+(jjx-1)*ldpx ) )
2828  iix = iix + 1
2829  40 CONTINUE
2830  END IF
2831  icurrow = mod( icurrow+1, nprow )
2832 *
2833  DO 60 i = in+1, ix+n-1, descx( mb_ )
2834  ib = min( ix+n-i, descx( mb_ ) )
2835 *
2836  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2837 *
2838  DO 50 kk = 0, ib-1
2839  CALL pderrset( err, errmax, x( i+kk+(jx-1)*ldx ),
2840  $ px( iix+kk+(jjx-1)*ldpx ) )
2841  50 CONTINUE
2842 *
2843  iix = iix + ib
2844 *
2845  END IF
2846 *
2847  icurrow = mod( icurrow+1, nprow )
2848 *
2849  60 CONTINUE
2850 *
2851  END IF
2852 *
2853  END IF
2854 *
2855  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
2856  $ -1, -1 )
2857 *
2858  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
2859  info = 1
2860  ELSE IF( errmax.GT.eps ) THEN
2861  info = -1
2862  END IF
2863 *
2864  RETURN
2865 *
2866 * End of PDCHKVIN
2867 *
2868  END
2869  SUBROUTINE pdchkvout( N, X, PX, IX, JX, DESCX, INCX, INFO )
2871 * -- PBLAS test routine (version 2.0) --
2872 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2873 * and University of California, Berkeley.
2874 * April 1, 1998
2875 *
2876 * .. Scalar Arguments ..
2877  INTEGER INCX, INFO, IX, JX, N
2878 * ..
2879 * .. Array Arguments ..
2880  INTEGER DESCX( * )
2881  DOUBLE PRECISION PX( * ), X( * )
2882 * ..
2883 *
2884 * Purpose
2885 * =======
2886 *
2887 * PDCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged.
2888 * The local array entries are compared element by element, and their
2889 * difference is tested against 0.0 as well as the epsilon machine. No-
2890 * tice that this difference should be numerically exactly the zero ma-
2891 * chine, but because of the possible movement of some of the data we
2892 * flagged differently a difference less than twice the epsilon machine.
2893 * The largest error is reported.
2894 *
2895 * Notes
2896 * =====
2897 *
2898 * A description vector is associated with each 2D block-cyclicly dis-
2899 * tributed matrix. This vector stores the information required to
2900 * establish the mapping between a matrix entry and its corresponding
2901 * process and memory location.
2902 *
2903 * In the following comments, the character _ should be read as
2904 * "of the distributed matrix". Let A be a generic term for any 2D
2905 * block cyclicly distributed matrix. Its description vector is DESCA:
2906 *
2907 * NOTATION STORED IN EXPLANATION
2908 * ---------------- --------------- ------------------------------------
2909 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2910 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2911 * the NPROW x NPCOL BLACS process grid
2912 * A is distributed over. The context
2913 * itself is global, but the handle
2914 * (the integer value) may vary.
2915 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
2916 * ted matrix A, M_A >= 0.
2917 * N_A (global) DESCA( N_ ) The number of columns in the distri-
2918 * buted matrix A, N_A >= 0.
2919 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2920 * block of the matrix A, IMB_A > 0.
2921 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
2922 * left block of the matrix A,
2923 * INB_A > 0.
2924 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2925 * bute the last M_A-IMB_A rows of A,
2926 * MB_A > 0.
2927 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2928 * bute the last N_A-INB_A columns of
2929 * A, NB_A > 0.
2930 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2931 * row of the matrix A is distributed,
2932 * NPROW > RSRC_A >= 0.
2933 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2934 * first column of A is distributed.
2935 * NPCOL > CSRC_A >= 0.
2936 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2937 * array storing the local blocks of
2938 * the distributed matrix A,
2939 * IF( Lc( 1, N_A ) > 0 )
2940 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
2941 * ELSE
2942 * LLD_A >= 1.
2943 *
2944 * Let K be the number of rows of a matrix A starting at the global in-
2945 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2946 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2947 * receive if these K rows were distributed over NPROW processes. If K
2948 * is the number of columns of a matrix A starting at the global index
2949 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2950 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2951 * these K columns were distributed over NPCOL processes.
2952 *
2953 * The values of Lr() and Lc() may be determined via a call to the func-
2954 * tion PB_NUMROC:
2955 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2956 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2957 *
2958 * Arguments
2959 * =========
2960 *
2961 * N (global input) INTEGER
2962 * On entry, N specifies the length of the subvector operand
2963 * sub( X ). N must be at least zero.
2964 *
2965 * X (local input) DOUBLE PRECISION array
2966 * On entry, X is an array of dimension (DESCX( M_ ),*). This
2967 * array contains a local copy of the initial entire matrix PX.
2968 *
2969 * PX (local input) DOUBLE PRECISION array
2970 * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2971 * array contains the local entries of the matrix PX.
2972 *
2973 * IX (global input) INTEGER
2974 * On entry, IX specifies X's global row index, which points to
2975 * the beginning of the submatrix sub( X ).
2976 *
2977 * JX (global input) INTEGER
2978 * On entry, JX specifies X's global column index, which points
2979 * to the beginning of the submatrix sub( X ).
2980 *
2981 * DESCX (global and local input) INTEGER array
2982 * On entry, DESCX is an integer array of dimension DLEN_. This
2983 * is the array descriptor for the matrix X.
2984 *
2985 * INCX (global input) INTEGER
2986 * On entry, INCX specifies the global increment for the
2987 * elements of X. Only two values of INCX are supported in
2988 * this version, namely 1 and M_X. INCX must not be zero.
2989 *
2990 * INFO (global output) INTEGER
2991 * On exit, if INFO = 0, no error has been found,
2992 * If INFO > 0, the maximum abolute error found is in (0,eps],
2993 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
2994 *
2995 * -- Written on April 1, 1998 by
2996 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2997 *
2998 * =====================================================================
2999 *
3000 * .. Parameters ..
3001  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3002  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3003  $ RSRC_
3004  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3005  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3006  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3007  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3008  DOUBLE PRECISION ZERO
3009  PARAMETER ( ZERO = 0.0d+0 )
3010 * ..
3011 * .. Local Scalars ..
3012  LOGICAL COLREP, ROWREP
3013  INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX,
3014  $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL,
3015  $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL,
3016  $ nprow, nqall
3017  DOUBLE PRECISION EPS, ERR, ERRMAX
3018 * ..
3019 * .. External Subroutines ..
3020  EXTERNAL BLACS_GRIDINFO, DGAMX2D, PDERRSET
3021 * ..
3022 * .. External Functions ..
3023  INTEGER PB_NUMROC
3024  DOUBLE PRECISION PDLAMCH
3025  EXTERNAL PDLAMCH, PB_NUMROC
3026 * ..
3027 * .. Intrinsic Functions ..
3028  INTRINSIC abs, max, min, mod
3029 * ..
3030 * .. Executable Statements ..
3031 *
3032  info = 0
3033  errmax = zero
3034 *
3035 * Quick return if possible
3036 *
3037  IF( ( descx( m_ ).LE.0 ).OR.( descx( n_ ).LE.0 ) )
3038  $ RETURN
3039 *
3040 * Start the operations
3041 *
3042  ictxt = descx( ctxt_ )
3043  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3044 *
3045  eps = pdlamch( ictxt, 'eps' )
3046 *
3047  mpall = pb_numroc( descx( m_ ), 1, descx( imb_ ), descx( mb_ ),
3048  $ myrow, descx( rsrc_ ), nprow )
3049  nqall = pb_numroc( descx( n_ ), 1, descx( inb_ ), descx( nb_ ),
3050  $ mycol, descx( csrc_ ), npcol )
3051 *
3052  mbx = descx( mb_ )
3053  nbx = descx( nb_ )
3054  ldx = descx( m_ )
3055  ldpx = descx( lld_ )
3056  icurrow = descx( rsrc_ )
3057  icurcol = descx( csrc_ )
3058  rowrep = ( icurrow.EQ.-1 )
3059  colrep = ( icurcol.EQ.-1 )
3060  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3061  imbx = descx( imb_ )
3062  ELSE
3063  imbx = mbx
3064  END IF
3065  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3066  inbx = descx( inb_ )
3067  ELSE
3068  inbx = nbx
3069  END IF
3070  IF( rowrep ) THEN
3071  myrowdist = 0
3072  ELSE
3073  myrowdist = mod( myrow - icurrow + nprow, nprow )
3074  END IF
3075  IF( colrep ) THEN
3076  mycoldist = 0
3077  ELSE
3078  mycoldist = mod( mycol - icurcol + npcol, npcol )
3079  END IF
3080  ii = 1
3081  jj = 1
3082 *
3083  IF( incx.EQ.descx( m_ ) ) THEN
3084 *
3085 * sub( X ) is a row vector
3086 *
3087  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3088 *
3089  i = 1
3090  IF( mycoldist.EQ.0 ) THEN
3091  j = 1
3092  ELSE
3093  j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3094  END IF
3095  jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3096  ib = min( descx( m_ ), descx( imb_ ) )
3097 *
3098  DO 20 kk = 0, jb-1
3099  DO 10 ll = 0, ib-1
3100  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR. j+kk.GT.jx+n-1 )
3101  $ CALL pderrset( err, errmax,
3102  $ x( i+ll+(j+kk-1)*ldx ),
3103  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3104  10 CONTINUE
3105  20 CONTINUE
3106  IF( colrep ) THEN
3107  j = j + inbx
3108  ELSE
3109  j = j + inbx + ( npcol - 1 ) * nbx
3110  END IF
3111 *
3112  DO 50 jj = inbx+1, nqall, nbx
3113  jb = min( nqall-jj+1, nbx )
3114 *
3115  DO 40 kk = 0, jb-1
3116  DO 30 ll = 0, ib-1
3117  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3118  $ j+kk.GT.jx+n-1 )
3119  $ CALL pderrset( err, errmax,
3120  $ x( i+ll+(j+kk-1)*ldx ),
3121  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3122  30 CONTINUE
3123  40 CONTINUE
3124 *
3125  IF( colrep ) THEN
3126  j = j + nbx
3127  ELSE
3128  j = j + npcol * nbx
3129  END IF
3130 *
3131  50 CONTINUE
3132 *
3133  ii = ii + ib
3134 *
3135  END IF
3136 *
3137  icurrow = mod( icurrow + 1, nprow )
3138 *
3139  DO 110 i = descx( imb_ ) + 1, descx( m_ ), mbx
3140  ib = min( descx( m_ ) - i + 1, mbx )
3141 *
3142  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3143 *
3144  IF( mycoldist.EQ.0 ) THEN
3145  j = 1
3146  ELSE
3147  j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3148  END IF
3149 *
3150  jj = 1
3151  jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3152  DO 70 kk = 0, jb-1
3153  DO 60 ll = 0, ib-1
3154  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3155  $ j+kk.GT.jx+n-1 )
3156  $ CALL pderrset( err, errmax,
3157  $ x( i+ll+(j+kk-1)*ldx ),
3158  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3159  60 CONTINUE
3160  70 CONTINUE
3161  IF( colrep ) THEN
3162  j = j + inbx
3163  ELSE
3164  j = j + inbx + ( npcol - 1 ) * nbx
3165  END IF
3166 *
3167  DO 100 jj = inbx+1, nqall, nbx
3168  jb = min( nqall-jj+1, nbx )
3169 *
3170  DO 90 kk = 0, jb-1
3171  DO 80 ll = 0, ib-1
3172  IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3173  $ j+kk.GT.jx+n-1 )
3174  $ CALL pderrset( err, errmax,
3175  $ x( i+ll+(j+kk-1)*ldx ),
3176  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3177  80 CONTINUE
3178  90 CONTINUE
3179 *
3180  IF( colrep ) THEN
3181  j = j + nbx
3182  ELSE
3183  j = j + npcol * nbx
3184  END IF
3185 *
3186  100 CONTINUE
3187 *
3188  ii = ii + ib
3189 *
3190  END IF
3191 *
3192  icurrow = mod( icurrow + 1, nprow )
3193 *
3194  110 CONTINUE
3195 *
3196  ELSE
3197 *
3198 * sub( X ) is a column vector
3199 *
3200  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3201 *
3202  j = 1
3203  IF( myrowdist.EQ.0 ) THEN
3204  i = 1
3205  ELSE
3206  i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3207  END IF
3208  ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3209  jb = min( descx( n_ ), descx( inb_ ) )
3210 *
3211  DO 130 kk = 0, jb-1
3212  DO 120 ll = 0, ib-1
3213  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR. i+ll.GT.ix+n-1 )
3214  $ CALL pderrset( err, errmax,
3215  $ x( i+ll+(j+kk-1)*ldx ),
3216  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3217  120 CONTINUE
3218  130 CONTINUE
3219  IF( rowrep ) THEN
3220  i = i + imbx
3221  ELSE
3222  i = i + imbx + ( nprow - 1 ) * mbx
3223  END IF
3224 *
3225  DO 160 ii = imbx+1, mpall, mbx
3226  ib = min( mpall-ii+1, mbx )
3227 *
3228  DO 150 kk = 0, jb-1
3229  DO 140 ll = 0, ib-1
3230  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3231  $ i+ll.GT.ix+n-1 )
3232  $ CALL pderrset( err, errmax,
3233  $ x( i+ll+(j+kk-1)*ldx ),
3234  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3235  140 CONTINUE
3236  150 CONTINUE
3237 *
3238  IF( rowrep ) THEN
3239  i = i + mbx
3240  ELSE
3241  i = i + nprow * mbx
3242  END IF
3243 *
3244  160 CONTINUE
3245 *
3246  jj = jj + jb
3247 *
3248  END IF
3249 *
3250  icurcol = mod( icurcol + 1, npcol )
3251 *
3252  DO 220 j = descx( inb_ ) + 1, descx( n_ ), nbx
3253  jb = min( descx( n_ ) - j + 1, nbx )
3254 *
3255  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3256 *
3257  IF( myrowdist.EQ.0 ) THEN
3258  i = 1
3259  ELSE
3260  i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3261  END IF
3262 *
3263  ii = 1
3264  ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3265  DO 180 kk = 0, jb-1
3266  DO 170 ll = 0, ib-1
3267  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3268  $ i+ll.GT.ix+n-1 )
3269  $ CALL pderrset( err, errmax,
3270  $ x( i+ll+(j+kk-1)*ldx ),
3271  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3272  170 CONTINUE
3273  180 CONTINUE
3274  IF( rowrep ) THEN
3275  i = i + imbx
3276  ELSE
3277  i = i + imbx + ( nprow - 1 ) * mbx
3278  END IF
3279 *
3280  DO 210 ii = imbx+1, mpall, mbx
3281  ib = min( mpall-ii+1, mbx )
3282 *
3283  DO 200 kk = 0, jb-1
3284  DO 190 ll = 0, ib-1
3285  IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3286  $ i+ll.GT.ix+n-1 )
3287  $ CALL pderrset( err, errmax,
3288  $ x( i+ll+(j+kk-1)*ldx ),
3289  $ px( ii+ll+(jj+kk-1)*ldpx ) )
3290  190 CONTINUE
3291  200 CONTINUE
3292 *
3293  IF( rowrep ) THEN
3294  i = i + mbx
3295  ELSE
3296  i = i + nprow * mbx
3297  END IF
3298 *
3299  210 CONTINUE
3300 *
3301  jj = jj + jb
3302 *
3303  END IF
3304 *
3305  icurcol = mod( icurcol + 1, npcol )
3306 *
3307  220 CONTINUE
3308 *
3309  END IF
3310 *
3311  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3312  $ -1, -1 )
3313 *
3314  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3315  info = 1
3316  ELSE IF( errmax.GT.eps ) THEN
3317  info = -1
3318  END IF
3319 *
3320  RETURN
3321 *
3322 * End of PDCHKVOUT
3323 *
3324  END
3325  SUBROUTINE pdchkmin( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO )
3327 * -- PBLAS test routine (version 2.0) --
3328 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3329 * and University of California, Berkeley.
3330 * April 1, 1998
3331 *
3332 * .. Scalar Arguments ..
3333  INTEGER IA, INFO, JA, M, N
3334  DOUBLE PRECISION ERRMAX
3335 * ..
3336 * .. Array Arguments ..
3337  INTEGER DESCA( * )
3338  DOUBLE PRECISION PA( * ), A( * )
3339 * ..
3340 *
3341 * Purpose
3342 * =======
3343 *
3344 * PDCHKMIN checks that the submatrix sub( PA ) remained unchanged. The
3345 * local array entries are compared element by element, and their dif-
3346 * ference is tested against 0.0 as well as the epsilon machine. Notice
3347 * that this difference should be numerically exactly the zero machine,
3348 * but because of the possible fluctuation of some of the data we flag-
3349 * ged differently a difference less than twice the epsilon machine. The
3350 * largest error is also returned.
3351 *
3352 * Notes
3353 * =====
3354 *
3355 * A description vector is associated with each 2D block-cyclicly dis-
3356 * tributed matrix. This vector stores the information required to
3357 * establish the mapping between a matrix entry and its corresponding
3358 * process and memory location.
3359 *
3360 * In the following comments, the character _ should be read as
3361 * "of the distributed matrix". Let A be a generic term for any 2D
3362 * block cyclicly distributed matrix. Its description vector is DESCA:
3363 *
3364 * NOTATION STORED IN EXPLANATION
3365 * ---------------- --------------- ------------------------------------
3366 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3367 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3368 * the NPROW x NPCOL BLACS process grid
3369 * A is distributed over. The context
3370 * itself is global, but the handle
3371 * (the integer value) may vary.
3372 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
3373 * ted matrix A, M_A >= 0.
3374 * N_A (global) DESCA( N_ ) The number of columns in the distri-
3375 * buted matrix A, N_A >= 0.
3376 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3377 * block of the matrix A, IMB_A > 0.
3378 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
3379 * left block of the matrix A,
3380 * INB_A > 0.
3381 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3382 * bute the last M_A-IMB_A rows of A,
3383 * MB_A > 0.
3384 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3385 * bute the last N_A-INB_A columns of
3386 * A, NB_A > 0.
3387 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3388 * row of the matrix A is distributed,
3389 * NPROW > RSRC_A >= 0.
3390 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3391 * first column of A is distributed.
3392 * NPCOL > CSRC_A >= 0.
3393 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3394 * array storing the local blocks of
3395 * the distributed matrix A,
3396 * IF( Lc( 1, N_A ) > 0 )
3397 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
3398 * ELSE
3399 * LLD_A >= 1.
3400 *
3401 * Let K be the number of rows of a matrix A starting at the global in-
3402 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3403 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3404 * receive if these K rows were distributed over NPROW processes. If K
3405 * is the number of columns of a matrix A starting at the global index
3406 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3407 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3408 * these K columns were distributed over NPCOL processes.
3409 *
3410 * The values of Lr() and Lc() may be determined via a call to the func-
3411 * tion PB_NUMROC:
3412 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3413 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3414 *
3415 * Arguments
3416 * =========
3417 *
3418 * ERRMAX (global output) DOUBLE PRECISION
3419 * On exit, ERRMAX specifies the largest absolute element-wise
3420 * difference between sub( A ) and sub( PA ).
3421 *
3422 * M (global input) INTEGER
3423 * On entry, M specifies the number of rows of the submatrix
3424 * operand sub( A ). M must be at least zero.
3425 *
3426 * N (global input) INTEGER
3427 * On entry, N specifies the number of columns of the submatrix
3428 * operand sub( A ). N must be at least zero.
3429 *
3430 * A (local input) DOUBLE PRECISION array
3431 * On entry, A is an array of dimension (DESCA( M_ ),*). This
3432 * array contains a local copy of the initial entire matrix PA.
3433 *
3434 * PA (local input) DOUBLE PRECISION array
3435 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3436 * array contains the local entries of the matrix PA.
3437 *
3438 * IA (global input) INTEGER
3439 * On entry, IA specifies A's global row index, which points to
3440 * the beginning of the submatrix sub( A ).
3441 *
3442 * JA (global input) INTEGER
3443 * On entry, JA specifies A's global column index, which points
3444 * to the beginning of the submatrix sub( A ).
3445 *
3446 * DESCA (global and local input) INTEGER array
3447 * On entry, DESCA is an integer array of dimension DLEN_. This
3448 * is the array descriptor for the matrix A.
3449 *
3450 * INFO (global output) INTEGER
3451 * On exit, if INFO = 0, no error has been found,
3452 * If INFO > 0, the maximum abolute error found is in (0,eps],
3453 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
3454 *
3455 * -- Written on April 1, 1998 by
3456 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3457 *
3458 * =====================================================================
3459 *
3460 * .. Parameters ..
3461  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3462  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3463  $ RSRC_
3464  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3465  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3466  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3467  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3468  DOUBLE PRECISION ZERO
3469  PARAMETER ( ZERO = 0.0d+0 )
3470 * ..
3471 * .. Local Scalars ..
3472  LOGICAL COLREP, ROWREP
3473  INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
3474  $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
3475  $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW
3476  DOUBLE PRECISION ERR, EPS
3477 * ..
3478 * .. External Subroutines ..
3479  EXTERNAL blacs_gridinfo, dgamx2d, pb_infog2l, pderrset
3480 * ..
3481 * .. External Functions ..
3482  DOUBLE PRECISION PDLAMCH
3483  EXTERNAL pdlamch
3484 * ..
3485 * .. Intrinsic Functions ..
3486  INTRINSIC abs, max, min, mod
3487 * ..
3488 * .. Executable Statements ..
3489 *
3490  info = 0
3491  errmax = zero
3492 *
3493 * Quick return if posssible
3494 *
3495  IF( ( m.EQ.0 ).OR.( n.EQ.0 ) )
3496  $ RETURN
3497 *
3498 * Start the operations
3499 *
3500  ictxt = desca( ctxt_ )
3501  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3502 *
3503  eps = pdlamch( ictxt, 'eps' )
3504 *
3505  CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
3506  $ jja, iarow, iacol )
3507 *
3508  ii = iia
3509  jj = jja
3510  lda = desca( m_ )
3511  ldpa = desca( lld_ )
3512  icurrow = iarow
3513  icurcol = iacol
3514  rowrep = ( iarow.EQ.-1 )
3515  colrep = ( iacol.EQ.-1 )
3516 *
3517 * Handle the first block of column separately
3518 *
3519  jb = desca( inb_ ) - ja + 1
3520  IF( jb.LE.0 )
3521  $ jb = ( ( -jb ) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
3522  jb = min( jb, n )
3523  jn = ja + jb - 1
3524 *
3525  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3526 *
3527  DO 40 h = 0, jb-1
3528  ib = desca( imb_ ) - ia + 1
3529  IF( ib.LE.0 )
3530  $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
3531  ib = min( ib, m )
3532  in = ia + ib - 1
3533  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3534  DO 10 k = 0, ib-1
3535  CALL pderrset( err, errmax, a( ia+k+(ja+h-1)*lda ),
3536  $ pa( ii+k+(jj+h-1)*ldpa ) )
3537  10 CONTINUE
3538  ii = ii + ib
3539  END IF
3540  icurrow = mod( icurrow+1, nprow )
3541 *
3542 * Loop over remaining block of rows
3543 *
3544  DO 30 i = in+1, ia+m-1, desca( mb_ )
3545  ib = min( desca( mb_ ), ia+m-i )
3546  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3547  DO 20 k = 0, ib-1
3548  CALL pderrset( err, errmax, a( i+k+(ja+h-1)*lda ),
3549  $ pa( ii+k+(jj+h-1)*ldpa ) )
3550  20 CONTINUE
3551  ii = ii + ib
3552  END IF
3553  icurrow = mod( icurrow+1, nprow )
3554  30 CONTINUE
3555 *
3556  ii = iia
3557  icurrow = iarow
3558  40 CONTINUE
3559 *
3560  jj = jj + jb
3561 *
3562  END IF
3563 *
3564  icurcol = mod( icurcol+1, npcol )
3565 *
3566 * Loop over remaining column blocks
3567 *
3568  DO 90 j = jn+1, ja+n-1, desca( nb_ )
3569  jb = min( desca( nb_ ), ja+n-j )
3570  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3571  DO 80 h = 0, jb-1
3572  ib = desca( imb_ ) - ia + 1
3573  IF( ib.LE.0 )
3574  $ ib = ( ( -ib ) / desca( mb_ ) + 1 )*desca( mb_ ) + ib
3575  ib = min( ib, m )
3576  in = ia + ib - 1
3577  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3578  DO 50 k = 0, ib-1
3579  CALL pderrset( err, errmax, a( ia+k+(j+h-1)*lda ),
3580  $ pa( ii+k+(jj+h-1)*ldpa ) )
3581  50 CONTINUE
3582  ii = ii + ib
3583  END IF
3584  icurrow = mod( icurrow+1, nprow )
3585 *
3586 * Loop over remaining block of rows
3587 *
3588  DO 70 i = in+1, ia+m-1, desca( mb_ )
3589  ib = min( desca( mb_ ), ia+m-i )
3590  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3591  DO 60 k = 0, ib-1
3592  CALL pderrset( err, errmax,
3593  $ a( i+k+(j+h-1)*lda ),
3594  $ pa( ii+k+(jj+h-1)*ldpa ) )
3595  60 CONTINUE
3596  ii = ii + ib
3597  END IF
3598  icurrow = mod( icurrow+1, nprow )
3599  70 CONTINUE
3600 *
3601  ii = iia
3602  icurrow = iarow
3603  80 CONTINUE
3604 *
3605  jj = jj + jb
3606  END IF
3607 *
3608  icurcol = mod( icurcol+1, npcol )
3609 *
3610  90 CONTINUE
3611 *
3612  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3613  $ -1, -1 )
3614 *
3615  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3616  info = 1
3617  ELSE IF( errmax.GT.eps ) THEN
3618  info = -1
3619  END IF
3620 *
3621  RETURN
3622 *
3623 * End of PDCHKMIN
3624 *
3625  END
3626  SUBROUTINE pdchkmout( M, N, A, PA, IA, JA, DESCA, INFO )
3628 * -- PBLAS test routine (version 2.0) --
3629 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3630 * and University of California, Berkeley.
3631 * April 1, 1998
3632 *
3633 * .. Scalar Arguments ..
3634  INTEGER IA, INFO, JA, M, N
3635 * ..
3636 * .. Array Arguments ..
3637  INTEGER DESCA( * )
3638  DOUBLE PRECISION A( * ), PA( * )
3639 * ..
3640 *
3641 * Purpose
3642 * =======
3643 *
3644 * PDCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged.
3645 * The local array entries are compared element by element, and their
3646 * difference is tested against 0.0 as well as the epsilon machine. No-
3647 * tice that this difference should be numerically exactly the zero ma-
3648 * chine, but because of the possible movement of some of the data we
3649 * flagged differently a difference less than twice the epsilon machine.
3650 * The largest error is reported.
3651 *
3652 * Notes
3653 * =====
3654 *
3655 * A description vector is associated with each 2D block-cyclicly dis-
3656 * tributed matrix. This vector stores the information required to
3657 * establish the mapping between a matrix entry and its corresponding
3658 * process and memory location.
3659 *
3660 * In the following comments, the character _ should be read as
3661 * "of the distributed matrix". Let A be a generic term for any 2D
3662 * block cyclicly distributed matrix. Its description vector is DESCA:
3663 *
3664 * NOTATION STORED IN EXPLANATION
3665 * ---------------- --------------- ------------------------------------
3666 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3667 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3668 * the NPROW x NPCOL BLACS process grid
3669 * A is distributed over. The context
3670 * itself is global, but the handle
3671 * (the integer value) may vary.
3672 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
3673 * ted matrix A, M_A >= 0.
3674 * N_A (global) DESCA( N_ ) The number of columns in the distri-
3675 * buted matrix A, N_A >= 0.
3676 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3677 * block of the matrix A, IMB_A > 0.
3678 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
3679 * left block of the matrix A,
3680 * INB_A > 0.
3681 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3682 * bute the last M_A-IMB_A rows of A,
3683 * MB_A > 0.
3684 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3685 * bute the last N_A-INB_A columns of
3686 * A, NB_A > 0.
3687 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3688 * row of the matrix A is distributed,
3689 * NPROW > RSRC_A >= 0.
3690 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3691 * first column of A is distributed.
3692 * NPCOL > CSRC_A >= 0.
3693 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3694 * array storing the local blocks of
3695 * the distributed matrix A,
3696 * IF( Lc( 1, N_A ) > 0 )
3697 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
3698 * ELSE
3699 * LLD_A >= 1.
3700 *
3701 * Let K be the number of rows of a matrix A starting at the global in-
3702 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3703 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3704 * receive if these K rows were distributed over NPROW processes. If K
3705 * is the number of columns of a matrix A starting at the global index
3706 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3707 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3708 * these K columns were distributed over NPCOL processes.
3709 *
3710 * The values of Lr() and Lc() may be determined via a call to the func-
3711 * tion PB_NUMROC:
3712 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3713 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3714 *
3715 * Arguments
3716 * =========
3717 *
3718 * M (global input) INTEGER
3719 * On entry, M specifies the number of rows of the submatrix
3720 * sub( PA ). M must be at least zero.
3721 *
3722 * N (global input) INTEGER
3723 * On entry, N specifies the number of columns of the submatrix
3724 * sub( PA ). N must be at least zero.
3725 *
3726 * A (local input) DOUBLE PRECISION array
3727 * On entry, A is an array of dimension (DESCA( M_ ),*). This
3728 * array contains a local copy of the initial entire matrix PA.
3729 *
3730 * PA (local input) DOUBLE PRECISION array
3731 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3732 * array contains the local entries of the matrix PA.
3733 *
3734 * IA (global input) INTEGER
3735 * On entry, IA specifies A's global row index, which points to
3736 * the beginning of the submatrix sub( A ).
3737 *
3738 * JA (global input) INTEGER
3739 * On entry, JA specifies A's global column index, which points
3740 * to the beginning of the submatrix sub( A ).
3741 *
3742 * DESCA (global and local input) INTEGER array
3743 * On entry, DESCA is an integer array of dimension DLEN_. This
3744 * is the array descriptor for the matrix A.
3745 *
3746 * INFO (global output) INTEGER
3747 * On exit, if INFO = 0, no error has been found,
3748 * If INFO > 0, the maximum abolute error found is in (0,eps],
3749 * If INFO < 0, the maximum abolute error found is in (eps,+oo).
3750 *
3751 * -- Written on April 1, 1998 by
3752 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3753 *
3754 * =====================================================================
3755 *
3756 * .. Parameters ..
3757  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3758  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3759  $ RSRC_
3760  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3761  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3762  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3763  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3764  DOUBLE PRECISION ZERO
3765  PARAMETER ( ZERO = 0.0d+0 )
3766 * ..
3767 * .. Local Scalars ..
3768  LOGICAL COLREP, ROWREP
3769  INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK,
3770  $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST,
3771  $ NPCOL, NPROW
3772  DOUBLE PRECISION EPS, ERR, ERRMAX
3773 * ..
3774 * .. External Subroutines ..
3775  EXTERNAL blacs_gridinfo, dgamx2d, pderrset
3776 * ..
3777 * .. External Functions ..
3778  INTEGER PB_NUMROC
3779  DOUBLE PRECISION PDLAMCH
3780  EXTERNAL PDLAMCH, PB_NUMROC
3781 * ..
3782 * .. Intrinsic Functions ..
3783  INTRINSIC max, min, mod
3784 * ..
3785 * .. Executable Statements ..
3786 *
3787  info = 0
3788  errmax = zero
3789 *
3790 * Quick return if possible
3791 *
3792  IF( ( desca( m_ ).LE.0 ).OR.( desca( n_ ).LE.0 ) )
3793  $ RETURN
3794 *
3795 * Start the operations
3796 *
3797  ictxt = desca( ctxt_ )
3798  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3799 *
3800  eps = pdlamch( ictxt, 'eps' )
3801 *
3802  mpall = pb_numroc( desca( m_ ), 1, desca( imb_ ), desca( mb_ ),
3803  $ myrow, desca( rsrc_ ), nprow )
3804 *
3805  lda = desca( m_ )
3806  ldpa = desca( lld_ )
3807 *
3808  ii = 1
3809  jj = 1
3810  rowrep = ( desca( rsrc_ ).EQ.-1 )
3811  colrep = ( desca( csrc_ ).EQ.-1 )
3812  icurcol = desca( csrc_ )
3813  IF( myrow.EQ.desca( rsrc_ ) .OR. rowrep ) THEN
3814  imba = desca( imb_ )
3815  ELSE
3816  imba = desca( mb_ )
3817  END IF
3818  IF( rowrep ) THEN
3819  myrowdist = 0
3820  ELSE
3821  myrowdist = mod( myrow - desca( rsrc_ ) + nprow, nprow )
3822  END IF
3823 *
3824  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3825 *
3826  j = 1
3827  IF( myrowdist.EQ.0 ) THEN
3828  i = 1
3829  ELSE
3830  i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3831  END IF
3832  ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3833  jb = min( desca( n_ ), desca( inb_ ) )
3834 *
3835  DO 20 kk = 0, jb-1
3836  DO 10 ll = 0, ib-1
3837  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3838  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3839  $ CALL pderrset( err, errmax, a( i+ll+(j+kk-1)*lda ),
3840  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3841  10 CONTINUE
3842  20 CONTINUE
3843  IF( rowrep ) THEN
3844  i = i + imba
3845  ELSE
3846  i = i + imba + ( nprow - 1 ) * desca( mb_ )
3847  END IF
3848 *
3849  DO 50 ii = imba + 1, mpall, desca( mb_ )
3850  ib = min( mpall-ii+1, desca( mb_ ) )
3851 *
3852  DO 40 kk = 0, jb-1
3853  DO 30 ll = 0, ib-1
3854  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3855  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3856  $ CALL pderrset( err, errmax,
3857  $ a( i+ll+(j+kk-1)*lda ),
3858  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3859  30 CONTINUE
3860  40 CONTINUE
3861 *
3862  IF( rowrep ) THEN
3863  i = i + desca( mb_ )
3864  ELSE
3865  i = i + nprow * desca( mb_ )
3866  END IF
3867 *
3868  50 CONTINUE
3869 *
3870  jj = jj + jb
3871 *
3872  END IF
3873 *
3874  icurcol = mod( icurcol + 1, npcol )
3875 *
3876  DO 110 j = desca( inb_ ) + 1, desca( n_ ), desca( nb_ )
3877  jb = min( desca( n_ ) - j + 1, desca( nb_ ) )
3878 *
3879  IF( mycol.EQ.icurcol .OR. colrep ) THEN
3880 *
3881  IF( myrowdist.EQ.0 ) THEN
3882  i = 1
3883  ELSE
3884  i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3885  END IF
3886 *
3887  ii = 1
3888  ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3889  DO 70 kk = 0, jb-1
3890  DO 60 ll = 0, ib-1
3891  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3892  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3893  $ CALL pderrset( err, errmax,
3894  $ a( i+ll+(j+kk-1)*lda ),
3895  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3896  60 CONTINUE
3897  70 CONTINUE
3898  IF( rowrep ) THEN
3899  i = i + imba
3900  ELSE
3901  i = i + imba + ( nprow - 1 ) * desca( mb_ )
3902  END IF
3903 *
3904  DO 100 ii = imba+1, mpall, desca( mb_ )
3905  ib = min( mpall-ii+1, desca( mb_ ) )
3906 *
3907  DO 90 kk = 0, jb-1
3908  DO 80 ll = 0, ib-1
3909  IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3910  $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3911  $ CALL pderrset( err, errmax,
3912  $ a( i+ll+(j+kk-1)*lda ),
3913  $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3914  80 CONTINUE
3915  90 CONTINUE
3916 *
3917  IF( rowrep ) THEN
3918  i = i + desca( mb_ )
3919  ELSE
3920  i = i + nprow * desca( mb_ )
3921  END IF
3922 *
3923  100 CONTINUE
3924 *
3925  jj = jj + jb
3926 *
3927  END IF
3928 *
3929  icurcol = mod( icurcol + 1, npcol )
3930 * INSERT MODE
3931  110 CONTINUE
3932 *
3933  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3934  $ -1, -1 )
3935 *
3936  IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3937  info = 1
3938  ELSE IF( errmax.GT.eps ) THEN
3939  info = -1
3940  END IF
3941 *
3942  RETURN
3943 *
3944 * End of PDCHKMOUT
3945 *
3946  END
3947  SUBROUTINE pdmprnt( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT,
3948  $ CMATNM )
3950 * -- PBLAS test routine (version 2.0) --
3951 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3952 * and University of California, Berkeley.
3953 * April 1, 1998
3954 *
3955 * .. Scalar Arguments ..
3956  INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT
3957 * ..
3958 * .. Array Arguments ..
3959  CHARACTER*(*) CMATNM
3960  DOUBLE PRECISION A( LDA, * )
3961 * ..
3962 *
3963 * Purpose
3964 * =======
3965 *
3966 * PDMPRNT prints to the standard output an array A of size m by n. Only
3967 * the process of coordinates ( IRPRNT, ICPRNT ) is printing.
3968 *
3969 * Arguments
3970 * =========
3971 *
3972 * ICTXT (local input) INTEGER
3973 * On entry, ICTXT specifies the BLACS context handle, indica-
3974 * ting the global context of the operation. The context itself
3975 * is global, but the value of ICTXT is local.
3976 *
3977 * NOUT (global input) INTEGER
3978 * On entry, NOUT specifies the unit number for the output file.
3979 * When NOUT is 6, output to screen, when NOUT is 0, output to
3980 * stderr. NOUT is only defined for process 0.
3981 *
3982 * M (global input) INTEGER
3983 * On entry, M specifies the number of rows of the matrix A. M
3984 * must be at least zero.
3985 *
3986 * N (global input) INTEGER
3987 * On entry, N specifies the number of columns of the matrix A.
3988 * N must be at least zero.
3989 *
3990 * A (local input) DOUBLE PRECISION array
3991 * On entry, A is an array of dimension (LDA,N). The leading m
3992 * by n part of this array is printed.
3993 *
3994 * LDA (local input) INTEGER
3995 * On entry, LDA specifies the leading dimension of the local
3996 * array A to be printed. LDA must be at least MAX( 1, M ).
3997 *
3998 * IRPRNT (global input) INTEGER
3999 * On entry, IRPRNT specifies the process row coordinate of the
4000 * printing process.
4001 *
4002 * ICPRNT (global input) INTEGER
4003 * On entry, ICPRNT specifies the process column coordinate of
4004 * the printing process.
4005 *
4006 * CMATNM (global input) CHARACTER*(*)
4007 * On entry, CMATNM specifies the identifier of the matrix to be
4008 * printed.
4009 *
4010 * -- Written on April 1, 1998 by
4011 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4012 *
4013 * =====================================================================
4014 *
4015 * .. Local Scalars ..
4016  INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW
4017 * ..
4018 * .. External Subroutines ..
4019  EXTERNAL BLACS_GRIDINFO
4020 * ..
4021 * .. Executable Statements ..
4022 *
4023 * Quick return if possible
4024 *
4025  IF( ( m.LE.0 ).OR.( n.LE.0 ) )
4026  $ RETURN
4027 *
4028 * Get grid parameters
4029 *
4030  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4031 *
4032  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4033 *
4034  WRITE( nout, fmt = * )
4035  DO 20 j = 1, n
4036 *
4037  DO 10 i = 1, m
4038 *
4039  WRITE( nout, fmt = 9999 ) cmatnm, i, j, a( i, j )
4040 *
4041  10 CONTINUE
4042 *
4043  20 CONTINUE
4044 *
4045  END IF
4046 *
4047  9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', d30.18 )
4048 *
4049  RETURN
4050 *
4051 * End of PDMPRNT
4052 *
4053  END
4054  SUBROUTINE pdvprnt( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT,
4055  $ CVECNM )
4057 * -- PBLAS test routine (version 2.0) --
4058 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4059 * and University of California, Berkeley.
4060 * April 1, 1998
4061 *
4062 * .. Scalar Arguments ..
4063  INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT
4064 * ..
4065 * .. Array Arguments ..
4066  CHARACTER*(*) CVECNM
4067  DOUBLE PRECISION X( * )
4068 * ..
4069 *
4070 * Purpose
4071 * =======
4072 *
4073 * PDVPRNT prints to the standard output an vector x of length n. Only
4074 * the process of coordinates ( IRPRNT, ICPRNT ) is printing.
4075 *
4076 * Arguments
4077 * =========
4078 *
4079 * ICTXT (local input) INTEGER
4080 * On entry, ICTXT specifies the BLACS context handle, indica-
4081 * ting the global context of the operation. The context itself
4082 * is global, but the value of ICTXT is local.
4083 *
4084 * NOUT (global input) INTEGER
4085 * On entry, NOUT specifies the unit number for the output file.
4086 * When NOUT is 6, output to screen, when NOUT is 0, output to
4087 * stderr. NOUT is only defined for process 0.
4088 *
4089 * N (global input) INTEGER
4090 * On entry, N specifies the length of the vector X. N must be
4091 * at least zero.
4092 *
4093 * X (global input) DOUBLE PRECISION array
4094 * On entry, X is an array of dimension at least
4095 * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen-
4096 * ted array X must contain the vector x.
4097 *
4098 * INCX (global input) INTEGER.
4099 * On entry, INCX specifies the increment for the elements of X.
4100 * INCX must not be zero.
4101 *
4102 * IRPRNT (global input) INTEGER
4103 * On entry, IRPRNT specifies the process row coordinate of the
4104 * printing process.
4105 *
4106 * ICPRNT (global input) INTEGER
4107 * On entry, ICPRNT specifies the process column coordinate of
4108 * the printing process.
4109 *
4110 * CVECNM (global input) CHARACTER*(*)
4111 * On entry, CVECNM specifies the identifier of the vector to be
4112 * printed.
4113 *
4114 * -- Written on April 1, 1998 by
4115 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4116 *
4117 * =====================================================================
4118 *
4119 * .. Local Scalars ..
4120  INTEGER I, MYCOL, MYROW, NPCOL, NPROW
4121 * ..
4122 * .. External Subroutines ..
4123  EXTERNAL BLACS_GRIDINFO
4124 * ..
4125 * .. Executable Statements ..
4126 *
4127 * Quick return if possible
4128 *
4129  IF( n.LE.0 )
4130  $ RETURN
4131 *
4132 * Get grid parameters
4133 *
4134  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4135 *
4136  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4137 *
4138  WRITE( nout, fmt = * )
4139  DO 10 i = 1, 1 + ( n-1 )*incx, incx
4140 *
4141  WRITE( nout, fmt = 9999 ) cvecnm, i, x( i )
4142 *
4143  10 CONTINUE
4144 *
4145  END IF
4146 *
4147  9999 FORMAT( 1x, a, '(', i6, ')=', d30.18 )
4148 *
4149  RETURN
4150 *
4151 * End of PDVPRNT
4152 *
4153  END
4154  SUBROUTINE pdmvch( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
4155  $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY,
4156  $ DESCY, INCY, G, ERR, INFO )
4158 * -- PBLAS test routine (version 2.0) --
4159 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4160 * and University of California, Berkeley.
4161 * April 1, 1998
4162 *
4163 * .. Scalar Arguments ..
4164  CHARACTER*1 TRANS
4165  INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4166  $ JY, M, N
4167  DOUBLE PRECISION ALPHA, BETA, ERR
4168 * ..
4169 * .. Array Arguments ..
4170  INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4171  DOUBLE PRECISION A( * ), G( * ), PY( * ), X( * ), Y( * )
4172 * ..
4173 *
4174 * Purpose
4175 * =======
4176 *
4177 * PDMVCH checks the results of the computational tests.
4178 *
4179 * Notes
4180 * =====
4181 *
4182 * A description vector is associated with each 2D block-cyclicly dis-
4183 * tributed matrix. This vector stores the information required to
4184 * establish the mapping between a matrix entry and its corresponding
4185 * process and memory location.
4186 *
4187 * In the following comments, the character _ should be read as
4188 * "of the distributed matrix". Let A be a generic term for any 2D
4189 * block cyclicly distributed matrix. Its description vector is DESCA:
4190 *
4191 * NOTATION STORED IN EXPLANATION
4192 * ---------------- --------------- ------------------------------------
4193 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4194 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4195 * the NPROW x NPCOL BLACS process grid
4196 * A is distributed over. The context
4197 * itself is global, but the handle
4198 * (the integer value) may vary.
4199 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
4200 * ted matrix A, M_A >= 0.
4201 * N_A (global) DESCA( N_ ) The number of columns in the distri-
4202 * buted matrix A, N_A >= 0.
4203 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4204 * block of the matrix A, IMB_A > 0.
4205 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
4206 * left block of the matrix A,
4207 * INB_A > 0.
4208 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4209 * bute the last M_A-IMB_A rows of A,
4210 * MB_A > 0.
4211 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4212 * bute the last N_A-INB_A columns of
4213 * A, NB_A > 0.
4214 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4215 * row of the matrix A is distributed,
4216 * NPROW > RSRC_A >= 0.
4217 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4218 * first column of A is distributed.
4219 * NPCOL > CSRC_A >= 0.
4220 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4221 * array storing the local blocks of
4222 * the distributed matrix A,
4223 * IF( Lc( 1, N_A ) > 0 )
4224 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
4225 * ELSE
4226 * LLD_A >= 1.
4227 *
4228 * Let K be the number of rows of a matrix A starting at the global in-
4229 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4230 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4231 * receive if these K rows were distributed over NPROW processes. If K
4232 * is the number of columns of a matrix A starting at the global index
4233 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4234 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4235 * these K columns were distributed over NPCOL processes.
4236 *
4237 * The values of Lr() and Lc() may be determined via a call to the func-
4238 * tion PB_NUMROC:
4239 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4240 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4241 *
4242 * Arguments
4243 * =========
4244 *
4245 * ICTXT (local input) INTEGER
4246 * On entry, ICTXT specifies the BLACS context handle, indica-
4247 * ting the global context of the operation. The context itself
4248 * is global, but the value of ICTXT is local.
4249 *
4250 * TRANS (global input) CHARACTER*1
4251 * On entry, TRANS specifies which matrix-vector product is to
4252 * be computed as follows:
4253 * If TRANS = 'N',
4254 * sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ),
4255 * otherwise
4256 * sub( Y ) = BETA * sub( Y ) + sub( A )' * sub( X ).
4257 *
4258 * M (global input) INTEGER
4259 * On entry, M specifies the number of rows of the submatrix
4260 * operand matrix A. M must be at least zero.
4261 *
4262 * N (global input) INTEGER
4263 * On entry, N specifies the number of columns of the subma-
4264 * trix operand matrix A. N must be at least zero.
4265 *
4266 * ALPHA (global input) DOUBLE PRECISION
4267 * On entry, ALPHA specifies the scalar alpha.
4268 *
4269 * A (local input) DOUBLE PRECISION array
4270 * On entry, A is an array of dimension (DESCA( M_ ),*). This
4271 * array contains a local copy of the initial entire matrix PA.
4272 *
4273 * IA (global input) INTEGER
4274 * On entry, IA specifies A's global row index, which points to
4275 * the beginning of the submatrix sub( A ).
4276 *
4277 * JA (global input) INTEGER
4278 * On entry, JA specifies A's global column index, which points
4279 * to the beginning of the submatrix sub( A ).
4280 *
4281 * DESCA (global and local input) INTEGER array
4282 * On entry, DESCA is an integer array of dimension DLEN_. This
4283 * is the array descriptor for the matrix A.
4284 *
4285 * X (local input) DOUBLE PRECISION array
4286 * On entry, X is an array of dimension (DESCX( M_ ),*). This
4287 * array contains a local copy of the initial entire matrix PX.
4288 *
4289 * IX (global input) INTEGER
4290 * On entry, IX specifies X's global row index, which points to
4291 * the beginning of the submatrix sub( X ).
4292 *
4293 * JX (global input) INTEGER
4294 * On entry, JX specifies X's global column index, which points
4295 * to the beginning of the submatrix sub( X ).
4296 *
4297 * DESCX (global and local input) INTEGER array
4298 * On entry, DESCX is an integer array of dimension DLEN_. This
4299 * is the array descriptor for the matrix X.
4300 *
4301 * INCX (global input) INTEGER
4302 * On entry, INCX specifies the global increment for the
4303 * elements of X. Only two values of INCX are supported in
4304 * this version, namely 1 and M_X. INCX must not be zero.
4305 *
4306 * BETA (global input) DOUBLE PRECISION
4307 * On entry, BETA specifies the scalar beta.
4308 *
4309 * Y (local input/local output) DOUBLE PRECISION array
4310 * On entry, Y is an array of dimension (DESCY( M_ ),*). This
4311 * array contains a local copy of the initial entire matrix PY.
4312 *
4313 * PY (local input) DOUBLE PRECISION array
4314 * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This
4315 * array contains the local entries of the matrix PY.
4316 *
4317 * IY (global input) INTEGER
4318 * On entry, IY specifies Y's global row index, which points to
4319 * the beginning of the submatrix sub( Y ).
4320 *
4321 * JY (global input) INTEGER
4322 * On entry, JY specifies Y's global column index, which points
4323 * to the beginning of the submatrix sub( Y ).
4324 *
4325 * DESCY (global and local input) INTEGER array
4326 * On entry, DESCY is an integer array of dimension DLEN_. This
4327 * is the array descriptor for the matrix Y.
4328 *
4329 * INCY (global input) INTEGER
4330 * On entry, INCY specifies the global increment for the
4331 * elements of Y. Only two values of INCY are supported in
4332 * this version, namely 1 and M_Y. INCY must not be zero.
4333 *
4334 * G (workspace) DOUBLE PRECISION array
4335 * On entry, G is an array of dimension at least MAX( M, N ). G
4336 * is used to compute the gauges.
4337 *
4338 * ERR (global output) DOUBLE PRECISION
4339 * On exit, ERR specifies the largest error in absolute value.
4340 *
4341 * INFO (global output) INTEGER
4342 * On exit, if INFO <> 0, the result is less than half accurate.
4343 *
4344 * -- Written on April 1, 1998 by
4345 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4346 *
4347 * =====================================================================
4348 *
4349 * .. Parameters ..
4350  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4351  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4352  $ RSRC_
4353  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4354  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4355  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4356  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4357  DOUBLE PRECISION ZERO, ONE
4358  parameter( zero = 0.0d+0, one = 1.0d+0 )
4359 * ..
4360 * .. Local Scalars ..
4361  LOGICAL COLREP, ROWREP, TRAN
4362  INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX,
4363  $ IOFFY, IYCOL, IYROW, J, JB, JJY, JN, KK, LDA,
4364  $ LDPY, LDX, LDY, ML, MYCOL, MYROW, NL, NPCOL,
4365  $ nprow
4366  DOUBLE PRECISION EPS, ERRI, GTMP, TBETA, YTMP
4367 * ..
4368 * .. External Subroutines ..
4369  EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
4370 * ..
4371 * .. External Functions ..
4372  LOGICAL LSAME
4373  DOUBLE PRECISION PDLAMCH
4374  EXTERNAL lsame, pdlamch
4375 * ..
4376 * .. Intrinsic Functions ..
4377  INTRINSIC abs, max, min, mod, sqrt
4378 * ..
4379 * .. Executable Statements ..
4380 *
4381  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4382 *
4383  eps = pdlamch( ictxt, 'eps' )
4384 *
4385  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
4386  tbeta = one
4387  ELSE
4388  tbeta = beta
4389  END IF
4390 *
4391  tran = lsame( trans, 'T' ).OR.lsame( trans, 'C' )
4392  IF( tran ) THEN
4393  ml = n
4394  nl = m
4395  ELSE
4396  ml = m
4397  nl = n
4398  END IF
4399 *
4400  lda = max( 1, desca( m_ ) )
4401  ldx = max( 1, descx( m_ ) )
4402  ldy = max( 1, descy( m_ ) )
4403 *
4404 * Compute expected result in Y using data in A, X and Y.
4405 * Compute gauges in G. This part of the computation is performed
4406 * by every process in the grid.
4407 *
4408  ioffy = iy + ( jy - 1 ) * ldy
4409  DO 30 i = 1, ml
4410  ytmp = zero
4411  gtmp = zero
4412  ioffx = ix + ( jx - 1 ) * ldx
4413  IF( tran )THEN
4414  ioffa = ia + ( ja + i - 2 ) * lda
4415  DO 10 j = 1, nl
4416  ytmp = ytmp + a( ioffa ) * x( ioffx )
4417  gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4418  ioffa = ioffa + 1
4419  ioffx = ioffx + incx
4420  10 CONTINUE
4421  ELSE
4422  ioffa = ia + i - 1 + ( ja - 1 ) * lda
4423  DO 20 j = 1, nl
4424  ytmp = ytmp + a( ioffa ) * x( ioffx )
4425  gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4426  ioffa = ioffa + lda
4427  ioffx = ioffx + incx
4428  20 CONTINUE
4429  END IF
4430  g( i ) = abs( alpha ) * gtmp + abs( tbeta * y( ioffy ) )
4431  y( ioffy ) = alpha * ytmp + tbeta * y( ioffy )
4432  ioffy = ioffy + incy
4433  30 CONTINUE
4434 *
4435 * Compute the error ratio for this result.
4436 *
4437  err = zero
4438  info = 0
4439  ldpy = descy( lld_ )
4440  ioffy = iy + ( jy - 1 ) * ldy
4441  CALL pb_infog2l( iy, jy, descy, nprow, npcol, myrow, mycol, iiy,
4442  $ jjy, iyrow, iycol )
4443  icurrow = iyrow
4444  icurcol = iycol
4445  rowrep = ( iyrow.EQ.-1 )
4446  colrep = ( iycol.EQ.-1 )
4447 *
4448  IF( incy.EQ.descy( m_ ) ) THEN
4449 *
4450 * sub( Y ) is a row vector
4451 *
4452  jb = descy( inb_ ) - jy + 1
4453  IF( jb.LE.0 )
4454  $ jb = ( ( -jb ) / descy( nb_ ) + 1 ) * descy( nb_ ) + jb
4455  jb = min( jb, ml )
4456  jn = jy + jb - 1
4457 *
4458  DO 50 j = jy, jn
4459 *
4460  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4461  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4462  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4463  IF( g( j-jy+1 ).NE.zero )
4464  $ erri = erri / g( j-jy+1 )
4465  err = max( err, erri )
4466  IF( err*sqrt( eps ).GE.one )
4467  $ info = 1
4468  jjy = jjy + 1
4469  END IF
4470 *
4471  ioffy = ioffy + incy
4472 *
4473  50 CONTINUE
4474 *
4475  icurcol = mod( icurcol+1, npcol )
4476 *
4477  DO 70 j = jn+1, jy+ml-1, descy( nb_ )
4478  jb = min( jy+ml-j, descy( nb_ ) )
4479 *
4480  DO 60 kk = 0, jb-1
4481 *
4482  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4483  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4484  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4485  IF( g( j+kk-jy+1 ).NE.zero )
4486  $ erri = erri / g( j+kk-jy+1 )
4487  err = max( err, erri )
4488  IF( err*sqrt( eps ).GE.one )
4489  $ info = 1
4490  jjy = jjy + 1
4491  END IF
4492 *
4493  ioffy = ioffy + incy
4494 *
4495  60 CONTINUE
4496 *
4497  icurcol = mod( icurcol+1, npcol )
4498 *
4499  70 CONTINUE
4500 *
4501  ELSE
4502 *
4503 * sub( Y ) is a column vector
4504 *
4505  ib = descy( imb_ ) - iy + 1
4506  IF( ib.LE.0 )
4507  $ ib = ( ( -ib ) / descy( mb_ ) + 1 ) * descy( mb_ ) + ib
4508  ib = min( ib, ml )
4509  in = iy + ib - 1
4510 *
4511  DO 80 i = iy, in
4512 *
4513  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4514  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4515  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4516  IF( g( i-iy+1 ).NE.zero )
4517  $ erri = erri / g( i-iy+1 )
4518  err = max( err, erri )
4519  IF( err*sqrt( eps ).GE.one )
4520  $ info = 1
4521  iiy = iiy + 1
4522  END IF
4523 *
4524  ioffy = ioffy + incy
4525 *
4526  80 CONTINUE
4527 *
4528  icurrow = mod( icurrow+1, nprow )
4529 *
4530  DO 100 i = in+1, iy+ml-1, descy( mb_ )
4531  ib = min( iy+ml-i, descy( mb_ ) )
4532 *
4533  DO 90 kk = 0, ib-1
4534 *
4535  IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4536  $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4537  erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4538  IF( g( i+kk-iy+1 ).NE.zero )
4539  $ erri = erri / g( i+kk-iy+1 )
4540  err = max( err, erri )
4541  IF( err*sqrt( eps ).GE.one )
4542  $ info = 1
4543  iiy = iiy + 1
4544  END IF
4545 *
4546  ioffy = ioffy + incy
4547 *
4548  90 CONTINUE
4549 *
4550  icurrow = mod( icurrow+1, nprow )
4551 *
4552  100 CONTINUE
4553 *
4554  END IF
4555 *
4556 * If INFO = 0, all results are at least half accurate.
4557 *
4558  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4559  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4560  $ mycol )
4561 *
4562  RETURN
4563 *
4564 * End of PDMVCH
4565 *
4566  END
4567  SUBROUTINE pdvmch( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4568  $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA,
4569  $ DESCA, G, ERR, INFO )
4571 * -- PBLAS test routine (version 2.0) --
4572 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4573 * and University of California, Berkeley.
4574 * April 1, 1998
4575 *
4576 * .. Scalar Arguments ..
4577  CHARACTER*1 UPLO
4578  INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4579  $ JY, M, N
4580  DOUBLE PRECISION ALPHA, ERR
4581 * ..
4582 * .. Array Arguments ..
4583  INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4584  DOUBLE PRECISION A( * ), G( * ), PA( * ), X( * ), Y( * )
4585 * ..
4586 *
4587 * Purpose
4588 * =======
4589 *
4590 * PDVMCH checks the results of the computational tests.
4591 *
4592 * Notes
4593 * =====
4594 *
4595 * A description vector is associated with each 2D block-cyclicly dis-
4596 * tributed matrix. This vector stores the information required to
4597 * establish the mapping between a matrix entry and its corresponding
4598 * process and memory location.
4599 *
4600 * In the following comments, the character _ should be read as
4601 * "of the distributed matrix". Let A be a generic term for any 2D
4602 * block cyclicly distributed matrix. Its description vector is DESCA:
4603 *
4604 * NOTATION STORED IN EXPLANATION
4605 * ---------------- --------------- ------------------------------------
4606 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4607 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4608 * the NPROW x NPCOL BLACS process grid
4609 * A is distributed over. The context
4610 * itself is global, but the handle
4611 * (the integer value) may vary.
4612 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
4613 * ted matrix A, M_A >= 0.
4614 * N_A (global) DESCA( N_ ) The number of columns in the distri-
4615 * buted matrix A, N_A >= 0.
4616 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4617 * block of the matrix A, IMB_A > 0.
4618 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
4619 * left block of the matrix A,
4620 * INB_A > 0.
4621 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4622 * bute the last M_A-IMB_A rows of A,
4623 * MB_A > 0.
4624 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4625 * bute the last N_A-INB_A columns of
4626 * A, NB_A > 0.
4627 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4628 * row of the matrix A is distributed,
4629 * NPROW > RSRC_A >= 0.
4630 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4631 * first column of A is distributed.
4632 * NPCOL > CSRC_A >= 0.
4633 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4634 * array storing the local blocks of
4635 * the distributed matrix A,
4636 * IF( Lc( 1, N_A ) > 0 )
4637 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
4638 * ELSE
4639 * LLD_A >= 1.
4640 *
4641 * Let K be the number of rows of a matrix A starting at the global in-
4642 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4643 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4644 * receive if these K rows were distributed over NPROW processes. If K
4645 * is the number of columns of a matrix A starting at the global index
4646 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4647 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4648 * these K columns were distributed over NPCOL processes.
4649 *
4650 * The values of Lr() and Lc() may be determined via a call to the func-
4651 * tion PB_NUMROC:
4652 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4653 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4654 *
4655 * Arguments
4656 * =========
4657 *
4658 * ICTXT (local input) INTEGER
4659 * On entry, ICTXT specifies the BLACS context handle, indica-
4660 * ting the global context of the operation. The context itself
4661 * is global, but the value of ICTXT is local.
4662 *
4663 * UPLO (global input) CHARACTER*1
4664 * On entry, UPLO specifies which part of the submatrix sub( A )
4665 * is to be referenced as follows:
4666 * If UPLO = 'L', only the lower triangular part,
4667 * If UPLO = 'U', only the upper triangular part,
4668 * else the entire matrix is to be referenced.
4669 *
4670 * M (global input) INTEGER
4671 * On entry, M specifies the number of rows of the submatrix
4672 * operand matrix A. M must be at least zero.
4673 *
4674 * N (global input) INTEGER
4675 * On entry, N specifies the number of columns of the subma-
4676 * trix operand matrix A. N must be at least zero.
4677 *
4678 * ALPHA (global input) DOUBLE PRECISION
4679 * On entry, ALPHA specifies the scalar alpha.
4680 *
4681 * X (local input) DOUBLE PRECISION array
4682 * On entry, X is an array of dimension (DESCX( M_ ),*). This
4683 * array contains a local copy of the initial entire matrix PX.
4684 *
4685 * IX (global input) INTEGER
4686 * On entry, IX specifies X's global row index, which points to
4687 * the beginning of the submatrix sub( X ).
4688 *
4689 * JX (global input) INTEGER
4690 * On entry, JX specifies X's global column index, which points
4691 * to the beginning of the submatrix sub( X ).
4692 *
4693 * DESCX (global and local input) INTEGER array
4694 * On entry, DESCX is an integer array of dimension DLEN_. This
4695 * is the array descriptor for the matrix X.
4696 *
4697 * INCX (global input) INTEGER
4698 * On entry, INCX specifies the global increment for the
4699 * elements of X. Only two values of INCX are supported in
4700 * this version, namely 1 and M_X. INCX must not be zero.
4701 *
4702 * Y (local input) DOUBLE PRECISION array
4703 * On entry, Y is an array of dimension (DESCY( M_ ),*). This
4704 * array contains a local copy of the initial entire matrix PY.
4705 *
4706 * IY (global input) INTEGER
4707 * On entry, IY specifies Y's global row index, which points to
4708 * the beginning of the submatrix sub( Y ).
4709 *
4710 * JY (global input) INTEGER
4711 * On entry, JY specifies Y's global column index, which points
4712 * to the beginning of the submatrix sub( Y ).
4713 *
4714 * DESCY (global and local input) INTEGER array
4715 * On entry, DESCY is an integer array of dimension DLEN_. This
4716 * is the array descriptor for the matrix Y.
4717 *
4718 * INCY (global input) INTEGER
4719 * On entry, INCY specifies the global increment for the
4720 * elements of Y. Only two values of INCY are supported in
4721 * this version, namely 1 and M_Y. INCY must not be zero.
4722 *
4723 * A (local input/local output) DOUBLE PRECISION array
4724 * On entry, A is an array of dimension (DESCA( M_ ),*). This
4725 * array contains a local copy of the initial entire matrix PA.
4726 *
4727 * PA (local input) DOUBLE PRECISION array
4728 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
4729 * array contains the local entries of the matrix PA.
4730 *
4731 * IA (global input) INTEGER
4732 * On entry, IA specifies A's global row index, which points to
4733 * the beginning of the submatrix sub( A ).
4734 *
4735 * JA (global input) INTEGER
4736 * On entry, JA specifies A's global column index, which points
4737 * to the beginning of the submatrix sub( A ).
4738 *
4739 * DESCA (global and local input) INTEGER array
4740 * On entry, DESCA is an integer array of dimension DLEN_. This
4741 * is the array descriptor for the matrix A.
4742 *
4743 * G (workspace) DOUBLE PRECISION array
4744 * On entry, G is an array of dimension at least MAX( M, N ). G
4745 * is used to compute the gauges.
4746 *
4747 * ERR (global output) DOUBLE PRECISION
4748 * On exit, ERR specifies the largest error in absolute value.
4749 *
4750 * INFO (global output) INTEGER
4751 * On exit, if INFO <> 0, the result is less than half accurate.
4752 *
4753 * -- Written on April 1, 1998 by
4754 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4755 *
4756 * =====================================================================
4757 *
4758 * .. Parameters ..
4759  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4760  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4761  $ RSRC_
4762  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4763  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4764  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4765  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4766  DOUBLE PRECISION ZERO, ONE
4767  PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
4768 * ..
4769 * .. Local Scalars ..
4770  LOGICAL COLREP, LOWER, ROWREP, UPPER
4771  INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
4772  $ in, ioffa, ioffx, ioffy, j, jja, kk, lda, ldpa,
4773  $ ldx, ldy, mycol, myrow, npcol, nprow
4774  DOUBLE PRECISION ATMP, EPS, ERRI, GTMP
4775 * ..
4776 * .. External Subroutines ..
4777  EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
4778 * ..
4779 * .. External Functions ..
4780  LOGICAL LSAME
4781  DOUBLE PRECISION PDLAMCH
4782  EXTERNAL LSAME, PDLAMCH
4783 * ..
4784 * .. Intrinsic Functions ..
4785  INTRINSIC abs, max, min, mod, sqrt
4786 * ..
4787 * .. Executable Statements ..
4788 *
4789  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4790 *
4791  eps = pdlamch( ictxt, 'eps' )
4792 *
4793  upper = lsame( uplo, 'U' )
4794  lower = lsame( uplo, 'L' )
4795 *
4796  lda = max( 1, desca( m_ ) )
4797  ldx = max( 1, descx( m_ ) )
4798  ldy = max( 1, descy( m_ ) )
4799 *
4800 * Compute expected result in A using data in A, X and Y.
4801 * Compute gauges in G. This part of the computation is performed
4802 * by every process in the grid.
4803 *
4804  DO 70 j = 1, n
4805 *
4806  ioffy = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
4807 *
4808  IF( lower ) THEN
4809  ibeg = j
4810  iend = m
4811  DO 10 i = 1, j-1
4812  g( i ) = zero
4813  10 CONTINUE
4814  ELSE IF( upper ) THEN
4815  ibeg = 1
4816  iend = j
4817  DO 20 i = j+1, m
4818  g( i ) = zero
4819  20 CONTINUE
4820  ELSE
4821  ibeg = 1
4822  iend = m
4823  END IF
4824 *
4825  DO 30 i = ibeg, iend
4826 *
4827  ioffx = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
4828  ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
4829  atmp = x( ioffx ) * y( ioffy )
4830  gtmp = abs( x( ioffx ) * y( ioffy ) )
4831  g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
4832  a( ioffa ) = alpha * atmp + a( ioffa )
4833 *
4834  30 CONTINUE
4835 *
4836 * Compute the error ratio for this result.
4837 *
4838  info = 0
4839  err = zero
4840  ldpa = desca( lld_ )
4841  ioffa = ia + ( ja + j - 2 ) * lda
4842  CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
4843  $ iia, jja, iarow, iacol )
4844  rowrep = ( iarow.EQ.-1 )
4845  colrep = ( iacol.EQ.-1 )
4846 *
4847  IF( mycol.EQ.iacol .OR. colrep ) THEN
4848 *
4849  icurrow = iarow
4850  ib = desca( imb_ ) - ia + 1
4851  IF( ib.LE.0 )
4852  $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
4853  ib = min( ib, m )
4854  in = ia + ib - 1
4855 *
4856  DO 40 i = ia, in
4857 *
4858  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4859  erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
4860  IF( g( i-ia+1 ).NE.zero )
4861  $ erri = erri / g( i-ia+1 )
4862  err = max( err, erri )
4863  IF( err*sqrt( eps ).GE.one )
4864  $ info = 1
4865  iia = iia + 1
4866  END IF
4867 *
4868  ioffa = ioffa + 1
4869 *
4870  40 CONTINUE
4871 *
4872  icurrow = mod( icurrow+1, nprow )
4873 *
4874  DO 60 i = in+1, ia+m-1, desca( mb_ )
4875  ib = min( ia+m-i, desca( mb_ ) )
4876 *
4877  DO 50 kk = 0, ib-1
4878 *
4879  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4880  erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
4881  IF( g( i+kk-ia+1 ).NE.zero )
4882  $ erri = erri / g( i+kk-ia+1 )
4883  err = max( err, erri )
4884  IF( err*sqrt( eps ).GE.one )
4885  $ info = 1
4886  iia = iia + 1
4887  END IF
4888 *
4889  ioffa = ioffa + 1
4890 *
4891  50 CONTINUE
4892 *
4893  icurrow = mod( icurrow+1, nprow )
4894 *
4895  60 CONTINUE
4896 *
4897  END IF
4898 *
4899 * If INFO = 0, all results are at least half accurate.
4900 *
4901  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4902  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4903  $ mycol )
4904  IF( info.NE.0 )
4905  $ GO TO 80
4906 *
4907  70 CONTINUE
4908 *
4909  80 CONTINUE
4910 *
4911  RETURN
4912 *
4913 * End of PDVMCH
4914 *
4915  END
4916  SUBROUTINE pdvmch2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4917  $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA,
4918  $ JA, DESCA, G, ERR, INFO )
4920 * -- PBLAS test routine (version 2.0) --
4921 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4922 * and University of California, Berkeley.
4923 * April 1, 1998
4924 *
4925 * .. Scalar Arguments ..
4926  CHARACTER*1 UPLO
4927  INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4928  $ jy, m, n
4929  DOUBLE PRECISION ALPHA, ERR
4930 * ..
4931 * .. Array Arguments ..
4932  INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4933  DOUBLE PRECISION A( * ), G( * ), PA( * ), X( * ), Y( * )
4934 * ..
4935 *
4936 * Purpose
4937 * =======
4938 *
4939 * PDVMCH2 checks the results of the computational tests.
4940 *
4941 * Notes
4942 * =====
4943 *
4944 * A description vector is associated with each 2D block-cyclicly dis-
4945 * tributed matrix. This vector stores the information required to
4946 * establish the mapping between a matrix entry and its corresponding
4947 * process and memory location.
4948 *
4949 * In the following comments, the character _ should be read as
4950 * "of the distributed matrix". Let A be a generic term for any 2D
4951 * block cyclicly distributed matrix. Its description vector is DESCA:
4952 *
4953 * NOTATION STORED IN EXPLANATION
4954 * ---------------- --------------- ------------------------------------
4955 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4956 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4957 * the NPROW x NPCOL BLACS process grid
4958 * A is distributed over. The context
4959 * itself is global, but the handle
4960 * (the integer value) may vary.
4961 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
4962 * ted matrix A, M_A >= 0.
4963 * N_A (global) DESCA( N_ ) The number of columns in the distri-
4964 * buted matrix A, N_A >= 0.
4965 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4966 * block of the matrix A, IMB_A > 0.
4967 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
4968 * left block of the matrix A,
4969 * INB_A > 0.
4970 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4971 * bute the last M_A-IMB_A rows of A,
4972 * MB_A > 0.
4973 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4974 * bute the last N_A-INB_A columns of
4975 * A, NB_A > 0.
4976 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4977 * row of the matrix A is distributed,
4978 * NPROW > RSRC_A >= 0.
4979 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4980 * first column of A is distributed.
4981 * NPCOL > CSRC_A >= 0.
4982 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4983 * array storing the local blocks of
4984 * the distributed matrix A,
4985 * IF( Lc( 1, N_A ) > 0 )
4986 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
4987 * ELSE
4988 * LLD_A >= 1.
4989 *
4990 * Let K be the number of rows of a matrix A starting at the global in-
4991 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4992 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4993 * receive if these K rows were distributed over NPROW processes. If K
4994 * is the number of columns of a matrix A starting at the global index
4995 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4996 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4997 * these K columns were distributed over NPCOL processes.
4998 *
4999 * The values of Lr() and Lc() may be determined via a call to the func-
5000 * tion PB_NUMROC:
5001 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5002 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5003 *
5004 * Arguments
5005 * =========
5006 *
5007 * ICTXT (local input) INTEGER
5008 * On entry, ICTXT specifies the BLACS context handle, indica-
5009 * ting the global context of the operation. The context itself
5010 * is global, but the value of ICTXT is local.
5011 *
5012 * UPLO (global input) CHARACTER*1
5013 * On entry, UPLO specifies which part of the submatrix sub( A )
5014 * is to be referenced as follows:
5015 * If UPLO = 'L', only the lower triangular part,
5016 * If UPLO = 'U', only the upper triangular part,
5017 * else the entire matrix is to be referenced.
5018 *
5019 * M (global input) INTEGER
5020 * On entry, M specifies the number of rows of the submatrix
5021 * operand matrix A. M must be at least zero.
5022 *
5023 * N (global input) INTEGER
5024 * On entry, N specifies the number of columns of the subma-
5025 * trix operand matrix A. N must be at least zero.
5026 *
5027 * ALPHA (global input) DOUBLE PRECISION
5028 * On entry, ALPHA specifies the scalar alpha.
5029 *
5030 * X (local input) DOUBLE PRECISION array
5031 * On entry, X is an array of dimension (DESCX( M_ ),*). This
5032 * array contains a local copy of the initial entire matrix PX.
5033 *
5034 * IX (global input) INTEGER
5035 * On entry, IX specifies X's global row index, which points to
5036 * the beginning of the submatrix sub( X ).
5037 *
5038 * JX (global input) INTEGER
5039 * On entry, JX specifies X's global column index, which points
5040 * to the beginning of the submatrix sub( X ).
5041 *
5042 * DESCX (global and local input) INTEGER array
5043 * On entry, DESCX is an integer array of dimension DLEN_. This
5044 * is the array descriptor for the matrix X.
5045 *
5046 * INCX (global input) INTEGER
5047 * On entry, INCX specifies the global increment for the
5048 * elements of X. Only two values of INCX are supported in
5049 * this version, namely 1 and M_X. INCX must not be zero.
5050 *
5051 * Y (local input) DOUBLE PRECISION array
5052 * On entry, Y is an array of dimension (DESCY( M_ ),*). This
5053 * array contains a local copy of the initial entire matrix PY.
5054 *
5055 * IY (global input) INTEGER
5056 * On entry, IY specifies Y's global row index, which points to
5057 * the beginning of the submatrix sub( Y ).
5058 *
5059 * JY (global input) INTEGER
5060 * On entry, JY specifies Y's global column index, which points
5061 * to the beginning of the submatrix sub( Y ).
5062 *
5063 * DESCY (global and local input) INTEGER array
5064 * On entry, DESCY is an integer array of dimension DLEN_. This
5065 * is the array descriptor for the matrix Y.
5066 *
5067 * INCY (global input) INTEGER
5068 * On entry, INCY specifies the global increment for the
5069 * elements of Y. Only two values of INCY are supported in
5070 * this version, namely 1 and M_Y. INCY must not be zero.
5071 *
5072 * A (local input/local output) DOUBLE PRECISION array
5073 * On entry, A is an array of dimension (DESCA( M_ ),*). This
5074 * array contains a local copy of the initial entire matrix PA.
5075 *
5076 * PA (local input) DOUBLE PRECISION array
5077 * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
5078 * array contains the local entries of the matrix PA.
5079 *
5080 * IA (global input) INTEGER
5081 * On entry, IA specifies A's global row index, which points to
5082 * the beginning of the submatrix sub( A ).
5083 *
5084 * JA (global input) INTEGER
5085 * On entry, JA specifies A's global column index, which points
5086 * to the beginning of the submatrix sub( A ).
5087 *
5088 * DESCA (global and local input) INTEGER array
5089 * On entry, DESCA is an integer array of dimension DLEN_. This
5090 * is the array descriptor for the matrix A.
5091 *
5092 * G (workspace) DOUBLE PRECISION array
5093 * On entry, G is an array of dimension at least MAX( M, N ). G
5094 * is used to compute the gauges.
5095 *
5096 * ERR (global output) DOUBLE PRECISION
5097 * On exit, ERR specifies the largest error in absolute value.
5098 *
5099 * INFO (global output) INTEGER
5100 * On exit, if INFO <> 0, the result is less than half accurate.
5101 *
5102 * -- Written on April 1, 1998 by
5103 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5104 *
5105 * =====================================================================
5106 *
5107 * .. Parameters ..
5108  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5109  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5110  $ RSRC_
5111  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5112  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5113  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5114  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5115  DOUBLE PRECISION ZERO, ONE
5116  PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
5117 * ..
5118 * .. Local Scalars ..
5119  LOGICAL COLREP, LOWER, ROWREP, UPPER
5120  INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
5121  $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J,
5122  $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW,
5123  $ npcol, nprow
5124  DOUBLE PRECISION EPS, ERRI, GTMP, ATMP
5125 * ..
5126 * .. External Subroutines ..
5127  EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
5128 * ..
5129 * .. External Functions ..
5130  LOGICAL LSAME
5131  DOUBLE PRECISION PDLAMCH
5132  EXTERNAL lsame, pdlamch
5133 * ..
5134 * .. Intrinsic Functions ..
5135  INTRINSIC abs, max, min, mod, sqrt
5136 * ..
5137 * .. Executable Statements ..
5138 *
5139  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5140 *
5141  eps = pdlamch( ictxt, 'eps' )
5142 *
5143  upper = lsame( uplo, 'U' )
5144  lower = lsame( uplo, 'L' )
5145 *
5146  lda = max( 1, desca( m_ ) )
5147  ldx = max( 1, descx( m_ ) )
5148  ldy = max( 1, descy( m_ ) )
5149 *
5150 * Compute expected result in A using data in A, X and Y.
5151 * Compute gauges in G. This part of the computation is performed
5152 * by every process in the grid.
5153 *
5154  DO 70 j = 1, n
5155 *
5156  ioffxj = ix + ( jx - 1 ) * ldx + ( j - 1 ) * incx
5157  ioffyj = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
5158 *
5159  IF( lower ) THEN
5160  ibeg = j
5161  iend = m
5162  DO 10 i = 1, j-1
5163  g( i ) = zero
5164  10 CONTINUE
5165  ELSE IF( upper ) THEN
5166  ibeg = 1
5167  iend = j
5168  DO 20 i = j+1, m
5169  g( i ) = zero
5170  20 CONTINUE
5171  ELSE
5172  ibeg = 1
5173  iend = m
5174  END IF
5175 *
5176  DO 30 i = ibeg, iend
5177  ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
5178  ioffxi = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
5179  ioffyi = iy + ( jy - 1 ) * ldy + ( i - 1 ) * incy
5180  atmp = x( ioffxi ) * y( ioffyj )
5181  atmp = atmp + y( ioffyi ) * x( ioffxj )
5182  gtmp = abs( x( ioffxi ) * y( ioffyj ) )
5183  gtmp = gtmp + abs( y( ioffyi ) * x( ioffxj ) )
5184  g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
5185  a( ioffa ) = alpha*atmp + a( ioffa )
5186 *
5187  30 CONTINUE
5188 *
5189 * Compute the error ratio for this result.
5190 *
5191  info = 0
5192  err = zero
5193  ldpa = desca( lld_ )
5194  ioffa = ia + ( ja + j - 2 ) * lda
5195  CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
5196  $ iia, jja, iarow, iacol )
5197  rowrep = ( iarow.EQ.-1 )
5198  colrep = ( iacol.EQ.-1 )
5199 *
5200  IF( mycol.EQ.iacol .OR. colrep ) THEN
5201 *
5202  icurrow = iarow
5203  ib = desca( imb_ ) - ia + 1
5204  IF( ib.LE.0 )
5205  $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
5206  ib = min( ib, m )
5207  in = ia + ib - 1
5208 *
5209  DO 40 i = ia, in
5210 *
5211  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5212  erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
5213  IF( g( i-ia+1 ).NE.zero )
5214  $ erri = erri / g( i-ia+1 )
5215  err = max( err, erri )
5216  IF( err*sqrt( eps ).GE.one )
5217  $ info = 1
5218  iia = iia + 1
5219  END IF
5220 *
5221  ioffa = ioffa + 1
5222 *
5223  40 CONTINUE
5224 *
5225  icurrow = mod( icurrow+1, nprow )
5226 *
5227  DO 60 i = in+1, ia+m-1, desca( mb_ )
5228  ib = min( ia+m-i, desca( mb_ ) )
5229 *
5230  DO 50 kk = 0, ib-1
5231 *
5232  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5233  erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
5234  IF( g( i+kk-ia+1 ).NE.zero )
5235  $ erri = erri / g( i+kk-ia+1 )
5236  err = max( err, erri )
5237  IF( err*sqrt( eps ).GE.one )
5238  $ info = 1
5239  iia = iia + 1
5240  END IF
5241 *
5242  ioffa = ioffa + 1
5243 *
5244  50 CONTINUE
5245 *
5246  icurrow = mod( icurrow+1, nprow )
5247 *
5248  60 CONTINUE
5249 *
5250  END IF
5251 *
5252 * If INFO = 0, all results are at least half accurate.
5253 *
5254  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5255  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5256  $ mycol )
5257  IF( info.NE.0 )
5258  $ GO TO 80
5259 *
5260  70 CONTINUE
5261 *
5262  80 CONTINUE
5263 *
5264  RETURN
5265 *
5266 * End of PDVMCH2
5267 *
5268  END
5269  SUBROUTINE pdmmch( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA,
5270  $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5271  $ JC, DESCC, CT, G, ERR, INFO )
5273 * -- PBLAS test routine (version 2.0) --
5274 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5275 * and University of California, Berkeley.
5276 * April 1, 1998
5277 *
5278 * .. Scalar Arguments ..
5279  CHARACTER*1 TRANSA, TRANSB
5280  INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N
5281  DOUBLE PRECISION ALPHA, BETA, ERR
5282 * ..
5283 * .. Array Arguments ..
5284  INTEGER DESCA( * ), DESCB( * ), DESCC( * )
5285  DOUBLE PRECISION A( * ), B( * ), C( * ), CT( * ), G( * ),
5286  $ PC( * )
5287 * ..
5288 *
5289 * Purpose
5290 * =======
5291 *
5292 * PDMMCH checks the results of the computational tests.
5293 *
5294 * Notes
5295 * =====
5296 *
5297 * A description vector is associated with each 2D block-cyclicly dis-
5298 * tributed matrix. This vector stores the information required to
5299 * establish the mapping between a matrix entry and its corresponding
5300 * process and memory location.
5301 *
5302 * In the following comments, the character _ should be read as
5303 * "of the distributed matrix". Let A be a generic term for any 2D
5304 * block cyclicly distributed matrix. Its description vector is DESCA:
5305 *
5306 * NOTATION STORED IN EXPLANATION
5307 * ---------------- --------------- ------------------------------------
5308 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5309 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5310 * the NPROW x NPCOL BLACS process grid
5311 * A is distributed over. The context
5312 * itself is global, but the handle
5313 * (the integer value) may vary.
5314 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
5315 * ted matrix A, M_A >= 0.
5316 * N_A (global) DESCA( N_ ) The number of columns in the distri-
5317 * buted matrix A, N_A >= 0.
5318 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5319 * block of the matrix A, IMB_A > 0.
5320 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
5321 * left block of the matrix A,
5322 * INB_A > 0.
5323 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5324 * bute the last M_A-IMB_A rows of A,
5325 * MB_A > 0.
5326 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5327 * bute the last N_A-INB_A columns of
5328 * A, NB_A > 0.
5329 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5330 * row of the matrix A is distributed,
5331 * NPROW > RSRC_A >= 0.
5332 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5333 * first column of A is distributed.
5334 * NPCOL > CSRC_A >= 0.
5335 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5336 * array storing the local blocks of
5337 * the distributed matrix A,
5338 * IF( Lc( 1, N_A ) > 0 )
5339 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
5340 * ELSE
5341 * LLD_A >= 1.
5342 *
5343 * Let K be the number of rows of a matrix A starting at the global in-
5344 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5345 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5346 * receive if these K rows were distributed over NPROW processes. If K
5347 * is the number of columns of a matrix A starting at the global index
5348 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5349 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5350 * these K columns were distributed over NPCOL processes.
5351 *
5352 * The values of Lr() and Lc() may be determined via a call to the func-
5353 * tion PB_NUMROC:
5354 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5355 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5356 *
5357 * Arguments
5358 * =========
5359 *
5360 * ICTXT (local input) INTEGER
5361 * On entry, ICTXT specifies the BLACS context handle, indica-
5362 * ting the global context of the operation. The context itself
5363 * is global, but the value of ICTXT is local.
5364 *
5365 * TRANSA (global input) CHARACTER*1
5366 * On entry, TRANSA specifies if the matrix operand A is to be
5367 * transposed.
5368 *
5369 * TRANSB (global input) CHARACTER*1
5370 * On entry, TRANSB specifies if the matrix operand B is to be
5371 * transposed.
5372 *
5373 * M (global input) INTEGER
5374 * On entry, M specifies the number of rows of C.
5375 *
5376 * N (global input) INTEGER
5377 * On entry, N specifies the number of columns of C.
5378 *
5379 * K (global input) INTEGER
5380 * On entry, K specifies the number of columns (resp. rows) of A
5381 * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK,
5382 * PxSYR2K, PxHERK and PxHER2K.
5383 *
5384 * ALPHA (global input) DOUBLE PRECISION
5385 * On entry, ALPHA specifies the scalar alpha.
5386 *
5387 * A (local input) DOUBLE PRECISION array
5388 * On entry, A is an array of dimension (DESCA( M_ ),*). This
5389 * array contains a local copy of the initial entire matrix PA.
5390 *
5391 * IA (global input) INTEGER
5392 * On entry, IA specifies A's global row index, which points to
5393 * the beginning of the submatrix sub( A ).
5394 *
5395 * JA (global input) INTEGER
5396 * On entry, JA specifies A's global column index, which points
5397 * to the beginning of the submatrix sub( A ).
5398 *
5399 * DESCA (global and local input) INTEGER array
5400 * On entry, DESCA is an integer array of dimension DLEN_. This
5401 * is the array descriptor for the matrix A.
5402 *
5403 * B (local input) DOUBLE PRECISION array
5404 * On entry, B is an array of dimension (DESCB( M_ ),*). This
5405 * array contains a local copy of the initial entire matrix PB.
5406 *
5407 * IB (global input) INTEGER
5408 * On entry, IB specifies B's global row index, which points to
5409 * the beginning of the submatrix sub( B ).
5410 *
5411 * JB (global input) INTEGER
5412 * On entry, JB specifies B's global column index, which points
5413 * to the beginning of the submatrix sub( B ).
5414 *
5415 * DESCB (global and local input) INTEGER array
5416 * On entry, DESCB is an integer array of dimension DLEN_. This
5417 * is the array descriptor for the matrix B.
5418 *
5419 * BETA (global input) DOUBLE PRECISION
5420 * On entry, BETA specifies the scalar beta.
5421 *
5422 * C (local input/local output) DOUBLE PRECISION array
5423 * On entry, C is an array of dimension (DESCC( M_ ),*). This
5424 * array contains a local copy of the initial entire matrix PC.
5425 *
5426 * PC (local input) DOUBLE PRECISION array
5427 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5428 * array contains the local pieces of the matrix PC.
5429 *
5430 * IC (global input) INTEGER
5431 * On entry, IC specifies C's global row index, which points to
5432 * the beginning of the submatrix sub( C ).
5433 *
5434 * JC (global input) INTEGER
5435 * On entry, JC specifies C's global column index, which points
5436 * to the beginning of the submatrix sub( C ).
5437 *
5438 * DESCC (global and local input) INTEGER array
5439 * On entry, DESCC is an integer array of dimension DLEN_. This
5440 * is the array descriptor for the matrix C.
5441 *
5442 * CT (workspace) DOUBLE PRECISION array
5443 * On entry, CT is an array of dimension at least MAX(M,N,K). CT
5444 * holds a copy of the current column of C.
5445 *
5446 * G (workspace) DOUBLE PRECISION array
5447 * On entry, G is an array of dimension at least MAX(M,N,K). G
5448 * is used to compute the gauges.
5449 *
5450 * ERR (global output) DOUBLE PRECISION
5451 * On exit, ERR specifies the largest error in absolute value.
5452 *
5453 * INFO (global output) INTEGER
5454 * On exit, if INFO <> 0, the result is less than half accurate.
5455 *
5456 * -- Written on April 1, 1998 by
5457 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5458 *
5459 * =====================================================================
5460 *
5461 * .. Parameters ..
5462  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5463  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5464  $ RSRC_
5465  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5466  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5467  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5468  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5469  DOUBLE PRECISION ZERO, ONE
5470  PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
5471 * ..
5472 * .. Local Scalars ..
5473  LOGICAL COLREP, ROWREP, TRANA, TRANB
5474  INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA,
5475  $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC,
5476  $ mycol, myrow, npcol, nprow
5477  DOUBLE PRECISION EPS, ERRI
5478 * ..
5479 * .. External Subroutines ..
5480  EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
5481 * ..
5482 * .. External Functions ..
5483  LOGICAL LSAME
5484  DOUBLE PRECISION PDLAMCH
5485  EXTERNAL LSAME, PDLAMCH
5486 * ..
5487 * .. Intrinsic Functions ..
5488  INTRINSIC abs, max, min, mod, sqrt
5489 * ..
5490 * .. Executable Statements ..
5491 *
5492  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5493 *
5494  eps = pdlamch( ictxt, 'eps' )
5495 *
5496  trana = lsame( transa, 'T' ).OR.lsame( transa, 'C' )
5497  tranb = lsame( transb, 'T' ).OR.lsame( transb, 'C' )
5498 *
5499  lda = max( 1, desca( m_ ) )
5500  ldb = max( 1, descb( m_ ) )
5501  ldc = max( 1, descc( m_ ) )
5502 *
5503 * Compute expected result in C using data in A, B and C.
5504 * Compute gauges in G. This part of the computation is performed
5505 * by every process in the grid.
5506 *
5507  DO 240 j = 1, n
5508 *
5509  ioffc = ic + ( jc + j - 2 ) * ldc
5510  DO 10 i = 1, m
5511  ct( i ) = zero
5512  g( i ) = zero
5513  10 CONTINUE
5514 *
5515  IF( .NOT.trana .AND. .NOT.tranb ) THEN
5516  DO 30 kk = 1, k
5517  ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5518  DO 20 i = 1, m
5519  ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5520  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5521  g( i ) = g( i ) + abs( a( ioffa ) ) *
5522  $ abs( b( ioffb ) )
5523  20 CONTINUE
5524  30 CONTINUE
5525  ELSE IF( trana .AND. .NOT.tranb ) THEN
5526  DO 50 kk = 1, k
5527  ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5528  DO 40 i = 1, m
5529  ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5530  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5531  g( i ) = g( i ) + abs( a( ioffa ) ) *
5532  $ abs( b( ioffb ) )
5533  40 CONTINUE
5534  50 CONTINUE
5535  ELSE IF( .NOT.trana .AND. tranb ) THEN
5536  DO 70 kk = 1, k
5537  ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5538  DO 60 i = 1, m
5539  ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5540  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5541  g( i ) = g( i ) + abs( a( ioffa ) ) *
5542  $ abs( b( ioffb ) )
5543  60 CONTINUE
5544  70 CONTINUE
5545  ELSE IF( trana .AND. tranb ) THEN
5546  DO 90 kk = 1, k
5547  ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5548  DO 80 i = 1, m
5549  ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5550  ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5551  g( i ) = g( i ) + abs( a( ioffa ) ) *
5552  $ abs( b( ioffb ) )
5553  80 CONTINUE
5554  90 CONTINUE
5555  END IF
5556 *
5557  DO 200 i = 1, m
5558  ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5559  g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5560  c( ioffc ) = ct( i )
5561  ioffc = ioffc + 1
5562  200 CONTINUE
5563 *
5564 * Compute the error ratio for this result.
5565 *
5566  err = zero
5567  info = 0
5568  ldpc = descc( lld_ )
5569  ioffc = ic + ( jc + j - 2 ) * ldc
5570  CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5571  $ iic, jjc, icrow, iccol )
5572  icurrow = icrow
5573  rowrep = ( icrow.EQ.-1 )
5574  colrep = ( iccol.EQ.-1 )
5575 *
5576  IF( mycol.EQ.iccol .OR. colrep ) THEN
5577 *
5578  ibb = descc( imb_ ) - ic + 1
5579  IF( ibb.LE.0 )
5580  $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5581  ibb = min( ibb, m )
5582  in = ic + ibb - 1
5583 *
5584  DO 210 i = ic, in
5585 *
5586  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5587  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5588  $ c( ioffc ) ) / eps
5589  IF( g( i-ic+1 ).NE.zero )
5590  $ erri = erri / g( i-ic+1 )
5591  err = max( err, erri )
5592  IF( err*sqrt( eps ).GE.one )
5593  $ info = 1
5594  iic = iic + 1
5595  END IF
5596 *
5597  ioffc = ioffc + 1
5598 *
5599  210 CONTINUE
5600 *
5601  icurrow = mod( icurrow+1, nprow )
5602 *
5603  DO 230 i = in+1, ic+m-1, descc( mb_ )
5604  ibb = min( ic+m-i, descc( mb_ ) )
5605 *
5606  DO 220 kk = 0, ibb-1
5607 *
5608  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5609  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5610  $ c( ioffc ) )/eps
5611  IF( g( i+kk-ic+1 ).NE.zero )
5612  $ erri = erri / g( i+kk-ic+1 )
5613  err = max( err, erri )
5614  IF( err*sqrt( eps ).GE.one )
5615  $ info = 1
5616  iic = iic + 1
5617  END IF
5618 *
5619  ioffc = ioffc + 1
5620 *
5621  220 CONTINUE
5622 *
5623  icurrow = mod( icurrow+1, nprow )
5624 *
5625  230 CONTINUE
5626 *
5627  END IF
5628 *
5629 * If INFO = 0, all results are at least half accurate.
5630 *
5631  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5632  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5633  $ mycol )
5634  IF( info.NE.0 )
5635  $ GO TO 250
5636 *
5637  240 CONTINUE
5638 *
5639  250 CONTINUE
5640 *
5641  RETURN
5642 *
5643 * End of PDMMCH
5644 *
5645  END
5646  SUBROUTINE pdmmch1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5647  $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G,
5648  $ ERR, INFO )
5650 * -- PBLAS test routine (version 2.0) --
5651 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5652 * and University of California, Berkeley.
5653 * April 1, 1998
5654 *
5655 * .. Scalar Arguments ..
5656  CHARACTER*1 TRANS, UPLO
5657  INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N
5658  DOUBLE PRECISION ALPHA, BETA, ERR
5659 * ..
5660 * .. Array Arguments ..
5661  INTEGER DESCA( * ), DESCC( * )
5662  DOUBLE PRECISION A( * ), C( * ), CT( * ), G( * ), PC( * )
5663 * ..
5664 *
5665 * Purpose
5666 * =======
5667 *
5668 * PDMMCH1 checks the results of the computational tests.
5669 *
5670 * Notes
5671 * =====
5672 *
5673 * A description vector is associated with each 2D block-cyclicly dis-
5674 * tributed matrix. This vector stores the information required to
5675 * establish the mapping between a matrix entry and its corresponding
5676 * process and memory location.
5677 *
5678 * In the following comments, the character _ should be read as
5679 * "of the distributed matrix". Let A be a generic term for any 2D
5680 * block cyclicly distributed matrix. Its description vector is DESCA:
5681 *
5682 * NOTATION STORED IN EXPLANATION
5683 * ---------------- --------------- ------------------------------------
5684 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5685 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5686 * the NPROW x NPCOL BLACS process grid
5687 * A is distributed over. The context
5688 * itself is global, but the handle
5689 * (the integer value) may vary.
5690 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
5691 * ted matrix A, M_A >= 0.
5692 * N_A (global) DESCA( N_ ) The number of columns in the distri-
5693 * buted matrix A, N_A >= 0.
5694 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5695 * block of the matrix A, IMB_A > 0.
5696 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
5697 * left block of the matrix A,
5698 * INB_A > 0.
5699 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5700 * bute the last M_A-IMB_A rows of A,
5701 * MB_A > 0.
5702 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5703 * bute the last N_A-INB_A columns of
5704 * A, NB_A > 0.
5705 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5706 * row of the matrix A is distributed,
5707 * NPROW > RSRC_A >= 0.
5708 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5709 * first column of A is distributed.
5710 * NPCOL > CSRC_A >= 0.
5711 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5712 * array storing the local blocks of
5713 * the distributed matrix A,
5714 * IF( Lc( 1, N_A ) > 0 )
5715 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
5716 * ELSE
5717 * LLD_A >= 1.
5718 *
5719 * Let K be the number of rows of a matrix A starting at the global in-
5720 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5721 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5722 * receive if these K rows were distributed over NPROW processes. If K
5723 * is the number of columns of a matrix A starting at the global index
5724 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5725 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5726 * these K columns were distributed over NPCOL processes.
5727 *
5728 * The values of Lr() and Lc() may be determined via a call to the func-
5729 * tion PB_NUMROC:
5730 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5731 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5732 *
5733 * Arguments
5734 * =========
5735 *
5736 * ICTXT (local input) INTEGER
5737 * On entry, ICTXT specifies the BLACS context handle, indica-
5738 * ting the global context of the operation. The context itself
5739 * is global, but the value of ICTXT is local.
5740 *
5741 * UPLO (global input) CHARACTER*1
5742 * On entry, UPLO specifies which part of C should contain the
5743 * result.
5744 *
5745 * TRANS (global input) CHARACTER*1
5746 * On entry, TRANS specifies whether the matrix A has to be
5747 * transposed or not before computing the matrix-matrix product.
5748 *
5749 * N (global input) INTEGER
5750 * On entry, N specifies the order the submatrix operand C. N
5751 * must be at least zero.
5752 *
5753 * K (global input) INTEGER
5754 * On entry, K specifies the number of columns (resp. rows) of A
5755 * when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least
5756 * zero.
5757 *
5758 * ALPHA (global input) DOUBLE PRECISION
5759 * On entry, ALPHA specifies the scalar alpha.
5760 *
5761 * A (local input) DOUBLE PRECISION array
5762 * On entry, A is an array of dimension (DESCA( M_ ),*). This
5763 * array contains a local copy of the initial entire matrix PA.
5764 *
5765 * IA (global input) INTEGER
5766 * On entry, IA specifies A's global row index, which points to
5767 * the beginning of the submatrix sub( A ).
5768 *
5769 * JA (global input) INTEGER
5770 * On entry, JA specifies A's global column index, which points
5771 * to the beginning of the submatrix sub( A ).
5772 *
5773 * DESCA (global and local input) INTEGER array
5774 * On entry, DESCA is an integer array of dimension DLEN_. This
5775 * is the array descriptor for the matrix A.
5776 *
5777 * BETA (global input) DOUBLE PRECISION
5778 * On entry, BETA specifies the scalar beta.
5779 *
5780 * C (local input/local output) DOUBLE PRECISION array
5781 * On entry, C is an array of dimension (DESCC( M_ ),*). This
5782 * array contains a local copy of the initial entire matrix PC.
5783 *
5784 * PC (local input) DOUBLE PRECISION array
5785 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5786 * array contains the local pieces of the matrix PC.
5787 *
5788 * IC (global input) INTEGER
5789 * On entry, IC specifies C's global row index, which points to
5790 * the beginning of the submatrix sub( C ).
5791 *
5792 * JC (global input) INTEGER
5793 * On entry, JC specifies C's global column index, which points
5794 * to the beginning of the submatrix sub( C ).
5795 *
5796 * DESCC (global and local input) INTEGER array
5797 * On entry, DESCC is an integer array of dimension DLEN_. This
5798 * is the array descriptor for the matrix C.
5799 *
5800 * CT (workspace) DOUBLE PRECISION array
5801 * On entry, CT is an array of dimension at least MAX(M,N,K). CT
5802 * holds a copy of the current column of C.
5803 *
5804 * G (workspace) DOUBLE PRECISION array
5805 * On entry, G is an array of dimension at least MAX(M,N,K). G
5806 * is used to compute the gauges.
5807 *
5808 * ERR (global output) DOUBLE PRECISION
5809 * On exit, ERR specifies the largest error in absolute value.
5810 *
5811 * INFO (global output) INTEGER
5812 * On exit, if INFO <> 0, the result is less than half accurate.
5813 *
5814 * -- Written on April 1, 1998 by
5815 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5816 *
5817 * =====================================================================
5818 *
5819 * .. Parameters ..
5820  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5821  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5822  $ RSRC_
5823  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5824  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5825  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5826  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5827  DOUBLE PRECISION ZERO, ONE
5828  PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
5829 * ..
5830 * .. Local Scalars ..
5831  LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
5832  INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
5833  $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA,
5834  $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW
5835  DOUBLE PRECISION EPS, ERRI
5836 * ..
5837 * .. External Subroutines ..
5838  EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
5839 * ..
5840 * .. External Functions ..
5841  LOGICAL LSAME
5842  DOUBLE PRECISION PDLAMCH
5843  EXTERNAL lsame, pdlamch
5844 * ..
5845 * .. Intrinsic Functions ..
5846  INTRINSIC abs, max, min, mod, sqrt
5847 * ..
5848 * .. Executable Statements ..
5849 *
5850  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5851 *
5852  eps = pdlamch( ictxt, 'eps' )
5853 *
5854  upper = lsame( uplo, 'U' )
5855  notran = lsame( trans, 'N' )
5856  tran = lsame( trans, 'T' )
5857 *
5858  lda = max( 1, desca( m_ ) )
5859  ldc = max( 1, descc( m_ ) )
5860 *
5861 * Compute expected result in C using data in A, B and C.
5862 * Compute gauges in G. This part of the computation is performed
5863 * by every process in the grid.
5864 *
5865  DO 140 j = 1, n
5866 *
5867  IF( upper ) THEN
5868  ibeg = 1
5869  iend = j
5870  ELSE
5871  ibeg = j
5872  iend = n
5873  END IF
5874 *
5875  DO 10 i = 1, n
5876  ct( i ) = zero
5877  g( i ) = zero
5878  10 CONTINUE
5879 *
5880  IF( notran ) THEN
5881  DO 30 kk = 1, k
5882  ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
5883  DO 20 i = ibeg, iend
5884  ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
5885  ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5886  g( i ) = g( i ) + abs( a( ioffak ) ) *
5887  $ abs( a( ioffan ) )
5888  20 CONTINUE
5889  30 CONTINUE
5890  ELSE IF( tran ) THEN
5891  DO 50 kk = 1, k
5892  ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
5893  DO 40 i = ibeg, iend
5894  ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
5895  ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5896  g( i ) = g( i ) + abs( a( ioffak ) ) *
5897  $ abs( a( ioffan ) )
5898  40 CONTINUE
5899  50 CONTINUE
5900  END IF
5901 *
5902  ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
5903 *
5904  DO 100 i = ibeg, iend
5905  ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5906  g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5907  c( ioffc ) = ct( i )
5908  ioffc = ioffc + 1
5909  100 CONTINUE
5910 *
5911 * Compute the error ratio for this result.
5912 *
5913  err = zero
5914  info = 0
5915  ldpc = descc( lld_ )
5916  ioffc = ic + ( jc + j - 2 ) * ldc
5917  CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5918  $ iic, jjc, icrow, iccol )
5919  icurrow = icrow
5920  rowrep = ( icrow.EQ.-1 )
5921  colrep = ( iccol.EQ.-1 )
5922 *
5923  IF( mycol.EQ.iccol .OR. colrep ) THEN
5924 *
5925  ibb = descc( imb_ ) - ic + 1
5926  IF( ibb.LE.0 )
5927  $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5928  ibb = min( ibb, n )
5929  in = ic + ibb - 1
5930 *
5931  DO 110 i = ic, in
5932 *
5933  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5934  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5935  $ c( ioffc ) ) / eps
5936  IF( g( i-ic+1 ).NE.zero )
5937  $ erri = erri / g( i-ic+1 )
5938  err = max( err, erri )
5939  IF( err*sqrt( eps ).GE.one )
5940  $ info = 1
5941  iic = iic + 1
5942  END IF
5943 *
5944  ioffc = ioffc + 1
5945 *
5946  110 CONTINUE
5947 *
5948  icurrow = mod( icurrow+1, nprow )
5949 *
5950  DO 130 i = in+1, ic+n-1, descc( mb_ )
5951  ibb = min( ic+n-i, descc( mb_ ) )
5952 *
5953  DO 120 kk = 0, ibb-1
5954 *
5955  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5956  erri = abs( pc( iic+(jjc-1)*ldpc ) -
5957  $ c( ioffc ) )/eps
5958  IF( g( i+kk-ic+1 ).NE.zero )
5959  $ erri = erri / g( i+kk-ic+1 )
5960  err = max( err, erri )
5961  IF( err*sqrt( eps ).GE.one )
5962  $ info = 1
5963  iic = iic + 1
5964  END IF
5965 *
5966  ioffc = ioffc + 1
5967 *
5968  120 CONTINUE
5969 *
5970  icurrow = mod( icurrow+1, nprow )
5971 *
5972  130 CONTINUE
5973 *
5974  END IF
5975 *
5976 * If INFO = 0, all results are at least half accurate.
5977 *
5978  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5979  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5980  $ mycol )
5981  IF( info.NE.0 )
5982  $ GO TO 150
5983 *
5984  140 CONTINUE
5985 *
5986  150 CONTINUE
5987 *
5988  RETURN
5989 *
5990 * End of PDMMCH1
5991 *
5992  END
5993  SUBROUTINE pdmmch2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5994  $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5995  $ JC, DESCC, CT, G, ERR, INFO )
5997 * -- PBLAS test routine (version 2.0) --
5998 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5999 * and University of California, Berkeley.
6000 * April 1, 1998
6001 *
6002 * .. Scalar Arguments ..
6003  CHARACTER*1 TRANS, UPLO
6004  INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N
6005  DOUBLE PRECISION ALPHA, BETA, ERR
6006 * ..
6007 * .. Array Arguments ..
6008  INTEGER DESCA( * ), DESCB( * ), DESCC( * )
6009  DOUBLE PRECISION A( * ), B( * ), C( * ), CT( * ), G( * ),
6010  $ pc( * )
6011 * ..
6012 *
6013 * Purpose
6014 * =======
6015 *
6016 * PDMMCH2 checks the results of the computational tests.
6017 *
6018 * Notes
6019 * =====
6020 *
6021 * A description vector is associated with each 2D block-cyclicly dis-
6022 * tributed matrix. This vector stores the information required to
6023 * establish the mapping between a matrix entry and its corresponding
6024 * process and memory location.
6025 *
6026 * In the following comments, the character _ should be read as
6027 * "of the distributed matrix". Let A be a generic term for any 2D
6028 * block cyclicly distributed matrix. Its description vector is DESCA:
6029 *
6030 * NOTATION STORED IN EXPLANATION
6031 * ---------------- --------------- ------------------------------------
6032 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6033 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6034 * the NPROW x NPCOL BLACS process grid
6035 * A is distributed over. The context
6036 * itself is global, but the handle
6037 * (the integer value) may vary.
6038 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
6039 * ted matrix A, M_A >= 0.
6040 * N_A (global) DESCA( N_ ) The number of columns in the distri-
6041 * buted matrix A, N_A >= 0.
6042 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6043 * block of the matrix A, IMB_A > 0.
6044 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
6045 * left block of the matrix A,
6046 * INB_A > 0.
6047 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6048 * bute the last M_A-IMB_A rows of A,
6049 * MB_A > 0.
6050 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6051 * bute the last N_A-INB_A columns of
6052 * A, NB_A > 0.
6053 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6054 * row of the matrix A is distributed,
6055 * NPROW > RSRC_A >= 0.
6056 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6057 * first column of A is distributed.
6058 * NPCOL > CSRC_A >= 0.
6059 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6060 * array storing the local blocks of
6061 * the distributed matrix A,
6062 * IF( Lc( 1, N_A ) > 0 )
6063 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
6064 * ELSE
6065 * LLD_A >= 1.
6066 *
6067 * Let K be the number of rows of a matrix A starting at the global in-
6068 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6069 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6070 * receive if these K rows were distributed over NPROW processes. If K
6071 * is the number of columns of a matrix A starting at the global index
6072 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6073 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6074 * these K columns were distributed over NPCOL processes.
6075 *
6076 * The values of Lr() and Lc() may be determined via a call to the func-
6077 * tion PB_NUMROC:
6078 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6079 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6080 *
6081 * Arguments
6082 * =========
6083 *
6084 * ICTXT (local input) INTEGER
6085 * On entry, ICTXT specifies the BLACS context handle, indica-
6086 * ting the global context of the operation. The context itself
6087 * is global, but the value of ICTXT is local.
6088 *
6089 * UPLO (global input) CHARACTER*1
6090 * On entry, UPLO specifies which part of C should contain the
6091 * result.
6092 *
6093 * TRANS (global input) CHARACTER*1
6094 * On entry, TRANS specifies whether the matrices A and B have
6095 * to be transposed or not before computing the matrix-matrix
6096 * product.
6097 *
6098 * N (global input) INTEGER
6099 * On entry, N specifies the order the submatrix operand C. N
6100 * must be at least zero.
6101 *
6102 * K (global input) INTEGER
6103 * On entry, K specifies the number of columns (resp. rows) of A
6104 * and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at
6105 * least zero.
6106 *
6107 * ALPHA (global input) DOUBLE PRECISION
6108 * On entry, ALPHA specifies the scalar alpha.
6109 *
6110 * A (local input) DOUBLE PRECISION array
6111 * On entry, A is an array of dimension (DESCA( M_ ),*). This
6112 * array contains a local copy of the initial entire matrix PA.
6113 *
6114 * IA (global input) INTEGER
6115 * On entry, IA specifies A's global row index, which points to
6116 * the beginning of the submatrix sub( A ).
6117 *
6118 * JA (global input) INTEGER
6119 * On entry, JA specifies A's global column index, which points
6120 * to the beginning of the submatrix sub( A ).
6121 *
6122 * DESCA (global and local input) INTEGER array
6123 * On entry, DESCA is an integer array of dimension DLEN_. This
6124 * is the array descriptor for the matrix A.
6125 *
6126 * B (local input) DOUBLE PRECISION array
6127 * On entry, B is an array of dimension (DESCB( M_ ),*). This
6128 * array contains a local copy of the initial entire matrix PB.
6129 *
6130 * IB (global input) INTEGER
6131 * On entry, IB specifies B's global row index, which points to
6132 * the beginning of the submatrix sub( B ).
6133 *
6134 * JB (global input) INTEGER
6135 * On entry, JB specifies B's global column index, which points
6136 * to the beginning of the submatrix sub( B ).
6137 *
6138 * DESCB (global and local input) INTEGER array
6139 * On entry, DESCB is an integer array of dimension DLEN_. This
6140 * is the array descriptor for the matrix B.
6141 *
6142 * BETA (global input) DOUBLE PRECISION
6143 * On entry, BETA specifies the scalar beta.
6144 *
6145 * C (local input/local output) DOUBLE PRECISION array
6146 * On entry, C is an array of dimension (DESCC( M_ ),*). This
6147 * array contains a local copy of the initial entire matrix PC.
6148 *
6149 * PC (local input) DOUBLE PRECISION array
6150 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6151 * array contains the local pieces of the matrix PC.
6152 *
6153 * IC (global input) INTEGER
6154 * On entry, IC specifies C's global row index, which points to
6155 * the beginning of the submatrix sub( C ).
6156 *
6157 * JC (global input) INTEGER
6158 * On entry, JC specifies C's global column index, which points
6159 * to the beginning of the submatrix sub( C ).
6160 *
6161 * DESCC (global and local input) INTEGER array
6162 * On entry, DESCC is an integer array of dimension DLEN_. This
6163 * is the array descriptor for the matrix C.
6164 *
6165 * CT (workspace) DOUBLE PRECISION array
6166 * On entry, CT is an array of dimension at least MAX(M,N,K). CT
6167 * holds a copy of the current column of C.
6168 *
6169 * G (workspace) DOUBLE PRECISION array
6170 * On entry, G is an array of dimension at least MAX(M,N,K). G
6171 * is used to compute the gauges.
6172 *
6173 * ERR (global output) DOUBLE PRECISION
6174 * On exit, ERR specifies the largest error in absolute value.
6175 *
6176 * INFO (global output) INTEGER
6177 * On exit, if INFO <> 0, the result is less than half accurate.
6178 *
6179 * -- Written on April 1, 1998 by
6180 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6181 *
6182 * =====================================================================
6183 *
6184 * .. Parameters ..
6185  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6186  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6187  $ RSRC_
6188  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6189  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6190  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6191  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6192  DOUBLE PRECISION ZERO, ONE
6193  PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
6194 * ..
6195 * .. Local Scalars ..
6196  LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
6197  INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
6198  $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J,
6199  $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW,
6200  $ NPCOL, NPROW
6201  DOUBLE PRECISION EPS, ERRI
6202 * ..
6203 * .. External Subroutines ..
6204  EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
6205 * ..
6206 * .. External Functions ..
6207  LOGICAL LSAME
6208  DOUBLE PRECISION PDLAMCH
6209  EXTERNAL LSAME, PDLAMCH
6210 * ..
6211 * .. Intrinsic Functions ..
6212  INTRINSIC abs, max, min, mod, sqrt
6213 * ..
6214 * .. Executable Statements ..
6215 *
6216  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6217 *
6218  eps = pdlamch( ictxt, 'eps' )
6219 *
6220  upper = lsame( uplo, 'U' )
6221  notran = lsame( trans, 'N' )
6222  tran = lsame( trans, 'T' )
6223 *
6224  lda = max( 1, desca( m_ ) )
6225  ldb = max( 1, descb( m_ ) )
6226  ldc = max( 1, descc( m_ ) )
6227 *
6228 * Compute expected result in C using data in A, B and C.
6229 * Compute gauges in G. This part of the computation is performed
6230 * by every process in the grid.
6231 *
6232  DO 140 j = 1, n
6233 *
6234  IF( upper ) THEN
6235  ibeg = 1
6236  iend = j
6237  ELSE
6238  ibeg = j
6239  iend = n
6240  END IF
6241 *
6242  DO 10 i = 1, n
6243  ct( i ) = zero
6244  g( i ) = zero
6245  10 CONTINUE
6246 *
6247  IF( notran ) THEN
6248  DO 30 kk = 1, k
6249  ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6250  ioffbk = ib + j - 1 + ( jb + kk - 2 ) * ldb
6251  DO 20 i = ibeg, iend
6252  ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6253  ioffbn = ib + i - 1 + ( jb + kk - 2 ) * ldb
6254  ct( i ) = ct( i ) + alpha * (
6255  $ a( ioffan ) * b( ioffbk ) +
6256  $ b( ioffbn ) * a( ioffak ) )
6257  g( i ) = g( i ) + abs( alpha ) * (
6258  $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6259  $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6260  20 CONTINUE
6261  30 CONTINUE
6262  ELSE IF( tran ) THEN
6263  DO 50 kk = 1, k
6264  ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6265  ioffbk = ib + kk - 1 + ( jb + j - 2 ) * ldb
6266  DO 40 i = ibeg, iend
6267  ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6268  ioffbn = ib + kk - 1 + ( jb + i - 2 ) * ldb
6269  ct( i ) = ct( i ) + alpha * (
6270  $ a( ioffan ) * b( ioffbk ) +
6271  $ b( ioffbn ) * a( ioffak ) )
6272  g( i ) = g( i ) + abs( alpha ) * (
6273  $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6274  $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6275  40 CONTINUE
6276  50 CONTINUE
6277  END IF
6278 *
6279  ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
6280 *
6281  DO 100 i = ibeg, iend
6282  ct( i ) = ct( i ) + beta * c( ioffc )
6283  g( i ) = g( i ) + abs( beta )*abs( c( ioffc ) )
6284  c( ioffc ) = ct( i )
6285  ioffc = ioffc + 1
6286  100 CONTINUE
6287 *
6288 * Compute the error ratio for this result.
6289 *
6290  err = zero
6291  info = 0
6292  ldpc = descc( lld_ )
6293  ioffc = ic + ( jc + j - 2 ) * ldc
6294  CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
6295  $ iic, jjc, icrow, iccol )
6296  icurrow = icrow
6297  rowrep = ( icrow.EQ.-1 )
6298  colrep = ( iccol.EQ.-1 )
6299 *
6300  IF( mycol.EQ.iccol .OR. colrep ) THEN
6301 *
6302  ibb = descc( imb_ ) - ic + 1
6303  IF( ibb.LE.0 )
6304  $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
6305  ibb = min( ibb, n )
6306  in = ic + ibb - 1
6307 *
6308  DO 110 i = ic, in
6309 *
6310  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6311  erri = abs( pc( iic+(jjc-1)*ldpc ) -
6312  $ c( ioffc ) ) / eps
6313  IF( g( i-ic+1 ).NE.zero )
6314  $ erri = erri / g( i-ic+1 )
6315  err = max( err, erri )
6316  IF( err*sqrt( eps ).GE.one )
6317  $ info = 1
6318  iic = iic + 1
6319  END IF
6320 *
6321  ioffc = ioffc + 1
6322 *
6323  110 CONTINUE
6324 *
6325  icurrow = mod( icurrow+1, nprow )
6326 *
6327  DO 130 i = in+1, ic+n-1, descc( mb_ )
6328  ibb = min( ic+n-i, descc( mb_ ) )
6329 *
6330  DO 120 kk = 0, ibb-1
6331 *
6332  IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6333  erri = abs( pc( iic+(jjc-1)*ldpc ) -
6334  $ c( ioffc ) )/eps
6335  IF( g( i+kk-ic+1 ).NE.zero )
6336  $ erri = erri / g( i+kk-ic+1 )
6337  err = max( err, erri )
6338  IF( err*sqrt( eps ).GE.one )
6339  $ info = 1
6340  iic = iic + 1
6341  END IF
6342 *
6343  ioffc = ioffc + 1
6344 *
6345  120 CONTINUE
6346 *
6347  icurrow = mod( icurrow+1, nprow )
6348 *
6349  130 CONTINUE
6350 *
6351  END IF
6352 *
6353 * If INFO = 0, all results are at least half accurate.
6354 *
6355  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6356  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6357  $ mycol )
6358  IF( info.NE.0 )
6359  $ GO TO 150
6360 *
6361  140 CONTINUE
6362 *
6363  150 CONTINUE
6364 *
6365  RETURN
6366 *
6367 * End of PDMMCH2
6368 *
6369  END
6370  SUBROUTINE pdmmch3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
6371  $ BETA, C, PC, IC, JC, DESCC, ERR, INFO )
6373 * -- PBLAS test routine (version 2.0) --
6374 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6375 * and University of California, Berkeley.
6376 * April 1, 1998
6377 *
6378 * .. Scalar Arguments ..
6379  CHARACTER*1 TRANS, UPLO
6380  INTEGER IA, IC, INFO, JA, JC, M, N
6381  DOUBLE PRECISION ALPHA, BETA, ERR
6382 * ..
6383 * .. Array Arguments ..
6384  INTEGER DESCA( * ), DESCC( * )
6385  DOUBLE PRECISION A( * ), C( * ), PC( * )
6386 * ..
6387 *
6388 * Purpose
6389 * =======
6390 *
6391 * PDMMCH3 checks the results of the computational tests.
6392 *
6393 * Notes
6394 * =====
6395 *
6396 * A description vector is associated with each 2D block-cyclicly dis-
6397 * tributed matrix. This vector stores the information required to
6398 * establish the mapping between a matrix entry and its corresponding
6399 * process and memory location.
6400 *
6401 * In the following comments, the character _ should be read as
6402 * "of the distributed matrix". Let A be a generic term for any 2D
6403 * block cyclicly distributed matrix. Its description vector is DESCA:
6404 *
6405 * NOTATION STORED IN EXPLANATION
6406 * ---------------- --------------- ------------------------------------
6407 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6408 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6409 * the NPROW x NPCOL BLACS process grid
6410 * A is distributed over. The context
6411 * itself is global, but the handle
6412 * (the integer value) may vary.
6413 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
6414 * ted matrix A, M_A >= 0.
6415 * N_A (global) DESCA( N_ ) The number of columns in the distri-
6416 * buted matrix A, N_A >= 0.
6417 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6418 * block of the matrix A, IMB_A > 0.
6419 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
6420 * left block of the matrix A,
6421 * INB_A > 0.
6422 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6423 * bute the last M_A-IMB_A rows of A,
6424 * MB_A > 0.
6425 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6426 * bute the last N_A-INB_A columns of
6427 * A, NB_A > 0.
6428 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6429 * row of the matrix A is distributed,
6430 * NPROW > RSRC_A >= 0.
6431 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6432 * first column of A is distributed.
6433 * NPCOL > CSRC_A >= 0.
6434 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6435 * array storing the local blocks of
6436 * the distributed matrix A,
6437 * IF( Lc( 1, N_A ) > 0 )
6438 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
6439 * ELSE
6440 * LLD_A >= 1.
6441 *
6442 * Let K be the number of rows of a matrix A starting at the global in-
6443 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6444 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6445 * receive if these K rows were distributed over NPROW processes. If K
6446 * is the number of columns of a matrix A starting at the global index
6447 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6448 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6449 * these K columns were distributed over NPCOL processes.
6450 *
6451 * The values of Lr() and Lc() may be determined via a call to the func-
6452 * tion PB_NUMROC:
6453 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6454 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6455 *
6456 * Arguments
6457 * =========
6458 *
6459 * UPLO (global input) CHARACTER*1
6460 * On entry, UPLO specifies which part of C should contain the
6461 * result.
6462 *
6463 * TRANS (global input) CHARACTER*1
6464 * On entry, TRANS specifies whether the matrix A has to be
6465 * transposed or not before computing the matrix-matrix addi-
6466 * tion.
6467 *
6468 * M (global input) INTEGER
6469 * On entry, M specifies the number of rows of C.
6470 *
6471 * N (global input) INTEGER
6472 * On entry, N specifies the number of columns of C.
6473 *
6474 * ALPHA (global input) DOUBLE PRECISION
6475 * On entry, ALPHA specifies the scalar alpha.
6476 *
6477 * A (local input) DOUBLE PRECISION array
6478 * On entry, A is an array of dimension (DESCA( M_ ),*). This
6479 * array contains a local copy of the initial entire matrix PA.
6480 *
6481 * IA (global input) INTEGER
6482 * On entry, IA specifies A's global row index, which points to
6483 * the beginning of the submatrix sub( A ).
6484 *
6485 * JA (global input) INTEGER
6486 * On entry, JA specifies A's global column index, which points
6487 * to the beginning of the submatrix sub( A ).
6488 *
6489 * DESCA (global and local input) INTEGER array
6490 * On entry, DESCA is an integer array of dimension DLEN_. This
6491 * is the array descriptor for the matrix A.
6492 *
6493 * BETA (global input) DOUBLE PRECISION
6494 * On entry, BETA specifies the scalar beta.
6495 *
6496 * C (local input/local output) DOUBLE PRECISION array
6497 * On entry, C is an array of dimension (DESCC( M_ ),*). This
6498 * array contains a local copy of the initial entire matrix PC.
6499 *
6500 * PC (local input) DOUBLE PRECISION array
6501 * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6502 * array contains the local pieces of the matrix PC.
6503 *
6504 * IC (global input) INTEGER
6505 * On entry, IC specifies C's global row index, which points to
6506 * the beginning of the submatrix sub( C ).
6507 *
6508 * JC (global input) INTEGER
6509 * On entry, JC specifies C's global column index, which points
6510 * to the beginning of the submatrix sub( C ).
6511 *
6512 * DESCC (global and local input) INTEGER array
6513 * On entry, DESCC is an integer array of dimension DLEN_. This
6514 * is the array descriptor for the matrix C.
6515 *
6516 * ERR (global output) DOUBLE PRECISION
6517 * On exit, ERR specifies the largest error in absolute value.
6518 *
6519 * INFO (global output) INTEGER
6520 * On exit, if INFO <> 0, the result is less than half accurate.
6521 *
6522 * -- Written on April 1, 1998 by
6523 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6524 *
6525 * =====================================================================
6526 *
6527 * .. Parameters ..
6528  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6529  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6530  $ RSRC_
6531  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6532  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6533  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6534  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6535  DOUBLE PRECISION ZERO
6536  PARAMETER ( ZERO = 0.0d+0 )
6537 * ..
6538 * .. Local Scalars ..
6539  LOGICAL COLREP, LOWER, NOTRAN, ROWREP, UPPER
6540  INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J,
6541  $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL,
6542  $ NPROW
6543  DOUBLE PRECISION ERR0, ERRI, PREC
6544 * ..
6545 * .. External Subroutines ..
6546  EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L,
6547  $ pderraxpby
6548 * ..
6549 * .. External Functions ..
6550  LOGICAL LSAME
6551  DOUBLE PRECISION PDLAMCH
6552  EXTERNAL LSAME, PDLAMCH
6553 * ..
6554 * .. Intrinsic Functions ..
6555  INTRINSIC abs, max
6556 * ..
6557 * .. Executable Statements ..
6558 *
6559  ictxt = descc( ctxt_ )
6560  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6561 *
6562  prec = pdlamch( ictxt, 'eps' )
6563 *
6564  upper = lsame( uplo, 'U' )
6565  lower = lsame( uplo, 'L' )
6566  notran = lsame( trans, 'N' )
6567 *
6568 * Compute expected result in C using data in A and C. This part of
6569 * the computation is performed by every process in the grid.
6570 *
6571  info = 0
6572  err = zero
6573 *
6574  lda = max( 1, desca( m_ ) )
6575  ldc = max( 1, descc( m_ ) )
6576  ldpc = max( 1, descc( lld_ ) )
6577  rowrep = ( descc( rsrc_ ).EQ.-1 )
6578  colrep = ( descc( csrc_ ).EQ.-1 )
6579 *
6580  IF( notran ) THEN
6581 *
6582  DO 20 j = jc, jc + n - 1
6583 *
6584  ioffc = ic + ( j - 1 ) * ldc
6585  ioffa = ia + ( ja - 1 + j - jc ) * lda
6586 *
6587  DO 10 i = ic, ic + m - 1
6588 *
6589  IF( upper ) THEN
6590  IF( ( j - jc ).GE.( i - ic ) ) THEN
6591  CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6592  $ c( ioffc ), prec )
6593  ELSE
6594  erri = zero
6595  END IF
6596  ELSE IF( lower ) THEN
6597  IF( ( j - jc ).LE.( i - ic ) ) THEN
6598  CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6599  $ c( ioffc ), prec )
6600  ELSE
6601  erri = zero
6602  END IF
6603  ELSE
6604  CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6605  $ c( ioffc ), prec )
6606  END IF
6607 *
6608  CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6609  $ iic, jjc, icrow, iccol )
6610  IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6611  $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6612  err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6613  IF( err0.GT.erri )
6614  $ info = 1
6615  err = max( err, err0 )
6616  END IF
6617 *
6618  ioffa = ioffa + 1
6619  ioffc = ioffc + 1
6620 *
6621  10 CONTINUE
6622 *
6623  20 CONTINUE
6624 *
6625  ELSE
6626 *
6627  DO 40 j = jc, jc + n - 1
6628 *
6629  ioffc = ic + ( j - 1 ) * ldc
6630  ioffa = ia + ( j - jc ) + ( ja - 1 ) * lda
6631 *
6632  DO 30 i = ic, ic + m - 1
6633 *
6634  IF( upper ) THEN
6635  IF( ( j - jc ).GE.( i - ic ) ) THEN
6636  CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6637  $ c( ioffc ), prec )
6638  ELSE
6639  erri = zero
6640  END IF
6641  ELSE IF( lower ) THEN
6642  IF( ( j - jc ).LE.( i - ic ) ) THEN
6643  CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6644  $ c( ioffc ), prec )
6645  ELSE
6646  erri = zero
6647  END IF
6648  ELSE
6649  CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6650  $ c( ioffc ), prec )
6651  END IF
6652 *
6653  CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6654  $ iic, jjc, icrow, iccol )
6655  IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6656  $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6657  err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6658  IF( err0.GT.erri )
6659  $ info = 1
6660  err = max( err, err0 )
6661  END IF
6662 *
6663  ioffc = ioffc + 1
6664  ioffa = ioffa + lda
6665 *
6666  30 CONTINUE
6667 *
6668  40 CONTINUE
6669 *
6670  END IF
6671 *
6672 * If INFO = 0, all results are at least half accurate.
6673 *
6674  CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6675  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6676  $ mycol )
6677 *
6678  RETURN
6679 *
6680 * End of PDMMCH3
6681 *
6682  END
6683  SUBROUTINE pderraxpby( ERRBND, ALPHA, X, BETA, Y, PREC )
6685 * -- PBLAS test routine (version 2.0) --
6686 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6687 * and University of California, Berkeley.
6688 * April 1, 1998
6689 *
6690 * .. Scalar Arguments ..
6691  DOUBLE PRECISION ALPHA, BETA, ERRBND, PREC, X, Y
6692 * ..
6693 *
6694 * Purpose
6695 * =======
6696 *
6697 * PDERRAXPBY serially computes y := beta*y + alpha * x and returns a
6698 * scaled relative acceptable error bound on the result.
6699 *
6700 * Arguments
6701 * =========
6702 *
6703 * ERRBND (global output) DOUBLE PRECISION
6704 * On exit, ERRBND specifies the scaled relative acceptable er-
6705 * ror bound.
6706 *
6707 * ALPHA (global input) DOUBLE PRECISION
6708 * On entry, ALPHA specifies the scalar alpha.
6709 *
6710 * X (global input) DOUBLE PRECISION
6711 * On entry, X specifies the scalar x to be scaled.
6712 *
6713 * BETA (global input) DOUBLE PRECISION
6714 * On entry, BETA specifies the scalar beta.
6715 *
6716 * Y (global input/global output) DOUBLE PRECISION
6717 * On entry, Y specifies the scalar y to be added. On exit, Y
6718 * contains the resulting scalar y.
6719 *
6720 * PREC (global input) DOUBLE PRECISION
6721 * On entry, PREC specifies the machine precision.
6722 *
6723 * -- Written on April 1, 1998 by
6724 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6725 *
6726 * =====================================================================
6727 *
6728 * .. Parameters ..
6729  DOUBLE PRECISION ONE, TWO, ZERO
6730  PARAMETER ( ONE = 1.0d+0, two = 2.0d+0,
6731  $ zero = 0.0d+0 )
6732 * ..
6733 * .. Local Scalars ..
6734  DOUBLE PRECISION ADDBND, FACT, SUMPOS, SUMNEG, TMP
6735 * ..
6736 * .. Intrinsic Functions ..
6737 * ..
6738 * .. Executable Statements ..
6739 *
6740  SUMPOS = zero
6741  sumneg = zero
6742  fact = one + two * prec
6743  addbnd = two * two * two * prec
6744 *
6745  tmp = alpha * x
6746  IF( tmp.GE.zero ) THEN
6747  sumpos = sumpos + tmp * fact
6748  ELSE
6749  sumneg = sumneg - tmp * fact
6750  END IF
6751 *
6752  tmp = beta * y
6753  IF( tmp.GE.zero ) THEN
6754  sumpos = sumpos + tmp * fact
6755  ELSE
6756  sumneg = sumneg - tmp * fact
6757  END IF
6758 *
6759  y = ( beta * y ) + ( alpha * x )
6760 *
6761  errbnd = addbnd * max( sumpos, sumneg )
6762 *
6763  RETURN
6764 *
6765 * End of PDERRAXPBY
6766 *
6767  END
6768  DOUBLE PRECISION FUNCTION pdlamch( ICTXT, CMACH )
6770 * -- PBLAS test routine (version 2.0) --
6771 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6772 * and University of California, Berkeley.
6773 * April 1, 1998
6774 *
6775 * .. Scalar Arguments ..
6776  CHARACTER*1 cmach
6777  INTEGER ictxt
6778 * ..
6779 *
6780 * Purpose
6781 * =======
6782 *
6783 * PDLAMCH determines double precision machine parameters.
6784 *
6785 * Arguments
6786 * =========
6787 *
6788 * ICTXT (local input) INTEGER
6789 * On entry, ICTXT specifies the BLACS context handle, indica-
6790 * ting the global context of the operation. The context itself
6791 * is global, but the value of ICTXT is local.
6792 *
6793 * CMACH (global input) CHARACTER*1
6794 * On entry, CMACH specifies the value to be returned by PDLAMCH
6795 * as follows:
6796 * = 'E' or 'e', PDLAMCH := eps,
6797 * = 'S' or 's , PDLAMCH := sfmin,
6798 * = 'B' or 'b', PDLAMCH := base,
6799 * = 'P' or 'p', PDLAMCH := eps*base,
6800 * = 'N' or 'n', PDLAMCH := t,
6801 * = 'R' or 'r', PDLAMCH := rnd,
6802 * = 'M' or 'm', PDLAMCH := emin,
6803 * = 'U' or 'u', PDLAMCH := rmin,
6804 * = 'L' or 'l', PDLAMCH := emax,
6805 * = 'O' or 'o', PDLAMCH := rmax,
6806 *
6807 * where
6808 *
6809 * eps = relative machine precision,
6810 * sfmin = safe minimum, such that 1/sfmin does not overflow,
6811 * base = base of the machine,
6812 * prec = eps*base,
6813 * t = number of (base) digits in the mantissa,
6814 * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise,
6815 * emin = minimum exponent before (gradual) underflow,
6816 * rmin = underflow threshold - base**(emin-1),
6817 * emax = largest exponent before overflow,
6818 * rmax = overflow threshold - (base**emax)*(1-eps).
6819 *
6820 * -- Written on April 1, 1998 by
6821 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6822 *
6823 * =====================================================================
6824 *
6825 * .. Local Scalars ..
6826  CHARACTER*1 top
6827  INTEGER idumm
6828  DOUBLE PRECISION temp
6829 * ..
6830 * .. External Subroutines ..
6831  EXTERNAL dgamn2d, dgamx2d, pb_topget
6832 * ..
6833 * .. External Functions ..
6834  LOGICAL lsame
6835  DOUBLE PRECISION dlamch
6836  EXTERNAL dlamch, lsame
6837 * ..
6838 * .. Executable Statements ..
6839 *
6840  temp = dlamch( cmach )
6841  idumm = 0
6842 *
6843  IF( lsame( cmach, 'E' ).OR.lsame( cmach, 'S' ).OR.
6844  $ lsame( cmach, 'M' ).OR.lsame( cmach, 'U' ) ) THEN
6845  CALL pb_topget( ictxt, 'Combine', 'All', top )
6846  CALL dgamx2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
6847  $ idumm, -1, -1, idumm )
6848  ELSE IF( lsame( cmach, 'L' ).OR.lsame( cmach, 'O' ) ) THEN
6849  CALL pb_topget( ictxt, 'Combine', 'All', top )
6850  CALL dgamn2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
6851  $ idumm, -1, -1, idumm )
6852  END IF
6853 *
6854  pdlamch = temp
6855 *
6856  RETURN
6857 *
6858 * End of PDLAMCH
6859 *
6860  END
6861  SUBROUTINE pdlaset( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA )
6863 * -- PBLAS test routine (version 2.0) --
6864 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6865 * and University of California, Berkeley.
6866 * April 1, 1998
6867 *
6868 * .. Scalar Arguments ..
6869  CHARACTER*1 UPLO
6870  INTEGER IA, JA, M, N
6871  DOUBLE PRECISION ALPHA, BETA
6872 * ..
6873 * .. Array Arguments ..
6874  INTEGER DESCA( * )
6875  DOUBLE PRECISION A( * )
6876 * ..
6877 *
6878 * Purpose
6879 * =======
6880 *
6881 * PDLASET initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) deno-
6882 * ted by sub( A ) to beta on the diagonal and alpha on the offdiago-
6883 * nals.
6884 *
6885 * Notes
6886 * =====
6887 *
6888 * A description vector is associated with each 2D block-cyclicly dis-
6889 * tributed matrix. This vector stores the information required to
6890 * establish the mapping between a matrix entry and its corresponding
6891 * process and memory location.
6892 *
6893 * In the following comments, the character _ should be read as
6894 * "of the distributed matrix". Let A be a generic term for any 2D
6895 * block cyclicly distributed matrix. Its description vector is DESCA:
6896 *
6897 * NOTATION STORED IN EXPLANATION
6898 * ---------------- --------------- ------------------------------------
6899 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6900 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6901 * the NPROW x NPCOL BLACS process grid
6902 * A is distributed over. The context
6903 * itself is global, but the handle
6904 * (the integer value) may vary.
6905 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
6906 * ted matrix A, M_A >= 0.
6907 * N_A (global) DESCA( N_ ) The number of columns in the distri-
6908 * buted matrix A, N_A >= 0.
6909 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6910 * block of the matrix A, IMB_A > 0.
6911 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
6912 * left block of the matrix A,
6913 * INB_A > 0.
6914 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6915 * bute the last M_A-IMB_A rows of A,
6916 * MB_A > 0.
6917 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6918 * bute the last N_A-INB_A columns of
6919 * A, NB_A > 0.
6920 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6921 * row of the matrix A is distributed,
6922 * NPROW > RSRC_A >= 0.
6923 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6924 * first column of A is distributed.
6925 * NPCOL > CSRC_A >= 0.
6926 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6927 * array storing the local blocks of
6928 * the distributed matrix A,
6929 * IF( Lc( 1, N_A ) > 0 )
6930 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
6931 * ELSE
6932 * LLD_A >= 1.
6933 *
6934 * Let K be the number of rows of a matrix A starting at the global in-
6935 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6936 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6937 * receive if these K rows were distributed over NPROW processes. If K
6938 * is the number of columns of a matrix A starting at the global index
6939 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6940 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6941 * these K columns were distributed over NPCOL processes.
6942 *
6943 * The values of Lr() and Lc() may be determined via a call to the func-
6944 * tion PB_NUMROC:
6945 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6946 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6947 *
6948 * Arguments
6949 * =========
6950 *
6951 * UPLO (global input) CHARACTER*1
6952 * On entry, UPLO specifies the part of the submatrix sub( A )
6953 * to be set:
6954 * = 'L' or 'l': Lower triangular part is set; the strictly
6955 * upper triangular part of sub( A ) is not changed;
6956 * = 'U' or 'u': Upper triangular part is set; the strictly
6957 * lower triangular part of sub( A ) is not changed;
6958 * Otherwise: All of the matrix sub( A ) is set.
6959 *
6960 * M (global input) INTEGER
6961 * On entry, M specifies the number of rows of the submatrix
6962 * sub( A ). M must be at least zero.
6963 *
6964 * N (global input) INTEGER
6965 * On entry, N specifies the number of columns of the submatrix
6966 * sub( A ). N must be at least zero.
6967 *
6968 * ALPHA (global input) DOUBLE PRECISION
6969 * On entry, ALPHA specifies the scalar alpha, i.e., the cons-
6970 * tant to which the offdiagonal elements are to be set.
6971 *
6972 * BETA (global input) DOUBLE PRECISION
6973 * On entry, BETA specifies the scalar beta, i.e., the constant
6974 * to which the diagonal elements are to be set.
6975 *
6976 * A (local input/local output) DOUBLE PRECISION array
6977 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
6978 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
6979 * the local entries of the matrix A to be set. On exit, the
6980 * leading m by n submatrix sub( A ) is set as follows:
6981 *
6982 * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N,
6983 * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N,
6984 * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N,
6985 * and IA+i.NE.JA+j,
6986 * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N).
6987 *
6988 * IA (global input) INTEGER
6989 * On entry, IA specifies A's global row index, which points to
6990 * the beginning of the submatrix sub( A ).
6991 *
6992 * JA (global input) INTEGER
6993 * On entry, JA specifies A's global column index, which points
6994 * to the beginning of the submatrix sub( A ).
6995 *
6996 * DESCA (global and local input) INTEGER array
6997 * On entry, DESCA is an integer array of dimension DLEN_. This
6998 * is the array descriptor for the matrix A.
6999 *
7000 * -- Written on April 1, 1998 by
7001 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7002 *
7003 * =====================================================================
7004 *
7005 * .. Parameters ..
7006  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7007  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7008  $ RSRC_
7009  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7010  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7011  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7012  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7013 * ..
7014 * .. Local Scalars ..
7015  LOGICAL GODOWN, GOLEFT, ISCOLREP, ISROWREP, LOWER,
7016  $ UPPER
7017  INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
7018  $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, JJA,
7019  $ JJMAX, JOFFA, JOFFD, LCMT, LCMT00, LDA, LMBLOC,
7020  $ LNBLOC, LOW, M1, MB, MBLKD, MBLKS, MBLOC, MP,
7021  $ MRCOL, MRROW, MYCOL, MYROW, N1, NB, NBLKD,
7022  $ NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, QNB, TMP1,
7023  $ UPP
7024 * ..
7025 * .. Local Arrays ..
7026  INTEGER DESCA2( DLEN_ )
7027 * ..
7028 * .. External Subroutines ..
7029  EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7031 * ..
7032 * .. External Functions ..
7033  LOGICAL LSAME
7034  EXTERNAL lsame
7035 * ..
7036 * .. Intrinsic Functions ..
7037  INTRINSIC min
7038 * ..
7039 * .. Executable Statements ..
7040 *
7041  IF( m.EQ.0 .OR. n.EQ.0 )
7042  $ RETURN
7043 *
7044 * Convert descriptor
7045 *
7046  CALL pb_desctrans( desca, desca2 )
7047 *
7048 * Get grid parameters
7049 *
7050  ictxt = desca2( ctxt_ )
7051  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7052 *
7053  CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
7054  $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
7055  $ iacol, mrrow, mrcol )
7056 *
7057  IF( mp.LE.0 .OR. nq.LE.0 )
7058  $ RETURN
7059 *
7060  isrowrep = ( desca2( rsrc_ ).LT.0 )
7061  iscolrep = ( desca2( csrc_ ).LT.0 )
7062  lda = desca2( lld_ )
7063 *
7064  upper = .NOT.( lsame( uplo, 'L' ) )
7065  lower = .NOT.( lsame( uplo, 'U' ) )
7066 *
7067  IF( ( ( lower.AND.upper ).AND.( alpha.EQ.beta ) ).OR.
7068  $ ( isrowrep .AND. iscolrep ) ) THEN
7069  IF( ( mp.GT.0 ).AND.( nq.GT.0 ) )
7070  $ CALL pb_dlaset( uplo, mp, nq, 0, alpha, beta,
7071  $ a( iia + ( jja - 1 ) * lda ), lda )
7072  RETURN
7073  END IF
7074 *
7075 * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
7076 * ILOW, LOW, IUPP, and UPP.
7077 *
7078  mb = desca2( mb_ )
7079  nb = desca2( nb_ )
7080  CALL pb_binfo( 0, mp, nq, imb1, inb1, mb, nb, mrrow, mrcol,
7081  $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
7082  $ lnbloc, ilow, low, iupp, upp )
7083 *
7084  ioffa = iia - 1
7085  joffa = jja - 1
7086  iimax = ioffa + mp
7087  jjmax = joffa + nq
7088 *
7089  IF( isrowrep ) THEN
7090  pmb = mb
7091  ELSE
7092  pmb = nprow * mb
7093  END IF
7094  IF( iscolrep ) THEN
7095  qnb = nb
7096  ELSE
7097  qnb = npcol * nb
7098  END IF
7099 *
7100  m1 = mp
7101  n1 = nq
7102 *
7103 * Handle the first block of rows or columns separately, and update
7104 * LCMT00, MBLKS and NBLKS.
7105 *
7106  godown = ( lcmt00.GT.iupp )
7107  goleft = ( lcmt00.LT.ilow )
7108 *
7109  IF( .NOT.godown .AND. .NOT.goleft ) THEN
7110 *
7111 * LCMT00 >= ILOW && LCMT00 <= IUPP
7112 *
7113  goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7114  godown = .NOT.goleft
7115 *
7116  CALL pb_dlaset( uplo, imbloc, inbloc, lcmt00, alpha, beta,
7117  $ a( iia+joffa*lda ), lda )
7118  IF( godown ) THEN
7119  IF( upper .AND. nq.GT.inbloc )
7120  $ CALL pb_dlaset( 'All', imbloc, nq-inbloc, 0, alpha,
7121  $ alpha, a( iia+(joffa+inbloc)*lda ), lda )
7122  iia = iia + imbloc
7123  m1 = m1 - imbloc
7124  ELSE
7125  IF( lower .AND. mp.GT.imbloc )
7126  $ CALL pb_dlaset( 'All', mp-imbloc, inbloc, 0, alpha,
7127  $ alpha, a( iia+imbloc+joffa*lda ), lda )
7128  jja = jja + inbloc
7129  n1 = n1 - inbloc
7130  END IF
7131 *
7132  END IF
7133 *
7134  IF( godown ) THEN
7135 *
7136  lcmt00 = lcmt00 - ( iupp - upp + pmb )
7137  mblks = mblks - 1
7138  ioffa = ioffa + imbloc
7139 *
7140  10 CONTINUE
7141  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7142  lcmt00 = lcmt00 - pmb
7143  mblks = mblks - 1
7144  ioffa = ioffa + mb
7145  GO TO 10
7146  END IF
7147 *
7148  tmp1 = min( ioffa, iimax ) - iia + 1
7149  IF( upper .AND. tmp1.GT.0 ) THEN
7150  CALL pb_dlaset( 'All', tmp1, n1, 0, alpha, alpha,
7151  $ a( iia+joffa*lda ), lda )
7152  iia = iia + tmp1
7153  m1 = m1 - tmp1
7154  END IF
7155 *
7156  IF( mblks.LE.0 )
7157  $ RETURN
7158 *
7159  lcmt = lcmt00
7160  mblkd = mblks
7161  ioffd = ioffa
7162 *
7163  mbloc = mb
7164  20 CONTINUE
7165  IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7166  IF( mblkd.EQ.1 )
7167  $ mbloc = lmbloc
7168  CALL pb_dlaset( uplo, mbloc, inbloc, lcmt, alpha, beta,
7169  $ a( ioffd+1+joffa*lda ), lda )
7170  lcmt00 = lcmt
7171  lcmt = lcmt - pmb
7172  mblks = mblkd
7173  mblkd = mblkd - 1
7174  ioffa = ioffd
7175  ioffd = ioffd + mbloc
7176  GO TO 20
7177  END IF
7178 *
7179  tmp1 = m1 - ioffd + iia - 1
7180  IF( lower .AND. tmp1.GT.0 )
7181  $ CALL pb_dlaset( 'ALL', tmp1, inbloc, 0, alpha, alpha,
7182  $ a( ioffd+1+joffa*lda ), lda )
7183 *
7184  tmp1 = ioffa - iia + 1
7185  m1 = m1 - tmp1
7186  n1 = n1 - inbloc
7187  lcmt00 = lcmt00 + low - ilow + qnb
7188  nblks = nblks - 1
7189  joffa = joffa + inbloc
7190 *
7191  IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7192  $ CALL pb_dlaset( 'ALL', tmp1, n1, 0, alpha, alpha,
7193  $ a( iia+joffa*lda ), lda )
7194 *
7195  iia = ioffa + 1
7196  jja = joffa + 1
7197 *
7198  ELSE IF( goleft ) THEN
7199 *
7200  lcmt00 = lcmt00 + low - ilow + qnb
7201  nblks = nblks - 1
7202  joffa = joffa + inbloc
7203 *
7204  30 CONTINUE
7205  IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7206  lcmt00 = lcmt00 + qnb
7207  nblks = nblks - 1
7208  joffa = joffa + nb
7209  GO TO 30
7210  END IF
7211 *
7212  tmp1 = min( joffa, jjmax ) - jja + 1
7213  IF( lower .AND. tmp1.GT.0 ) THEN
7214  CALL pb_dlaset( 'All', m1, tmp1, 0, alpha, alpha,
7215  $ a( iia+(jja-1)*lda ), lda )
7216  jja = jja + tmp1
7217  n1 = n1 - tmp1
7218  END IF
7219 *
7220  IF( nblks.LE.0 )
7221  $ RETURN
7222 *
7223  lcmt = lcmt00
7224  nblkd = nblks
7225  joffd = joffa
7226 *
7227  nbloc = nb
7228  40 CONTINUE
7229  IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7230  IF( nblkd.EQ.1 )
7231  $ nbloc = lnbloc
7232  CALL pb_dlaset( uplo, imbloc, nbloc, lcmt, alpha, beta,
7233  $ a( iia+joffd*lda ), lda )
7234  lcmt00 = lcmt
7235  lcmt = lcmt + qnb
7236  nblks = nblkd
7237  nblkd = nblkd - 1
7238  joffa = joffd
7239  joffd = joffd + nbloc
7240  GO TO 40
7241  END IF
7242 *
7243  tmp1 = n1 - joffd + jja - 1
7244  IF( upper .AND. tmp1.GT.0 )
7245  $ CALL pb_dlaset( 'All', imbloc, tmp1, 0, alpha, alpha,
7246  $ a( iia+joffd*lda ), lda )
7247 *
7248  tmp1 = joffa - jja + 1
7249  m1 = m1 - imbloc
7250  n1 = n1 - tmp1
7251  lcmt00 = lcmt00 - ( iupp - upp + pmb )
7252  mblks = mblks - 1
7253  ioffa = ioffa + imbloc
7254 *
7255  IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
7256  $ CALL pb_dlaset( 'All', m1, tmp1, 0, alpha, alpha,
7257  $ a( ioffa+1+(jja-1)*lda ), lda )
7258 *
7259  iia = ioffa + 1
7260  jja = joffa + 1
7261 *
7262  END IF
7263 *
7264  nbloc = nb
7265  50 CONTINUE
7266  IF( nblks.GT.0 ) THEN
7267  IF( nblks.EQ.1 )
7268  $ nbloc = lnbloc
7269  60 CONTINUE
7270  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7271  lcmt00 = lcmt00 - pmb
7272  mblks = mblks - 1
7273  ioffa = ioffa + mb
7274  GO TO 60
7275  END IF
7276 *
7277  tmp1 = min( ioffa, iimax ) - iia + 1
7278  IF( upper .AND. tmp1.GT.0 ) THEN
7279  CALL pb_dlaset( 'All', tmp1, n1, 0, alpha, alpha,
7280  $ a( iia+joffa*lda ), lda )
7281  iia = iia + tmp1
7282  m1 = m1 - tmp1
7283  END IF
7284 *
7285  IF( mblks.LE.0 )
7286  $ RETURN
7287 *
7288  lcmt = lcmt00
7289  mblkd = mblks
7290  ioffd = ioffa
7291 *
7292  mbloc = mb
7293  70 CONTINUE
7294  IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7295  IF( mblkd.EQ.1 )
7296  $ mbloc = lmbloc
7297  CALL pb_dlaset( uplo, mbloc, nbloc, lcmt, alpha, beta,
7298  $ a( ioffd+1+joffa*lda ), lda )
7299  lcmt00 = lcmt
7300  lcmt = lcmt - pmb
7301  mblks = mblkd
7302  mblkd = mblkd - 1
7303  ioffa = ioffd
7304  ioffd = ioffd + mbloc
7305  GO TO 70
7306  END IF
7307 *
7308  tmp1 = m1 - ioffd + iia - 1
7309  IF( lower .AND. tmp1.GT.0 )
7310  $ CALL pb_dlaset( 'All', tmp1, nbloc, 0, alpha, alpha,
7311  $ a( ioffd+1+joffa*lda ), lda )
7312 *
7313  tmp1 = min( ioffa, iimax ) - iia + 1
7314  m1 = m1 - tmp1
7315  n1 = n1 - nbloc
7316  lcmt00 = lcmt00 + qnb
7317  nblks = nblks - 1
7318  joffa = joffa + nbloc
7319 *
7320  IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7321  $ CALL pb_dlaset( 'All', tmp1, n1, 0, alpha, alpha,
7322  $ a( iia+joffa*lda ), lda )
7323 *
7324  iia = ioffa + 1
7325  jja = joffa + 1
7326 *
7327  GO TO 50
7328 *
7329  END IF
7330 *
7331  RETURN
7332 *
7333 * End of PDLASET
7334 *
7335  END
7336  SUBROUTINE pdlascal( TYPE, M, N, ALPHA, A, IA, JA, DESCA )
7338 * -- PBLAS test routine (version 2.0) --
7339 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7340 * and University of California, Berkeley.
7341 * April 1, 1998
7342 *
7343 * .. Scalar Arguments ..
7344  CHARACTER*1 TYPE
7345  INTEGER IA, JA, M, N
7346  DOUBLE PRECISION ALPHA
7347 * ..
7348 * .. Array Arguments ..
7349  INTEGER DESCA( * )
7350  DOUBLE PRECISION A( * )
7351 * ..
7352 *
7353 * Purpose
7354 * =======
7355 *
7356 * PDLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted
7357 * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full,
7358 * upper triangular, lower triangular or upper Hessenberg.
7359 *
7360 * Notes
7361 * =====
7362 *
7363 * A description vector is associated with each 2D block-cyclicly dis-
7364 * tributed matrix. This vector stores the information required to
7365 * establish the mapping between a matrix entry and its corresponding
7366 * process and memory location.
7367 *
7368 * In the following comments, the character _ should be read as
7369 * "of the distributed matrix". Let A be a generic term for any 2D
7370 * block cyclicly distributed matrix. Its description vector is DESCA:
7371 *
7372 * NOTATION STORED IN EXPLANATION
7373 * ---------------- --------------- ------------------------------------
7374 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7375 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7376 * the NPROW x NPCOL BLACS process grid
7377 * A is distributed over. The context
7378 * itself is global, but the handle
7379 * (the integer value) may vary.
7380 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
7381 * ted matrix A, M_A >= 0.
7382 * N_A (global) DESCA( N_ ) The number of columns in the distri-
7383 * buted matrix A, N_A >= 0.
7384 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7385 * block of the matrix A, IMB_A > 0.
7386 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
7387 * left block of the matrix A,
7388 * INB_A > 0.
7389 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7390 * bute the last M_A-IMB_A rows of A,
7391 * MB_A > 0.
7392 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7393 * bute the last N_A-INB_A columns of
7394 * A, NB_A > 0.
7395 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7396 * row of the matrix A is distributed,
7397 * NPROW > RSRC_A >= 0.
7398 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7399 * first column of A is distributed.
7400 * NPCOL > CSRC_A >= 0.
7401 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7402 * array storing the local blocks of
7403 * the distributed matrix A,
7404 * IF( Lc( 1, N_A ) > 0 )
7405 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
7406 * ELSE
7407 * LLD_A >= 1.
7408 *
7409 * Let K be the number of rows of a matrix A starting at the global in-
7410 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7411 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7412 * receive if these K rows were distributed over NPROW processes. If K
7413 * is the number of columns of a matrix A starting at the global index
7414 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7415 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7416 * these K columns were distributed over NPCOL processes.
7417 *
7418 * The values of Lr() and Lc() may be determined via a call to the func-
7419 * tion PB_NUMROC:
7420 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7421 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7422 *
7423 * Arguments
7424 * =========
7425 *
7426 * TYPE (global input) CHARACTER*1
7427 * On entry, TYPE specifies the type of the input submatrix as
7428 * follows:
7429 * = 'L' or 'l': sub( A ) is a lower triangular matrix,
7430 * = 'U' or 'u': sub( A ) is an upper triangular matrix,
7431 * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix,
7432 * otherwise sub( A ) is a full matrix.
7433 *
7434 * M (global input) INTEGER
7435 * On entry, M specifies the number of rows of the submatrix
7436 * sub( A ). M must be at least zero.
7437 *
7438 * N (global input) INTEGER
7439 * On entry, N specifies the number of columns of the submatrix
7440 * sub( A ). N must be at least zero.
7441 *
7442 * ALPHA (global input) DOUBLE PRECISION
7443 * On entry, ALPHA specifies the scalar alpha.
7444 *
7445 * A (local input/local output) DOUBLE PRECISION array
7446 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7447 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
7448 * the local entries of the matrix A.
7449 * On exit, the local entries of this array corresponding to the
7450 * to the entries of the submatrix sub( A ) are overwritten by
7451 * the local entries of the m by n scaled submatrix.
7452 *
7453 * IA (global input) INTEGER
7454 * On entry, IA specifies A's global row index, which points to
7455 * the beginning of the submatrix sub( A ).
7456 *
7457 * JA (global input) INTEGER
7458 * On entry, JA specifies A's global column index, which points
7459 * to the beginning of the submatrix sub( A ).
7460 *
7461 * DESCA (global and local input) INTEGER array
7462 * On entry, DESCA is an integer array of dimension DLEN_. This
7463 * is the array descriptor for the matrix A.
7464 *
7465 * -- Written on April 1, 1998 by
7466 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7467 *
7468 * =====================================================================
7469 *
7470 * .. Parameters ..
7471  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7472  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7473  $ RSRC_
7474  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7475  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7476  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7477  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7478 * ..
7479 * .. Local Scalars ..
7480  CHARACTER*1 UPLO
7481  LOGICAL GODOWN, GOLEFT, LOWER, UPPER
7482  INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
7483  $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE,
7484  $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00,
7485  $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS,
7486  $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB,
7487  $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB,
7488  $ QNB, TMP1, UPP
7489 * ..
7490 * .. Local Arrays ..
7491  INTEGER DESCA2( DLEN_ )
7492 * ..
7493 * .. External Subroutines ..
7494  EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7496 * ..
7497 * .. External Functions ..
7498  LOGICAL LSAME
7499  INTEGER PB_NUMROC
7500  EXTERNAL lsame, pb_numroc
7501 * ..
7502 * .. Intrinsic Functions ..
7503  INTRINSIC min
7504 * ..
7505 * .. Executable Statements ..
7506 *
7507 * Convert descriptor
7508 *
7509  CALL pb_desctrans( desca, desca2 )
7510 *
7511 * Get grid parameters
7512 *
7513  ictxt = desca2( ctxt_ )
7514  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7515 *
7516 * Quick return if possible
7517 *
7518  IF( m.EQ.0 .OR. n.EQ.0 )
7519  $ RETURN
7520 *
7521  IF( lsame( TYPE, 'L' ) ) then
7522  itype = 1
7523  uplo = TYPE
7524  upper = .false.
7525  lower = .true.
7526  ioffd = 0
7527  ELSE IF( lsame( TYPE, 'U' ) ) then
7528  itype = 2
7529  uplo = TYPE
7530  upper = .true.
7531  lower = .false.
7532  ioffd = 0
7533  ELSE IF( lsame( TYPE, 'H' ) ) then
7534  itype = 3
7535  uplo = 'U'
7536  upper = .true.
7537  lower = .false.
7538  ioffd = 1
7539  ELSE
7540  itype = 0
7541  uplo = 'A'
7542  upper = .true.
7543  lower = .true.
7544  ioffd = 0
7545  END IF
7546 *
7547 * Compute local indexes
7548 *
7549  IF( itype.EQ.0 ) THEN
7550 *
7551 * Full matrix
7552 *
7553  CALL pb_infog2l( ia, ja, desca2, nprow, npcol, myrow, mycol,
7554  $ iia, jja, iarow, iacol )
7555  mp = pb_numroc( m, ia, desca2( imb_ ), desca2( mb_ ), myrow,
7556  $ desca2( rsrc_ ), nprow )
7557  nq = pb_numroc( n, ja, desca2( inb_ ), desca2( nb_ ), mycol,
7558  $ desca2( csrc_ ), npcol )
7559 *
7560  IF( mp.LE.0 .OR. nq.LE.0 )
7561  $ RETURN
7562 *
7563  lda = desca2( lld_ )
7564  ioffa = iia + ( jja - 1 ) * lda
7565 *
7566  CALL pb_dlascal( 'All', mp, nq, 0, alpha, a( ioffa ), lda )
7567 *
7568  ELSE
7569 *
7570 * Trapezoidal matrix
7571 *
7572  CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
7573  $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
7574  $ iacol, mrrow, mrcol )
7575 *
7576  IF( mp.LE.0 .OR. nq.LE.0 )
7577  $ RETURN
7578 *
7579 * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC,
7580 * LNBLOC, ILOW, LOW, IUPP, and UPP.
7581 *
7582  mb = desca2( mb_ )
7583  nb = desca2( nb_ )
7584  lda = desca2( lld_ )
7585 *
7586  CALL pb_binfo( ioffd, mp, nq, imb1, inb1, mb, nb, mrrow,
7587  $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
7588  $ lmbloc, lnbloc, ilow, low, iupp, upp )
7589 *
7590  m1 = mp
7591  n1 = nq
7592  ioffa = iia - 1
7593  joffa = jja - 1
7594  iimax = ioffa + mp
7595  jjmax = joffa + nq
7596 *
7597  IF( desca2( rsrc_ ).LT.0 ) THEN
7598  pmb = mb
7599  ELSE
7600  pmb = nprow * mb
7601  END IF
7602  IF( desca2( csrc_ ).LT.0 ) THEN
7603  qnb = nb
7604  ELSE
7605  qnb = npcol * nb
7606  END IF
7607 *
7608 * Handle the first block of rows or columns separately, and
7609 * update LCMT00, MBLKS and NBLKS.
7610 *
7611  godown = ( lcmt00.GT.iupp )
7612  goleft = ( lcmt00.LT.ilow )
7613 *
7614  IF( .NOT.godown .AND. .NOT.goleft ) THEN
7615 *
7616 * LCMT00 >= ILOW && LCMT00 <= IUPP
7617 *
7618  goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7619  godown = .NOT.goleft
7620 *
7621  CALL pb_dlascal( uplo, imbloc, inbloc, lcmt00, alpha,
7622  $ a( iia+joffa*lda ), lda )
7623  IF( godown ) THEN
7624  IF( upper .AND. nq.GT.inbloc )
7625  $ CALL pb_dlascal( 'All', imbloc, nq-inbloc, 0, alpha,
7626  $ a( iia+(joffa+inbloc)*lda ), lda )
7627  iia = iia + imbloc
7628  m1 = m1 - imbloc
7629  ELSE
7630  IF( lower .AND. mp.GT.imbloc )
7631  $ CALL pb_dlascal( 'All', mp-imbloc, inbloc, 0, alpha,
7632  $ a( iia+imbloc+joffa*lda ), lda )
7633  jja = jja + inbloc
7634  n1 = n1 - inbloc
7635  END IF
7636 *
7637  END IF
7638 *
7639  IF( godown ) THEN
7640 *
7641  lcmt00 = lcmt00 - ( iupp - upp + pmb )
7642  mblks = mblks - 1
7643  ioffa = ioffa + imbloc
7644 *
7645  10 CONTINUE
7646  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7647  lcmt00 = lcmt00 - pmb
7648  mblks = mblks - 1
7649  ioffa = ioffa + mb
7650  GO TO 10
7651  END IF
7652 *
7653  tmp1 = min( ioffa, iimax ) - iia + 1
7654  IF( upper .AND. tmp1.GT.0 ) THEN
7655  CALL pb_dlascal( 'All', tmp1, n1, 0, alpha,
7656  $ a( iia+joffa*lda ), lda )
7657  iia = iia + tmp1
7658  m1 = m1 - tmp1
7659  END IF
7660 *
7661  IF( mblks.LE.0 )
7662  $ RETURN
7663 *
7664  lcmt = lcmt00
7665  mblkd = mblks
7666  ioffd = ioffa
7667 *
7668  mbloc = mb
7669  20 CONTINUE
7670  IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7671  IF( mblkd.EQ.1 )
7672  $ mbloc = lmbloc
7673  CALL pb_dlascal( uplo, mbloc, inbloc, lcmt, alpha,
7674  $ a( ioffd+1+joffa*lda ), lda )
7675  lcmt00 = lcmt
7676  lcmt = lcmt - pmb
7677  mblks = mblkd
7678  mblkd = mblkd - 1
7679  ioffa = ioffd
7680  ioffd = ioffd + mbloc
7681  GO TO 20
7682  END IF
7683 *
7684  tmp1 = m1 - ioffd + iia - 1
7685  IF( lower .AND. tmp1.GT.0 )
7686  $ CALL pb_dlascal( 'All', tmp1, inbloc, 0, alpha,
7687  $ a( ioffd+1+joffa*lda ), lda )
7688 *
7689  tmp1 = ioffa - iia + 1
7690  m1 = m1 - tmp1
7691  n1 = n1 - inbloc
7692  lcmt00 = lcmt00 + low - ilow + qnb
7693  nblks = nblks - 1
7694  joffa = joffa + inbloc
7695 *
7696  IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7697  $ CALL pb_dlascal( 'All', tmp1, n1, 0, alpha,
7698  $ a( iia+joffa*lda ), lda )
7699 *
7700  iia = ioffa + 1
7701  jja = joffa + 1
7702 *
7703  ELSE IF( goleft ) THEN
7704 *
7705  lcmt00 = lcmt00 + low - ilow + qnb
7706  nblks = nblks - 1
7707  joffa = joffa + inbloc
7708 *
7709  30 CONTINUE
7710  IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7711  lcmt00 = lcmt00 + qnb
7712  nblks = nblks - 1
7713  joffa = joffa + nb
7714  GO TO 30
7715  END IF
7716 *
7717  tmp1 = min( joffa, jjmax ) - jja + 1
7718  IF( lower .AND. tmp1.GT.0 ) THEN
7719  CALL pb_dlascal( 'All', m1, tmp1, 0, alpha,
7720  $ a( iia+(jja-1)*lda ), lda )
7721  jja = jja + tmp1
7722  n1 = n1 - tmp1
7723  END IF
7724 *
7725  IF( nblks.LE.0 )
7726  $ RETURN
7727 *
7728  lcmt = lcmt00
7729  nblkd = nblks
7730  joffd = joffa
7731 *
7732  nbloc = nb
7733  40 CONTINUE
7734  IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7735  IF( nblkd.EQ.1 )
7736  $ nbloc = lnbloc
7737  CALL pb_dlascal( uplo, imbloc, nbloc, lcmt, alpha,
7738  $ a( iia+joffd*lda ), lda )
7739  lcmt00 = lcmt
7740  lcmt = lcmt + qnb
7741  nblks = nblkd
7742  nblkd = nblkd - 1
7743  joffa = joffd
7744  joffd = joffd + nbloc
7745  GO TO 40
7746  END IF
7747 *
7748  tmp1 = n1 - joffd + jja - 1
7749  IF( upper .AND. tmp1.GT.0 )
7750  $ CALL pb_dlascal( 'All', imbloc, tmp1, 0, alpha,
7751  $ a( iia+joffd*lda ), lda )
7752 *
7753  tmp1 = joffa - jja + 1
7754  m1 = m1 - imbloc
7755  n1 = n1 - tmp1
7756  lcmt00 = lcmt00 - ( iupp - upp + pmb )
7757  mblks = mblks - 1
7758  ioffa = ioffa + imbloc
7759 *
7760  IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
7761  $ CALL pb_dlascal( 'All', m1, tmp1, 0, alpha,
7762  $ a( ioffa+1+(jja-1)*lda ), lda )
7763 *
7764  iia = ioffa + 1
7765  jja = joffa + 1
7766 *
7767  END IF
7768 *
7769  nbloc = nb
7770  50 CONTINUE
7771  IF( nblks.GT.0 ) THEN
7772  IF( nblks.EQ.1 )
7773  $ nbloc = lnbloc
7774  60 CONTINUE
7775  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7776  lcmt00 = lcmt00 - pmb
7777  mblks = mblks - 1
7778  ioffa = ioffa + mb
7779  GO TO 60
7780  END IF
7781 *
7782  tmp1 = min( ioffa, iimax ) - iia + 1
7783  IF( upper .AND. tmp1.GT.0 ) THEN
7784  CALL pb_dlascal( 'All', tmp1, n1, 0, alpha,
7785  $ a( iia+joffa*lda ), lda )
7786  iia = iia + tmp1
7787  m1 = m1 - tmp1
7788  END IF
7789 *
7790  IF( mblks.LE.0 )
7791  $ RETURN
7792 *
7793  lcmt = lcmt00
7794  mblkd = mblks
7795  ioffd = ioffa
7796 *
7797  mbloc = mb
7798  70 CONTINUE
7799  IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7800  IF( mblkd.EQ.1 )
7801  $ mbloc = lmbloc
7802  CALL pb_dlascal( uplo, mbloc, nbloc, lcmt, alpha,
7803  $ a( ioffd+1+joffa*lda ), lda )
7804  lcmt00 = lcmt
7805  lcmt = lcmt - pmb
7806  mblks = mblkd
7807  mblkd = mblkd - 1
7808  ioffa = ioffd
7809  ioffd = ioffd + mbloc
7810  GO TO 70
7811  END IF
7812 *
7813  tmp1 = m1 - ioffd + iia - 1
7814  IF( lower .AND. tmp1.GT.0 )
7815  $ CALL pb_dlascal( 'All', tmp1, nbloc, 0, alpha,
7816  $ a( ioffd+1+joffa*lda ), lda )
7817 *
7818  tmp1 = min( ioffa, iimax ) - iia + 1
7819  m1 = m1 - tmp1
7820  n1 = n1 - nbloc
7821  lcmt00 = lcmt00 + qnb
7822  nblks = nblks - 1
7823  joffa = joffa + nbloc
7824 *
7825  IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7826  $ CALL pb_dlascal( 'All', tmp1, n1, 0, alpha,
7827  $ a( iia+joffa*lda ), lda )
7828 *
7829  iia = ioffa + 1
7830  jja = joffa + 1
7831 *
7832  GO TO 50
7833 *
7834  END IF
7835 *
7836  END IF
7837 *
7838  RETURN
7839 *
7840 * End of PDLASCAL
7841 *
7842  END
7843  SUBROUTINE pdlagen( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA,
7844  $ DESCA, IASEED, A, LDA )
7846 * -- PBLAS test routine (version 2.0) --
7847 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7848 * and University of California, Berkeley.
7849 * April 1, 1998
7850 *
7851 * .. Scalar Arguments ..
7852  LOGICAL inplace
7853  CHARACTER*1 aform, diag
7854  INTEGER ia, iaseed, ja, lda, m, n, offa
7855 * ..
7856 * .. Array Arguments ..
7857  INTEGER desca( * )
7858  DOUBLE PRECISION A( LDA, * )
7859 * ..
7860 *
7861 * Purpose
7862 * =======
7863 *
7864 * PDLAGEN generates (or regenerates) a submatrix sub( A ) denoting
7865 * A(IA:IA+M-1,JA:JA+N-1).
7866 *
7867 * Notes
7868 * =====
7869 *
7870 * A description vector is associated with each 2D block-cyclicly dis-
7871 * tributed matrix. This vector stores the information required to
7872 * establish the mapping between a matrix entry and its corresponding
7873 * process and memory location.
7874 *
7875 * In the following comments, the character _ should be read as
7876 * "of the distributed matrix". Let A be a generic term for any 2D
7877 * block cyclicly distributed matrix. Its description vector is DESCA:
7878 *
7879 * NOTATION STORED IN EXPLANATION
7880 * ---------------- --------------- ------------------------------------
7881 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7882 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7883 * the NPROW x NPCOL BLACS process grid
7884 * A is distributed over. The context
7885 * itself is global, but the handle
7886 * (the integer value) may vary.
7887 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
7888 * ted matrix A, M_A >= 0.
7889 * N_A (global) DESCA( N_ ) The number of columns in the distri-
7890 * buted matrix A, N_A >= 0.
7891 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7892 * block of the matrix A, IMB_A > 0.
7893 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
7894 * left block of the matrix A,
7895 * INB_A > 0.
7896 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7897 * bute the last M_A-IMB_A rows of A,
7898 * MB_A > 0.
7899 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7900 * bute the last N_A-INB_A columns of
7901 * A, NB_A > 0.
7902 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7903 * row of the matrix A is distributed,
7904 * NPROW > RSRC_A >= 0.
7905 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7906 * first column of A is distributed.
7907 * NPCOL > CSRC_A >= 0.
7908 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7909 * array storing the local blocks of
7910 * the distributed matrix A,
7911 * IF( Lc( 1, N_A ) > 0 )
7912 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
7913 * ELSE
7914 * LLD_A >= 1.
7915 *
7916 * Let K be the number of rows of a matrix A starting at the global in-
7917 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7918 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7919 * receive if these K rows were distributed over NPROW processes. If K
7920 * is the number of columns of a matrix A starting at the global index
7921 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7922 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7923 * these K columns were distributed over NPCOL processes.
7924 *
7925 * The values of Lr() and Lc() may be determined via a call to the func-
7926 * tion PB_NUMROC:
7927 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7928 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7929 *
7930 * Arguments
7931 * =========
7932 *
7933 * INPLACE (global input) LOGICAL
7934 * On entry, INPLACE specifies if the matrix should be generated
7935 * in place or not. If INPLACE is .TRUE., the local random array
7936 * to be generated will start in memory at the local memory lo-
7937 * cation A( 1, 1 ), otherwise it will start at the local posi-
7938 * tion induced by IA and JA.
7939 *
7940 * AFORM (global input) CHARACTER*1
7941 * On entry, AFORM specifies the type of submatrix to be genera-
7942 * ted as follows:
7943 * AFORM = 'S', sub( A ) is a symmetric matrix,
7944 * AFORM = 'H', sub( A ) is a Hermitian matrix,
7945 * AFORM = 'T', sub( A ) is overrwritten with the transpose
7946 * of what would normally be generated,
7947 * AFORM = 'C', sub( A ) is overwritten with the conjugate
7948 * transpose of what would normally be genera-
7949 * ted.
7950 * AFORM = 'N', a random submatrix is generated.
7951 *
7952 * DIAG (global input) CHARACTER*1
7953 * On entry, DIAG specifies if the generated submatrix is diago-
7954 * nally dominant or not as follows:
7955 * DIAG = 'D' : sub( A ) is diagonally dominant,
7956 * DIAG = 'N' : sub( A ) is not diagonally dominant.
7957 *
7958 * OFFA (global input) INTEGER
7959 * On entry, OFFA specifies the offdiagonal of the underlying
7960 * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma-
7961 * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0
7962 * specifies the main diagonal, OFFA > 0 specifies a subdiago-
7963 * nal, and OFFA < 0 specifies a superdiagonal (see further de-
7964 * tails).
7965 *
7966 * M (global input) INTEGER
7967 * On entry, M specifies the global number of matrix rows of the
7968 * submatrix sub( A ) to be generated. M must be at least zero.
7969 *
7970 * N (global input) INTEGER
7971 * On entry, N specifies the global number of matrix columns of
7972 * the submatrix sub( A ) to be generated. N must be at least
7973 * zero.
7974 *
7975 * IA (global input) INTEGER
7976 * On entry, IA specifies A's global row index, which points to
7977 * the beginning of the submatrix sub( A ).
7978 *
7979 * JA (global input) INTEGER
7980 * On entry, JA specifies A's global column index, which points
7981 * to the beginning of the submatrix sub( A ).
7982 *
7983 * DESCA (global and local input) INTEGER array
7984 * On entry, DESCA is an integer array of dimension DLEN_. This
7985 * is the array descriptor for the matrix A.
7986 *
7987 * IASEED (global input) INTEGER
7988 * On entry, IASEED specifies the seed number to generate the
7989 * matrix A. IASEED must be at least zero.
7990 *
7991 * A (local output) DOUBLE PRECISION array
7992 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7993 * at least Lc( 1, JA+N-1 ). On exit, this array contains the
7994 * local entries of the randomly generated submatrix sub( A ).
7995 *
7996 * LDA (local input) INTEGER
7997 * On entry, LDA specifies the local leading dimension of the
7998 * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_).
7999 * This restriction is however not enforced, and this subroutine
8000 * requires only that LDA >= MAX( 1, Mp ) where
8001 *
8002 * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ).
8003 *
8004 * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW
8005 * and NPCOL can be determined by calling the BLACS subroutine
8006 * BLACS_GRIDINFO.
8007 *
8008 * Further Details
8009 * ===============
8010 *
8011 * OFFD is tied to the matrix described by DESCA, as opposed to the
8012 * piece that is currently (re)generated. This is a global information
8013 * independent from the distribution parameters. Below are examples of
8014 * the meaning of OFFD for a global 7 by 5 matrix:
8015 *
8016 * ---------------------------------------------------------------------
8017 * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4
8018 * -------|-------------------------------------------------------------
8019 * | | OFFD=-1 | OFFD=0 OFFD=2
8020 * | V V
8021 * 0 | . d . . . -> d . . . . . . . . .
8022 * 1 | . . d . . . d . . . . . . . .
8023 * 2 | . . . d . . . d . . -> d . . . .
8024 * 3 | . . . . d . . . d . . d . . .
8025 * 4 | . . . . . . . . . d . . d . .
8026 * 5 | . . . . . . . . . . . . . d .
8027 * 6 | . . . . . . . . . . . . . . d
8028 * ---------------------------------------------------------------------
8029 *
8030 * -- Written on April 1, 1998 by
8031 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8032 *
8033 * =====================================================================
8034 *
8035 * .. Parameters ..
8036  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8037  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8038  $ RSRC_
8039  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8040  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8041  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8042  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8043  INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
8044  $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
8045  $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
8046  PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
8047  $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
8048  $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
8049  $ jmp_nqnb = 10, jmp_nqinbloc = 11,
8050  $ jmp_len = 11 )
8051 * ..
8052 * .. Local Scalars ..
8053  LOGICAL DIAGDO, SYMM, HERM, NOTRAN
8054  INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK,
8055  $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB,
8056  $ inb1, inbloc, inbvir, info, ioffda, itmp, iupp,
8057  $ ivir, jja, jlocblk, jlocoff, jvir, lcmt00,
8058  $ lmbloc, lnbloc, low, maxmn, mb, mblks, mp,
8059  $ mrcol, mrrow, mycdist, mycol, myrdist, myrow,
8060  $ nb, nblks, npcol, nprow, nq, nvir, rsrc, upp
8061  DOUBLE PRECISION ALPHA
8062 * ..
8063 * .. Local Arrays ..
8064  INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ),
8065  $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 )
8066 * ..
8067 * .. External Subroutines ..
8068  EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
8072 * ..
8073 * .. External Functions ..
8074  LOGICAL LSAME
8075  EXTERNAL LSAME
8076 * ..
8077 * .. Intrinsic Functions ..
8078  INTRINSIC DBLE, MAX, MIN
8079 * ..
8080 * .. Data Statements ..
8081  DATA ( muladd0( i ), i = 1, 4 ) / 20077, 16838,
8082  $ 12345, 0 /
8083 * ..
8084 * .. Executable Statements ..
8085 *
8086 * Convert descriptor
8087 *
8088  CALL pb_desctrans( desca, desca2 )
8089 *
8090 * Test the input arguments
8091 *
8092  ictxt = desca2( ctxt_ )
8093  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8094 *
8095 * Test the input parameters
8096 *
8097  info = 0
8098  IF( nprow.EQ.-1 ) THEN
8099  info = -( 1000 + ctxt_ )
8100  ELSE
8101  symm = lsame( aform, 'S' )
8102  herm = lsame( aform, 'H' )
8103  notran = lsame( aform, 'N' )
8104  diagdo = lsame( diag, 'D' )
8105  IF( .NOT.( symm.OR.herm.OR.notran ) .AND.
8106  $ .NOT.( lsame( aform, 'T' ) ) .AND.
8107  $ .NOT.( lsame( aform, 'C' ) ) ) THEN
8108  info = -2
8109  ELSE IF( ( .NOT.diagdo ) .AND.
8110  $ ( .NOT.lsame( diag, 'N' ) ) ) THEN
8111  info = -3
8112  END IF
8113  CALL pb_chkmat( ictxt, m, 5, n, 6, ia, ja, desca2, 10, info )
8114  END IF
8115 *
8116  IF( info.NE.0 ) THEN
8117  CALL pxerbla( ictxt, 'PDLAGEN', -info )
8118  RETURN
8119  END IF
8120 *
8121 * Quick return if possible
8122 *
8123  IF( ( m.LE.0 ).OR.( n.LE.0 ) )
8124  $ RETURN
8125 *
8126 * Start the operations
8127 *
8128  mb = desca2( mb_ )
8129  nb = desca2( nb_ )
8130  imb = desca2( imb_ )
8131  inb = desca2( inb_ )
8132  rsrc = desca2( rsrc_ )
8133  csrc = desca2( csrc_ )
8134 *
8135 * Figure out local information about the distributed matrix operand
8136 *
8137  CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
8138  $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
8139  $ iacol, mrrow, mrcol )
8140 *
8141 * Decide where the entries shall be stored in memory
8142 *
8143  IF( inplace ) THEN
8144  iia = 1
8145  jja = 1
8146  END IF
8147 *
8148 * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
8149 * ILOW, LOW, IUPP, and UPP.
8150 *
8151  ioffda = ja + offa - ia
8152  CALL pb_binfo( ioffda, mp, nq, imb1, inb1, mb, nb, mrrow,
8153  $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
8154  $ lmbloc, lnbloc, ilow, low, iupp, upp )
8155 *
8156 * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST
8157 * This values correspond to the square virtual underlying matrix
8158 * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used
8159 * to set up the random sequence. For practical purposes, the size
8160 * of this virtual matrix is upper bounded by M_ + N_ - 1.
8161 *
8162  itmp = max( 0, -offa )
8163  ivir = ia + itmp
8164  imbvir = imb + itmp
8165  nvir = desca2( m_ ) + itmp
8166 *
8167  CALL pb_locinfo( ivir, imbvir, mb, myrow, rsrc, nprow, ilocblk,
8168  $ ilocoff, myrdist )
8169 *
8170  itmp = max( 0, offa )
8171  jvir = ja + itmp
8172  inbvir = inb + itmp
8173  nvir = max( max( nvir, desca2( n_ ) + itmp ),
8174  $ desca2( m_ ) + desca2( n_ ) - 1 )
8175 *
8176  CALL pb_locinfo( jvir, inbvir, nb, mycol, csrc, npcol, jlocblk,
8177  $ jlocoff, mycdist )
8178 *
8179  IF( symm .OR. herm .OR. notran ) THEN
8180 *
8181  CALL pb_initjmp( .true., nvir, imbvir, inbvir, imbloc, inbloc,
8182  $ mb, nb, rsrc, csrc, nprow, npcol, 1, jmp )
8183 *
8184 * Compute constants to jump JMP( * ) numbers in the sequence
8185 *
8186  CALL pb_initmuladd( muladd0, jmp, imuladd )
8187 *
8188 * Compute and set the random value corresponding to A( IA, JA )
8189 *
8190  CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8191  $ myrdist, mycdist, nprow, npcol, jmp,
8192  $ imuladd, iran )
8193 *
8194  CALL pb_dlagen( 'Lower', aform, a( iia, jja ), lda, lcmt00,
8195  $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8196  $ nb, lnbloc, jmp, imuladd )
8197 *
8198  END IF
8199 *
8200  IF( symm .OR. herm .OR. ( .NOT. notran ) ) THEN
8201 *
8202  CALL pb_initjmp( .false., nvir, imbvir, inbvir, imbloc, inbloc,
8203  $ mb, nb, rsrc, csrc, nprow, npcol, 1, jmp )
8204 *
8205 * Compute constants to jump JMP( * ) numbers in the sequence
8206 *
8207  CALL pb_initmuladd( muladd0, jmp, imuladd )
8208 *
8209 * Compute and set the random value corresponding to A( IA, JA )
8210 *
8211  CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8212  $ myrdist, mycdist, nprow, npcol, jmp,
8213  $ imuladd, iran )
8214 *
8215  CALL pb_dlagen( 'Upper', aform, a( iia, jja ), lda, lcmt00,
8216  $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8217  $ nb, lnbloc, jmp, imuladd )
8218 *
8219  END IF
8220 *
8221  IF( diagdo ) THEN
8222 *
8223  maxmn = max( desca2( m_ ), desca2( n_ ) )
8224  alpha = dble( maxmn )
8225 *
8226  IF( ioffda.GE.0 ) THEN
8227  CALL pdladom( inplace, min( max( 0, m-ioffda ), n ), alpha,
8228  $ a, min( ia+ioffda, ia+m-1 ), ja, desca )
8229  ELSE
8230  CALL pdladom( inplace, min( m, max( 0, n+ioffda ) ), alpha,
8231  $ a, ia, min( ja-ioffda, ja+n-1 ), desca )
8232  END IF
8233 *
8234  END IF
8235 *
8236  RETURN
8237 *
8238 * End of PDLAGEN
8239 *
8240  END
8241  SUBROUTINE pdladom( INPLACE, N, ALPHA, A, IA, JA, DESCA )
8243 * -- PBLAS test routine (version 2.0) --
8244 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8245 * and University of California, Berkeley.
8246 * April 1, 1998
8247 *
8248 * .. Scalar Arguments ..
8249  LOGICAL INPLACE
8250  INTEGER IA, JA, N
8251  DOUBLE PRECISION ALPHA
8252 * ..
8253 * .. Array Arguments ..
8254  INTEGER DESCA( * )
8255  DOUBLE PRECISION A( * )
8256 * ..
8257 *
8258 * Purpose
8259 * =======
8260 *
8261 * PDLADOM adds alpha to the diagonal entries of an n by n submatrix
8262 * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ).
8263 *
8264 * Notes
8265 * =====
8266 *
8267 * A description vector is associated with each 2D block-cyclicly dis-
8268 * tributed matrix. This vector stores the information required to
8269 * establish the mapping between a matrix entry and its corresponding
8270 * process and memory location.
8271 *
8272 * In the following comments, the character _ should be read as
8273 * "of the distributed matrix". Let A be a generic term for any 2D
8274 * block cyclicly distributed matrix. Its description vector is DESCA:
8275 *
8276 * NOTATION STORED IN EXPLANATION
8277 * ---------------- --------------- ------------------------------------
8278 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8279 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8280 * the NPROW x NPCOL BLACS process grid
8281 * A is distributed over. The context
8282 * itself is global, but the handle
8283 * (the integer value) may vary.
8284 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
8285 * ted matrix A, M_A >= 0.
8286 * N_A (global) DESCA( N_ ) The number of columns in the distri-
8287 * buted matrix A, N_A >= 0.
8288 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8289 * block of the matrix A, IMB_A > 0.
8290 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
8291 * left block of the matrix A,
8292 * INB_A > 0.
8293 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8294 * bute the last M_A-IMB_A rows of A,
8295 * MB_A > 0.
8296 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8297 * bute the last N_A-INB_A columns of
8298 * A, NB_A > 0.
8299 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8300 * row of the matrix A is distributed,
8301 * NPROW > RSRC_A >= 0.
8302 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8303 * first column of A is distributed.
8304 * NPCOL > CSRC_A >= 0.
8305 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8306 * array storing the local blocks of
8307 * the distributed matrix A,
8308 * IF( Lc( 1, N_A ) > 0 )
8309 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
8310 * ELSE
8311 * LLD_A >= 1.
8312 *
8313 * Let K be the number of rows of a matrix A starting at the global in-
8314 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8315 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8316 * receive if these K rows were distributed over NPROW processes. If K
8317 * is the number of columns of a matrix A starting at the global index
8318 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8319 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8320 * these K columns were distributed over NPCOL processes.
8321 *
8322 * The values of Lr() and Lc() may be determined via a call to the func-
8323 * tion PB_NUMROC:
8324 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8325 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8326 *
8327 * Arguments
8328 * =========
8329 *
8330 * INPLACE (global input) LOGICAL
8331 * On entry, INPLACE specifies if the matrix should be generated
8332 * in place or not. If INPLACE is .TRUE., the local random array
8333 * to be generated will start in memory at the local memory lo-
8334 * cation A( 1, 1 ), otherwise it will start at the local posi-
8335 * tion induced by IA and JA.
8336 *
8337 * N (global input) INTEGER
8338 * On entry, N specifies the global order of the submatrix
8339 * sub( A ) to be modified. N must be at least zero.
8340 *
8341 * ALPHA (global input) DOUBLE PRECISION
8342 * On entry, ALPHA specifies the scalar alpha.
8343 *
8344 * A (local input/local output) DOUBLE PRECISION array
8345 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8346 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
8347 * the local entries of the matrix A. On exit, the local entries
8348 * of this array corresponding to the main diagonal of sub( A )
8349 * have been updated.
8350 *
8351 * IA (global input) INTEGER
8352 * On entry, IA specifies A's global row index, which points to
8353 * the beginning of the submatrix sub( A ).
8354 *
8355 * JA (global input) INTEGER
8356 * On entry, JA specifies A's global column index, which points
8357 * to the beginning of the submatrix sub( A ).
8358 *
8359 * DESCA (global and local input) INTEGER array
8360 * On entry, DESCA is an integer array of dimension DLEN_. This
8361 * is the array descriptor for the matrix A.
8362 *
8363 * -- Written on April 1, 1998 by
8364 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8365 *
8366 * =====================================================================
8367 *
8368 * .. Parameters ..
8369  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8370  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8371  $ RSRC_
8372  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8373  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8374  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8375  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8376 * ..
8377 * .. Local Scalars ..
8378  LOGICAL GODOWN, GOLEFT
8379  INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW,
8380  $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP,
8381  $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1,
8382  $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC,
8383  $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS,
8384  $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP
8385  DOUBLE PRECISION ATMP
8386 * ..
8387 * .. Local Scalars ..
8388  INTEGER DESCA2( DLEN_ )
8389 * ..
8390 * .. External Subroutines ..
8391  EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
8392  $ pb_desctrans
8393 * ..
8394 * .. Intrinsic Functions ..
8395  INTRINSIC abs, max, min
8396 * ..
8397 * .. Executable Statements ..
8398 *
8399 * Convert descriptor
8400 *
8401  CALL pb_desctrans( desca, desca2 )
8402 *
8403 * Get grid parameters
8404 *
8405  ictxt = desca2( ctxt_ )
8406  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8407 *
8408  IF( n.EQ.0 )
8409  $ RETURN
8410 *
8411  CALL pb_ainfog2l( n, n, ia, ja, desca2, nprow, npcol, myrow,
8412  $ mycol, imb1, inb1, np, nq, iia, jja, iarow,
8413  $ iacol, mrrow, mrcol )
8414 *
8415 * Decide where the entries shall be stored in memory
8416 *
8417  IF( inplace ) THEN
8418  iia = 1
8419  jja = 1
8420  END IF
8421 *
8422 * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
8423 * ILOW, LOW, IUPP, and UPP.
8424 *
8425  mb = desca2( mb_ )
8426  nb = desca2( nb_ )
8427 *
8428  CALL pb_binfo( 0, np, nq, imb1, inb1, mb, nb, mrrow, mrcol,
8429  $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
8430  $ lnbloc, ilow, low, iupp, upp )
8431 *
8432  ioffa = iia - 1
8433  joffa = jja - 1
8434  lda = desca2( lld_ )
8435  ldap1 = lda + 1
8436 *
8437  IF( desca2( rsrc_ ).LT.0 ) THEN
8438  pmb = mb
8439  ELSE
8440  pmb = nprow * mb
8441  END IF
8442  IF( desca2( csrc_ ).LT.0 ) THEN
8443  qnb = nb
8444  ELSE
8445  qnb = npcol * nb
8446  END IF
8447 *
8448 * Handle the first block of rows or columns separately, and update
8449 * LCMT00, MBLKS and NBLKS.
8450 *
8451  godown = ( lcmt00.GT.iupp )
8452  goleft = ( lcmt00.LT.ilow )
8453 *
8454  IF( .NOT.godown .AND. .NOT.goleft ) THEN
8455 *
8456 * LCMT00 >= ILOW && LCMT00 <= IUPP
8457 *
8458  IF( lcmt00.GE.0 ) THEN
8459  ijoffa = ioffa+lcmt00 + ( joffa - 1 ) * lda
8460  DO 10 i = 1, min( inbloc, max( 0, imbloc - lcmt00 ) )
8461  atmp = a( ijoffa + i*ldap1 )
8462  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8463  10 CONTINUE
8464  ELSE
8465  ijoffa = ioffa + ( joffa - lcmt00 - 1 ) * lda
8466  DO 20 i = 1, min( imbloc, max( 0, inbloc + lcmt00 ) )
8467  atmp = a( ijoffa + i*ldap1 )
8468  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8469  20 CONTINUE
8470  END IF
8471  goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
8472  godown = .NOT.goleft
8473 *
8474  END IF
8475 *
8476  IF( godown ) THEN
8477 *
8478  lcmt00 = lcmt00 - ( iupp - upp + pmb )
8479  mblks = mblks - 1
8480  ioffa = ioffa + imbloc
8481 *
8482  30 CONTINUE
8483  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8484  lcmt00 = lcmt00 - pmb
8485  mblks = mblks - 1
8486  ioffa = ioffa + mb
8487  GO TO 30
8488  END IF
8489 *
8490  lcmt = lcmt00
8491  mblkd = mblks
8492  ioffd = ioffa
8493 *
8494  mbloc = mb
8495  40 CONTINUE
8496  IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
8497  IF( mblkd.EQ.1 )
8498  $ mbloc = lmbloc
8499  IF( lcmt.GE.0 ) THEN
8500  ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
8501  DO 50 i = 1, min( inbloc, max( 0, mbloc - lcmt ) )
8502  atmp = a( ijoffa + i*ldap1 )
8503  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8504  50 CONTINUE
8505  ELSE
8506  ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
8507  DO 60 i = 1, min( mbloc, max( 0, inbloc + lcmt ) )
8508  atmp = a( ijoffa + i*ldap1 )
8509  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8510  60 CONTINUE
8511  END IF
8512  lcmt00 = lcmt
8513  lcmt = lcmt - pmb
8514  mblks = mblkd
8515  mblkd = mblkd - 1
8516  ioffa = ioffd
8517  ioffd = ioffd + mbloc
8518  GO TO 40
8519  END IF
8520 *
8521  lcmt00 = lcmt00 + low - ilow + qnb
8522  nblks = nblks - 1
8523  joffa = joffa + inbloc
8524 *
8525  ELSE IF( goleft ) THEN
8526 *
8527  lcmt00 = lcmt00 + low - ilow + qnb
8528  nblks = nblks - 1
8529  joffa = joffa + inbloc
8530 *
8531  70 CONTINUE
8532  IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
8533  lcmt00 = lcmt00 + qnb
8534  nblks = nblks - 1
8535  joffa = joffa + nb
8536  GO TO 70
8537  END IF
8538 *
8539  lcmt = lcmt00
8540  nblkd = nblks
8541  joffd = joffa
8542 *
8543  nbloc = nb
8544  80 CONTINUE
8545  IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
8546  IF( nblkd.EQ.1 )
8547  $ nbloc = lnbloc
8548  IF( lcmt.GE.0 ) THEN
8549  ijoffa = ioffa + lcmt + ( joffd - 1 ) * lda
8550  DO 90 i = 1, min( nbloc, max( 0, imbloc - lcmt ) )
8551  atmp = a( ijoffa + i*ldap1 )
8552  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8553  90 CONTINUE
8554  ELSE
8555  ijoffa = ioffa + ( joffd - lcmt - 1 ) * lda
8556  DO 100 i = 1, min( imbloc, max( 0, nbloc + lcmt ) )
8557  atmp = a( ijoffa + i*ldap1 )
8558  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8559  100 CONTINUE
8560  END IF
8561  lcmt00 = lcmt
8562  lcmt = lcmt + qnb
8563  nblks = nblkd
8564  nblkd = nblkd - 1
8565  joffa = joffd
8566  joffd = joffd + nbloc
8567  GO TO 80
8568  END IF
8569 *
8570  lcmt00 = lcmt00 - ( iupp - upp + pmb )
8571  mblks = mblks - 1
8572  ioffa = ioffa + imbloc
8573 *
8574  END IF
8575 *
8576  nbloc = nb
8577  110 CONTINUE
8578  IF( nblks.GT.0 ) THEN
8579  IF( nblks.EQ.1 )
8580  $ nbloc = lnbloc
8581  120 CONTINUE
8582  IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8583  lcmt00 = lcmt00 - pmb
8584  mblks = mblks - 1
8585  ioffa = ioffa + mb
8586  GO TO 120
8587  END IF
8588 *
8589  lcmt = lcmt00
8590  mblkd = mblks
8591  ioffd = ioffa
8592 *
8593  mbloc = mb
8594  130 CONTINUE
8595  IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
8596  IF( mblkd.EQ.1 )
8597  $ mbloc = lmbloc
8598  IF( lcmt.GE.0 ) THEN
8599  ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
8600  DO 140 i = 1, min( nbloc, max( 0, mbloc - lcmt ) )
8601  atmp = a( ijoffa + i*ldap1 )
8602  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8603  140 CONTINUE
8604  ELSE
8605  ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
8606  DO 150 i = 1, min( mbloc, max( 0, nbloc + lcmt ) )
8607  atmp = a( ijoffa + i*ldap1 )
8608  a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8609  150 CONTINUE
8610  END IF
8611  lcmt00 = lcmt
8612  lcmt = lcmt - pmb
8613  mblks = mblkd
8614  mblkd = mblkd - 1
8615  ioffa = ioffd
8616  ioffd = ioffd + mbloc
8617  GO TO 130
8618  END IF
8619 *
8620  lcmt00 = lcmt00 + qnb
8621  nblks = nblks - 1
8622  joffa = joffa + nbloc
8623  GO TO 110
8624 *
8625  END IF
8626 *
8627  RETURN
8628 *
8629 * End of PDLADOM
8630 *
8631  END
8632  SUBROUTINE pb_pdlaprnt( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
8633  $ CMATNM, NOUT, WORK )
8635 * -- PBLAS test routine (version 2.0) --
8636 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8637 * and University of California, Berkeley.
8638 * April 1, 1998
8639 *
8640 * .. Scalar Arguments ..
8641  INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT
8642 * ..
8643 * .. Array Arguments ..
8644  CHARACTER*(*) CMATNM
8645  INTEGER DESCA( * )
8646  DOUBLE PRECISION A( * ), WORK( * )
8647 * ..
8648 *
8649 * Purpose
8650 * =======
8651 *
8652 * PB_PDLAPRNT prints to the standard output a submatrix sub( A ) deno-
8653 * ting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and printed by
8654 * the process of coordinates (IRPRNT, ICPRNT).
8655 *
8656 * Notes
8657 * =====
8658 *
8659 * A description vector is associated with each 2D block-cyclicly dis-
8660 * tributed matrix. This vector stores the information required to
8661 * establish the mapping between a matrix entry and its corresponding
8662 * process and memory location.
8663 *
8664 * In the following comments, the character _ should be read as
8665 * "of the distributed matrix". Let A be a generic term for any 2D
8666 * block cyclicly distributed matrix. Its description vector is DESCA:
8667 *
8668 * NOTATION STORED IN EXPLANATION
8669 * ---------------- --------------- ------------------------------------
8670 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8671 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8672 * the NPROW x NPCOL BLACS process grid
8673 * A is distributed over. The context
8674 * itself is global, but the handle
8675 * (the integer value) may vary.
8676 * M_A (global) DESCA( M_ ) The number of rows in the distribu-
8677 * ted matrix A, M_A >= 0.
8678 * N_A (global) DESCA( N_ ) The number of columns in the distri-
8679 * buted matrix A, N_A >= 0.
8680 * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8681 * block of the matrix A, IMB_A > 0.
8682 * INB_A (global) DESCA( INB_ ) The number of columns of the upper
8683 * left block of the matrix A,
8684 * INB_A > 0.
8685 * MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8686 * bute the last M_A-IMB_A rows of A,
8687 * MB_A > 0.
8688 * NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8689 * bute the last N_A-INB_A columns of
8690 * A, NB_A > 0.
8691 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8692 * row of the matrix A is distributed,
8693 * NPROW > RSRC_A >= 0.
8694 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8695 * first column of A is distributed.
8696 * NPCOL > CSRC_A >= 0.
8697 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8698 * array storing the local blocks of
8699 * the distributed matrix A,
8700 * IF( Lc( 1, N_A ) > 0 )
8701 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
8702 * ELSE
8703 * LLD_A >= 1.
8704 *
8705 * Let K be the number of rows of a matrix A starting at the global in-
8706 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8707 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8708 * receive if these K rows were distributed over NPROW processes. If K
8709 * is the number of columns of a matrix A starting at the global index
8710 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8711 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8712 * these K columns were distributed over NPCOL processes.
8713 *
8714 * The values of Lr() and Lc() may be determined via a call to the func-
8715 * tion PB_NUMROC:
8716 * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8717 * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8718 *
8719 * Arguments
8720 * =========
8721 *
8722 * M (global input) INTEGER
8723 * On entry, M specifies the number of rows of the submatrix
8724 * sub( A ). M must be at least zero.
8725 *
8726 * N (global input) INTEGER
8727 * On entry, N specifies the number of columns of the submatrix
8728 * sub( A ). N must be at least zero.
8729 *
8730 * A (local input) DOUBLE PRECISION array
8731 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8732 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
8733 * the local entries of the matrix A.
8734 *
8735 * IA (global input) INTEGER
8736 * On entry, IA specifies A's global row index, which points to
8737 * the beginning of the submatrix sub( A ).
8738 *
8739 * JA (global input) INTEGER
8740 * On entry, JA specifies A's global column index, which points
8741 * to the beginning of the submatrix sub( A ).
8742 *
8743 * DESCA (global and local input) INTEGER array
8744 * On entry, DESCA is an integer array of dimension DLEN_. This
8745 * is the array descriptor for the matrix A.
8746 *
8747 * IRPRNT (global input) INTEGER
8748 * On entry, IRPRNT specifies the row index of the printing pro-
8749 * cess.
8750 *
8751 * ICPRNT (global input) INTEGER
8752 * On entry, ICPRNT specifies the column index of the printing
8753 * process.
8754 *
8755 * CMATNM (global input) CHARACTER*(*)
8756 * On entry, CMATNM is the name of the matrix to be printed.
8757 *
8758 * NOUT (global input) INTEGER
8759 * On entry, NOUT specifies the output unit number. When NOUT is
8760 * equal to 6, the submatrix is printed on the screen.
8761 *
8762 * WORK (local workspace) DOUBLE PRECISION array
8763 * On entry, WORK is a work array of dimension at least equal to
8764 * MAX( IMB_A, MB_A ).
8765 *
8766 * -- Written on April 1, 1998 by
8767 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8768 *
8769 * =====================================================================
8770 *
8771 * .. Parameters ..
8772  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8773  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8774  $ RSRC_
8775  PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8776  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8777  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8778  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8779 * ..
8780 * .. Local Scalars ..
8781  INTEGER MYCOL, MYROW, NPCOL, NPROW, PCOL, PROW
8782 * ..
8783 * .. Local Arrays ..
8784  INTEGER DESCA2( DLEN_ )
8785 * ..
8786 * .. External Subroutines ..
8787  EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS, PB_PDLAPRN2
8788 * ..
8789 * .. Executable Statements ..
8790 *
8791 * Quick return if possible
8792 *
8793  IF( ( m.LE.0 ).OR.( n.LE.0 ) )
8794  $ RETURN
8795 *
8796 * Convert descriptor
8797 *
8798  CALL pb_desctrans( desca, desca2 )
8799 *
8800  CALL blacs_gridinfo( desca2( ctxt_ ), nprow, npcol, myrow, mycol )
8801 *
8802  IF( desca2( rsrc_ ).GE.0 ) THEN
8803  IF( desca2( csrc_ ).GE.0 ) THEN
8804  CALL pb_pdlaprn2( m, n, a, ia, ja, desca2, irprnt, icprnt,
8805  $ cmatnm, nout, desca2( rsrc_ ),
8806  $ desca2( csrc_ ), work )
8807  ELSE
8808  DO 10 pcol = 0, npcol - 1
8809  IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8810  $ WRITE( nout, * ) 'Colum-replicated array -- ' ,
8811  $ 'copy in process column: ', pcol
8812  CALL pb_pdlaprn2( m, n, a, ia, ja, desca2, irprnt,
8813  $ icprnt, cmatnm, nout, desca2( rsrc_ ),
8814  $ pcol, work )
8815  10 CONTINUE
8816  END IF
8817  ELSE
8818  IF( desca2( csrc_ ).GE.0 ) THEN
8819  DO 20 prow = 0, nprow - 1
8820  IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8821  $ WRITE( nout, * ) 'Row-replicated array -- ' ,
8822  $ 'copy in process row: ', prow
8823  CALL pb_pdlaprn2( m, n, a, ia, ja, desca2, irprnt,
8824  $ icprnt, cmatnm, nout, prow,
8825  $ desca2( csrc_ ), work )
8826  20 CONTINUE
8827  ELSE
8828  DO 40 prow = 0, nprow - 1
8829  DO 30 pcol = 0, npcol - 1
8830  IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8831  $ WRITE( nout, * ) 'Replicated array -- ' ,
8832  $ 'copy in process (', prow, ',', pcol, ')'
8833  CALL pb_pdlaprn2( m, n, a, ia, ja, desca2, irprnt,
8834  $ icprnt, cmatnm, nout, prow, pcol,
8835  $ work )
8836  30 CONTINUE
8837  40 CONTINUE
8838  END IF
8839  END IF
8840 *
8841  RETURN
8842 *
8843 * End of PB_PDLAPRNT
8844 *
8845  END
8846  SUBROUTINE pb_pdlaprn2( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
8847  $ CMATNM, NOUT, PROW, PCOL, WORK )
8849 * -- PBLAS test routine (version 2.0) --
8850 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8851 * and University of California, Berkeley.
8852 * April 1, 1998
8853 *
8854 * .. Scalar Arguments ..
8855  INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT, PCOL, PROW
8856 * ..
8857 * .. Array Arguments ..
8858  CHARACTER*(*) CMATNM
8859  INTEGER DESCA( * )
8860  DOUBLE PRECISION A( * ), WORK( * )
8861 * ..
8862 *
8863 * .. Parameters ..
8864  INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8865  $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8866  $ RSRC_
8867  parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
8868  $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8869  $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8870  $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8871 * ..
8872 * .. Local Scalars ..
8873  LOGICAL AISCOLREP, AISROWREP
8874  INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
8875  $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
8876  $ LDA, LDW, MYCOL, MYROW, NPCOL, NPROW
8877 * ..
8878 * .. External Subroutines ..
8879  EXTERNAL blacs_barrier, blacs_gridinfo, dgerv2d,
8880  $ dgesd2d, pb_infog2l
8881 * ..
8882 * .. Intrinsic Functions ..
8883  INTRINSIC min
8884 * ..
8885 * .. Executable Statements ..
8886 *
8887 * Get grid parameters
8888 *
8889  ictxt = desca( ctxt_ )
8890  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8891  CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol,
8892  $ iia, jja, iarow, iacol )
8893  ii = iia
8894  jj = jja
8895  IF( desca( rsrc_ ).LT.0 ) THEN
8896  aisrowrep = .true.
8897  iarow = prow
8898  icurrow = prow
8899  ELSE
8900  aisrowrep = .false.
8901  icurrow = iarow
8902  END IF
8903  IF( desca( csrc_ ).LT.0 ) THEN
8904  aiscolrep = .true.
8905  iacol = pcol
8906  icurcol = pcol
8907  ELSE
8908  aiscolrep = .false.
8909  icurcol = iacol
8910  END IF
8911  lda = desca( lld_ )
8912  ldw = max( desca( imb_ ), desca( mb_ ) )
8913 *
8914 * Handle the first block of column separately
8915 *
8916  jb = desca( inb_ ) - ja + 1
8917  IF( jb.LE.0 )
8918  $ jb = ( (-jb) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
8919  jb = min( jb, n )
8920  jn = ja+jb-1
8921  DO 60 h = 0, jb-1
8922  ib = desca( imb_ ) - ia + 1
8923  IF( ib.LE.0 )
8924  $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
8925  ib = min( ib, m )
8926  in = ia+ib-1
8927  IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
8928  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8929  DO 10 k = 0, ib-1
8930  WRITE( nout, fmt = 9999 )
8931  $ cmatnm, ia+k, ja+h, a( ii+k+(jj+h-1)*lda )
8932  10 CONTINUE
8933  END IF
8934  ELSE
8935  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
8936  CALL dgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ), lda,
8937  $ irprnt, icprnt )
8938  ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8939  CALL dgerv2d( ictxt, ib, 1, work, ldw, icurrow, icurcol )
8940  DO 20 k = 1, ib
8941  WRITE( nout, fmt = 9999 )
8942  $ cmatnm, ia+k-1, ja+h, work( k )
8943  20 CONTINUE
8944  END IF
8945  END IF
8946  IF( myrow.EQ.icurrow )
8947  $ ii = ii + ib
8948  IF( .NOT.aisrowrep )
8949  $ icurrow = mod( icurrow+1, nprow )
8950  CALL blacs_barrier( ictxt, 'All' )
8951 *
8952 * Loop over remaining block of rows
8953 *
8954  DO 50 i = in+1, ia+m-1, desca( mb_ )
8955  ib = min( desca( mb_ ), ia+m-i )
8956  IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
8957  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8958  DO 30 k = 0, ib-1
8959  WRITE( nout, fmt = 9999 )
8960  $ cmatnm, i+k, ja+h, a( ii+k+(jj+h-1)*lda )
8961  30 CONTINUE
8962  END IF
8963  ELSE
8964  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
8965  CALL dgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
8966  $ lda, irprnt, icprnt )
8967  ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8968  CALL dgerv2d( ictxt, ib, 1, work, ldw, icurrow,
8969  $ icurcol )
8970  DO 40 k = 1, ib
8971  WRITE( nout, fmt = 9999 )
8972  $ cmatnm, i+k-1, ja+h, work( k )
8973  40 CONTINUE
8974  END IF
8975  END IF
8976  IF( myrow.EQ.icurrow )
8977  $ ii = ii + ib
8978  IF( .NOT.aisrowrep )
8979  $ icurrow = mod( icurrow+1, nprow )
8980  CALL blacs_barrier( ictxt, 'All' )
8981  50 CONTINUE
8982 *
8983  ii = iia
8984  icurrow = iarow
8985  60 CONTINUE
8986 *
8987  IF( mycol.EQ.icurcol )
8988  $ jj = jj + jb
8989  IF( .NOT.aiscolrep )
8990  $ icurcol = mod( icurcol+1, npcol )
8991  CALL blacs_barrier( ictxt, 'All' )
8992 *
8993 * Loop over remaining column blocks
8994 *
8995  DO 130 j = jn+1, ja+n-1, desca( nb_ )
8996  jb = min( desca( nb_ ), ja+n-j )
8997  DO 120 h = 0, jb-1
8998  ib = desca( imb_ )-ia+1
8999  IF( ib.LE.0 )
9000  $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
9001  ib = min( ib, m )
9002  in = ia+ib-1
9003  IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9004  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9005  DO 70 k = 0, ib-1
9006  WRITE( nout, fmt = 9999 )
9007  $ cmatnm, ia+k, j+h, a( ii+k+(jj+h-1)*lda )
9008  70 CONTINUE
9009  END IF
9010  ELSE
9011  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9012  CALL dgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9013  $ lda, irprnt, icprnt )
9014  ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9015  CALL dgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9016  $ icurcol )
9017  DO 80 k = 1, ib
9018  WRITE( nout, fmt = 9999 )
9019  $ cmatnm, ia+k-1, j+h, work( k )
9020  80 CONTINUE
9021  END IF
9022  END IF
9023  IF( myrow.EQ.icurrow )
9024  $ ii = ii + ib
9025  icurrow = mod( icurrow+1, nprow )
9026  CALL blacs_barrier( ictxt, 'All' )
9027 *
9028 * Loop over remaining block of rows
9029 *
9030  DO 110 i = in+1, ia+m-1, desca( mb_ )
9031  ib = min( desca( mb_ ), ia+m-i )
9032  IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9033  IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9034  DO 90 k = 0, ib-1
9035  WRITE( nout, fmt = 9999 )
9036  $ cmatnm, i+k, j+h, a( ii+k+(jj+h-1)*lda )
9037  90 CONTINUE
9038  END IF
9039  ELSE
9040  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9041  CALL dgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9042  $ lda, irprnt, icprnt )
9043  ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9044  CALL dgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9045  $ icurcol )
9046  DO 100 k = 1, ib
9047  WRITE( nout, fmt = 9999 )
9048  $ cmatnm, i+k-1, j+h, work( k )
9049  100 CONTINUE
9050  END IF
9051  END IF
9052  IF( myrow.EQ.icurrow )
9053  $ ii = ii + ib
9054  IF( .NOT.aisrowrep )
9055  $ icurrow = mod( icurrow+1, nprow )
9056  CALL blacs_barrier( ictxt, 'All' )
9057  110 CONTINUE
9058 *
9059  ii = iia
9060  icurrow = iarow
9061  120 CONTINUE
9062 *
9063  IF( mycol.EQ.icurcol )
9064  $ jj = jj + jb
9065  IF( .NOT.aiscolrep )
9066  $ icurcol = mod( icurcol+1, npcol )
9067  CALL blacs_barrier( ictxt, 'All' )
9068 *
9069  130 CONTINUE
9070 *
9071  9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', d30.18 )
9072 *
9073  RETURN
9074 *
9075 * End of PB_PDLAPRN2
9076 *
9077  END
9078  SUBROUTINE pb_dfillpad( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL )
9080 * -- PBLAS test routine (version 2.0) --
9081 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9082 * and University of California, Berkeley.
9083 * April 1, 1998
9084 *
9085 * .. Scalar Arguments ..
9086  INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9087  DOUBLE PRECISION CHKVAL
9088 * ..
9089 * .. Array Arguments ..
9090  DOUBLE PRECISION A( * )
9091 * ..
9092 *
9093 * Purpose
9094 * =======
9095 *
9096 * PB_DFILLPAD surrounds a two dimensional local array with a guard-zone
9097 * initialized to the value CHKVAL. The user may later call the routine
9098 * PB_DCHEKPAD to discover if the guardzone has been violated. There are
9099 * three guardzones. The first is a buffer of size IPRE that is before
9100 * the start of the array. The second is the buffer of size IPOST which
9101 * is after the end of the array to be padded. Finally, there is a guard
9102 * zone inside every column of the array to be padded, in the elements
9103 * of A(M+1:LDA, J).
9104 *
9105 * Arguments
9106 * =========
9107 *
9108 * ICTXT (local input) INTEGER
9109 * On entry, ICTXT specifies the BLACS context handle, indica-
9110 * ting the global context of the operation. The context itself
9111 * is global, but the value of ICTXT is local.
9112 *
9113 * M (local input) INTEGER
9114 * On entry, M specifies the number of rows in the local array
9115 * A. M must be at least zero.
9116 *
9117 * N (local input) INTEGER
9118 * On entry, N specifies the number of columns in the local ar-
9119 * ray A. N must be at least zero.
9120 *
9121 * A (local input/local output) DOUBLE PRECISION array
9122 * On entry, A is an array of dimension (LDA,N). On exit, this
9123 * array is the padded array.
9124 *
9125 * LDA (local input) INTEGER
9126 * On entry, LDA specifies the leading dimension of the local
9127 * array to be padded. LDA must be at least MAX( 1, M ).
9128 *
9129 * IPRE (local input) INTEGER
9130 * On entry, IPRE specifies the size of the guard zone to put
9131 * before the start of the padded array.
9132 *
9133 * IPOST (local input) INTEGER
9134 * On entry, IPOST specifies the size of the guard zone to put
9135 * after the end of the padded array.
9136 *
9137 * CHKVAL (local input) DOUBLE PRECISION
9138 * On entry, CHKVAL specifies the value to pad the array with.
9139 *
9140 * -- Written on April 1, 1998 by
9141 * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9142 *
9143 * =====================================================================
9144 *
9145 * .. Local Scalars ..
9146  INTEGER I, J, K
9147 * ..
9148 * .. Executable Statements ..
9149 *
9150 * Put check buffer in front of A
9151 *
9152  IF( IPRE.GT.0 ) THEN
9153  DO 10 I = 1, ipre
9154  a( i ) = chkval
9155  10 CONTINUE
9156  ELSE
9157  WRITE( *, fmt = '(A)' )
9158  $ 'WARNING no pre-guardzone in PB_DFILLPAD'
9159  END IF
9160 *
9161 * Put check buffer in back of A
9162 *
9163  IF( ipost.GT.0 ) THEN
9164  j = ipre+lda*n+1
9165  DO 20 i = j, j+ipost-1
9166  a( i ) = chkval
9167  20 CONTINUE
9168  ELSE
9169  WRITE( *, fmt = '(A)' )
9170  $ 'WARNING no post-guardzone in PB_DFILLPAD'
9171  END IF
9172 *
9173 * Put check buffer in all (LDA-M) gaps
9174 *
9175  IF( lda.GT.m ) THEN
9176  k = ipre + m + 1
9177  DO 40 j = 1, n
9178  DO 30 i = k, k + ( lda - m ) - 1
9179  a( i ) = chkval
9180  30 CONTINUE
9181  k = k + lda
9182  40 CONTINUE
9183  END IF
9184 *
9185  RETURN
9186 *
9187 * End of PB_DFILLPAD
9188 *
9189  END
9190  SUBROUTINE pb_dchekpad( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST,
9191  $ CHKVAL )
9193 * -- PBLAS test routine (version 2.0) --
9194 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9195 * and University of California, Berkeley.
9196 * April 1, 1998
9197 *
9198 * .. Scalar Arguments ..
9199  INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9200  DOUBLE PRECISION CHKVAL
9201 * ..
9202 * .. Array Arguments ..
9203  CHARACTER*(*) MESS
9204  DOUBLE PRECISION A( * )
9205 * ..
9206 *
9207 * Purpose
9208 * =======
9209 *
9210 * PB_DCHEKPAD checks that the padding around a local array has not been
9211 * overwritten since the call to PB_DFILLPAD. Three types of errors are
9212 * reported:
9213 *
9214 * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has
9215 * occurred in the first IPRE elements which form a buffer before the
9216 * beginning of A. Therefore, the error message:
9217 * 'Overwrite in pre-guardzone: loc( 5) = 18.00000'
9218 * tells that the 5th element of the IPRE long buffer has been overwrit-
9219 * ten with the value 18, where it should still have the value CHKVAL.
9220 *
9221 * 2) Overwrite in post-guardzone. This indicates a memory overwrite has
9222 * occurred in the last IPOST elements which form a buffer after the end
9223 * of A. Error reports are refered from the end of A. Therefore,
9224 * 'Overwrite in post-guardzone: loc( 19) = 24.00000'
9225 * tells that the 19th element after the end of A was overwritten with
9226 * the value 24, where it should still have the value of CHKVAL.
9227 *
9228 * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were
9229 * overwritten. So,
9230 * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000'
9231 * tells that the element at the 12th row and 3rd column of A was over-
9232 * written with the value of 22, where it should still have the value of
9233 * CHKVAL.
9234 *
9235 * Arguments
9236 * =========
9237 *
9238 * ICTXT (local input) INTEGER
9239 * On entry, ICTXT specifies the BLACS context handle, indica-
9240 * ting the global context of the operation. The context itself
9241 * is global, but the value of ICTXT is local.
9242 *
9243 * MESS (local input) CHARACTER*(*)
9244 * On entry, MESS is a ttring containing a user-defined message.
9245 *
9246 * M (local input) INTEGER
9247 * On entry, M specifies the number of rows in the local array
9248 * A. M must be at least zero.
9249 *
9250 * N (local input) INTEGER
9251 * On entry, N specifies the number of columns in the local ar-
9252 * ray A. N must be at least zero.
9253 *
9254 * A (local input) DOUBLE PRECISION array
9255 * On entry, A is an array of dimension (LDA,N).
9256 *
9257 * LDA (local input) INTEGER
9258 * On entry, LDA specifies the leading dimension of the local
9259 * array to be padded. LDA must be at least MAX( 1, M ).
9260 *
9261 * IPRE (local input) INTEGER
9262 * On entry, IPRE specifies the size of the guard zone to put
9263 * before the start of the padded array.
9264 *
9265 * IPOST (local input) INTEGER
9266 * On entry, IPOST specifies the size of the guard zone to put
9267 * after the end of the padded array.
9268 *
9269 * CHKVAL (local input) DOUBLE PRECISION
9270 * On entry, CHKVAL specifies the value to pad the array with.
9271 *
9272 *
9273 * -- Written on April 1, 1998 by
9274 * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9275 *
9276 * =====================================================================
9277 *
9278 * .. Local Scalars ..
9279  CHARACTER*1 TOP
9280  INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, NPCOL,
9281  $ NPROW
9282 * ..
9283 * .. External Subroutines ..
9284  EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_TOPGET
9285 * ..
9286 * .. Executable Statements ..
9287 *
9288 * Get grid parameters
9289 *
9290  CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
9291  IAM = myrow*npcol + mycol
9292  info = -1
9293 *
9294 * Check buffer in front of A
9295 *
9296  IF( ipre.GT.0 ) THEN
9297  DO 10 i = 1, ipre
9298  IF( a( i ).NE.chkval ) THEN
9299  WRITE( *, fmt = 9998 ) myrow, mycol, mess, ' pre', i,
9300  $ a( i )
9301  info = iam
9302  END IF
9303  10 CONTINUE
9304  ELSE
9305  WRITE( *, fmt = * ) 'WARNING no pre-guardzone in PB_DCHEKPAD'
9306  END IF
9307 *
9308 * Check buffer after A
9309 *
9310  IF( ipost.GT.0 ) THEN
9311  j = ipre+lda*n+1
9312  DO 20 i = j, j+ipost-1
9313  IF( a( i ).NE.chkval ) THEN
9314  WRITE( *, fmt = 9998 ) myrow, mycol, mess, 'post',
9315  $ i-j+1, a( i )
9316  info = iam
9317  END IF
9318  20 CONTINUE
9319  ELSE
9320  WRITE( *, fmt = * )
9321  $ 'WARNING no post-guardzone buffer in PB_DCHEKPAD'
9322  END IF
9323 *
9324 * Check all (LDA-M) gaps
9325 *
9326  IF( lda.GT.m ) THEN
9327  k = ipre + m + 1
9328  DO 40 j = 1, n
9329  DO 30 i = k, k + (lda-m) - 1
9330  IF( a( i ).NE.chkval ) THEN
9331  WRITE( *, fmt = 9997 ) myrow, mycol, mess,
9332  $ i-ipre-lda*(j-1), j, a( i )
9333  info = iam
9334  END IF
9335  30 CONTINUE
9336  k = k + lda
9337  40 CONTINUE
9338  END IF
9339 *
9340  CALL pb_topget( ictxt, 'Combine', 'All', top )
9341  CALL igamx2d( ictxt, 'All', top, 1, 1, info, 1, idumm, idumm, -1,
9342  $ 0, 0 )
9343  IF( iam.EQ.0 .AND. info.GE.0 ) THEN
9344  WRITE( *, fmt = 9999 ) info / npcol, mod( info, npcol ), mess
9345  END IF
9346 *
9347  9999 FORMAT( '{', i5, ',', i5, '}: Memory overwrite in ', a )
9348  9998 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
9349  $ a4, '-guardzone: loc(', i3, ') = ', g20.7 )
9350  9997 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
9351  $ 'lda-m gap: loc(', i3, ',', i3, ') = ', g20.7 )
9352 *
9353  RETURN
9354 *
9355 * End of PB_DCHEKPAD
9356 *
9357  END
9358  SUBROUTINE pb_dlaset( UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA )
9360 * -- PBLAS test routine (version 2.0) --
9361 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9362 * and University of California, Berkeley.
9363 * April 1, 1998
9364 *
9365 * .. Scalar Arguments ..
9366  CHARACTER*1 UPLO
9367  INTEGER IOFFD, LDA, M, N
9368  DOUBLE PRECISION ALPHA, BETA
9369 * ..
9370 * .. Array Arguments ..
9371  DOUBLE PRECISION A( LDA, * )
9372 * ..
9373 *
9374 * Purpose
9375 * =======
9376 *
9377 * PB_DLASET initializes a two-dimensional array A to beta on the diago-
9378 * nal specified by IOFFD and alpha on the offdiagonals.
9379 *
9380 * Arguments
9381 * =========
9382 *
9383 * UPLO (global input) CHARACTER*1
9384 * On entry, UPLO specifies which trapezoidal part of the ar-
9385 * ray A is to be set as follows:
9386 * = 'L' or 'l': Lower triangular part is set; the strictly
9387 * upper triangular part of A is not changed,
9388 * = 'U' or 'u': Upper triangular part is set; the strictly
9389 * lower triangular part of A is not changed,
9390 * = 'D' or 'd' Only the diagonal of A is set,
9391 * Otherwise: All of the array A is set.
9392 *
9393 * M (input) INTEGER
9394 * On entry, M specifies the number of rows of the array A. M
9395 * must be at least zero.
9396 *
9397 * N (input) INTEGER
9398 * On entry, N specifies the number of columns of the array A.
9399 * N must be at least zero.
9400 *
9401 * IOFFD (input) INTEGER
9402 * On entry, IOFFD specifies the position of the offdiagonal de-
9403 * limiting the upper and lower trapezoidal part of A as follows
9404 * (see the notes below):
9405 *
9406 * IOFFD = 0 specifies the main diagonal A( i, i ),
9407 * with i = 1 ... MIN( M, N ),
9408 * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
9409 * with i = 1 ... MIN( M-IOFFD, N ),
9410 * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
9411 * with i = 1 ... MIN( M, N+IOFFD ).
9412 *
9413 * ALPHA (input) DOUBLE PRECISION
9414 * On entry, ALPHA specifies the value to which the offdiagonal
9415 * array elements are set to.
9416 *
9417 * BETA (input) DOUBLE PRECISION
9418 * On entry, BETA specifies the value to which the diagonal ar-
9419 * ray elements are set to.
9420 *
9421 * A (input/output) DOUBLE PRECISION array
9422 * On entry, A is an array of dimension (LDA,N). Before entry
9423 * with UPLO = 'U' or 'u', the leading m by n part of the array
9424 * A must contain the upper trapezoidal part of the matrix as
9425 * specified by IOFFD to be set, and the strictly lower trape-
9426 * zoidal part of A is not referenced; When IUPLO = 'L' or 'l',
9427 * the leading m by n part of the array A must contain the
9428 * lower trapezoidal part of the matrix as specified by IOFFD to
9429 * be set, and the strictly upper trapezoidal part of A is
9430 * not referenced.
9431 *
9432 * LDA (input) INTEGER
9433 * On entry, LDA specifies the leading dimension of the array A.
9434 * LDA must be at least max( 1, M ).
9435 *
9436 * Notes
9437 * =====
9438 * N N
9439 * ---------------------------- -----------
9440 * | d | | |
9441 * M | d 'U' | | 'U' |
9442 * | 'L' 'D' | |d |
9443 * | d | M | d |
9444 * ---------------------------- | 'D' |
9445 * | d |
9446 * IOFFD < 0 | 'L' d |
9447 * | d|
9448 * N | |
9449 * ----------- -----------
9450 * | d 'U'|
9451 * | d | IOFFD > 0
9452 * M | 'D' |
9453 * | d| N
9454 * | 'L' | ----------------------------
9455 * | | | 'U' |
9456 * | | |d |
9457 * | | | 'D' |
9458 * | | | d |
9459 * | | |'L' d |
9460 * ----------- ----------------------------
9461 *
9462 * -- Written on April 1, 1998 by
9463 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9464 *
9465 * =====================================================================
9466 *
9467 * .. Local Scalars ..
9468  INTEGER I, J, JTMP, MN
9469 * ..
9470 * .. External Functions ..
9471  LOGICAL LSAME
9472  EXTERNAL LSAME
9473 * ..
9474 * .. Intrinsic Functions ..
9475  INTRINSIC MAX, MIN
9476 * ..
9477 * .. Executable Statements ..
9478 *
9479 * Quick return if possible
9480 *
9481  IF( M.LE.0 .OR. N.LE.0 )
9482  $ RETURN
9483 *
9484 * Start the operations
9485 *
9486  IF( LSAME( UPLO, 'L' ) ) THEN
9487 *
9488 * Set the diagonal to BETA and the strictly lower triangular
9489 * part of the array to ALPHA.
9490 *
9491  mn = max( 0, -ioffd )
9492  DO 20 j = 1, min( mn, n )
9493  DO 10 i = 1, m
9494  a( i, j ) = alpha
9495  10 CONTINUE
9496  20 CONTINUE
9497  DO 40 j = mn + 1, min( m - ioffd, n )
9498  jtmp = j + ioffd
9499  a( jtmp, j ) = beta
9500  DO 30 i = jtmp + 1, m
9501  a( i, j ) = alpha
9502  30 CONTINUE
9503  40 CONTINUE
9504 *
9505  ELSE IF( lsame( uplo, 'U' ) ) THEN
9506 *
9507 * Set the diagonal to BETA and the strictly upper triangular
9508 * part of the array to ALPHA.
9509 *
9510  mn = min( m - ioffd, n )
9511  DO 60 j = max( 0, -ioffd ) + 1, mn
9512  jtmp = j + ioffd
9513  DO 50 i = 1, jtmp - 1
9514  a( i, j ) = alpha
9515  50 CONTINUE
9516  a( jtmp, j ) = beta
9517  60 CONTINUE
9518  DO 80 j = max( 0, mn ) + 1, n
9519  DO 70 i = 1, m
9520  a( i, j ) = alpha
9521  70 CONTINUE
9522  80 CONTINUE
9523 *
9524  ELSE IF( lsame( uplo, 'D' ) ) THEN
9525 *
9526 * Set the array to BETA on the diagonal.
9527 *
9528  DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9529  a( j + ioffd, j ) = beta
9530  90 CONTINUE
9531 *
9532  ELSE
9533 *
9534 * Set the array to BETA on the diagonal and ALPHA on the
9535 * offdiagonal.
9536 *
9537  DO 110 j = 1, n
9538  DO 100 i = 1, m
9539  a( i, j ) = alpha
9540  100 CONTINUE
9541  110 CONTINUE
9542  IF( alpha.NE.beta .AND. ioffd.LT.m .AND. ioffd.GT.-n ) THEN
9543  DO 120 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9544  a( j + ioffd, j ) = beta
9545  120 CONTINUE
9546  END IF
9547 *
9548  END IF
9549 *
9550  RETURN
9551 *
9552 * End of PB_DLASET
9553 *
9554  END
9555  SUBROUTINE pb_dlascal( UPLO, M, N, IOFFD, ALPHA, A, LDA )
9557 * -- PBLAS test routine (version 2.0) --
9558 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9559 * and University of California, Berkeley.
9560 * April 1, 1998
9561 *
9562 * .. Scalar Arguments ..
9563  CHARACTER*1 UPLO
9564  INTEGER IOFFD, LDA, M, N
9565  DOUBLE PRECISION ALPHA
9566 * ..
9567 * .. Array Arguments ..
9568  DOUBLE PRECISION A( LDA, * )
9569 * ..
9570 *
9571 * Purpose
9572 * =======
9573 *
9574 * PB_DLASCAL scales a two-dimensional array A by the scalar alpha.
9575 *
9576 * Arguments
9577 * =========
9578 *
9579 * UPLO (input) CHARACTER*1
9580 * On entry, UPLO specifies which trapezoidal part of the ar-
9581 * ray A is to be scaled as follows:
9582 * = 'L' or 'l': the lower trapezoid of A is scaled,
9583 * = 'U' or 'u': the upper trapezoid of A is scaled,
9584 * = 'D' or 'd': diagonal specified by IOFFD is scaled,
9585 * Otherwise: all of the array A is scaled.
9586 *
9587 * M (input) INTEGER
9588 * On entry, M specifies the number of rows of the array A. M
9589 * must be at least zero.
9590 *
9591 * N (input) INTEGER
9592 * On entry, N specifies the number of columns of the array A.
9593 * N must be at least zero.
9594 *
9595 * IOFFD (input) INTEGER
9596 * On entry, IOFFD specifies the position of the offdiagonal de-
9597 * limiting the upper and lower trapezoidal part of A as follows
9598 * (see the notes below):
9599 *
9600 * IOFFD = 0 specifies the main diagonal A( i, i ),
9601 * with i = 1 ... MIN( M, N ),
9602 * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
9603 * with i = 1 ... MIN( M-IOFFD, N ),
9604 * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
9605 * with i = 1 ... MIN( M, N+IOFFD ).
9606 *
9607 * ALPHA (input) DOUBLE PRECISION
9608 * On entry, ALPHA specifies the scalar alpha.
9609 *
9610 * A (input/output) DOUBLE PRECISION array
9611 * On entry, A is an array of dimension (LDA,N). Before entry
9612 * with UPLO = 'U' or 'u', the leading m by n part of the array
9613 * A must contain the upper trapezoidal part of the matrix as
9614 * specified by IOFFD to be scaled, and the strictly lower tra-
9615 * pezoidal part of A is not referenced; When UPLO = 'L' or 'l',
9616 * the leading m by n part of the array A must contain the lower
9617 * trapezoidal part of the matrix as specified by IOFFD to be
9618 * scaled, and the strictly upper trapezoidal part of A is not
9619 * referenced. On exit, the entries of the trapezoid part of A
9620 * determined by UPLO and IOFFD are scaled.
9621 *
9622 * LDA (input) INTEGER
9623 * On entry, LDA specifies the leading dimension of the array A.
9624 * LDA must be at least max( 1, M ).
9625 *
9626 * Notes
9627 * =====
9628 * N N
9629 * ---------------------------- -----------
9630 * | d | | |
9631 * M | d 'U' | | 'U' |
9632 * | 'L' 'D' | |d |
9633 * | d | M | d |
9634 * ---------------------------- | 'D' |
9635 * | d |
9636 * IOFFD < 0 | 'L' d |
9637 * | d|
9638 * N | |
9639 * ----------- -----------
9640 * | d 'U'|
9641 * | d | IOFFD > 0
9642 * M | 'D' |
9643 * | d| N
9644 * | 'L' | ----------------------------
9645 * | | | 'U' |
9646 * | | |d |
9647 * | | | 'D' |
9648 * | | | d |
9649 * | | |'L' d |
9650 * ----------- ----------------------------
9651 *
9652 * -- Written on April 1, 1998 by
9653 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9654 *
9655 * =====================================================================
9656 *
9657 * .. Local Scalars ..
9658  INTEGER I, J, JTMP, MN
9659 * ..
9660 * .. External Functions ..
9661  LOGICAL LSAME
9662  EXTERNAL LSAME
9663 * ..
9664 * .. Intrinsic Functions ..
9665  INTRINSIC MAX, MIN
9666 * ..
9667 * .. Executable Statements ..
9668 *
9669 * Quick return if possible
9670 *
9671  IF( M.LE.0 .OR. N.LE.0 )
9672  $ RETURN
9673 *
9674 * Start the operations
9675 *
9676  IF( LSAME( UPLO, 'L' ) ) THEN
9677 *
9678 * Scales the lower triangular part of the array by ALPHA.
9679 *
9680  MN = max( 0, -ioffd )
9681  DO 20 j = 1, min( mn, n )
9682  DO 10 i = 1, m
9683  a( i, j ) = alpha * a( i, j )
9684  10 CONTINUE
9685  20 CONTINUE
9686  DO 40 j = mn + 1, min( m - ioffd, n )
9687  DO 30 i = j + ioffd, m
9688  a( i, j ) = alpha * a( i, j )
9689  30 CONTINUE
9690  40 CONTINUE
9691 *
9692  ELSE IF( lsame( uplo, 'U' ) ) THEN
9693 *
9694 * Scales the upper triangular part of the array by ALPHA.
9695 *
9696  mn = min( m - ioffd, n )
9697  DO 60 j = max( 0, -ioffd ) + 1, mn
9698  DO 50 i = 1, j + ioffd
9699  a( i, j ) = alpha * a( i, j )
9700  50 CONTINUE
9701  60 CONTINUE
9702  DO 80 j = max( 0, mn ) + 1, n
9703  DO 70 i = 1, m
9704  a( i, j ) = alpha * a( i, j )
9705  70 CONTINUE
9706  80 CONTINUE
9707 *
9708  ELSE IF( lsame( uplo, 'D' ) ) THEN
9709 *
9710 * Scales the diagonal entries by ALPHA.
9711 *
9712  DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9713  jtmp = j + ioffd
9714  a( jtmp, j ) = alpha * a( jtmp, j )
9715  90 CONTINUE
9716 *
9717  ELSE
9718 *
9719 * Scales the entire array by ALPHA.
9720 *
9721  DO 110 j = 1, n
9722  DO 100 i = 1, m
9723  a( i, j ) = alpha * a( i, j )
9724  100 CONTINUE
9725  110 CONTINUE
9726 *
9727  END IF
9728 *
9729  RETURN
9730 *
9731 * End of PB_DLASCAL
9732 *
9733  END
9734  SUBROUTINE pb_dlagen( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS,
9735  $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB,
9736  $ LNBLOC, JMP, IMULADD )
9738 * -- PBLAS test routine (version 2.0) --
9739 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9740 * and University of California, Berkeley.
9741 * April 1, 1998
9742 *
9743 * .. Scalar Arguments ..
9744  CHARACTER*1 UPLO, AFORM
9745  INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC,
9746  $ mb, mblks, nb, nblks
9747 * ..
9748 * .. Array Arguments ..
9749  INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * )
9750  DOUBLE PRECISION A( LDA, * )
9751 * ..
9752 *
9753 * Purpose
9754 * =======
9755 *
9756 * PB_DLAGEN locally initializes an array A.
9757 *
9758 * Arguments
9759 * =========
9760 *
9761 * UPLO (global input) CHARACTER*1
9762 * On entry, UPLO specifies whether the lower (UPLO='L') trape-
9763 * zoidal part or the upper (UPLO='U') trapezoidal part is to be
9764 * generated when the matrix to be generated is symmetric or
9765 * Hermitian. For all the other values of AFORM, the value of
9766 * this input argument is ignored.
9767 *
9768 * AFORM (global input) CHARACTER*1
9769 * On entry, AFORM specifies the type of submatrix to be genera-
9770 * ted as follows:
9771 * AFORM = 'S', sub( A ) is a symmetric matrix,
9772 * AFORM = 'H', sub( A ) is a Hermitian matrix,
9773 * AFORM = 'T', sub( A ) is overrwritten with the transpose
9774 * of what would normally be generated,
9775 * AFORM = 'C', sub( A ) is overwritten with the conjugate
9776 * transpose of what would normally be genera-
9777 * ted.
9778 * AFORM = 'N', a random submatrix is generated.
9779 *
9780 * A (local output) DOUBLE PRECISION array
9781 * On entry, A is an array of dimension (LLD_A, *). On exit,
9782 * this array contains the local entries of the randomly genera-
9783 * ted submatrix sub( A ).
9784 *
9785 * LDA (local input) INTEGER
9786 * On entry, LDA specifies the local leading dimension of the
9787 * array A. LDA must be at least one.
9788 *
9789 * LCMT00 (global input) INTEGER
9790 * On entry, LCMT00 is the LCM value specifying the off-diagonal
9791 * of the underlying matrix of interest. LCMT00=0 specifies the
9792 * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0
9793 * specifies superdiagonals.
9794 *
9795 * IRAN (local input) INTEGER array
9796 * On entry, IRAN is an array of dimension 2 containing respec-
9797 * tively the 16-lower and 16-higher bits of the encoding of the
9798 * entry of the random sequence corresponding locally to the
9799 * first local array entry to generate. Usually, this array is
9800 * computed by PB_SETLOCRAN.
9801 *
9802 * MBLKS (local input) INTEGER
9803 * On entry, MBLKS specifies the local number of blocks of rows.
9804 * MBLKS is at least zero.
9805 *
9806 * IMBLOC (local input) INTEGER
9807 * On entry, IMBLOC specifies the number of rows (size) of the
9808 * local uppest blocks. IMBLOC is at least zero.
9809 *
9810 * MB (global input) INTEGER
9811 * On entry, MB specifies the blocking factor used to partition
9812 * the rows of the matrix. MB must be at least one.
9813 *
9814 * LMBLOC (local input) INTEGER
9815 * On entry, LMBLOC specifies the number of rows (size) of the
9816 * local lowest blocks. LMBLOC is at least zero.
9817 *
9818 * NBLKS (local input) INTEGER
9819 * On entry, NBLKS specifies the local number of blocks of co-
9820 * lumns. NBLKS is at least zero.
9821 *
9822 * INBLOC (local input) INTEGER
9823 * On entry, INBLOC specifies the number of columns (size) of
9824 * the local leftmost blocks. INBLOC is at least zero.
9825 *
9826 * NB (global input) INTEGER
9827 * On entry, NB specifies the blocking factor used to partition
9828 * the the columns of the matrix. NB must be at least one.
9829 *
9830 * LNBLOC (local input) INTEGER
9831 * On entry, LNBLOC specifies the number of columns (size) of
9832 * the local rightmost blocks. LNBLOC is at least zero.
9833 *
9834 * JMP (local input) INTEGER array
9835 * On entry, JMP is an array of dimension JMP_LEN containing the
9836 * different jump values used by the random matrix generator.
9837 *
9838 * IMULADD (local input) INTEGER array
9839 * On entry, IMULADD is an array of dimension (4, JMP_LEN). The
9840 * jth column of this array contains the encoded initial cons-
9841 * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) )
9842 * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j)
9843 * contains respectively the 16-lower and 16-higher bits of the
9844 * constant a_j, and IMULADD(3:4,j) contains the 16-lower and
9845 * 16-higher bits of the constant c_j.
9846 *
9847 * -- Written on April 1, 1998 by
9848 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9849 *
9850 * =====================================================================
9851 *
9852 * .. Parameters ..
9853  INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
9854  $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
9855  $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
9856  PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
9857  $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
9858  $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
9859  $ jmp_nqnb = 10, jmp_nqinbloc = 11,
9860  $ jmp_len = 11 )
9861 * ..
9862 * .. Local Scalars ..
9863  INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK,
9864  $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP
9865  DOUBLE PRECISION DUMMY
9866 * ..
9867 * .. Local Arrays ..
9868  INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 )
9869 * ..
9870 * .. External Subroutines ..
9871  EXTERNAL PB_JUMPIT
9872 * ..
9873 * .. External Functions ..
9874  LOGICAL LSAME
9875  DOUBLE PRECISION PB_DRAND
9876  EXTERNAL LSAME, PB_DRAND
9877 * ..
9878 * .. Intrinsic Functions ..
9879  INTRINSIC max, min
9880 * ..
9881 * .. Executable Statements ..
9882 *
9883  DO 10 i = 1, 2
9884  ib1( i ) = iran( i )
9885  ib2( i ) = iran( i )
9886  ib3( i ) = iran( i )
9887  10 CONTINUE
9888 *
9889  IF( lsame( aform, 'N' ) ) THEN
9890 *
9891 * Generate random matrix
9892 *
9893  jj = 1
9894 *
9895  DO 50 jblk = 1, nblks
9896 *
9897  IF( jblk.EQ.1 ) THEN
9898  jb = inbloc
9899  ELSE IF( jblk.EQ.nblks ) THEN
9900  jb = lnbloc
9901  ELSE
9902  jb = nb
9903  END IF
9904 *
9905  DO 40 jk = jj, jj + jb - 1
9906 *
9907  ii = 1
9908 *
9909  DO 30 iblk = 1, mblks
9910 *
9911  IF( iblk.EQ.1 ) THEN
9912  ib = imbloc
9913  ELSE IF( iblk.EQ.mblks ) THEN
9914  ib = lmbloc
9915  ELSE
9916  ib = mb
9917  END IF
9918 *
9919 * Blocks are IB by JB
9920 *
9921  DO 20 ik = ii, ii + ib - 1
9922  a( ik, jk ) = pb_drand( 0 )
9923  20 CONTINUE
9924 *
9925  ii = ii + ib
9926 *
9927  IF( iblk.EQ.1 ) THEN
9928 *
9929 * Jump IMBLOC + ( NPROW - 1 ) * MB rows
9930 *
9931  CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
9932  $ ib0 )
9933 *
9934  ELSE
9935 *
9936 * Jump NPROW * MB rows
9937 *
9938  CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1, ib0 )
9939 *
9940  END IF
9941 *
9942  ib1( 1 ) = ib0( 1 )
9943  ib1( 2 ) = ib0( 2 )
9944 *
9945  30 CONTINUE
9946 *
9947 * Jump one column
9948 *
9949  CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
9950 *
9951  ib1( 1 ) = ib0( 1 )
9952  ib1( 2 ) = ib0( 2 )
9953  ib2( 1 ) = ib0( 1 )
9954  ib2( 2 ) = ib0( 2 )
9955 *
9956  40 CONTINUE
9957 *
9958  jj = jj + jb
9959 *
9960  IF( jblk.EQ.1 ) THEN
9961 *
9962 * Jump INBLOC + ( NPCOL - 1 ) * NB columns
9963 *
9964  CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
9965 *
9966  ELSE
9967 *
9968 * Jump NPCOL * NB columns
9969 *
9970  CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
9971 *
9972  END IF
9973 *
9974  ib1( 1 ) = ib0( 1 )
9975  ib1( 2 ) = ib0( 2 )
9976  ib2( 1 ) = ib0( 1 )
9977  ib2( 2 ) = ib0( 2 )
9978  ib3( 1 ) = ib0( 1 )
9979  ib3( 2 ) = ib0( 2 )
9980 *
9981  50 CONTINUE
9982 *
9983  ELSE IF( lsame( aform, 'T' ) .OR. lsame( aform, 'C' ) ) THEN
9984 *
9985 * Generate the transpose of the matrix that would be normally
9986 * generated.
9987 *
9988  ii = 1
9989 *
9990  DO 90 iblk = 1, mblks
9991 *
9992  IF( iblk.EQ.1 ) THEN
9993  ib = imbloc
9994  ELSE IF( iblk.EQ.mblks ) THEN
9995  ib = lmbloc
9996  ELSE
9997  ib = mb
9998  END IF
9999 *
10000  DO 80 ik = ii, ii + ib - 1
10001 *
10002  jj = 1
10003 *
10004  DO 70 jblk = 1, nblks
10005 *
10006  IF( jblk.EQ.1 ) THEN
10007  jb = inbloc
10008  ELSE IF( jblk.EQ.nblks ) THEN
10009  jb = lnbloc
10010  ELSE
10011  jb = nb
10012  END IF
10013 *
10014 * Blocks are IB by JB
10015 *
10016  DO 60 jk = jj, jj + jb - 1
10017  a( ik, jk ) = pb_drand( 0 )
10018  60 CONTINUE
10019 *
10020  jj = jj + jb
10021 *
10022  IF( jblk.EQ.1 ) THEN
10023 *
10024 * Jump INBLOC + ( NPCOL - 1 ) * NB columns
10025 *
10026  CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
10027  $ ib0 )
10028 *
10029  ELSE
10030 *
10031 * Jump NPCOL * NB columns
10032 *
10033  CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1, ib0 )
10034 *
10035  END IF
10036 *
10037  ib1( 1 ) = ib0( 1 )
10038  ib1( 2 ) = ib0( 2 )
10039 *
10040  70 CONTINUE
10041 *
10042 * Jump one row
10043 *
10044  CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
10045 *
10046  ib1( 1 ) = ib0( 1 )
10047  ib1( 2 ) = ib0( 2 )
10048  ib2( 1 ) = ib0( 1 )
10049  ib2( 2 ) = ib0( 2 )
10050 *
10051  80 CONTINUE
10052 *
10053  ii = ii + ib
10054 *
10055  IF( iblk.EQ.1 ) THEN
10056 *
10057 * Jump IMBLOC + ( NPROW - 1 ) * MB rows
10058 *
10059  CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
10060 *
10061  ELSE
10062 *
10063 * Jump NPROW * MB rows
10064 *
10065  CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
10066 *
10067  END IF
10068 *
10069  ib1( 1 ) = ib0( 1 )
10070  ib1( 2 ) = ib0( 2 )
10071  ib2( 1 ) = ib0( 1 )
10072  ib2( 2 ) = ib0( 2 )
10073  ib3( 1 ) = ib0( 1 )
10074  ib3( 2 ) = ib0( 2 )
10075 *
10076  90 CONTINUE
10077 *
10078  ELSE IF( ( lsame( aform, 'S' ) ).OR.( lsame( aform, 'H' ) ) ) THEN
10079 *
10080 * Generate a symmetric matrix
10081 *
10082  IF( lsame( uplo, 'L' ) ) THEN
10083 *
10084 * generate lower trapezoidal part
10085 *
10086  jj = 1
10087  lcmtc = lcmt00
10088 *
10089  DO 170 jblk = 1, nblks
10090 *
10091  IF( jblk.EQ.1 ) THEN
10092  jb = inbloc
10093  low = 1 - inbloc
10094  ELSE IF( jblk.EQ.nblks ) THEN
10095  jb = lnbloc
10096  low = 1 - nb
10097  ELSE
10098  jb = nb
10099  low = 1 - nb
10100  END IF
10101 *
10102  DO 160 jk = jj, jj + jb - 1
10103 *
10104  ii = 1
10105  lcmtr = lcmtc
10106 *
10107  DO 150 iblk = 1, mblks
10108 *
10109  IF( iblk.EQ.1 ) THEN
10110  ib = imbloc
10111  upp = imbloc - 1
10112  ELSE IF( iblk.EQ.mblks ) THEN
10113  ib = lmbloc
10114  upp = mb - 1
10115  ELSE
10116  ib = mb
10117  upp = mb - 1
10118  END IF
10119 *
10120 * Blocks are IB by JB
10121 *
10122  IF( lcmtr.GT.upp ) THEN
10123 *
10124  DO 100 ik = ii, ii + ib - 1
10125  dummy = pb_drand( 0 )
10126  100 CONTINUE
10127 *
10128  ELSE IF( lcmtr.GE.low ) THEN
10129 *
10130  jtmp = jk - jj + 1
10131  mnb = max( 0, -lcmtr )
10132 *
10133  IF( jtmp.LE.min( mnb, jb ) ) THEN
10134 *
10135  DO 110 ik = ii, ii + ib - 1
10136  a( ik, jk ) = pb_drand( 0 )
10137  110 CONTINUE
10138 *
10139  ELSE IF( ( jtmp.GE.( mnb + 1 ) ) .AND.
10140  $ ( jtmp.LE.min( ib-lcmtr, jb ) ) ) THEN
10141 *
10142  itmp = ii + jtmp + lcmtr - 1
10143 *
10144  DO 120 ik = ii, itmp - 1
10145  dummy = pb_drand( 0 )
10146  120 CONTINUE
10147 *
10148  DO 130 ik = itmp, ii + ib - 1
10149  a( ik, jk ) = pb_drand( 0 )
10150  130 CONTINUE
10151 *
10152  END IF
10153 *
10154  ELSE
10155 *
10156  DO 140 ik = ii, ii + ib - 1
10157  a( ik, jk ) = pb_drand( 0 )
10158  140 CONTINUE
10159 *
10160  END IF
10161 *
10162  ii = ii + ib
10163 *
10164  IF( iblk.EQ.1 ) THEN
10165 *
10166 * Jump IMBLOC + ( NPROW - 1 ) * MB rows
10167 *
10168  lcmtr = lcmtr - jmp( jmp_npimbloc )
10169  CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
10170  $ ib0 )
10171 *
10172  ELSE
10173 *
10174 * Jump NPROW * MB rows
10175 *
10176  lcmtr = lcmtr - jmp( jmp_npmb )
10177  CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1,
10178  $ ib0 )
10179 *
10180  END IF
10181 *
10182  ib1( 1 ) = ib0( 1 )
10183  ib1( 2 ) = ib0( 2 )
10184 *
10185  150 CONTINUE
10186 *
10187 * Jump one column
10188 *
10189  CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
10190 *
10191  ib1( 1 ) = ib0( 1 )
10192  ib1( 2 ) = ib0( 2 )
10193  ib2( 1 ) = ib0( 1 )
10194  ib2( 2 ) = ib0( 2 )
10195 *
10196  160 CONTINUE
10197 *
10198  jj = jj + jb
10199 *
10200  IF( jblk.EQ.1 ) THEN
10201 *
10202 * Jump INBLOC + ( NPCOL - 1 ) * NB columns
10203 *
10204  lcmtc = lcmtc + jmp( jmp_nqinbloc )
10205  CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
10206 *
10207  ELSE
10208 *
10209 * Jump NPCOL * NB columns
10210 *
10211  lcmtc = lcmtc + jmp( jmp_nqnb )
10212  CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
10213 *
10214  END IF
10215 *
10216  ib1( 1 ) = ib0( 1 )
10217  ib1( 2 ) = ib0( 2 )
10218  ib2( 1 ) = ib0( 1 )
10219  ib2( 2 ) = ib0( 2 )
10220  ib3( 1 ) = ib0( 1 )
10221  ib3( 2 ) = ib0( 2 )
10222 *
10223  170 CONTINUE
10224 *
10225  ELSE
10226 *
10227 * generate upper trapezoidal part
10228 *
10229  ii = 1
10230  lcmtr = lcmt00
10231 *
10232  DO 250 iblk = 1, mblks
10233 *
10234  IF( iblk.EQ.1 ) THEN
10235  ib = imbloc
10236  upp = imbloc - 1
10237  ELSE IF( iblk.EQ.mblks ) THEN
10238  ib = lmbloc
10239  upp = mb - 1
10240  ELSE
10241  ib = mb
10242  upp = mb - 1
10243  END IF
10244 *
10245  DO 240 ik = ii, ii + ib - 1
10246 *
10247  jj = 1
10248  lcmtc = lcmtr
10249 *
10250  DO 230 jblk = 1, nblks
10251 *
10252  IF( jblk.EQ.1 ) THEN
10253  jb = inbloc
10254  low = 1 - inbloc
10255  ELSE IF( jblk.EQ.nblks ) THEN
10256  jb = lnbloc
10257  low = 1 - nb
10258  ELSE
10259  jb = nb
10260  low = 1 - nb
10261  END IF
10262 *
10263 * Blocks are IB by JB
10264 *
10265  IF( lcmtc.LT.low ) THEN
10266 *
10267  DO 180 jk = jj, jj + jb - 1
10268  dummy = pb_drand( 0 )
10269  180 CONTINUE
10270 *
10271  ELSE IF( lcmtc.LE.upp ) THEN
10272 *
10273  itmp = ik - ii + 1
10274  mnb = max( 0, lcmtc )
10275 *
10276  IF( itmp.LE.min( mnb, ib ) ) THEN
10277 *
10278  DO 190 jk = jj, jj + jb - 1
10279  a( ik, jk ) = pb_drand( 0 )
10280  190 CONTINUE
10281 *
10282  ELSE IF( ( itmp.GE.( mnb + 1 ) ) .AND.
10283  $ ( itmp.LE.min( jb+lcmtc, ib ) ) ) THEN
10284 *
10285  jtmp = jj + itmp - lcmtc - 1
10286 *
10287  DO 200 jk = jj, jtmp - 1
10288  dummy = pb_drand( 0 )
10289  200 CONTINUE
10290 *
10291  DO 210 jk = jtmp, jj + jb - 1
10292  a( ik, jk ) = pb_drand( 0 )
10293  210 CONTINUE
10294 *
10295  END IF
10296 *
10297  ELSE
10298 *
10299  DO 220 jk = jj, jj + jb - 1
10300  a( ik, jk ) = pb_drand( 0 )
10301  220 CONTINUE
10302 *
10303  END IF
10304 *
10305  jj = jj + jb
10306 *
10307  IF( jblk.EQ.1 ) THEN
10308 *
10309 * Jump INBLOC + ( NPCOL - 1 ) * NB columns
10310 *
10311  lcmtc = lcmtc + jmp( jmp_nqinbloc )
10312  CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
10313  $ ib0 )
10314 *
10315  ELSE
10316 *
10317 * Jump NPCOL * NB columns
10318 *
10319  lcmtc = lcmtc + jmp( jmp_nqnb )
10320  CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1,
10321  $ ib0 )
10322 *
10323  END IF
10324 *
10325  ib1( 1 ) = ib0( 1 )
10326  ib1( 2 ) = ib0( 2 )
10327 *
10328  230 CONTINUE
10329 *
10330 * Jump one row
10331 *
10332  CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
10333 *
10334  ib1( 1 ) = ib0( 1 )
10335  ib1( 2 ) = ib0( 2 )
10336  ib2( 1 ) = ib0( 1 )
10337  ib2( 2 ) = ib0( 2 )
10338 *
10339  240 CONTINUE
10340 *
10341  ii = ii + ib
10342 *
10343  IF( iblk.EQ.1 ) THEN
10344 *
10345 * Jump IMBLOC + ( NPROW - 1 ) * MB rows
10346 *
10347  lcmtr = lcmtr - jmp( jmp_npimbloc )
10348  CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
10349 *
10350  ELSE
10351 *
10352 * Jump NPROW * MB rows
10353 *
10354  lcmtr = lcmtr - jmp( jmp_npmb )
10355  CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
10356 *
10357  END IF
10358 *
10359  ib1( 1 ) = ib0( 1 )
10360  ib1( 2 ) = ib0( 2 )
10361  ib2( 1 ) = ib0( 1 )
10362  ib2( 2 ) = ib0( 2 )
10363  ib3( 1 ) = ib0( 1 )
10364  ib3( 2 ) = ib0( 2 )
10365 *
10366  250 CONTINUE
10367 *
10368  END IF
10369 *
10370  END IF
10371 *
10372  RETURN
10373 *
10374 * End of PB_DLAGEN
10375 *
10376  END
10377  DOUBLE PRECISION FUNCTION pb_drand( IDUMM )
10379 * -- PBLAS test routine (version 2.0) --
10380 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10381 * and University of California, Berkeley.
10382 * April 1, 1998
10383 *
10384 * .. Scalar Arguments ..
10385  INTEGER idumm
10386 * ..
10387 *
10388 * Purpose
10389 * =======
10390 *
10391 * PB_DRAND generates the next number in the random sequence. This func-
10392 * tion ensures that this number will be in the interval ( -1.0, 1.0 ).
10393 *
10394 * Arguments
10395 * =========
10396 *
10397 * IDUMM (local input) INTEGER
10398 * This argument is ignored, but necessary to a FORTRAN 77 func-
10399 * tion.
10400 *
10401 * Further Details
10402 * ===============
10403 *
10404 * On entry, the array IRAND stored in the common block RANCOM contains
10405 * the information (2 integers) required to generate the next number in
10406 * the sequence X( n ). This number is computed as
10407 *
10408 * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
10409 *
10410 * where the constant d is the largest 32 bit positive integer. The
10411 * array IRAND is then updated for the generation of the next number
10412 * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
10413 * The constants a and c should have been preliminarily stored in the
10414 * array IACS as 2 pairs of integers. The initial set up of IRAND and
10415 * IACS is performed by the routine PB_SETRAN.
10416 *
10417 * -- Written on April 1, 1998 by
10418 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10419 *
10420 * =====================================================================
10421 *
10422 * .. Parameters ..
10423  DOUBLE PRECISION one, two
10424  PARAMETER ( one = 1.0d+0, two = 2.0d+0 )
10425 * ..
10426 * .. External Functions ..
10427  DOUBLE PRECISION pb_dran
10428  EXTERNAL pb_dran
10429 * ..
10430 * .. Executable Statements ..
10431 *
10432  pb_drand = one - two * pb_dran( idumm )
10433 *
10434  RETURN
10435 *
10436 * End of PB_DRAND
10437 *
10438  END
10439  DOUBLE PRECISION FUNCTION pb_dran( IDUMM )
10441 * -- PBLAS test routine (version 2.0) --
10442 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10443 * and University of California, Berkeley.
10444 * April 1, 1998
10445 *
10446 * .. Scalar Arguments ..
10447  INTEGER idumm
10448 * ..
10449 *
10450 * Purpose
10451 * =======
10452 *
10453 * PB_DRAN generates the next number in the random sequence.
10454 *
10455 * Arguments
10456 * =========
10457 *
10458 * IDUMM (local input) INTEGER
10459 * This argument is ignored, but necessary to a FORTRAN 77 func-
10460 * tion.
10461 *
10462 * Further Details
10463 * ===============
10464 *
10465 * On entry, the array IRAND stored in the common block RANCOM contains
10466 * the information (2 integers) required to generate the next number in
10467 * the sequence X( n ). This number is computed as
10468 *
10469 * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
10470 *
10471 * where the constant d is the largest 32 bit positive integer. The
10472 * array IRAND is then updated for the generation of the next number
10473 * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
10474 * The constants a and c should have been preliminarily stored in the
10475 * array IACS as 2 pairs of integers. The initial set up of IRAND and
10476 * IACS is performed by the routine PB_SETRAN.
10477 *
10478 * -- Written on April 1, 1998 by
10479 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10480 *
10481 * =====================================================================
10482 *
10483 * .. Parameters ..
10484  DOUBLE PRECISION divfac, pow16
10485  PARAMETER ( divfac = 2.147483648d+9,
10486  $ pow16 = 6.5536d+4 )
10487 * ..
10488 * .. Local Arrays ..
10489  INTEGER j( 2 )
10490 * ..
10491 * .. External Subroutines ..
10492  EXTERNAL pb_ladd, pb_lmul
10493 * ..
10494 * .. Intrinsic Functions ..
10495  INTRINSIC dble
10496 * ..
10497 * .. Common Blocks ..
10498  INTEGER iacs( 4 ), irand( 2 )
10499  common /rancom/ irand, iacs
10500 * ..
10501 * .. Save Statements ..
10502  SAVE /rancom/
10503 * ..
10504 * .. Executable Statements ..
10505 *
10506  pb_dran = ( dble( irand( 1 ) ) + pow16 * dble( irand( 2 ) ) ) /
10507  $ divfac
10508 *
10509  CALL pb_lmul( irand, iacs, j )
10510  CALL pb_ladd( j, iacs( 3 ), irand )
10511 *
10512  RETURN
10513 *
10514 * End of PB_DRAN
10515 *
10516  END
pdlascal
subroutine pdlascal(TYPE, M, N, ALPHA, A, IA, JA, DESCA)
Definition: pdblastst.f:7337
pb_ladd
subroutine pb_ladd(J, K, I)
Definition: pblastst.f:4480
pdcallsub
subroutine pdcallsub(SUBPTR, SCODE)
Definition: pdblastst.f:2180
max
#define max(A, B)
Definition: pcgemr.c:180
pdchkdim
subroutine pdchkdim(ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, ARGPOS)
Definition: pdblastst.f:759
pb_setlocran
subroutine pb_setlocran(SEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, MYRDIST, MYCDIST, NPROW, NPCOL, JMP, IMULADD, IRAN)
Definition: pblastst.f:4302
pdchkopt
subroutine pdchkopt(ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, ARGPOS)
Definition: pdblastst.f:266
pb_setran
subroutine pb_setran(IRAN, IAC)
Definition: pblastst.f:4759
pdmmch
subroutine pdmmch(ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, JC, DESCC, CT, G, ERR, INFO)
Definition: pdblastst.f:5272
pdchkvout
subroutine pdchkvout(N, X, PX, IX, JX, DESCX, INCX, INFO)
Definition: pdblastst.f:2870
pdchkvin
subroutine pdchkvin(ERRMAX, N, X, PX, IX, JX, DESCX, INCX, INFO)
Definition: pdblastst.f:2576
pb_descset2
subroutine pb_descset2(DESC, M, N, IMB, INB, MB, NB, RSRC, CSRC, CTXT, LLD)
Definition: pblastst.f:3172
pddimee
subroutine pddimee(ICTXT, NOUT, SUBPTR, SCODE, SNAME)
Definition: pdblastst.f:455
pb_lmul
subroutine pb_lmul(K, J, I)
Definition: pblastst.f:4559
pdmmch2
subroutine pdmmch2(ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, JC, DESCC, CT, G, ERR, INFO)
Definition: pdblastst.f:5996
pdvecee
subroutine pdvecee(ICTXT, NOUT, SUBPTR, SCODE, SNAME)
Definition: pdblastst.f:936
pdvmch2
subroutine pdvmch2(ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA, DESCA, G, ERR, INFO)
Definition: pdblastst.f:4919
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pb_pdlaprnt
subroutine pb_pdlaprnt(M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM, NOUT, WORK)
Definition: pdblastst.f:8634
pdladom
subroutine pdladom(INPLACE, N, ALPHA, A, IA, JA, DESCA)
Definition: pdblastst.f:8242
dlamch
double precision function dlamch(CMACH)
Definition: tools.f:10
pdchkmat
subroutine pdchkmat(ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, ARGPOS)
Definition: pdblastst.f:1674
pb_desctrans
subroutine pb_desctrans(DESCIN, DESCOUT)
Definition: pblastst.f:2964
pb_dlaset
subroutine pb_dlaset(UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA)
Definition: pdblastst.f:9359
pdoptee
subroutine pdoptee(ICTXT, NOUT, SUBPTR, SCODE, SNAME)
Definition: pdblastst.f:2
pdmmch1
subroutine pdmmch1(ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, PC, IC, JC, DESCC, CT, G, ERR, INFO)
Definition: pdblastst.f:5649
pdmatee
subroutine pdmatee(ICTXT, NOUT, SUBPTR, SCODE, SNAME)
Definition: pdblastst.f:1190
pb_dlagen
subroutine pb_dlagen(UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, LNBLOC, JMP, IMULADD)
Definition: pdblastst.f:9737
pb_dran
double precision function pb_dran(IDUMM)
Definition: pdblastst.f:10440
pb_pdlaprn2
subroutine pb_pdlaprn2(M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM, NOUT, PROW, PCOL, WORK)
Definition: pdblastst.f:8848
pderrset
subroutine pderrset(ERR, ERRMAX, XTRUE, X)
Definition: pdblastst.f:2456
pb_jumpit
subroutine pb_jumpit(MULADD, IRANN, IRANM)
Definition: pblastst.f:4822
pdmvch
subroutine pdmvch(ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, DESCY, INCY, G, ERR, INFO)
Definition: pdblastst.f:4157
pderraxpby
subroutine pderraxpby(ERRBND, ALPHA, X, BETA, Y, PREC)
Definition: pdblastst.f:6684
pb_infog2l
subroutine pb_infog2l(I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, II, JJ, PROW, PCOL)
Definition: pblastst.f:1673
pdsetpblas
subroutine pdsetpblas(ICTXT)
Definition: pdblastst.f:1478
pb_drand
double precision function pb_drand(IDUMM)
Definition: pdblastst.f:10378
pb_initmuladd
subroutine pb_initmuladd(MULADD0, JMP, IMULADD)
Definition: pblastst.f:4196
pb_ainfog2l
subroutine pb_ainfog2l(M, N, I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, IMB1, INB1, MP, NQ, II, JJ, PROW, PCOL, RPROW, RPCOL)
Definition: pblastst.f:2023
pdlaset
subroutine pdlaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pdblastst.f:6862
pb_initjmp
subroutine pb_initjmp(COLMAJ, NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, MB, NB, RSRC, CSRC, NPROW, NPCOL, STRIDE, JMP)
Definition: pblastst.f:4045
pchkpbe
subroutine pchkpbe(ICTXT, NOUT, SNAME, INFOT)
Definition: pblastst.f:1084
pdchkmout
subroutine pdchkmout(M, N, A, PA, IA, JA, DESCA, INFO)
Definition: pdblastst.f:3627
pdlagen
subroutine pdlagen(INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, DESCA, IASEED, A, LDA)
Definition: pdblastst.f:7845
pb_dchekpad
subroutine pb_dchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdblastst.f:9192
pdmmch3
subroutine pdmmch3(UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, PC, IC, JC, DESCC, ERR, INFO)
Definition: pdblastst.f:6372
pb_jump
subroutine pb_jump(K, MULADD, IRANN, IRANM, IMA)
Definition: pblastst.f:4648
pdvprnt
subroutine pdvprnt(ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT, CVECNM)
Definition: pdblastst.f:4056
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pdchkmin
subroutine pdchkmin(ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO)
Definition: pdblastst.f:3326
pb_binfo
subroutine pb_binfo(OFFD, M, N, IMB1, INB1, MB, NB, MRROW, MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP)
Definition: pblastst.f:3577
pb_dfillpad
subroutine pb_dfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdblastst.f:9079
pb_chkmat
subroutine pb_chkmat(ICTXT, M, MPOS0, N, NPOS0, IA, JA, DESCA, DPOS0, INFO)
Definition: pblastst.f:2742
pb_dlascal
subroutine pb_dlascal(UPLO, M, N, IOFFD, ALPHA, A, LDA)
Definition: pdblastst.f:9556
pdlamch
double precision function pdlamch(ICTXT, CMACH)
Definition: pdblastst.f:6769
pb_locinfo
subroutine pb_locinfo(I, INB, NB, MYROC, SRCPROC, NPROCS, ILOCBLK, ILOCOFF, MYDIST)
Definition: pblastst.f:3910
pdmprnt
subroutine pdmprnt(ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT, CMATNM)
Definition: pdblastst.f:3949
pdvmch
subroutine pdvmch(ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA, DESCA, G, ERR, INFO)
Definition: pdblastst.f:4570
min
#define min(A, B)
Definition: pcgemr.c:181