SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdblastst.f
Go to the documentation of this file.
1 SUBROUTINE pdoptee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
2*
3* -- PBLAS test routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* .. Scalar Arguments ..
9 INTEGER ICTXT, NOUT, SCODE
10* ..
11* .. Array Arguments ..
12 CHARACTER*(*) SNAME
13* ..
14* .. Subroutine Arguments ..
15 EXTERNAL subptr
16* ..
17*
18* Purpose
19* =======
20*
21* PDOPTEE tests whether the PBLAS respond correctly to a bad option
22* argument.
23*
24* Notes
25* =====
26*
27* A description vector is associated with each 2D block-cyclicly dis-
28* tributed matrix. This vector stores the information required to
29* establish the mapping between a matrix entry and its corresponding
30* process and memory location.
31*
32* In the following comments, the character _ should be read as
33* "of the distributed matrix". Let A be a generic term for any 2D
34* block cyclicly distributed matrix. Its description vector is DESCA:
35*
36* NOTATION STORED IN EXPLANATION
37* ---------------- --------------- ------------------------------------
38* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
39* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
40* the NPROW x NPCOL BLACS process grid
41* A is distributed over. The context
42* itself is global, but the handle
43* (the integer value) may vary.
44* M_A (global) DESCA( M_ ) The number of rows in the distribu-
45* ted matrix A, M_A >= 0.
46* N_A (global) DESCA( N_ ) The number of columns in the distri-
47* buted matrix A, N_A >= 0.
48* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
49* block of the matrix A, IMB_A > 0.
50* INB_A (global) DESCA( INB_ ) The number of columns of the upper
51* left block of the matrix A,
52* INB_A > 0.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
54* bute the last M_A-IMB_A rows of A,
55* MB_A > 0.
56* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
57* bute the last N_A-INB_A columns of
58* A, NB_A > 0.
59* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60* row of the matrix A is distributed,
61* NPROW > RSRC_A >= 0.
62* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63* first column of A is distributed.
64* NPCOL > CSRC_A >= 0.
65* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66* array storing the local blocks of
67* the distributed matrix A,
68* IF( Lc( 1, N_A ) > 0 )
69* LLD_A >= MAX( 1, Lr( 1, M_A ) )
70* ELSE
71* LLD_A >= 1.
72*
73* Let K be the number of rows of a matrix A starting at the global in-
74* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
75* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
76* receive if these K rows were distributed over NPROW processes. If K
77* is the number of columns of a matrix A starting at the global index
78* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
79* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
80* these K columns were distributed over NPCOL processes.
81*
82* The values of Lr() and Lc() may be determined via a call to the func-
83* tion PB_NUMROC:
84* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
85* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
86*
87* Arguments
88* =========
89*
90* ICTXT (local input) INTEGER
91* On entry, ICTXT specifies the BLACS context handle, indica-
92* ting the global context of the operation. The context itself
93* is global, but the value of ICTXT is local.
94*
95* NOUT (global input) INTEGER
96* On entry, NOUT specifies the unit number for the output file.
97* When NOUT is 6, output to screen, when NOUT is 0, output to
98* stderr. NOUT is only defined for process 0.
99*
100* SUBPTR (global input) SUBROUTINE
101* On entry, SUBPTR is a subroutine. SUBPTR must be declared
102* EXTERNAL in the calling subroutine.
103*
104* SCODE (global input) INTEGER
105* On entry, SCODE specifies the calling sequence code.
106*
107* SNAME (global input) CHARACTER*(*)
108* On entry, SNAME specifies the subroutine name calling this
109* subprogram.
110*
111* Calling sequence encodings
112* ==========================
113*
114* code Formal argument list Examples
115*
116* 11 (n, v1,v2) _SWAP, _COPY
117* 12 (n,s1, v1 ) _SCAL, _SCAL
118* 13 (n,s1, v1,v2) _AXPY, _DOT_
119* 14 (n,s1,i1,v1 ) _AMAX
120* 15 (n,u1, v1 ) _ASUM, _NRM2
121*
122* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
123* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
124* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
125* 24 ( m,n,s1,v1,v2,m1) _GER_
126* 25 (uplo, n,s1,v1, m1) _SYR
127* 26 (uplo, n,u1,v1, m1) _HER
128* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
129*
130* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
131* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
132* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
133* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
134* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
135* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
136* 37 ( m,n, s1,m1, s2,m3) _TRAN_
137* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
138* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
139* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
140*
141* -- Written on April 1, 1998 by
142* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
143*
144* =====================================================================
145*
146* .. Local Scalars ..
147 INTEGER APOS
148* ..
149* .. External Subroutines ..
150 EXTERNAL pdchkopt
151* ..
152* .. Executable Statements ..
153*
154* Level 2 PBLAS
155*
156 IF( scode.EQ.21 ) THEN
157*
158* Check 1st (and only) option
159*
160 apos = 1
161 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
162*
163 ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
164 $ scode.EQ.27 ) THEN
165*
166* Check 1st (and only) option
167*
168 apos = 1
169 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
170*
171 ELSE IF( scode.EQ.23 ) THEN
172*
173* Check 1st option
174*
175 apos = 1
176 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
177*
178* Check 2nd option
179*
180 apos = 2
181 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
182*
183* Check 3rd option
184*
185 apos = 3
186 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
187*
188* Level 3 PBLAS
189*
190 ELSE IF( scode.EQ.31 ) THEN
191*
192* Check 1st option
193*
194 apos = 1
195 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
196*
197* Check 2'nd option
198*
199 apos = 2
200 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'B', apos )
201*
202 ELSE IF( scode.EQ.32 ) THEN
203*
204* Check 1st option
205*
206 apos = 1
207 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
208*
209* Check 2nd option
210*
211 apos = 2
212 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
213*
214 ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
215 $ scode.EQ.36 .OR. scode.EQ.40 ) THEN
216*
217* Check 1st option
218*
219 apos = 1
220 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
221*
222* Check 2'nd option
223*
224 apos = 2
225 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
226*
227 ELSE IF( scode.EQ.38 ) THEN
228*
229* Check 1st option
230*
231 apos = 1
232 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
233*
234* Check 2nd option
235*
236 apos = 2
237 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
238*
239* Check 3rd option
240*
241 apos = 3
242 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
243*
244* Check 4th option
245*
246 apos = 4
247 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
248*
249*
250 ELSE IF( scode.EQ.39 ) THEN
251*
252* Check 1st option
253*
254 apos = 1
255 CALL pdchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
256*
257 END IF
258*
259 RETURN
260*
261* End of PDOPTEE
262*
263 END
264 SUBROUTINE pdchkopt( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
265 $ ARGPOS )
266*
267* -- PBLAS test routine (version 2.0) --
268* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
269* and University of California, Berkeley.
270* April 1, 1998
271*
272* .. Scalar Arguments ..
273 CHARACTER*1 ARGNAM
274 INTEGER ARGPOS, ICTXT, NOUT, SCODE
275* ..
276* .. Array Arguments ..
277 CHARACTER*(*) SNAME
278* ..
279* .. Subroutine Arguments ..
280 EXTERNAL subptr
281* ..
282*
283* Purpose
284* =======
285*
286* PDCHKOPT tests the option ARGNAM in any PBLAS routine.
287*
288* Notes
289* =====
290*
291* A description vector is associated with each 2D block-cyclicly dis-
292* tributed matrix. This vector stores the information required to
293* establish the mapping between a matrix entry and its corresponding
294* process and memory location.
295*
296* In the following comments, the character _ should be read as
297* "of the distributed matrix". Let A be a generic term for any 2D
298* block cyclicly distributed matrix. Its description vector is DESCA:
299*
300* NOTATION STORED IN EXPLANATION
301* ---------------- --------------- ------------------------------------
302* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
303* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
304* the NPROW x NPCOL BLACS process grid
305* A is distributed over. The context
306* itself is global, but the handle
307* (the integer value) may vary.
308* M_A (global) DESCA( M_ ) The number of rows in the distribu-
309* ted matrix A, M_A >= 0.
310* N_A (global) DESCA( N_ ) The number of columns in the distri-
311* buted matrix A, N_A >= 0.
312* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
313* block of the matrix A, IMB_A > 0.
314* INB_A (global) DESCA( INB_ ) The number of columns of the upper
315* left block of the matrix A,
316* INB_A > 0.
317* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
318* bute the last M_A-IMB_A rows of A,
319* MB_A > 0.
320* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
321* bute the last N_A-INB_A columns of
322* A, NB_A > 0.
323* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
324* row of the matrix A is distributed,
325* NPROW > RSRC_A >= 0.
326* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
327* first column of A is distributed.
328* NPCOL > CSRC_A >= 0.
329* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
330* array storing the local blocks of
331* the distributed matrix A,
332* IF( Lc( 1, N_A ) > 0 )
333* LLD_A >= MAX( 1, Lr( 1, M_A ) )
334* ELSE
335* LLD_A >= 1.
336*
337* Let K be the number of rows of a matrix A starting at the global in-
338* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
339* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
340* receive if these K rows were distributed over NPROW processes. If K
341* is the number of columns of a matrix A starting at the global index
342* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
343* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
344* these K columns were distributed over NPCOL processes.
345*
346* The values of Lr() and Lc() may be determined via a call to the func-
347* tion PB_NUMROC:
348* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
349* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
350*
351* Arguments
352* =========
353*
354* ICTXT (local input) INTEGER
355* On entry, ICTXT specifies the BLACS context handle, indica-
356* ting the global context of the operation. The context itself
357* is global, but the value of ICTXT is local.
358*
359* NOUT (global input) INTEGER
360* On entry, NOUT specifies the unit number for the output file.
361* When NOUT is 6, output to screen, when NOUT is 0, output to
362* stderr. NOUT is only defined for process 0.
363*
364* SUBPTR (global input) SUBROUTINE
365* On entry, SUBPTR is a subroutine. SUBPTR must be declared
366* EXTERNAL in the calling subroutine.
367*
368* SCODE (global input) INTEGER
369* On entry, SCODE specifies the calling sequence code.
370*
371* SNAME (global input) CHARACTER*(*)
372* On entry, SNAME specifies the subroutine name calling this
373* subprogram.
374*
375* ARGNAM (global input) CHARACTER*(*)
376* On entry, ARGNAM specifies the name of the option to be
377* checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'.
378*
379* ARGPOS (global input) INTEGER
380* On entry, ARGPOS indicates the position of the option ARGNAM
381* to be tested.
382*
383* -- Written on April 1, 1998 by
384* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
385*
386* =====================================================================
387*
388* .. Local Scalars ..
389 INTEGER INFOT
390* ..
391* .. External Subroutines ..
392 EXTERNAL pchkpbe, pdcallsub, pdsetpblas
393* ..
394* .. External Functions ..
395 LOGICAL LSAME
396 EXTERNAL lsame
397* ..
398* .. Common Blocks ..
399 CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO
400 COMMON /pblasc/diag, side, transa, transb, uplo
401* ..
402* .. Executable Statements ..
403*
404* Reiniatilize the dummy arguments to correct values
405*
406 CALL pdsetpblas( ictxt )
407*
408 IF( lsame( argnam, 'D' ) ) THEN
409*
410* Generate bad DIAG option
411*
412 diag = '/'
413*
414 ELSE IF( lsame( argnam, 'S' ) ) THEN
415*
416* Generate bad SIDE option
417*
418 side = '/'
419*
420 ELSE IF( lsame( argnam, 'A' ) ) THEN
421*
422* Generate bad TRANSA option
423*
424 transa = '/'
425*
426 ELSE IF( lsame( argnam, 'B' ) ) THEN
427*
428* Generate bad TRANSB option
429*
430 transb = '/'
431*
432 ELSE IF( lsame( argnam, 'U' ) ) THEN
433*
434* Generate bad UPLO option
435*
436 uplo = '/'
437*
438 END IF
439*
440* Set INFOT to the position of the bad dimension argument
441*
442 infot = argpos
443*
444* Call the PBLAS routine
445*
446 CALL pdcallsub( subptr, scode )
447 CALL pchkpbe( ictxt, nout, sname, infot )
448*
449 RETURN
450*
451* End of PDCHKOPT
452*
453 END
454 SUBROUTINE pddimee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
455*
456* -- PBLAS test routine (version 2.0) --
457* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
458* and University of California, Berkeley.
459* April 1, 1998
460*
461* .. Scalar Arguments ..
462 INTEGER ICTXT, NOUT, SCODE
463* ..
464* .. Array Arguments ..
465 CHARACTER*(*) SNAME
466* ..
467* .. Subroutine Arguments ..
468 EXTERNAL subptr
469* ..
470*
471* Purpose
472* =======
473*
474* PDDIMEE tests whether the PBLAS respond correctly to a bad dimension
475* argument.
476*
477* Notes
478* =====
479*
480* A description vector is associated with each 2D block-cyclicly dis-
481* tributed matrix. This vector stores the information required to
482* establish the mapping between a matrix entry and its corresponding
483* process and memory location.
484*
485* In the following comments, the character _ should be read as
486* "of the distributed matrix". Let A be a generic term for any 2D
487* block cyclicly distributed matrix. Its description vector is DESCA:
488*
489* NOTATION STORED IN EXPLANATION
490* ---------------- --------------- ------------------------------------
491* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
492* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
493* the NPROW x NPCOL BLACS process grid
494* A is distributed over. The context
495* itself is global, but the handle
496* (the integer value) may vary.
497* M_A (global) DESCA( M_ ) The number of rows in the distribu-
498* ted matrix A, M_A >= 0.
499* N_A (global) DESCA( N_ ) The number of columns in the distri-
500* buted matrix A, N_A >= 0.
501* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
502* block of the matrix A, IMB_A > 0.
503* INB_A (global) DESCA( INB_ ) The number of columns of the upper
504* left block of the matrix A,
505* INB_A > 0.
506* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
507* bute the last M_A-IMB_A rows of A,
508* MB_A > 0.
509* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
510* bute the last N_A-INB_A columns of
511* A, NB_A > 0.
512* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
513* row of the matrix A is distributed,
514* NPROW > RSRC_A >= 0.
515* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
516* first column of A is distributed.
517* NPCOL > CSRC_A >= 0.
518* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
519* array storing the local blocks of
520* the distributed matrix A,
521* IF( Lc( 1, N_A ) > 0 )
522* LLD_A >= MAX( 1, Lr( 1, M_A ) )
523* ELSE
524* LLD_A >= 1.
525*
526* Let K be the number of rows of a matrix A starting at the global in-
527* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
528* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
529* receive if these K rows were distributed over NPROW processes. If K
530* is the number of columns of a matrix A starting at the global index
531* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
532* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
533* these K columns were distributed over NPCOL processes.
534*
535* The values of Lr() and Lc() may be determined via a call to the func-
536* tion PB_NUMROC:
537* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
538* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
539*
540* Arguments
541* =========
542*
543* ICTXT (local input) INTEGER
544* On entry, ICTXT specifies the BLACS context handle, indica-
545* ting the global context of the operation. The context itself
546* is global, but the value of ICTXT is local.
547*
548* NOUT (global input) INTEGER
549* On entry, NOUT specifies the unit number for the output file.
550* When NOUT is 6, output to screen, when NOUT is 0, output to
551* stderr. NOUT is only defined for process 0.
552*
553* SUBPTR (global input) SUBROUTINE
554* On entry, SUBPTR is a subroutine. SUBPTR must be declared
555* EXTERNAL in the calling subroutine.
556*
557* SCODE (global input) INTEGER
558* On entry, SCODE specifies the calling sequence code.
559*
560* SNAME (global input) CHARACTER*(*)
561* On entry, SNAME specifies the subroutine name calling this
562* subprogram.
563*
564* Calling sequence encodings
565* ==========================
566*
567* code Formal argument list Examples
568*
569* 11 (n, v1,v2) _SWAP, _COPY
570* 12 (n,s1, v1 ) _SCAL, _SCAL
571* 13 (n,s1, v1,v2) _AXPY, _DOT_
572* 14 (n,s1,i1,v1 ) _AMAX
573* 15 (n,u1, v1 ) _ASUM, _NRM2
574*
575* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
576* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
577* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
578* 24 ( m,n,s1,v1,v2,m1) _GER_
579* 25 (uplo, n,s1,v1, m1) _SYR
580* 26 (uplo, n,u1,v1, m1) _HER
581* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
582*
583* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
584* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
585* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
586* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
587* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
588* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
589* 37 ( m,n, s1,m1, s2,m3) _TRAN_
590* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
591* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
592* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
593*
594* -- Written on April 1, 1998 by
595* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
596*
597* =====================================================================
598*
599* .. Local Scalars ..
600 INTEGER APOS
601* ..
602* .. External Subroutines ..
603 EXTERNAL pdchkdim
604* ..
605* .. Executable Statements ..
606*
607* Level 1 PBLAS
608*
609 IF( scode.EQ.11 .OR. scode.EQ.12 .OR. scode.EQ.13 .OR.
610 $ scode.EQ.14 .OR. scode.EQ.15 ) THEN
611*
612* Check 1st (and only) dimension
613*
614 apos = 1
615 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
616*
617* Level 2 PBLAS
618*
619 ELSE IF( scode.EQ.21 ) THEN
620*
621* Check 1st dimension
622*
623 apos = 2
624 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
625*
626* Check 2nd dimension
627*
628 apos = 3
629 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
630*
631 ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
632 $ scode.EQ.27 ) THEN
633*
634* Check 1st (and only) dimension
635*
636 apos = 2
637 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
638*
639 ELSE IF( scode.EQ.23 ) THEN
640*
641* Check 1st (and only) dimension
642*
643 apos = 4
644 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
645*
646 ELSE IF( scode.EQ.24 ) THEN
647*
648* Check 1st dimension
649*
650 apos = 1
651 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
652*
653* Check 2nd dimension
654*
655 apos = 2
656 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
657*
658* Level 3 PBLAS
659*
660 ELSE IF( scode.EQ.31 ) THEN
661*
662* Check 1st dimension
663*
664 apos = 3
665 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
666*
667* Check 2nd dimension
668*
669 apos = 4
670 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
671*
672* Check 3rd dimension
673*
674 apos = 5
675 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
676*
677 ELSE IF( scode.EQ.32 ) THEN
678*
679* Check 1st dimension
680*
681 apos = 3
682 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
683*
684* Check 2nd dimension
685*
686 apos = 4
687 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
688*
689 ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
690 $ scode.EQ.36 ) THEN
691*
692* Check 1st dimension
693*
694 apos = 3
695 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
696*
697* Check 2nd dimension
698*
699 apos = 4
700 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
701*
702 ELSE IF( scode.EQ.37 ) THEN
703*
704* Check 1st dimension
705*
706 apos = 1
707 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
708*
709* Check 2nd dimension
710*
711 apos = 2
712 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
713*
714 ELSE IF( scode.EQ.38 ) THEN
715*
716* Check 1st dimension
717*
718 apos = 5
719 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
720*
721* Check 2nd dimension
722*
723 apos = 6
724 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
725*
726 ELSE IF( scode.EQ.39 ) THEN
727*
728* Check 1st dimension
729*
730 apos = 2
731 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
732*
733* Check 2nd dimension
734*
735 apos = 3
736 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
737*
738 ELSE IF( scode.EQ.40 ) THEN
739*
740* Check 1st dimension
741*
742 apos = 3
743 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
744*
745* Check 2nd dimension
746*
747 apos = 4
748 CALL pdchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
749*
750 END IF
751*
752 RETURN
753*
754* End of PDDIMEE
755*
756 END
757 SUBROUTINE pdchkdim( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
758 $ ARGPOS )
759*
760* -- PBLAS test routine (version 2.0) --
761* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
762* and University of California, Berkeley.
763* April 1, 1998
764*
765* .. Scalar Arguments ..
766 CHARACTER*1 ARGNAM
767 INTEGER ARGPOS, ICTXT, NOUT, SCODE
768* ..
769* .. Array Arguments ..
770 CHARACTER*(*) SNAME
771* ..
772* .. Subroutine Arguments ..
773 EXTERNAL subptr
774* ..
775*
776* Purpose
777* =======
778*
779* PDCHKDIM tests the dimension ARGNAM in any PBLAS routine.
780*
781* Notes
782* =====
783*
784* A description vector is associated with each 2D block-cyclicly dis-
785* tributed matrix. This vector stores the information required to
786* establish the mapping between a matrix entry and its corresponding
787* process and memory location.
788*
789* In the following comments, the character _ should be read as
790* "of the distributed matrix". Let A be a generic term for any 2D
791* block cyclicly distributed matrix. Its description vector is DESCA:
792*
793* NOTATION STORED IN EXPLANATION
794* ---------------- --------------- ------------------------------------
795* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
796* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
797* the NPROW x NPCOL BLACS process grid
798* A is distributed over. The context
799* itself is global, but the handle
800* (the integer value) may vary.
801* M_A (global) DESCA( M_ ) The number of rows in the distribu-
802* ted matrix A, M_A >= 0.
803* N_A (global) DESCA( N_ ) The number of columns in the distri-
804* buted matrix A, N_A >= 0.
805* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
806* block of the matrix A, IMB_A > 0.
807* INB_A (global) DESCA( INB_ ) The number of columns of the upper
808* left block of the matrix A,
809* INB_A > 0.
810* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
811* bute the last M_A-IMB_A rows of A,
812* MB_A > 0.
813* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
814* bute the last N_A-INB_A columns of
815* A, NB_A > 0.
816* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
817* row of the matrix A is distributed,
818* NPROW > RSRC_A >= 0.
819* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
820* first column of A is distributed.
821* NPCOL > CSRC_A >= 0.
822* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
823* array storing the local blocks of
824* the distributed matrix A,
825* IF( Lc( 1, N_A ) > 0 )
826* LLD_A >= MAX( 1, Lr( 1, M_A ) )
827* ELSE
828* LLD_A >= 1.
829*
830* Let K be the number of rows of a matrix A starting at the global in-
831* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
832* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
833* receive if these K rows were distributed over NPROW processes. If K
834* is the number of columns of a matrix A starting at the global index
835* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
836* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
837* these K columns were distributed over NPCOL processes.
838*
839* The values of Lr() and Lc() may be determined via a call to the func-
840* tion PB_NUMROC:
841* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
842* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
843*
844* Arguments
845* =========
846*
847* ICTXT (local input) INTEGER
848* On entry, ICTXT specifies the BLACS context handle, indica-
849* ting the global context of the operation. The context itself
850* is global, but the value of ICTXT is local.
851*
852* NOUT (global input) INTEGER
853* On entry, NOUT specifies the unit number for the output file.
854* When NOUT is 6, output to screen, when NOUT is 0, output to
855* stderr. NOUT is only defined for process 0.
856*
857* SUBPTR (global input) SUBROUTINE
858* On entry, SUBPTR is a subroutine. SUBPTR must be declared
859* EXTERNAL in the calling subroutine.
860*
861* SCODE (global input) INTEGER
862* On entry, SCODE specifies the calling sequence code.
863*
864* SNAME (global input) CHARACTER*(*)
865* On entry, SNAME specifies the subroutine name calling this
866* subprogram.
867*
868* ARGNAM (global input) CHARACTER*(*)
869* On entry, ARGNAM specifies the name of the dimension to be
870* checked. ARGNAM can either be 'M', 'N' or 'K'.
871*
872* ARGPOS (global input) INTEGER
873* On entry, ARGPOS indicates the position of the option ARGNAM
874* to be tested.
875*
876* -- Written on April 1, 1998 by
877* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
878*
879* =====================================================================
880*
881* .. Local Scalars ..
882 INTEGER INFOT
883* ..
884* .. External Subroutines ..
885 EXTERNAL pchkpbe, pdcallsub, pdsetpblas
886* ..
887* .. External Functions ..
888 LOGICAL LSAME
889 EXTERNAL LSAME
890* ..
891* .. Common Blocks ..
892 INTEGER KDIM, MDIM, NDIM
893 COMMON /PBLASN/KDIM, MDIM, NDIM
894* ..
895* .. Executable Statements ..
896*
897* Reiniatilize the dummy arguments to correct values
898*
899 CALL pdsetpblas( ictxt )
900*
901 IF( lsame( argnam, 'M' ) ) THEN
902*
903* Generate bad MDIM
904*
905 mdim = -1
906*
907 ELSE IF( lsame( argnam, 'N' ) ) THEN
908*
909* Generate bad NDIM
910*
911 ndim = -1
912*
913 ELSE
914*
915* Generate bad KDIM
916*
917 kdim = -1
918*
919 END IF
920*
921* Set INFOT to the position of the bad dimension argument
922*
923 infot = argpos
924*
925* Call the PBLAS routine
926*
927 CALL pdcallsub( subptr, scode )
928 CALL pchkpbe( ictxt, nout, sname, infot )
929*
930 RETURN
931*
932* End of PDCHKDIM
933*
934 END
935 SUBROUTINE pdvecee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
936*
937* -- PBLAS test routine (version 2.0) --
938* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
939* and University of California, Berkeley.
940* April 1, 1998
941*
942* .. Scalar Arguments ..
943 INTEGER ICTXT, NOUT, SCODE
944* ..
945* .. Array Arguments ..
946 CHARACTER*7 SNAME
947* ..
948* .. Subroutine Arguments ..
949 EXTERNAL subptr
950* ..
951*
952* Purpose
953* =======
954*
955* PDVECEE tests whether the PBLAS respond correctly to a bad vector
956* argument. Each vector <vec> is described by: <vec>, I<vec>, J<vec>,
957* DESC<vec>, INC<vec>. Out of all these, only I<vec>, J<vec>,
958* DESC<vec>, and INC<vec> can be tested.
959*
960* Notes
961* =====
962*
963* A description vector is associated with each 2D block-cyclicly dis-
964* tributed matrix. This vector stores the information required to
965* establish the mapping between a matrix entry and its corresponding
966* process and memory location.
967*
968* In the following comments, the character _ should be read as
969* "of the distributed matrix". Let A be a generic term for any 2D
970* block cyclicly distributed matrix. Its description vector is DESCA:
971*
972* NOTATION STORED IN EXPLANATION
973* ---------------- --------------- ------------------------------------
974* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
975* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
976* the NPROW x NPCOL BLACS process grid
977* A is distributed over. The context
978* itself is global, but the handle
979* (the integer value) may vary.
980* M_A (global) DESCA( M_ ) The number of rows in the distribu-
981* ted matrix A, M_A >= 0.
982* N_A (global) DESCA( N_ ) The number of columns in the distri-
983* buted matrix A, N_A >= 0.
984* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
985* block of the matrix A, IMB_A > 0.
986* INB_A (global) DESCA( INB_ ) The number of columns of the upper
987* left block of the matrix A,
988* INB_A > 0.
989* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
990* bute the last M_A-IMB_A rows of A,
991* MB_A > 0.
992* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
993* bute the last N_A-INB_A columns of
994* A, NB_A > 0.
995* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
996* row of the matrix A is distributed,
997* NPROW > RSRC_A >= 0.
998* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
999* first column of A is distributed.
1000* NPCOL > CSRC_A >= 0.
1001* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1002* array storing the local blocks of
1003* the distributed matrix A,
1004* IF( Lc( 1, N_A ) > 0 )
1005* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1006* ELSE
1007* LLD_A >= 1.
1008*
1009* Let K be the number of rows of a matrix A starting at the global in-
1010* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1011* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1012* receive if these K rows were distributed over NPROW processes. If K
1013* is the number of columns of a matrix A starting at the global index
1014* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1015* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1016* these K columns were distributed over NPCOL processes.
1017*
1018* The values of Lr() and Lc() may be determined via a call to the func-
1019* tion PB_NUMROC:
1020* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1021* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1022*
1023* Arguments
1024* =========
1025*
1026* ICTXT (local input) INTEGER
1027* On entry, ICTXT specifies the BLACS context handle, indica-
1028* ting the global context of the operation. The context itself
1029* is global, but the value of ICTXT is local.
1030*
1031* NOUT (global input) INTEGER
1032* On entry, NOUT specifies the unit number for the output file.
1033* When NOUT is 6, output to screen, when NOUT is 0, output to
1034* stderr. NOUT is only defined for process 0.
1035*
1036* SUBPTR (global input) SUBROUTINE
1037* On entry, SUBPTR is a subroutine. SUBPTR must be declared
1038* EXTERNAL in the calling subroutine.
1039*
1040* SCODE (global input) INTEGER
1041* On entry, SCODE specifies the calling sequence code.
1042*
1043* SNAME (global input) CHARACTER*(*)
1044* On entry, SNAME specifies the subroutine name calling this
1045* subprogram.
1046*
1047* Calling sequence encodings
1048* ==========================
1049*
1050* code Formal argument list Examples
1051*
1052* 11 (n, v1,v2) _SWAP, _COPY
1053* 12 (n,s1, v1 ) _SCAL, _SCAL
1054* 13 (n,s1, v1,v2) _AXPY, _DOT_
1055* 14 (n,s1,i1,v1 ) _AMAX
1056* 15 (n,u1, v1 ) _ASUM, _NRM2
1057*
1058* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1059* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1060* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1061* 24 ( m,n,s1,v1,v2,m1) _GER_
1062* 25 (uplo, n,s1,v1, m1) _SYR
1063* 26 (uplo, n,u1,v1, m1) _HER
1064* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1065*
1066* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1067* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1068* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1069* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1070* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1071* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1072* 37 ( m,n, s1,m1, s2,m3) _TRAN_
1073* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1074* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1075* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1076*
1077* -- Written on April 1, 1998 by
1078* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1079*
1080* =====================================================================
1081*
1082* .. Local Scalars ..
1083 INTEGER APOS
1084* ..
1085* .. External Subroutines ..
1086 EXTERNAL pdchkmat
1087* ..
1088* .. Executable Statements ..
1089*
1090* Level 1 PBLAS
1091*
1092 IF( scode.EQ.11 ) THEN
1093*
1094* Check 1st vector
1095*
1096 apos = 2
1097 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1098*
1099* Check 2nd vector
1100*
1101 apos = 7
1102 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1103*
1104 ELSE IF( scode.EQ.12 .OR. scode.EQ.15 ) THEN
1105*
1106* Check 1st (and only) vector
1107*
1108 apos = 3
1109 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1110*
1111 ELSE IF( scode.EQ.13 ) THEN
1112*
1113* Check 1st vector
1114*
1115 apos = 3
1116 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1117*
1118* Check 2nd vector
1119*
1120 apos = 8
1121 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1122*
1123 ELSE IF( scode.EQ.14 ) THEN
1124*
1125* Check 1st (and only) vector
1126*
1127 apos = 4
1128 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1129*
1130* Level 2 PBLAS
1131*
1132 ELSE IF( scode.EQ.21 ) THEN
1133*
1134* Check 1st vector
1135*
1136 apos = 9
1137 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1138*
1139* Check 2nd vector
1140*
1141 apos = 15
1142 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1143*
1144 ELSE IF( scode.EQ.22 ) THEN
1145*
1146* Check 1st vector
1147*
1148 apos = 8
1149 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1150*
1151* Check 2nd vector
1152*
1153 apos = 14
1154 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1155*
1156 ELSE IF( scode.EQ.23 ) THEN
1157*
1158* Check 1st (and only) vector
1159*
1160 apos = 9
1161 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1162*
1163 ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1164*
1165* Check 1st vector
1166*
1167 apos = 4
1168 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1169*
1170* Check 2nd vector
1171*
1172 apos = 9
1173 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1174*
1175 ELSE IF( scode.EQ.26 .OR. scode.EQ.27 ) THEN
1176*
1177* Check 1'st (and only) vector
1178*
1179 apos = 4
1180 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1181*
1182 END IF
1183*
1184 RETURN
1185*
1186* End of PDVECEE
1187*
1188 END
1189 SUBROUTINE pdmatee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
1190*
1191* -- PBLAS test routine (version 2.0) --
1192* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1193* and University of California, Berkeley.
1194* April 1, 1998
1195*
1196* .. Scalar Arguments ..
1197 INTEGER ICTXT, NOUT, SCODE
1198* ..
1199* .. Array Arguments ..
1200 CHARACTER*7 SNAME
1201* ..
1202* .. Subroutine Arguments ..
1203 EXTERNAL subptr
1204* ..
1205*
1206* Purpose
1207* =======
1208*
1209* PDMATEE tests whether the PBLAS respond correctly to a bad matrix
1210* argument. Each matrix <mat> is described by: <mat>, I<mat>, J<mat>,
1211* and DESC<mat>. Out of all these, only I<vec>, J<vec> and DESC<mat>
1212* can be tested.
1213*
1214* Notes
1215* =====
1216*
1217* A description vector is associated with each 2D block-cyclicly dis-
1218* tributed matrix. This vector stores the information required to
1219* establish the mapping between a matrix entry and its corresponding
1220* process and memory location.
1221*
1222* In the following comments, the character _ should be read as
1223* "of the distributed matrix". Let A be a generic term for any 2D
1224* block cyclicly distributed matrix. Its description vector is DESCA:
1225*
1226* NOTATION STORED IN EXPLANATION
1227* ---------------- --------------- ------------------------------------
1228* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1229* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1230* the NPROW x NPCOL BLACS process grid
1231* A is distributed over. The context
1232* itself is global, but the handle
1233* (the integer value) may vary.
1234* M_A (global) DESCA( M_ ) The number of rows in the distribu-
1235* ted matrix A, M_A >= 0.
1236* N_A (global) DESCA( N_ ) The number of columns in the distri-
1237* buted matrix A, N_A >= 0.
1238* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1239* block of the matrix A, IMB_A > 0.
1240* INB_A (global) DESCA( INB_ ) The number of columns of the upper
1241* left block of the matrix A,
1242* INB_A > 0.
1243* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1244* bute the last M_A-IMB_A rows of A,
1245* MB_A > 0.
1246* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1247* bute the last N_A-INB_A columns of
1248* A, NB_A > 0.
1249* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1250* row of the matrix A is distributed,
1251* NPROW > RSRC_A >= 0.
1252* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1253* first column of A is distributed.
1254* NPCOL > CSRC_A >= 0.
1255* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1256* array storing the local blocks of
1257* the distributed matrix A,
1258* IF( Lc( 1, N_A ) > 0 )
1259* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1260* ELSE
1261* LLD_A >= 1.
1262*
1263* Let K be the number of rows of a matrix A starting at the global in-
1264* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1265* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1266* receive if these K rows were distributed over NPROW processes. If K
1267* is the number of columns of a matrix A starting at the global index
1268* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1269* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1270* these K columns were distributed over NPCOL processes.
1271*
1272* The values of Lr() and Lc() may be determined via a call to the func-
1273* tion PB_NUMROC:
1274* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1275* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1276*
1277* Arguments
1278* =========
1279*
1280* ICTXT (local input) INTEGER
1281* On entry, ICTXT specifies the BLACS context handle, indica-
1282* ting the global context of the operation. The context itself
1283* is global, but the value of ICTXT is local.
1284*
1285* NOUT (global input) INTEGER
1286* On entry, NOUT specifies the unit number for the output file.
1287* When NOUT is 6, output to screen, when NOUT is 0, output to
1288* stderr. NOUT is only defined for process 0.
1289*
1290* SUBPTR (global input) SUBROUTINE
1291* On entry, SUBPTR is a subroutine. SUBPTR must be declared
1292* EXTERNAL in the calling subroutine.
1293*
1294* SCODE (global input) INTEGER
1295* On entry, SCODE specifies the calling sequence code.
1296*
1297* SNAME (global input) CHARACTER*(*)
1298* On entry, SNAME specifies the subroutine name calling this
1299* subprogram.
1300*
1301* Calling sequence encodings
1302* ==========================
1303*
1304* code Formal argument list Examples
1305*
1306* 11 (n, v1,v2) _SWAP, _COPY
1307* 12 (n,s1, v1 ) _SCAL, _SCAL
1308* 13 (n,s1, v1,v2) _AXPY, _DOT_
1309* 14 (n,s1,i1,v1 ) _AMAX
1310* 15 (n,u1, v1 ) _ASUM, _NRM2
1311*
1312* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1313* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1314* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1315* 24 ( m,n,s1,v1,v2,m1) _GER_
1316* 25 (uplo, n,s1,v1, m1) _SYR
1317* 26 (uplo, n,u1,v1, m1) _HER
1318* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1319*
1320* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1321* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1322* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1323* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1324* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1325* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1326* 37 ( m,n, s1,m1, s2,m3) _TRAN_
1327* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1328* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1329* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1330*
1331* -- Written on April 1, 1998 by
1332* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1333*
1334* =====================================================================
1335*
1336* .. Local Scalars ..
1337 INTEGER APOS
1338* ..
1339* .. External Subroutines ..
1340 EXTERNAL pdchkmat
1341* ..
1342* .. Executable Statements ..
1343*
1344* Level 2 PBLAS
1345*
1346 IF( scode.EQ.21 .OR. scode.EQ.23 ) THEN
1347*
1348* Check 1st (and only) matrix
1349*
1350 apos = 5
1351 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1352*
1353 ELSE IF( scode.EQ.22 ) THEN
1354*
1355* Check 1st (and only) matrix
1356*
1357 apos = 4
1358 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1359*
1360 ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1361*
1362* Check 1st (and only) matrix
1363*
1364 apos = 14
1365 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1366*
1367 ELSE IF( scode.EQ.25 .OR. scode.EQ.26 ) THEN
1368*
1369* Check 1st (and only) matrix
1370*
1371 apos = 9
1372 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1373*
1374* Level 3 PBLAS
1375*
1376 ELSE IF( scode.EQ.31 ) THEN
1377*
1378* Check 1st matrix
1379*
1380 apos = 7
1381 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1382*
1383* Check 2nd matrix
1384*
1385 apos = 11
1386 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1387*
1388* Check 3nd matrix
1389*
1390 apos = 16
1391 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1392*
1393 ELSE IF( scode.EQ.32 .OR. scode.EQ.35 .OR. scode.EQ.36 ) THEN
1394*
1395* Check 1st matrix
1396*
1397 apos = 6
1398 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1399*
1400* Check 2nd matrix
1401*
1402 apos = 10
1403 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1404*
1405* Check 3nd matrix
1406*
1407 apos = 15
1408 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1409*
1410 ELSE IF( scode.EQ.33 .OR. scode.EQ.34 ) THEN
1411*
1412* Check 1st matrix
1413*
1414 apos = 6
1415 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1416*
1417* Check 2nd matrix
1418*
1419 apos = 11
1420 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1421*
1422 ELSE IF( scode.EQ.37 ) THEN
1423*
1424* Check 1st matrix
1425*
1426 apos = 4
1427 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1428*
1429* Check 2nd matrix
1430*
1431 apos = 9
1432 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1433*
1434 ELSE IF( scode.EQ.38 ) THEN
1435*
1436* Check 1st matrix
1437*
1438 apos = 8
1439 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1440*
1441* Check 2nd matrix
1442*
1443 apos = 12
1444 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1445*
1446 ELSE IF( scode.EQ.39 ) THEN
1447*
1448* Check 1st matrix
1449*
1450 apos = 5
1451 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1452*
1453* Check 2nd matrix
1454*
1455 apos = 10
1456 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1457*
1458 ELSE IF( scode.EQ.40 ) THEN
1459*
1460* Check 1st matrix
1461*
1462 apos = 6
1463 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1464*
1465* Check 2nd matrix
1466*
1467 apos = 11
1468 CALL pdchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1469*
1470 END IF
1471*
1472 RETURN
1473*
1474* End of PDMATEE
1475*
1476 END
1477 SUBROUTINE pdsetpblas( ICTXT )
1478*
1479* -- PBLAS test routine (version 2.0) --
1480* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1481* and University of California, Berkeley.
1482* April 1, 1998
1483*
1484* .. Scalar Arguments ..
1485 INTEGER ICTXT
1486* ..
1487*
1488* Purpose
1489* =======
1490*
1491* PDSETPBLAS initializes *all* the dummy arguments to correct values.
1492*
1493* Notes
1494* =====
1495*
1496* A description vector is associated with each 2D block-cyclicly dis-
1497* tributed matrix. This vector stores the information required to
1498* establish the mapping between a matrix entry and its corresponding
1499* process and memory location.
1500*
1501* In the following comments, the character _ should be read as
1502* "of the distributed matrix". Let A be a generic term for any 2D
1503* block cyclicly distributed matrix. Its description vector is DESCA:
1504*
1505* NOTATION STORED IN EXPLANATION
1506* ---------------- --------------- ------------------------------------
1507* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1508* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1509* the NPROW x NPCOL BLACS process grid
1510* A is distributed over. The context
1511* itself is global, but the handle
1512* (the integer value) may vary.
1513* M_A (global) DESCA( M_ ) The number of rows in the distribu-
1514* ted matrix A, M_A >= 0.
1515* N_A (global) DESCA( N_ ) The number of columns in the distri-
1516* buted matrix A, N_A >= 0.
1517* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1518* block of the matrix A, IMB_A > 0.
1519* INB_A (global) DESCA( INB_ ) The number of columns of the upper
1520* left block of the matrix A,
1521* INB_A > 0.
1522* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1523* bute the last M_A-IMB_A rows of A,
1524* MB_A > 0.
1525* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1526* bute the last N_A-INB_A columns of
1527* A, NB_A > 0.
1528* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1529* row of the matrix A is distributed,
1530* NPROW > RSRC_A >= 0.
1531* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1532* first column of A is distributed.
1533* NPCOL > CSRC_A >= 0.
1534* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1535* array storing the local blocks of
1536* the distributed matrix A,
1537* IF( Lc( 1, N_A ) > 0 )
1538* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1539* ELSE
1540* LLD_A >= 1.
1541*
1542* Let K be the number of rows of a matrix A starting at the global in-
1543* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1544* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1545* receive if these K rows were distributed over NPROW processes. If K
1546* is the number of columns of a matrix A starting at the global index
1547* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1548* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1549* these K columns were distributed over NPCOL processes.
1550*
1551* The values of Lr() and Lc() may be determined via a call to the func-
1552* tion PB_NUMROC:
1553* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1554* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1555*
1556* Arguments
1557* =========
1558*
1559* ICTXT (local input) INTEGER
1560* On entry, ICTXT specifies the BLACS context handle, indica-
1561* ting the global context of the operation. The context itself
1562* is global, but the value of ICTXT is local.
1563*
1564* -- Written on April 1, 1998 by
1565* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1566*
1567* =====================================================================
1568*
1569* .. Parameters ..
1570 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1571 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1572 $ rsrc_
1573 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1574 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1575 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1576 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1577 DOUBLE PRECISION ONE
1578 PARAMETER ( ONE = 1.0d+0 )
1579* ..
1580* .. External Subroutines ..
1581 EXTERNAL pb_descset2
1582* ..
1583* .. Common Blocks ..
1584 CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
1585 INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1586 $ jc, jx, jy, kdim, mdim, ndim
1587 DOUBLE PRECISION USCLR, SCLR
1588 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1589 $ descx( dlen_ ), descy( dlen_ )
1590 DOUBLE PRECISION A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
1591 COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO
1592 COMMON /pblasd/desca, descb, descc, descx, descy
1593 COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1594 $ ja, jb, jc, jx, jy
1595 COMMON /pblasm/a, b, c
1596 COMMON /pblasn/kdim, mdim, ndim
1597 COMMON /pblass/sclr, usclr
1598 COMMON /pblasv/x, y
1599* ..
1600* .. Executable Statements ..
1601*
1602* Set default values for options
1603*
1604 diag = 'N'
1605 side = 'L'
1606 transa = 'N'
1607 transb = 'N'
1608 uplo = 'U'
1609*
1610* Set default values for scalars
1611*
1612 kdim = 1
1613 mdim = 1
1614 ndim = 1
1615 isclr = 1
1616 sclr = one
1617 usclr = one
1618*
1619* Set default values for distributed matrix A
1620*
1621 a( 1, 1 ) = one
1622 a( 2, 1 ) = one
1623 a( 1, 2 ) = one
1624 a( 2, 2 ) = one
1625 ia = 1
1626 ja = 1
1627 CALL pb_descset2( desca, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1628*
1629* Set default values for distributed matrix B
1630*
1631 b( 1, 1 ) = one
1632 b( 2, 1 ) = one
1633 b( 1, 2 ) = one
1634 b( 2, 2 ) = one
1635 ib = 1
1636 jb = 1
1637 CALL pb_descset2( descb, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1638*
1639* Set default values for distributed matrix C
1640*
1641 c( 1, 1 ) = one
1642 c( 2, 1 ) = one
1643 c( 1, 2 ) = one
1644 c( 2, 2 ) = one
1645 ic = 1
1646 jc = 1
1647 CALL pb_descset2( descc, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1648*
1649* Set default values for distributed matrix X
1650*
1651 x( 1 ) = one
1652 x( 2 ) = one
1653 ix = 1
1654 jx = 1
1655 CALL pb_descset2( descx, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1656 incx = 1
1657*
1658* Set default values for distributed matrix Y
1659*
1660 y( 1 ) = one
1661 y( 2 ) = one
1662 iy = 1
1663 jy = 1
1664 CALL pb_descset2( descy, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1665 incy = 1
1666*
1667 RETURN
1668*
1669* End of PDSETPBLAS
1670*
1671 END
1672 SUBROUTINE pdchkmat( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
1673 $ ARGPOS )
1674*
1675* -- PBLAS test routine (version 2.0) --
1676* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1677* and University of California, Berkeley.
1678* April 1, 1998
1679*
1680* .. Scalar Arguments ..
1681 CHARACTER*1 ARGNAM
1682 INTEGER ARGPOS, ICTXT, NOUT, SCODE
1683* ..
1684* .. Array Arguments ..
1685 CHARACTER*(*) SNAME
1686* ..
1687* .. Subroutine Arguments ..
1688 EXTERNAL subptr
1689* ..
1690*
1691* Purpose
1692* =======
1693*
1694* PDCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine.
1695*
1696* Notes
1697* =====
1698*
1699* A description vector is associated with each 2D block-cyclicly dis-
1700* tributed matrix. This vector stores the information required to
1701* establish the mapping between a matrix entry and its corresponding
1702* process and memory location.
1703*
1704* In the following comments, the character _ should be read as
1705* "of the distributed matrix". Let A be a generic term for any 2D
1706* block cyclicly distributed matrix. Its description vector is DESCA:
1707*
1708* NOTATION STORED IN EXPLANATION
1709* ---------------- --------------- ------------------------------------
1710* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1711* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1712* the NPROW x NPCOL BLACS process grid
1713* A is distributed over. The context
1714* itself is global, but the handle
1715* (the integer value) may vary.
1716* M_A (global) DESCA( M_ ) The number of rows in the distribu-
1717* ted matrix A, M_A >= 0.
1718* N_A (global) DESCA( N_ ) The number of columns in the distri-
1719* buted matrix A, N_A >= 0.
1720* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1721* block of the matrix A, IMB_A > 0.
1722* INB_A (global) DESCA( INB_ ) The number of columns of the upper
1723* left block of the matrix A,
1724* INB_A > 0.
1725* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1726* bute the last M_A-IMB_A rows of A,
1727* MB_A > 0.
1728* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1729* bute the last N_A-INB_A columns of
1730* A, NB_A > 0.
1731* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1732* row of the matrix A is distributed,
1733* NPROW > RSRC_A >= 0.
1734* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1735* first column of A is distributed.
1736* NPCOL > CSRC_A >= 0.
1737* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1738* array storing the local blocks of
1739* the distributed matrix A,
1740* IF( Lc( 1, N_A ) > 0 )
1741* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1742* ELSE
1743* LLD_A >= 1.
1744*
1745* Let K be the number of rows of a matrix A starting at the global in-
1746* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1747* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1748* receive if these K rows were distributed over NPROW processes. If K
1749* is the number of columns of a matrix A starting at the global index
1750* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1751* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1752* these K columns were distributed over NPCOL processes.
1753*
1754* The values of Lr() and Lc() may be determined via a call to the func-
1755* tion PB_NUMROC:
1756* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1757* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1758*
1759* Arguments
1760* =========
1761*
1762* ICTXT (local input) INTEGER
1763* On entry, ICTXT specifies the BLACS context handle, indica-
1764* ting the global context of the operation. The context itself
1765* is global, but the value of ICTXT is local.
1766*
1767* NOUT (global input) INTEGER
1768* On entry, NOUT specifies the unit number for the output file.
1769* When NOUT is 6, output to screen, when NOUT is 0, output to
1770* stderr. NOUT is only defined for process 0.
1771*
1772* SUBPTR (global input) SUBROUTINE
1773* On entry, SUBPTR is a subroutine. SUBPTR must be declared
1774* EXTERNAL in the calling subroutine.
1775*
1776* SCODE (global input) INTEGER
1777* On entry, SCODE specifies the calling sequence code.
1778*
1779* SNAME (global input) CHARACTER*(*)
1780* On entry, SNAME specifies the subroutine name calling this
1781* subprogram.
1782*
1783* ARGNAM (global input) CHARACTER*(*)
1784* On entry, ARGNAM specifies the name of the matrix or vector
1785* to be checked. ARGNAM can either be 'A', 'B' or 'C' when one
1786* wants to check a matrix, and 'X' or 'Y' for a vector.
1787*
1788* ARGPOS (global input) INTEGER
1789* On entry, ARGPOS indicates the position of the first argument
1790* of the matrix (or vector) ARGNAM.
1791*
1792* -- Written on April 1, 1998 by
1793* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1794*
1795* =====================================================================
1796*
1797* .. Parameters ..
1798 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1799 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1800 $ RSRC_
1801 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1802 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1803 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1804 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1805 INTEGER DESCMULT
1806 PARAMETER ( DESCMULT = 100 )
1807* ..
1808* .. Local Scalars ..
1809 INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL
1810* ..
1811* .. External Subroutines ..
1812 EXTERNAL blacs_gridinfo, pchkpbe, pdcallsub, pdsetpblas
1813* ..
1814* .. External Functions ..
1815 LOGICAL LSAME
1816 EXTERNAL LSAME
1817* ..
1818* .. Common Blocks ..
1819 INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1820 $ JC, JX, JY
1821 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1822 $ descx( dlen_ ), descy( dlen_ )
1823 COMMON /pblasd/desca, descb, descc, descx, descy
1824 COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1825 $ ja, jb, jc, jx, jy
1826* ..
1827* .. Executable Statements ..
1828*
1829 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1830*
1831 IF( lsame( argnam, 'A' ) ) THEN
1832*
1833* Check IA. Set all other OK, bad IA
1834*
1835 CALL pdsetpblas( ictxt )
1836 ia = -1
1837 infot = argpos + 1
1838 CALL pdcallsub( subptr, scode )
1839 CALL pchkpbe( ictxt, nout, sname, infot )
1840*
1841* Check JA. Set all other OK, bad JA
1842*
1843 CALL pdsetpblas( ictxt )
1844 ja = -1
1845 infot = argpos + 2
1846 CALL pdcallsub( subptr, scode )
1847 CALL pchkpbe( ictxt, nout, sname, infot )
1848*
1849* Check DESCA. Set all other OK, bad DESCA
1850*
1851 DO 10 i = 1, dlen_
1852*
1853* Set I'th entry of DESCA to incorrect value, rest ok.
1854*
1855 CALL pdsetpblas( ictxt )
1856 desca( i ) = -2
1857 infot = ( ( argpos + 3 ) * descmult ) + i
1858 CALL pdcallsub( subptr, scode )
1859 CALL pchkpbe( ictxt, nout, sname, infot )
1860*
1861* Extra tests for RSRCA, CSRCA, LDA
1862*
1863 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1864 $ ( i.EQ.lld_ ) ) THEN
1865*
1866 CALL pdsetpblas( ictxt )
1867*
1868* Test RSRCA >= NPROW
1869*
1870 IF( i.EQ.rsrc_ )
1871 $ desca( i ) = nprow
1872*
1873* Test CSRCA >= NPCOL
1874*
1875 IF( i.EQ.csrc_ )
1876 $ desca( i ) = npcol
1877*
1878* Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1879*
1880 IF( i.EQ.lld_ ) THEN
1881 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1882 desca( i ) = 1
1883 ELSE
1884 desca( i ) = 0
1885 END IF
1886 END IF
1887*
1888 infot = ( ( argpos + 3 ) * descmult ) + i
1889 CALL pdcallsub( subptr, scode )
1890 CALL pchkpbe( ictxt, nout, sname, infot )
1891*
1892 END IF
1893*
1894 10 CONTINUE
1895*
1896 ELSE IF( lsame( argnam, 'B' ) ) THEN
1897*
1898* Check IB. Set all other OK, bad IB
1899*
1900 CALL pdsetpblas( ictxt )
1901 ib = -1
1902 infot = argpos + 1
1903 CALL pdcallsub( subptr, scode )
1904 CALL pchkpbe( ictxt, nout, sname, infot )
1905*
1906* Check JB. Set all other OK, bad JB
1907*
1908 CALL pdsetpblas( ictxt )
1909 jb = -1
1910 infot = argpos + 2
1911 CALL pdcallsub( subptr, scode )
1912 CALL pchkpbe( ictxt, nout, sname, infot )
1913*
1914* Check DESCB. Set all other OK, bad DESCB
1915*
1916 DO 20 i = 1, dlen_
1917*
1918* Set I'th entry of DESCB to incorrect value, rest ok.
1919*
1920 CALL pdsetpblas( ictxt )
1921 descb( i ) = -2
1922 infot = ( ( argpos + 3 ) * descmult ) + i
1923 CALL pdcallsub( subptr, scode )
1924 CALL pchkpbe( ictxt, nout, sname, infot )
1925*
1926* Extra tests for RSRCB, CSRCB, LDB
1927*
1928 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1929 $ ( i.EQ.lld_ ) ) THEN
1930*
1931 CALL pdsetpblas( ictxt )
1932*
1933* Test RSRCB >= NPROW
1934*
1935 IF( i.EQ.rsrc_ )
1936 $ descb( i ) = nprow
1937*
1938* Test CSRCB >= NPCOL
1939*
1940 IF( i.EQ.csrc_ )
1941 $ descb( i ) = npcol
1942*
1943* Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1944*
1945 IF( i.EQ.lld_ ) THEN
1946 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1947 descb( i ) = 1
1948 ELSE
1949 descb( i ) = 0
1950 END IF
1951 END IF
1952*
1953 infot = ( ( argpos + 3 ) * descmult ) + i
1954 CALL pdcallsub( subptr, scode )
1955 CALL pchkpbe( ictxt, nout, sname, infot )
1956*
1957 END IF
1958*
1959 20 CONTINUE
1960*
1961 ELSE IF( lsame( argnam, 'C' ) ) THEN
1962*
1963* Check IC. Set all other OK, bad IC
1964*
1965 CALL pdsetpblas( ictxt )
1966 ic = -1
1967 infot = argpos + 1
1968 CALL pdcallsub( subptr, scode )
1969 CALL pchkpbe( ictxt, nout, sname, infot )
1970*
1971* Check JC. Set all other OK, bad JC
1972*
1973 CALL pdsetpblas( ictxt )
1974 jc = -1
1975 infot = argpos + 2
1976 CALL pdcallsub( subptr, scode )
1977 CALL pchkpbe( ictxt, nout, sname, infot )
1978*
1979* Check DESCC. Set all other OK, bad DESCC
1980*
1981 DO 30 i = 1, dlen_
1982*
1983* Set I'th entry of DESCC to incorrect value, rest ok.
1984*
1985 CALL pdsetpblas( ictxt )
1986 descc( i ) = -2
1987 infot = ( ( argpos + 3 ) * descmult ) + i
1988 CALL pdcallsub( subptr, scode )
1989 CALL pchkpbe( ictxt, nout, sname, infot )
1990*
1991* Extra tests for RSRCC, CSRCC, LDC
1992*
1993 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1994 $ ( i.EQ.lld_ ) ) THEN
1995*
1996 CALL pdsetpblas( ictxt )
1997*
1998* Test RSRCC >= NPROW
1999*
2000 IF( i.EQ.rsrc_ )
2001 $ descc( i ) = nprow
2002*
2003* Test CSRCC >= NPCOL
2004*
2005 IF( i.EQ.csrc_ )
2006 $ descc( i ) = npcol
2007*
2008* Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2009*
2010 IF( i.EQ.lld_ ) THEN
2011 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2012 descc( i ) = 1
2013 ELSE
2014 descc( i ) = 0
2015 END IF
2016 END IF
2017*
2018 infot = ( ( argpos + 3 ) * descmult ) + i
2019 CALL pdcallsub( subptr, scode )
2020 CALL pchkpbe( ictxt, nout, sname, infot )
2021*
2022 END IF
2023*
2024 30 CONTINUE
2025*
2026 ELSE IF( lsame( argnam, 'X' ) ) THEN
2027*
2028* Check IX. Set all other OK, bad IX
2029*
2030 CALL pdsetpblas( ictxt )
2031 ix = -1
2032 infot = argpos + 1
2033 CALL pdcallsub( subptr, scode )
2034 CALL pchkpbe( ictxt, nout, sname, infot )
2035*
2036* Check JX. Set all other OK, bad JX
2037*
2038 CALL pdsetpblas( ictxt )
2039 jx = -1
2040 infot = argpos + 2
2041 CALL pdcallsub( subptr, scode )
2042 CALL pchkpbe( ictxt, nout, sname, infot )
2043*
2044* Check DESCX. Set all other OK, bad DESCX
2045*
2046 DO 40 i = 1, dlen_
2047*
2048* Set I'th entry of DESCX to incorrect value, rest ok.
2049*
2050 CALL pdsetpblas( ictxt )
2051 descx( i ) = -2
2052 infot = ( ( argpos + 3 ) * descmult ) + i
2053 CALL pdcallsub( subptr, scode )
2054 CALL pchkpbe( ictxt, nout, sname, infot )
2055*
2056* Extra tests for RSRCX, CSRCX, LDX
2057*
2058 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2059 $ ( i.EQ.lld_ ) ) THEN
2060*
2061 CALL pdsetpblas( ictxt )
2062*
2063* Test RSRCX >= NPROW
2064*
2065 IF( i.EQ.rsrc_ )
2066 $ descx( i ) = nprow
2067*
2068* Test CSRCX >= NPCOL
2069*
2070 IF( i.EQ.csrc_ )
2071 $ descx( i ) = npcol
2072*
2073* Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2074*
2075 IF( i.EQ.lld_ ) THEN
2076 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2077 descx( i ) = 1
2078 ELSE
2079 descx( i ) = 0
2080 END IF
2081 END IF
2082*
2083 infot = ( ( argpos + 3 ) * descmult ) + i
2084 CALL pdcallsub( subptr, scode )
2085 CALL pchkpbe( ictxt, nout, sname, infot )
2086*
2087 END IF
2088*
2089 40 CONTINUE
2090*
2091* Check INCX. Set all other OK, bad INCX
2092*
2093 CALL pdsetpblas( ictxt )
2094 incx = -1
2095 infot = argpos + 4
2096 CALL pdcallsub( subptr, scode )
2097 CALL pchkpbe( ictxt, nout, sname, infot )
2098*
2099 ELSE
2100*
2101* Check IY. Set all other OK, bad IY
2102*
2103 CALL pdsetpblas( ictxt )
2104 iy = -1
2105 infot = argpos + 1
2106 CALL pdcallsub( subptr, scode )
2107 CALL pchkpbe( ictxt, nout, sname, infot )
2108*
2109* Check JY. Set all other OK, bad JY
2110*
2111 CALL pdsetpblas( ictxt )
2112 jy = -1
2113 infot = argpos + 2
2114 CALL pdcallsub( subptr, scode )
2115 CALL pchkpbe( ictxt, nout, sname, infot )
2116*
2117* Check DESCY. Set all other OK, bad DESCY
2118*
2119 DO 50 i = 1, dlen_
2120*
2121* Set I'th entry of DESCY to incorrect value, rest ok.
2122*
2123 CALL pdsetpblas( ictxt )
2124 descy( i ) = -2
2125 infot = ( ( argpos + 3 ) * descmult ) + i
2126 CALL pdcallsub( subptr, scode )
2127 CALL pchkpbe( ictxt, nout, sname, infot )
2128*
2129* Extra tests for RSRCY, CSRCY, LDY
2130*
2131 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2132 $ ( i.EQ.lld_ ) ) THEN
2133*
2134 CALL pdsetpblas( ictxt )
2135*
2136* Test RSRCY >= NPROW
2137*
2138 IF( i.EQ.rsrc_ )
2139 $ descy( i ) = nprow
2140*
2141* Test CSRCY >= NPCOL
2142*
2143 IF( i.EQ.csrc_ )
2144 $ descy( i ) = npcol
2145*
2146* Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2147*
2148 IF( i.EQ.lld_ ) THEN
2149 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2150 descy( i ) = 1
2151 ELSE
2152 descy( i ) = 0
2153 END IF
2154 END IF
2155*
2156 infot = ( ( argpos + 3 ) * descmult ) + i
2157 CALL pdcallsub( subptr, scode )
2158 CALL pchkpbe( ictxt, nout, sname, infot )
2159*
2160 END IF
2161*
2162 50 CONTINUE
2163*
2164* Check INCY. Set all other OK, bad INCY
2165*
2166 CALL pdsetpblas( ictxt )
2167 incy = -1
2168 infot = argpos + 4
2169 CALL pdcallsub( subptr, scode )
2170 CALL pchkpbe( ictxt, nout, sname, infot )
2171*
2172 END IF
2173*
2174 RETURN
2175*
2176* End of PDCHKMAT
2177*
2178 END
2179 SUBROUTINE pdcallsub( SUBPTR, SCODE )
2180*
2181* -- PBLAS test routine (version 2.0) --
2182* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2183* and University of California, Berkeley.
2184* April 1, 1998
2185*
2186* .. Scalar Arguments ..
2187 INTEGER SCODE
2188* ..
2189* .. Subroutine Arguments ..
2190 EXTERNAL subptr
2191* ..
2192*
2193* Purpose
2194* =======
2195*
2196* PDCALLSUB calls the subroutine SUBPTR with the calling sequence iden-
2197* tified by SCODE.
2198*
2199* Notes
2200* =====
2201*
2202* A description vector is associated with each 2D block-cyclicly dis-
2203* tributed matrix. This vector stores the information required to
2204* establish the mapping between a matrix entry and its corresponding
2205* process and memory location.
2206*
2207* In the following comments, the character _ should be read as
2208* "of the distributed matrix". Let A be a generic term for any 2D
2209* block cyclicly distributed matrix. Its description vector is DESCA:
2210*
2211* NOTATION STORED IN EXPLANATION
2212* ---------------- --------------- ------------------------------------
2213* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2214* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2215* the NPROW x NPCOL BLACS process grid
2216* A is distributed over. The context
2217* itself is global, but the handle
2218* (the integer value) may vary.
2219* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2220* ted matrix A, M_A >= 0.
2221* N_A (global) DESCA( N_ ) The number of columns in the distri-
2222* buted matrix A, N_A >= 0.
2223* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2224* block of the matrix A, IMB_A > 0.
2225* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2226* left block of the matrix A,
2227* INB_A > 0.
2228* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2229* bute the last M_A-IMB_A rows of A,
2230* MB_A > 0.
2231* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2232* bute the last N_A-INB_A columns of
2233* A, NB_A > 0.
2234* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2235* row of the matrix A is distributed,
2236* NPROW > RSRC_A >= 0.
2237* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2238* first column of A is distributed.
2239* NPCOL > CSRC_A >= 0.
2240* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2241* array storing the local blocks of
2242* the distributed matrix A,
2243* IF( Lc( 1, N_A ) > 0 )
2244* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2245* ELSE
2246* LLD_A >= 1.
2247*
2248* Let K be the number of rows of a matrix A starting at the global in-
2249* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2250* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2251* receive if these K rows were distributed over NPROW processes. If K
2252* is the number of columns of a matrix A starting at the global index
2253* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2254* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2255* these K columns were distributed over NPCOL processes.
2256*
2257* The values of Lr() and Lc() may be determined via a call to the func-
2258* tion PB_NUMROC:
2259* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2260* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2261*
2262* Arguments
2263* =========
2264*
2265* SUBPTR (global input) SUBROUTINE
2266* On entry, SUBPTR is a subroutine. SUBPTR must be declared
2267* EXTERNAL in the calling subroutine.
2268*
2269* SCODE (global input) INTEGER
2270* On entry, SCODE specifies the calling sequence code.
2271*
2272* Calling sequence encodings
2273* ==========================
2274*
2275* code Formal argument list Examples
2276*
2277* 11 (n, v1,v2) _SWAP, _COPY
2278* 12 (n,s1, v1 ) _SCAL, _SCAL
2279* 13 (n,s1, v1,v2) _AXPY, _DOT_
2280* 14 (n,s1,i1,v1 ) _AMAX
2281* 15 (n,u1, v1 ) _ASUM, _NRM2
2282*
2283* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
2284* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
2285* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
2286* 24 ( m,n,s1,v1,v2,m1) _GER_
2287* 25 (uplo, n,s1,v1, m1) _SYR
2288* 26 (uplo, n,u1,v1, m1) _HER
2289* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
2290*
2291* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
2292* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
2293* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
2294* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
2295* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
2296* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
2297* 37 ( m,n, s1,m1, s2,m3) _TRAN_
2298* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
2299* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
2300* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
2301*
2302* -- Written on April 1, 1998 by
2303* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2304*
2305* =====================================================================
2306*
2307* .. Parameters ..
2308 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2309 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2310 $ RSRC_
2311 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
2312 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2313 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2314 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2315* ..
2316* .. Common Blocks ..
2317 CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
2318 INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
2319 $ JC, JX, JY, KDIM, MDIM, NDIM
2320 DOUBLE PRECISION USCLR, SCLR
2321 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
2322 $ DESCX( DLEN_ ), DESCY( DLEN_ )
2323 DOUBLE PRECISION A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
2324 COMMON /pblasc/diag, side, transa, transb, uplo
2325 COMMON /pblasd/desca, descb, descc, descx, descy
2326 COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
2327 $ ja, jb, jc, jx, jy
2328 COMMON /pblasm/a, b, c
2329 COMMON /pblasn/kdim, mdim, ndim
2330 COMMON /pblass/sclr, usclr
2331 COMMON /pblasv/x, y
2332* ..
2333* .. Executable Statements ..
2334*
2335* Level 1 PBLAS
2336*
2337 IF( scode.EQ.11 ) THEN
2338*
2339 CALL subptr( ndim, x, ix, jx, descx, incx, y, iy, jy, descy,
2340 $ incy )
2341*
2342 ELSE IF( scode.EQ.12 ) THEN
2343*
2344 CALL subptr( ndim, sclr, x, ix, jx, descx, incx )
2345*
2346 ELSE IF( scode.EQ.13 ) THEN
2347*
2348 CALL subptr( ndim, sclr, x, ix, jx, descx, incx, y, iy, jy,
2349 $ descy, incy )
2350*
2351 ELSE IF( scode.EQ.14 ) THEN
2352*
2353 CALL subptr( ndim, sclr, isclr, x, ix, jx, descx, incx )
2354*
2355 ELSE IF( scode.EQ.15 ) THEN
2356*
2357 CALL subptr( ndim, usclr, x, ix, jx, descx, incx )
2358*
2359* Level 2 PBLAS
2360*
2361 ELSE IF( scode.EQ.21 ) THEN
2362*
2363 CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, x, ix,
2364 $ jx, descx, incx, sclr, y, iy, jy, descy, incy )
2365*
2366 ELSE IF( scode.EQ.22 ) THEN
2367*
2368 CALL subptr( uplo, ndim, sclr, a, ia, ja, desca, x, ix, jx,
2369 $ descx, incx, sclr, y, iy, jy, descy, incy )
2370*
2371 ELSE IF( scode.EQ.23 ) THEN
2372*
2373 CALL subptr( uplo, transa, diag, ndim, a, ia, ja, desca, x, ix,
2374 $ jx, descx, incx )
2375*
2376 ELSE IF( scode.EQ.24 ) THEN
2377*
2378 CALL subptr( mdim, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2379 $ jy, descy, incy, a, ia, ja, desca )
2380*
2381 ELSE IF( scode.EQ.25 ) THEN
2382*
2383 CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, a, ia,
2384 $ ja, desca )
2385*
2386 ELSE IF( scode.EQ.26 ) THEN
2387*
2388 CALL subptr( uplo, ndim, usclr, x, ix, jx, descx, incx, a, ia,
2389 $ ja, desca )
2390*
2391 ELSE IF( scode.EQ.27 ) THEN
2392*
2393 CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2394 $ jy, descy, incy, a, ia, ja, desca )
2395*
2396* Level 3 PBLAS
2397*
2398 ELSE IF( scode.EQ.31 ) THEN
2399*
2400 CALL subptr( transa, transb, mdim, ndim, kdim, sclr, a, ia, ja,
2401 $ desca, b, ib, jb, descb, sclr, c, ic, jc, descc )
2402*
2403 ELSE IF( scode.EQ.32 ) THEN
2404*
2405 CALL subptr( side, uplo, mdim, ndim, sclr, a, ia, ja, desca, b,
2406 $ ib, jb, descb, sclr, c, ic, jc, descc )
2407*
2408 ELSE IF( scode.EQ.33 ) THEN
2409*
2410 CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2411 $ sclr, c, ic, jc, descc )
2412*
2413 ELSE IF( scode.EQ.34 ) THEN
2414*
2415 CALL subptr( uplo, transa, ndim, kdim, usclr, a, ia, ja, desca,
2416 $ usclr, c, ic, jc, descc )
2417*
2418 ELSE IF( scode.EQ.35 ) THEN
2419*
2420 CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2421 $ b, ib, jb, descb, sclr, c, ic, jc, descc )
2422*
2423 ELSE IF( scode.EQ.36 ) THEN
2424*
2425 CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2426 $ b, ib, jb, descb, usclr, c, ic, jc, descc )
2427*
2428 ELSE IF( scode.EQ.37 ) THEN
2429*
2430 CALL subptr( mdim, ndim, sclr, a, ia, ja, desca, sclr, c, ic,
2431 $ jc, descc )
2432*
2433 ELSE IF( scode.EQ.38 ) THEN
2434*
2435 CALL subptr( side, uplo, transa, diag, mdim, ndim, sclr, a, ia,
2436 $ ja, desca, b, ib, jb, descb )
2437*
2438 ELSE IF( scode.EQ.39 ) THEN
2439*
2440 CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, sclr,
2441 $ c, ic, jc, descc )
2442*
2443 ELSE IF( scode.EQ.40 ) THEN
2444*
2445 CALL subptr( uplo, transa, mdim, ndim, sclr, a, ia, ja, desca,
2446 $ sclr, c, ic, jc, descc )
2447*
2448 END IF
2449*
2450 RETURN
2451*
2452* End of PDCALLSUB
2453*
2454 END
2455 SUBROUTINE pderrset( ERR, ERRMAX, XTRUE, X )
2456*
2457* -- PBLAS test routine (version 2.0) --
2458* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2459* and University of California, Berkeley.
2460* April 1, 1998
2461*
2462* .. Scalar Arguments ..
2463 DOUBLE PRECISION ERR, ERRMAX, X, XTRUE
2464* ..
2465*
2466* Purpose
2467* =======
2468*
2469* PDERRSET computes the absolute difference ERR = |XTRUE - X| and com-
2470* pares it with zero. ERRMAX accumulates the absolute error difference.
2471*
2472* Notes
2473* =====
2474*
2475* A description vector is associated with each 2D block-cyclicly dis-
2476* tributed matrix. This vector stores the information required to
2477* establish the mapping between a matrix entry and its corresponding
2478* process and memory location.
2479*
2480* In the following comments, the character _ should be read as
2481* "of the distributed matrix". Let A be a generic term for any 2D
2482* block cyclicly distributed matrix. Its description vector is DESCA:
2483*
2484* NOTATION STORED IN EXPLANATION
2485* ---------------- --------------- ------------------------------------
2486* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2487* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2488* the NPROW x NPCOL BLACS process grid
2489* A is distributed over. The context
2490* itself is global, but the handle
2491* (the integer value) may vary.
2492* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2493* ted matrix A, M_A >= 0.
2494* N_A (global) DESCA( N_ ) The number of columns in the distri-
2495* buted matrix A, N_A >= 0.
2496* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2497* block of the matrix A, IMB_A > 0.
2498* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2499* left block of the matrix A,
2500* INB_A > 0.
2501* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2502* bute the last M_A-IMB_A rows of A,
2503* MB_A > 0.
2504* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2505* bute the last N_A-INB_A columns of
2506* A, NB_A > 0.
2507* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2508* row of the matrix A is distributed,
2509* NPROW > RSRC_A >= 0.
2510* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2511* first column of A is distributed.
2512* NPCOL > CSRC_A >= 0.
2513* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2514* array storing the local blocks of
2515* the distributed matrix A,
2516* IF( Lc( 1, N_A ) > 0 )
2517* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2518* ELSE
2519* LLD_A >= 1.
2520*
2521* Let K be the number of rows of a matrix A starting at the global in-
2522* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2523* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2524* receive if these K rows were distributed over NPROW processes. If K
2525* is the number of columns of a matrix A starting at the global index
2526* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2527* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2528* these K columns were distributed over NPCOL processes.
2529*
2530* The values of Lr() and Lc() may be determined via a call to the func-
2531* tion PB_NUMROC:
2532* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2533* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2534*
2535* Arguments
2536* =========
2537*
2538* ERR (local output) DOUBLE PRECISION
2539* On exit, ERR specifies the absolute difference |XTRUE - X|.
2540*
2541* ERRMAX (local input/local output) DOUBLE PRECISION
2542* On entry, ERRMAX specifies a previously computed error. On
2543* exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ).
2544*
2545* XTRUE (local input) DOUBLE PRECISION
2546* On entry, XTRUE specifies the true value.
2547*
2548* X (local input) DOUBLE PRECISION
2549* On entry, X specifies the value to be compared to XTRUE.
2550*
2551* -- Written on April 1, 1998 by
2552* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2553*
2554* =====================================================================
2555*
2556* .. External Functions ..
2557 DOUBLE PRECISION PDDIFF
2558 EXTERNAL PDDIFF
2559* ..
2560* .. Intrinsic Functions ..
2561 INTRINSIC abs, max
2562* ..
2563* .. Executable Statements ..
2564*
2565 err = abs( pddiff( xtrue, x ) )
2566*
2567 errmax = max( errmax, err )
2568*
2569 RETURN
2570*
2571* End of PDERRSET
2572*
2573 END
2574 SUBROUTINE pdchkvin( ERRMAX, N, X, PX, IX, JX, DESCX, INCX,
2575 $ INFO )
2576*
2577* -- PBLAS test routine (version 2.0) --
2578* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2579* and University of California, Berkeley.
2580* April 1, 1998
2581*
2582* .. Scalar Arguments ..
2583 INTEGER INCX, INFO, IX, JX, N
2584 DOUBLE PRECISION ERRMAX
2585* ..
2586* .. Array Arguments ..
2587 INTEGER DESCX( * )
2588 DOUBLE PRECISION PX( * ), X( * )
2589* ..
2590*
2591* Purpose
2592* =======
2593*
2594* PDCHKVIN checks that the submatrix sub( PX ) remained unchanged. The
2595* local array entries are compared element by element, and their dif-
2596* ference is tested against 0.0 as well as the epsilon machine. Notice
2597* that this difference should be numerically exactly the zero machine,
2598* but because of the possible fluctuation of some of the data we flag-
2599* ged differently a difference less than twice the epsilon machine. The
2600* largest error is also returned.
2601*
2602* Notes
2603* =====
2604*
2605* A description vector is associated with each 2D block-cyclicly dis-
2606* tributed matrix. This vector stores the information required to
2607* establish the mapping between a matrix entry and its corresponding
2608* process and memory location.
2609*
2610* In the following comments, the character _ should be read as
2611* "of the distributed matrix". Let A be a generic term for any 2D
2612* block cyclicly distributed matrix. Its description vector is DESCA:
2613*
2614* NOTATION STORED IN EXPLANATION
2615* ---------------- --------------- ------------------------------------
2616* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2617* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2618* the NPROW x NPCOL BLACS process grid
2619* A is distributed over. The context
2620* itself is global, but the handle
2621* (the integer value) may vary.
2622* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2623* ted matrix A, M_A >= 0.
2624* N_A (global) DESCA( N_ ) The number of columns in the distri-
2625* buted matrix A, N_A >= 0.
2626* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2627* block of the matrix A, IMB_A > 0.
2628* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2629* left block of the matrix A,
2630* INB_A > 0.
2631* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2632* bute the last M_A-IMB_A rows of A,
2633* MB_A > 0.
2634* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2635* bute the last N_A-INB_A columns of
2636* A, NB_A > 0.
2637* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2638* row of the matrix A is distributed,
2639* NPROW > RSRC_A >= 0.
2640* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2641* first column of A is distributed.
2642* NPCOL > CSRC_A >= 0.
2643* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2644* array storing the local blocks of
2645* the distributed matrix A,
2646* IF( Lc( 1, N_A ) > 0 )
2647* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2648* ELSE
2649* LLD_A >= 1.
2650*
2651* Let K be the number of rows of a matrix A starting at the global in-
2652* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2653* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2654* receive if these K rows were distributed over NPROW processes. If K
2655* is the number of columns of a matrix A starting at the global index
2656* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2657* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2658* these K columns were distributed over NPCOL processes.
2659*
2660* The values of Lr() and Lc() may be determined via a call to the func-
2661* tion PB_NUMROC:
2662* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2663* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2664*
2665* Arguments
2666* =========
2667*
2668* ERRMAX (global output) DOUBLE PRECISION
2669* On exit, ERRMAX specifies the largest absolute element-wise
2670* difference between sub( X ) and sub( PX ).
2671*
2672* N (global input) INTEGER
2673* On entry, N specifies the length of the subvector operand
2674* sub( X ). N must be at least zero.
2675*
2676* X (local input) DOUBLE PRECISION array
2677* On entry, X is an array of dimension (DESCX( M_ ),*). This
2678* array contains a local copy of the initial entire matrix PX.
2679*
2680* PX (local input) DOUBLE PRECISION array
2681* On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2682* array contains the local entries of the matrix PX.
2683*
2684* IX (global input) INTEGER
2685* On entry, IX specifies X's global row index, which points to
2686* the beginning of the submatrix sub( X ).
2687*
2688* JX (global input) INTEGER
2689* On entry, JX specifies X's global column index, which points
2690* to the beginning of the submatrix sub( X ).
2691*
2692* DESCX (global and local input) INTEGER array
2693* On entry, DESCX is an integer array of dimension DLEN_. This
2694* is the array descriptor for the matrix X.
2695*
2696* INCX (global input) INTEGER
2697* On entry, INCX specifies the global increment for the
2698* elements of X. Only two values of INCX are supported in
2699* this version, namely 1 and M_X. INCX must not be zero.
2700*
2701* INFO (global output) INTEGER
2702* On exit, if INFO = 0, no error has been found,
2703* If INFO > 0, the maximum abolute error found is in (0,eps],
2704* If INFO < 0, the maximum abolute error found is in (eps,+oo).
2705*
2706* -- Written on April 1, 1998 by
2707* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2708*
2709* =====================================================================
2710*
2711* .. Parameters ..
2712 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2713 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2714 $ RSRC_
2715 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
2716 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2717 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2718 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2719 DOUBLE PRECISION ZERO
2720 PARAMETER ( ZERO = 0.0d+0 )
2721* ..
2722* .. Local Scalars ..
2723 LOGICAL COLREP, ROWREP
2724 INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL,
2725 $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL,
2726 $ MYCOL, MYROW, NPCOL, NPROW
2727 DOUBLE PRECISION ERR, EPS
2728* ..
2729* .. External Subroutines ..
2730 EXTERNAL blacs_gridinfo, dgamx2d, pb_infog2l, pderrset
2731* ..
2732* .. External Functions ..
2733 DOUBLE PRECISION PDLAMCH
2734 EXTERNAL pdlamch
2735* ..
2736* .. Intrinsic Functions ..
2737 INTRINSIC abs, max, min, mod
2738* ..
2739* .. Executable Statements ..
2740*
2741 info = 0
2742 errmax = zero
2743*
2744* Quick return if possible
2745*
2746 IF( n.LE.0 )
2747 $ RETURN
2748*
2749 ictxt = descx( ctxt_ )
2750 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
2751*
2752 eps = pdlamch( ictxt, 'eps' )
2753*
2754 CALL pb_infog2l( ix, jx, descx, nprow, npcol, myrow, mycol, iix,
2755 $ jjx, ixrow, ixcol )
2756*
2757 ldx = descx( m_ )
2758 ldpx = descx( lld_ )
2759 rowrep = ( ixrow.EQ.-1 )
2760 colrep = ( ixcol.EQ.-1 )
2761*
2762 IF( n.EQ.1 ) THEN
2763*
2764 IF( ( myrow.EQ.ixrow .OR. rowrep ) .AND.
2765 $ ( mycol.EQ.ixcol .OR. colrep ) )
2766 $ CALL pderrset( err, errmax, x( ix+(jx-1)*ldx ),
2767 $ px( iix+(jjx-1)*ldpx ) )
2768*
2769 ELSE IF( incx.EQ.descx( m_ ) ) THEN
2770*
2771* sub( X ) is a row vector
2772*
2773 jb = descx( inb_ ) - jx + 1
2774 IF( jb.LE.0 )
2775 $ jb = ( ( -jb ) / descx( nb_ ) + 1 ) * descx( nb_ ) + jb
2776 jb = min( jb, n )
2777 jn = jx + jb - 1
2778*
2779 IF( myrow.EQ.ixrow .OR. rowrep ) THEN
2780*
2781 icurcol = ixcol
2782 IF( mycol.EQ.icurcol .OR. colrep ) THEN
2783 DO 10 j = jx, jn
2784 CALL pderrset( err, errmax, x( ix+(j-1)*ldx ),
2785 $ px( iix+(jjx-1)*ldpx ) )
2786 jjx = jjx + 1
2787 10 CONTINUE
2788 END IF
2789 icurcol = mod( icurcol+1, npcol )
2790*
2791 DO 30 j = jn+1, jx+n-1, descx( nb_ )
2792 jb = min( jx+n-j, descx( nb_ ) )
2793*
2794 IF( mycol.EQ.icurcol .OR. colrep ) THEN
2795*
2796 DO 20 kk = 0, jb-1
2797 CALL pderrset( err, errmax, x( ix+(j+kk-1)*ldx ),
2798 $ px( iix+(jjx+kk-1)*ldpx ) )
2799 20 CONTINUE
2800*
2801 jjx = jjx + jb
2802*
2803 END IF
2804*
2805 icurcol = mod( icurcol+1, npcol )
2806*
2807 30 CONTINUE
2808*
2809 END IF
2810*
2811 ELSE
2812*
2813* sub( X ) is a column vector
2814*
2815 ib = descx( imb_ ) - ix + 1
2816 IF( ib.LE.0 )
2817 $ ib = ( ( -ib ) / descx( mb_ ) + 1 ) * descx( mb_ ) + ib
2818 ib = min( ib, n )
2819 in = ix + ib - 1
2820*
2821 IF( mycol.EQ.ixcol .OR. colrep ) THEN
2822*
2823 icurrow = ixrow
2824 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2825 DO 40 i = ix, in
2826 CALL pderrset( err, errmax, x( i+(jx-1)*ldx ),
2827 $ px( iix+(jjx-1)*ldpx ) )
2828 iix = iix + 1
2829 40 CONTINUE
2830 END IF
2831 icurrow = mod( icurrow+1, nprow )
2832*
2833 DO 60 i = in+1, ix+n-1, descx( mb_ )
2834 ib = min( ix+n-i, descx( mb_ ) )
2835*
2836 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2837*
2838 DO 50 kk = 0, ib-1
2839 CALL pderrset( err, errmax, x( i+kk+(jx-1)*ldx ),
2840 $ px( iix+kk+(jjx-1)*ldpx ) )
2841 50 CONTINUE
2842*
2843 iix = iix + ib
2844*
2845 END IF
2846*
2847 icurrow = mod( icurrow+1, nprow )
2848*
2849 60 CONTINUE
2850*
2851 END IF
2852*
2853 END IF
2854*
2855 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
2856 $ -1, -1 )
2857*
2858 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
2859 info = 1
2860 ELSE IF( errmax.GT.eps ) THEN
2861 info = -1
2862 END IF
2863*
2864 RETURN
2865*
2866* End of PDCHKVIN
2867*
2868 END
2869 SUBROUTINE pdchkvout( N, X, PX, IX, JX, DESCX, INCX, INFO )
2870*
2871* -- PBLAS test routine (version 2.0) --
2872* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2873* and University of California, Berkeley.
2874* April 1, 1998
2875*
2876* .. Scalar Arguments ..
2877 INTEGER INCX, INFO, IX, JX, N
2878* ..
2879* .. Array Arguments ..
2880 INTEGER DESCX( * )
2881 DOUBLE PRECISION PX( * ), X( * )
2882* ..
2883*
2884* Purpose
2885* =======
2886*
2887* PDCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged.
2888* The local array entries are compared element by element, and their
2889* difference is tested against 0.0 as well as the epsilon machine. No-
2890* tice that this difference should be numerically exactly the zero ma-
2891* chine, but because of the possible movement of some of the data we
2892* flagged differently a difference less than twice the epsilon machine.
2893* The largest error is reported.
2894*
2895* Notes
2896* =====
2897*
2898* A description vector is associated with each 2D block-cyclicly dis-
2899* tributed matrix. This vector stores the information required to
2900* establish the mapping between a matrix entry and its corresponding
2901* process and memory location.
2902*
2903* In the following comments, the character _ should be read as
2904* "of the distributed matrix". Let A be a generic term for any 2D
2905* block cyclicly distributed matrix. Its description vector is DESCA:
2906*
2907* NOTATION STORED IN EXPLANATION
2908* ---------------- --------------- ------------------------------------
2909* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2910* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2911* the NPROW x NPCOL BLACS process grid
2912* A is distributed over. The context
2913* itself is global, but the handle
2914* (the integer value) may vary.
2915* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2916* ted matrix A, M_A >= 0.
2917* N_A (global) DESCA( N_ ) The number of columns in the distri-
2918* buted matrix A, N_A >= 0.
2919* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2920* block of the matrix A, IMB_A > 0.
2921* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2922* left block of the matrix A,
2923* INB_A > 0.
2924* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2925* bute the last M_A-IMB_A rows of A,
2926* MB_A > 0.
2927* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2928* bute the last N_A-INB_A columns of
2929* A, NB_A > 0.
2930* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2931* row of the matrix A is distributed,
2932* NPROW > RSRC_A >= 0.
2933* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2934* first column of A is distributed.
2935* NPCOL > CSRC_A >= 0.
2936* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2937* array storing the local blocks of
2938* the distributed matrix A,
2939* IF( Lc( 1, N_A ) > 0 )
2940* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2941* ELSE
2942* LLD_A >= 1.
2943*
2944* Let K be the number of rows of a matrix A starting at the global in-
2945* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2946* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2947* receive if these K rows were distributed over NPROW processes. If K
2948* is the number of columns of a matrix A starting at the global index
2949* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2950* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2951* these K columns were distributed over NPCOL processes.
2952*
2953* The values of Lr() and Lc() may be determined via a call to the func-
2954* tion PB_NUMROC:
2955* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2956* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2957*
2958* Arguments
2959* =========
2960*
2961* N (global input) INTEGER
2962* On entry, N specifies the length of the subvector operand
2963* sub( X ). N must be at least zero.
2964*
2965* X (local input) DOUBLE PRECISION array
2966* On entry, X is an array of dimension (DESCX( M_ ),*). This
2967* array contains a local copy of the initial entire matrix PX.
2968*
2969* PX (local input) DOUBLE PRECISION array
2970* On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2971* array contains the local entries of the matrix PX.
2972*
2973* IX (global input) INTEGER
2974* On entry, IX specifies X's global row index, which points to
2975* the beginning of the submatrix sub( X ).
2976*
2977* JX (global input) INTEGER
2978* On entry, JX specifies X's global column index, which points
2979* to the beginning of the submatrix sub( X ).
2980*
2981* DESCX (global and local input) INTEGER array
2982* On entry, DESCX is an integer array of dimension DLEN_. This
2983* is the array descriptor for the matrix X.
2984*
2985* INCX (global input) INTEGER
2986* On entry, INCX specifies the global increment for the
2987* elements of X. Only two values of INCX are supported in
2988* this version, namely 1 and M_X. INCX must not be zero.
2989*
2990* INFO (global output) INTEGER
2991* On exit, if INFO = 0, no error has been found,
2992* If INFO > 0, the maximum abolute error found is in (0,eps],
2993* If INFO < 0, the maximum abolute error found is in (eps,+oo).
2994*
2995* -- Written on April 1, 1998 by
2996* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2997*
2998* =====================================================================
2999*
3000* .. Parameters ..
3001 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3002 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3003 $ RSRC_
3004 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3005 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3006 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3007 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3008 DOUBLE PRECISION ZERO
3009 PARAMETER ( ZERO = 0.0d+0 )
3010* ..
3011* .. Local Scalars ..
3012 LOGICAL COLREP, ROWREP
3013 INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX,
3014 $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL,
3015 $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL,
3016 $ nprow, nqall
3017 DOUBLE PRECISION EPS, ERR, ERRMAX
3018* ..
3019* .. External Subroutines ..
3020 EXTERNAL BLACS_GRIDINFO, DGAMX2D, PDERRSET
3021* ..
3022* .. External Functions ..
3023 INTEGER PB_NUMROC
3024 DOUBLE PRECISION PDLAMCH
3025 EXTERNAL PDLAMCH, PB_NUMROC
3026* ..
3027* .. Intrinsic Functions ..
3028 INTRINSIC abs, max, min, mod
3029* ..
3030* .. Executable Statements ..
3031*
3032 info = 0
3033 errmax = zero
3034*
3035* Quick return if possible
3036*
3037 IF( ( descx( m_ ).LE.0 ).OR.( descx( n_ ).LE.0 ) )
3038 $ RETURN
3039*
3040* Start the operations
3041*
3042 ictxt = descx( ctxt_ )
3043 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3044*
3045 eps = pdlamch( ictxt, 'eps' )
3046*
3047 mpall = pb_numroc( descx( m_ ), 1, descx( imb_ ), descx( mb_ ),
3048 $ myrow, descx( rsrc_ ), nprow )
3049 nqall = pb_numroc( descx( n_ ), 1, descx( inb_ ), descx( nb_ ),
3050 $ mycol, descx( csrc_ ), npcol )
3051*
3052 mbx = descx( mb_ )
3053 nbx = descx( nb_ )
3054 ldx = descx( m_ )
3055 ldpx = descx( lld_ )
3056 icurrow = descx( rsrc_ )
3057 icurcol = descx( csrc_ )
3058 rowrep = ( icurrow.EQ.-1 )
3059 colrep = ( icurcol.EQ.-1 )
3060 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3061 imbx = descx( imb_ )
3062 ELSE
3063 imbx = mbx
3064 END IF
3065 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3066 inbx = descx( inb_ )
3067 ELSE
3068 inbx = nbx
3069 END IF
3070 IF( rowrep ) THEN
3071 myrowdist = 0
3072 ELSE
3073 myrowdist = mod( myrow - icurrow + nprow, nprow )
3074 END IF
3075 IF( colrep ) THEN
3076 mycoldist = 0
3077 ELSE
3078 mycoldist = mod( mycol - icurcol + npcol, npcol )
3079 END IF
3080 ii = 1
3081 jj = 1
3082*
3083 IF( incx.EQ.descx( m_ ) ) THEN
3084*
3085* sub( X ) is a row vector
3086*
3087 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3088*
3089 i = 1
3090 IF( mycoldist.EQ.0 ) THEN
3091 j = 1
3092 ELSE
3093 j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3094 END IF
3095 jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3096 ib = min( descx( m_ ), descx( imb_ ) )
3097*
3098 DO 20 kk = 0, jb-1
3099 DO 10 ll = 0, ib-1
3100 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR. j+kk.GT.jx+n-1 )
3101 $ CALL pderrset( err, errmax,
3102 $ x( i+ll+(j+kk-1)*ldx ),
3103 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3104 10 CONTINUE
3105 20 CONTINUE
3106 IF( colrep ) THEN
3107 j = j + inbx
3108 ELSE
3109 j = j + inbx + ( npcol - 1 ) * nbx
3110 END IF
3111*
3112 DO 50 jj = inbx+1, nqall, nbx
3113 jb = min( nqall-jj+1, nbx )
3114*
3115 DO 40 kk = 0, jb-1
3116 DO 30 ll = 0, ib-1
3117 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3118 $ j+kk.GT.jx+n-1 )
3119 $ CALL pderrset( err, errmax,
3120 $ x( i+ll+(j+kk-1)*ldx ),
3121 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3122 30 CONTINUE
3123 40 CONTINUE
3124*
3125 IF( colrep ) THEN
3126 j = j + nbx
3127 ELSE
3128 j = j + npcol * nbx
3129 END IF
3130*
3131 50 CONTINUE
3132*
3133 ii = ii + ib
3134*
3135 END IF
3136*
3137 icurrow = mod( icurrow + 1, nprow )
3138*
3139 DO 110 i = descx( imb_ ) + 1, descx( m_ ), mbx
3140 ib = min( descx( m_ ) - i + 1, mbx )
3141*
3142 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3143*
3144 IF( mycoldist.EQ.0 ) THEN
3145 j = 1
3146 ELSE
3147 j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3148 END IF
3149*
3150 jj = 1
3151 jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3152 DO 70 kk = 0, jb-1
3153 DO 60 ll = 0, ib-1
3154 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3155 $ j+kk.GT.jx+n-1 )
3156 $ CALL pderrset( err, errmax,
3157 $ x( i+ll+(j+kk-1)*ldx ),
3158 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3159 60 CONTINUE
3160 70 CONTINUE
3161 IF( colrep ) THEN
3162 j = j + inbx
3163 ELSE
3164 j = j + inbx + ( npcol - 1 ) * nbx
3165 END IF
3166*
3167 DO 100 jj = inbx+1, nqall, nbx
3168 jb = min( nqall-jj+1, nbx )
3169*
3170 DO 90 kk = 0, jb-1
3171 DO 80 ll = 0, ib-1
3172 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3173 $ j+kk.GT.jx+n-1 )
3174 $ CALL pderrset( err, errmax,
3175 $ x( i+ll+(j+kk-1)*ldx ),
3176 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3177 80 CONTINUE
3178 90 CONTINUE
3179*
3180 IF( colrep ) THEN
3181 j = j + nbx
3182 ELSE
3183 j = j + npcol * nbx
3184 END IF
3185*
3186 100 CONTINUE
3187*
3188 ii = ii + ib
3189*
3190 END IF
3191*
3192 icurrow = mod( icurrow + 1, nprow )
3193*
3194 110 CONTINUE
3195*
3196 ELSE
3197*
3198* sub( X ) is a column vector
3199*
3200 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3201*
3202 j = 1
3203 IF( myrowdist.EQ.0 ) THEN
3204 i = 1
3205 ELSE
3206 i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3207 END IF
3208 ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3209 jb = min( descx( n_ ), descx( inb_ ) )
3210*
3211 DO 130 kk = 0, jb-1
3212 DO 120 ll = 0, ib-1
3213 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR. i+ll.GT.ix+n-1 )
3214 $ CALL pderrset( err, errmax,
3215 $ x( i+ll+(j+kk-1)*ldx ),
3216 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3217 120 CONTINUE
3218 130 CONTINUE
3219 IF( rowrep ) THEN
3220 i = i + imbx
3221 ELSE
3222 i = i + imbx + ( nprow - 1 ) * mbx
3223 END IF
3224*
3225 DO 160 ii = imbx+1, mpall, mbx
3226 ib = min( mpall-ii+1, mbx )
3227*
3228 DO 150 kk = 0, jb-1
3229 DO 140 ll = 0, ib-1
3230 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3231 $ i+ll.GT.ix+n-1 )
3232 $ CALL pderrset( err, errmax,
3233 $ x( i+ll+(j+kk-1)*ldx ),
3234 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3235 140 CONTINUE
3236 150 CONTINUE
3237*
3238 IF( rowrep ) THEN
3239 i = i + mbx
3240 ELSE
3241 i = i + nprow * mbx
3242 END IF
3243*
3244 160 CONTINUE
3245*
3246 jj = jj + jb
3247*
3248 END IF
3249*
3250 icurcol = mod( icurcol + 1, npcol )
3251*
3252 DO 220 j = descx( inb_ ) + 1, descx( n_ ), nbx
3253 jb = min( descx( n_ ) - j + 1, nbx )
3254*
3255 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3256*
3257 IF( myrowdist.EQ.0 ) THEN
3258 i = 1
3259 ELSE
3260 i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3261 END IF
3262*
3263 ii = 1
3264 ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3265 DO 180 kk = 0, jb-1
3266 DO 170 ll = 0, ib-1
3267 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3268 $ i+ll.GT.ix+n-1 )
3269 $ CALL pderrset( err, errmax,
3270 $ x( i+ll+(j+kk-1)*ldx ),
3271 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3272 170 CONTINUE
3273 180 CONTINUE
3274 IF( rowrep ) THEN
3275 i = i + imbx
3276 ELSE
3277 i = i + imbx + ( nprow - 1 ) * mbx
3278 END IF
3279*
3280 DO 210 ii = imbx+1, mpall, mbx
3281 ib = min( mpall-ii+1, mbx )
3282*
3283 DO 200 kk = 0, jb-1
3284 DO 190 ll = 0, ib-1
3285 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3286 $ i+ll.GT.ix+n-1 )
3287 $ CALL pderrset( err, errmax,
3288 $ x( i+ll+(j+kk-1)*ldx ),
3289 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3290 190 CONTINUE
3291 200 CONTINUE
3292*
3293 IF( rowrep ) THEN
3294 i = i + mbx
3295 ELSE
3296 i = i + nprow * mbx
3297 END IF
3298*
3299 210 CONTINUE
3300*
3301 jj = jj + jb
3302*
3303 END IF
3304*
3305 icurcol = mod( icurcol + 1, npcol )
3306*
3307 220 CONTINUE
3308*
3309 END IF
3310*
3311 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3312 $ -1, -1 )
3313*
3314 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3315 info = 1
3316 ELSE IF( errmax.GT.eps ) THEN
3317 info = -1
3318 END IF
3319*
3320 RETURN
3321*
3322* End of PDCHKVOUT
3323*
3324 END
3325 SUBROUTINE pdchkmin( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO )
3326*
3327* -- PBLAS test routine (version 2.0) --
3328* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3329* and University of California, Berkeley.
3330* April 1, 1998
3331*
3332* .. Scalar Arguments ..
3333 INTEGER IA, INFO, JA, M, N
3334 DOUBLE PRECISION ERRMAX
3335* ..
3336* .. Array Arguments ..
3337 INTEGER DESCA( * )
3338 DOUBLE PRECISION PA( * ), A( * )
3339* ..
3340*
3341* Purpose
3342* =======
3343*
3344* PDCHKMIN checks that the submatrix sub( PA ) remained unchanged. The
3345* local array entries are compared element by element, and their dif-
3346* ference is tested against 0.0 as well as the epsilon machine. Notice
3347* that this difference should be numerically exactly the zero machine,
3348* but because of the possible fluctuation of some of the data we flag-
3349* ged differently a difference less than twice the epsilon machine. The
3350* largest error is also returned.
3351*
3352* Notes
3353* =====
3354*
3355* A description vector is associated with each 2D block-cyclicly dis-
3356* tributed matrix. This vector stores the information required to
3357* establish the mapping between a matrix entry and its corresponding
3358* process and memory location.
3359*
3360* In the following comments, the character _ should be read as
3361* "of the distributed matrix". Let A be a generic term for any 2D
3362* block cyclicly distributed matrix. Its description vector is DESCA:
3363*
3364* NOTATION STORED IN EXPLANATION
3365* ---------------- --------------- ------------------------------------
3366* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3367* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3368* the NPROW x NPCOL BLACS process grid
3369* A is distributed over. The context
3370* itself is global, but the handle
3371* (the integer value) may vary.
3372* M_A (global) DESCA( M_ ) The number of rows in the distribu-
3373* ted matrix A, M_A >= 0.
3374* N_A (global) DESCA( N_ ) The number of columns in the distri-
3375* buted matrix A, N_A >= 0.
3376* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3377* block of the matrix A, IMB_A > 0.
3378* INB_A (global) DESCA( INB_ ) The number of columns of the upper
3379* left block of the matrix A,
3380* INB_A > 0.
3381* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3382* bute the last M_A-IMB_A rows of A,
3383* MB_A > 0.
3384* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3385* bute the last N_A-INB_A columns of
3386* A, NB_A > 0.
3387* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3388* row of the matrix A is distributed,
3389* NPROW > RSRC_A >= 0.
3390* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3391* first column of A is distributed.
3392* NPCOL > CSRC_A >= 0.
3393* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3394* array storing the local blocks of
3395* the distributed matrix A,
3396* IF( Lc( 1, N_A ) > 0 )
3397* LLD_A >= MAX( 1, Lr( 1, M_A ) )
3398* ELSE
3399* LLD_A >= 1.
3400*
3401* Let K be the number of rows of a matrix A starting at the global in-
3402* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3403* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3404* receive if these K rows were distributed over NPROW processes. If K
3405* is the number of columns of a matrix A starting at the global index
3406* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3407* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3408* these K columns were distributed over NPCOL processes.
3409*
3410* The values of Lr() and Lc() may be determined via a call to the func-
3411* tion PB_NUMROC:
3412* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3413* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3414*
3415* Arguments
3416* =========
3417*
3418* ERRMAX (global output) DOUBLE PRECISION
3419* On exit, ERRMAX specifies the largest absolute element-wise
3420* difference between sub( A ) and sub( PA ).
3421*
3422* M (global input) INTEGER
3423* On entry, M specifies the number of rows of the submatrix
3424* operand sub( A ). M must be at least zero.
3425*
3426* N (global input) INTEGER
3427* On entry, N specifies the number of columns of the submatrix
3428* operand sub( A ). N must be at least zero.
3429*
3430* A (local input) DOUBLE PRECISION array
3431* On entry, A is an array of dimension (DESCA( M_ ),*). This
3432* array contains a local copy of the initial entire matrix PA.
3433*
3434* PA (local input) DOUBLE PRECISION array
3435* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3436* array contains the local entries of the matrix PA.
3437*
3438* IA (global input) INTEGER
3439* On entry, IA specifies A's global row index, which points to
3440* the beginning of the submatrix sub( A ).
3441*
3442* JA (global input) INTEGER
3443* On entry, JA specifies A's global column index, which points
3444* to the beginning of the submatrix sub( A ).
3445*
3446* DESCA (global and local input) INTEGER array
3447* On entry, DESCA is an integer array of dimension DLEN_. This
3448* is the array descriptor for the matrix A.
3449*
3450* INFO (global output) INTEGER
3451* On exit, if INFO = 0, no error has been found,
3452* If INFO > 0, the maximum abolute error found is in (0,eps],
3453* If INFO < 0, the maximum abolute error found is in (eps,+oo).
3454*
3455* -- Written on April 1, 1998 by
3456* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3457*
3458* =====================================================================
3459*
3460* .. Parameters ..
3461 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3462 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3463 $ RSRC_
3464 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3465 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3466 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3467 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3468 DOUBLE PRECISION ZERO
3469 PARAMETER ( ZERO = 0.0d+0 )
3470* ..
3471* .. Local Scalars ..
3472 LOGICAL COLREP, ROWREP
3473 INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
3474 $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
3475 $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW
3476 DOUBLE PRECISION ERR, EPS
3477* ..
3478* .. External Subroutines ..
3479 EXTERNAL blacs_gridinfo, dgamx2d, pb_infog2l, pderrset
3480* ..
3481* .. External Functions ..
3482 DOUBLE PRECISION PDLAMCH
3483 EXTERNAL pdlamch
3484* ..
3485* .. Intrinsic Functions ..
3486 INTRINSIC abs, max, min, mod
3487* ..
3488* .. Executable Statements ..
3489*
3490 info = 0
3491 errmax = zero
3492*
3493* Quick return if posssible
3494*
3495 IF( ( m.EQ.0 ).OR.( n.EQ.0 ) )
3496 $ RETURN
3497*
3498* Start the operations
3499*
3500 ictxt = desca( ctxt_ )
3501 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3502*
3503 eps = pdlamch( ictxt, 'eps' )
3504*
3505 CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
3506 $ jja, iarow, iacol )
3507*
3508 ii = iia
3509 jj = jja
3510 lda = desca( m_ )
3511 ldpa = desca( lld_ )
3512 icurrow = iarow
3513 icurcol = iacol
3514 rowrep = ( iarow.EQ.-1 )
3515 colrep = ( iacol.EQ.-1 )
3516*
3517* Handle the first block of column separately
3518*
3519 jb = desca( inb_ ) - ja + 1
3520 IF( jb.LE.0 )
3521 $ jb = ( ( -jb ) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
3522 jb = min( jb, n )
3523 jn = ja + jb - 1
3524*
3525 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3526*
3527 DO 40 h = 0, jb-1
3528 ib = desca( imb_ ) - ia + 1
3529 IF( ib.LE.0 )
3530 $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
3531 ib = min( ib, m )
3532 in = ia + ib - 1
3533 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3534 DO 10 k = 0, ib-1
3535 CALL pderrset( err, errmax, a( ia+k+(ja+h-1)*lda ),
3536 $ pa( ii+k+(jj+h-1)*ldpa ) )
3537 10 CONTINUE
3538 ii = ii + ib
3539 END IF
3540 icurrow = mod( icurrow+1, nprow )
3541*
3542* Loop over remaining block of rows
3543*
3544 DO 30 i = in+1, ia+m-1, desca( mb_ )
3545 ib = min( desca( mb_ ), ia+m-i )
3546 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3547 DO 20 k = 0, ib-1
3548 CALL pderrset( err, errmax, a( i+k+(ja+h-1)*lda ),
3549 $ pa( ii+k+(jj+h-1)*ldpa ) )
3550 20 CONTINUE
3551 ii = ii + ib
3552 END IF
3553 icurrow = mod( icurrow+1, nprow )
3554 30 CONTINUE
3555*
3556 ii = iia
3557 icurrow = iarow
3558 40 CONTINUE
3559*
3560 jj = jj + jb
3561*
3562 END IF
3563*
3564 icurcol = mod( icurcol+1, npcol )
3565*
3566* Loop over remaining column blocks
3567*
3568 DO 90 j = jn+1, ja+n-1, desca( nb_ )
3569 jb = min( desca( nb_ ), ja+n-j )
3570 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3571 DO 80 h = 0, jb-1
3572 ib = desca( imb_ ) - ia + 1
3573 IF( ib.LE.0 )
3574 $ ib = ( ( -ib ) / desca( mb_ ) + 1 )*desca( mb_ ) + ib
3575 ib = min( ib, m )
3576 in = ia + ib - 1
3577 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3578 DO 50 k = 0, ib-1
3579 CALL pderrset( err, errmax, a( ia+k+(j+h-1)*lda ),
3580 $ pa( ii+k+(jj+h-1)*ldpa ) )
3581 50 CONTINUE
3582 ii = ii + ib
3583 END IF
3584 icurrow = mod( icurrow+1, nprow )
3585*
3586* Loop over remaining block of rows
3587*
3588 DO 70 i = in+1, ia+m-1, desca( mb_ )
3589 ib = min( desca( mb_ ), ia+m-i )
3590 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3591 DO 60 k = 0, ib-1
3592 CALL pderrset( err, errmax,
3593 $ a( i+k+(j+h-1)*lda ),
3594 $ pa( ii+k+(jj+h-1)*ldpa ) )
3595 60 CONTINUE
3596 ii = ii + ib
3597 END IF
3598 icurrow = mod( icurrow+1, nprow )
3599 70 CONTINUE
3600*
3601 ii = iia
3602 icurrow = iarow
3603 80 CONTINUE
3604*
3605 jj = jj + jb
3606 END IF
3607*
3608 icurcol = mod( icurcol+1, npcol )
3609*
3610 90 CONTINUE
3611*
3612 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3613 $ -1, -1 )
3614*
3615 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3616 info = 1
3617 ELSE IF( errmax.GT.eps ) THEN
3618 info = -1
3619 END IF
3620*
3621 RETURN
3622*
3623* End of PDCHKMIN
3624*
3625 END
3626 SUBROUTINE pdchkmout( M, N, A, PA, IA, JA, DESCA, INFO )
3627*
3628* -- PBLAS test routine (version 2.0) --
3629* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3630* and University of California, Berkeley.
3631* April 1, 1998
3632*
3633* .. Scalar Arguments ..
3634 INTEGER IA, INFO, JA, M, N
3635* ..
3636* .. Array Arguments ..
3637 INTEGER DESCA( * )
3638 DOUBLE PRECISION A( * ), PA( * )
3639* ..
3640*
3641* Purpose
3642* =======
3643*
3644* PDCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged.
3645* The local array entries are compared element by element, and their
3646* difference is tested against 0.0 as well as the epsilon machine. No-
3647* tice that this difference should be numerically exactly the zero ma-
3648* chine, but because of the possible movement of some of the data we
3649* flagged differently a difference less than twice the epsilon machine.
3650* The largest error is reported.
3651*
3652* Notes
3653* =====
3654*
3655* A description vector is associated with each 2D block-cyclicly dis-
3656* tributed matrix. This vector stores the information required to
3657* establish the mapping between a matrix entry and its corresponding
3658* process and memory location.
3659*
3660* In the following comments, the character _ should be read as
3661* "of the distributed matrix". Let A be a generic term for any 2D
3662* block cyclicly distributed matrix. Its description vector is DESCA:
3663*
3664* NOTATION STORED IN EXPLANATION
3665* ---------------- --------------- ------------------------------------
3666* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3667* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3668* the NPROW x NPCOL BLACS process grid
3669* A is distributed over. The context
3670* itself is global, but the handle
3671* (the integer value) may vary.
3672* M_A (global) DESCA( M_ ) The number of rows in the distribu-
3673* ted matrix A, M_A >= 0.
3674* N_A (global) DESCA( N_ ) The number of columns in the distri-
3675* buted matrix A, N_A >= 0.
3676* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3677* block of the matrix A, IMB_A > 0.
3678* INB_A (global) DESCA( INB_ ) The number of columns of the upper
3679* left block of the matrix A,
3680* INB_A > 0.
3681* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3682* bute the last M_A-IMB_A rows of A,
3683* MB_A > 0.
3684* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3685* bute the last N_A-INB_A columns of
3686* A, NB_A > 0.
3687* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3688* row of the matrix A is distributed,
3689* NPROW > RSRC_A >= 0.
3690* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3691* first column of A is distributed.
3692* NPCOL > CSRC_A >= 0.
3693* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3694* array storing the local blocks of
3695* the distributed matrix A,
3696* IF( Lc( 1, N_A ) > 0 )
3697* LLD_A >= MAX( 1, Lr( 1, M_A ) )
3698* ELSE
3699* LLD_A >= 1.
3700*
3701* Let K be the number of rows of a matrix A starting at the global in-
3702* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3703* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3704* receive if these K rows were distributed over NPROW processes. If K
3705* is the number of columns of a matrix A starting at the global index
3706* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3707* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3708* these K columns were distributed over NPCOL processes.
3709*
3710* The values of Lr() and Lc() may be determined via a call to the func-
3711* tion PB_NUMROC:
3712* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3713* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3714*
3715* Arguments
3716* =========
3717*
3718* M (global input) INTEGER
3719* On entry, M specifies the number of rows of the submatrix
3720* sub( PA ). M must be at least zero.
3721*
3722* N (global input) INTEGER
3723* On entry, N specifies the number of columns of the submatrix
3724* sub( PA ). N must be at least zero.
3725*
3726* A (local input) DOUBLE PRECISION array
3727* On entry, A is an array of dimension (DESCA( M_ ),*). This
3728* array contains a local copy of the initial entire matrix PA.
3729*
3730* PA (local input) DOUBLE PRECISION array
3731* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3732* array contains the local entries of the matrix PA.
3733*
3734* IA (global input) INTEGER
3735* On entry, IA specifies A's global row index, which points to
3736* the beginning of the submatrix sub( A ).
3737*
3738* JA (global input) INTEGER
3739* On entry, JA specifies A's global column index, which points
3740* to the beginning of the submatrix sub( A ).
3741*
3742* DESCA (global and local input) INTEGER array
3743* On entry, DESCA is an integer array of dimension DLEN_. This
3744* is the array descriptor for the matrix A.
3745*
3746* INFO (global output) INTEGER
3747* On exit, if INFO = 0, no error has been found,
3748* If INFO > 0, the maximum abolute error found is in (0,eps],
3749* If INFO < 0, the maximum abolute error found is in (eps,+oo).
3750*
3751* -- Written on April 1, 1998 by
3752* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3753*
3754* =====================================================================
3755*
3756* .. Parameters ..
3757 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3758 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3759 $ RSRC_
3760 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3761 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3762 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3763 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3764 DOUBLE PRECISION ZERO
3765 PARAMETER ( ZERO = 0.0d+0 )
3766* ..
3767* .. Local Scalars ..
3768 LOGICAL COLREP, ROWREP
3769 INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK,
3770 $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST,
3771 $ NPCOL, NPROW
3772 DOUBLE PRECISION EPS, ERR, ERRMAX
3773* ..
3774* .. External Subroutines ..
3775 EXTERNAL blacs_gridinfo, dgamx2d, pderrset
3776* ..
3777* .. External Functions ..
3778 INTEGER PB_NUMROC
3779 DOUBLE PRECISION PDLAMCH
3780 EXTERNAL PDLAMCH, PB_NUMROC
3781* ..
3782* .. Intrinsic Functions ..
3783 INTRINSIC max, min, mod
3784* ..
3785* .. Executable Statements ..
3786*
3787 info = 0
3788 errmax = zero
3789*
3790* Quick return if possible
3791*
3792 IF( ( desca( m_ ).LE.0 ).OR.( desca( n_ ).LE.0 ) )
3793 $ RETURN
3794*
3795* Start the operations
3796*
3797 ictxt = desca( ctxt_ )
3798 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3799*
3800 eps = pdlamch( ictxt, 'eps' )
3801*
3802 mpall = pb_numroc( desca( m_ ), 1, desca( imb_ ), desca( mb_ ),
3803 $ myrow, desca( rsrc_ ), nprow )
3804*
3805 lda = desca( m_ )
3806 ldpa = desca( lld_ )
3807*
3808 ii = 1
3809 jj = 1
3810 rowrep = ( desca( rsrc_ ).EQ.-1 )
3811 colrep = ( desca( csrc_ ).EQ.-1 )
3812 icurcol = desca( csrc_ )
3813 IF( myrow.EQ.desca( rsrc_ ) .OR. rowrep ) THEN
3814 imba = desca( imb_ )
3815 ELSE
3816 imba = desca( mb_ )
3817 END IF
3818 IF( rowrep ) THEN
3819 myrowdist = 0
3820 ELSE
3821 myrowdist = mod( myrow - desca( rsrc_ ) + nprow, nprow )
3822 END IF
3823*
3824 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3825*
3826 j = 1
3827 IF( myrowdist.EQ.0 ) THEN
3828 i = 1
3829 ELSE
3830 i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3831 END IF
3832 ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3833 jb = min( desca( n_ ), desca( inb_ ) )
3834*
3835 DO 20 kk = 0, jb-1
3836 DO 10 ll = 0, ib-1
3837 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3838 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3839 $ CALL pderrset( err, errmax, a( i+ll+(j+kk-1)*lda ),
3840 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3841 10 CONTINUE
3842 20 CONTINUE
3843 IF( rowrep ) THEN
3844 i = i + imba
3845 ELSE
3846 i = i + imba + ( nprow - 1 ) * desca( mb_ )
3847 END IF
3848*
3849 DO 50 ii = imba + 1, mpall, desca( mb_ )
3850 ib = min( mpall-ii+1, desca( mb_ ) )
3851*
3852 DO 40 kk = 0, jb-1
3853 DO 30 ll = 0, ib-1
3854 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3855 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3856 $ CALL pderrset( err, errmax,
3857 $ a( i+ll+(j+kk-1)*lda ),
3858 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3859 30 CONTINUE
3860 40 CONTINUE
3861*
3862 IF( rowrep ) THEN
3863 i = i + desca( mb_ )
3864 ELSE
3865 i = i + nprow * desca( mb_ )
3866 END IF
3867*
3868 50 CONTINUE
3869*
3870 jj = jj + jb
3871*
3872 END IF
3873*
3874 icurcol = mod( icurcol + 1, npcol )
3875*
3876 DO 110 j = desca( inb_ ) + 1, desca( n_ ), desca( nb_ )
3877 jb = min( desca( n_ ) - j + 1, desca( nb_ ) )
3878*
3879 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3880*
3881 IF( myrowdist.EQ.0 ) THEN
3882 i = 1
3883 ELSE
3884 i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3885 END IF
3886*
3887 ii = 1
3888 ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3889 DO 70 kk = 0, jb-1
3890 DO 60 ll = 0, ib-1
3891 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3892 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3893 $ CALL pderrset( err, errmax,
3894 $ a( i+ll+(j+kk-1)*lda ),
3895 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3896 60 CONTINUE
3897 70 CONTINUE
3898 IF( rowrep ) THEN
3899 i = i + imba
3900 ELSE
3901 i = i + imba + ( nprow - 1 ) * desca( mb_ )
3902 END IF
3903*
3904 DO 100 ii = imba+1, mpall, desca( mb_ )
3905 ib = min( mpall-ii+1, desca( mb_ ) )
3906*
3907 DO 90 kk = 0, jb-1
3908 DO 80 ll = 0, ib-1
3909 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3910 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3911 $ CALL pderrset( err, errmax,
3912 $ a( i+ll+(j+kk-1)*lda ),
3913 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3914 80 CONTINUE
3915 90 CONTINUE
3916*
3917 IF( rowrep ) THEN
3918 i = i + desca( mb_ )
3919 ELSE
3920 i = i + nprow * desca( mb_ )
3921 END IF
3922*
3923 100 CONTINUE
3924*
3925 jj = jj + jb
3926*
3927 END IF
3928*
3929 icurcol = mod( icurcol + 1, npcol )
3930* INSERT MODE
3931 110 CONTINUE
3932*
3933 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3934 $ -1, -1 )
3935*
3936 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3937 info = 1
3938 ELSE IF( errmax.GT.eps ) THEN
3939 info = -1
3940 END IF
3941*
3942 RETURN
3943*
3944* End of PDCHKMOUT
3945*
3946 END
3947 SUBROUTINE pdmprnt( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT,
3948 $ CMATNM )
3949*
3950* -- PBLAS test routine (version 2.0) --
3951* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3952* and University of California, Berkeley.
3953* April 1, 1998
3954*
3955* .. Scalar Arguments ..
3956 INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT
3957* ..
3958* .. Array Arguments ..
3959 CHARACTER*(*) CMATNM
3960 DOUBLE PRECISION A( LDA, * )
3961* ..
3962*
3963* Purpose
3964* =======
3965*
3966* PDMPRNT prints to the standard output an array A of size m by n. Only
3967* the process of coordinates ( IRPRNT, ICPRNT ) is printing.
3968*
3969* Arguments
3970* =========
3971*
3972* ICTXT (local input) INTEGER
3973* On entry, ICTXT specifies the BLACS context handle, indica-
3974* ting the global context of the operation. The context itself
3975* is global, but the value of ICTXT is local.
3976*
3977* NOUT (global input) INTEGER
3978* On entry, NOUT specifies the unit number for the output file.
3979* When NOUT is 6, output to screen, when NOUT is 0, output to
3980* stderr. NOUT is only defined for process 0.
3981*
3982* M (global input) INTEGER
3983* On entry, M specifies the number of rows of the matrix A. M
3984* must be at least zero.
3985*
3986* N (global input) INTEGER
3987* On entry, N specifies the number of columns of the matrix A.
3988* N must be at least zero.
3989*
3990* A (local input) DOUBLE PRECISION array
3991* On entry, A is an array of dimension (LDA,N). The leading m
3992* by n part of this array is printed.
3993*
3994* LDA (local input) INTEGER
3995* On entry, LDA specifies the leading dimension of the local
3996* array A to be printed. LDA must be at least MAX( 1, M ).
3997*
3998* IRPRNT (global input) INTEGER
3999* On entry, IRPRNT specifies the process row coordinate of the
4000* printing process.
4001*
4002* ICPRNT (global input) INTEGER
4003* On entry, ICPRNT specifies the process column coordinate of
4004* the printing process.
4005*
4006* CMATNM (global input) CHARACTER*(*)
4007* On entry, CMATNM specifies the identifier of the matrix to be
4008* printed.
4009*
4010* -- Written on April 1, 1998 by
4011* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4012*
4013* =====================================================================
4014*
4015* .. Local Scalars ..
4016 INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW
4017* ..
4018* .. External Subroutines ..
4019 EXTERNAL BLACS_GRIDINFO
4020* ..
4021* .. Executable Statements ..
4022*
4023* Quick return if possible
4024*
4025 IF( ( m.LE.0 ).OR.( n.LE.0 ) )
4026 $ RETURN
4027*
4028* Get grid parameters
4029*
4030 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4031*
4032 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4033*
4034 WRITE( nout, fmt = * )
4035 DO 20 j = 1, n
4036*
4037 DO 10 i = 1, m
4038*
4039 WRITE( nout, fmt = 9999 ) cmatnm, i, j, a( i, j )
4040*
4041 10 CONTINUE
4042*
4043 20 CONTINUE
4044*
4045 END IF
4046*
4047 9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', d30.18 )
4048*
4049 RETURN
4050*
4051* End of PDMPRNT
4052*
4053 END
4054 SUBROUTINE pdvprnt( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT,
4055 $ CVECNM )
4056*
4057* -- PBLAS test routine (version 2.0) --
4058* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4059* and University of California, Berkeley.
4060* April 1, 1998
4061*
4062* .. Scalar Arguments ..
4063 INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT
4064* ..
4065* .. Array Arguments ..
4066 CHARACTER*(*) CVECNM
4067 DOUBLE PRECISION X( * )
4068* ..
4069*
4070* Purpose
4071* =======
4072*
4073* PDVPRNT prints to the standard output an vector x of length n. Only
4074* the process of coordinates ( IRPRNT, ICPRNT ) is printing.
4075*
4076* Arguments
4077* =========
4078*
4079* ICTXT (local input) INTEGER
4080* On entry, ICTXT specifies the BLACS context handle, indica-
4081* ting the global context of the operation. The context itself
4082* is global, but the value of ICTXT is local.
4083*
4084* NOUT (global input) INTEGER
4085* On entry, NOUT specifies the unit number for the output file.
4086* When NOUT is 6, output to screen, when NOUT is 0, output to
4087* stderr. NOUT is only defined for process 0.
4088*
4089* N (global input) INTEGER
4090* On entry, N specifies the length of the vector X. N must be
4091* at least zero.
4092*
4093* X (global input) DOUBLE PRECISION array
4094* On entry, X is an array of dimension at least
4095* ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen-
4096* ted array X must contain the vector x.
4097*
4098* INCX (global input) INTEGER.
4099* On entry, INCX specifies the increment for the elements of X.
4100* INCX must not be zero.
4101*
4102* IRPRNT (global input) INTEGER
4103* On entry, IRPRNT specifies the process row coordinate of the
4104* printing process.
4105*
4106* ICPRNT (global input) INTEGER
4107* On entry, ICPRNT specifies the process column coordinate of
4108* the printing process.
4109*
4110* CVECNM (global input) CHARACTER*(*)
4111* On entry, CVECNM specifies the identifier of the vector to be
4112* printed.
4113*
4114* -- Written on April 1, 1998 by
4115* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4116*
4117* =====================================================================
4118*
4119* .. Local Scalars ..
4120 INTEGER I, MYCOL, MYROW, NPCOL, NPROW
4121* ..
4122* .. External Subroutines ..
4123 EXTERNAL BLACS_GRIDINFO
4124* ..
4125* .. Executable Statements ..
4126*
4127* Quick return if possible
4128*
4129 IF( n.LE.0 )
4130 $ RETURN
4131*
4132* Get grid parameters
4133*
4134 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4135*
4136 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4137*
4138 WRITE( nout, fmt = * )
4139 DO 10 i = 1, 1 + ( n-1 )*incx, incx
4140*
4141 WRITE( nout, fmt = 9999 ) cvecnm, i, x( i )
4142*
4143 10 CONTINUE
4144*
4145 END IF
4146*
4147 9999 FORMAT( 1x, a, '(', i6, ')=', d30.18 )
4148*
4149 RETURN
4150*
4151* End of PDVPRNT
4152*
4153 END
4154 SUBROUTINE pdmvch( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
4155 $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY,
4156 $ DESCY, INCY, G, ERR, INFO )
4157*
4158* -- PBLAS test routine (version 2.0) --
4159* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4160* and University of California, Berkeley.
4161* April 1, 1998
4162*
4163* .. Scalar Arguments ..
4164 CHARACTER*1 TRANS
4165 INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4166 $ JY, M, N
4167 DOUBLE PRECISION ALPHA, BETA, ERR
4168* ..
4169* .. Array Arguments ..
4170 INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4171 DOUBLE PRECISION A( * ), G( * ), PY( * ), X( * ), Y( * )
4172* ..
4173*
4174* Purpose
4175* =======
4176*
4177* PDMVCH checks the results of the computational tests.
4178*
4179* Notes
4180* =====
4181*
4182* A description vector is associated with each 2D block-cyclicly dis-
4183* tributed matrix. This vector stores the information required to
4184* establish the mapping between a matrix entry and its corresponding
4185* process and memory location.
4186*
4187* In the following comments, the character _ should be read as
4188* "of the distributed matrix". Let A be a generic term for any 2D
4189* block cyclicly distributed matrix. Its description vector is DESCA:
4190*
4191* NOTATION STORED IN EXPLANATION
4192* ---------------- --------------- ------------------------------------
4193* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4194* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4195* the NPROW x NPCOL BLACS process grid
4196* A is distributed over. The context
4197* itself is global, but the handle
4198* (the integer value) may vary.
4199* M_A (global) DESCA( M_ ) The number of rows in the distribu-
4200* ted matrix A, M_A >= 0.
4201* N_A (global) DESCA( N_ ) The number of columns in the distri-
4202* buted matrix A, N_A >= 0.
4203* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4204* block of the matrix A, IMB_A > 0.
4205* INB_A (global) DESCA( INB_ ) The number of columns of the upper
4206* left block of the matrix A,
4207* INB_A > 0.
4208* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4209* bute the last M_A-IMB_A rows of A,
4210* MB_A > 0.
4211* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4212* bute the last N_A-INB_A columns of
4213* A, NB_A > 0.
4214* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4215* row of the matrix A is distributed,
4216* NPROW > RSRC_A >= 0.
4217* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4218* first column of A is distributed.
4219* NPCOL > CSRC_A >= 0.
4220* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4221* array storing the local blocks of
4222* the distributed matrix A,
4223* IF( Lc( 1, N_A ) > 0 )
4224* LLD_A >= MAX( 1, Lr( 1, M_A ) )
4225* ELSE
4226* LLD_A >= 1.
4227*
4228* Let K be the number of rows of a matrix A starting at the global in-
4229* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4230* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4231* receive if these K rows were distributed over NPROW processes. If K
4232* is the number of columns of a matrix A starting at the global index
4233* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4234* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4235* these K columns were distributed over NPCOL processes.
4236*
4237* The values of Lr() and Lc() may be determined via a call to the func-
4238* tion PB_NUMROC:
4239* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4240* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4241*
4242* Arguments
4243* =========
4244*
4245* ICTXT (local input) INTEGER
4246* On entry, ICTXT specifies the BLACS context handle, indica-
4247* ting the global context of the operation. The context itself
4248* is global, but the value of ICTXT is local.
4249*
4250* TRANS (global input) CHARACTER*1
4251* On entry, TRANS specifies which matrix-vector product is to
4252* be computed as follows:
4253* If TRANS = 'N',
4254* sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ),
4255* otherwise
4256* sub( Y ) = BETA * sub( Y ) + sub( A )' * sub( X ).
4257*
4258* M (global input) INTEGER
4259* On entry, M specifies the number of rows of the submatrix
4260* operand matrix A. M must be at least zero.
4261*
4262* N (global input) INTEGER
4263* On entry, N specifies the number of columns of the subma-
4264* trix operand matrix A. N must be at least zero.
4265*
4266* ALPHA (global input) DOUBLE PRECISION
4267* On entry, ALPHA specifies the scalar alpha.
4268*
4269* A (local input) DOUBLE PRECISION array
4270* On entry, A is an array of dimension (DESCA( M_ ),*). This
4271* array contains a local copy of the initial entire matrix PA.
4272*
4273* IA (global input) INTEGER
4274* On entry, IA specifies A's global row index, which points to
4275* the beginning of the submatrix sub( A ).
4276*
4277* JA (global input) INTEGER
4278* On entry, JA specifies A's global column index, which points
4279* to the beginning of the submatrix sub( A ).
4280*
4281* DESCA (global and local input) INTEGER array
4282* On entry, DESCA is an integer array of dimension DLEN_. This
4283* is the array descriptor for the matrix A.
4284*
4285* X (local input) DOUBLE PRECISION array
4286* On entry, X is an array of dimension (DESCX( M_ ),*). This
4287* array contains a local copy of the initial entire matrix PX.
4288*
4289* IX (global input) INTEGER
4290* On entry, IX specifies X's global row index, which points to
4291* the beginning of the submatrix sub( X ).
4292*
4293* JX (global input) INTEGER
4294* On entry, JX specifies X's global column index, which points
4295* to the beginning of the submatrix sub( X ).
4296*
4297* DESCX (global and local input) INTEGER array
4298* On entry, DESCX is an integer array of dimension DLEN_. This
4299* is the array descriptor for the matrix X.
4300*
4301* INCX (global input) INTEGER
4302* On entry, INCX specifies the global increment for the
4303* elements of X. Only two values of INCX are supported in
4304* this version, namely 1 and M_X. INCX must not be zero.
4305*
4306* BETA (global input) DOUBLE PRECISION
4307* On entry, BETA specifies the scalar beta.
4308*
4309* Y (local input/local output) DOUBLE PRECISION array
4310* On entry, Y is an array of dimension (DESCY( M_ ),*). This
4311* array contains a local copy of the initial entire matrix PY.
4312*
4313* PY (local input) DOUBLE PRECISION array
4314* On entry, PY is an array of dimension (DESCY( LLD_ ),*). This
4315* array contains the local entries of the matrix PY.
4316*
4317* IY (global input) INTEGER
4318* On entry, IY specifies Y's global row index, which points to
4319* the beginning of the submatrix sub( Y ).
4320*
4321* JY (global input) INTEGER
4322* On entry, JY specifies Y's global column index, which points
4323* to the beginning of the submatrix sub( Y ).
4324*
4325* DESCY (global and local input) INTEGER array
4326* On entry, DESCY is an integer array of dimension DLEN_. This
4327* is the array descriptor for the matrix Y.
4328*
4329* INCY (global input) INTEGER
4330* On entry, INCY specifies the global increment for the
4331* elements of Y. Only two values of INCY are supported in
4332* this version, namely 1 and M_Y. INCY must not be zero.
4333*
4334* G (workspace) DOUBLE PRECISION array
4335* On entry, G is an array of dimension at least MAX( M, N ). G
4336* is used to compute the gauges.
4337*
4338* ERR (global output) DOUBLE PRECISION
4339* On exit, ERR specifies the largest error in absolute value.
4340*
4341* INFO (global output) INTEGER
4342* On exit, if INFO <> 0, the result is less than half accurate.
4343*
4344* -- Written on April 1, 1998 by
4345* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4346*
4347* =====================================================================
4348*
4349* .. Parameters ..
4350 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4351 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4352 $ RSRC_
4353 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4354 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4355 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4356 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4357 DOUBLE PRECISION ZERO, ONE
4358 parameter( zero = 0.0d+0, one = 1.0d+0 )
4359* ..
4360* .. Local Scalars ..
4361 LOGICAL COLREP, ROWREP, TRAN
4362 INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX,
4363 $ IOFFY, IYCOL, IYROW, J, JB, JJY, JN, KK, LDA,
4364 $ LDPY, LDX, LDY, ML, MYCOL, MYROW, NL, NPCOL,
4365 $ nprow
4366 DOUBLE PRECISION EPS, ERRI, GTMP, TBETA, YTMP
4367* ..
4368* .. External Subroutines ..
4369 EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
4370* ..
4371* .. External Functions ..
4372 LOGICAL LSAME
4373 DOUBLE PRECISION PDLAMCH
4374 EXTERNAL lsame, pdlamch
4375* ..
4376* .. Intrinsic Functions ..
4377 INTRINSIC abs, max, min, mod, sqrt
4378* ..
4379* .. Executable Statements ..
4380*
4381 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4382*
4383 eps = pdlamch( ictxt, 'eps' )
4384*
4385 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
4386 tbeta = one
4387 ELSE
4388 tbeta = beta
4389 END IF
4390*
4391 tran = lsame( trans, 'T' ).OR.lsame( trans, 'C' )
4392 IF( tran ) THEN
4393 ml = n
4394 nl = m
4395 ELSE
4396 ml = m
4397 nl = n
4398 END IF
4399*
4400 lda = max( 1, desca( m_ ) )
4401 ldx = max( 1, descx( m_ ) )
4402 ldy = max( 1, descy( m_ ) )
4403*
4404* Compute expected result in Y using data in A, X and Y.
4405* Compute gauges in G. This part of the computation is performed
4406* by every process in the grid.
4407*
4408 ioffy = iy + ( jy - 1 ) * ldy
4409 DO 30 i = 1, ml
4410 ytmp = zero
4411 gtmp = zero
4412 ioffx = ix + ( jx - 1 ) * ldx
4413 IF( tran )THEN
4414 ioffa = ia + ( ja + i - 2 ) * lda
4415 DO 10 j = 1, nl
4416 ytmp = ytmp + a( ioffa ) * x( ioffx )
4417 gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4418 ioffa = ioffa + 1
4419 ioffx = ioffx + incx
4420 10 CONTINUE
4421 ELSE
4422 ioffa = ia + i - 1 + ( ja - 1 ) * lda
4423 DO 20 j = 1, nl
4424 ytmp = ytmp + a( ioffa ) * x( ioffx )
4425 gtmp = gtmp + abs( a( ioffa ) * x( ioffx ) )
4426 ioffa = ioffa + lda
4427 ioffx = ioffx + incx
4428 20 CONTINUE
4429 END IF
4430 g( i ) = abs( alpha ) * gtmp + abs( tbeta * y( ioffy ) )
4431 y( ioffy ) = alpha * ytmp + tbeta * y( ioffy )
4432 ioffy = ioffy + incy
4433 30 CONTINUE
4434*
4435* Compute the error ratio for this result.
4436*
4437 err = zero
4438 info = 0
4439 ldpy = descy( lld_ )
4440 ioffy = iy + ( jy - 1 ) * ldy
4441 CALL pb_infog2l( iy, jy, descy, nprow, npcol, myrow, mycol, iiy,
4442 $ jjy, iyrow, iycol )
4443 icurrow = iyrow
4444 icurcol = iycol
4445 rowrep = ( iyrow.EQ.-1 )
4446 colrep = ( iycol.EQ.-1 )
4447*
4448 IF( incy.EQ.descy( m_ ) ) THEN
4449*
4450* sub( Y ) is a row vector
4451*
4452 jb = descy( inb_ ) - jy + 1
4453 IF( jb.LE.0 )
4454 $ jb = ( ( -jb ) / descy( nb_ ) + 1 ) * descy( nb_ ) + jb
4455 jb = min( jb, ml )
4456 jn = jy + jb - 1
4457*
4458 DO 50 j = jy, jn
4459*
4460 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4461 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4462 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4463 IF( g( j-jy+1 ).NE.zero )
4464 $ erri = erri / g( j-jy+1 )
4465 err = max( err, erri )
4466 IF( err*sqrt( eps ).GE.one )
4467 $ info = 1
4468 jjy = jjy + 1
4469 END IF
4470*
4471 ioffy = ioffy + incy
4472*
4473 50 CONTINUE
4474*
4475 icurcol = mod( icurcol+1, npcol )
4476*
4477 DO 70 j = jn+1, jy+ml-1, descy( nb_ )
4478 jb = min( jy+ml-j, descy( nb_ ) )
4479*
4480 DO 60 kk = 0, jb-1
4481*
4482 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4483 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4484 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4485 IF( g( j+kk-jy+1 ).NE.zero )
4486 $ erri = erri / g( j+kk-jy+1 )
4487 err = max( err, erri )
4488 IF( err*sqrt( eps ).GE.one )
4489 $ info = 1
4490 jjy = jjy + 1
4491 END IF
4492*
4493 ioffy = ioffy + incy
4494*
4495 60 CONTINUE
4496*
4497 icurcol = mod( icurcol+1, npcol )
4498*
4499 70 CONTINUE
4500*
4501 ELSE
4502*
4503* sub( Y ) is a column vector
4504*
4505 ib = descy( imb_ ) - iy + 1
4506 IF( ib.LE.0 )
4507 $ ib = ( ( -ib ) / descy( mb_ ) + 1 ) * descy( mb_ ) + ib
4508 ib = min( ib, ml )
4509 in = iy + ib - 1
4510*
4511 DO 80 i = iy, in
4512*
4513 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4514 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4515 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4516 IF( g( i-iy+1 ).NE.zero )
4517 $ erri = erri / g( i-iy+1 )
4518 err = max( err, erri )
4519 IF( err*sqrt( eps ).GE.one )
4520 $ info = 1
4521 iiy = iiy + 1
4522 END IF
4523*
4524 ioffy = ioffy + incy
4525*
4526 80 CONTINUE
4527*
4528 icurrow = mod( icurrow+1, nprow )
4529*
4530 DO 100 i = in+1, iy+ml-1, descy( mb_ )
4531 ib = min( iy+ml-i, descy( mb_ ) )
4532*
4533 DO 90 kk = 0, ib-1
4534*
4535 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4536 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4537 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4538 IF( g( i+kk-iy+1 ).NE.zero )
4539 $ erri = erri / g( i+kk-iy+1 )
4540 err = max( err, erri )
4541 IF( err*sqrt( eps ).GE.one )
4542 $ info = 1
4543 iiy = iiy + 1
4544 END IF
4545*
4546 ioffy = ioffy + incy
4547*
4548 90 CONTINUE
4549*
4550 icurrow = mod( icurrow+1, nprow )
4551*
4552 100 CONTINUE
4553*
4554 END IF
4555*
4556* If INFO = 0, all results are at least half accurate.
4557*
4558 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4559 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4560 $ mycol )
4561*
4562 RETURN
4563*
4564* End of PDMVCH
4565*
4566 END
4567 SUBROUTINE pdvmch( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4568 $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA,
4569 $ DESCA, G, ERR, INFO )
4570*
4571* -- PBLAS test routine (version 2.0) --
4572* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4573* and University of California, Berkeley.
4574* April 1, 1998
4575*
4576* .. Scalar Arguments ..
4577 CHARACTER*1 UPLO
4578 INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4579 $ JY, M, N
4580 DOUBLE PRECISION ALPHA, ERR
4581* ..
4582* .. Array Arguments ..
4583 INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4584 DOUBLE PRECISION A( * ), G( * ), PA( * ), X( * ), Y( * )
4585* ..
4586*
4587* Purpose
4588* =======
4589*
4590* PDVMCH checks the results of the computational tests.
4591*
4592* Notes
4593* =====
4594*
4595* A description vector is associated with each 2D block-cyclicly dis-
4596* tributed matrix. This vector stores the information required to
4597* establish the mapping between a matrix entry and its corresponding
4598* process and memory location.
4599*
4600* In the following comments, the character _ should be read as
4601* "of the distributed matrix". Let A be a generic term for any 2D
4602* block cyclicly distributed matrix. Its description vector is DESCA:
4603*
4604* NOTATION STORED IN EXPLANATION
4605* ---------------- --------------- ------------------------------------
4606* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4607* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4608* the NPROW x NPCOL BLACS process grid
4609* A is distributed over. The context
4610* itself is global, but the handle
4611* (the integer value) may vary.
4612* M_A (global) DESCA( M_ ) The number of rows in the distribu-
4613* ted matrix A, M_A >= 0.
4614* N_A (global) DESCA( N_ ) The number of columns in the distri-
4615* buted matrix A, N_A >= 0.
4616* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4617* block of the matrix A, IMB_A > 0.
4618* INB_A (global) DESCA( INB_ ) The number of columns of the upper
4619* left block of the matrix A,
4620* INB_A > 0.
4621* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4622* bute the last M_A-IMB_A rows of A,
4623* MB_A > 0.
4624* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4625* bute the last N_A-INB_A columns of
4626* A, NB_A > 0.
4627* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4628* row of the matrix A is distributed,
4629* NPROW > RSRC_A >= 0.
4630* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4631* first column of A is distributed.
4632* NPCOL > CSRC_A >= 0.
4633* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4634* array storing the local blocks of
4635* the distributed matrix A,
4636* IF( Lc( 1, N_A ) > 0 )
4637* LLD_A >= MAX( 1, Lr( 1, M_A ) )
4638* ELSE
4639* LLD_A >= 1.
4640*
4641* Let K be the number of rows of a matrix A starting at the global in-
4642* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4643* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4644* receive if these K rows were distributed over NPROW processes. If K
4645* is the number of columns of a matrix A starting at the global index
4646* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4647* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4648* these K columns were distributed over NPCOL processes.
4649*
4650* The values of Lr() and Lc() may be determined via a call to the func-
4651* tion PB_NUMROC:
4652* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4653* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4654*
4655* Arguments
4656* =========
4657*
4658* ICTXT (local input) INTEGER
4659* On entry, ICTXT specifies the BLACS context handle, indica-
4660* ting the global context of the operation. The context itself
4661* is global, but the value of ICTXT is local.
4662*
4663* UPLO (global input) CHARACTER*1
4664* On entry, UPLO specifies which part of the submatrix sub( A )
4665* is to be referenced as follows:
4666* If UPLO = 'L', only the lower triangular part,
4667* If UPLO = 'U', only the upper triangular part,
4668* else the entire matrix is to be referenced.
4669*
4670* M (global input) INTEGER
4671* On entry, M specifies the number of rows of the submatrix
4672* operand matrix A. M must be at least zero.
4673*
4674* N (global input) INTEGER
4675* On entry, N specifies the number of columns of the subma-
4676* trix operand matrix A. N must be at least zero.
4677*
4678* ALPHA (global input) DOUBLE PRECISION
4679* On entry, ALPHA specifies the scalar alpha.
4680*
4681* X (local input) DOUBLE PRECISION array
4682* On entry, X is an array of dimension (DESCX( M_ ),*). This
4683* array contains a local copy of the initial entire matrix PX.
4684*
4685* IX (global input) INTEGER
4686* On entry, IX specifies X's global row index, which points to
4687* the beginning of the submatrix sub( X ).
4688*
4689* JX (global input) INTEGER
4690* On entry, JX specifies X's global column index, which points
4691* to the beginning of the submatrix sub( X ).
4692*
4693* DESCX (global and local input) INTEGER array
4694* On entry, DESCX is an integer array of dimension DLEN_. This
4695* is the array descriptor for the matrix X.
4696*
4697* INCX (global input) INTEGER
4698* On entry, INCX specifies the global increment for the
4699* elements of X. Only two values of INCX are supported in
4700* this version, namely 1 and M_X. INCX must not be zero.
4701*
4702* Y (local input) DOUBLE PRECISION array
4703* On entry, Y is an array of dimension (DESCY( M_ ),*). This
4704* array contains a local copy of the initial entire matrix PY.
4705*
4706* IY (global input) INTEGER
4707* On entry, IY specifies Y's global row index, which points to
4708* the beginning of the submatrix sub( Y ).
4709*
4710* JY (global input) INTEGER
4711* On entry, JY specifies Y's global column index, which points
4712* to the beginning of the submatrix sub( Y ).
4713*
4714* DESCY (global and local input) INTEGER array
4715* On entry, DESCY is an integer array of dimension DLEN_. This
4716* is the array descriptor for the matrix Y.
4717*
4718* INCY (global input) INTEGER
4719* On entry, INCY specifies the global increment for the
4720* elements of Y. Only two values of INCY are supported in
4721* this version, namely 1 and M_Y. INCY must not be zero.
4722*
4723* A (local input/local output) DOUBLE PRECISION array
4724* On entry, A is an array of dimension (DESCA( M_ ),*). This
4725* array contains a local copy of the initial entire matrix PA.
4726*
4727* PA (local input) DOUBLE PRECISION array
4728* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
4729* array contains the local entries of the matrix PA.
4730*
4731* IA (global input) INTEGER
4732* On entry, IA specifies A's global row index, which points to
4733* the beginning of the submatrix sub( A ).
4734*
4735* JA (global input) INTEGER
4736* On entry, JA specifies A's global column index, which points
4737* to the beginning of the submatrix sub( A ).
4738*
4739* DESCA (global and local input) INTEGER array
4740* On entry, DESCA is an integer array of dimension DLEN_. This
4741* is the array descriptor for the matrix A.
4742*
4743* G (workspace) DOUBLE PRECISION array
4744* On entry, G is an array of dimension at least MAX( M, N ). G
4745* is used to compute the gauges.
4746*
4747* ERR (global output) DOUBLE PRECISION
4748* On exit, ERR specifies the largest error in absolute value.
4749*
4750* INFO (global output) INTEGER
4751* On exit, if INFO <> 0, the result is less than half accurate.
4752*
4753* -- Written on April 1, 1998 by
4754* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4755*
4756* =====================================================================
4757*
4758* .. Parameters ..
4759 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4760 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4761 $ RSRC_
4762 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4763 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4764 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4765 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4766 DOUBLE PRECISION ZERO, ONE
4767 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
4768* ..
4769* .. Local Scalars ..
4770 LOGICAL COLREP, LOWER, ROWREP, UPPER
4771 INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
4772 $ in, ioffa, ioffx, ioffy, j, jja, kk, lda, ldpa,
4773 $ ldx, ldy, mycol, myrow, npcol, nprow
4774 DOUBLE PRECISION ATMP, EPS, ERRI, GTMP
4775* ..
4776* .. External Subroutines ..
4777 EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
4778* ..
4779* .. External Functions ..
4780 LOGICAL LSAME
4781 DOUBLE PRECISION PDLAMCH
4782 EXTERNAL LSAME, PDLAMCH
4783* ..
4784* .. Intrinsic Functions ..
4785 INTRINSIC abs, max, min, mod, sqrt
4786* ..
4787* .. Executable Statements ..
4788*
4789 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4790*
4791 eps = pdlamch( ictxt, 'eps' )
4792*
4793 upper = lsame( uplo, 'U' )
4794 lower = lsame( uplo, 'L' )
4795*
4796 lda = max( 1, desca( m_ ) )
4797 ldx = max( 1, descx( m_ ) )
4798 ldy = max( 1, descy( m_ ) )
4799*
4800* Compute expected result in A using data in A, X and Y.
4801* Compute gauges in G. This part of the computation is performed
4802* by every process in the grid.
4803*
4804 DO 70 j = 1, n
4805*
4806 ioffy = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
4807*
4808 IF( lower ) THEN
4809 ibeg = j
4810 iend = m
4811 DO 10 i = 1, j-1
4812 g( i ) = zero
4813 10 CONTINUE
4814 ELSE IF( upper ) THEN
4815 ibeg = 1
4816 iend = j
4817 DO 20 i = j+1, m
4818 g( i ) = zero
4819 20 CONTINUE
4820 ELSE
4821 ibeg = 1
4822 iend = m
4823 END IF
4824*
4825 DO 30 i = ibeg, iend
4826*
4827 ioffx = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
4828 ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
4829 atmp = x( ioffx ) * y( ioffy )
4830 gtmp = abs( x( ioffx ) * y( ioffy ) )
4831 g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
4832 a( ioffa ) = alpha * atmp + a( ioffa )
4833*
4834 30 CONTINUE
4835*
4836* Compute the error ratio for this result.
4837*
4838 info = 0
4839 err = zero
4840 ldpa = desca( lld_ )
4841 ioffa = ia + ( ja + j - 2 ) * lda
4842 CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
4843 $ iia, jja, iarow, iacol )
4844 rowrep = ( iarow.EQ.-1 )
4845 colrep = ( iacol.EQ.-1 )
4846*
4847 IF( mycol.EQ.iacol .OR. colrep ) THEN
4848*
4849 icurrow = iarow
4850 ib = desca( imb_ ) - ia + 1
4851 IF( ib.LE.0 )
4852 $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
4853 ib = min( ib, m )
4854 in = ia + ib - 1
4855*
4856 DO 40 i = ia, in
4857*
4858 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4859 erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
4860 IF( g( i-ia+1 ).NE.zero )
4861 $ erri = erri / g( i-ia+1 )
4862 err = max( err, erri )
4863 IF( err*sqrt( eps ).GE.one )
4864 $ info = 1
4865 iia = iia + 1
4866 END IF
4867*
4868 ioffa = ioffa + 1
4869*
4870 40 CONTINUE
4871*
4872 icurrow = mod( icurrow+1, nprow )
4873*
4874 DO 60 i = in+1, ia+m-1, desca( mb_ )
4875 ib = min( ia+m-i, desca( mb_ ) )
4876*
4877 DO 50 kk = 0, ib-1
4878*
4879 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4880 erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
4881 IF( g( i+kk-ia+1 ).NE.zero )
4882 $ erri = erri / g( i+kk-ia+1 )
4883 err = max( err, erri )
4884 IF( err*sqrt( eps ).GE.one )
4885 $ info = 1
4886 iia = iia + 1
4887 END IF
4888*
4889 ioffa = ioffa + 1
4890*
4891 50 CONTINUE
4892*
4893 icurrow = mod( icurrow+1, nprow )
4894*
4895 60 CONTINUE
4896*
4897 END IF
4898*
4899* If INFO = 0, all results are at least half accurate.
4900*
4901 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4902 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4903 $ mycol )
4904 IF( info.NE.0 )
4905 $ GO TO 80
4906*
4907 70 CONTINUE
4908*
4909 80 CONTINUE
4910*
4911 RETURN
4912*
4913* End of PDVMCH
4914*
4915 END
4916 SUBROUTINE pdvmch2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4917 $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA,
4918 $ JA, DESCA, G, ERR, INFO )
4919*
4920* -- PBLAS test routine (version 2.0) --
4921* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4922* and University of California, Berkeley.
4923* April 1, 1998
4924*
4925* .. Scalar Arguments ..
4926 CHARACTER*1 UPLO
4927 INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4928 $ jy, m, n
4929 DOUBLE PRECISION ALPHA, ERR
4930* ..
4931* .. Array Arguments ..
4932 INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4933 DOUBLE PRECISION A( * ), G( * ), PA( * ), X( * ), Y( * )
4934* ..
4935*
4936* Purpose
4937* =======
4938*
4939* PDVMCH2 checks the results of the computational tests.
4940*
4941* Notes
4942* =====
4943*
4944* A description vector is associated with each 2D block-cyclicly dis-
4945* tributed matrix. This vector stores the information required to
4946* establish the mapping between a matrix entry and its corresponding
4947* process and memory location.
4948*
4949* In the following comments, the character _ should be read as
4950* "of the distributed matrix". Let A be a generic term for any 2D
4951* block cyclicly distributed matrix. Its description vector is DESCA:
4952*
4953* NOTATION STORED IN EXPLANATION
4954* ---------------- --------------- ------------------------------------
4955* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4956* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4957* the NPROW x NPCOL BLACS process grid
4958* A is distributed over. The context
4959* itself is global, but the handle
4960* (the integer value) may vary.
4961* M_A (global) DESCA( M_ ) The number of rows in the distribu-
4962* ted matrix A, M_A >= 0.
4963* N_A (global) DESCA( N_ ) The number of columns in the distri-
4964* buted matrix A, N_A >= 0.
4965* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4966* block of the matrix A, IMB_A > 0.
4967* INB_A (global) DESCA( INB_ ) The number of columns of the upper
4968* left block of the matrix A,
4969* INB_A > 0.
4970* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4971* bute the last M_A-IMB_A rows of A,
4972* MB_A > 0.
4973* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4974* bute the last N_A-INB_A columns of
4975* A, NB_A > 0.
4976* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4977* row of the matrix A is distributed,
4978* NPROW > RSRC_A >= 0.
4979* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4980* first column of A is distributed.
4981* NPCOL > CSRC_A >= 0.
4982* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4983* array storing the local blocks of
4984* the distributed matrix A,
4985* IF( Lc( 1, N_A ) > 0 )
4986* LLD_A >= MAX( 1, Lr( 1, M_A ) )
4987* ELSE
4988* LLD_A >= 1.
4989*
4990* Let K be the number of rows of a matrix A starting at the global in-
4991* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4992* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4993* receive if these K rows were distributed over NPROW processes. If K
4994* is the number of columns of a matrix A starting at the global index
4995* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4996* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4997* these K columns were distributed over NPCOL processes.
4998*
4999* The values of Lr() and Lc() may be determined via a call to the func-
5000* tion PB_NUMROC:
5001* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5002* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5003*
5004* Arguments
5005* =========
5006*
5007* ICTXT (local input) INTEGER
5008* On entry, ICTXT specifies the BLACS context handle, indica-
5009* ting the global context of the operation. The context itself
5010* is global, but the value of ICTXT is local.
5011*
5012* UPLO (global input) CHARACTER*1
5013* On entry, UPLO specifies which part of the submatrix sub( A )
5014* is to be referenced as follows:
5015* If UPLO = 'L', only the lower triangular part,
5016* If UPLO = 'U', only the upper triangular part,
5017* else the entire matrix is to be referenced.
5018*
5019* M (global input) INTEGER
5020* On entry, M specifies the number of rows of the submatrix
5021* operand matrix A. M must be at least zero.
5022*
5023* N (global input) INTEGER
5024* On entry, N specifies the number of columns of the subma-
5025* trix operand matrix A. N must be at least zero.
5026*
5027* ALPHA (global input) DOUBLE PRECISION
5028* On entry, ALPHA specifies the scalar alpha.
5029*
5030* X (local input) DOUBLE PRECISION array
5031* On entry, X is an array of dimension (DESCX( M_ ),*). This
5032* array contains a local copy of the initial entire matrix PX.
5033*
5034* IX (global input) INTEGER
5035* On entry, IX specifies X's global row index, which points to
5036* the beginning of the submatrix sub( X ).
5037*
5038* JX (global input) INTEGER
5039* On entry, JX specifies X's global column index, which points
5040* to the beginning of the submatrix sub( X ).
5041*
5042* DESCX (global and local input) INTEGER array
5043* On entry, DESCX is an integer array of dimension DLEN_. This
5044* is the array descriptor for the matrix X.
5045*
5046* INCX (global input) INTEGER
5047* On entry, INCX specifies the global increment for the
5048* elements of X. Only two values of INCX are supported in
5049* this version, namely 1 and M_X. INCX must not be zero.
5050*
5051* Y (local input) DOUBLE PRECISION array
5052* On entry, Y is an array of dimension (DESCY( M_ ),*). This
5053* array contains a local copy of the initial entire matrix PY.
5054*
5055* IY (global input) INTEGER
5056* On entry, IY specifies Y's global row index, which points to
5057* the beginning of the submatrix sub( Y ).
5058*
5059* JY (global input) INTEGER
5060* On entry, JY specifies Y's global column index, which points
5061* to the beginning of the submatrix sub( Y ).
5062*
5063* DESCY (global and local input) INTEGER array
5064* On entry, DESCY is an integer array of dimension DLEN_. This
5065* is the array descriptor for the matrix Y.
5066*
5067* INCY (global input) INTEGER
5068* On entry, INCY specifies the global increment for the
5069* elements of Y. Only two values of INCY are supported in
5070* this version, namely 1 and M_Y. INCY must not be zero.
5071*
5072* A (local input/local output) DOUBLE PRECISION array
5073* On entry, A is an array of dimension (DESCA( M_ ),*). This
5074* array contains a local copy of the initial entire matrix PA.
5075*
5076* PA (local input) DOUBLE PRECISION array
5077* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
5078* array contains the local entries of the matrix PA.
5079*
5080* IA (global input) INTEGER
5081* On entry, IA specifies A's global row index, which points to
5082* the beginning of the submatrix sub( A ).
5083*
5084* JA (global input) INTEGER
5085* On entry, JA specifies A's global column index, which points
5086* to the beginning of the submatrix sub( A ).
5087*
5088* DESCA (global and local input) INTEGER array
5089* On entry, DESCA is an integer array of dimension DLEN_. This
5090* is the array descriptor for the matrix A.
5091*
5092* G (workspace) DOUBLE PRECISION array
5093* On entry, G is an array of dimension at least MAX( M, N ). G
5094* is used to compute the gauges.
5095*
5096* ERR (global output) DOUBLE PRECISION
5097* On exit, ERR specifies the largest error in absolute value.
5098*
5099* INFO (global output) INTEGER
5100* On exit, if INFO <> 0, the result is less than half accurate.
5101*
5102* -- Written on April 1, 1998 by
5103* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5104*
5105* =====================================================================
5106*
5107* .. Parameters ..
5108 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5109 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5110 $ RSRC_
5111 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5112 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5113 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5114 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5115 DOUBLE PRECISION ZERO, ONE
5116 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
5117* ..
5118* .. Local Scalars ..
5119 LOGICAL COLREP, LOWER, ROWREP, UPPER
5120 INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
5121 $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J,
5122 $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW,
5123 $ npcol, nprow
5124 DOUBLE PRECISION EPS, ERRI, GTMP, ATMP
5125* ..
5126* .. External Subroutines ..
5127 EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
5128* ..
5129* .. External Functions ..
5130 LOGICAL LSAME
5131 DOUBLE PRECISION PDLAMCH
5132 EXTERNAL lsame, pdlamch
5133* ..
5134* .. Intrinsic Functions ..
5135 INTRINSIC abs, max, min, mod, sqrt
5136* ..
5137* .. Executable Statements ..
5138*
5139 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5140*
5141 eps = pdlamch( ictxt, 'eps' )
5142*
5143 upper = lsame( uplo, 'U' )
5144 lower = lsame( uplo, 'L' )
5145*
5146 lda = max( 1, desca( m_ ) )
5147 ldx = max( 1, descx( m_ ) )
5148 ldy = max( 1, descy( m_ ) )
5149*
5150* Compute expected result in A using data in A, X and Y.
5151* Compute gauges in G. This part of the computation is performed
5152* by every process in the grid.
5153*
5154 DO 70 j = 1, n
5155*
5156 ioffxj = ix + ( jx - 1 ) * ldx + ( j - 1 ) * incx
5157 ioffyj = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
5158*
5159 IF( lower ) THEN
5160 ibeg = j
5161 iend = m
5162 DO 10 i = 1, j-1
5163 g( i ) = zero
5164 10 CONTINUE
5165 ELSE IF( upper ) THEN
5166 ibeg = 1
5167 iend = j
5168 DO 20 i = j+1, m
5169 g( i ) = zero
5170 20 CONTINUE
5171 ELSE
5172 ibeg = 1
5173 iend = m
5174 END IF
5175*
5176 DO 30 i = ibeg, iend
5177 ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
5178 ioffxi = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
5179 ioffyi = iy + ( jy - 1 ) * ldy + ( i - 1 ) * incy
5180 atmp = x( ioffxi ) * y( ioffyj )
5181 atmp = atmp + y( ioffyi ) * x( ioffxj )
5182 gtmp = abs( x( ioffxi ) * y( ioffyj ) )
5183 gtmp = gtmp + abs( y( ioffyi ) * x( ioffxj ) )
5184 g( i ) = abs( alpha ) * gtmp + abs( a( ioffa ) )
5185 a( ioffa ) = alpha*atmp + a( ioffa )
5186*
5187 30 CONTINUE
5188*
5189* Compute the error ratio for this result.
5190*
5191 info = 0
5192 err = zero
5193 ldpa = desca( lld_ )
5194 ioffa = ia + ( ja + j - 2 ) * lda
5195 CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
5196 $ iia, jja, iarow, iacol )
5197 rowrep = ( iarow.EQ.-1 )
5198 colrep = ( iacol.EQ.-1 )
5199*
5200 IF( mycol.EQ.iacol .OR. colrep ) THEN
5201*
5202 icurrow = iarow
5203 ib = desca( imb_ ) - ia + 1
5204 IF( ib.LE.0 )
5205 $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
5206 ib = min( ib, m )
5207 in = ia + ib - 1
5208*
5209 DO 40 i = ia, in
5210*
5211 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5212 erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
5213 IF( g( i-ia+1 ).NE.zero )
5214 $ erri = erri / g( i-ia+1 )
5215 err = max( err, erri )
5216 IF( err*sqrt( eps ).GE.one )
5217 $ info = 1
5218 iia = iia + 1
5219 END IF
5220*
5221 ioffa = ioffa + 1
5222*
5223 40 CONTINUE
5224*
5225 icurrow = mod( icurrow+1, nprow )
5226*
5227 DO 60 i = in+1, ia+m-1, desca( mb_ )
5228 ib = min( ia+m-i, desca( mb_ ) )
5229*
5230 DO 50 kk = 0, ib-1
5231*
5232 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5233 erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
5234 IF( g( i+kk-ia+1 ).NE.zero )
5235 $ erri = erri / g( i+kk-ia+1 )
5236 err = max( err, erri )
5237 IF( err*sqrt( eps ).GE.one )
5238 $ info = 1
5239 iia = iia + 1
5240 END IF
5241*
5242 ioffa = ioffa + 1
5243*
5244 50 CONTINUE
5245*
5246 icurrow = mod( icurrow+1, nprow )
5247*
5248 60 CONTINUE
5249*
5250 END IF
5251*
5252* If INFO = 0, all results are at least half accurate.
5253*
5254 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5255 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5256 $ mycol )
5257 IF( info.NE.0 )
5258 $ GO TO 80
5259*
5260 70 CONTINUE
5261*
5262 80 CONTINUE
5263*
5264 RETURN
5265*
5266* End of PDVMCH2
5267*
5268 END
5269 SUBROUTINE pdmmch( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA,
5270 $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5271 $ JC, DESCC, CT, G, ERR, INFO )
5272*
5273* -- PBLAS test routine (version 2.0) --
5274* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5275* and University of California, Berkeley.
5276* April 1, 1998
5277*
5278* .. Scalar Arguments ..
5279 CHARACTER*1 TRANSA, TRANSB
5280 INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N
5281 DOUBLE PRECISION ALPHA, BETA, ERR
5282* ..
5283* .. Array Arguments ..
5284 INTEGER DESCA( * ), DESCB( * ), DESCC( * )
5285 DOUBLE PRECISION A( * ), B( * ), C( * ), CT( * ), G( * ),
5286 $ PC( * )
5287* ..
5288*
5289* Purpose
5290* =======
5291*
5292* PDMMCH checks the results of the computational tests.
5293*
5294* Notes
5295* =====
5296*
5297* A description vector is associated with each 2D block-cyclicly dis-
5298* tributed matrix. This vector stores the information required to
5299* establish the mapping between a matrix entry and its corresponding
5300* process and memory location.
5301*
5302* In the following comments, the character _ should be read as
5303* "of the distributed matrix". Let A be a generic term for any 2D
5304* block cyclicly distributed matrix. Its description vector is DESCA:
5305*
5306* NOTATION STORED IN EXPLANATION
5307* ---------------- --------------- ------------------------------------
5308* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5309* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5310* the NPROW x NPCOL BLACS process grid
5311* A is distributed over. The context
5312* itself is global, but the handle
5313* (the integer value) may vary.
5314* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5315* ted matrix A, M_A >= 0.
5316* N_A (global) DESCA( N_ ) The number of columns in the distri-
5317* buted matrix A, N_A >= 0.
5318* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5319* block of the matrix A, IMB_A > 0.
5320* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5321* left block of the matrix A,
5322* INB_A > 0.
5323* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5324* bute the last M_A-IMB_A rows of A,
5325* MB_A > 0.
5326* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5327* bute the last N_A-INB_A columns of
5328* A, NB_A > 0.
5329* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5330* row of the matrix A is distributed,
5331* NPROW > RSRC_A >= 0.
5332* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5333* first column of A is distributed.
5334* NPCOL > CSRC_A >= 0.
5335* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5336* array storing the local blocks of
5337* the distributed matrix A,
5338* IF( Lc( 1, N_A ) > 0 )
5339* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5340* ELSE
5341* LLD_A >= 1.
5342*
5343* Let K be the number of rows of a matrix A starting at the global in-
5344* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5345* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5346* receive if these K rows were distributed over NPROW processes. If K
5347* is the number of columns of a matrix A starting at the global index
5348* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5349* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5350* these K columns were distributed over NPCOL processes.
5351*
5352* The values of Lr() and Lc() may be determined via a call to the func-
5353* tion PB_NUMROC:
5354* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5355* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5356*
5357* Arguments
5358* =========
5359*
5360* ICTXT (local input) INTEGER
5361* On entry, ICTXT specifies the BLACS context handle, indica-
5362* ting the global context of the operation. The context itself
5363* is global, but the value of ICTXT is local.
5364*
5365* TRANSA (global input) CHARACTER*1
5366* On entry, TRANSA specifies if the matrix operand A is to be
5367* transposed.
5368*
5369* TRANSB (global input) CHARACTER*1
5370* On entry, TRANSB specifies if the matrix operand B is to be
5371* transposed.
5372*
5373* M (global input) INTEGER
5374* On entry, M specifies the number of rows of C.
5375*
5376* N (global input) INTEGER
5377* On entry, N specifies the number of columns of C.
5378*
5379* K (global input) INTEGER
5380* On entry, K specifies the number of columns (resp. rows) of A
5381* when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK,
5382* PxSYR2K, PxHERK and PxHER2K.
5383*
5384* ALPHA (global input) DOUBLE PRECISION
5385* On entry, ALPHA specifies the scalar alpha.
5386*
5387* A (local input) DOUBLE PRECISION array
5388* On entry, A is an array of dimension (DESCA( M_ ),*). This
5389* array contains a local copy of the initial entire matrix PA.
5390*
5391* IA (global input) INTEGER
5392* On entry, IA specifies A's global row index, which points to
5393* the beginning of the submatrix sub( A ).
5394*
5395* JA (global input) INTEGER
5396* On entry, JA specifies A's global column index, which points
5397* to the beginning of the submatrix sub( A ).
5398*
5399* DESCA (global and local input) INTEGER array
5400* On entry, DESCA is an integer array of dimension DLEN_. This
5401* is the array descriptor for the matrix A.
5402*
5403* B (local input) DOUBLE PRECISION array
5404* On entry, B is an array of dimension (DESCB( M_ ),*). This
5405* array contains a local copy of the initial entire matrix PB.
5406*
5407* IB (global input) INTEGER
5408* On entry, IB specifies B's global row index, which points to
5409* the beginning of the submatrix sub( B ).
5410*
5411* JB (global input) INTEGER
5412* On entry, JB specifies B's global column index, which points
5413* to the beginning of the submatrix sub( B ).
5414*
5415* DESCB (global and local input) INTEGER array
5416* On entry, DESCB is an integer array of dimension DLEN_. This
5417* is the array descriptor for the matrix B.
5418*
5419* BETA (global input) DOUBLE PRECISION
5420* On entry, BETA specifies the scalar beta.
5421*
5422* C (local input/local output) DOUBLE PRECISION array
5423* On entry, C is an array of dimension (DESCC( M_ ),*). This
5424* array contains a local copy of the initial entire matrix PC.
5425*
5426* PC (local input) DOUBLE PRECISION array
5427* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5428* array contains the local pieces of the matrix PC.
5429*
5430* IC (global input) INTEGER
5431* On entry, IC specifies C's global row index, which points to
5432* the beginning of the submatrix sub( C ).
5433*
5434* JC (global input) INTEGER
5435* On entry, JC specifies C's global column index, which points
5436* to the beginning of the submatrix sub( C ).
5437*
5438* DESCC (global and local input) INTEGER array
5439* On entry, DESCC is an integer array of dimension DLEN_. This
5440* is the array descriptor for the matrix C.
5441*
5442* CT (workspace) DOUBLE PRECISION array
5443* On entry, CT is an array of dimension at least MAX(M,N,K). CT
5444* holds a copy of the current column of C.
5445*
5446* G (workspace) DOUBLE PRECISION array
5447* On entry, G is an array of dimension at least MAX(M,N,K). G
5448* is used to compute the gauges.
5449*
5450* ERR (global output) DOUBLE PRECISION
5451* On exit, ERR specifies the largest error in absolute value.
5452*
5453* INFO (global output) INTEGER
5454* On exit, if INFO <> 0, the result is less than half accurate.
5455*
5456* -- Written on April 1, 1998 by
5457* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5458*
5459* =====================================================================
5460*
5461* .. Parameters ..
5462 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5463 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5464 $ RSRC_
5465 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5466 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5467 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5468 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5469 DOUBLE PRECISION ZERO, ONE
5470 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
5471* ..
5472* .. Local Scalars ..
5473 LOGICAL COLREP, ROWREP, TRANA, TRANB
5474 INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA,
5475 $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC,
5476 $ mycol, myrow, npcol, nprow
5477 DOUBLE PRECISION EPS, ERRI
5478* ..
5479* .. External Subroutines ..
5480 EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
5481* ..
5482* .. External Functions ..
5483 LOGICAL LSAME
5484 DOUBLE PRECISION PDLAMCH
5485 EXTERNAL LSAME, PDLAMCH
5486* ..
5487* .. Intrinsic Functions ..
5488 INTRINSIC abs, max, min, mod, sqrt
5489* ..
5490* .. Executable Statements ..
5491*
5492 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5493*
5494 eps = pdlamch( ictxt, 'eps' )
5495*
5496 trana = lsame( transa, 'T' ).OR.lsame( transa, 'C' )
5497 tranb = lsame( transb, 'T' ).OR.lsame( transb, 'C' )
5498*
5499 lda = max( 1, desca( m_ ) )
5500 ldb = max( 1, descb( m_ ) )
5501 ldc = max( 1, descc( m_ ) )
5502*
5503* Compute expected result in C using data in A, B and C.
5504* Compute gauges in G. This part of the computation is performed
5505* by every process in the grid.
5506*
5507 DO 240 j = 1, n
5508*
5509 ioffc = ic + ( jc + j - 2 ) * ldc
5510 DO 10 i = 1, m
5511 ct( i ) = zero
5512 g( i ) = zero
5513 10 CONTINUE
5514*
5515 IF( .NOT.trana .AND. .NOT.tranb ) THEN
5516 DO 30 kk = 1, k
5517 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5518 DO 20 i = 1, m
5519 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5520 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5521 g( i ) = g( i ) + abs( a( ioffa ) ) *
5522 $ abs( b( ioffb ) )
5523 20 CONTINUE
5524 30 CONTINUE
5525 ELSE IF( trana .AND. .NOT.tranb ) THEN
5526 DO 50 kk = 1, k
5527 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5528 DO 40 i = 1, m
5529 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5530 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5531 g( i ) = g( i ) + abs( a( ioffa ) ) *
5532 $ abs( b( ioffb ) )
5533 40 CONTINUE
5534 50 CONTINUE
5535 ELSE IF( .NOT.trana .AND. tranb ) THEN
5536 DO 70 kk = 1, k
5537 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5538 DO 60 i = 1, m
5539 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5540 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5541 g( i ) = g( i ) + abs( a( ioffa ) ) *
5542 $ abs( b( ioffb ) )
5543 60 CONTINUE
5544 70 CONTINUE
5545 ELSE IF( trana .AND. tranb ) THEN
5546 DO 90 kk = 1, k
5547 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5548 DO 80 i = 1, m
5549 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5550 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5551 g( i ) = g( i ) + abs( a( ioffa ) ) *
5552 $ abs( b( ioffb ) )
5553 80 CONTINUE
5554 90 CONTINUE
5555 END IF
5556*
5557 DO 200 i = 1, m
5558 ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5559 g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5560 c( ioffc ) = ct( i )
5561 ioffc = ioffc + 1
5562 200 CONTINUE
5563*
5564* Compute the error ratio for this result.
5565*
5566 err = zero
5567 info = 0
5568 ldpc = descc( lld_ )
5569 ioffc = ic + ( jc + j - 2 ) * ldc
5570 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5571 $ iic, jjc, icrow, iccol )
5572 icurrow = icrow
5573 rowrep = ( icrow.EQ.-1 )
5574 colrep = ( iccol.EQ.-1 )
5575*
5576 IF( mycol.EQ.iccol .OR. colrep ) THEN
5577*
5578 ibb = descc( imb_ ) - ic + 1
5579 IF( ibb.LE.0 )
5580 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5581 ibb = min( ibb, m )
5582 in = ic + ibb - 1
5583*
5584 DO 210 i = ic, in
5585*
5586 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5587 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5588 $ c( ioffc ) ) / eps
5589 IF( g( i-ic+1 ).NE.zero )
5590 $ erri = erri / g( i-ic+1 )
5591 err = max( err, erri )
5592 IF( err*sqrt( eps ).GE.one )
5593 $ info = 1
5594 iic = iic + 1
5595 END IF
5596*
5597 ioffc = ioffc + 1
5598*
5599 210 CONTINUE
5600*
5601 icurrow = mod( icurrow+1, nprow )
5602*
5603 DO 230 i = in+1, ic+m-1, descc( mb_ )
5604 ibb = min( ic+m-i, descc( mb_ ) )
5605*
5606 DO 220 kk = 0, ibb-1
5607*
5608 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5609 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5610 $ c( ioffc ) )/eps
5611 IF( g( i+kk-ic+1 ).NE.zero )
5612 $ erri = erri / g( i+kk-ic+1 )
5613 err = max( err, erri )
5614 IF( err*sqrt( eps ).GE.one )
5615 $ info = 1
5616 iic = iic + 1
5617 END IF
5618*
5619 ioffc = ioffc + 1
5620*
5621 220 CONTINUE
5622*
5623 icurrow = mod( icurrow+1, nprow )
5624*
5625 230 CONTINUE
5626*
5627 END IF
5628*
5629* If INFO = 0, all results are at least half accurate.
5630*
5631 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5632 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5633 $ mycol )
5634 IF( info.NE.0 )
5635 $ GO TO 250
5636*
5637 240 CONTINUE
5638*
5639 250 CONTINUE
5640*
5641 RETURN
5642*
5643* End of PDMMCH
5644*
5645 END
5646 SUBROUTINE pdmmch1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5647 $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G,
5648 $ ERR, INFO )
5649*
5650* -- PBLAS test routine (version 2.0) --
5651* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5652* and University of California, Berkeley.
5653* April 1, 1998
5654*
5655* .. Scalar Arguments ..
5656 CHARACTER*1 TRANS, UPLO
5657 INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N
5658 DOUBLE PRECISION ALPHA, BETA, ERR
5659* ..
5660* .. Array Arguments ..
5661 INTEGER DESCA( * ), DESCC( * )
5662 DOUBLE PRECISION A( * ), C( * ), CT( * ), G( * ), PC( * )
5663* ..
5664*
5665* Purpose
5666* =======
5667*
5668* PDMMCH1 checks the results of the computational tests.
5669*
5670* Notes
5671* =====
5672*
5673* A description vector is associated with each 2D block-cyclicly dis-
5674* tributed matrix. This vector stores the information required to
5675* establish the mapping between a matrix entry and its corresponding
5676* process and memory location.
5677*
5678* In the following comments, the character _ should be read as
5679* "of the distributed matrix". Let A be a generic term for any 2D
5680* block cyclicly distributed matrix. Its description vector is DESCA:
5681*
5682* NOTATION STORED IN EXPLANATION
5683* ---------------- --------------- ------------------------------------
5684* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5685* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5686* the NPROW x NPCOL BLACS process grid
5687* A is distributed over. The context
5688* itself is global, but the handle
5689* (the integer value) may vary.
5690* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5691* ted matrix A, M_A >= 0.
5692* N_A (global) DESCA( N_ ) The number of columns in the distri-
5693* buted matrix A, N_A >= 0.
5694* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5695* block of the matrix A, IMB_A > 0.
5696* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5697* left block of the matrix A,
5698* INB_A > 0.
5699* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5700* bute the last M_A-IMB_A rows of A,
5701* MB_A > 0.
5702* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5703* bute the last N_A-INB_A columns of
5704* A, NB_A > 0.
5705* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5706* row of the matrix A is distributed,
5707* NPROW > RSRC_A >= 0.
5708* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5709* first column of A is distributed.
5710* NPCOL > CSRC_A >= 0.
5711* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5712* array storing the local blocks of
5713* the distributed matrix A,
5714* IF( Lc( 1, N_A ) > 0 )
5715* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5716* ELSE
5717* LLD_A >= 1.
5718*
5719* Let K be the number of rows of a matrix A starting at the global in-
5720* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5721* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5722* receive if these K rows were distributed over NPROW processes. If K
5723* is the number of columns of a matrix A starting at the global index
5724* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5725* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5726* these K columns were distributed over NPCOL processes.
5727*
5728* The values of Lr() and Lc() may be determined via a call to the func-
5729* tion PB_NUMROC:
5730* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5731* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5732*
5733* Arguments
5734* =========
5735*
5736* ICTXT (local input) INTEGER
5737* On entry, ICTXT specifies the BLACS context handle, indica-
5738* ting the global context of the operation. The context itself
5739* is global, but the value of ICTXT is local.
5740*
5741* UPLO (global input) CHARACTER*1
5742* On entry, UPLO specifies which part of C should contain the
5743* result.
5744*
5745* TRANS (global input) CHARACTER*1
5746* On entry, TRANS specifies whether the matrix A has to be
5747* transposed or not before computing the matrix-matrix product.
5748*
5749* N (global input) INTEGER
5750* On entry, N specifies the order the submatrix operand C. N
5751* must be at least zero.
5752*
5753* K (global input) INTEGER
5754* On entry, K specifies the number of columns (resp. rows) of A
5755* when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least
5756* zero.
5757*
5758* ALPHA (global input) DOUBLE PRECISION
5759* On entry, ALPHA specifies the scalar alpha.
5760*
5761* A (local input) DOUBLE PRECISION array
5762* On entry, A is an array of dimension (DESCA( M_ ),*). This
5763* array contains a local copy of the initial entire matrix PA.
5764*
5765* IA (global input) INTEGER
5766* On entry, IA specifies A's global row index, which points to
5767* the beginning of the submatrix sub( A ).
5768*
5769* JA (global input) INTEGER
5770* On entry, JA specifies A's global column index, which points
5771* to the beginning of the submatrix sub( A ).
5772*
5773* DESCA (global and local input) INTEGER array
5774* On entry, DESCA is an integer array of dimension DLEN_. This
5775* is the array descriptor for the matrix A.
5776*
5777* BETA (global input) DOUBLE PRECISION
5778* On entry, BETA specifies the scalar beta.
5779*
5780* C (local input/local output) DOUBLE PRECISION array
5781* On entry, C is an array of dimension (DESCC( M_ ),*). This
5782* array contains a local copy of the initial entire matrix PC.
5783*
5784* PC (local input) DOUBLE PRECISION array
5785* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5786* array contains the local pieces of the matrix PC.
5787*
5788* IC (global input) INTEGER
5789* On entry, IC specifies C's global row index, which points to
5790* the beginning of the submatrix sub( C ).
5791*
5792* JC (global input) INTEGER
5793* On entry, JC specifies C's global column index, which points
5794* to the beginning of the submatrix sub( C ).
5795*
5796* DESCC (global and local input) INTEGER array
5797* On entry, DESCC is an integer array of dimension DLEN_. This
5798* is the array descriptor for the matrix C.
5799*
5800* CT (workspace) DOUBLE PRECISION array
5801* On entry, CT is an array of dimension at least MAX(M,N,K). CT
5802* holds a copy of the current column of C.
5803*
5804* G (workspace) DOUBLE PRECISION array
5805* On entry, G is an array of dimension at least MAX(M,N,K). G
5806* is used to compute the gauges.
5807*
5808* ERR (global output) DOUBLE PRECISION
5809* On exit, ERR specifies the largest error in absolute value.
5810*
5811* INFO (global output) INTEGER
5812* On exit, if INFO <> 0, the result is less than half accurate.
5813*
5814* -- Written on April 1, 1998 by
5815* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5816*
5817* =====================================================================
5818*
5819* .. Parameters ..
5820 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5821 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5822 $ RSRC_
5823 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5824 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5825 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5826 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5827 DOUBLE PRECISION ZERO, ONE
5828 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
5829* ..
5830* .. Local Scalars ..
5831 LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
5832 INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
5833 $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA,
5834 $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW
5835 DOUBLE PRECISION EPS, ERRI
5836* ..
5837* .. External Subroutines ..
5838 EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
5839* ..
5840* .. External Functions ..
5841 LOGICAL LSAME
5842 DOUBLE PRECISION PDLAMCH
5843 EXTERNAL lsame, pdlamch
5844* ..
5845* .. Intrinsic Functions ..
5846 INTRINSIC abs, max, min, mod, sqrt
5847* ..
5848* .. Executable Statements ..
5849*
5850 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5851*
5852 eps = pdlamch( ictxt, 'eps' )
5853*
5854 upper = lsame( uplo, 'U' )
5855 notran = lsame( trans, 'N' )
5856 tran = lsame( trans, 'T' )
5857*
5858 lda = max( 1, desca( m_ ) )
5859 ldc = max( 1, descc( m_ ) )
5860*
5861* Compute expected result in C using data in A, B and C.
5862* Compute gauges in G. This part of the computation is performed
5863* by every process in the grid.
5864*
5865 DO 140 j = 1, n
5866*
5867 IF( upper ) THEN
5868 ibeg = 1
5869 iend = j
5870 ELSE
5871 ibeg = j
5872 iend = n
5873 END IF
5874*
5875 DO 10 i = 1, n
5876 ct( i ) = zero
5877 g( i ) = zero
5878 10 CONTINUE
5879*
5880 IF( notran ) THEN
5881 DO 30 kk = 1, k
5882 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
5883 DO 20 i = ibeg, iend
5884 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
5885 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5886 g( i ) = g( i ) + abs( a( ioffak ) ) *
5887 $ abs( a( ioffan ) )
5888 20 CONTINUE
5889 30 CONTINUE
5890 ELSE IF( tran ) THEN
5891 DO 50 kk = 1, k
5892 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
5893 DO 40 i = ibeg, iend
5894 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
5895 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
5896 g( i ) = g( i ) + abs( a( ioffak ) ) *
5897 $ abs( a( ioffan ) )
5898 40 CONTINUE
5899 50 CONTINUE
5900 END IF
5901*
5902 ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
5903*
5904 DO 100 i = ibeg, iend
5905 ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5906 g( i ) = abs( alpha )*g( i ) + abs( beta )*abs( c( ioffc ) )
5907 c( ioffc ) = ct( i )
5908 ioffc = ioffc + 1
5909 100 CONTINUE
5910*
5911* Compute the error ratio for this result.
5912*
5913 err = zero
5914 info = 0
5915 ldpc = descc( lld_ )
5916 ioffc = ic + ( jc + j - 2 ) * ldc
5917 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5918 $ iic, jjc, icrow, iccol )
5919 icurrow = icrow
5920 rowrep = ( icrow.EQ.-1 )
5921 colrep = ( iccol.EQ.-1 )
5922*
5923 IF( mycol.EQ.iccol .OR. colrep ) THEN
5924*
5925 ibb = descc( imb_ ) - ic + 1
5926 IF( ibb.LE.0 )
5927 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5928 ibb = min( ibb, n )
5929 in = ic + ibb - 1
5930*
5931 DO 110 i = ic, in
5932*
5933 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5934 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5935 $ c( ioffc ) ) / eps
5936 IF( g( i-ic+1 ).NE.zero )
5937 $ erri = erri / g( i-ic+1 )
5938 err = max( err, erri )
5939 IF( err*sqrt( eps ).GE.one )
5940 $ info = 1
5941 iic = iic + 1
5942 END IF
5943*
5944 ioffc = ioffc + 1
5945*
5946 110 CONTINUE
5947*
5948 icurrow = mod( icurrow+1, nprow )
5949*
5950 DO 130 i = in+1, ic+n-1, descc( mb_ )
5951 ibb = min( ic+n-i, descc( mb_ ) )
5952*
5953 DO 120 kk = 0, ibb-1
5954*
5955 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5956 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5957 $ c( ioffc ) )/eps
5958 IF( g( i+kk-ic+1 ).NE.zero )
5959 $ erri = erri / g( i+kk-ic+1 )
5960 err = max( err, erri )
5961 IF( err*sqrt( eps ).GE.one )
5962 $ info = 1
5963 iic = iic + 1
5964 END IF
5965*
5966 ioffc = ioffc + 1
5967*
5968 120 CONTINUE
5969*
5970 icurrow = mod( icurrow+1, nprow )
5971*
5972 130 CONTINUE
5973*
5974 END IF
5975*
5976* If INFO = 0, all results are at least half accurate.
5977*
5978 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5979 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5980 $ mycol )
5981 IF( info.NE.0 )
5982 $ GO TO 150
5983*
5984 140 CONTINUE
5985*
5986 150 CONTINUE
5987*
5988 RETURN
5989*
5990* End of PDMMCH1
5991*
5992 END
5993 SUBROUTINE pdmmch2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5994 $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5995 $ JC, DESCC, CT, G, ERR, INFO )
5996*
5997* -- PBLAS test routine (version 2.0) --
5998* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5999* and University of California, Berkeley.
6000* April 1, 1998
6001*
6002* .. Scalar Arguments ..
6003 CHARACTER*1 TRANS, UPLO
6004 INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N
6005 DOUBLE PRECISION ALPHA, BETA, ERR
6006* ..
6007* .. Array Arguments ..
6008 INTEGER DESCA( * ), DESCB( * ), DESCC( * )
6009 DOUBLE PRECISION A( * ), B( * ), C( * ), CT( * ), G( * ),
6010 $ pc( * )
6011* ..
6012*
6013* Purpose
6014* =======
6015*
6016* PDMMCH2 checks the results of the computational tests.
6017*
6018* Notes
6019* =====
6020*
6021* A description vector is associated with each 2D block-cyclicly dis-
6022* tributed matrix. This vector stores the information required to
6023* establish the mapping between a matrix entry and its corresponding
6024* process and memory location.
6025*
6026* In the following comments, the character _ should be read as
6027* "of the distributed matrix". Let A be a generic term for any 2D
6028* block cyclicly distributed matrix. Its description vector is DESCA:
6029*
6030* NOTATION STORED IN EXPLANATION
6031* ---------------- --------------- ------------------------------------
6032* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6033* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6034* the NPROW x NPCOL BLACS process grid
6035* A is distributed over. The context
6036* itself is global, but the handle
6037* (the integer value) may vary.
6038* M_A (global) DESCA( M_ ) The number of rows in the distribu-
6039* ted matrix A, M_A >= 0.
6040* N_A (global) DESCA( N_ ) The number of columns in the distri-
6041* buted matrix A, N_A >= 0.
6042* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6043* block of the matrix A, IMB_A > 0.
6044* INB_A (global) DESCA( INB_ ) The number of columns of the upper
6045* left block of the matrix A,
6046* INB_A > 0.
6047* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6048* bute the last M_A-IMB_A rows of A,
6049* MB_A > 0.
6050* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6051* bute the last N_A-INB_A columns of
6052* A, NB_A > 0.
6053* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6054* row of the matrix A is distributed,
6055* NPROW > RSRC_A >= 0.
6056* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6057* first column of A is distributed.
6058* NPCOL > CSRC_A >= 0.
6059* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6060* array storing the local blocks of
6061* the distributed matrix A,
6062* IF( Lc( 1, N_A ) > 0 )
6063* LLD_A >= MAX( 1, Lr( 1, M_A ) )
6064* ELSE
6065* LLD_A >= 1.
6066*
6067* Let K be the number of rows of a matrix A starting at the global in-
6068* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6069* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6070* receive if these K rows were distributed over NPROW processes. If K
6071* is the number of columns of a matrix A starting at the global index
6072* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6073* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6074* these K columns were distributed over NPCOL processes.
6075*
6076* The values of Lr() and Lc() may be determined via a call to the func-
6077* tion PB_NUMROC:
6078* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6079* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6080*
6081* Arguments
6082* =========
6083*
6084* ICTXT (local input) INTEGER
6085* On entry, ICTXT specifies the BLACS context handle, indica-
6086* ting the global context of the operation. The context itself
6087* is global, but the value of ICTXT is local.
6088*
6089* UPLO (global input) CHARACTER*1
6090* On entry, UPLO specifies which part of C should contain the
6091* result.
6092*
6093* TRANS (global input) CHARACTER*1
6094* On entry, TRANS specifies whether the matrices A and B have
6095* to be transposed or not before computing the matrix-matrix
6096* product.
6097*
6098* N (global input) INTEGER
6099* On entry, N specifies the order the submatrix operand C. N
6100* must be at least zero.
6101*
6102* K (global input) INTEGER
6103* On entry, K specifies the number of columns (resp. rows) of A
6104* and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at
6105* least zero.
6106*
6107* ALPHA (global input) DOUBLE PRECISION
6108* On entry, ALPHA specifies the scalar alpha.
6109*
6110* A (local input) DOUBLE PRECISION array
6111* On entry, A is an array of dimension (DESCA( M_ ),*). This
6112* array contains a local copy of the initial entire matrix PA.
6113*
6114* IA (global input) INTEGER
6115* On entry, IA specifies A's global row index, which points to
6116* the beginning of the submatrix sub( A ).
6117*
6118* JA (global input) INTEGER
6119* On entry, JA specifies A's global column index, which points
6120* to the beginning of the submatrix sub( A ).
6121*
6122* DESCA (global and local input) INTEGER array
6123* On entry, DESCA is an integer array of dimension DLEN_. This
6124* is the array descriptor for the matrix A.
6125*
6126* B (local input) DOUBLE PRECISION array
6127* On entry, B is an array of dimension (DESCB( M_ ),*). This
6128* array contains a local copy of the initial entire matrix PB.
6129*
6130* IB (global input) INTEGER
6131* On entry, IB specifies B's global row index, which points to
6132* the beginning of the submatrix sub( B ).
6133*
6134* JB (global input) INTEGER
6135* On entry, JB specifies B's global column index, which points
6136* to the beginning of the submatrix sub( B ).
6137*
6138* DESCB (global and local input) INTEGER array
6139* On entry, DESCB is an integer array of dimension DLEN_. This
6140* is the array descriptor for the matrix B.
6141*
6142* BETA (global input) DOUBLE PRECISION
6143* On entry, BETA specifies the scalar beta.
6144*
6145* C (local input/local output) DOUBLE PRECISION array
6146* On entry, C is an array of dimension (DESCC( M_ ),*). This
6147* array contains a local copy of the initial entire matrix PC.
6148*
6149* PC (local input) DOUBLE PRECISION array
6150* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6151* array contains the local pieces of the matrix PC.
6152*
6153* IC (global input) INTEGER
6154* On entry, IC specifies C's global row index, which points to
6155* the beginning of the submatrix sub( C ).
6156*
6157* JC (global input) INTEGER
6158* On entry, JC specifies C's global column index, which points
6159* to the beginning of the submatrix sub( C ).
6160*
6161* DESCC (global and local input) INTEGER array
6162* On entry, DESCC is an integer array of dimension DLEN_. This
6163* is the array descriptor for the matrix C.
6164*
6165* CT (workspace) DOUBLE PRECISION array
6166* On entry, CT is an array of dimension at least MAX(M,N,K). CT
6167* holds a copy of the current column of C.
6168*
6169* G (workspace) DOUBLE PRECISION array
6170* On entry, G is an array of dimension at least MAX(M,N,K). G
6171* is used to compute the gauges.
6172*
6173* ERR (global output) DOUBLE PRECISION
6174* On exit, ERR specifies the largest error in absolute value.
6175*
6176* INFO (global output) INTEGER
6177* On exit, if INFO <> 0, the result is less than half accurate.
6178*
6179* -- Written on April 1, 1998 by
6180* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6181*
6182* =====================================================================
6183*
6184* .. Parameters ..
6185 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6186 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6187 $ RSRC_
6188 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6189 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6190 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6191 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6192 DOUBLE PRECISION ZERO, ONE
6193 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
6194* ..
6195* .. Local Scalars ..
6196 LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER
6197 INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
6198 $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J,
6199 $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW,
6200 $ NPCOL, NPROW
6201 DOUBLE PRECISION EPS, ERRI
6202* ..
6203* .. External Subroutines ..
6204 EXTERNAL blacs_gridinfo, dgamx2d, igsum2d, pb_infog2l
6205* ..
6206* .. External Functions ..
6207 LOGICAL LSAME
6208 DOUBLE PRECISION PDLAMCH
6209 EXTERNAL LSAME, PDLAMCH
6210* ..
6211* .. Intrinsic Functions ..
6212 INTRINSIC abs, max, min, mod, sqrt
6213* ..
6214* .. Executable Statements ..
6215*
6216 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6217*
6218 eps = pdlamch( ictxt, 'eps' )
6219*
6220 upper = lsame( uplo, 'U' )
6221 notran = lsame( trans, 'N' )
6222 tran = lsame( trans, 'T' )
6223*
6224 lda = max( 1, desca( m_ ) )
6225 ldb = max( 1, descb( m_ ) )
6226 ldc = max( 1, descc( m_ ) )
6227*
6228* Compute expected result in C using data in A, B and C.
6229* Compute gauges in G. This part of the computation is performed
6230* by every process in the grid.
6231*
6232 DO 140 j = 1, n
6233*
6234 IF( upper ) THEN
6235 ibeg = 1
6236 iend = j
6237 ELSE
6238 ibeg = j
6239 iend = n
6240 END IF
6241*
6242 DO 10 i = 1, n
6243 ct( i ) = zero
6244 g( i ) = zero
6245 10 CONTINUE
6246*
6247 IF( notran ) THEN
6248 DO 30 kk = 1, k
6249 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6250 ioffbk = ib + j - 1 + ( jb + kk - 2 ) * ldb
6251 DO 20 i = ibeg, iend
6252 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6253 ioffbn = ib + i - 1 + ( jb + kk - 2 ) * ldb
6254 ct( i ) = ct( i ) + alpha * (
6255 $ a( ioffan ) * b( ioffbk ) +
6256 $ b( ioffbn ) * a( ioffak ) )
6257 g( i ) = g( i ) + abs( alpha ) * (
6258 $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6259 $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6260 20 CONTINUE
6261 30 CONTINUE
6262 ELSE IF( tran ) THEN
6263 DO 50 kk = 1, k
6264 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6265 ioffbk = ib + kk - 1 + ( jb + j - 2 ) * ldb
6266 DO 40 i = ibeg, iend
6267 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6268 ioffbn = ib + kk - 1 + ( jb + i - 2 ) * ldb
6269 ct( i ) = ct( i ) + alpha * (
6270 $ a( ioffan ) * b( ioffbk ) +
6271 $ b( ioffbn ) * a( ioffak ) )
6272 g( i ) = g( i ) + abs( alpha ) * (
6273 $ abs( a( ioffan ) ) * abs( b( ioffbk ) ) +
6274 $ abs( b( ioffbn ) ) * abs( a( ioffak ) ) )
6275 40 CONTINUE
6276 50 CONTINUE
6277 END IF
6278*
6279 ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
6280*
6281 DO 100 i = ibeg, iend
6282 ct( i ) = ct( i ) + beta * c( ioffc )
6283 g( i ) = g( i ) + abs( beta )*abs( c( ioffc ) )
6284 c( ioffc ) = ct( i )
6285 ioffc = ioffc + 1
6286 100 CONTINUE
6287*
6288* Compute the error ratio for this result.
6289*
6290 err = zero
6291 info = 0
6292 ldpc = descc( lld_ )
6293 ioffc = ic + ( jc + j - 2 ) * ldc
6294 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
6295 $ iic, jjc, icrow, iccol )
6296 icurrow = icrow
6297 rowrep = ( icrow.EQ.-1 )
6298 colrep = ( iccol.EQ.-1 )
6299*
6300 IF( mycol.EQ.iccol .OR. colrep ) THEN
6301*
6302 ibb = descc( imb_ ) - ic + 1
6303 IF( ibb.LE.0 )
6304 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
6305 ibb = min( ibb, n )
6306 in = ic + ibb - 1
6307*
6308 DO 110 i = ic, in
6309*
6310 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6311 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6312 $ c( ioffc ) ) / eps
6313 IF( g( i-ic+1 ).NE.zero )
6314 $ erri = erri / g( i-ic+1 )
6315 err = max( err, erri )
6316 IF( err*sqrt( eps ).GE.one )
6317 $ info = 1
6318 iic = iic + 1
6319 END IF
6320*
6321 ioffc = ioffc + 1
6322*
6323 110 CONTINUE
6324*
6325 icurrow = mod( icurrow+1, nprow )
6326*
6327 DO 130 i = in+1, ic+n-1, descc( mb_ )
6328 ibb = min( ic+n-i, descc( mb_ ) )
6329*
6330 DO 120 kk = 0, ibb-1
6331*
6332 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6333 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6334 $ c( ioffc ) )/eps
6335 IF( g( i+kk-ic+1 ).NE.zero )
6336 $ erri = erri / g( i+kk-ic+1 )
6337 err = max( err, erri )
6338 IF( err*sqrt( eps ).GE.one )
6339 $ info = 1
6340 iic = iic + 1
6341 END IF
6342*
6343 ioffc = ioffc + 1
6344*
6345 120 CONTINUE
6346*
6347 icurrow = mod( icurrow+1, nprow )
6348*
6349 130 CONTINUE
6350*
6351 END IF
6352*
6353* If INFO = 0, all results are at least half accurate.
6354*
6355 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6356 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6357 $ mycol )
6358 IF( info.NE.0 )
6359 $ GO TO 150
6360*
6361 140 CONTINUE
6362*
6363 150 CONTINUE
6364*
6365 RETURN
6366*
6367* End of PDMMCH2
6368*
6369 END
6370 SUBROUTINE pdmmch3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
6371 $ BETA, C, PC, IC, JC, DESCC, ERR, INFO )
6372*
6373* -- PBLAS test routine (version 2.0) --
6374* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6375* and University of California, Berkeley.
6376* April 1, 1998
6377*
6378* .. Scalar Arguments ..
6379 CHARACTER*1 TRANS, UPLO
6380 INTEGER IA, IC, INFO, JA, JC, M, N
6381 DOUBLE PRECISION ALPHA, BETA, ERR
6382* ..
6383* .. Array Arguments ..
6384 INTEGER DESCA( * ), DESCC( * )
6385 DOUBLE PRECISION A( * ), C( * ), PC( * )
6386* ..
6387*
6388* Purpose
6389* =======
6390*
6391* PDMMCH3 checks the results of the computational tests.
6392*
6393* Notes
6394* =====
6395*
6396* A description vector is associated with each 2D block-cyclicly dis-
6397* tributed matrix. This vector stores the information required to
6398* establish the mapping between a matrix entry and its corresponding
6399* process and memory location.
6400*
6401* In the following comments, the character _ should be read as
6402* "of the distributed matrix". Let A be a generic term for any 2D
6403* block cyclicly distributed matrix. Its description vector is DESCA:
6404*
6405* NOTATION STORED IN EXPLANATION
6406* ---------------- --------------- ------------------------------------
6407* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6408* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6409* the NPROW x NPCOL BLACS process grid
6410* A is distributed over. The context
6411* itself is global, but the handle
6412* (the integer value) may vary.
6413* M_A (global) DESCA( M_ ) The number of rows in the distribu-
6414* ted matrix A, M_A >= 0.
6415* N_A (global) DESCA( N_ ) The number of columns in the distri-
6416* buted matrix A, N_A >= 0.
6417* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6418* block of the matrix A, IMB_A > 0.
6419* INB_A (global) DESCA( INB_ ) The number of columns of the upper
6420* left block of the matrix A,
6421* INB_A > 0.
6422* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6423* bute the last M_A-IMB_A rows of A,
6424* MB_A > 0.
6425* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6426* bute the last N_A-INB_A columns of
6427* A, NB_A > 0.
6428* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6429* row of the matrix A is distributed,
6430* NPROW > RSRC_A >= 0.
6431* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6432* first column of A is distributed.
6433* NPCOL > CSRC_A >= 0.
6434* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6435* array storing the local blocks of
6436* the distributed matrix A,
6437* IF( Lc( 1, N_A ) > 0 )
6438* LLD_A >= MAX( 1, Lr( 1, M_A ) )
6439* ELSE
6440* LLD_A >= 1.
6441*
6442* Let K be the number of rows of a matrix A starting at the global in-
6443* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6444* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6445* receive if these K rows were distributed over NPROW processes. If K
6446* is the number of columns of a matrix A starting at the global index
6447* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6448* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6449* these K columns were distributed over NPCOL processes.
6450*
6451* The values of Lr() and Lc() may be determined via a call to the func-
6452* tion PB_NUMROC:
6453* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6454* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6455*
6456* Arguments
6457* =========
6458*
6459* UPLO (global input) CHARACTER*1
6460* On entry, UPLO specifies which part of C should contain the
6461* result.
6462*
6463* TRANS (global input) CHARACTER*1
6464* On entry, TRANS specifies whether the matrix A has to be
6465* transposed or not before computing the matrix-matrix addi-
6466* tion.
6467*
6468* M (global input) INTEGER
6469* On entry, M specifies the number of rows of C.
6470*
6471* N (global input) INTEGER
6472* On entry, N specifies the number of columns of C.
6473*
6474* ALPHA (global input) DOUBLE PRECISION
6475* On entry, ALPHA specifies the scalar alpha.
6476*
6477* A (local input) DOUBLE PRECISION array
6478* On entry, A is an array of dimension (DESCA( M_ ),*). This
6479* array contains a local copy of the initial entire matrix PA.
6480*
6481* IA (global input) INTEGER
6482* On entry, IA specifies A's global row index, which points to
6483* the beginning of the submatrix sub( A ).
6484*
6485* JA (global input) INTEGER
6486* On entry, JA specifies A's global column index, which points
6487* to the beginning of the submatrix sub( A ).
6488*
6489* DESCA (global and local input) INTEGER array
6490* On entry, DESCA is an integer array of dimension DLEN_. This
6491* is the array descriptor for the matrix A.
6492*
6493* BETA (global input) DOUBLE PRECISION
6494* On entry, BETA specifies the scalar beta.
6495*
6496* C (local input/local output) DOUBLE PRECISION array
6497* On entry, C is an array of dimension (DESCC( M_ ),*). This
6498* array contains a local copy of the initial entire matrix PC.
6499*
6500* PC (local input) DOUBLE PRECISION array
6501* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6502* array contains the local pieces of the matrix PC.
6503*
6504* IC (global input) INTEGER
6505* On entry, IC specifies C's global row index, which points to
6506* the beginning of the submatrix sub( C ).
6507*
6508* JC (global input) INTEGER
6509* On entry, JC specifies C's global column index, which points
6510* to the beginning of the submatrix sub( C ).
6511*
6512* DESCC (global and local input) INTEGER array
6513* On entry, DESCC is an integer array of dimension DLEN_. This
6514* is the array descriptor for the matrix C.
6515*
6516* ERR (global output) DOUBLE PRECISION
6517* On exit, ERR specifies the largest error in absolute value.
6518*
6519* INFO (global output) INTEGER
6520* On exit, if INFO <> 0, the result is less than half accurate.
6521*
6522* -- Written on April 1, 1998 by
6523* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6524*
6525* =====================================================================
6526*
6527* .. Parameters ..
6528 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6529 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6530 $ RSRC_
6531 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6532 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6533 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6534 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6535 DOUBLE PRECISION ZERO
6536 PARAMETER ( ZERO = 0.0d+0 )
6537* ..
6538* .. Local Scalars ..
6539 LOGICAL COLREP, LOWER, NOTRAN, ROWREP, UPPER
6540 INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J,
6541 $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL,
6542 $ NPROW
6543 DOUBLE PRECISION ERR0, ERRI, PREC
6544* ..
6545* .. External Subroutines ..
6546 EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L,
6547 $ pderraxpby
6548* ..
6549* .. External Functions ..
6550 LOGICAL LSAME
6551 DOUBLE PRECISION PDLAMCH
6552 EXTERNAL LSAME, PDLAMCH
6553* ..
6554* .. Intrinsic Functions ..
6555 INTRINSIC abs, max
6556* ..
6557* .. Executable Statements ..
6558*
6559 ictxt = descc( ctxt_ )
6560 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6561*
6562 prec = pdlamch( ictxt, 'eps' )
6563*
6564 upper = lsame( uplo, 'U' )
6565 lower = lsame( uplo, 'L' )
6566 notran = lsame( trans, 'N' )
6567*
6568* Compute expected result in C using data in A and C. This part of
6569* the computation is performed by every process in the grid.
6570*
6571 info = 0
6572 err = zero
6573*
6574 lda = max( 1, desca( m_ ) )
6575 ldc = max( 1, descc( m_ ) )
6576 ldpc = max( 1, descc( lld_ ) )
6577 rowrep = ( descc( rsrc_ ).EQ.-1 )
6578 colrep = ( descc( csrc_ ).EQ.-1 )
6579*
6580 IF( notran ) THEN
6581*
6582 DO 20 j = jc, jc + n - 1
6583*
6584 ioffc = ic + ( j - 1 ) * ldc
6585 ioffa = ia + ( ja - 1 + j - jc ) * lda
6586*
6587 DO 10 i = ic, ic + m - 1
6588*
6589 IF( upper ) THEN
6590 IF( ( j - jc ).GE.( i - ic ) ) THEN
6591 CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6592 $ c( ioffc ), prec )
6593 ELSE
6594 erri = zero
6595 END IF
6596 ELSE IF( lower ) THEN
6597 IF( ( j - jc ).LE.( i - ic ) ) THEN
6598 CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6599 $ c( ioffc ), prec )
6600 ELSE
6601 erri = zero
6602 END IF
6603 ELSE
6604 CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6605 $ c( ioffc ), prec )
6606 END IF
6607*
6608 CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6609 $ iic, jjc, icrow, iccol )
6610 IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6611 $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6612 err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6613 IF( err0.GT.erri )
6614 $ info = 1
6615 err = max( err, err0 )
6616 END IF
6617*
6618 ioffa = ioffa + 1
6619 ioffc = ioffc + 1
6620*
6621 10 CONTINUE
6622*
6623 20 CONTINUE
6624*
6625 ELSE
6626*
6627 DO 40 j = jc, jc + n - 1
6628*
6629 ioffc = ic + ( j - 1 ) * ldc
6630 ioffa = ia + ( j - jc ) + ( ja - 1 ) * lda
6631*
6632 DO 30 i = ic, ic + m - 1
6633*
6634 IF( upper ) THEN
6635 IF( ( j - jc ).GE.( i - ic ) ) THEN
6636 CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6637 $ c( ioffc ), prec )
6638 ELSE
6639 erri = zero
6640 END IF
6641 ELSE IF( lower ) THEN
6642 IF( ( j - jc ).LE.( i - ic ) ) THEN
6643 CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6644 $ c( ioffc ), prec )
6645 ELSE
6646 erri = zero
6647 END IF
6648 ELSE
6649 CALL pderraxpby( erri, alpha, a( ioffa ), beta,
6650 $ c( ioffc ), prec )
6651 END IF
6652*
6653 CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6654 $ iic, jjc, icrow, iccol )
6655 IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6656 $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6657 err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6658 IF( err0.GT.erri )
6659 $ info = 1
6660 err = max( err, err0 )
6661 END IF
6662*
6663 ioffc = ioffc + 1
6664 ioffa = ioffa + lda
6665*
6666 30 CONTINUE
6667*
6668 40 CONTINUE
6669*
6670 END IF
6671*
6672* If INFO = 0, all results are at least half accurate.
6673*
6674 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6675 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6676 $ mycol )
6677*
6678 RETURN
6679*
6680* End of PDMMCH3
6681*
6682 END
6683 SUBROUTINE pderraxpby( ERRBND, ALPHA, X, BETA, Y, PREC )
6684*
6685* -- PBLAS test routine (version 2.0) --
6686* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6687* and University of California, Berkeley.
6688* April 1, 1998
6689*
6690* .. Scalar Arguments ..
6691 DOUBLE PRECISION ALPHA, BETA, ERRBND, PREC, X, Y
6692* ..
6693*
6694* Purpose
6695* =======
6696*
6697* PDERRAXPBY serially computes y := beta*y + alpha * x and returns a
6698* scaled relative acceptable error bound on the result.
6699*
6700* Arguments
6701* =========
6702*
6703* ERRBND (global output) DOUBLE PRECISION
6704* On exit, ERRBND specifies the scaled relative acceptable er-
6705* ror bound.
6706*
6707* ALPHA (global input) DOUBLE PRECISION
6708* On entry, ALPHA specifies the scalar alpha.
6709*
6710* X (global input) DOUBLE PRECISION
6711* On entry, X specifies the scalar x to be scaled.
6712*
6713* BETA (global input) DOUBLE PRECISION
6714* On entry, BETA specifies the scalar beta.
6715*
6716* Y (global input/global output) DOUBLE PRECISION
6717* On entry, Y specifies the scalar y to be added. On exit, Y
6718* contains the resulting scalar y.
6719*
6720* PREC (global input) DOUBLE PRECISION
6721* On entry, PREC specifies the machine precision.
6722*
6723* -- Written on April 1, 1998 by
6724* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6725*
6726* =====================================================================
6727*
6728* .. Parameters ..
6729 DOUBLE PRECISION ONE, TWO, ZERO
6730 PARAMETER ( ONE = 1.0d+0, two = 2.0d+0,
6731 $ zero = 0.0d+0 )
6732* ..
6733* .. Local Scalars ..
6734 DOUBLE PRECISION ADDBND, FACT, SUMPOS, SUMNEG, TMP
6735* ..
6736* .. Intrinsic Functions ..
6737* ..
6738* .. Executable Statements ..
6739*
6740 SUMPOS = zero
6741 sumneg = zero
6742 fact = one + two * prec
6743 addbnd = two * two * two * prec
6744*
6745 tmp = alpha * x
6746 IF( tmp.GE.zero ) THEN
6747 sumpos = sumpos + tmp * fact
6748 ELSE
6749 sumneg = sumneg - tmp * fact
6750 END IF
6751*
6752 tmp = beta * y
6753 IF( tmp.GE.zero ) THEN
6754 sumpos = sumpos + tmp * fact
6755 ELSE
6756 sumneg = sumneg - tmp * fact
6757 END IF
6758*
6759 y = ( beta * y ) + ( alpha * x )
6760*
6761 errbnd = addbnd * max( sumpos, sumneg )
6762*
6763 RETURN
6764*
6765* End of PDERRAXPBY
6766*
6767 END
6768 DOUBLE PRECISION FUNCTION pdlamch( ICTXT, CMACH )
6769*
6770* -- PBLAS test routine (version 2.0) --
6771* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6772* and University of California, Berkeley.
6773* April 1, 1998
6774*
6775* .. Scalar Arguments ..
6776 CHARACTER*1 cmach
6777 INTEGER ictxt
6778* ..
6779*
6780* Purpose
6781* =======
6782*
6783* PDLAMCH determines double precision machine parameters.
6784*
6785* Arguments
6786* =========
6787*
6788* ICTXT (local input) INTEGER
6789* On entry, ICTXT specifies the BLACS context handle, indica-
6790* ting the global context of the operation. The context itself
6791* is global, but the value of ICTXT is local.
6792*
6793* CMACH (global input) CHARACTER*1
6794* On entry, CMACH specifies the value to be returned by PDLAMCH
6795* as follows:
6796* = 'E' or 'e', PDLAMCH := eps,
6797* = 'S' or 's , PDLAMCH := sfmin,
6798* = 'B' or 'b', PDLAMCH := base,
6799* = 'P' or 'p', PDLAMCH := eps*base,
6800* = 'N' or 'n', PDLAMCH := t,
6801* = 'R' or 'r', PDLAMCH := rnd,
6802* = 'M' or 'm', PDLAMCH := emin,
6803* = 'U' or 'u', PDLAMCH := rmin,
6804* = 'L' or 'l', PDLAMCH := emax,
6805* = 'O' or 'o', PDLAMCH := rmax,
6806*
6807* where
6808*
6809* eps = relative machine precision,
6810* sfmin = safe minimum, such that 1/sfmin does not overflow,
6811* base = base of the machine,
6812* prec = eps*base,
6813* t = number of (base) digits in the mantissa,
6814* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise,
6815* emin = minimum exponent before (gradual) underflow,
6816* rmin = underflow threshold - base**(emin-1),
6817* emax = largest exponent before overflow,
6818* rmax = overflow threshold - (base**emax)*(1-eps).
6819*
6820* -- Written on April 1, 1998 by
6821* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6822*
6823* =====================================================================
6824*
6825* .. Local Scalars ..
6826 CHARACTER*1 top
6827 INTEGER idumm
6828 DOUBLE PRECISION temp
6829* ..
6830* .. External Subroutines ..
6831 EXTERNAL dgamn2d, dgamx2d, pb_topget
6832* ..
6833* .. External Functions ..
6834 LOGICAL lsame
6835 DOUBLE PRECISION dlamch
6836 EXTERNAL dlamch, lsame
6837* ..
6838* .. Executable Statements ..
6839*
6840 temp = dlamch( cmach )
6841 idumm = 0
6842*
6843 IF( lsame( cmach, 'E' ).OR.lsame( cmach, 'S' ).OR.
6844 $ lsame( cmach, 'M' ).OR.lsame( cmach, 'U' ) ) THEN
6845 CALL pb_topget( ictxt, 'Combine', 'All', top )
6846 CALL dgamx2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
6847 $ idumm, -1, -1, idumm )
6848 ELSE IF( lsame( cmach, 'L' ).OR.lsame( cmach, 'O' ) ) THEN
6849 CALL pb_topget( ictxt, 'Combine', 'All', top )
6850 CALL dgamn2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
6851 $ idumm, -1, -1, idumm )
6852 END IF
6853*
6854 pdlamch = temp
6855*
6856 RETURN
6857*
6858* End of PDLAMCH
6859*
6860 END
6861 SUBROUTINE pdlaset( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA )
6862*
6863* -- PBLAS test routine (version 2.0) --
6864* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6865* and University of California, Berkeley.
6866* April 1, 1998
6867*
6868* .. Scalar Arguments ..
6869 CHARACTER*1 UPLO
6870 INTEGER IA, JA, M, N
6871 DOUBLE PRECISION ALPHA, BETA
6872* ..
6873* .. Array Arguments ..
6874 INTEGER DESCA( * )
6875 DOUBLE PRECISION A( * )
6876* ..
6877*
6878* Purpose
6879* =======
6880*
6881* PDLASET initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) deno-
6882* ted by sub( A ) to beta on the diagonal and alpha on the offdiago-
6883* nals.
6884*
6885* Notes
6886* =====
6887*
6888* A description vector is associated with each 2D block-cyclicly dis-
6889* tributed matrix. This vector stores the information required to
6890* establish the mapping between a matrix entry and its corresponding
6891* process and memory location.
6892*
6893* In the following comments, the character _ should be read as
6894* "of the distributed matrix". Let A be a generic term for any 2D
6895* block cyclicly distributed matrix. Its description vector is DESCA:
6896*
6897* NOTATION STORED IN EXPLANATION
6898* ---------------- --------------- ------------------------------------
6899* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6900* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6901* the NPROW x NPCOL BLACS process grid
6902* A is distributed over. The context
6903* itself is global, but the handle
6904* (the integer value) may vary.
6905* M_A (global) DESCA( M_ ) The number of rows in the distribu-
6906* ted matrix A, M_A >= 0.
6907* N_A (global) DESCA( N_ ) The number of columns in the distri-
6908* buted matrix A, N_A >= 0.
6909* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6910* block of the matrix A, IMB_A > 0.
6911* INB_A (global) DESCA( INB_ ) The number of columns of the upper
6912* left block of the matrix A,
6913* INB_A > 0.
6914* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6915* bute the last M_A-IMB_A rows of A,
6916* MB_A > 0.
6917* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6918* bute the last N_A-INB_A columns of
6919* A, NB_A > 0.
6920* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6921* row of the matrix A is distributed,
6922* NPROW > RSRC_A >= 0.
6923* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6924* first column of A is distributed.
6925* NPCOL > CSRC_A >= 0.
6926* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6927* array storing the local blocks of
6928* the distributed matrix A,
6929* IF( Lc( 1, N_A ) > 0 )
6930* LLD_A >= MAX( 1, Lr( 1, M_A ) )
6931* ELSE
6932* LLD_A >= 1.
6933*
6934* Let K be the number of rows of a matrix A starting at the global in-
6935* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6936* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6937* receive if these K rows were distributed over NPROW processes. If K
6938* is the number of columns of a matrix A starting at the global index
6939* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6940* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6941* these K columns were distributed over NPCOL processes.
6942*
6943* The values of Lr() and Lc() may be determined via a call to the func-
6944* tion PB_NUMROC:
6945* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6946* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6947*
6948* Arguments
6949* =========
6950*
6951* UPLO (global input) CHARACTER*1
6952* On entry, UPLO specifies the part of the submatrix sub( A )
6953* to be set:
6954* = 'L' or 'l': Lower triangular part is set; the strictly
6955* upper triangular part of sub( A ) is not changed;
6956* = 'U' or 'u': Upper triangular part is set; the strictly
6957* lower triangular part of sub( A ) is not changed;
6958* Otherwise: All of the matrix sub( A ) is set.
6959*
6960* M (global input) INTEGER
6961* On entry, M specifies the number of rows of the submatrix
6962* sub( A ). M must be at least zero.
6963*
6964* N (global input) INTEGER
6965* On entry, N specifies the number of columns of the submatrix
6966* sub( A ). N must be at least zero.
6967*
6968* ALPHA (global input) DOUBLE PRECISION
6969* On entry, ALPHA specifies the scalar alpha, i.e., the cons-
6970* tant to which the offdiagonal elements are to be set.
6971*
6972* BETA (global input) DOUBLE PRECISION
6973* On entry, BETA specifies the scalar beta, i.e., the constant
6974* to which the diagonal elements are to be set.
6975*
6976* A (local input/local output) DOUBLE PRECISION array
6977* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
6978* at least Lc( 1, JA+N-1 ). Before entry, this array contains
6979* the local entries of the matrix A to be set. On exit, the
6980* leading m by n submatrix sub( A ) is set as follows:
6981*
6982* if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N,
6983* if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N,
6984* otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N,
6985* and IA+i.NE.JA+j,
6986* and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N).
6987*
6988* IA (global input) INTEGER
6989* On entry, IA specifies A's global row index, which points to
6990* the beginning of the submatrix sub( A ).
6991*
6992* JA (global input) INTEGER
6993* On entry, JA specifies A's global column index, which points
6994* to the beginning of the submatrix sub( A ).
6995*
6996* DESCA (global and local input) INTEGER array
6997* On entry, DESCA is an integer array of dimension DLEN_. This
6998* is the array descriptor for the matrix A.
6999*
7000* -- Written on April 1, 1998 by
7001* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7002*
7003* =====================================================================
7004*
7005* .. Parameters ..
7006 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7007 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7008 $ RSRC_
7009 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7010 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7011 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7012 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7013* ..
7014* .. Local Scalars ..
7015 LOGICAL GODOWN, GOLEFT, ISCOLREP, ISROWREP, LOWER,
7016 $ UPPER
7017 INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
7018 $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, JJA,
7019 $ JJMAX, JOFFA, JOFFD, LCMT, LCMT00, LDA, LMBLOC,
7020 $ LNBLOC, LOW, M1, MB, MBLKD, MBLKS, MBLOC, MP,
7021 $ MRCOL, MRROW, MYCOL, MYROW, N1, NB, NBLKD,
7022 $ NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, QNB, TMP1,
7023 $ UPP
7024* ..
7025* .. Local Arrays ..
7026 INTEGER DESCA2( DLEN_ )
7027* ..
7028* .. External Subroutines ..
7029 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7031* ..
7032* .. External Functions ..
7033 LOGICAL LSAME
7034 EXTERNAL lsame
7035* ..
7036* .. Intrinsic Functions ..
7037 INTRINSIC min
7038* ..
7039* .. Executable Statements ..
7040*
7041 IF( m.EQ.0 .OR. n.EQ.0 )
7042 $ RETURN
7043*
7044* Convert descriptor
7045*
7046 CALL pb_desctrans( desca, desca2 )
7047*
7048* Get grid parameters
7049*
7050 ictxt = desca2( ctxt_ )
7051 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7052*
7053 CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
7054 $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
7055 $ iacol, mrrow, mrcol )
7056*
7057 IF( mp.LE.0 .OR. nq.LE.0 )
7058 $ RETURN
7059*
7060 isrowrep = ( desca2( rsrc_ ).LT.0 )
7061 iscolrep = ( desca2( csrc_ ).LT.0 )
7062 lda = desca2( lld_ )
7063*
7064 upper = .NOT.( lsame( uplo, 'L' ) )
7065 lower = .NOT.( lsame( uplo, 'U' ) )
7066*
7067 IF( ( ( lower.AND.upper ).AND.( alpha.EQ.beta ) ).OR.
7068 $ ( isrowrep .AND. iscolrep ) ) THEN
7069 IF( ( mp.GT.0 ).AND.( nq.GT.0 ) )
7070 $ CALL pb_dlaset( uplo, mp, nq, 0, alpha, beta,
7071 $ a( iia + ( jja - 1 ) * lda ), lda )
7072 RETURN
7073 END IF
7074*
7075* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
7076* ILOW, LOW, IUPP, and UPP.
7077*
7078 mb = desca2( mb_ )
7079 nb = desca2( nb_ )
7080 CALL pb_binfo( 0, mp, nq, imb1, inb1, mb, nb, mrrow, mrcol,
7081 $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
7082 $ lnbloc, ilow, low, iupp, upp )
7083*
7084 ioffa = iia - 1
7085 joffa = jja - 1
7086 iimax = ioffa + mp
7087 jjmax = joffa + nq
7088*
7089 IF( isrowrep ) THEN
7090 pmb = mb
7091 ELSE
7092 pmb = nprow * mb
7093 END IF
7094 IF( iscolrep ) THEN
7095 qnb = nb
7096 ELSE
7097 qnb = npcol * nb
7098 END IF
7099*
7100 m1 = mp
7101 n1 = nq
7102*
7103* Handle the first block of rows or columns separately, and update
7104* LCMT00, MBLKS and NBLKS.
7105*
7106 godown = ( lcmt00.GT.iupp )
7107 goleft = ( lcmt00.LT.ilow )
7108*
7109 IF( .NOT.godown .AND. .NOT.goleft ) THEN
7110*
7111* LCMT00 >= ILOW && LCMT00 <= IUPP
7112*
7113 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7114 godown = .NOT.goleft
7115*
7116 CALL pb_dlaset( uplo, imbloc, inbloc, lcmt00, alpha, beta,
7117 $ a( iia+joffa*lda ), lda )
7118 IF( godown ) THEN
7119 IF( upper .AND. nq.GT.inbloc )
7120 $ CALL pb_dlaset( 'All', imbloc, nq-inbloc, 0, alpha,
7121 $ alpha, a( iia+(joffa+inbloc)*lda ), lda )
7122 iia = iia + imbloc
7123 m1 = m1 - imbloc
7124 ELSE
7125 IF( lower .AND. mp.GT.imbloc )
7126 $ CALL pb_dlaset( 'All', mp-imbloc, inbloc, 0, alpha,
7127 $ alpha, a( iia+imbloc+joffa*lda ), lda )
7128 jja = jja + inbloc
7129 n1 = n1 - inbloc
7130 END IF
7131*
7132 END IF
7133*
7134 IF( godown ) THEN
7135*
7136 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7137 mblks = mblks - 1
7138 ioffa = ioffa + imbloc
7139*
7140 10 CONTINUE
7141 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7142 lcmt00 = lcmt00 - pmb
7143 mblks = mblks - 1
7144 ioffa = ioffa + mb
7145 GO TO 10
7146 END IF
7147*
7148 tmp1 = min( ioffa, iimax ) - iia + 1
7149 IF( upper .AND. tmp1.GT.0 ) THEN
7150 CALL pb_dlaset( 'All', tmp1, n1, 0, alpha, alpha,
7151 $ a( iia+joffa*lda ), lda )
7152 iia = iia + tmp1
7153 m1 = m1 - tmp1
7154 END IF
7155*
7156 IF( mblks.LE.0 )
7157 $ RETURN
7158*
7159 lcmt = lcmt00
7160 mblkd = mblks
7161 ioffd = ioffa
7162*
7163 mbloc = mb
7164 20 CONTINUE
7165 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7166 IF( mblkd.EQ.1 )
7167 $ mbloc = lmbloc
7168 CALL pb_dlaset( uplo, mbloc, inbloc, lcmt, alpha, beta,
7169 $ a( ioffd+1+joffa*lda ), lda )
7170 lcmt00 = lcmt
7171 lcmt = lcmt - pmb
7172 mblks = mblkd
7173 mblkd = mblkd - 1
7174 ioffa = ioffd
7175 ioffd = ioffd + mbloc
7176 GO TO 20
7177 END IF
7178*
7179 tmp1 = m1 - ioffd + iia - 1
7180 IF( lower .AND. tmp1.GT.0 )
7181 $ CALL pb_dlaset( 'ALL', tmp1, inbloc, 0, alpha, alpha,
7182 $ a( ioffd+1+joffa*lda ), lda )
7183*
7184 tmp1 = ioffa - iia + 1
7185 m1 = m1 - tmp1
7186 n1 = n1 - inbloc
7187 lcmt00 = lcmt00 + low - ilow + qnb
7188 nblks = nblks - 1
7189 joffa = joffa + inbloc
7190*
7191 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7192 $ CALL pb_dlaset( 'ALL', tmp1, n1, 0, alpha, alpha,
7193 $ a( iia+joffa*lda ), lda )
7194*
7195 iia = ioffa + 1
7196 jja = joffa + 1
7197*
7198 ELSE IF( goleft ) THEN
7199*
7200 lcmt00 = lcmt00 + low - ilow + qnb
7201 nblks = nblks - 1
7202 joffa = joffa + inbloc
7203*
7204 30 CONTINUE
7205 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7206 lcmt00 = lcmt00 + qnb
7207 nblks = nblks - 1
7208 joffa = joffa + nb
7209 GO TO 30
7210 END IF
7211*
7212 tmp1 = min( joffa, jjmax ) - jja + 1
7213 IF( lower .AND. tmp1.GT.0 ) THEN
7214 CALL pb_dlaset( 'All', m1, tmp1, 0, alpha, alpha,
7215 $ a( iia+(jja-1)*lda ), lda )
7216 jja = jja + tmp1
7217 n1 = n1 - tmp1
7218 END IF
7219*
7220 IF( nblks.LE.0 )
7221 $ RETURN
7222*
7223 lcmt = lcmt00
7224 nblkd = nblks
7225 joffd = joffa
7226*
7227 nbloc = nb
7228 40 CONTINUE
7229 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7230 IF( nblkd.EQ.1 )
7231 $ nbloc = lnbloc
7232 CALL pb_dlaset( uplo, imbloc, nbloc, lcmt, alpha, beta,
7233 $ a( iia+joffd*lda ), lda )
7234 lcmt00 = lcmt
7235 lcmt = lcmt + qnb
7236 nblks = nblkd
7237 nblkd = nblkd - 1
7238 joffa = joffd
7239 joffd = joffd + nbloc
7240 GO TO 40
7241 END IF
7242*
7243 tmp1 = n1 - joffd + jja - 1
7244 IF( upper .AND. tmp1.GT.0 )
7245 $ CALL pb_dlaset( 'All', imbloc, tmp1, 0, alpha, alpha,
7246 $ a( iia+joffd*lda ), lda )
7247*
7248 tmp1 = joffa - jja + 1
7249 m1 = m1 - imbloc
7250 n1 = n1 - tmp1
7251 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7252 mblks = mblks - 1
7253 ioffa = ioffa + imbloc
7254*
7255 IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
7256 $ CALL pb_dlaset( 'All', m1, tmp1, 0, alpha, alpha,
7257 $ a( ioffa+1+(jja-1)*lda ), lda )
7258*
7259 iia = ioffa + 1
7260 jja = joffa + 1
7261*
7262 END IF
7263*
7264 nbloc = nb
7265 50 CONTINUE
7266 IF( nblks.GT.0 ) THEN
7267 IF( nblks.EQ.1 )
7268 $ nbloc = lnbloc
7269 60 CONTINUE
7270 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7271 lcmt00 = lcmt00 - pmb
7272 mblks = mblks - 1
7273 ioffa = ioffa + mb
7274 GO TO 60
7275 END IF
7276*
7277 tmp1 = min( ioffa, iimax ) - iia + 1
7278 IF( upper .AND. tmp1.GT.0 ) THEN
7279 CALL pb_dlaset( 'All', tmp1, n1, 0, alpha, alpha,
7280 $ a( iia+joffa*lda ), lda )
7281 iia = iia + tmp1
7282 m1 = m1 - tmp1
7283 END IF
7284*
7285 IF( mblks.LE.0 )
7286 $ RETURN
7287*
7288 lcmt = lcmt00
7289 mblkd = mblks
7290 ioffd = ioffa
7291*
7292 mbloc = mb
7293 70 CONTINUE
7294 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7295 IF( mblkd.EQ.1 )
7296 $ mbloc = lmbloc
7297 CALL pb_dlaset( uplo, mbloc, nbloc, lcmt, alpha, beta,
7298 $ a( ioffd+1+joffa*lda ), lda )
7299 lcmt00 = lcmt
7300 lcmt = lcmt - pmb
7301 mblks = mblkd
7302 mblkd = mblkd - 1
7303 ioffa = ioffd
7304 ioffd = ioffd + mbloc
7305 GO TO 70
7306 END IF
7307*
7308 tmp1 = m1 - ioffd + iia - 1
7309 IF( lower .AND. tmp1.GT.0 )
7310 $ CALL pb_dlaset( 'All', tmp1, nbloc, 0, alpha, alpha,
7311 $ a( ioffd+1+joffa*lda ), lda )
7312*
7313 tmp1 = min( ioffa, iimax ) - iia + 1
7314 m1 = m1 - tmp1
7315 n1 = n1 - nbloc
7316 lcmt00 = lcmt00 + qnb
7317 nblks = nblks - 1
7318 joffa = joffa + nbloc
7319*
7320 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7321 $ CALL pb_dlaset( 'All', tmp1, n1, 0, alpha, alpha,
7322 $ a( iia+joffa*lda ), lda )
7323*
7324 iia = ioffa + 1
7325 jja = joffa + 1
7326*
7327 GO TO 50
7328*
7329 END IF
7330*
7331 RETURN
7332*
7333* End of PDLASET
7334*
7335 END
7336 SUBROUTINE pdlascal( TYPE, M, N, ALPHA, A, IA, JA, DESCA )
7337*
7338* -- PBLAS test routine (version 2.0) --
7339* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7340* and University of California, Berkeley.
7341* April 1, 1998
7342*
7343* .. Scalar Arguments ..
7344 CHARACTER*1 TYPE
7345 INTEGER IA, JA, M, N
7346 DOUBLE PRECISION ALPHA
7347* ..
7348* .. Array Arguments ..
7349 INTEGER DESCA( * )
7350 DOUBLE PRECISION A( * )
7351* ..
7352*
7353* Purpose
7354* =======
7355*
7356* PDLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted
7357* by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full,
7358* upper triangular, lower triangular or upper Hessenberg.
7359*
7360* Notes
7361* =====
7362*
7363* A description vector is associated with each 2D block-cyclicly dis-
7364* tributed matrix. This vector stores the information required to
7365* establish the mapping between a matrix entry and its corresponding
7366* process and memory location.
7367*
7368* In the following comments, the character _ should be read as
7369* "of the distributed matrix". Let A be a generic term for any 2D
7370* block cyclicly distributed matrix. Its description vector is DESCA:
7371*
7372* NOTATION STORED IN EXPLANATION
7373* ---------------- --------------- ------------------------------------
7374* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7375* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7376* the NPROW x NPCOL BLACS process grid
7377* A is distributed over. The context
7378* itself is global, but the handle
7379* (the integer value) may vary.
7380* M_A (global) DESCA( M_ ) The number of rows in the distribu-
7381* ted matrix A, M_A >= 0.
7382* N_A (global) DESCA( N_ ) The number of columns in the distri-
7383* buted matrix A, N_A >= 0.
7384* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7385* block of the matrix A, IMB_A > 0.
7386* INB_A (global) DESCA( INB_ ) The number of columns of the upper
7387* left block of the matrix A,
7388* INB_A > 0.
7389* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7390* bute the last M_A-IMB_A rows of A,
7391* MB_A > 0.
7392* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7393* bute the last N_A-INB_A columns of
7394* A, NB_A > 0.
7395* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7396* row of the matrix A is distributed,
7397* NPROW > RSRC_A >= 0.
7398* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7399* first column of A is distributed.
7400* NPCOL > CSRC_A >= 0.
7401* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7402* array storing the local blocks of
7403* the distributed matrix A,
7404* IF( Lc( 1, N_A ) > 0 )
7405* LLD_A >= MAX( 1, Lr( 1, M_A ) )
7406* ELSE
7407* LLD_A >= 1.
7408*
7409* Let K be the number of rows of a matrix A starting at the global in-
7410* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7411* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7412* receive if these K rows were distributed over NPROW processes. If K
7413* is the number of columns of a matrix A starting at the global index
7414* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7415* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7416* these K columns were distributed over NPCOL processes.
7417*
7418* The values of Lr() and Lc() may be determined via a call to the func-
7419* tion PB_NUMROC:
7420* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7421* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7422*
7423* Arguments
7424* =========
7425*
7426* TYPE (global input) CHARACTER*1
7427* On entry, TYPE specifies the type of the input submatrix as
7428* follows:
7429* = 'L' or 'l': sub( A ) is a lower triangular matrix,
7430* = 'U' or 'u': sub( A ) is an upper triangular matrix,
7431* = 'H' or 'h': sub( A ) is an upper Hessenberg matrix,
7432* otherwise sub( A ) is a full matrix.
7433*
7434* M (global input) INTEGER
7435* On entry, M specifies the number of rows of the submatrix
7436* sub( A ). M must be at least zero.
7437*
7438* N (global input) INTEGER
7439* On entry, N specifies the number of columns of the submatrix
7440* sub( A ). N must be at least zero.
7441*
7442* ALPHA (global input) DOUBLE PRECISION
7443* On entry, ALPHA specifies the scalar alpha.
7444*
7445* A (local input/local output) DOUBLE PRECISION array
7446* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7447* at least Lc( 1, JA+N-1 ). Before entry, this array contains
7448* the local entries of the matrix A.
7449* On exit, the local entries of this array corresponding to the
7450* to the entries of the submatrix sub( A ) are overwritten by
7451* the local entries of the m by n scaled submatrix.
7452*
7453* IA (global input) INTEGER
7454* On entry, IA specifies A's global row index, which points to
7455* the beginning of the submatrix sub( A ).
7456*
7457* JA (global input) INTEGER
7458* On entry, JA specifies A's global column index, which points
7459* to the beginning of the submatrix sub( A ).
7460*
7461* DESCA (global and local input) INTEGER array
7462* On entry, DESCA is an integer array of dimension DLEN_. This
7463* is the array descriptor for the matrix A.
7464*
7465* -- Written on April 1, 1998 by
7466* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7467*
7468* =====================================================================
7469*
7470* .. Parameters ..
7471 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7472 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7473 $ RSRC_
7474 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7475 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7476 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7477 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7478* ..
7479* .. Local Scalars ..
7480 CHARACTER*1 UPLO
7481 LOGICAL GODOWN, GOLEFT, LOWER, UPPER
7482 INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
7483 $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE,
7484 $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00,
7485 $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS,
7486 $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB,
7487 $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB,
7488 $ QNB, TMP1, UPP
7489* ..
7490* .. Local Arrays ..
7491 INTEGER DESCA2( DLEN_ )
7492* ..
7493* .. External Subroutines ..
7494 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7496* ..
7497* .. External Functions ..
7498 LOGICAL LSAME
7499 INTEGER PB_NUMROC
7500 EXTERNAL lsame, pb_numroc
7501* ..
7502* .. Intrinsic Functions ..
7503 INTRINSIC min
7504* ..
7505* .. Executable Statements ..
7506*
7507* Convert descriptor
7508*
7509 CALL pb_desctrans( desca, desca2 )
7510*
7511* Get grid parameters
7512*
7513 ictxt = desca2( ctxt_ )
7514 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7515*
7516* Quick return if possible
7517*
7518 IF( m.EQ.0 .OR. n.EQ.0 )
7519 $ RETURN
7520*
7521 IF( lsame( TYPE, 'L' ) ) then
7522 itype = 1
7523 uplo = TYPE
7524 upper = .false.
7525 lower = .true.
7526 ioffd = 0
7527 ELSE IF( lsame( TYPE, 'U' ) ) then
7528 itype = 2
7529 uplo = TYPE
7530 upper = .true.
7531 lower = .false.
7532 ioffd = 0
7533 ELSE IF( lsame( TYPE, 'H' ) ) then
7534 itype = 3
7535 uplo = 'U'
7536 upper = .true.
7537 lower = .false.
7538 ioffd = 1
7539 ELSE
7540 itype = 0
7541 uplo = 'A'
7542 upper = .true.
7543 lower = .true.
7544 ioffd = 0
7545 END IF
7546*
7547* Compute local indexes
7548*
7549 IF( itype.EQ.0 ) THEN
7550*
7551* Full matrix
7552*
7553 CALL pb_infog2l( ia, ja, desca2, nprow, npcol, myrow, mycol,
7554 $ iia, jja, iarow, iacol )
7555 mp = pb_numroc( m, ia, desca2( imb_ ), desca2( mb_ ), myrow,
7556 $ desca2( rsrc_ ), nprow )
7557 nq = pb_numroc( n, ja, desca2( inb_ ), desca2( nb_ ), mycol,
7558 $ desca2( csrc_ ), npcol )
7559*
7560 IF( mp.LE.0 .OR. nq.LE.0 )
7561 $ RETURN
7562*
7563 lda = desca2( lld_ )
7564 ioffa = iia + ( jja - 1 ) * lda
7565*
7566 CALL pb_dlascal( 'All', mp, nq, 0, alpha, a( ioffa ), lda )
7567*
7568 ELSE
7569*
7570* Trapezoidal matrix
7571*
7572 CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
7573 $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
7574 $ iacol, mrrow, mrcol )
7575*
7576 IF( mp.LE.0 .OR. nq.LE.0 )
7577 $ RETURN
7578*
7579* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC,
7580* LNBLOC, ILOW, LOW, IUPP, and UPP.
7581*
7582 mb = desca2( mb_ )
7583 nb = desca2( nb_ )
7584 lda = desca2( lld_ )
7585*
7586 CALL pb_binfo( ioffd, mp, nq, imb1, inb1, mb, nb, mrrow,
7587 $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
7588 $ lmbloc, lnbloc, ilow, low, iupp, upp )
7589*
7590 m1 = mp
7591 n1 = nq
7592 ioffa = iia - 1
7593 joffa = jja - 1
7594 iimax = ioffa + mp
7595 jjmax = joffa + nq
7596*
7597 IF( desca2( rsrc_ ).LT.0 ) THEN
7598 pmb = mb
7599 ELSE
7600 pmb = nprow * mb
7601 END IF
7602 IF( desca2( csrc_ ).LT.0 ) THEN
7603 qnb = nb
7604 ELSE
7605 qnb = npcol * nb
7606 END IF
7607*
7608* Handle the first block of rows or columns separately, and
7609* update LCMT00, MBLKS and NBLKS.
7610*
7611 godown = ( lcmt00.GT.iupp )
7612 goleft = ( lcmt00.LT.ilow )
7613*
7614 IF( .NOT.godown .AND. .NOT.goleft ) THEN
7615*
7616* LCMT00 >= ILOW && LCMT00 <= IUPP
7617*
7618 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7619 godown = .NOT.goleft
7620*
7621 CALL pb_dlascal( uplo, imbloc, inbloc, lcmt00, alpha,
7622 $ a( iia+joffa*lda ), lda )
7623 IF( godown ) THEN
7624 IF( upper .AND. nq.GT.inbloc )
7625 $ CALL pb_dlascal( 'All', imbloc, nq-inbloc, 0, alpha,
7626 $ a( iia+(joffa+inbloc)*lda ), lda )
7627 iia = iia + imbloc
7628 m1 = m1 - imbloc
7629 ELSE
7630 IF( lower .AND. mp.GT.imbloc )
7631 $ CALL pb_dlascal( 'All', mp-imbloc, inbloc, 0, alpha,
7632 $ a( iia+imbloc+joffa*lda ), lda )
7633 jja = jja + inbloc
7634 n1 = n1 - inbloc
7635 END IF
7636*
7637 END IF
7638*
7639 IF( godown ) THEN
7640*
7641 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7642 mblks = mblks - 1
7643 ioffa = ioffa + imbloc
7644*
7645 10 CONTINUE
7646 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7647 lcmt00 = lcmt00 - pmb
7648 mblks = mblks - 1
7649 ioffa = ioffa + mb
7650 GO TO 10
7651 END IF
7652*
7653 tmp1 = min( ioffa, iimax ) - iia + 1
7654 IF( upper .AND. tmp1.GT.0 ) THEN
7655 CALL pb_dlascal( 'All', tmp1, n1, 0, alpha,
7656 $ a( iia+joffa*lda ), lda )
7657 iia = iia + tmp1
7658 m1 = m1 - tmp1
7659 END IF
7660*
7661 IF( mblks.LE.0 )
7662 $ RETURN
7663*
7664 lcmt = lcmt00
7665 mblkd = mblks
7666 ioffd = ioffa
7667*
7668 mbloc = mb
7669 20 CONTINUE
7670 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7671 IF( mblkd.EQ.1 )
7672 $ mbloc = lmbloc
7673 CALL pb_dlascal( uplo, mbloc, inbloc, lcmt, alpha,
7674 $ a( ioffd+1+joffa*lda ), lda )
7675 lcmt00 = lcmt
7676 lcmt = lcmt - pmb
7677 mblks = mblkd
7678 mblkd = mblkd - 1
7679 ioffa = ioffd
7680 ioffd = ioffd + mbloc
7681 GO TO 20
7682 END IF
7683*
7684 tmp1 = m1 - ioffd + iia - 1
7685 IF( lower .AND. tmp1.GT.0 )
7686 $ CALL pb_dlascal( 'All', tmp1, inbloc, 0, alpha,
7687 $ a( ioffd+1+joffa*lda ), lda )
7688*
7689 tmp1 = ioffa - iia + 1
7690 m1 = m1 - tmp1
7691 n1 = n1 - inbloc
7692 lcmt00 = lcmt00 + low - ilow + qnb
7693 nblks = nblks - 1
7694 joffa = joffa + inbloc
7695*
7696 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7697 $ CALL pb_dlascal( 'All', tmp1, n1, 0, alpha,
7698 $ a( iia+joffa*lda ), lda )
7699*
7700 iia = ioffa + 1
7701 jja = joffa + 1
7702*
7703 ELSE IF( goleft ) THEN
7704*
7705 lcmt00 = lcmt00 + low - ilow + qnb
7706 nblks = nblks - 1
7707 joffa = joffa + inbloc
7708*
7709 30 CONTINUE
7710 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7711 lcmt00 = lcmt00 + qnb
7712 nblks = nblks - 1
7713 joffa = joffa + nb
7714 GO TO 30
7715 END IF
7716*
7717 tmp1 = min( joffa, jjmax ) - jja + 1
7718 IF( lower .AND. tmp1.GT.0 ) THEN
7719 CALL pb_dlascal( 'All', m1, tmp1, 0, alpha,
7720 $ a( iia+(jja-1)*lda ), lda )
7721 jja = jja + tmp1
7722 n1 = n1 - tmp1
7723 END IF
7724*
7725 IF( nblks.LE.0 )
7726 $ RETURN
7727*
7728 lcmt = lcmt00
7729 nblkd = nblks
7730 joffd = joffa
7731*
7732 nbloc = nb
7733 40 CONTINUE
7734 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7735 IF( nblkd.EQ.1 )
7736 $ nbloc = lnbloc
7737 CALL pb_dlascal( uplo, imbloc, nbloc, lcmt, alpha,
7738 $ a( iia+joffd*lda ), lda )
7739 lcmt00 = lcmt
7740 lcmt = lcmt + qnb
7741 nblks = nblkd
7742 nblkd = nblkd - 1
7743 joffa = joffd
7744 joffd = joffd + nbloc
7745 GO TO 40
7746 END IF
7747*
7748 tmp1 = n1 - joffd + jja - 1
7749 IF( upper .AND. tmp1.GT.0 )
7750 $ CALL pb_dlascal( 'All', imbloc, tmp1, 0, alpha,
7751 $ a( iia+joffd*lda ), lda )
7752*
7753 tmp1 = joffa - jja + 1
7754 m1 = m1 - imbloc
7755 n1 = n1 - tmp1
7756 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7757 mblks = mblks - 1
7758 ioffa = ioffa + imbloc
7759*
7760 IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
7761 $ CALL pb_dlascal( 'All', m1, tmp1, 0, alpha,
7762 $ a( ioffa+1+(jja-1)*lda ), lda )
7763*
7764 iia = ioffa + 1
7765 jja = joffa + 1
7766*
7767 END IF
7768*
7769 nbloc = nb
7770 50 CONTINUE
7771 IF( nblks.GT.0 ) THEN
7772 IF( nblks.EQ.1 )
7773 $ nbloc = lnbloc
7774 60 CONTINUE
7775 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7776 lcmt00 = lcmt00 - pmb
7777 mblks = mblks - 1
7778 ioffa = ioffa + mb
7779 GO TO 60
7780 END IF
7781*
7782 tmp1 = min( ioffa, iimax ) - iia + 1
7783 IF( upper .AND. tmp1.GT.0 ) THEN
7784 CALL pb_dlascal( 'All', tmp1, n1, 0, alpha,
7785 $ a( iia+joffa*lda ), lda )
7786 iia = iia + tmp1
7787 m1 = m1 - tmp1
7788 END IF
7789*
7790 IF( mblks.LE.0 )
7791 $ RETURN
7792*
7793 lcmt = lcmt00
7794 mblkd = mblks
7795 ioffd = ioffa
7796*
7797 mbloc = mb
7798 70 CONTINUE
7799 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7800 IF( mblkd.EQ.1 )
7801 $ mbloc = lmbloc
7802 CALL pb_dlascal( uplo, mbloc, nbloc, lcmt, alpha,
7803 $ a( ioffd+1+joffa*lda ), lda )
7804 lcmt00 = lcmt
7805 lcmt = lcmt - pmb
7806 mblks = mblkd
7807 mblkd = mblkd - 1
7808 ioffa = ioffd
7809 ioffd = ioffd + mbloc
7810 GO TO 70
7811 END IF
7812*
7813 tmp1 = m1 - ioffd + iia - 1
7814 IF( lower .AND. tmp1.GT.0 )
7815 $ CALL pb_dlascal( 'All', tmp1, nbloc, 0, alpha,
7816 $ a( ioffd+1+joffa*lda ), lda )
7817*
7818 tmp1 = min( ioffa, iimax ) - iia + 1
7819 m1 = m1 - tmp1
7820 n1 = n1 - nbloc
7821 lcmt00 = lcmt00 + qnb
7822 nblks = nblks - 1
7823 joffa = joffa + nbloc
7824*
7825 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7826 $ CALL pb_dlascal( 'All', tmp1, n1, 0, alpha,
7827 $ a( iia+joffa*lda ), lda )
7828*
7829 iia = ioffa + 1
7830 jja = joffa + 1
7831*
7832 GO TO 50
7833*
7834 END IF
7835*
7836 END IF
7837*
7838 RETURN
7839*
7840* End of PDLASCAL
7841*
7842 END
7843 SUBROUTINE pdlagen( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA,
7844 $ DESCA, IASEED, A, LDA )
7845*
7846* -- PBLAS test routine (version 2.0) --
7847* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7848* and University of California, Berkeley.
7849* April 1, 1998
7850*
7851* .. Scalar Arguments ..
7852 LOGICAL inplace
7853 CHARACTER*1 aform, diag
7854 INTEGER ia, iaseed, ja, lda, m, n, offa
7855* ..
7856* .. Array Arguments ..
7857 INTEGER desca( * )
7858 DOUBLE PRECISION A( LDA, * )
7859* ..
7860*
7861* Purpose
7862* =======
7863*
7864* PDLAGEN generates (or regenerates) a submatrix sub( A ) denoting
7865* A(IA:IA+M-1,JA:JA+N-1).
7866*
7867* Notes
7868* =====
7869*
7870* A description vector is associated with each 2D block-cyclicly dis-
7871* tributed matrix. This vector stores the information required to
7872* establish the mapping between a matrix entry and its corresponding
7873* process and memory location.
7874*
7875* In the following comments, the character _ should be read as
7876* "of the distributed matrix". Let A be a generic term for any 2D
7877* block cyclicly distributed matrix. Its description vector is DESCA:
7878*
7879* NOTATION STORED IN EXPLANATION
7880* ---------------- --------------- ------------------------------------
7881* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7882* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7883* the NPROW x NPCOL BLACS process grid
7884* A is distributed over. The context
7885* itself is global, but the handle
7886* (the integer value) may vary.
7887* M_A (global) DESCA( M_ ) The number of rows in the distribu-
7888* ted matrix A, M_A >= 0.
7889* N_A (global) DESCA( N_ ) The number of columns in the distri-
7890* buted matrix A, N_A >= 0.
7891* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7892* block of the matrix A, IMB_A > 0.
7893* INB_A (global) DESCA( INB_ ) The number of columns of the upper
7894* left block of the matrix A,
7895* INB_A > 0.
7896* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7897* bute the last M_A-IMB_A rows of A,
7898* MB_A > 0.
7899* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7900* bute the last N_A-INB_A columns of
7901* A, NB_A > 0.
7902* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7903* row of the matrix A is distributed,
7904* NPROW > RSRC_A >= 0.
7905* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7906* first column of A is distributed.
7907* NPCOL > CSRC_A >= 0.
7908* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7909* array storing the local blocks of
7910* the distributed matrix A,
7911* IF( Lc( 1, N_A ) > 0 )
7912* LLD_A >= MAX( 1, Lr( 1, M_A ) )
7913* ELSE
7914* LLD_A >= 1.
7915*
7916* Let K be the number of rows of a matrix A starting at the global in-
7917* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7918* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7919* receive if these K rows were distributed over NPROW processes. If K
7920* is the number of columns of a matrix A starting at the global index
7921* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7922* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7923* these K columns were distributed over NPCOL processes.
7924*
7925* The values of Lr() and Lc() may be determined via a call to the func-
7926* tion PB_NUMROC:
7927* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7928* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7929*
7930* Arguments
7931* =========
7932*
7933* INPLACE (global input) LOGICAL
7934* On entry, INPLACE specifies if the matrix should be generated
7935* in place or not. If INPLACE is .TRUE., the local random array
7936* to be generated will start in memory at the local memory lo-
7937* cation A( 1, 1 ), otherwise it will start at the local posi-
7938* tion induced by IA and JA.
7939*
7940* AFORM (global input) CHARACTER*1
7941* On entry, AFORM specifies the type of submatrix to be genera-
7942* ted as follows:
7943* AFORM = 'S', sub( A ) is a symmetric matrix,
7944* AFORM = 'H', sub( A ) is a Hermitian matrix,
7945* AFORM = 'T', sub( A ) is overrwritten with the transpose
7946* of what would normally be generated,
7947* AFORM = 'C', sub( A ) is overwritten with the conjugate
7948* transpose of what would normally be genera-
7949* ted.
7950* AFORM = 'N', a random submatrix is generated.
7951*
7952* DIAG (global input) CHARACTER*1
7953* On entry, DIAG specifies if the generated submatrix is diago-
7954* nally dominant or not as follows:
7955* DIAG = 'D' : sub( A ) is diagonally dominant,
7956* DIAG = 'N' : sub( A ) is not diagonally dominant.
7957*
7958* OFFA (global input) INTEGER
7959* On entry, OFFA specifies the offdiagonal of the underlying
7960* matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma-
7961* trix is symmetric, Hermitian or diagonally dominant. OFFA = 0
7962* specifies the main diagonal, OFFA > 0 specifies a subdiago-
7963* nal, and OFFA < 0 specifies a superdiagonal (see further de-
7964* tails).
7965*
7966* M (global input) INTEGER
7967* On entry, M specifies the global number of matrix rows of the
7968* submatrix sub( A ) to be generated. M must be at least zero.
7969*
7970* N (global input) INTEGER
7971* On entry, N specifies the global number of matrix columns of
7972* the submatrix sub( A ) to be generated. N must be at least
7973* zero.
7974*
7975* IA (global input) INTEGER
7976* On entry, IA specifies A's global row index, which points to
7977* the beginning of the submatrix sub( A ).
7978*
7979* JA (global input) INTEGER
7980* On entry, JA specifies A's global column index, which points
7981* to the beginning of the submatrix sub( A ).
7982*
7983* DESCA (global and local input) INTEGER array
7984* On entry, DESCA is an integer array of dimension DLEN_. This
7985* is the array descriptor for the matrix A.
7986*
7987* IASEED (global input) INTEGER
7988* On entry, IASEED specifies the seed number to generate the
7989* matrix A. IASEED must be at least zero.
7990*
7991* A (local output) DOUBLE PRECISION array
7992* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7993* at least Lc( 1, JA+N-1 ). On exit, this array contains the
7994* local entries of the randomly generated submatrix sub( A ).
7995*
7996* LDA (local input) INTEGER
7997* On entry, LDA specifies the local leading dimension of the
7998* array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_).
7999* This restriction is however not enforced, and this subroutine
8000* requires only that LDA >= MAX( 1, Mp ) where
8001*
8002* Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ).
8003*
8004* PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW
8005* and NPCOL can be determined by calling the BLACS subroutine
8006* BLACS_GRIDINFO.
8007*
8008* Further Details
8009* ===============
8010*
8011* OFFD is tied to the matrix described by DESCA, as opposed to the
8012* piece that is currently (re)generated. This is a global information
8013* independent from the distribution parameters. Below are examples of
8014* the meaning of OFFD for a global 7 by 5 matrix:
8015*
8016* ---------------------------------------------------------------------
8017* OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4
8018* -------|-------------------------------------------------------------
8019* | | OFFD=-1 | OFFD=0 OFFD=2
8020* | V V
8021* 0 | . d . . . -> d . . . . . . . . .
8022* 1 | . . d . . . d . . . . . . . .
8023* 2 | . . . d . . . d . . -> d . . . .
8024* 3 | . . . . d . . . d . . d . . .
8025* 4 | . . . . . . . . . d . . d . .
8026* 5 | . . . . . . . . . . . . . d .
8027* 6 | . . . . . . . . . . . . . . d
8028* ---------------------------------------------------------------------
8029*
8030* -- Written on April 1, 1998 by
8031* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8032*
8033* =====================================================================
8034*
8035* .. Parameters ..
8036 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8037 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8038 $ RSRC_
8039 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8040 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8041 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8042 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8043 INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
8044 $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
8045 $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
8046 PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
8047 $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
8048 $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
8049 $ jmp_nqnb = 10, jmp_nqinbloc = 11,
8050 $ jmp_len = 11 )
8051* ..
8052* .. Local Scalars ..
8053 LOGICAL DIAGDO, SYMM, HERM, NOTRAN
8054 INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK,
8055 $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB,
8056 $ inb1, inbloc, inbvir, info, ioffda, itmp, iupp,
8057 $ ivir, jja, jlocblk, jlocoff, jvir, lcmt00,
8058 $ lmbloc, lnbloc, low, maxmn, mb, mblks, mp,
8059 $ mrcol, mrrow, mycdist, mycol, myrdist, myrow,
8060 $ nb, nblks, npcol, nprow, nq, nvir, rsrc, upp
8061 DOUBLE PRECISION ALPHA
8062* ..
8063* .. Local Arrays ..
8064 INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ),
8065 $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 )
8066* ..
8067* .. External Subroutines ..
8068 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
8072* ..
8073* .. External Functions ..
8074 LOGICAL LSAME
8075 EXTERNAL LSAME
8076* ..
8077* .. Intrinsic Functions ..
8078 INTRINSIC DBLE, MAX, MIN
8079* ..
8080* .. Data Statements ..
8081 DATA ( muladd0( i ), i = 1, 4 ) / 20077, 16838,
8082 $ 12345, 0 /
8083* ..
8084* .. Executable Statements ..
8085*
8086* Convert descriptor
8087*
8088 CALL pb_desctrans( desca, desca2 )
8089*
8090* Test the input arguments
8091*
8092 ictxt = desca2( ctxt_ )
8093 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8094*
8095* Test the input parameters
8096*
8097 info = 0
8098 IF( nprow.EQ.-1 ) THEN
8099 info = -( 1000 + ctxt_ )
8100 ELSE
8101 symm = lsame( aform, 'S' )
8102 herm = lsame( aform, 'H' )
8103 notran = lsame( aform, 'N' )
8104 diagdo = lsame( diag, 'D' )
8105 IF( .NOT.( symm.OR.herm.OR.notran ) .AND.
8106 $ .NOT.( lsame( aform, 'T' ) ) .AND.
8107 $ .NOT.( lsame( aform, 'C' ) ) ) THEN
8108 info = -2
8109 ELSE IF( ( .NOT.diagdo ) .AND.
8110 $ ( .NOT.lsame( diag, 'N' ) ) ) THEN
8111 info = -3
8112 END IF
8113 CALL pb_chkmat( ictxt, m, 5, n, 6, ia, ja, desca2, 10, info )
8114 END IF
8115*
8116 IF( info.NE.0 ) THEN
8117 CALL pxerbla( ictxt, 'PDLAGEN', -info )
8118 RETURN
8119 END IF
8120*
8121* Quick return if possible
8122*
8123 IF( ( m.LE.0 ).OR.( n.LE.0 ) )
8124 $ RETURN
8125*
8126* Start the operations
8127*
8128 mb = desca2( mb_ )
8129 nb = desca2( nb_ )
8130 imb = desca2( imb_ )
8131 inb = desca2( inb_ )
8132 rsrc = desca2( rsrc_ )
8133 csrc = desca2( csrc_ )
8134*
8135* Figure out local information about the distributed matrix operand
8136*
8137 CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
8138 $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
8139 $ iacol, mrrow, mrcol )
8140*
8141* Decide where the entries shall be stored in memory
8142*
8143 IF( inplace ) THEN
8144 iia = 1
8145 jja = 1
8146 END IF
8147*
8148* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
8149* ILOW, LOW, IUPP, and UPP.
8150*
8151 ioffda = ja + offa - ia
8152 CALL pb_binfo( ioffda, mp, nq, imb1, inb1, mb, nb, mrrow,
8153 $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
8154 $ lmbloc, lnbloc, ilow, low, iupp, upp )
8155*
8156* Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST
8157* This values correspond to the square virtual underlying matrix
8158* of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used
8159* to set up the random sequence. For practical purposes, the size
8160* of this virtual matrix is upper bounded by M_ + N_ - 1.
8161*
8162 itmp = max( 0, -offa )
8163 ivir = ia + itmp
8164 imbvir = imb + itmp
8165 nvir = desca2( m_ ) + itmp
8166*
8167 CALL pb_locinfo( ivir, imbvir, mb, myrow, rsrc, nprow, ilocblk,
8168 $ ilocoff, myrdist )
8169*
8170 itmp = max( 0, offa )
8171 jvir = ja + itmp
8172 inbvir = inb + itmp
8173 nvir = max( max( nvir, desca2( n_ ) + itmp ),
8174 $ desca2( m_ ) + desca2( n_ ) - 1 )
8175*
8176 CALL pb_locinfo( jvir, inbvir, nb, mycol, csrc, npcol, jlocblk,
8177 $ jlocoff, mycdist )
8178*
8179 IF( symm .OR. herm .OR. notran ) THEN
8180*
8181 CALL pb_initjmp( .true., nvir, imbvir, inbvir, imbloc, inbloc,
8182 $ mb, nb, rsrc, csrc, nprow, npcol, 1, jmp )
8183*
8184* Compute constants to jump JMP( * ) numbers in the sequence
8185*
8186 CALL pb_initmuladd( muladd0, jmp, imuladd )
8187*
8188* Compute and set the random value corresponding to A( IA, JA )
8189*
8190 CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8191 $ myrdist, mycdist, nprow, npcol, jmp,
8192 $ imuladd, iran )
8193*
8194 CALL pb_dlagen( 'Lower', aform, a( iia, jja ), lda, lcmt00,
8195 $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8196 $ nb, lnbloc, jmp, imuladd )
8197*
8198 END IF
8199*
8200 IF( symm .OR. herm .OR. ( .NOT. notran ) ) THEN
8201*
8202 CALL pb_initjmp( .false., nvir, imbvir, inbvir, imbloc, inbloc,
8203 $ mb, nb, rsrc, csrc, nprow, npcol, 1, jmp )
8204*
8205* Compute constants to jump JMP( * ) numbers in the sequence
8206*
8207 CALL pb_initmuladd( muladd0, jmp, imuladd )
8208*
8209* Compute and set the random value corresponding to A( IA, JA )
8210*
8211 CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8212 $ myrdist, mycdist, nprow, npcol, jmp,
8213 $ imuladd, iran )
8214*
8215 CALL pb_dlagen( 'Upper', aform, a( iia, jja ), lda, lcmt00,
8216 $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8217 $ nb, lnbloc, jmp, imuladd )
8218*
8219 END IF
8220*
8221 IF( diagdo ) THEN
8222*
8223 maxmn = max( desca2( m_ ), desca2( n_ ) )
8224 alpha = dble( maxmn )
8225*
8226 IF( ioffda.GE.0 ) THEN
8227 CALL pdladom( inplace, min( max( 0, m-ioffda ), n ), alpha,
8228 $ a, min( ia+ioffda, ia+m-1 ), ja, desca )
8229 ELSE
8230 CALL pdladom( inplace, min( m, max( 0, n+ioffda ) ), alpha,
8231 $ a, ia, min( ja-ioffda, ja+n-1 ), desca )
8232 END IF
8233*
8234 END IF
8235*
8236 RETURN
8237*
8238* End of PDLAGEN
8239*
8240 END
8241 SUBROUTINE pdladom( INPLACE, N, ALPHA, A, IA, JA, DESCA )
8242*
8243* -- PBLAS test routine (version 2.0) --
8244* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8245* and University of California, Berkeley.
8246* April 1, 1998
8247*
8248* .. Scalar Arguments ..
8249 LOGICAL INPLACE
8250 INTEGER IA, JA, N
8251 DOUBLE PRECISION ALPHA
8252* ..
8253* .. Array Arguments ..
8254 INTEGER DESCA( * )
8255 DOUBLE PRECISION A( * )
8256* ..
8257*
8258* Purpose
8259* =======
8260*
8261* PDLADOM adds alpha to the diagonal entries of an n by n submatrix
8262* sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ).
8263*
8264* Notes
8265* =====
8266*
8267* A description vector is associated with each 2D block-cyclicly dis-
8268* tributed matrix. This vector stores the information required to
8269* establish the mapping between a matrix entry and its corresponding
8270* process and memory location.
8271*
8272* In the following comments, the character _ should be read as
8273* "of the distributed matrix". Let A be a generic term for any 2D
8274* block cyclicly distributed matrix. Its description vector is DESCA:
8275*
8276* NOTATION STORED IN EXPLANATION
8277* ---------------- --------------- ------------------------------------
8278* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8279* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8280* the NPROW x NPCOL BLACS process grid
8281* A is distributed over. The context
8282* itself is global, but the handle
8283* (the integer value) may vary.
8284* M_A (global) DESCA( M_ ) The number of rows in the distribu-
8285* ted matrix A, M_A >= 0.
8286* N_A (global) DESCA( N_ ) The number of columns in the distri-
8287* buted matrix A, N_A >= 0.
8288* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8289* block of the matrix A, IMB_A > 0.
8290* INB_A (global) DESCA( INB_ ) The number of columns of the upper
8291* left block of the matrix A,
8292* INB_A > 0.
8293* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8294* bute the last M_A-IMB_A rows of A,
8295* MB_A > 0.
8296* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8297* bute the last N_A-INB_A columns of
8298* A, NB_A > 0.
8299* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8300* row of the matrix A is distributed,
8301* NPROW > RSRC_A >= 0.
8302* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8303* first column of A is distributed.
8304* NPCOL > CSRC_A >= 0.
8305* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8306* array storing the local blocks of
8307* the distributed matrix A,
8308* IF( Lc( 1, N_A ) > 0 )
8309* LLD_A >= MAX( 1, Lr( 1, M_A ) )
8310* ELSE
8311* LLD_A >= 1.
8312*
8313* Let K be the number of rows of a matrix A starting at the global in-
8314* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8315* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8316* receive if these K rows were distributed over NPROW processes. If K
8317* is the number of columns of a matrix A starting at the global index
8318* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8319* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8320* these K columns were distributed over NPCOL processes.
8321*
8322* The values of Lr() and Lc() may be determined via a call to the func-
8323* tion PB_NUMROC:
8324* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8325* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8326*
8327* Arguments
8328* =========
8329*
8330* INPLACE (global input) LOGICAL
8331* On entry, INPLACE specifies if the matrix should be generated
8332* in place or not. If INPLACE is .TRUE., the local random array
8333* to be generated will start in memory at the local memory lo-
8334* cation A( 1, 1 ), otherwise it will start at the local posi-
8335* tion induced by IA and JA.
8336*
8337* N (global input) INTEGER
8338* On entry, N specifies the global order of the submatrix
8339* sub( A ) to be modified. N must be at least zero.
8340*
8341* ALPHA (global input) DOUBLE PRECISION
8342* On entry, ALPHA specifies the scalar alpha.
8343*
8344* A (local input/local output) DOUBLE PRECISION array
8345* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8346* at least Lc( 1, JA+N-1 ). Before entry, this array contains
8347* the local entries of the matrix A. On exit, the local entries
8348* of this array corresponding to the main diagonal of sub( A )
8349* have been updated.
8350*
8351* IA (global input) INTEGER
8352* On entry, IA specifies A's global row index, which points to
8353* the beginning of the submatrix sub( A ).
8354*
8355* JA (global input) INTEGER
8356* On entry, JA specifies A's global column index, which points
8357* to the beginning of the submatrix sub( A ).
8358*
8359* DESCA (global and local input) INTEGER array
8360* On entry, DESCA is an integer array of dimension DLEN_. This
8361* is the array descriptor for the matrix A.
8362*
8363* -- Written on April 1, 1998 by
8364* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8365*
8366* =====================================================================
8367*
8368* .. Parameters ..
8369 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8370 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8371 $ RSRC_
8372 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8373 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8374 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8375 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8376* ..
8377* .. Local Scalars ..
8378 LOGICAL GODOWN, GOLEFT
8379 INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW,
8380 $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP,
8381 $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1,
8382 $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC,
8383 $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS,
8384 $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP
8385 DOUBLE PRECISION ATMP
8386* ..
8387* .. Local Scalars ..
8388 INTEGER DESCA2( DLEN_ )
8389* ..
8390* .. External Subroutines ..
8391 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
8392 $ pb_desctrans
8393* ..
8394* .. Intrinsic Functions ..
8395 INTRINSIC abs, max, min
8396* ..
8397* .. Executable Statements ..
8398*
8399* Convert descriptor
8400*
8401 CALL pb_desctrans( desca, desca2 )
8402*
8403* Get grid parameters
8404*
8405 ictxt = desca2( ctxt_ )
8406 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8407*
8408 IF( n.EQ.0 )
8409 $ RETURN
8410*
8411 CALL pb_ainfog2l( n, n, ia, ja, desca2, nprow, npcol, myrow,
8412 $ mycol, imb1, inb1, np, nq, iia, jja, iarow,
8413 $ iacol, mrrow, mrcol )
8414*
8415* Decide where the entries shall be stored in memory
8416*
8417 IF( inplace ) THEN
8418 iia = 1
8419 jja = 1
8420 END IF
8421*
8422* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
8423* ILOW, LOW, IUPP, and UPP.
8424*
8425 mb = desca2( mb_ )
8426 nb = desca2( nb_ )
8427*
8428 CALL pb_binfo( 0, np, nq, imb1, inb1, mb, nb, mrrow, mrcol,
8429 $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
8430 $ lnbloc, ilow, low, iupp, upp )
8431*
8432 ioffa = iia - 1
8433 joffa = jja - 1
8434 lda = desca2( lld_ )
8435 ldap1 = lda + 1
8436*
8437 IF( desca2( rsrc_ ).LT.0 ) THEN
8438 pmb = mb
8439 ELSE
8440 pmb = nprow * mb
8441 END IF
8442 IF( desca2( csrc_ ).LT.0 ) THEN
8443 qnb = nb
8444 ELSE
8445 qnb = npcol * nb
8446 END IF
8447*
8448* Handle the first block of rows or columns separately, and update
8449* LCMT00, MBLKS and NBLKS.
8450*
8451 godown = ( lcmt00.GT.iupp )
8452 goleft = ( lcmt00.LT.ilow )
8453*
8454 IF( .NOT.godown .AND. .NOT.goleft ) THEN
8455*
8456* LCMT00 >= ILOW && LCMT00 <= IUPP
8457*
8458 IF( lcmt00.GE.0 ) THEN
8459 ijoffa = ioffa+lcmt00 + ( joffa - 1 ) * lda
8460 DO 10 i = 1, min( inbloc, max( 0, imbloc - lcmt00 ) )
8461 atmp = a( ijoffa + i*ldap1 )
8462 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8463 10 CONTINUE
8464 ELSE
8465 ijoffa = ioffa + ( joffa - lcmt00 - 1 ) * lda
8466 DO 20 i = 1, min( imbloc, max( 0, inbloc + lcmt00 ) )
8467 atmp = a( ijoffa + i*ldap1 )
8468 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8469 20 CONTINUE
8470 END IF
8471 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
8472 godown = .NOT.goleft
8473*
8474 END IF
8475*
8476 IF( godown ) THEN
8477*
8478 lcmt00 = lcmt00 - ( iupp - upp + pmb )
8479 mblks = mblks - 1
8480 ioffa = ioffa + imbloc
8481*
8482 30 CONTINUE
8483 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8484 lcmt00 = lcmt00 - pmb
8485 mblks = mblks - 1
8486 ioffa = ioffa + mb
8487 GO TO 30
8488 END IF
8489*
8490 lcmt = lcmt00
8491 mblkd = mblks
8492 ioffd = ioffa
8493*
8494 mbloc = mb
8495 40 CONTINUE
8496 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
8497 IF( mblkd.EQ.1 )
8498 $ mbloc = lmbloc
8499 IF( lcmt.GE.0 ) THEN
8500 ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
8501 DO 50 i = 1, min( inbloc, max( 0, mbloc - lcmt ) )
8502 atmp = a( ijoffa + i*ldap1 )
8503 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8504 50 CONTINUE
8505 ELSE
8506 ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
8507 DO 60 i = 1, min( mbloc, max( 0, inbloc + lcmt ) )
8508 atmp = a( ijoffa + i*ldap1 )
8509 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8510 60 CONTINUE
8511 END IF
8512 lcmt00 = lcmt
8513 lcmt = lcmt - pmb
8514 mblks = mblkd
8515 mblkd = mblkd - 1
8516 ioffa = ioffd
8517 ioffd = ioffd + mbloc
8518 GO TO 40
8519 END IF
8520*
8521 lcmt00 = lcmt00 + low - ilow + qnb
8522 nblks = nblks - 1
8523 joffa = joffa + inbloc
8524*
8525 ELSE IF( goleft ) THEN
8526*
8527 lcmt00 = lcmt00 + low - ilow + qnb
8528 nblks = nblks - 1
8529 joffa = joffa + inbloc
8530*
8531 70 CONTINUE
8532 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
8533 lcmt00 = lcmt00 + qnb
8534 nblks = nblks - 1
8535 joffa = joffa + nb
8536 GO TO 70
8537 END IF
8538*
8539 lcmt = lcmt00
8540 nblkd = nblks
8541 joffd = joffa
8542*
8543 nbloc = nb
8544 80 CONTINUE
8545 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
8546 IF( nblkd.EQ.1 )
8547 $ nbloc = lnbloc
8548 IF( lcmt.GE.0 ) THEN
8549 ijoffa = ioffa + lcmt + ( joffd - 1 ) * lda
8550 DO 90 i = 1, min( nbloc, max( 0, imbloc - lcmt ) )
8551 atmp = a( ijoffa + i*ldap1 )
8552 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8553 90 CONTINUE
8554 ELSE
8555 ijoffa = ioffa + ( joffd - lcmt - 1 ) * lda
8556 DO 100 i = 1, min( imbloc, max( 0, nbloc + lcmt ) )
8557 atmp = a( ijoffa + i*ldap1 )
8558 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8559 100 CONTINUE
8560 END IF
8561 lcmt00 = lcmt
8562 lcmt = lcmt + qnb
8563 nblks = nblkd
8564 nblkd = nblkd - 1
8565 joffa = joffd
8566 joffd = joffd + nbloc
8567 GO TO 80
8568 END IF
8569*
8570 lcmt00 = lcmt00 - ( iupp - upp + pmb )
8571 mblks = mblks - 1
8572 ioffa = ioffa + imbloc
8573*
8574 END IF
8575*
8576 nbloc = nb
8577 110 CONTINUE
8578 IF( nblks.GT.0 ) THEN
8579 IF( nblks.EQ.1 )
8580 $ nbloc = lnbloc
8581 120 CONTINUE
8582 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8583 lcmt00 = lcmt00 - pmb
8584 mblks = mblks - 1
8585 ioffa = ioffa + mb
8586 GO TO 120
8587 END IF
8588*
8589 lcmt = lcmt00
8590 mblkd = mblks
8591 ioffd = ioffa
8592*
8593 mbloc = mb
8594 130 CONTINUE
8595 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
8596 IF( mblkd.EQ.1 )
8597 $ mbloc = lmbloc
8598 IF( lcmt.GE.0 ) THEN
8599 ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
8600 DO 140 i = 1, min( nbloc, max( 0, mbloc - lcmt ) )
8601 atmp = a( ijoffa + i*ldap1 )
8602 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8603 140 CONTINUE
8604 ELSE
8605 ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
8606 DO 150 i = 1, min( mbloc, max( 0, nbloc + lcmt ) )
8607 atmp = a( ijoffa + i*ldap1 )
8608 a( ijoffa + i*ldap1 ) = abs( atmp ) + alpha
8609 150 CONTINUE
8610 END IF
8611 lcmt00 = lcmt
8612 lcmt = lcmt - pmb
8613 mblks = mblkd
8614 mblkd = mblkd - 1
8615 ioffa = ioffd
8616 ioffd = ioffd + mbloc
8617 GO TO 130
8618 END IF
8619*
8620 lcmt00 = lcmt00 + qnb
8621 nblks = nblks - 1
8622 joffa = joffa + nbloc
8623 GO TO 110
8624*
8625 END IF
8626*
8627 RETURN
8628*
8629* End of PDLADOM
8630*
8631 END
8632 SUBROUTINE pb_pdlaprnt( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
8633 $ CMATNM, NOUT, WORK )
8634*
8635* -- PBLAS test routine (version 2.0) --
8636* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8637* and University of California, Berkeley.
8638* April 1, 1998
8639*
8640* .. Scalar Arguments ..
8641 INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT
8642* ..
8643* .. Array Arguments ..
8644 CHARACTER*(*) CMATNM
8645 INTEGER DESCA( * )
8646 DOUBLE PRECISION A( * ), WORK( * )
8647* ..
8648*
8649* Purpose
8650* =======
8651*
8652* PB_PDLAPRNT prints to the standard output a submatrix sub( A ) deno-
8653* ting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and printed by
8654* the process of coordinates (IRPRNT, ICPRNT).
8655*
8656* Notes
8657* =====
8658*
8659* A description vector is associated with each 2D block-cyclicly dis-
8660* tributed matrix. This vector stores the information required to
8661* establish the mapping between a matrix entry and its corresponding
8662* process and memory location.
8663*
8664* In the following comments, the character _ should be read as
8665* "of the distributed matrix". Let A be a generic term for any 2D
8666* block cyclicly distributed matrix. Its description vector is DESCA:
8667*
8668* NOTATION STORED IN EXPLANATION
8669* ---------------- --------------- ------------------------------------
8670* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8671* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8672* the NPROW x NPCOL BLACS process grid
8673* A is distributed over. The context
8674* itself is global, but the handle
8675* (the integer value) may vary.
8676* M_A (global) DESCA( M_ ) The number of rows in the distribu-
8677* ted matrix A, M_A >= 0.
8678* N_A (global) DESCA( N_ ) The number of columns in the distri-
8679* buted matrix A, N_A >= 0.
8680* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8681* block of the matrix A, IMB_A > 0.
8682* INB_A (global) DESCA( INB_ ) The number of columns of the upper
8683* left block of the matrix A,
8684* INB_A > 0.
8685* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8686* bute the last M_A-IMB_A rows of A,
8687* MB_A > 0.
8688* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8689* bute the last N_A-INB_A columns of
8690* A, NB_A > 0.
8691* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8692* row of the matrix A is distributed,
8693* NPROW > RSRC_A >= 0.
8694* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8695* first column of A is distributed.
8696* NPCOL > CSRC_A >= 0.
8697* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8698* array storing the local blocks of
8699* the distributed matrix A,
8700* IF( Lc( 1, N_A ) > 0 )
8701* LLD_A >= MAX( 1, Lr( 1, M_A ) )
8702* ELSE
8703* LLD_A >= 1.
8704*
8705* Let K be the number of rows of a matrix A starting at the global in-
8706* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8707* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8708* receive if these K rows were distributed over NPROW processes. If K
8709* is the number of columns of a matrix A starting at the global index
8710* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8711* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8712* these K columns were distributed over NPCOL processes.
8713*
8714* The values of Lr() and Lc() may be determined via a call to the func-
8715* tion PB_NUMROC:
8716* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8717* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8718*
8719* Arguments
8720* =========
8721*
8722* M (global input) INTEGER
8723* On entry, M specifies the number of rows of the submatrix
8724* sub( A ). M must be at least zero.
8725*
8726* N (global input) INTEGER
8727* On entry, N specifies the number of columns of the submatrix
8728* sub( A ). N must be at least zero.
8729*
8730* A (local input) DOUBLE PRECISION array
8731* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8732* at least Lc( 1, JA+N-1 ). Before entry, this array contains
8733* the local entries of the matrix A.
8734*
8735* IA (global input) INTEGER
8736* On entry, IA specifies A's global row index, which points to
8737* the beginning of the submatrix sub( A ).
8738*
8739* JA (global input) INTEGER
8740* On entry, JA specifies A's global column index, which points
8741* to the beginning of the submatrix sub( A ).
8742*
8743* DESCA (global and local input) INTEGER array
8744* On entry, DESCA is an integer array of dimension DLEN_. This
8745* is the array descriptor for the matrix A.
8746*
8747* IRPRNT (global input) INTEGER
8748* On entry, IRPRNT specifies the row index of the printing pro-
8749* cess.
8750*
8751* ICPRNT (global input) INTEGER
8752* On entry, ICPRNT specifies the column index of the printing
8753* process.
8754*
8755* CMATNM (global input) CHARACTER*(*)
8756* On entry, CMATNM is the name of the matrix to be printed.
8757*
8758* NOUT (global input) INTEGER
8759* On entry, NOUT specifies the output unit number. When NOUT is
8760* equal to 6, the submatrix is printed on the screen.
8761*
8762* WORK (local workspace) DOUBLE PRECISION array
8763* On entry, WORK is a work array of dimension at least equal to
8764* MAX( IMB_A, MB_A ).
8765*
8766* -- Written on April 1, 1998 by
8767* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8768*
8769* =====================================================================
8770*
8771* .. Parameters ..
8772 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8773 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8774 $ RSRC_
8775 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8776 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8777 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8778 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8779* ..
8780* .. Local Scalars ..
8781 INTEGER MYCOL, MYROW, NPCOL, NPROW, PCOL, PROW
8782* ..
8783* .. Local Arrays ..
8784 INTEGER DESCA2( DLEN_ )
8785* ..
8786* .. External Subroutines ..
8787 EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS, PB_PDLAPRN2
8788* ..
8789* .. Executable Statements ..
8790*
8791* Quick return if possible
8792*
8793 IF( ( m.LE.0 ).OR.( n.LE.0 ) )
8794 $ RETURN
8795*
8796* Convert descriptor
8797*
8798 CALL pb_desctrans( desca, desca2 )
8799*
8800 CALL blacs_gridinfo( desca2( ctxt_ ), nprow, npcol, myrow, mycol )
8801*
8802 IF( desca2( rsrc_ ).GE.0 ) THEN
8803 IF( desca2( csrc_ ).GE.0 ) THEN
8804 CALL pb_pdlaprn2( m, n, a, ia, ja, desca2, irprnt, icprnt,
8805 $ cmatnm, nout, desca2( rsrc_ ),
8806 $ desca2( csrc_ ), work )
8807 ELSE
8808 DO 10 pcol = 0, npcol - 1
8809 IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8810 $ WRITE( nout, * ) 'Colum-replicated array -- ' ,
8811 $ 'copy in process column: ', pcol
8812 CALL pb_pdlaprn2( m, n, a, ia, ja, desca2, irprnt,
8813 $ icprnt, cmatnm, nout, desca2( rsrc_ ),
8814 $ pcol, work )
8815 10 CONTINUE
8816 END IF
8817 ELSE
8818 IF( desca2( csrc_ ).GE.0 ) THEN
8819 DO 20 prow = 0, nprow - 1
8820 IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8821 $ WRITE( nout, * ) 'Row-replicated array -- ' ,
8822 $ 'copy in process row: ', prow
8823 CALL pb_pdlaprn2( m, n, a, ia, ja, desca2, irprnt,
8824 $ icprnt, cmatnm, nout, prow,
8825 $ desca2( csrc_ ), work )
8826 20 CONTINUE
8827 ELSE
8828 DO 40 prow = 0, nprow - 1
8829 DO 30 pcol = 0, npcol - 1
8830 IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
8831 $ WRITE( nout, * ) 'Replicated array -- ' ,
8832 $ 'copy in process (', prow, ',', pcol, ')'
8833 CALL pb_pdlaprn2( m, n, a, ia, ja, desca2, irprnt,
8834 $ icprnt, cmatnm, nout, prow, pcol,
8835 $ work )
8836 30 CONTINUE
8837 40 CONTINUE
8838 END IF
8839 END IF
8840*
8841 RETURN
8842*
8843* End of PB_PDLAPRNT
8844*
8845 END
8846 SUBROUTINE pb_pdlaprn2( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
8847 $ CMATNM, NOUT, PROW, PCOL, WORK )
8848*
8849* -- PBLAS test routine (version 2.0) --
8850* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8851* and University of California, Berkeley.
8852* April 1, 1998
8853*
8854* .. Scalar Arguments ..
8855 INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT, PCOL, PROW
8856* ..
8857* .. Array Arguments ..
8858 CHARACTER*(*) CMATNM
8859 INTEGER DESCA( * )
8860 DOUBLE PRECISION A( * ), WORK( * )
8861* ..
8862*
8863* .. Parameters ..
8864 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8865 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8866 $ RSRC_
8867 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
8868 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8869 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8870 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8871* ..
8872* .. Local Scalars ..
8873 LOGICAL AISCOLREP, AISROWREP
8874 INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
8875 $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
8876 $ LDA, LDW, MYCOL, MYROW, NPCOL, NPROW
8877* ..
8878* .. External Subroutines ..
8879 EXTERNAL blacs_barrier, blacs_gridinfo, dgerv2d,
8880 $ dgesd2d, pb_infog2l
8881* ..
8882* .. Intrinsic Functions ..
8883 INTRINSIC min
8884* ..
8885* .. Executable Statements ..
8886*
8887* Get grid parameters
8888*
8889 ictxt = desca( ctxt_ )
8890 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8891 CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol,
8892 $ iia, jja, iarow, iacol )
8893 ii = iia
8894 jj = jja
8895 IF( desca( rsrc_ ).LT.0 ) THEN
8896 aisrowrep = .true.
8897 iarow = prow
8898 icurrow = prow
8899 ELSE
8900 aisrowrep = .false.
8901 icurrow = iarow
8902 END IF
8903 IF( desca( csrc_ ).LT.0 ) THEN
8904 aiscolrep = .true.
8905 iacol = pcol
8906 icurcol = pcol
8907 ELSE
8908 aiscolrep = .false.
8909 icurcol = iacol
8910 END IF
8911 lda = desca( lld_ )
8912 ldw = max( desca( imb_ ), desca( mb_ ) )
8913*
8914* Handle the first block of column separately
8915*
8916 jb = desca( inb_ ) - ja + 1
8917 IF( jb.LE.0 )
8918 $ jb = ( (-jb) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
8919 jb = min( jb, n )
8920 jn = ja+jb-1
8921 DO 60 h = 0, jb-1
8922 ib = desca( imb_ ) - ia + 1
8923 IF( ib.LE.0 )
8924 $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
8925 ib = min( ib, m )
8926 in = ia+ib-1
8927 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
8928 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8929 DO 10 k = 0, ib-1
8930 WRITE( nout, fmt = 9999 )
8931 $ cmatnm, ia+k, ja+h, a( ii+k+(jj+h-1)*lda )
8932 10 CONTINUE
8933 END IF
8934 ELSE
8935 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
8936 CALL dgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ), lda,
8937 $ irprnt, icprnt )
8938 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8939 CALL dgerv2d( ictxt, ib, 1, work, ldw, icurrow, icurcol )
8940 DO 20 k = 1, ib
8941 WRITE( nout, fmt = 9999 )
8942 $ cmatnm, ia+k-1, ja+h, work( k )
8943 20 CONTINUE
8944 END IF
8945 END IF
8946 IF( myrow.EQ.icurrow )
8947 $ ii = ii + ib
8948 IF( .NOT.aisrowrep )
8949 $ icurrow = mod( icurrow+1, nprow )
8950 CALL blacs_barrier( ictxt, 'All' )
8951*
8952* Loop over remaining block of rows
8953*
8954 DO 50 i = in+1, ia+m-1, desca( mb_ )
8955 ib = min( desca( mb_ ), ia+m-i )
8956 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
8957 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8958 DO 30 k = 0, ib-1
8959 WRITE( nout, fmt = 9999 )
8960 $ cmatnm, i+k, ja+h, a( ii+k+(jj+h-1)*lda )
8961 30 CONTINUE
8962 END IF
8963 ELSE
8964 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
8965 CALL dgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
8966 $ lda, irprnt, icprnt )
8967 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
8968 CALL dgerv2d( ictxt, ib, 1, work, ldw, icurrow,
8969 $ icurcol )
8970 DO 40 k = 1, ib
8971 WRITE( nout, fmt = 9999 )
8972 $ cmatnm, i+k-1, ja+h, work( k )
8973 40 CONTINUE
8974 END IF
8975 END IF
8976 IF( myrow.EQ.icurrow )
8977 $ ii = ii + ib
8978 IF( .NOT.aisrowrep )
8979 $ icurrow = mod( icurrow+1, nprow )
8980 CALL blacs_barrier( ictxt, 'All' )
8981 50 CONTINUE
8982*
8983 ii = iia
8984 icurrow = iarow
8985 60 CONTINUE
8986*
8987 IF( mycol.EQ.icurcol )
8988 $ jj = jj + jb
8989 IF( .NOT.aiscolrep )
8990 $ icurcol = mod( icurcol+1, npcol )
8991 CALL blacs_barrier( ictxt, 'All' )
8992*
8993* Loop over remaining column blocks
8994*
8995 DO 130 j = jn+1, ja+n-1, desca( nb_ )
8996 jb = min( desca( nb_ ), ja+n-j )
8997 DO 120 h = 0, jb-1
8998 ib = desca( imb_ )-ia+1
8999 IF( ib.LE.0 )
9000 $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
9001 ib = min( ib, m )
9002 in = ia+ib-1
9003 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9004 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9005 DO 70 k = 0, ib-1
9006 WRITE( nout, fmt = 9999 )
9007 $ cmatnm, ia+k, j+h, a( ii+k+(jj+h-1)*lda )
9008 70 CONTINUE
9009 END IF
9010 ELSE
9011 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9012 CALL dgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9013 $ lda, irprnt, icprnt )
9014 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9015 CALL dgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9016 $ icurcol )
9017 DO 80 k = 1, ib
9018 WRITE( nout, fmt = 9999 )
9019 $ cmatnm, ia+k-1, j+h, work( k )
9020 80 CONTINUE
9021 END IF
9022 END IF
9023 IF( myrow.EQ.icurrow )
9024 $ ii = ii + ib
9025 icurrow = mod( icurrow+1, nprow )
9026 CALL blacs_barrier( ictxt, 'All' )
9027*
9028* Loop over remaining block of rows
9029*
9030 DO 110 i = in+1, ia+m-1, desca( mb_ )
9031 ib = min( desca( mb_ ), ia+m-i )
9032 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9033 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9034 DO 90 k = 0, ib-1
9035 WRITE( nout, fmt = 9999 )
9036 $ cmatnm, i+k, j+h, a( ii+k+(jj+h-1)*lda )
9037 90 CONTINUE
9038 END IF
9039 ELSE
9040 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9041 CALL dgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9042 $ lda, irprnt, icprnt )
9043 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9044 CALL dgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9045 $ icurcol )
9046 DO 100 k = 1, ib
9047 WRITE( nout, fmt = 9999 )
9048 $ cmatnm, i+k-1, j+h, work( k )
9049 100 CONTINUE
9050 END IF
9051 END IF
9052 IF( myrow.EQ.icurrow )
9053 $ ii = ii + ib
9054 IF( .NOT.aisrowrep )
9055 $ icurrow = mod( icurrow+1, nprow )
9056 CALL blacs_barrier( ictxt, 'All' )
9057 110 CONTINUE
9058*
9059 ii = iia
9060 icurrow = iarow
9061 120 CONTINUE
9062*
9063 IF( mycol.EQ.icurcol )
9064 $ jj = jj + jb
9065 IF( .NOT.aiscolrep )
9066 $ icurcol = mod( icurcol+1, npcol )
9067 CALL blacs_barrier( ictxt, 'All' )
9068*
9069 130 CONTINUE
9070*
9071 9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', d30.18 )
9072*
9073 RETURN
9074*
9075* End of PB_PDLAPRN2
9076*
9077 END
9078 SUBROUTINE pb_dfillpad( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL )
9079*
9080* -- PBLAS test routine (version 2.0) --
9081* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9082* and University of California, Berkeley.
9083* April 1, 1998
9084*
9085* .. Scalar Arguments ..
9086 INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9087 DOUBLE PRECISION CHKVAL
9088* ..
9089* .. Array Arguments ..
9090 DOUBLE PRECISION A( * )
9091* ..
9092*
9093* Purpose
9094* =======
9095*
9096* PB_DFILLPAD surrounds a two dimensional local array with a guard-zone
9097* initialized to the value CHKVAL. The user may later call the routine
9098* PB_DCHEKPAD to discover if the guardzone has been violated. There are
9099* three guardzones. The first is a buffer of size IPRE that is before
9100* the start of the array. The second is the buffer of size IPOST which
9101* is after the end of the array to be padded. Finally, there is a guard
9102* zone inside every column of the array to be padded, in the elements
9103* of A(M+1:LDA, J).
9104*
9105* Arguments
9106* =========
9107*
9108* ICTXT (local input) INTEGER
9109* On entry, ICTXT specifies the BLACS context handle, indica-
9110* ting the global context of the operation. The context itself
9111* is global, but the value of ICTXT is local.
9112*
9113* M (local input) INTEGER
9114* On entry, M specifies the number of rows in the local array
9115* A. M must be at least zero.
9116*
9117* N (local input) INTEGER
9118* On entry, N specifies the number of columns in the local ar-
9119* ray A. N must be at least zero.
9120*
9121* A (local input/local output) DOUBLE PRECISION array
9122* On entry, A is an array of dimension (LDA,N). On exit, this
9123* array is the padded array.
9124*
9125* LDA (local input) INTEGER
9126* On entry, LDA specifies the leading dimension of the local
9127* array to be padded. LDA must be at least MAX( 1, M ).
9128*
9129* IPRE (local input) INTEGER
9130* On entry, IPRE specifies the size of the guard zone to put
9131* before the start of the padded array.
9132*
9133* IPOST (local input) INTEGER
9134* On entry, IPOST specifies the size of the guard zone to put
9135* after the end of the padded array.
9136*
9137* CHKVAL (local input) DOUBLE PRECISION
9138* On entry, CHKVAL specifies the value to pad the array with.
9139*
9140* -- Written on April 1, 1998 by
9141* R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9142*
9143* =====================================================================
9144*
9145* .. Local Scalars ..
9146 INTEGER I, J, K
9147* ..
9148* .. Executable Statements ..
9149*
9150* Put check buffer in front of A
9151*
9152 IF( IPRE.GT.0 ) THEN
9153 DO 10 I = 1, ipre
9154 a( i ) = chkval
9155 10 CONTINUE
9156 ELSE
9157 WRITE( *, fmt = '(A)' )
9158 $ 'WARNING no pre-guardzone in PB_DFILLPAD'
9159 END IF
9160*
9161* Put check buffer in back of A
9162*
9163 IF( ipost.GT.0 ) THEN
9164 j = ipre+lda*n+1
9165 DO 20 i = j, j+ipost-1
9166 a( i ) = chkval
9167 20 CONTINUE
9168 ELSE
9169 WRITE( *, fmt = '(A)' )
9170 $ 'WARNING no post-guardzone in PB_DFILLPAD'
9171 END IF
9172*
9173* Put check buffer in all (LDA-M) gaps
9174*
9175 IF( lda.GT.m ) THEN
9176 k = ipre + m + 1
9177 DO 40 j = 1, n
9178 DO 30 i = k, k + ( lda - m ) - 1
9179 a( i ) = chkval
9180 30 CONTINUE
9181 k = k + lda
9182 40 CONTINUE
9183 END IF
9184*
9185 RETURN
9186*
9187* End of PB_DFILLPAD
9188*
9189 END
9190 SUBROUTINE pb_dchekpad( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST,
9191 $ CHKVAL )
9192*
9193* -- PBLAS test routine (version 2.0) --
9194* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9195* and University of California, Berkeley.
9196* April 1, 1998
9197*
9198* .. Scalar Arguments ..
9199 INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9200 DOUBLE PRECISION CHKVAL
9201* ..
9202* .. Array Arguments ..
9203 CHARACTER*(*) MESS
9204 DOUBLE PRECISION A( * )
9205* ..
9206*
9207* Purpose
9208* =======
9209*
9210* PB_DCHEKPAD checks that the padding around a local array has not been
9211* overwritten since the call to PB_DFILLPAD. Three types of errors are
9212* reported:
9213*
9214* 1) Overwrite in pre-guardzone. This indicates a memory overwrite has
9215* occurred in the first IPRE elements which form a buffer before the
9216* beginning of A. Therefore, the error message:
9217* 'Overwrite in pre-guardzone: loc( 5) = 18.00000'
9218* tells that the 5th element of the IPRE long buffer has been overwrit-
9219* ten with the value 18, where it should still have the value CHKVAL.
9220*
9221* 2) Overwrite in post-guardzone. This indicates a memory overwrite has
9222* occurred in the last IPOST elements which form a buffer after the end
9223* of A. Error reports are refered from the end of A. Therefore,
9224* 'Overwrite in post-guardzone: loc( 19) = 24.00000'
9225* tells that the 19th element after the end of A was overwritten with
9226* the value 24, where it should still have the value of CHKVAL.
9227*
9228* 3) Overwrite in lda-m gap. Tells you elements between M and LDA were
9229* overwritten. So,
9230* 'Overwrite in lda-m gap: A( 12, 3) = 22.00000'
9231* tells that the element at the 12th row and 3rd column of A was over-
9232* written with the value of 22, where it should still have the value of
9233* CHKVAL.
9234*
9235* Arguments
9236* =========
9237*
9238* ICTXT (local input) INTEGER
9239* On entry, ICTXT specifies the BLACS context handle, indica-
9240* ting the global context of the operation. The context itself
9241* is global, but the value of ICTXT is local.
9242*
9243* MESS (local input) CHARACTER*(*)
9244* On entry, MESS is a ttring containing a user-defined message.
9245*
9246* M (local input) INTEGER
9247* On entry, M specifies the number of rows in the local array
9248* A. M must be at least zero.
9249*
9250* N (local input) INTEGER
9251* On entry, N specifies the number of columns in the local ar-
9252* ray A. N must be at least zero.
9253*
9254* A (local input) DOUBLE PRECISION array
9255* On entry, A is an array of dimension (LDA,N).
9256*
9257* LDA (local input) INTEGER
9258* On entry, LDA specifies the leading dimension of the local
9259* array to be padded. LDA must be at least MAX( 1, M ).
9260*
9261* IPRE (local input) INTEGER
9262* On entry, IPRE specifies the size of the guard zone to put
9263* before the start of the padded array.
9264*
9265* IPOST (local input) INTEGER
9266* On entry, IPOST specifies the size of the guard zone to put
9267* after the end of the padded array.
9268*
9269* CHKVAL (local input) DOUBLE PRECISION
9270* On entry, CHKVAL specifies the value to pad the array with.
9271*
9272*
9273* -- Written on April 1, 1998 by
9274* R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9275*
9276* =====================================================================
9277*
9278* .. Local Scalars ..
9279 CHARACTER*1 TOP
9280 INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, NPCOL,
9281 $ NPROW
9282* ..
9283* .. External Subroutines ..
9284 EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_TOPGET
9285* ..
9286* .. Executable Statements ..
9287*
9288* Get grid parameters
9289*
9290 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
9291 IAM = myrow*npcol + mycol
9292 info = -1
9293*
9294* Check buffer in front of A
9295*
9296 IF( ipre.GT.0 ) THEN
9297 DO 10 i = 1, ipre
9298 IF( a( i ).NE.chkval ) THEN
9299 WRITE( *, fmt = 9998 ) myrow, mycol, mess, ' pre', i,
9300 $ a( i )
9301 info = iam
9302 END IF
9303 10 CONTINUE
9304 ELSE
9305 WRITE( *, fmt = * ) 'WARNING no pre-guardzone in PB_DCHEKPAD'
9306 END IF
9307*
9308* Check buffer after A
9309*
9310 IF( ipost.GT.0 ) THEN
9311 j = ipre+lda*n+1
9312 DO 20 i = j, j+ipost-1
9313 IF( a( i ).NE.chkval ) THEN
9314 WRITE( *, fmt = 9998 ) myrow, mycol, mess, 'post',
9315 $ i-j+1, a( i )
9316 info = iam
9317 END IF
9318 20 CONTINUE
9319 ELSE
9320 WRITE( *, fmt = * )
9321 $ 'WARNING no post-guardzone buffer in PB_DCHEKPAD'
9322 END IF
9323*
9324* Check all (LDA-M) gaps
9325*
9326 IF( lda.GT.m ) THEN
9327 k = ipre + m + 1
9328 DO 40 j = 1, n
9329 DO 30 i = k, k + (lda-m) - 1
9330 IF( a( i ).NE.chkval ) THEN
9331 WRITE( *, fmt = 9997 ) myrow, mycol, mess,
9332 $ i-ipre-lda*(j-1), j, a( i )
9333 info = iam
9334 END IF
9335 30 CONTINUE
9336 k = k + lda
9337 40 CONTINUE
9338 END IF
9339*
9340 CALL pb_topget( ictxt, 'Combine', 'All', top )
9341 CALL igamx2d( ictxt, 'All', top, 1, 1, info, 1, idumm, idumm, -1,
9342 $ 0, 0 )
9343 IF( iam.EQ.0 .AND. info.GE.0 ) THEN
9344 WRITE( *, fmt = 9999 ) info / npcol, mod( info, npcol ), mess
9345 END IF
9346*
9347 9999 FORMAT( '{', i5, ',', i5, '}: Memory overwrite in ', a )
9348 9998 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
9349 $ a4, '-guardzone: loc(', i3, ') = ', g20.7 )
9350 9997 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
9351 $ 'lda-m gap: loc(', i3, ',', i3, ') = ', g20.7 )
9352*
9353 RETURN
9354*
9355* End of PB_DCHEKPAD
9356*
9357 END
9358 SUBROUTINE pb_dlaset( UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA )
9359*
9360* -- PBLAS test routine (version 2.0) --
9361* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9362* and University of California, Berkeley.
9363* April 1, 1998
9364*
9365* .. Scalar Arguments ..
9366 CHARACTER*1 UPLO
9367 INTEGER IOFFD, LDA, M, N
9368 DOUBLE PRECISION ALPHA, BETA
9369* ..
9370* .. Array Arguments ..
9371 DOUBLE PRECISION A( LDA, * )
9372* ..
9373*
9374* Purpose
9375* =======
9376*
9377* PB_DLASET initializes a two-dimensional array A to beta on the diago-
9378* nal specified by IOFFD and alpha on the offdiagonals.
9379*
9380* Arguments
9381* =========
9382*
9383* UPLO (global input) CHARACTER*1
9384* On entry, UPLO specifies which trapezoidal part of the ar-
9385* ray A is to be set as follows:
9386* = 'L' or 'l': Lower triangular part is set; the strictly
9387* upper triangular part of A is not changed,
9388* = 'U' or 'u': Upper triangular part is set; the strictly
9389* lower triangular part of A is not changed,
9390* = 'D' or 'd' Only the diagonal of A is set,
9391* Otherwise: All of the array A is set.
9392*
9393* M (input) INTEGER
9394* On entry, M specifies the number of rows of the array A. M
9395* must be at least zero.
9396*
9397* N (input) INTEGER
9398* On entry, N specifies the number of columns of the array A.
9399* N must be at least zero.
9400*
9401* IOFFD (input) INTEGER
9402* On entry, IOFFD specifies the position of the offdiagonal de-
9403* limiting the upper and lower trapezoidal part of A as follows
9404* (see the notes below):
9405*
9406* IOFFD = 0 specifies the main diagonal A( i, i ),
9407* with i = 1 ... MIN( M, N ),
9408* IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
9409* with i = 1 ... MIN( M-IOFFD, N ),
9410* IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
9411* with i = 1 ... MIN( M, N+IOFFD ).
9412*
9413* ALPHA (input) DOUBLE PRECISION
9414* On entry, ALPHA specifies the value to which the offdiagonal
9415* array elements are set to.
9416*
9417* BETA (input) DOUBLE PRECISION
9418* On entry, BETA specifies the value to which the diagonal ar-
9419* ray elements are set to.
9420*
9421* A (input/output) DOUBLE PRECISION array
9422* On entry, A is an array of dimension (LDA,N). Before entry
9423* with UPLO = 'U' or 'u', the leading m by n part of the array
9424* A must contain the upper trapezoidal part of the matrix as
9425* specified by IOFFD to be set, and the strictly lower trape-
9426* zoidal part of A is not referenced; When IUPLO = 'L' or 'l',
9427* the leading m by n part of the array A must contain the
9428* lower trapezoidal part of the matrix as specified by IOFFD to
9429* be set, and the strictly upper trapezoidal part of A is
9430* not referenced.
9431*
9432* LDA (input) INTEGER
9433* On entry, LDA specifies the leading dimension of the array A.
9434* LDA must be at least max( 1, M ).
9435*
9436* Notes
9437* =====
9438* N N
9439* ---------------------------- -----------
9440* | d | | |
9441* M | d 'U' | | 'U' |
9442* | 'L' 'D' | |d |
9443* | d | M | d |
9444* ---------------------------- | 'D' |
9445* | d |
9446* IOFFD < 0 | 'L' d |
9447* | d|
9448* N | |
9449* ----------- -----------
9450* | d 'U'|
9451* | d | IOFFD > 0
9452* M | 'D' |
9453* | d| N
9454* | 'L' | ----------------------------
9455* | | | 'U' |
9456* | | |d |
9457* | | | 'D' |
9458* | | | d |
9459* | | |'L' d |
9460* ----------- ----------------------------
9461*
9462* -- Written on April 1, 1998 by
9463* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9464*
9465* =====================================================================
9466*
9467* .. Local Scalars ..
9468 INTEGER I, J, JTMP, MN
9469* ..
9470* .. External Functions ..
9471 LOGICAL LSAME
9472 EXTERNAL LSAME
9473* ..
9474* .. Intrinsic Functions ..
9475 INTRINSIC MAX, MIN
9476* ..
9477* .. Executable Statements ..
9478*
9479* Quick return if possible
9480*
9481 IF( M.LE.0 .OR. N.LE.0 )
9482 $ RETURN
9483*
9484* Start the operations
9485*
9486 IF( LSAME( UPLO, 'L' ) ) THEN
9487*
9488* Set the diagonal to BETA and the strictly lower triangular
9489* part of the array to ALPHA.
9490*
9491 mn = max( 0, -ioffd )
9492 DO 20 j = 1, min( mn, n )
9493 DO 10 i = 1, m
9494 a( i, j ) = alpha
9495 10 CONTINUE
9496 20 CONTINUE
9497 DO 40 j = mn + 1, min( m - ioffd, n )
9498 jtmp = j + ioffd
9499 a( jtmp, j ) = beta
9500 DO 30 i = jtmp + 1, m
9501 a( i, j ) = alpha
9502 30 CONTINUE
9503 40 CONTINUE
9504*
9505 ELSE IF( lsame( uplo, 'U' ) ) THEN
9506*
9507* Set the diagonal to BETA and the strictly upper triangular
9508* part of the array to ALPHA.
9509*
9510 mn = min( m - ioffd, n )
9511 DO 60 j = max( 0, -ioffd ) + 1, mn
9512 jtmp = j + ioffd
9513 DO 50 i = 1, jtmp - 1
9514 a( i, j ) = alpha
9515 50 CONTINUE
9516 a( jtmp, j ) = beta
9517 60 CONTINUE
9518 DO 80 j = max( 0, mn ) + 1, n
9519 DO 70 i = 1, m
9520 a( i, j ) = alpha
9521 70 CONTINUE
9522 80 CONTINUE
9523*
9524 ELSE IF( lsame( uplo, 'D' ) ) THEN
9525*
9526* Set the array to BETA on the diagonal.
9527*
9528 DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9529 a( j + ioffd, j ) = beta
9530 90 CONTINUE
9531*
9532 ELSE
9533*
9534* Set the array to BETA on the diagonal and ALPHA on the
9535* offdiagonal.
9536*
9537 DO 110 j = 1, n
9538 DO 100 i = 1, m
9539 a( i, j ) = alpha
9540 100 CONTINUE
9541 110 CONTINUE
9542 IF( alpha.NE.beta .AND. ioffd.LT.m .AND. ioffd.GT.-n ) THEN
9543 DO 120 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9544 a( j + ioffd, j ) = beta
9545 120 CONTINUE
9546 END IF
9547*
9548 END IF
9549*
9550 RETURN
9551*
9552* End of PB_DLASET
9553*
9554 END
9555 SUBROUTINE pb_dlascal( UPLO, M, N, IOFFD, ALPHA, A, LDA )
9556*
9557* -- PBLAS test routine (version 2.0) --
9558* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9559* and University of California, Berkeley.
9560* April 1, 1998
9561*
9562* .. Scalar Arguments ..
9563 CHARACTER*1 UPLO
9564 INTEGER IOFFD, LDA, M, N
9565 DOUBLE PRECISION ALPHA
9566* ..
9567* .. Array Arguments ..
9568 DOUBLE PRECISION A( LDA, * )
9569* ..
9570*
9571* Purpose
9572* =======
9573*
9574* PB_DLASCAL scales a two-dimensional array A by the scalar alpha.
9575*
9576* Arguments
9577* =========
9578*
9579* UPLO (input) CHARACTER*1
9580* On entry, UPLO specifies which trapezoidal part of the ar-
9581* ray A is to be scaled as follows:
9582* = 'L' or 'l': the lower trapezoid of A is scaled,
9583* = 'U' or 'u': the upper trapezoid of A is scaled,
9584* = 'D' or 'd': diagonal specified by IOFFD is scaled,
9585* Otherwise: all of the array A is scaled.
9586*
9587* M (input) INTEGER
9588* On entry, M specifies the number of rows of the array A. M
9589* must be at least zero.
9590*
9591* N (input) INTEGER
9592* On entry, N specifies the number of columns of the array A.
9593* N must be at least zero.
9594*
9595* IOFFD (input) INTEGER
9596* On entry, IOFFD specifies the position of the offdiagonal de-
9597* limiting the upper and lower trapezoidal part of A as follows
9598* (see the notes below):
9599*
9600* IOFFD = 0 specifies the main diagonal A( i, i ),
9601* with i = 1 ... MIN( M, N ),
9602* IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
9603* with i = 1 ... MIN( M-IOFFD, N ),
9604* IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
9605* with i = 1 ... MIN( M, N+IOFFD ).
9606*
9607* ALPHA (input) DOUBLE PRECISION
9608* On entry, ALPHA specifies the scalar alpha.
9609*
9610* A (input/output) DOUBLE PRECISION array
9611* On entry, A is an array of dimension (LDA,N). Before entry
9612* with UPLO = 'U' or 'u', the leading m by n part of the array
9613* A must contain the upper trapezoidal part of the matrix as
9614* specified by IOFFD to be scaled, and the strictly lower tra-
9615* pezoidal part of A is not referenced; When UPLO = 'L' or 'l',
9616* the leading m by n part of the array A must contain the lower
9617* trapezoidal part of the matrix as specified by IOFFD to be
9618* scaled, and the strictly upper trapezoidal part of A is not
9619* referenced. On exit, the entries of the trapezoid part of A
9620* determined by UPLO and IOFFD are scaled.
9621*
9622* LDA (input) INTEGER
9623* On entry, LDA specifies the leading dimension of the array A.
9624* LDA must be at least max( 1, M ).
9625*
9626* Notes
9627* =====
9628* N N
9629* ---------------------------- -----------
9630* | d | | |
9631* M | d 'U' | | 'U' |
9632* | 'L' 'D' | |d |
9633* | d | M | d |
9634* ---------------------------- | 'D' |
9635* | d |
9636* IOFFD < 0 | 'L' d |
9637* | d|
9638* N | |
9639* ----------- -----------
9640* | d 'U'|
9641* | d | IOFFD > 0
9642* M | 'D' |
9643* | d| N
9644* | 'L' | ----------------------------
9645* | | | 'U' |
9646* | | |d |
9647* | | | 'D' |
9648* | | | d |
9649* | | |'L' d |
9650* ----------- ----------------------------
9651*
9652* -- Written on April 1, 1998 by
9653* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9654*
9655* =====================================================================
9656*
9657* .. Local Scalars ..
9658 INTEGER I, J, JTMP, MN
9659* ..
9660* .. External Functions ..
9661 LOGICAL LSAME
9662 EXTERNAL LSAME
9663* ..
9664* .. Intrinsic Functions ..
9665 INTRINSIC MAX, MIN
9666* ..
9667* .. Executable Statements ..
9668*
9669* Quick return if possible
9670*
9671 IF( M.LE.0 .OR. N.LE.0 )
9672 $ RETURN
9673*
9674* Start the operations
9675*
9676 IF( LSAME( UPLO, 'L' ) ) THEN
9677*
9678* Scales the lower triangular part of the array by ALPHA.
9679*
9680 MN = max( 0, -ioffd )
9681 DO 20 j = 1, min( mn, n )
9682 DO 10 i = 1, m
9683 a( i, j ) = alpha * a( i, j )
9684 10 CONTINUE
9685 20 CONTINUE
9686 DO 40 j = mn + 1, min( m - ioffd, n )
9687 DO 30 i = j + ioffd, m
9688 a( i, j ) = alpha * a( i, j )
9689 30 CONTINUE
9690 40 CONTINUE
9691*
9692 ELSE IF( lsame( uplo, 'U' ) ) THEN
9693*
9694* Scales the upper triangular part of the array by ALPHA.
9695*
9696 mn = min( m - ioffd, n )
9697 DO 60 j = max( 0, -ioffd ) + 1, mn
9698 DO 50 i = 1, j + ioffd
9699 a( i, j ) = alpha * a( i, j )
9700 50 CONTINUE
9701 60 CONTINUE
9702 DO 80 j = max( 0, mn ) + 1, n
9703 DO 70 i = 1, m
9704 a( i, j ) = alpha * a( i, j )
9705 70 CONTINUE
9706 80 CONTINUE
9707*
9708 ELSE IF( lsame( uplo, 'D' ) ) THEN
9709*
9710* Scales the diagonal entries by ALPHA.
9711*
9712 DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
9713 jtmp = j + ioffd
9714 a( jtmp, j ) = alpha * a( jtmp, j )
9715 90 CONTINUE
9716*
9717 ELSE
9718*
9719* Scales the entire array by ALPHA.
9720*
9721 DO 110 j = 1, n
9722 DO 100 i = 1, m
9723 a( i, j ) = alpha * a( i, j )
9724 100 CONTINUE
9725 110 CONTINUE
9726*
9727 END IF
9728*
9729 RETURN
9730*
9731* End of PB_DLASCAL
9732*
9733 END
9734 SUBROUTINE pb_dlagen( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS,
9735 $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB,
9736 $ LNBLOC, JMP, IMULADD )
9737*
9738* -- PBLAS test routine (version 2.0) --
9739* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9740* and University of California, Berkeley.
9741* April 1, 1998
9742*
9743* .. Scalar Arguments ..
9744 CHARACTER*1 UPLO, AFORM
9745 INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC,
9746 $ mb, mblks, nb, nblks
9747* ..
9748* .. Array Arguments ..
9749 INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * )
9750 DOUBLE PRECISION A( LDA, * )
9751* ..
9752*
9753* Purpose
9754* =======
9755*
9756* PB_DLAGEN locally initializes an array A.
9757*
9758* Arguments
9759* =========
9760*
9761* UPLO (global input) CHARACTER*1
9762* On entry, UPLO specifies whether the lower (UPLO='L') trape-
9763* zoidal part or the upper (UPLO='U') trapezoidal part is to be
9764* generated when the matrix to be generated is symmetric or
9765* Hermitian. For all the other values of AFORM, the value of
9766* this input argument is ignored.
9767*
9768* AFORM (global input) CHARACTER*1
9769* On entry, AFORM specifies the type of submatrix to be genera-
9770* ted as follows:
9771* AFORM = 'S', sub( A ) is a symmetric matrix,
9772* AFORM = 'H', sub( A ) is a Hermitian matrix,
9773* AFORM = 'T', sub( A ) is overrwritten with the transpose
9774* of what would normally be generated,
9775* AFORM = 'C', sub( A ) is overwritten with the conjugate
9776* transpose of what would normally be genera-
9777* ted.
9778* AFORM = 'N', a random submatrix is generated.
9779*
9780* A (local output) DOUBLE PRECISION array
9781* On entry, A is an array of dimension (LLD_A, *). On exit,
9782* this array contains the local entries of the randomly genera-
9783* ted submatrix sub( A ).
9784*
9785* LDA (local input) INTEGER
9786* On entry, LDA specifies the local leading dimension of the
9787* array A. LDA must be at least one.
9788*
9789* LCMT00 (global input) INTEGER
9790* On entry, LCMT00 is the LCM value specifying the off-diagonal
9791* of the underlying matrix of interest. LCMT00=0 specifies the
9792* main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0
9793* specifies superdiagonals.
9794*
9795* IRAN (local input) INTEGER array
9796* On entry, IRAN is an array of dimension 2 containing respec-
9797* tively the 16-lower and 16-higher bits of the encoding of the
9798* entry of the random sequence corresponding locally to the
9799* first local array entry to generate. Usually, this array is
9800* computed by PB_SETLOCRAN.
9801*
9802* MBLKS (local input) INTEGER
9803* On entry, MBLKS specifies the local number of blocks of rows.
9804* MBLKS is at least zero.
9805*
9806* IMBLOC (local input) INTEGER
9807* On entry, IMBLOC specifies the number of rows (size) of the
9808* local uppest blocks. IMBLOC is at least zero.
9809*
9810* MB (global input) INTEGER
9811* On entry, MB specifies the blocking factor used to partition
9812* the rows of the matrix. MB must be at least one.
9813*
9814* LMBLOC (local input) INTEGER
9815* On entry, LMBLOC specifies the number of rows (size) of the
9816* local lowest blocks. LMBLOC is at least zero.
9817*
9818* NBLKS (local input) INTEGER
9819* On entry, NBLKS specifies the local number of blocks of co-
9820* lumns. NBLKS is at least zero.
9821*
9822* INBLOC (local input) INTEGER
9823* On entry, INBLOC specifies the number of columns (size) of
9824* the local leftmost blocks. INBLOC is at least zero.
9825*
9826* NB (global input) INTEGER
9827* On entry, NB specifies the blocking factor used to partition
9828* the the columns of the matrix. NB must be at least one.
9829*
9830* LNBLOC (local input) INTEGER
9831* On entry, LNBLOC specifies the number of columns (size) of
9832* the local rightmost blocks. LNBLOC is at least zero.
9833*
9834* JMP (local input) INTEGER array
9835* On entry, JMP is an array of dimension JMP_LEN containing the
9836* different jump values used by the random matrix generator.
9837*
9838* IMULADD (local input) INTEGER array
9839* On entry, IMULADD is an array of dimension (4, JMP_LEN). The
9840* jth column of this array contains the encoded initial cons-
9841* tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) )
9842* (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j)
9843* contains respectively the 16-lower and 16-higher bits of the
9844* constant a_j, and IMULADD(3:4,j) contains the 16-lower and
9845* 16-higher bits of the constant c_j.
9846*
9847* -- Written on April 1, 1998 by
9848* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9849*
9850* =====================================================================
9851*
9852* .. Parameters ..
9853 INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
9854 $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
9855 $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
9856 PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
9857 $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
9858 $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
9859 $ jmp_nqnb = 10, jmp_nqinbloc = 11,
9860 $ jmp_len = 11 )
9861* ..
9862* .. Local Scalars ..
9863 INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK,
9864 $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP
9865 DOUBLE PRECISION DUMMY
9866* ..
9867* .. Local Arrays ..
9868 INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 )
9869* ..
9870* .. External Subroutines ..
9871 EXTERNAL PB_JUMPIT
9872* ..
9873* .. External Functions ..
9874 LOGICAL LSAME
9875 DOUBLE PRECISION PB_DRAND
9876 EXTERNAL LSAME, PB_DRAND
9877* ..
9878* .. Intrinsic Functions ..
9879 INTRINSIC max, min
9880* ..
9881* .. Executable Statements ..
9882*
9883 DO 10 i = 1, 2
9884 ib1( i ) = iran( i )
9885 ib2( i ) = iran( i )
9886 ib3( i ) = iran( i )
9887 10 CONTINUE
9888*
9889 IF( lsame( aform, 'N' ) ) THEN
9890*
9891* Generate random matrix
9892*
9893 jj = 1
9894*
9895 DO 50 jblk = 1, nblks
9896*
9897 IF( jblk.EQ.1 ) THEN
9898 jb = inbloc
9899 ELSE IF( jblk.EQ.nblks ) THEN
9900 jb = lnbloc
9901 ELSE
9902 jb = nb
9903 END IF
9904*
9905 DO 40 jk = jj, jj + jb - 1
9906*
9907 ii = 1
9908*
9909 DO 30 iblk = 1, mblks
9910*
9911 IF( iblk.EQ.1 ) THEN
9912 ib = imbloc
9913 ELSE IF( iblk.EQ.mblks ) THEN
9914 ib = lmbloc
9915 ELSE
9916 ib = mb
9917 END IF
9918*
9919* Blocks are IB by JB
9920*
9921 DO 20 ik = ii, ii + ib - 1
9922 a( ik, jk ) = pb_drand( 0 )
9923 20 CONTINUE
9924*
9925 ii = ii + ib
9926*
9927 IF( iblk.EQ.1 ) THEN
9928*
9929* Jump IMBLOC + ( NPROW - 1 ) * MB rows
9930*
9931 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
9932 $ ib0 )
9933*
9934 ELSE
9935*
9936* Jump NPROW * MB rows
9937*
9938 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1, ib0 )
9939*
9940 END IF
9941*
9942 ib1( 1 ) = ib0( 1 )
9943 ib1( 2 ) = ib0( 2 )
9944*
9945 30 CONTINUE
9946*
9947* Jump one column
9948*
9949 CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
9950*
9951 ib1( 1 ) = ib0( 1 )
9952 ib1( 2 ) = ib0( 2 )
9953 ib2( 1 ) = ib0( 1 )
9954 ib2( 2 ) = ib0( 2 )
9955*
9956 40 CONTINUE
9957*
9958 jj = jj + jb
9959*
9960 IF( jblk.EQ.1 ) THEN
9961*
9962* Jump INBLOC + ( NPCOL - 1 ) * NB columns
9963*
9964 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
9965*
9966 ELSE
9967*
9968* Jump NPCOL * NB columns
9969*
9970 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
9971*
9972 END IF
9973*
9974 ib1( 1 ) = ib0( 1 )
9975 ib1( 2 ) = ib0( 2 )
9976 ib2( 1 ) = ib0( 1 )
9977 ib2( 2 ) = ib0( 2 )
9978 ib3( 1 ) = ib0( 1 )
9979 ib3( 2 ) = ib0( 2 )
9980*
9981 50 CONTINUE
9982*
9983 ELSE IF( lsame( aform, 'T' ) .OR. lsame( aform, 'C' ) ) THEN
9984*
9985* Generate the transpose of the matrix that would be normally
9986* generated.
9987*
9988 ii = 1
9989*
9990 DO 90 iblk = 1, mblks
9991*
9992 IF( iblk.EQ.1 ) THEN
9993 ib = imbloc
9994 ELSE IF( iblk.EQ.mblks ) THEN
9995 ib = lmbloc
9996 ELSE
9997 ib = mb
9998 END IF
9999*
10000 DO 80 ik = ii, ii + ib - 1
10001*
10002 jj = 1
10003*
10004 DO 70 jblk = 1, nblks
10005*
10006 IF( jblk.EQ.1 ) THEN
10007 jb = inbloc
10008 ELSE IF( jblk.EQ.nblks ) THEN
10009 jb = lnbloc
10010 ELSE
10011 jb = nb
10012 END IF
10013*
10014* Blocks are IB by JB
10015*
10016 DO 60 jk = jj, jj + jb - 1
10017 a( ik, jk ) = pb_drand( 0 )
10018 60 CONTINUE
10019*
10020 jj = jj + jb
10021*
10022 IF( jblk.EQ.1 ) THEN
10023*
10024* Jump INBLOC + ( NPCOL - 1 ) * NB columns
10025*
10026 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
10027 $ ib0 )
10028*
10029 ELSE
10030*
10031* Jump NPCOL * NB columns
10032*
10033 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1, ib0 )
10034*
10035 END IF
10036*
10037 ib1( 1 ) = ib0( 1 )
10038 ib1( 2 ) = ib0( 2 )
10039*
10040 70 CONTINUE
10041*
10042* Jump one row
10043*
10044 CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
10045*
10046 ib1( 1 ) = ib0( 1 )
10047 ib1( 2 ) = ib0( 2 )
10048 ib2( 1 ) = ib0( 1 )
10049 ib2( 2 ) = ib0( 2 )
10050*
10051 80 CONTINUE
10052*
10053 ii = ii + ib
10054*
10055 IF( iblk.EQ.1 ) THEN
10056*
10057* Jump IMBLOC + ( NPROW - 1 ) * MB rows
10058*
10059 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
10060*
10061 ELSE
10062*
10063* Jump NPROW * MB rows
10064*
10065 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
10066*
10067 END IF
10068*
10069 ib1( 1 ) = ib0( 1 )
10070 ib1( 2 ) = ib0( 2 )
10071 ib2( 1 ) = ib0( 1 )
10072 ib2( 2 ) = ib0( 2 )
10073 ib3( 1 ) = ib0( 1 )
10074 ib3( 2 ) = ib0( 2 )
10075*
10076 90 CONTINUE
10077*
10078 ELSE IF( ( lsame( aform, 'S' ) ).OR.( lsame( aform, 'H' ) ) ) THEN
10079*
10080* Generate a symmetric matrix
10081*
10082 IF( lsame( uplo, 'L' ) ) THEN
10083*
10084* generate lower trapezoidal part
10085*
10086 jj = 1
10087 lcmtc = lcmt00
10088*
10089 DO 170 jblk = 1, nblks
10090*
10091 IF( jblk.EQ.1 ) THEN
10092 jb = inbloc
10093 low = 1 - inbloc
10094 ELSE IF( jblk.EQ.nblks ) THEN
10095 jb = lnbloc
10096 low = 1 - nb
10097 ELSE
10098 jb = nb
10099 low = 1 - nb
10100 END IF
10101*
10102 DO 160 jk = jj, jj + jb - 1
10103*
10104 ii = 1
10105 lcmtr = lcmtc
10106*
10107 DO 150 iblk = 1, mblks
10108*
10109 IF( iblk.EQ.1 ) THEN
10110 ib = imbloc
10111 upp = imbloc - 1
10112 ELSE IF( iblk.EQ.mblks ) THEN
10113 ib = lmbloc
10114 upp = mb - 1
10115 ELSE
10116 ib = mb
10117 upp = mb - 1
10118 END IF
10119*
10120* Blocks are IB by JB
10121*
10122 IF( lcmtr.GT.upp ) THEN
10123*
10124 DO 100 ik = ii, ii + ib - 1
10125 dummy = pb_drand( 0 )
10126 100 CONTINUE
10127*
10128 ELSE IF( lcmtr.GE.low ) THEN
10129*
10130 jtmp = jk - jj + 1
10131 mnb = max( 0, -lcmtr )
10132*
10133 IF( jtmp.LE.min( mnb, jb ) ) THEN
10134*
10135 DO 110 ik = ii, ii + ib - 1
10136 a( ik, jk ) = pb_drand( 0 )
10137 110 CONTINUE
10138*
10139 ELSE IF( ( jtmp.GE.( mnb + 1 ) ) .AND.
10140 $ ( jtmp.LE.min( ib-lcmtr, jb ) ) ) THEN
10141*
10142 itmp = ii + jtmp + lcmtr - 1
10143*
10144 DO 120 ik = ii, itmp - 1
10145 dummy = pb_drand( 0 )
10146 120 CONTINUE
10147*
10148 DO 130 ik = itmp, ii + ib - 1
10149 a( ik, jk ) = pb_drand( 0 )
10150 130 CONTINUE
10151*
10152 END IF
10153*
10154 ELSE
10155*
10156 DO 140 ik = ii, ii + ib - 1
10157 a( ik, jk ) = pb_drand( 0 )
10158 140 CONTINUE
10159*
10160 END IF
10161*
10162 ii = ii + ib
10163*
10164 IF( iblk.EQ.1 ) THEN
10165*
10166* Jump IMBLOC + ( NPROW - 1 ) * MB rows
10167*
10168 lcmtr = lcmtr - jmp( jmp_npimbloc )
10169 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
10170 $ ib0 )
10171*
10172 ELSE
10173*
10174* Jump NPROW * MB rows
10175*
10176 lcmtr = lcmtr - jmp( jmp_npmb )
10177 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1,
10178 $ ib0 )
10179*
10180 END IF
10181*
10182 ib1( 1 ) = ib0( 1 )
10183 ib1( 2 ) = ib0( 2 )
10184*
10185 150 CONTINUE
10186*
10187* Jump one column
10188*
10189 CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
10190*
10191 ib1( 1 ) = ib0( 1 )
10192 ib1( 2 ) = ib0( 2 )
10193 ib2( 1 ) = ib0( 1 )
10194 ib2( 2 ) = ib0( 2 )
10195*
10196 160 CONTINUE
10197*
10198 jj = jj + jb
10199*
10200 IF( jblk.EQ.1 ) THEN
10201*
10202* Jump INBLOC + ( NPCOL - 1 ) * NB columns
10203*
10204 lcmtc = lcmtc + jmp( jmp_nqinbloc )
10205 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
10206*
10207 ELSE
10208*
10209* Jump NPCOL * NB columns
10210*
10211 lcmtc = lcmtc + jmp( jmp_nqnb )
10212 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
10213*
10214 END IF
10215*
10216 ib1( 1 ) = ib0( 1 )
10217 ib1( 2 ) = ib0( 2 )
10218 ib2( 1 ) = ib0( 1 )
10219 ib2( 2 ) = ib0( 2 )
10220 ib3( 1 ) = ib0( 1 )
10221 ib3( 2 ) = ib0( 2 )
10222*
10223 170 CONTINUE
10224*
10225 ELSE
10226*
10227* generate upper trapezoidal part
10228*
10229 ii = 1
10230 lcmtr = lcmt00
10231*
10232 DO 250 iblk = 1, mblks
10233*
10234 IF( iblk.EQ.1 ) THEN
10235 ib = imbloc
10236 upp = imbloc - 1
10237 ELSE IF( iblk.EQ.mblks ) THEN
10238 ib = lmbloc
10239 upp = mb - 1
10240 ELSE
10241 ib = mb
10242 upp = mb - 1
10243 END IF
10244*
10245 DO 240 ik = ii, ii + ib - 1
10246*
10247 jj = 1
10248 lcmtc = lcmtr
10249*
10250 DO 230 jblk = 1, nblks
10251*
10252 IF( jblk.EQ.1 ) THEN
10253 jb = inbloc
10254 low = 1 - inbloc
10255 ELSE IF( jblk.EQ.nblks ) THEN
10256 jb = lnbloc
10257 low = 1 - nb
10258 ELSE
10259 jb = nb
10260 low = 1 - nb
10261 END IF
10262*
10263* Blocks are IB by JB
10264*
10265 IF( lcmtc.LT.low ) THEN
10266*
10267 DO 180 jk = jj, jj + jb - 1
10268 dummy = pb_drand( 0 )
10269 180 CONTINUE
10270*
10271 ELSE IF( lcmtc.LE.upp ) THEN
10272*
10273 itmp = ik - ii + 1
10274 mnb = max( 0, lcmtc )
10275*
10276 IF( itmp.LE.min( mnb, ib ) ) THEN
10277*
10278 DO 190 jk = jj, jj + jb - 1
10279 a( ik, jk ) = pb_drand( 0 )
10280 190 CONTINUE
10281*
10282 ELSE IF( ( itmp.GE.( mnb + 1 ) ) .AND.
10283 $ ( itmp.LE.min( jb+lcmtc, ib ) ) ) THEN
10284*
10285 jtmp = jj + itmp - lcmtc - 1
10286*
10287 DO 200 jk = jj, jtmp - 1
10288 dummy = pb_drand( 0 )
10289 200 CONTINUE
10290*
10291 DO 210 jk = jtmp, jj + jb - 1
10292 a( ik, jk ) = pb_drand( 0 )
10293 210 CONTINUE
10294*
10295 END IF
10296*
10297 ELSE
10298*
10299 DO 220 jk = jj, jj + jb - 1
10300 a( ik, jk ) = pb_drand( 0 )
10301 220 CONTINUE
10302*
10303 END IF
10304*
10305 jj = jj + jb
10306*
10307 IF( jblk.EQ.1 ) THEN
10308*
10309* Jump INBLOC + ( NPCOL - 1 ) * NB columns
10310*
10311 lcmtc = lcmtc + jmp( jmp_nqinbloc )
10312 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
10313 $ ib0 )
10314*
10315 ELSE
10316*
10317* Jump NPCOL * NB columns
10318*
10319 lcmtc = lcmtc + jmp( jmp_nqnb )
10320 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1,
10321 $ ib0 )
10322*
10323 END IF
10324*
10325 ib1( 1 ) = ib0( 1 )
10326 ib1( 2 ) = ib0( 2 )
10327*
10328 230 CONTINUE
10329*
10330* Jump one row
10331*
10332 CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
10333*
10334 ib1( 1 ) = ib0( 1 )
10335 ib1( 2 ) = ib0( 2 )
10336 ib2( 1 ) = ib0( 1 )
10337 ib2( 2 ) = ib0( 2 )
10338*
10339 240 CONTINUE
10340*
10341 ii = ii + ib
10342*
10343 IF( iblk.EQ.1 ) THEN
10344*
10345* Jump IMBLOC + ( NPROW - 1 ) * MB rows
10346*
10347 lcmtr = lcmtr - jmp( jmp_npimbloc )
10348 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
10349*
10350 ELSE
10351*
10352* Jump NPROW * MB rows
10353*
10354 lcmtr = lcmtr - jmp( jmp_npmb )
10355 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
10356*
10357 END IF
10358*
10359 ib1( 1 ) = ib0( 1 )
10360 ib1( 2 ) = ib0( 2 )
10361 ib2( 1 ) = ib0( 1 )
10362 ib2( 2 ) = ib0( 2 )
10363 ib3( 1 ) = ib0( 1 )
10364 ib3( 2 ) = ib0( 2 )
10365*
10366 250 CONTINUE
10367*
10368 END IF
10369*
10370 END IF
10371*
10372 RETURN
10373*
10374* End of PB_DLAGEN
10375*
10376 END
10377 DOUBLE PRECISION FUNCTION pb_drand( IDUMM )
10378*
10379* -- PBLAS test routine (version 2.0) --
10380* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10381* and University of California, Berkeley.
10382* April 1, 1998
10383*
10384* .. Scalar Arguments ..
10385 INTEGER idumm
10386* ..
10387*
10388* Purpose
10389* =======
10390*
10391* PB_DRAND generates the next number in the random sequence. This func-
10392* tion ensures that this number will be in the interval ( -1.0, 1.0 ).
10393*
10394* Arguments
10395* =========
10396*
10397* IDUMM (local input) INTEGER
10398* This argument is ignored, but necessary to a FORTRAN 77 func-
10399* tion.
10400*
10401* Further Details
10402* ===============
10403*
10404* On entry, the array IRAND stored in the common block RANCOM contains
10405* the information (2 integers) required to generate the next number in
10406* the sequence X( n ). This number is computed as
10407*
10408* X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
10409*
10410* where the constant d is the largest 32 bit positive integer. The
10411* array IRAND is then updated for the generation of the next number
10412* X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
10413* The constants a and c should have been preliminarily stored in the
10414* array IACS as 2 pairs of integers. The initial set up of IRAND and
10415* IACS is performed by the routine PB_SETRAN.
10416*
10417* -- Written on April 1, 1998 by
10418* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10419*
10420* =====================================================================
10421*
10422* .. Parameters ..
10423 DOUBLE PRECISION one, two
10424 PARAMETER ( one = 1.0d+0, two = 2.0d+0 )
10425* ..
10426* .. External Functions ..
10427 DOUBLE PRECISION pb_dran
10428 EXTERNAL pb_dran
10429* ..
10430* .. Executable Statements ..
10431*
10432 pb_drand = one - two * pb_dran( idumm )
10433*
10434 RETURN
10435*
10436* End of PB_DRAND
10437*
10438 END
10439 DOUBLE PRECISION FUNCTION pb_dran( IDUMM )
10440*
10441* -- PBLAS test routine (version 2.0) --
10442* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10443* and University of California, Berkeley.
10444* April 1, 1998
10445*
10446* .. Scalar Arguments ..
10447 INTEGER idumm
10448* ..
10449*
10450* Purpose
10451* =======
10452*
10453* PB_DRAN generates the next number in the random sequence.
10454*
10455* Arguments
10456* =========
10457*
10458* IDUMM (local input) INTEGER
10459* This argument is ignored, but necessary to a FORTRAN 77 func-
10460* tion.
10461*
10462* Further Details
10463* ===============
10464*
10465* On entry, the array IRAND stored in the common block RANCOM contains
10466* the information (2 integers) required to generate the next number in
10467* the sequence X( n ). This number is computed as
10468*
10469* X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
10470*
10471* where the constant d is the largest 32 bit positive integer. The
10472* array IRAND is then updated for the generation of the next number
10473* X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
10474* The constants a and c should have been preliminarily stored in the
10475* array IACS as 2 pairs of integers. The initial set up of IRAND and
10476* IACS is performed by the routine PB_SETRAN.
10477*
10478* -- Written on April 1, 1998 by
10479* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10480*
10481* =====================================================================
10482*
10483* .. Parameters ..
10484 DOUBLE PRECISION divfac, pow16
10485 PARAMETER ( divfac = 2.147483648d+9,
10486 $ pow16 = 6.5536d+4 )
10487* ..
10488* .. Local Arrays ..
10489 INTEGER j( 2 )
10490* ..
10491* .. External Subroutines ..
10492 EXTERNAL pb_ladd, pb_lmul
10493* ..
10494* .. Intrinsic Functions ..
10495 INTRINSIC dble
10496* ..
10497* .. Common Blocks ..
10498 INTEGER iacs( 4 ), irand( 2 )
10499 common /rancom/ irand, iacs
10500* ..
10501* .. Save Statements ..
10502 SAVE /rancom/
10503* ..
10504* .. Executable Statements ..
10505*
10506 pb_dran = ( dble( irand( 1 ) ) + pow16 * dble( irand( 2 ) ) ) /
10507 $ divfac
10508*
10509 CALL pb_lmul( irand, iacs, j )
10510 CALL pb_ladd( j, iacs( 3 ), irand )
10511*
10512 RETURN
10513*
10514* End of PB_DRAN
10515*
10516 END
subroutine pb_ainfog2l(m, n, i, j, desc, nprow, npcol, myrow, mycol, imb1, inb1, mp, nq, ii, jj, prow, pcol, rprow, rpcol)
Definition pblastst.f:2023
subroutine pb_descset2(desc, m, n, imb, inb, mb, nb, rsrc, csrc, ctxt, lld)
Definition pblastst.f:3172
subroutine pb_ladd(j, k, i)
Definition pblastst.f:4480
subroutine pb_binfo(offd, m, n, imb1, inb1, mb, nb, mrrow, mrcol, lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, iupp, upp)
Definition pblastst.f:3577
subroutine pb_setran(iran, iac)
Definition pblastst.f:4759
subroutine pb_infog2l(i, j, desc, nprow, npcol, myrow, mycol, ii, jj, prow, pcol)
Definition pblastst.f:1673
subroutine pb_locinfo(i, inb, nb, myroc, srcproc, nprocs, ilocblk, ilocoff, mydist)
Definition pblastst.f:3910
subroutine pchkpbe(ictxt, nout, sname, infot)
Definition pblastst.f:1084
subroutine pb_chkmat(ictxt, m, mpos0, n, npos0, ia, ja, desca, dpos0, info)
Definition pblastst.f:2742
subroutine pb_lmul(k, j, i)
Definition pblastst.f:4559
subroutine pb_jump(k, muladd, irann, iranm, ima)
Definition pblastst.f:4648
subroutine pb_setlocran(seed, ilocblk, jlocblk, ilocoff, jlocoff, myrdist, mycdist, nprow, npcol, jmp, imuladd, iran)
Definition pblastst.f:4302
subroutine pb_initmuladd(muladd0, jmp, imuladd)
Definition pblastst.f:4196
subroutine pb_desctrans(descin, descout)
Definition pblastst.f:2964
subroutine pb_initjmp(colmaj, nvir, imbvir, inbvir, imbloc, inbloc, mb, nb, rsrc, csrc, nprow, npcol, stride, jmp)
Definition pblastst.f:4045
subroutine pb_jumpit(muladd, irann, iranm)
Definition pblastst.f:4822
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdmmch2(ictxt, uplo, trans, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, pc, ic, jc, descc, ct, g, err, info)
Definition pdblastst.f:5996
subroutine pdchkmout(m, n, a, pa, ia, ja, desca, info)
Definition pdblastst.f:3627
subroutine pdchkopt(ictxt, nout, subptr, scode, sname, argnam, argpos)
Definition pdblastst.f:266
subroutine pb_pdlaprnt(m, n, a, ia, ja, desca, irprnt, icprnt, cmatnm, nout, work)
Definition pdblastst.f:8634
subroutine pdmmch(ictxt, transa, transb, m, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, pc, ic, jc, descc, ct, g, err, info)
Definition pdblastst.f:5272
subroutine pb_dlaset(uplo, m, n, ioffd, alpha, beta, a, lda)
Definition pdblastst.f:9359
subroutine pb_pdlaprn2(m, n, a, ia, ja, desca, irprnt, icprnt, cmatnm, nout, prow, pcol, work)
Definition pdblastst.f:8848
subroutine pdchkvout(n, x, px, ix, jx, descx, incx, info)
Definition pdblastst.f:2870
subroutine pdsetpblas(ictxt)
Definition pdblastst.f:1478
subroutine pderrset(err, errmax, xtrue, x)
Definition pdblastst.f:2456
subroutine pderraxpby(errbnd, alpha, x, beta, y, prec)
Definition pdblastst.f:6684
subroutine pdvmch2(ictxt, uplo, m, n, alpha, x, ix, jx, descx, incx, y, iy, jy, descy, incy, a, pa, ia, ja, desca, g, err, info)
Definition pdblastst.f:4919
double precision function pb_dran(idumm)
subroutine pdlagen(inplace, aform, diag, offa, m, n, ia, ja, desca, iaseed, a, lda)
Definition pdblastst.f:7845
subroutine pdoptee(ictxt, nout, subptr, scode, sname)
Definition pdblastst.f:2
subroutine pb_dlagen(uplo, aform, a, lda, lcmt00, iran, mblks, imbloc, mb, lmbloc, nblks, inbloc, nb, lnbloc, jmp, imuladd)
Definition pdblastst.f:9737
subroutine pdmprnt(ictxt, nout, m, n, a, lda, irprnt, icprnt, cmatnm)
Definition pdblastst.f:3949
subroutine pdlascal(type, m, n, alpha, a, ia, ja, desca)
Definition pdblastst.f:7337
subroutine pdmmch3(uplo, trans, m, n, alpha, a, ia, ja, desca, beta, c, pc, ic, jc, descc, err, info)
Definition pdblastst.f:6372
subroutine pb_dfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdblastst.f:9079
subroutine pdlaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pdblastst.f:6862
subroutine pdvmch(ictxt, uplo, m, n, alpha, x, ix, jx, descx, incx, y, iy, jy, descy, incy, a, pa, ia, ja, desca, g, err, info)
Definition pdblastst.f:4570
subroutine pdcallsub(subptr, scode)
Definition pdblastst.f:2180
subroutine pb_dchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdblastst.f:9192
subroutine pdladom(inplace, n, alpha, a, ia, ja, desca)
Definition pdblastst.f:8242
double precision function pb_drand(idumm)
subroutine pdvprnt(ictxt, nout, n, x, incx, irprnt, icprnt, cvecnm)
Definition pdblastst.f:4056
subroutine pdmmch1(ictxt, uplo, trans, n, k, alpha, a, ia, ja, desca, beta, c, pc, ic, jc, descc, ct, g, err, info)
Definition pdblastst.f:5649
subroutine pdmvch(ictxt, trans, m, n, alpha, a, ia, ja, desca, x, ix, jx, descx, incx, beta, y, py, iy, jy, descy, incy, g, err, info)
Definition pdblastst.f:4157
subroutine pdchkvin(errmax, n, x, px, ix, jx, descx, incx, info)
Definition pdblastst.f:2576
subroutine pb_dlascal(uplo, m, n, ioffd, alpha, a, lda)
Definition pdblastst.f:9556
subroutine pdmatee(ictxt, nout, subptr, scode, sname)
Definition pdblastst.f:1190
subroutine pddimee(ictxt, nout, subptr, scode, sname)
Definition pdblastst.f:455
subroutine pdchkmat(ictxt, nout, subptr, scode, sname, argnam, argpos)
Definition pdblastst.f:1674
subroutine pdchkmin(errmax, m, n, a, pa, ia, ja, desca, info)
Definition pdblastst.f:3326
subroutine pdvecee(ictxt, nout, subptr, scode, sname)
Definition pdblastst.f:936
subroutine pdchkdim(ictxt, nout, subptr, scode, sname, argnam, argpos)
Definition pdblastst.f:759
double precision function pdlamch(ictxt, cmach)
Definition pdblastst.f:6769
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
logical function lsame(ca, cb)
Definition tools.f:1724
double precision function dlamch(cmach)
Definition tools.f:10