SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pcblastst.f
Go to the documentation of this file.
1 SUBROUTINE pcoptee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
2*
3* -- PBLAS test routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* .. Scalar Arguments ..
9 INTEGER ICTXT, NOUT, SCODE
10* ..
11* .. Array Arguments ..
12 CHARACTER*(*) SNAME
13* ..
14* .. Subroutine Arguments ..
15 EXTERNAL subptr
16* ..
17*
18* Purpose
19* =======
20*
21* PCOPTEE tests whether the PBLAS respond correctly to a bad option
22* argument.
23*
24* Notes
25* =====
26*
27* A description vector is associated with each 2D block-cyclicly dis-
28* tributed matrix. This vector stores the information required to
29* establish the mapping between a matrix entry and its corresponding
30* process and memory location.
31*
32* In the following comments, the character _ should be read as
33* "of the distributed matrix". Let A be a generic term for any 2D
34* block cyclicly distributed matrix. Its description vector is DESCA:
35*
36* NOTATION STORED IN EXPLANATION
37* ---------------- --------------- ------------------------------------
38* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
39* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
40* the NPROW x NPCOL BLACS process grid
41* A is distributed over. The context
42* itself is global, but the handle
43* (the integer value) may vary.
44* M_A (global) DESCA( M_ ) The number of rows in the distribu-
45* ted matrix A, M_A >= 0.
46* N_A (global) DESCA( N_ ) The number of columns in the distri-
47* buted matrix A, N_A >= 0.
48* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
49* block of the matrix A, IMB_A > 0.
50* INB_A (global) DESCA( INB_ ) The number of columns of the upper
51* left block of the matrix A,
52* INB_A > 0.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
54* bute the last M_A-IMB_A rows of A,
55* MB_A > 0.
56* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
57* bute the last N_A-INB_A columns of
58* A, NB_A > 0.
59* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60* row of the matrix A is distributed,
61* NPROW > RSRC_A >= 0.
62* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63* first column of A is distributed.
64* NPCOL > CSRC_A >= 0.
65* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66* array storing the local blocks of
67* the distributed matrix A,
68* IF( Lc( 1, N_A ) > 0 )
69* LLD_A >= MAX( 1, Lr( 1, M_A ) )
70* ELSE
71* LLD_A >= 1.
72*
73* Let K be the number of rows of a matrix A starting at the global in-
74* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
75* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
76* receive if these K rows were distributed over NPROW processes. If K
77* is the number of columns of a matrix A starting at the global index
78* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
79* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
80* these K columns were distributed over NPCOL processes.
81*
82* The values of Lr() and Lc() may be determined via a call to the func-
83* tion PB_NUMROC:
84* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
85* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
86*
87* Arguments
88* =========
89*
90* ICTXT (local input) INTEGER
91* On entry, ICTXT specifies the BLACS context handle, indica-
92* ting the global context of the operation. The context itself
93* is global, but the value of ICTXT is local.
94*
95* NOUT (global input) INTEGER
96* On entry, NOUT specifies the unit number for the output file.
97* When NOUT is 6, output to screen, when NOUT is 0, output to
98* stderr. NOUT is only defined for process 0.
99*
100* SUBPTR (global input) SUBROUTINE
101* On entry, SUBPTR is a subroutine. SUBPTR must be declared
102* EXTERNAL in the calling subroutine.
103*
104* SCODE (global input) INTEGER
105* On entry, SCODE specifies the calling sequence code.
106*
107* SNAME (global input) CHARACTER*(*)
108* On entry, SNAME specifies the subroutine name calling this
109* subprogram.
110*
111* Calling sequence encodings
112* ==========================
113*
114* code Formal argument list Examples
115*
116* 11 (n, v1,v2) _SWAP, _COPY
117* 12 (n,s1, v1 ) _SCAL, _SCAL
118* 13 (n,s1, v1,v2) _AXPY, _DOT_
119* 14 (n,s1,i1,v1 ) _AMAX
120* 15 (n,u1, v1 ) _ASUM, _NRM2
121*
122* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
123* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
124* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
125* 24 ( m,n,s1,v1,v2,m1) _GER_
126* 25 (uplo, n,s1,v1, m1) _SYR
127* 26 (uplo, n,u1,v1, m1) _HER
128* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
129*
130* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
131* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
132* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
133* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
134* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
135* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
136* 37 ( m,n, s1,m1, s2,m3) _TRAN_
137* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
138* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
139* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
140*
141* -- Written on April 1, 1998 by
142* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
143*
144* =====================================================================
145*
146* .. Local Scalars ..
147 INTEGER APOS
148* ..
149* .. External Subroutines ..
150 EXTERNAL pcchkopt
151* ..
152* .. Executable Statements ..
153*
154* Level 2 PBLAS
155*
156 IF( scode.EQ.21 ) THEN
157*
158* Check 1st (and only) option
159*
160 apos = 1
161 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
162*
163 ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
164 $ scode.EQ.27 ) THEN
165*
166* Check 1st (and only) option
167*
168 apos = 1
169 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
170*
171 ELSE IF( scode.EQ.23 ) THEN
172*
173* Check 1st option
174*
175 apos = 1
176 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
177*
178* Check 2nd option
179*
180 apos = 2
181 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
182*
183* Check 3rd option
184*
185 apos = 3
186 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
187*
188* Level 3 PBLAS
189*
190 ELSE IF( scode.EQ.31 ) THEN
191*
192* Check 1st option
193*
194 apos = 1
195 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
196*
197* Check 2'nd option
198*
199 apos = 2
200 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'B', apos )
201*
202 ELSE IF( scode.EQ.32 ) THEN
203*
204* Check 1st option
205*
206 apos = 1
207 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
208*
209* Check 2nd option
210*
211 apos = 2
212 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
213*
214 ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
215 $ scode.EQ.36 .OR. scode.EQ.40 ) THEN
216*
217* Check 1st option
218*
219 apos = 1
220 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
221*
222* Check 2'nd option
223*
224 apos = 2
225 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
226*
227 ELSE IF( scode.EQ.38 ) THEN
228*
229* Check 1st option
230*
231 apos = 1
232 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'S', apos )
233*
234* Check 2nd option
235*
236 apos = 2
237 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'U', apos )
238*
239* Check 3rd option
240*
241 apos = 3
242 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
243*
244* Check 4th option
245*
246 apos = 4
247 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'D', apos )
248*
249*
250 ELSE IF( scode.EQ.39 ) THEN
251*
252* Check 1st option
253*
254 apos = 1
255 CALL pcchkopt( ictxt, nout, subptr, scode, sname, 'A', apos )
256*
257 END IF
258*
259 RETURN
260*
261* End of PCOPTEE
262*
263 END
264 SUBROUTINE pcchkopt( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
265 $ ARGPOS )
266*
267* -- PBLAS test routine (version 2.0) --
268* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
269* and University of California, Berkeley.
270* April 1, 1998
271*
272* .. Scalar Arguments ..
273 CHARACTER*1 ARGNAM
274 INTEGER ARGPOS, ICTXT, NOUT, SCODE
275* ..
276* .. Array Arguments ..
277 CHARACTER*(*) SNAME
278* ..
279* .. Subroutine Arguments ..
280 EXTERNAL subptr
281* ..
282*
283* Purpose
284* =======
285*
286* PCCHKOPT tests the option ARGNAM in any PBLAS routine.
287*
288* Notes
289* =====
290*
291* A description vector is associated with each 2D block-cyclicly dis-
292* tributed matrix. This vector stores the information required to
293* establish the mapping between a matrix entry and its corresponding
294* process and memory location.
295*
296* In the following comments, the character _ should be read as
297* "of the distributed matrix". Let A be a generic term for any 2D
298* block cyclicly distributed matrix. Its description vector is DESCA:
299*
300* NOTATION STORED IN EXPLANATION
301* ---------------- --------------- ------------------------------------
302* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
303* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
304* the NPROW x NPCOL BLACS process grid
305* A is distributed over. The context
306* itself is global, but the handle
307* (the integer value) may vary.
308* M_A (global) DESCA( M_ ) The number of rows in the distribu-
309* ted matrix A, M_A >= 0.
310* N_A (global) DESCA( N_ ) The number of columns in the distri-
311* buted matrix A, N_A >= 0.
312* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
313* block of the matrix A, IMB_A > 0.
314* INB_A (global) DESCA( INB_ ) The number of columns of the upper
315* left block of the matrix A,
316* INB_A > 0.
317* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
318* bute the last M_A-IMB_A rows of A,
319* MB_A > 0.
320* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
321* bute the last N_A-INB_A columns of
322* A, NB_A > 0.
323* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
324* row of the matrix A is distributed,
325* NPROW > RSRC_A >= 0.
326* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
327* first column of A is distributed.
328* NPCOL > CSRC_A >= 0.
329* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
330* array storing the local blocks of
331* the distributed matrix A,
332* IF( Lc( 1, N_A ) > 0 )
333* LLD_A >= MAX( 1, Lr( 1, M_A ) )
334* ELSE
335* LLD_A >= 1.
336*
337* Let K be the number of rows of a matrix A starting at the global in-
338* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
339* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
340* receive if these K rows were distributed over NPROW processes. If K
341* is the number of columns of a matrix A starting at the global index
342* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
343* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
344* these K columns were distributed over NPCOL processes.
345*
346* The values of Lr() and Lc() may be determined via a call to the func-
347* tion PB_NUMROC:
348* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
349* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
350*
351* Arguments
352* =========
353*
354* ICTXT (local input) INTEGER
355* On entry, ICTXT specifies the BLACS context handle, indica-
356* ting the global context of the operation. The context itself
357* is global, but the value of ICTXT is local.
358*
359* NOUT (global input) INTEGER
360* On entry, NOUT specifies the unit number for the output file.
361* When NOUT is 6, output to screen, when NOUT is 0, output to
362* stderr. NOUT is only defined for process 0.
363*
364* SUBPTR (global input) SUBROUTINE
365* On entry, SUBPTR is a subroutine. SUBPTR must be declared
366* EXTERNAL in the calling subroutine.
367*
368* SCODE (global input) INTEGER
369* On entry, SCODE specifies the calling sequence code.
370*
371* SNAME (global input) CHARACTER*(*)
372* On entry, SNAME specifies the subroutine name calling this
373* subprogram.
374*
375* ARGNAM (global input) CHARACTER*(*)
376* On entry, ARGNAM specifies the name of the option to be
377* checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'.
378*
379* ARGPOS (global input) INTEGER
380* On entry, ARGPOS indicates the position of the option ARGNAM
381* to be tested.
382*
383* -- Written on April 1, 1998 by
384* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
385*
386* =====================================================================
387*
388* .. Local Scalars ..
389 INTEGER INFOT
390* ..
391* .. External Subroutines ..
392 EXTERNAL pccallsub, pchkpbe, pcsetpblas
393* ..
394* .. External Functions ..
395 LOGICAL LSAME
396 EXTERNAL lsame
397* ..
398* .. Common Blocks ..
399 CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO
400 COMMON /pblasc/diag, side, transa, transb, uplo
401* ..
402* .. Executable Statements ..
403*
404* Reiniatilize the dummy arguments to correct values
405*
406 CALL pcsetpblas( ictxt )
407*
408 IF( lsame( argnam, 'D' ) ) THEN
409*
410* Generate bad DIAG option
411*
412 diag = '/'
413*
414 ELSE IF( lsame( argnam, 'S' ) ) THEN
415*
416* Generate bad SIDE option
417*
418 side = '/'
419*
420 ELSE IF( lsame( argnam, 'A' ) ) THEN
421*
422* Generate bad TRANSA option
423*
424 transa = '/'
425*
426 ELSE IF( lsame( argnam, 'B' ) ) THEN
427*
428* Generate bad TRANSB option
429*
430 transb = '/'
431*
432 ELSE IF( lsame( argnam, 'U' ) ) THEN
433*
434* Generate bad UPLO option
435*
436 uplo = '/'
437*
438 END IF
439*
440* Set INFOT to the position of the bad dimension argument
441*
442 infot = argpos
443*
444* Call the PBLAS routine
445*
446 CALL pccallsub( subptr, scode )
447 CALL pchkpbe( ictxt, nout, sname, infot )
448*
449 RETURN
450*
451* End of PCCHKOPT
452*
453 END
454 SUBROUTINE pcdimee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
455*
456* -- PBLAS test routine (version 2.0) --
457* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
458* and University of California, Berkeley.
459* April 1, 1998
460*
461* .. Scalar Arguments ..
462 INTEGER ICTXT, NOUT, SCODE
463* ..
464* .. Array Arguments ..
465 CHARACTER*(*) SNAME
466* ..
467* .. Subroutine Arguments ..
468 EXTERNAL subptr
469* ..
470*
471* Purpose
472* =======
473*
474* PCDIMEE tests whether the PBLAS respond correctly to a bad dimension
475* argument.
476*
477* Notes
478* =====
479*
480* A description vector is associated with each 2D block-cyclicly dis-
481* tributed matrix. This vector stores the information required to
482* establish the mapping between a matrix entry and its corresponding
483* process and memory location.
484*
485* In the following comments, the character _ should be read as
486* "of the distributed matrix". Let A be a generic term for any 2D
487* block cyclicly distributed matrix. Its description vector is DESCA:
488*
489* NOTATION STORED IN EXPLANATION
490* ---------------- --------------- ------------------------------------
491* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
492* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
493* the NPROW x NPCOL BLACS process grid
494* A is distributed over. The context
495* itself is global, but the handle
496* (the integer value) may vary.
497* M_A (global) DESCA( M_ ) The number of rows in the distribu-
498* ted matrix A, M_A >= 0.
499* N_A (global) DESCA( N_ ) The number of columns in the distri-
500* buted matrix A, N_A >= 0.
501* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
502* block of the matrix A, IMB_A > 0.
503* INB_A (global) DESCA( INB_ ) The number of columns of the upper
504* left block of the matrix A,
505* INB_A > 0.
506* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
507* bute the last M_A-IMB_A rows of A,
508* MB_A > 0.
509* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
510* bute the last N_A-INB_A columns of
511* A, NB_A > 0.
512* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
513* row of the matrix A is distributed,
514* NPROW > RSRC_A >= 0.
515* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
516* first column of A is distributed.
517* NPCOL > CSRC_A >= 0.
518* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
519* array storing the local blocks of
520* the distributed matrix A,
521* IF( Lc( 1, N_A ) > 0 )
522* LLD_A >= MAX( 1, Lr( 1, M_A ) )
523* ELSE
524* LLD_A >= 1.
525*
526* Let K be the number of rows of a matrix A starting at the global in-
527* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
528* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
529* receive if these K rows were distributed over NPROW processes. If K
530* is the number of columns of a matrix A starting at the global index
531* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
532* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
533* these K columns were distributed over NPCOL processes.
534*
535* The values of Lr() and Lc() may be determined via a call to the func-
536* tion PB_NUMROC:
537* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
538* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
539*
540* Arguments
541* =========
542*
543* ICTXT (local input) INTEGER
544* On entry, ICTXT specifies the BLACS context handle, indica-
545* ting the global context of the operation. The context itself
546* is global, but the value of ICTXT is local.
547*
548* NOUT (global input) INTEGER
549* On entry, NOUT specifies the unit number for the output file.
550* When NOUT is 6, output to screen, when NOUT is 0, output to
551* stderr. NOUT is only defined for process 0.
552*
553* SUBPTR (global input) SUBROUTINE
554* On entry, SUBPTR is a subroutine. SUBPTR must be declared
555* EXTERNAL in the calling subroutine.
556*
557* SCODE (global input) INTEGER
558* On entry, SCODE specifies the calling sequence code.
559*
560* SNAME (global input) CHARACTER*(*)
561* On entry, SNAME specifies the subroutine name calling this
562* subprogram.
563*
564* Calling sequence encodings
565* ==========================
566*
567* code Formal argument list Examples
568*
569* 11 (n, v1,v2) _SWAP, _COPY
570* 12 (n,s1, v1 ) _SCAL, _SCAL
571* 13 (n,s1, v1,v2) _AXPY, _DOT_
572* 14 (n,s1,i1,v1 ) _AMAX
573* 15 (n,u1, v1 ) _ASUM, _NRM2
574*
575* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
576* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
577* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
578* 24 ( m,n,s1,v1,v2,m1) _GER_
579* 25 (uplo, n,s1,v1, m1) _SYR
580* 26 (uplo, n,u1,v1, m1) _HER
581* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
582*
583* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
584* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
585* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
586* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
587* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
588* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
589* 37 ( m,n, s1,m1, s2,m3) _TRAN_
590* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
591* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
592* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
593*
594* -- Written on April 1, 1998 by
595* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
596*
597* =====================================================================
598*
599* .. Local Scalars ..
600 INTEGER APOS
601* ..
602* .. External Subroutines ..
603 EXTERNAL pcchkdim
604* ..
605* .. Executable Statements ..
606*
607* Level 1 PBLAS
608*
609 IF( scode.EQ.11 .OR. scode.EQ.12 .OR. scode.EQ.13 .OR.
610 $ scode.EQ.14 .OR. scode.EQ.15 ) THEN
611*
612* Check 1st (and only) dimension
613*
614 apos = 1
615 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
616*
617* Level 2 PBLAS
618*
619 ELSE IF( scode.EQ.21 ) THEN
620*
621* Check 1st dimension
622*
623 apos = 2
624 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
625*
626* Check 2nd dimension
627*
628 apos = 3
629 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
630*
631 ELSE IF( scode.EQ.22 .OR. scode.EQ.25 .OR. scode.EQ.26 .OR.
632 $ scode.EQ.27 ) THEN
633*
634* Check 1st (and only) dimension
635*
636 apos = 2
637 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
638*
639 ELSE IF( scode.EQ.23 ) THEN
640*
641* Check 1st (and only) dimension
642*
643 apos = 4
644 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
645*
646 ELSE IF( scode.EQ.24 ) THEN
647*
648* Check 1st dimension
649*
650 apos = 1
651 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
652*
653* Check 2nd dimension
654*
655 apos = 2
656 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
657*
658* Level 3 PBLAS
659*
660 ELSE IF( scode.EQ.31 ) THEN
661*
662* Check 1st dimension
663*
664 apos = 3
665 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
666*
667* Check 2nd dimension
668*
669 apos = 4
670 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
671*
672* Check 3rd dimension
673*
674 apos = 5
675 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
676*
677 ELSE IF( scode.EQ.32 ) THEN
678*
679* Check 1st dimension
680*
681 apos = 3
682 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
683*
684* Check 2nd dimension
685*
686 apos = 4
687 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
688*
689 ELSE IF( scode.EQ.33 .OR. scode.EQ.34 .OR. scode.EQ.35 .OR.
690 $ scode.EQ.36 ) THEN
691*
692* Check 1st dimension
693*
694 apos = 3
695 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
696*
697* Check 2nd dimension
698*
699 apos = 4
700 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'K', apos )
701*
702 ELSE IF( scode.EQ.37 ) THEN
703*
704* Check 1st dimension
705*
706 apos = 1
707 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
708*
709* Check 2nd dimension
710*
711 apos = 2
712 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
713*
714 ELSE IF( scode.EQ.38 ) THEN
715*
716* Check 1st dimension
717*
718 apos = 5
719 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
720*
721* Check 2nd dimension
722*
723 apos = 6
724 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
725*
726 ELSE IF( scode.EQ.39 ) THEN
727*
728* Check 1st dimension
729*
730 apos = 2
731 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
732*
733* Check 2nd dimension
734*
735 apos = 3
736 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
737*
738 ELSE IF( scode.EQ.40 ) THEN
739*
740* Check 1st dimension
741*
742 apos = 3
743 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'M', apos )
744*
745* Check 2nd dimension
746*
747 apos = 4
748 CALL pcchkdim( ictxt, nout, subptr, scode, sname, 'N', apos )
749*
750 END IF
751*
752 RETURN
753*
754* End of PCDIMEE
755*
756 END
757 SUBROUTINE pcchkdim( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
758 $ ARGPOS )
759*
760* -- PBLAS test routine (version 2.0) --
761* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
762* and University of California, Berkeley.
763* April 1, 1998
764*
765* .. Scalar Arguments ..
766 CHARACTER*1 ARGNAM
767 INTEGER ARGPOS, ICTXT, NOUT, SCODE
768* ..
769* .. Array Arguments ..
770 CHARACTER*(*) SNAME
771* ..
772* .. Subroutine Arguments ..
773 EXTERNAL subptr
774* ..
775*
776* Purpose
777* =======
778*
779* PCCHKDIM tests the dimension ARGNAM in any PBLAS routine.
780*
781* Notes
782* =====
783*
784* A description vector is associated with each 2D block-cyclicly dis-
785* tributed matrix. This vector stores the information required to
786* establish the mapping between a matrix entry and its corresponding
787* process and memory location.
788*
789* In the following comments, the character _ should be read as
790* "of the distributed matrix". Let A be a generic term for any 2D
791* block cyclicly distributed matrix. Its description vector is DESCA:
792*
793* NOTATION STORED IN EXPLANATION
794* ---------------- --------------- ------------------------------------
795* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
796* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
797* the NPROW x NPCOL BLACS process grid
798* A is distributed over. The context
799* itself is global, but the handle
800* (the integer value) may vary.
801* M_A (global) DESCA( M_ ) The number of rows in the distribu-
802* ted matrix A, M_A >= 0.
803* N_A (global) DESCA( N_ ) The number of columns in the distri-
804* buted matrix A, N_A >= 0.
805* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
806* block of the matrix A, IMB_A > 0.
807* INB_A (global) DESCA( INB_ ) The number of columns of the upper
808* left block of the matrix A,
809* INB_A > 0.
810* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
811* bute the last M_A-IMB_A rows of A,
812* MB_A > 0.
813* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
814* bute the last N_A-INB_A columns of
815* A, NB_A > 0.
816* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
817* row of the matrix A is distributed,
818* NPROW > RSRC_A >= 0.
819* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
820* first column of A is distributed.
821* NPCOL > CSRC_A >= 0.
822* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
823* array storing the local blocks of
824* the distributed matrix A,
825* IF( Lc( 1, N_A ) > 0 )
826* LLD_A >= MAX( 1, Lr( 1, M_A ) )
827* ELSE
828* LLD_A >= 1.
829*
830* Let K be the number of rows of a matrix A starting at the global in-
831* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
832* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
833* receive if these K rows were distributed over NPROW processes. If K
834* is the number of columns of a matrix A starting at the global index
835* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
836* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
837* these K columns were distributed over NPCOL processes.
838*
839* The values of Lr() and Lc() may be determined via a call to the func-
840* tion PB_NUMROC:
841* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
842* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
843*
844* Arguments
845* =========
846*
847* ICTXT (local input) INTEGER
848* On entry, ICTXT specifies the BLACS context handle, indica-
849* ting the global context of the operation. The context itself
850* is global, but the value of ICTXT is local.
851*
852* NOUT (global input) INTEGER
853* On entry, NOUT specifies the unit number for the output file.
854* When NOUT is 6, output to screen, when NOUT is 0, output to
855* stderr. NOUT is only defined for process 0.
856*
857* SUBPTR (global input) SUBROUTINE
858* On entry, SUBPTR is a subroutine. SUBPTR must be declared
859* EXTERNAL in the calling subroutine.
860*
861* SCODE (global input) INTEGER
862* On entry, SCODE specifies the calling sequence code.
863*
864* SNAME (global input) CHARACTER*(*)
865* On entry, SNAME specifies the subroutine name calling this
866* subprogram.
867*
868* ARGNAM (global input) CHARACTER*(*)
869* On entry, ARGNAM specifies the name of the dimension to be
870* checked. ARGNAM can either be 'M', 'N' or 'K'.
871*
872* ARGPOS (global input) INTEGER
873* On entry, ARGPOS indicates the position of the option ARGNAM
874* to be tested.
875*
876* -- Written on April 1, 1998 by
877* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
878*
879* =====================================================================
880*
881* .. Local Scalars ..
882 INTEGER INFOT
883* ..
884* .. External Subroutines ..
885 EXTERNAL pccallsub, pchkpbe, pcsetpblas
886* ..
887* .. External Functions ..
888 LOGICAL LSAME
889 EXTERNAL LSAME
890* ..
891* .. Common Blocks ..
892 INTEGER KDIM, MDIM, NDIM
893 COMMON /PBLASN/KDIM, MDIM, NDIM
894* ..
895* .. Executable Statements ..
896*
897* Reiniatilize the dummy arguments to correct values
898*
899 CALL pcsetpblas( ictxt )
900*
901 IF( lsame( argnam, 'M' ) ) THEN
902*
903* Generate bad MDIM
904*
905 mdim = -1
906*
907 ELSE IF( lsame( argnam, 'N' ) ) THEN
908*
909* Generate bad NDIM
910*
911 ndim = -1
912*
913 ELSE
914*
915* Generate bad KDIM
916*
917 kdim = -1
918*
919 END IF
920*
921* Set INFOT to the position of the bad dimension argument
922*
923 infot = argpos
924*
925* Call the PBLAS routine
926*
927 CALL pccallsub( subptr, scode )
928 CALL pchkpbe( ictxt, nout, sname, infot )
929*
930 RETURN
931*
932* End of PCCHKDIM
933*
934 END
935 SUBROUTINE pcvecee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
936*
937* -- PBLAS test routine (version 2.0) --
938* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
939* and University of California, Berkeley.
940* April 1, 1998
941*
942* .. Scalar Arguments ..
943 INTEGER ICTXT, NOUT, SCODE
944* ..
945* .. Array Arguments ..
946 CHARACTER*7 SNAME
947* ..
948* .. Subroutine Arguments ..
949 EXTERNAL subptr
950* ..
951*
952* Purpose
953* =======
954*
955* PCVECEE tests whether the PBLAS respond correctly to a bad vector
956* argument. Each vector <vec> is described by: <vec>, I<vec>, J<vec>,
957* DESC<vec>, INC<vec>. Out of all these, only I<vec>, J<vec>,
958* DESC<vec>, and INC<vec> can be tested.
959*
960* Notes
961* =====
962*
963* A description vector is associated with each 2D block-cyclicly dis-
964* tributed matrix. This vector stores the information required to
965* establish the mapping between a matrix entry and its corresponding
966* process and memory location.
967*
968* In the following comments, the character _ should be read as
969* "of the distributed matrix". Let A be a generic term for any 2D
970* block cyclicly distributed matrix. Its description vector is DESCA:
971*
972* NOTATION STORED IN EXPLANATION
973* ---------------- --------------- ------------------------------------
974* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
975* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
976* the NPROW x NPCOL BLACS process grid
977* A is distributed over. The context
978* itself is global, but the handle
979* (the integer value) may vary.
980* M_A (global) DESCA( M_ ) The number of rows in the distribu-
981* ted matrix A, M_A >= 0.
982* N_A (global) DESCA( N_ ) The number of columns in the distri-
983* buted matrix A, N_A >= 0.
984* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
985* block of the matrix A, IMB_A > 0.
986* INB_A (global) DESCA( INB_ ) The number of columns of the upper
987* left block of the matrix A,
988* INB_A > 0.
989* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
990* bute the last M_A-IMB_A rows of A,
991* MB_A > 0.
992* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
993* bute the last N_A-INB_A columns of
994* A, NB_A > 0.
995* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
996* row of the matrix A is distributed,
997* NPROW > RSRC_A >= 0.
998* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
999* first column of A is distributed.
1000* NPCOL > CSRC_A >= 0.
1001* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1002* array storing the local blocks of
1003* the distributed matrix A,
1004* IF( Lc( 1, N_A ) > 0 )
1005* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1006* ELSE
1007* LLD_A >= 1.
1008*
1009* Let K be the number of rows of a matrix A starting at the global in-
1010* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1011* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1012* receive if these K rows were distributed over NPROW processes. If K
1013* is the number of columns of a matrix A starting at the global index
1014* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1015* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1016* these K columns were distributed over NPCOL processes.
1017*
1018* The values of Lr() and Lc() may be determined via a call to the func-
1019* tion PB_NUMROC:
1020* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1021* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1022*
1023* Arguments
1024* =========
1025*
1026* ICTXT (local input) INTEGER
1027* On entry, ICTXT specifies the BLACS context handle, indica-
1028* ting the global context of the operation. The context itself
1029* is global, but the value of ICTXT is local.
1030*
1031* NOUT (global input) INTEGER
1032* On entry, NOUT specifies the unit number for the output file.
1033* When NOUT is 6, output to screen, when NOUT is 0, output to
1034* stderr. NOUT is only defined for process 0.
1035*
1036* SUBPTR (global input) SUBROUTINE
1037* On entry, SUBPTR is a subroutine. SUBPTR must be declared
1038* EXTERNAL in the calling subroutine.
1039*
1040* SCODE (global input) INTEGER
1041* On entry, SCODE specifies the calling sequence code.
1042*
1043* SNAME (global input) CHARACTER*(*)
1044* On entry, SNAME specifies the subroutine name calling this
1045* subprogram.
1046*
1047* Calling sequence encodings
1048* ==========================
1049*
1050* code Formal argument list Examples
1051*
1052* 11 (n, v1,v2) _SWAP, _COPY
1053* 12 (n,s1, v1 ) _SCAL, _SCAL
1054* 13 (n,s1, v1,v2) _AXPY, _DOT_
1055* 14 (n,s1,i1,v1 ) _AMAX
1056* 15 (n,u1, v1 ) _ASUM, _NRM2
1057*
1058* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1059* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1060* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1061* 24 ( m,n,s1,v1,v2,m1) _GER_
1062* 25 (uplo, n,s1,v1, m1) _SYR
1063* 26 (uplo, n,u1,v1, m1) _HER
1064* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1065*
1066* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1067* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1068* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1069* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1070* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1071* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1072* 37 ( m,n, s1,m1, s2,m3) _TRAN_
1073* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1074* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1075* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1076*
1077* -- Written on April 1, 1998 by
1078* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1079*
1080* =====================================================================
1081*
1082* .. Local Scalars ..
1083 INTEGER APOS
1084* ..
1085* .. External Subroutines ..
1086 EXTERNAL pcchkmat
1087* ..
1088* .. Executable Statements ..
1089*
1090* Level 1 PBLAS
1091*
1092 IF( scode.EQ.11 ) THEN
1093*
1094* Check 1st vector
1095*
1096 apos = 2
1097 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1098*
1099* Check 2nd vector
1100*
1101 apos = 7
1102 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1103*
1104 ELSE IF( scode.EQ.12 .OR. scode.EQ.15 ) THEN
1105*
1106* Check 1st (and only) vector
1107*
1108 apos = 3
1109 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1110*
1111 ELSE IF( scode.EQ.13 ) THEN
1112*
1113* Check 1st vector
1114*
1115 apos = 3
1116 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1117*
1118* Check 2nd vector
1119*
1120 apos = 8
1121 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1122*
1123 ELSE IF( scode.EQ.14 ) THEN
1124*
1125* Check 1st (and only) vector
1126*
1127 apos = 4
1128 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1129*
1130* Level 2 PBLAS
1131*
1132 ELSE IF( scode.EQ.21 ) THEN
1133*
1134* Check 1st vector
1135*
1136 apos = 9
1137 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1138*
1139* Check 2nd vector
1140*
1141 apos = 15
1142 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1143*
1144 ELSE IF( scode.EQ.22 ) THEN
1145*
1146* Check 1st vector
1147*
1148 apos = 8
1149 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1150*
1151* Check 2nd vector
1152*
1153 apos = 14
1154 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1155*
1156 ELSE IF( scode.EQ.23 ) THEN
1157*
1158* Check 1st (and only) vector
1159*
1160 apos = 9
1161 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1162*
1163 ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1164*
1165* Check 1st vector
1166*
1167 apos = 4
1168 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1169*
1170* Check 2nd vector
1171*
1172 apos = 9
1173 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'Y', apos )
1174*
1175 ELSE IF( scode.EQ.26 .OR. scode.EQ.27 ) THEN
1176*
1177* Check 1'st (and only) vector
1178*
1179 apos = 4
1180 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'X', apos )
1181*
1182 END IF
1183*
1184 RETURN
1185*
1186* End of PCVECEE
1187*
1188 END
1189 SUBROUTINE pcmatee( ICTXT, NOUT, SUBPTR, SCODE, SNAME )
1190*
1191* -- PBLAS test routine (version 2.0) --
1192* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1193* and University of California, Berkeley.
1194* April 1, 1998
1195*
1196* .. Scalar Arguments ..
1197 INTEGER ICTXT, NOUT, SCODE
1198* ..
1199* .. Array Arguments ..
1200 CHARACTER*7 SNAME
1201* ..
1202* .. Subroutine Arguments ..
1203 EXTERNAL subptr
1204* ..
1205*
1206* Purpose
1207* =======
1208*
1209* PCMATEE tests whether the PBLAS respond correctly to a bad matrix
1210* argument. Each matrix <mat> is described by: <mat>, I<mat>, J<mat>,
1211* and DESC<mat>. Out of all these, only I<vec>, J<vec> and DESC<mat>
1212* can be tested.
1213*
1214* Notes
1215* =====
1216*
1217* A description vector is associated with each 2D block-cyclicly dis-
1218* tributed matrix. This vector stores the information required to
1219* establish the mapping between a matrix entry and its corresponding
1220* process and memory location.
1221*
1222* In the following comments, the character _ should be read as
1223* "of the distributed matrix". Let A be a generic term for any 2D
1224* block cyclicly distributed matrix. Its description vector is DESCA:
1225*
1226* NOTATION STORED IN EXPLANATION
1227* ---------------- --------------- ------------------------------------
1228* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1229* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1230* the NPROW x NPCOL BLACS process grid
1231* A is distributed over. The context
1232* itself is global, but the handle
1233* (the integer value) may vary.
1234* M_A (global) DESCA( M_ ) The number of rows in the distribu-
1235* ted matrix A, M_A >= 0.
1236* N_A (global) DESCA( N_ ) The number of columns in the distri-
1237* buted matrix A, N_A >= 0.
1238* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1239* block of the matrix A, IMB_A > 0.
1240* INB_A (global) DESCA( INB_ ) The number of columns of the upper
1241* left block of the matrix A,
1242* INB_A > 0.
1243* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1244* bute the last M_A-IMB_A rows of A,
1245* MB_A > 0.
1246* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1247* bute the last N_A-INB_A columns of
1248* A, NB_A > 0.
1249* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1250* row of the matrix A is distributed,
1251* NPROW > RSRC_A >= 0.
1252* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1253* first column of A is distributed.
1254* NPCOL > CSRC_A >= 0.
1255* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1256* array storing the local blocks of
1257* the distributed matrix A,
1258* IF( Lc( 1, N_A ) > 0 )
1259* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1260* ELSE
1261* LLD_A >= 1.
1262*
1263* Let K be the number of rows of a matrix A starting at the global in-
1264* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1265* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1266* receive if these K rows were distributed over NPROW processes. If K
1267* is the number of columns of a matrix A starting at the global index
1268* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1269* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1270* these K columns were distributed over NPCOL processes.
1271*
1272* The values of Lr() and Lc() may be determined via a call to the func-
1273* tion PB_NUMROC:
1274* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1275* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1276*
1277* Arguments
1278* =========
1279*
1280* ICTXT (local input) INTEGER
1281* On entry, ICTXT specifies the BLACS context handle, indica-
1282* ting the global context of the operation. The context itself
1283* is global, but the value of ICTXT is local.
1284*
1285* NOUT (global input) INTEGER
1286* On entry, NOUT specifies the unit number for the output file.
1287* When NOUT is 6, output to screen, when NOUT is 0, output to
1288* stderr. NOUT is only defined for process 0.
1289*
1290* SUBPTR (global input) SUBROUTINE
1291* On entry, SUBPTR is a subroutine. SUBPTR must be declared
1292* EXTERNAL in the calling subroutine.
1293*
1294* SCODE (global input) INTEGER
1295* On entry, SCODE specifies the calling sequence code.
1296*
1297* SNAME (global input) CHARACTER*(*)
1298* On entry, SNAME specifies the subroutine name calling this
1299* subprogram.
1300*
1301* Calling sequence encodings
1302* ==========================
1303*
1304* code Formal argument list Examples
1305*
1306* 11 (n, v1,v2) _SWAP, _COPY
1307* 12 (n,s1, v1 ) _SCAL, _SCAL
1308* 13 (n,s1, v1,v2) _AXPY, _DOT_
1309* 14 (n,s1,i1,v1 ) _AMAX
1310* 15 (n,u1, v1 ) _ASUM, _NRM2
1311*
1312* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
1313* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
1314* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
1315* 24 ( m,n,s1,v1,v2,m1) _GER_
1316* 25 (uplo, n,s1,v1, m1) _SYR
1317* 26 (uplo, n,u1,v1, m1) _HER
1318* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
1319*
1320* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
1321* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
1322* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
1323* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
1324* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
1325* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
1326* 37 ( m,n, s1,m1, s2,m3) _TRAN_
1327* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
1328* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
1329* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
1330*
1331* -- Written on April 1, 1998 by
1332* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1333*
1334* =====================================================================
1335*
1336* .. Local Scalars ..
1337 INTEGER APOS
1338* ..
1339* .. External Subroutines ..
1340 EXTERNAL pcchkmat
1341* ..
1342* .. Executable Statements ..
1343*
1344* Level 2 PBLAS
1345*
1346 IF( scode.EQ.21 .OR. scode.EQ.23 ) THEN
1347*
1348* Check 1st (and only) matrix
1349*
1350 apos = 5
1351 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1352*
1353 ELSE IF( scode.EQ.22 ) THEN
1354*
1355* Check 1st (and only) matrix
1356*
1357 apos = 4
1358 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1359*
1360 ELSE IF( scode.EQ.24 .OR. scode.EQ.27 ) THEN
1361*
1362* Check 1st (and only) matrix
1363*
1364 apos = 14
1365 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1366*
1367 ELSE IF( scode.EQ.25 .OR. scode.EQ.26 ) THEN
1368*
1369* Check 1st (and only) matrix
1370*
1371 apos = 9
1372 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1373*
1374* Level 3 PBLAS
1375*
1376 ELSE IF( scode.EQ.31 ) THEN
1377*
1378* Check 1st matrix
1379*
1380 apos = 7
1381 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1382*
1383* Check 2nd matrix
1384*
1385 apos = 11
1386 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1387*
1388* Check 3nd matrix
1389*
1390 apos = 16
1391 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1392*
1393 ELSE IF( scode.EQ.32 .OR. scode.EQ.35 .OR. scode.EQ.36 ) THEN
1394*
1395* Check 1st matrix
1396*
1397 apos = 6
1398 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1399*
1400* Check 2nd matrix
1401*
1402 apos = 10
1403 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1404*
1405* Check 3nd matrix
1406*
1407 apos = 15
1408 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1409*
1410 ELSE IF( scode.EQ.33 .OR. scode.EQ.34 ) THEN
1411*
1412* Check 1st matrix
1413*
1414 apos = 6
1415 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1416*
1417* Check 2nd matrix
1418*
1419 apos = 11
1420 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1421*
1422 ELSE IF( scode.EQ.37 ) THEN
1423*
1424* Check 1st matrix
1425*
1426 apos = 4
1427 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1428*
1429* Check 2nd matrix
1430*
1431 apos = 9
1432 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1433*
1434 ELSE IF( scode.EQ.38 ) THEN
1435*
1436* Check 1st matrix
1437*
1438 apos = 8
1439 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1440*
1441* Check 2nd matrix
1442*
1443 apos = 12
1444 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'B', apos )
1445*
1446 ELSE IF( scode.EQ.39 ) THEN
1447*
1448* Check 1st matrix
1449*
1450 apos = 5
1451 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1452*
1453* Check 2nd matrix
1454*
1455 apos = 10
1456 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1457*
1458 ELSE IF( scode.EQ.40 ) THEN
1459*
1460* Check 1st matrix
1461*
1462 apos = 6
1463 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'A', apos )
1464*
1465* Check 2nd matrix
1466*
1467 apos = 11
1468 CALL pcchkmat( ictxt, nout, subptr, scode, sname, 'C', apos )
1469*
1470 END IF
1471*
1472 RETURN
1473*
1474* End of PCMATEE
1475*
1476 END
1477 SUBROUTINE pcsetpblas( ICTXT )
1478*
1479* -- PBLAS test routine (version 2.0) --
1480* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1481* and University of California, Berkeley.
1482* April 1, 1998
1483*
1484* .. Scalar Arguments ..
1485 INTEGER ICTXT
1486* ..
1487*
1488* Purpose
1489* =======
1490*
1491* PCSETPBLAS initializes *all* the dummy arguments to correct values.
1492*
1493* Notes
1494* =====
1495*
1496* A description vector is associated with each 2D block-cyclicly dis-
1497* tributed matrix. This vector stores the information required to
1498* establish the mapping between a matrix entry and its corresponding
1499* process and memory location.
1500*
1501* In the following comments, the character _ should be read as
1502* "of the distributed matrix". Let A be a generic term for any 2D
1503* block cyclicly distributed matrix. Its description vector is DESCA:
1504*
1505* NOTATION STORED IN EXPLANATION
1506* ---------------- --------------- ------------------------------------
1507* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1508* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1509* the NPROW x NPCOL BLACS process grid
1510* A is distributed over. The context
1511* itself is global, but the handle
1512* (the integer value) may vary.
1513* M_A (global) DESCA( M_ ) The number of rows in the distribu-
1514* ted matrix A, M_A >= 0.
1515* N_A (global) DESCA( N_ ) The number of columns in the distri-
1516* buted matrix A, N_A >= 0.
1517* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1518* block of the matrix A, IMB_A > 0.
1519* INB_A (global) DESCA( INB_ ) The number of columns of the upper
1520* left block of the matrix A,
1521* INB_A > 0.
1522* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1523* bute the last M_A-IMB_A rows of A,
1524* MB_A > 0.
1525* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1526* bute the last N_A-INB_A columns of
1527* A, NB_A > 0.
1528* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1529* row of the matrix A is distributed,
1530* NPROW > RSRC_A >= 0.
1531* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1532* first column of A is distributed.
1533* NPCOL > CSRC_A >= 0.
1534* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1535* array storing the local blocks of
1536* the distributed matrix A,
1537* IF( Lc( 1, N_A ) > 0 )
1538* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1539* ELSE
1540* LLD_A >= 1.
1541*
1542* Let K be the number of rows of a matrix A starting at the global in-
1543* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1544* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1545* receive if these K rows were distributed over NPROW processes. If K
1546* is the number of columns of a matrix A starting at the global index
1547* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1548* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1549* these K columns were distributed over NPCOL processes.
1550*
1551* The values of Lr() and Lc() may be determined via a call to the func-
1552* tion PB_NUMROC:
1553* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1554* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1555*
1556* Arguments
1557* =========
1558*
1559* ICTXT (local input) INTEGER
1560* On entry, ICTXT specifies the BLACS context handle, indica-
1561* ting the global context of the operation. The context itself
1562* is global, but the value of ICTXT is local.
1563*
1564* -- Written on April 1, 1998 by
1565* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1566*
1567* =====================================================================
1568*
1569* .. Parameters ..
1570 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1571 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1572 $ rsrc_
1573 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1574 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1575 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1576 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1577 REAL RONE
1578 COMPLEX ONE
1579 parameter( one = ( 1.0e+0, 0.0e+0 ),
1580 $ rone = 1.0e+0 )
1581* ..
1582* .. External Subroutines ..
1583 EXTERNAL pb_descset2
1584* ..
1585* .. Common Blocks ..
1586 CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
1587 INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1588 $ jc, jx, jy, kdim, mdim, ndim
1589 REAL USCLR
1590 COMPLEX SCLR
1591 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1592 $ descx( dlen_ ), descy( dlen_ )
1593 COMPLEX A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
1594 COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO
1595 COMMON /pblasd/desca, descb, descc, descx, descy
1596 COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1597 $ ja, jb, jc, jx, jy
1598 COMMON /pblasm/a, b, c
1599 COMMON /pblasn/kdim, mdim, ndim
1600 COMMON /pblass/sclr, usclr
1601 COMMON /pblasv/x, y
1602* ..
1603* .. Executable Statements ..
1604*
1605* Set default values for options
1606*
1607 diag = 'N'
1608 side = 'L'
1609 transa = 'N'
1610 transb = 'N'
1611 uplo = 'U'
1612*
1613* Set default values for scalars
1614*
1615 kdim = 1
1616 mdim = 1
1617 ndim = 1
1618 isclr = 1
1619 sclr = one
1620 usclr = rone
1621*
1622* Set default values for distributed matrix A
1623*
1624 a( 1, 1 ) = one
1625 a( 2, 1 ) = one
1626 a( 1, 2 ) = one
1627 a( 2, 2 ) = one
1628 ia = 1
1629 ja = 1
1630 CALL pb_descset2( desca, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1631*
1632* Set default values for distributed matrix B
1633*
1634 b( 1, 1 ) = one
1635 b( 2, 1 ) = one
1636 b( 1, 2 ) = one
1637 b( 2, 2 ) = one
1638 ib = 1
1639 jb = 1
1640 CALL pb_descset2( descb, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1641*
1642* Set default values for distributed matrix C
1643*
1644 c( 1, 1 ) = one
1645 c( 2, 1 ) = one
1646 c( 1, 2 ) = one
1647 c( 2, 2 ) = one
1648 ic = 1
1649 jc = 1
1650 CALL pb_descset2( descc, 2, 2, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1651*
1652* Set default values for distributed matrix X
1653*
1654 x( 1 ) = one
1655 x( 2 ) = one
1656 ix = 1
1657 jx = 1
1658 CALL pb_descset2( descx, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1659 incx = 1
1660*
1661* Set default values for distributed matrix Y
1662*
1663 y( 1 ) = one
1664 y( 2 ) = one
1665 iy = 1
1666 jy = 1
1667 CALL pb_descset2( descy, 2, 1, 1, 1, 1, 1, 0, 0, ictxt, 2 )
1668 incy = 1
1669*
1670 RETURN
1671*
1672* End of PCSETPBLAS
1673*
1674 END
1675 SUBROUTINE pcchkmat( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM,
1676 $ ARGPOS )
1677*
1678* -- PBLAS test routine (version 2.0) --
1679* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
1680* and University of California, Berkeley.
1681* April 1, 1998
1682*
1683* .. Scalar Arguments ..
1684 CHARACTER*1 ARGNAM
1685 INTEGER ARGPOS, ICTXT, NOUT, SCODE
1686* ..
1687* .. Array Arguments ..
1688 CHARACTER*(*) SNAME
1689* ..
1690* .. Subroutine Arguments ..
1691 EXTERNAL subptr
1692* ..
1693*
1694* Purpose
1695* =======
1696*
1697* PCCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine.
1698*
1699* Notes
1700* =====
1701*
1702* A description vector is associated with each 2D block-cyclicly dis-
1703* tributed matrix. This vector stores the information required to
1704* establish the mapping between a matrix entry and its corresponding
1705* process and memory location.
1706*
1707* In the following comments, the character _ should be read as
1708* "of the distributed matrix". Let A be a generic term for any 2D
1709* block cyclicly distributed matrix. Its description vector is DESCA:
1710*
1711* NOTATION STORED IN EXPLANATION
1712* ---------------- --------------- ------------------------------------
1713* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
1714* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
1715* the NPROW x NPCOL BLACS process grid
1716* A is distributed over. The context
1717* itself is global, but the handle
1718* (the integer value) may vary.
1719* M_A (global) DESCA( M_ ) The number of rows in the distribu-
1720* ted matrix A, M_A >= 0.
1721* N_A (global) DESCA( N_ ) The number of columns in the distri-
1722* buted matrix A, N_A >= 0.
1723* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
1724* block of the matrix A, IMB_A > 0.
1725* INB_A (global) DESCA( INB_ ) The number of columns of the upper
1726* left block of the matrix A,
1727* INB_A > 0.
1728* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
1729* bute the last M_A-IMB_A rows of A,
1730* MB_A > 0.
1731* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
1732* bute the last N_A-INB_A columns of
1733* A, NB_A > 0.
1734* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
1735* row of the matrix A is distributed,
1736* NPROW > RSRC_A >= 0.
1737* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
1738* first column of A is distributed.
1739* NPCOL > CSRC_A >= 0.
1740* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
1741* array storing the local blocks of
1742* the distributed matrix A,
1743* IF( Lc( 1, N_A ) > 0 )
1744* LLD_A >= MAX( 1, Lr( 1, M_A ) )
1745* ELSE
1746* LLD_A >= 1.
1747*
1748* Let K be the number of rows of a matrix A starting at the global in-
1749* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
1750* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
1751* receive if these K rows were distributed over NPROW processes. If K
1752* is the number of columns of a matrix A starting at the global index
1753* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
1754* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
1755* these K columns were distributed over NPCOL processes.
1756*
1757* The values of Lr() and Lc() may be determined via a call to the func-
1758* tion PB_NUMROC:
1759* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
1760* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
1761*
1762* Arguments
1763* =========
1764*
1765* ICTXT (local input) INTEGER
1766* On entry, ICTXT specifies the BLACS context handle, indica-
1767* ting the global context of the operation. The context itself
1768* is global, but the value of ICTXT is local.
1769*
1770* NOUT (global input) INTEGER
1771* On entry, NOUT specifies the unit number for the output file.
1772* When NOUT is 6, output to screen, when NOUT is 0, output to
1773* stderr. NOUT is only defined for process 0.
1774*
1775* SUBPTR (global input) SUBROUTINE
1776* On entry, SUBPTR is a subroutine. SUBPTR must be declared
1777* EXTERNAL in the calling subroutine.
1778*
1779* SCODE (global input) INTEGER
1780* On entry, SCODE specifies the calling sequence code.
1781*
1782* SNAME (global input) CHARACTER*(*)
1783* On entry, SNAME specifies the subroutine name calling this
1784* subprogram.
1785*
1786* ARGNAM (global input) CHARACTER*(*)
1787* On entry, ARGNAM specifies the name of the matrix or vector
1788* to be checked. ARGNAM can either be 'A', 'B' or 'C' when one
1789* wants to check a matrix, and 'X' or 'Y' for a vector.
1790*
1791* ARGPOS (global input) INTEGER
1792* On entry, ARGPOS indicates the position of the first argument
1793* of the matrix (or vector) ARGNAM.
1794*
1795* -- Written on April 1, 1998 by
1796* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
1797*
1798* =====================================================================
1799*
1800* .. Parameters ..
1801 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
1802 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
1803 $ RSRC_
1804 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
1805 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
1806 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
1807 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
1808 INTEGER DESCMULT
1809 PARAMETER ( DESCMULT = 100 )
1810* ..
1811* .. Local Scalars ..
1812 INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL
1813* ..
1814* .. External Subroutines ..
1815 EXTERNAL blacs_gridinfo, pccallsub, pchkpbe, pcsetpblas
1816* ..
1817* .. External Functions ..
1818 LOGICAL LSAME
1819 EXTERNAL LSAME
1820* ..
1821* .. Common Blocks ..
1822 INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
1823 $ JC, JX, JY
1824 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
1825 $ descx( dlen_ ), descy( dlen_ )
1826 COMMON /pblasd/desca, descb, descc, descx, descy
1827 COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
1828 $ ja, jb, jc, jx, jy
1829* ..
1830* .. Executable Statements ..
1831*
1832 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1833*
1834 IF( lsame( argnam, 'A' ) ) THEN
1835*
1836* Check IA. Set all other OK, bad IA
1837*
1838 CALL pcsetpblas( ictxt )
1839 ia = -1
1840 infot = argpos + 1
1841 CALL pccallsub( subptr, scode )
1842 CALL pchkpbe( ictxt, nout, sname, infot )
1843*
1844* Check JA. Set all other OK, bad JA
1845*
1846 CALL pcsetpblas( ictxt )
1847 ja = -1
1848 infot = argpos + 2
1849 CALL pccallsub( subptr, scode )
1850 CALL pchkpbe( ictxt, nout, sname, infot )
1851*
1852* Check DESCA. Set all other OK, bad DESCA
1853*
1854 DO 10 i = 1, dlen_
1855*
1856* Set I'th entry of DESCA to incorrect value, rest ok.
1857*
1858 CALL pcsetpblas( ictxt )
1859 desca( i ) = -2
1860 infot = ( ( argpos + 3 ) * descmult ) + i
1861 CALL pccallsub( subptr, scode )
1862 CALL pchkpbe( ictxt, nout, sname, infot )
1863*
1864* Extra tests for RSRCA, CSRCA, LDA
1865*
1866 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1867 $ ( i.EQ.lld_ ) ) THEN
1868*
1869 CALL pcsetpblas( ictxt )
1870*
1871* Test RSRCA >= NPROW
1872*
1873 IF( i.EQ.rsrc_ )
1874 $ desca( i ) = nprow
1875*
1876* Test CSRCA >= NPCOL
1877*
1878 IF( i.EQ.csrc_ )
1879 $ desca( i ) = npcol
1880*
1881* Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1882*
1883 IF( i.EQ.lld_ ) THEN
1884 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1885 desca( i ) = 1
1886 ELSE
1887 desca( i ) = 0
1888 END IF
1889 END IF
1890*
1891 infot = ( ( argpos + 3 ) * descmult ) + i
1892 CALL pccallsub( subptr, scode )
1893 CALL pchkpbe( ictxt, nout, sname, infot )
1894*
1895 END IF
1896*
1897 10 CONTINUE
1898*
1899 ELSE IF( lsame( argnam, 'B' ) ) THEN
1900*
1901* Check IB. Set all other OK, bad IB
1902*
1903 CALL pcsetpblas( ictxt )
1904 ib = -1
1905 infot = argpos + 1
1906 CALL pccallsub( subptr, scode )
1907 CALL pchkpbe( ictxt, nout, sname, infot )
1908*
1909* Check JB. Set all other OK, bad JB
1910*
1911 CALL pcsetpblas( ictxt )
1912 jb = -1
1913 infot = argpos + 2
1914 CALL pccallsub( subptr, scode )
1915 CALL pchkpbe( ictxt, nout, sname, infot )
1916*
1917* Check DESCB. Set all other OK, bad DESCB
1918*
1919 DO 20 i = 1, dlen_
1920*
1921* Set I'th entry of DESCB to incorrect value, rest ok.
1922*
1923 CALL pcsetpblas( ictxt )
1924 descb( i ) = -2
1925 infot = ( ( argpos + 3 ) * descmult ) + i
1926 CALL pccallsub( subptr, scode )
1927 CALL pchkpbe( ictxt, nout, sname, infot )
1928*
1929* Extra tests for RSRCB, CSRCB, LDB
1930*
1931 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1932 $ ( i.EQ.lld_ ) ) THEN
1933*
1934 CALL pcsetpblas( ictxt )
1935*
1936* Test RSRCB >= NPROW
1937*
1938 IF( i.EQ.rsrc_ )
1939 $ descb( i ) = nprow
1940*
1941* Test CSRCB >= NPCOL
1942*
1943 IF( i.EQ.csrc_ )
1944 $ descb( i ) = npcol
1945*
1946* Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
1947*
1948 IF( i.EQ.lld_ ) THEN
1949 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
1950 descb( i ) = 1
1951 ELSE
1952 descb( i ) = 0
1953 END IF
1954 END IF
1955*
1956 infot = ( ( argpos + 3 ) * descmult ) + i
1957 CALL pccallsub( subptr, scode )
1958 CALL pchkpbe( ictxt, nout, sname, infot )
1959*
1960 END IF
1961*
1962 20 CONTINUE
1963*
1964 ELSE IF( lsame( argnam, 'C' ) ) THEN
1965*
1966* Check IC. Set all other OK, bad IC
1967*
1968 CALL pcsetpblas( ictxt )
1969 ic = -1
1970 infot = argpos + 1
1971 CALL pccallsub( subptr, scode )
1972 CALL pchkpbe( ictxt, nout, sname, infot )
1973*
1974* Check JC. Set all other OK, bad JC
1975*
1976 CALL pcsetpblas( ictxt )
1977 jc = -1
1978 infot = argpos + 2
1979 CALL pccallsub( subptr, scode )
1980 CALL pchkpbe( ictxt, nout, sname, infot )
1981*
1982* Check DESCC. Set all other OK, bad DESCC
1983*
1984 DO 30 i = 1, dlen_
1985*
1986* Set I'th entry of DESCC to incorrect value, rest ok.
1987*
1988 CALL pcsetpblas( ictxt )
1989 descc( i ) = -2
1990 infot = ( ( argpos + 3 ) * descmult ) + i
1991 CALL pccallsub( subptr, scode )
1992 CALL pchkpbe( ictxt, nout, sname, infot )
1993*
1994* Extra tests for RSRCC, CSRCC, LDC
1995*
1996 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
1997 $ ( i.EQ.lld_ ) ) THEN
1998*
1999 CALL pcsetpblas( ictxt )
2000*
2001* Test RSRCC >= NPROW
2002*
2003 IF( i.EQ.rsrc_ )
2004 $ descc( i ) = nprow
2005*
2006* Test CSRCC >= NPCOL
2007*
2008 IF( i.EQ.csrc_ )
2009 $ descc( i ) = npcol
2010*
2011* Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2012*
2013 IF( i.EQ.lld_ ) THEN
2014 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2015 descc( i ) = 1
2016 ELSE
2017 descc( i ) = 0
2018 END IF
2019 END IF
2020*
2021 infot = ( ( argpos + 3 ) * descmult ) + i
2022 CALL pccallsub( subptr, scode )
2023 CALL pchkpbe( ictxt, nout, sname, infot )
2024*
2025 END IF
2026*
2027 30 CONTINUE
2028*
2029 ELSE IF( lsame( argnam, 'X' ) ) THEN
2030*
2031* Check IX. Set all other OK, bad IX
2032*
2033 CALL pcsetpblas( ictxt )
2034 ix = -1
2035 infot = argpos + 1
2036 CALL pccallsub( subptr, scode )
2037 CALL pchkpbe( ictxt, nout, sname, infot )
2038*
2039* Check JX. Set all other OK, bad JX
2040*
2041 CALL pcsetpblas( ictxt )
2042 jx = -1
2043 infot = argpos + 2
2044 CALL pccallsub( subptr, scode )
2045 CALL pchkpbe( ictxt, nout, sname, infot )
2046*
2047* Check DESCX. Set all other OK, bad DESCX
2048*
2049 DO 40 i = 1, dlen_
2050*
2051* Set I'th entry of DESCX to incorrect value, rest ok.
2052*
2053 CALL pcsetpblas( ictxt )
2054 descx( i ) = -2
2055 infot = ( ( argpos + 3 ) * descmult ) + i
2056 CALL pccallsub( subptr, scode )
2057 CALL pchkpbe( ictxt, nout, sname, infot )
2058*
2059* Extra tests for RSRCX, CSRCX, LDX
2060*
2061 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2062 $ ( i.EQ.lld_ ) ) THEN
2063*
2064 CALL pcsetpblas( ictxt )
2065*
2066* Test RSRCX >= NPROW
2067*
2068 IF( i.EQ.rsrc_ )
2069 $ descx( i ) = nprow
2070*
2071* Test CSRCX >= NPCOL
2072*
2073 IF( i.EQ.csrc_ )
2074 $ descx( i ) = npcol
2075*
2076* Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2077*
2078 IF( i.EQ.lld_ ) THEN
2079 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2080 descx( i ) = 1
2081 ELSE
2082 descx( i ) = 0
2083 END IF
2084 END IF
2085*
2086 infot = ( ( argpos + 3 ) * descmult ) + i
2087 CALL pccallsub( subptr, scode )
2088 CALL pchkpbe( ictxt, nout, sname, infot )
2089*
2090 END IF
2091*
2092 40 CONTINUE
2093*
2094* Check INCX. Set all other OK, bad INCX
2095*
2096 CALL pcsetpblas( ictxt )
2097 incx = -1
2098 infot = argpos + 4
2099 CALL pccallsub( subptr, scode )
2100 CALL pchkpbe( ictxt, nout, sname, infot )
2101*
2102 ELSE
2103*
2104* Check IY. Set all other OK, bad IY
2105*
2106 CALL pcsetpblas( ictxt )
2107 iy = -1
2108 infot = argpos + 1
2109 CALL pccallsub( subptr, scode )
2110 CALL pchkpbe( ictxt, nout, sname, infot )
2111*
2112* Check JY. Set all other OK, bad JY
2113*
2114 CALL pcsetpblas( ictxt )
2115 jy = -1
2116 infot = argpos + 2
2117 CALL pccallsub( subptr, scode )
2118 CALL pchkpbe( ictxt, nout, sname, infot )
2119*
2120* Check DESCY. Set all other OK, bad DESCY
2121*
2122 DO 50 i = 1, dlen_
2123*
2124* Set I'th entry of DESCY to incorrect value, rest ok.
2125*
2126 CALL pcsetpblas( ictxt )
2127 descy( i ) = -2
2128 infot = ( ( argpos + 3 ) * descmult ) + i
2129 CALL pccallsub( subptr, scode )
2130 CALL pchkpbe( ictxt, nout, sname, infot )
2131*
2132* Extra tests for RSRCY, CSRCY, LDY
2133*
2134 IF( ( i.EQ.rsrc_ ) .OR. ( i.EQ.csrc_ ) .OR.
2135 $ ( i.EQ.lld_ ) ) THEN
2136*
2137 CALL pcsetpblas( ictxt )
2138*
2139* Test RSRCY >= NPROW
2140*
2141 IF( i.EQ.rsrc_ )
2142 $ descy( i ) = nprow
2143*
2144* Test CSRCY >= NPCOL
2145*
2146 IF( i.EQ.csrc_ )
2147 $ descy( i ) = npcol
2148*
2149* Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2.
2150*
2151 IF( i.EQ.lld_ ) THEN
2152 IF( myrow.EQ.0 .AND.mycol.EQ.0 ) THEN
2153 descy( i ) = 1
2154 ELSE
2155 descy( i ) = 0
2156 END IF
2157 END IF
2158*
2159 infot = ( ( argpos + 3 ) * descmult ) + i
2160 CALL pccallsub( subptr, scode )
2161 CALL pchkpbe( ictxt, nout, sname, infot )
2162*
2163 END IF
2164*
2165 50 CONTINUE
2166*
2167* Check INCY. Set all other OK, bad INCY
2168*
2169 CALL pcsetpblas( ictxt )
2170 incy = -1
2171 infot = argpos + 4
2172 CALL pccallsub( subptr, scode )
2173 CALL pchkpbe( ictxt, nout, sname, infot )
2174*
2175 END IF
2176*
2177 RETURN
2178*
2179* End of PCCHKMAT
2180*
2181 END
2182 SUBROUTINE pccallsub( SUBPTR, SCODE )
2183*
2184* -- PBLAS test routine (version 2.0) --
2185* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2186* and University of California, Berkeley.
2187* April 1, 1998
2188*
2189* .. Scalar Arguments ..
2190 INTEGER SCODE
2191* ..
2192* .. Subroutine Arguments ..
2193 EXTERNAL subptr
2194* ..
2195*
2196* Purpose
2197* =======
2198*
2199* PCCALLSUB calls the subroutine SUBPTR with the calling sequence iden-
2200* tified by SCODE.
2201*
2202* Notes
2203* =====
2204*
2205* A description vector is associated with each 2D block-cyclicly dis-
2206* tributed matrix. This vector stores the information required to
2207* establish the mapping between a matrix entry and its corresponding
2208* process and memory location.
2209*
2210* In the following comments, the character _ should be read as
2211* "of the distributed matrix". Let A be a generic term for any 2D
2212* block cyclicly distributed matrix. Its description vector is DESCA:
2213*
2214* NOTATION STORED IN EXPLANATION
2215* ---------------- --------------- ------------------------------------
2216* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2217* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2218* the NPROW x NPCOL BLACS process grid
2219* A is distributed over. The context
2220* itself is global, but the handle
2221* (the integer value) may vary.
2222* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2223* ted matrix A, M_A >= 0.
2224* N_A (global) DESCA( N_ ) The number of columns in the distri-
2225* buted matrix A, N_A >= 0.
2226* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2227* block of the matrix A, IMB_A > 0.
2228* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2229* left block of the matrix A,
2230* INB_A > 0.
2231* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2232* bute the last M_A-IMB_A rows of A,
2233* MB_A > 0.
2234* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2235* bute the last N_A-INB_A columns of
2236* A, NB_A > 0.
2237* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2238* row of the matrix A is distributed,
2239* NPROW > RSRC_A >= 0.
2240* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2241* first column of A is distributed.
2242* NPCOL > CSRC_A >= 0.
2243* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2244* array storing the local blocks of
2245* the distributed matrix A,
2246* IF( Lc( 1, N_A ) > 0 )
2247* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2248* ELSE
2249* LLD_A >= 1.
2250*
2251* Let K be the number of rows of a matrix A starting at the global in-
2252* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2253* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2254* receive if these K rows were distributed over NPROW processes. If K
2255* is the number of columns of a matrix A starting at the global index
2256* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2257* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2258* these K columns were distributed over NPCOL processes.
2259*
2260* The values of Lr() and Lc() may be determined via a call to the func-
2261* tion PB_NUMROC:
2262* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2263* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2264*
2265* Arguments
2266* =========
2267*
2268* SUBPTR (global input) SUBROUTINE
2269* On entry, SUBPTR is a subroutine. SUBPTR must be declared
2270* EXTERNAL in the calling subroutine.
2271*
2272* SCODE (global input) INTEGER
2273* On entry, SCODE specifies the calling sequence code.
2274*
2275* Calling sequence encodings
2276* ==========================
2277*
2278* code Formal argument list Examples
2279*
2280* 11 (n, v1,v2) _SWAP, _COPY
2281* 12 (n,s1, v1 ) _SCAL, _SCAL
2282* 13 (n,s1, v1,v2) _AXPY, _DOT_
2283* 14 (n,s1,i1,v1 ) _AMAX
2284* 15 (n,u1, v1 ) _ASUM, _NRM2
2285*
2286* 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV
2287* 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV
2288* 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV
2289* 24 ( m,n,s1,v1,v2,m1) _GER_
2290* 25 (uplo, n,s1,v1, m1) _SYR
2291* 26 (uplo, n,u1,v1, m1) _HER
2292* 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2
2293*
2294* 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM
2295* 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM
2296* 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK
2297* 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK
2298* 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K
2299* 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K
2300* 37 ( m,n, s1,m1, s2,m3) _TRAN_
2301* 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM
2302* 39 ( trans, m,n, s1,m1, s2,m3) _GEADD
2303* 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD
2304*
2305* -- Written on April 1, 1998 by
2306* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2307*
2308* =====================================================================
2309*
2310* .. Parameters ..
2311 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2312 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2313 $ RSRC_
2314 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
2315 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2316 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2317 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2318* ..
2319* .. Common Blocks ..
2320 CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO
2321 INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB,
2322 $ JC, JX, JY, KDIM, MDIM, NDIM
2323 REAL USCLR
2324 COMPLEX SCLR
2325 INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ),
2326 $ descx( dlen_ ), descy( dlen_ )
2327 COMPLEX A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 )
2328 COMMON /pblasc/diag, side, transa, transb, uplo
2329 COMMON /pblasd/desca, descb, descc, descx, descy
2330 COMMON /pblasi/ia, ib, ic, incx, incy, isclr, ix, iy,
2331 $ ja, jb, jc, jx, jy
2332 COMMON /pblasm/a, b, c
2333 COMMON /pblasn/kdim, mdim, ndim
2334 COMMON /pblass/sclr, usclr
2335 COMMON /pblasv/x, y
2336* ..
2337* .. Executable Statements ..
2338*
2339* Level 1 PBLAS
2340*
2341 IF( scode.EQ.11 ) THEN
2342*
2343 CALL subptr( ndim, x, ix, jx, descx, incx, y, iy, jy, descy,
2344 $ incy )
2345*
2346 ELSE IF( scode.EQ.12 ) THEN
2347*
2348 CALL subptr( ndim, sclr, x, ix, jx, descx, incx )
2349*
2350 ELSE IF( scode.EQ.13 ) THEN
2351*
2352 CALL subptr( ndim, sclr, x, ix, jx, descx, incx, y, iy, jy,
2353 $ descy, incy )
2354*
2355 ELSE IF( scode.EQ.14 ) THEN
2356*
2357 CALL subptr( ndim, sclr, isclr, x, ix, jx, descx, incx )
2358*
2359 ELSE IF( scode.EQ.15 ) THEN
2360*
2361 CALL subptr( ndim, usclr, x, ix, jx, descx, incx )
2362*
2363* Level 2 PBLAS
2364*
2365 ELSE IF( scode.EQ.21 ) THEN
2366*
2367 CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, x, ix,
2368 $ jx, descx, incx, sclr, y, iy, jy, descy, incy )
2369*
2370 ELSE IF( scode.EQ.22 ) THEN
2371*
2372 CALL subptr( uplo, ndim, sclr, a, ia, ja, desca, x, ix, jx,
2373 $ descx, incx, sclr, y, iy, jy, descy, incy )
2374*
2375 ELSE IF( scode.EQ.23 ) THEN
2376*
2377 CALL subptr( uplo, transa, diag, ndim, a, ia, ja, desca, x, ix,
2378 $ jx, descx, incx )
2379*
2380 ELSE IF( scode.EQ.24 ) THEN
2381*
2382 CALL subptr( mdim, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2383 $ jy, descy, incy, a, ia, ja, desca )
2384*
2385 ELSE IF( scode.EQ.25 ) THEN
2386*
2387 CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, a, ia,
2388 $ ja, desca )
2389*
2390 ELSE IF( scode.EQ.26 ) THEN
2391*
2392 CALL subptr( uplo, ndim, usclr, x, ix, jx, descx, incx, a, ia,
2393 $ ja, desca )
2394*
2395 ELSE IF( scode.EQ.27 ) THEN
2396*
2397 CALL subptr( uplo, ndim, sclr, x, ix, jx, descx, incx, y, iy,
2398 $ jy, descy, incy, a, ia, ja, desca )
2399*
2400* Level 3 PBLAS
2401*
2402 ELSE IF( scode.EQ.31 ) THEN
2403*
2404 CALL subptr( transa, transb, mdim, ndim, kdim, sclr, a, ia, ja,
2405 $ desca, b, ib, jb, descb, sclr, c, ic, jc, descc )
2406*
2407 ELSE IF( scode.EQ.32 ) THEN
2408*
2409 CALL subptr( side, uplo, mdim, ndim, sclr, a, ia, ja, desca, b,
2410 $ ib, jb, descb, sclr, c, ic, jc, descc )
2411*
2412 ELSE IF( scode.EQ.33 ) THEN
2413*
2414 CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2415 $ sclr, c, ic, jc, descc )
2416*
2417 ELSE IF( scode.EQ.34 ) THEN
2418*
2419 CALL subptr( uplo, transa, ndim, kdim, usclr, a, ia, ja, desca,
2420 $ usclr, c, ic, jc, descc )
2421*
2422 ELSE IF( scode.EQ.35 ) THEN
2423*
2424 CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2425 $ b, ib, jb, descb, sclr, c, ic, jc, descc )
2426*
2427 ELSE IF( scode.EQ.36 ) THEN
2428*
2429 CALL subptr( uplo, transa, ndim, kdim, sclr, a, ia, ja, desca,
2430 $ b, ib, jb, descb, usclr, c, ic, jc, descc )
2431*
2432 ELSE IF( scode.EQ.37 ) THEN
2433*
2434 CALL subptr( mdim, ndim, sclr, a, ia, ja, desca, sclr, c, ic,
2435 $ jc, descc )
2436*
2437 ELSE IF( scode.EQ.38 ) THEN
2438*
2439 CALL subptr( side, uplo, transa, diag, mdim, ndim, sclr, a, ia,
2440 $ ja, desca, b, ib, jb, descb )
2441*
2442 ELSE IF( scode.EQ.39 ) THEN
2443*
2444 CALL subptr( transa, mdim, ndim, sclr, a, ia, ja, desca, sclr,
2445 $ c, ic, jc, descc )
2446*
2447 ELSE IF( scode.EQ.40 ) THEN
2448*
2449 CALL subptr( uplo, transa, mdim, ndim, sclr, a, ia, ja, desca,
2450 $ sclr, c, ic, jc, descc )
2451*
2452 END IF
2453*
2454 RETURN
2455*
2456* End of PCCALLSUB
2457*
2458 END
2459 SUBROUTINE pcerrset( ERR, ERRMAX, XTRUE, X )
2460*
2461* -- PBLAS test routine (version 2.0) --
2462* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2463* and University of California, Berkeley.
2464* April 1, 1998
2465*
2466* .. Scalar Arguments ..
2467 REAL ERR, ERRMAX
2468 COMPLEX X, XTRUE
2469* ..
2470*
2471* Purpose
2472* =======
2473*
2474* PCERRSET computes the absolute difference ERR = |XTRUE - X| and com-
2475* pares it with zero. ERRMAX accumulates the absolute error difference.
2476*
2477* Notes
2478* =====
2479*
2480* A description vector is associated with each 2D block-cyclicly dis-
2481* tributed matrix. This vector stores the information required to
2482* establish the mapping between a matrix entry and its corresponding
2483* process and memory location.
2484*
2485* In the following comments, the character _ should be read as
2486* "of the distributed matrix". Let A be a generic term for any 2D
2487* block cyclicly distributed matrix. Its description vector is DESCA:
2488*
2489* NOTATION STORED IN EXPLANATION
2490* ---------------- --------------- ------------------------------------
2491* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2492* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2493* the NPROW x NPCOL BLACS process grid
2494* A is distributed over. The context
2495* itself is global, but the handle
2496* (the integer value) may vary.
2497* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2498* ted matrix A, M_A >= 0.
2499* N_A (global) DESCA( N_ ) The number of columns in the distri-
2500* buted matrix A, N_A >= 0.
2501* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2502* block of the matrix A, IMB_A > 0.
2503* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2504* left block of the matrix A,
2505* INB_A > 0.
2506* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2507* bute the last M_A-IMB_A rows of A,
2508* MB_A > 0.
2509* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2510* bute the last N_A-INB_A columns of
2511* A, NB_A > 0.
2512* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2513* row of the matrix A is distributed,
2514* NPROW > RSRC_A >= 0.
2515* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2516* first column of A is distributed.
2517* NPCOL > CSRC_A >= 0.
2518* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2519* array storing the local blocks of
2520* the distributed matrix A,
2521* IF( Lc( 1, N_A ) > 0 )
2522* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2523* ELSE
2524* LLD_A >= 1.
2525*
2526* Let K be the number of rows of a matrix A starting at the global in-
2527* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2528* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2529* receive if these K rows were distributed over NPROW processes. If K
2530* is the number of columns of a matrix A starting at the global index
2531* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2532* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2533* these K columns were distributed over NPCOL processes.
2534*
2535* The values of Lr() and Lc() may be determined via a call to the func-
2536* tion PB_NUMROC:
2537* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2538* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2539*
2540* Arguments
2541* =========
2542*
2543* ERR (local output) REAL
2544* On exit, ERR specifies the absolute difference |XTRUE - X|.
2545*
2546* ERRMAX (local input/local output) REAL
2547* On entry, ERRMAX specifies a previously computed error. On
2548* exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ).
2549*
2550* XTRUE (local input) COMPLEX
2551* On entry, XTRUE specifies the true value.
2552*
2553* X (local input) COMPLEX
2554* On entry, X specifies the value to be compared to XTRUE.
2555*
2556* -- Written on April 1, 1998 by
2557* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2558*
2559* =====================================================================
2560*
2561* .. External Functions ..
2562 REAL PSDIFF
2563 EXTERNAL PSDIFF
2564* ..
2565* .. Intrinsic Functions ..
2566 INTRINSIC abs, aimag, max, real
2567* ..
2568* .. Executable Statements ..
2569*
2570 err = abs( psdiff( real( xtrue ), real( x ) ) )
2571 err = max( err, abs( psdiff( aimag( xtrue ), aimag( x ) ) ) )
2572*
2573 errmax = max( errmax, err )
2574*
2575 RETURN
2576*
2577* End of PCERRSET
2578*
2579 END
2580 SUBROUTINE pcchkvin( ERRMAX, N, X, PX, IX, JX, DESCX, INCX,
2581 $ INFO )
2582*
2583* -- PBLAS test routine (version 2.0) --
2584* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2585* and University of California, Berkeley.
2586* April 1, 1998
2587*
2588* .. Scalar Arguments ..
2589 INTEGER INCX, INFO, IX, JX, N
2590 REAL ERRMAX
2591* ..
2592* .. Array Arguments ..
2593 INTEGER DESCX( * )
2594 COMPLEX PX( * ), X( * )
2595* ..
2596*
2597* Purpose
2598* =======
2599*
2600* PCCHKVIN checks that the submatrix sub( PX ) remained unchanged. The
2601* local array entries are compared element by element, and their dif-
2602* ference is tested against 0.0 as well as the epsilon machine. Notice
2603* that this difference should be numerically exactly the zero machine,
2604* but because of the possible fluctuation of some of the data we flag-
2605* ged differently a difference less than twice the epsilon machine. The
2606* largest error is also returned.
2607*
2608* Notes
2609* =====
2610*
2611* A description vector is associated with each 2D block-cyclicly dis-
2612* tributed matrix. This vector stores the information required to
2613* establish the mapping between a matrix entry and its corresponding
2614* process and memory location.
2615*
2616* In the following comments, the character _ should be read as
2617* "of the distributed matrix". Let A be a generic term for any 2D
2618* block cyclicly distributed matrix. Its description vector is DESCA:
2619*
2620* NOTATION STORED IN EXPLANATION
2621* ---------------- --------------- ------------------------------------
2622* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2623* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2624* the NPROW x NPCOL BLACS process grid
2625* A is distributed over. The context
2626* itself is global, but the handle
2627* (the integer value) may vary.
2628* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2629* ted matrix A, M_A >= 0.
2630* N_A (global) DESCA( N_ ) The number of columns in the distri-
2631* buted matrix A, N_A >= 0.
2632* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2633* block of the matrix A, IMB_A > 0.
2634* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2635* left block of the matrix A,
2636* INB_A > 0.
2637* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2638* bute the last M_A-IMB_A rows of A,
2639* MB_A > 0.
2640* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2641* bute the last N_A-INB_A columns of
2642* A, NB_A > 0.
2643* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2644* row of the matrix A is distributed,
2645* NPROW > RSRC_A >= 0.
2646* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2647* first column of A is distributed.
2648* NPCOL > CSRC_A >= 0.
2649* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2650* array storing the local blocks of
2651* the distributed matrix A,
2652* IF( Lc( 1, N_A ) > 0 )
2653* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2654* ELSE
2655* LLD_A >= 1.
2656*
2657* Let K be the number of rows of a matrix A starting at the global in-
2658* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2659* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2660* receive if these K rows were distributed over NPROW processes. If K
2661* is the number of columns of a matrix A starting at the global index
2662* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2663* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2664* these K columns were distributed over NPCOL processes.
2665*
2666* The values of Lr() and Lc() may be determined via a call to the func-
2667* tion PB_NUMROC:
2668* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2669* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2670*
2671* Arguments
2672* =========
2673*
2674* ERRMAX (global output) REAL
2675* On exit, ERRMAX specifies the largest absolute element-wise
2676* difference between sub( X ) and sub( PX ).
2677*
2678* N (global input) INTEGER
2679* On entry, N specifies the length of the subvector operand
2680* sub( X ). N must be at least zero.
2681*
2682* X (local input) COMPLEX array
2683* On entry, X is an array of dimension (DESCX( M_ ),*). This
2684* array contains a local copy of the initial entire matrix PX.
2685*
2686* PX (local input) COMPLEX array
2687* On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2688* array contains the local entries of the matrix PX.
2689*
2690* IX (global input) INTEGER
2691* On entry, IX specifies X's global row index, which points to
2692* the beginning of the submatrix sub( X ).
2693*
2694* JX (global input) INTEGER
2695* On entry, JX specifies X's global column index, which points
2696* to the beginning of the submatrix sub( X ).
2697*
2698* DESCX (global and local input) INTEGER array
2699* On entry, DESCX is an integer array of dimension DLEN_. This
2700* is the array descriptor for the matrix X.
2701*
2702* INCX (global input) INTEGER
2703* On entry, INCX specifies the global increment for the
2704* elements of X. Only two values of INCX are supported in
2705* this version, namely 1 and M_X. INCX must not be zero.
2706*
2707* INFO (global output) INTEGER
2708* On exit, if INFO = 0, no error has been found,
2709* If INFO > 0, the maximum abolute error found is in (0,eps],
2710* If INFO < 0, the maximum abolute error found is in (eps,+oo).
2711*
2712* -- Written on April 1, 1998 by
2713* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
2714*
2715* =====================================================================
2716*
2717* .. Parameters ..
2718 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
2719 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
2720 $ RSRC_
2721 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
2722 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
2723 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
2724 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
2725 REAL ZERO
2726 PARAMETER ( ZERO = 0.0e+0 )
2727* ..
2728* .. Local Scalars ..
2729 LOGICAL COLREP, ROWREP
2730 INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL,
2731 $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL,
2732 $ MYCOL, MYROW, NPCOL, NPROW
2733 REAL ERR, EPS
2734* ..
2735* .. External Subroutines ..
2736 EXTERNAL blacs_gridinfo, pb_infog2l, pcerrset, sgamx2d
2737* ..
2738* .. External Functions ..
2739 REAL PSLAMCH
2740 EXTERNAL pslamch
2741* ..
2742* .. Intrinsic Functions ..
2743 INTRINSIC abs, aimag, max, min, mod, real
2744* ..
2745* .. Executable Statements ..
2746*
2747 info = 0
2748 errmax = zero
2749*
2750* Quick return if possible
2751*
2752 IF( n.LE.0 )
2753 $ RETURN
2754*
2755 ictxt = descx( ctxt_ )
2756 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
2757*
2758 eps = pslamch( ictxt, 'eps' )
2759*
2760 CALL pb_infog2l( ix, jx, descx, nprow, npcol, myrow, mycol, iix,
2761 $ jjx, ixrow, ixcol )
2762*
2763 ldx = descx( m_ )
2764 ldpx = descx( lld_ )
2765 rowrep = ( ixrow.EQ.-1 )
2766 colrep = ( ixcol.EQ.-1 )
2767*
2768 IF( n.EQ.1 ) THEN
2769*
2770 IF( ( myrow.EQ.ixrow .OR. rowrep ) .AND.
2771 $ ( mycol.EQ.ixcol .OR. colrep ) )
2772 $ CALL pcerrset( err, errmax, x( ix+(jx-1)*ldx ),
2773 $ px( iix+(jjx-1)*ldpx ) )
2774*
2775 ELSE IF( incx.EQ.descx( m_ ) ) THEN
2776*
2777* sub( X ) is a row vector
2778*
2779 jb = descx( inb_ ) - jx + 1
2780 IF( jb.LE.0 )
2781 $ jb = ( ( -jb ) / descx( nb_ ) + 1 ) * descx( nb_ ) + jb
2782 jb = min( jb, n )
2783 jn = jx + jb - 1
2784*
2785 IF( myrow.EQ.ixrow .OR. rowrep ) THEN
2786*
2787 icurcol = ixcol
2788 IF( mycol.EQ.icurcol .OR. colrep ) THEN
2789 DO 10 j = jx, jn
2790 CALL pcerrset( err, errmax, x( ix+(j-1)*ldx ),
2791 $ px( iix+(jjx-1)*ldpx ) )
2792 jjx = jjx + 1
2793 10 CONTINUE
2794 END IF
2795 icurcol = mod( icurcol+1, npcol )
2796*
2797 DO 30 j = jn+1, jx+n-1, descx( nb_ )
2798 jb = min( jx+n-j, descx( nb_ ) )
2799*
2800 IF( mycol.EQ.icurcol .OR. colrep ) THEN
2801*
2802 DO 20 kk = 0, jb-1
2803 CALL pcerrset( err, errmax, x( ix+(j+kk-1)*ldx ),
2804 $ px( iix+(jjx+kk-1)*ldpx ) )
2805 20 CONTINUE
2806*
2807 jjx = jjx + jb
2808*
2809 END IF
2810*
2811 icurcol = mod( icurcol+1, npcol )
2812*
2813 30 CONTINUE
2814*
2815 END IF
2816*
2817 ELSE
2818*
2819* sub( X ) is a column vector
2820*
2821 ib = descx( imb_ ) - ix + 1
2822 IF( ib.LE.0 )
2823 $ ib = ( ( -ib ) / descx( mb_ ) + 1 ) * descx( mb_ ) + ib
2824 ib = min( ib, n )
2825 in = ix + ib - 1
2826*
2827 IF( mycol.EQ.ixcol .OR. colrep ) THEN
2828*
2829 icurrow = ixrow
2830 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2831 DO 40 i = ix, in
2832 CALL pcerrset( err, errmax, x( i+(jx-1)*ldx ),
2833 $ px( iix+(jjx-1)*ldpx ) )
2834 iix = iix + 1
2835 40 CONTINUE
2836 END IF
2837 icurrow = mod( icurrow+1, nprow )
2838*
2839 DO 60 i = in+1, ix+n-1, descx( mb_ )
2840 ib = min( ix+n-i, descx( mb_ ) )
2841*
2842 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
2843*
2844 DO 50 kk = 0, ib-1
2845 CALL pcerrset( err, errmax, x( i+kk+(jx-1)*ldx ),
2846 $ px( iix+kk+(jjx-1)*ldpx ) )
2847 50 CONTINUE
2848*
2849 iix = iix + ib
2850*
2851 END IF
2852*
2853 icurrow = mod( icurrow+1, nprow )
2854*
2855 60 CONTINUE
2856*
2857 END IF
2858*
2859 END IF
2860*
2861 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
2862 $ -1, -1 )
2863*
2864 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
2865 info = 1
2866 ELSE IF( errmax.GT.eps ) THEN
2867 info = -1
2868 END IF
2869*
2870 RETURN
2871*
2872* End of PCCHKVIN
2873*
2874 END
2875 SUBROUTINE pcchkvout( N, X, PX, IX, JX, DESCX, INCX, INFO )
2876*
2877* -- PBLAS test routine (version 2.0) --
2878* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
2879* and University of California, Berkeley.
2880* April 1, 1998
2881*
2882* .. Scalar Arguments ..
2883 INTEGER INCX, INFO, IX, JX, N
2884* ..
2885* .. Array Arguments ..
2886 INTEGER DESCX( * )
2887 COMPLEX PX( * ), X( * )
2888* ..
2889*
2890* Purpose
2891* =======
2892*
2893* PCCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged.
2894* The local array entries are compared element by element, and their
2895* difference is tested against 0.0 as well as the epsilon machine. No-
2896* tice that this difference should be numerically exactly the zero ma-
2897* chine, but because of the possible movement of some of the data we
2898* flagged differently a difference less than twice the epsilon machine.
2899* The largest error is reported.
2900*
2901* Notes
2902* =====
2903*
2904* A description vector is associated with each 2D block-cyclicly dis-
2905* tributed matrix. This vector stores the information required to
2906* establish the mapping between a matrix entry and its corresponding
2907* process and memory location.
2908*
2909* In the following comments, the character _ should be read as
2910* "of the distributed matrix". Let A be a generic term for any 2D
2911* block cyclicly distributed matrix. Its description vector is DESCA:
2912*
2913* NOTATION STORED IN EXPLANATION
2914* ---------------- --------------- ------------------------------------
2915* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
2916* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
2917* the NPROW x NPCOL BLACS process grid
2918* A is distributed over. The context
2919* itself is global, but the handle
2920* (the integer value) may vary.
2921* M_A (global) DESCA( M_ ) The number of rows in the distribu-
2922* ted matrix A, M_A >= 0.
2923* N_A (global) DESCA( N_ ) The number of columns in the distri-
2924* buted matrix A, N_A >= 0.
2925* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
2926* block of the matrix A, IMB_A > 0.
2927* INB_A (global) DESCA( INB_ ) The number of columns of the upper
2928* left block of the matrix A,
2929* INB_A > 0.
2930* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
2931* bute the last M_A-IMB_A rows of A,
2932* MB_A > 0.
2933* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
2934* bute the last N_A-INB_A columns of
2935* A, NB_A > 0.
2936* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
2937* row of the matrix A is distributed,
2938* NPROW > RSRC_A >= 0.
2939* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
2940* first column of A is distributed.
2941* NPCOL > CSRC_A >= 0.
2942* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
2943* array storing the local blocks of
2944* the distributed matrix A,
2945* IF( Lc( 1, N_A ) > 0 )
2946* LLD_A >= MAX( 1, Lr( 1, M_A ) )
2947* ELSE
2948* LLD_A >= 1.
2949*
2950* Let K be the number of rows of a matrix A starting at the global in-
2951* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
2952* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
2953* receive if these K rows were distributed over NPROW processes. If K
2954* is the number of columns of a matrix A starting at the global index
2955* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
2956* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
2957* these K columns were distributed over NPCOL processes.
2958*
2959* The values of Lr() and Lc() may be determined via a call to the func-
2960* tion PB_NUMROC:
2961* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
2962* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
2963*
2964* Arguments
2965* =========
2966*
2967* N (global input) INTEGER
2968* On entry, N specifies the length of the subvector operand
2969* sub( X ). N must be at least zero.
2970*
2971* X (local input) COMPLEX array
2972* On entry, X is an array of dimension (DESCX( M_ ),*). This
2973* array contains a local copy of the initial entire matrix PX.
2974*
2975* PX (local input) COMPLEX array
2976* On entry, PX is an array of dimension (DESCX( LLD_ ),*). This
2977* array contains the local entries of the matrix PX.
2978*
2979* IX (global input) INTEGER
2980* On entry, IX specifies X's global row index, which points to
2981* the beginning of the submatrix sub( X ).
2982*
2983* JX (global input) INTEGER
2984* On entry, JX specifies X's global column index, which points
2985* to the beginning of the submatrix sub( X ).
2986*
2987* DESCX (global and local input) INTEGER array
2988* On entry, DESCX is an integer array of dimension DLEN_. This
2989* is the array descriptor for the matrix X.
2990*
2991* INCX (global input) INTEGER
2992* On entry, INCX specifies the global increment for the
2993* elements of X. Only two values of INCX are supported in
2994* this version, namely 1 and M_X. INCX must not be zero.
2995*
2996* INFO (global output) INTEGER
2997* On exit, if INFO = 0, no error has been found,
2998* If INFO > 0, the maximum abolute error found is in (0,eps],
2999* If INFO < 0, the maximum abolute error found is in (eps,+oo).
3000*
3001* -- Written on April 1, 1998 by
3002* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3003*
3004* =====================================================================
3005*
3006* .. Parameters ..
3007 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3008 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3009 $ RSRC_
3010 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3011 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3012 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3013 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3014 REAL ZERO
3015 PARAMETER ( ZERO = 0.0e+0 )
3016* ..
3017* .. Local Scalars ..
3018 LOGICAL COLREP, ROWREP
3019 INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX,
3020 $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL,
3021 $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL,
3022 $ nprow, nqall
3023 REAL EPS, ERR, ERRMAX
3024* ..
3025* .. External Subroutines ..
3026 EXTERNAL BLACS_GRIDINFO, PCERRSET, SGAMX2D
3027* ..
3028* .. External Functions ..
3029 INTEGER PB_NUMROC
3030 REAL PSLAMCH
3031 EXTERNAL PSLAMCH, PB_NUMROC
3032* ..
3033* .. Intrinsic Functions ..
3034 INTRINSIC abs, aimag, max, min, mod, real
3035* ..
3036* .. Executable Statements ..
3037*
3038 info = 0
3039 errmax = zero
3040*
3041* Quick return if possible
3042*
3043 IF( ( descx( m_ ).LE.0 ).OR.( descx( n_ ).LE.0 ) )
3044 $ RETURN
3045*
3046* Start the operations
3047*
3048 ictxt = descx( ctxt_ )
3049 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3050*
3051 eps = pslamch( ictxt, 'eps' )
3052*
3053 mpall = pb_numroc( descx( m_ ), 1, descx( imb_ ), descx( mb_ ),
3054 $ myrow, descx( rsrc_ ), nprow )
3055 nqall = pb_numroc( descx( n_ ), 1, descx( inb_ ), descx( nb_ ),
3056 $ mycol, descx( csrc_ ), npcol )
3057*
3058 mbx = descx( mb_ )
3059 nbx = descx( nb_ )
3060 ldx = descx( m_ )
3061 ldpx = descx( lld_ )
3062 icurrow = descx( rsrc_ )
3063 icurcol = descx( csrc_ )
3064 rowrep = ( icurrow.EQ.-1 )
3065 colrep = ( icurcol.EQ.-1 )
3066 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3067 imbx = descx( imb_ )
3068 ELSE
3069 imbx = mbx
3070 END IF
3071 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3072 inbx = descx( inb_ )
3073 ELSE
3074 inbx = nbx
3075 END IF
3076 IF( rowrep ) THEN
3077 myrowdist = 0
3078 ELSE
3079 myrowdist = mod( myrow - icurrow + nprow, nprow )
3080 END IF
3081 IF( colrep ) THEN
3082 mycoldist = 0
3083 ELSE
3084 mycoldist = mod( mycol - icurcol + npcol, npcol )
3085 END IF
3086 ii = 1
3087 jj = 1
3088*
3089 IF( incx.EQ.descx( m_ ) ) THEN
3090*
3091* sub( X ) is a row vector
3092*
3093 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3094*
3095 i = 1
3096 IF( mycoldist.EQ.0 ) THEN
3097 j = 1
3098 ELSE
3099 j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3100 END IF
3101 jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3102 ib = min( descx( m_ ), descx( imb_ ) )
3103*
3104 DO 20 kk = 0, jb-1
3105 DO 10 ll = 0, ib-1
3106 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR. j+kk.GT.jx+n-1 )
3107 $ CALL pcerrset( err, errmax,
3108 $ x( i+ll+(j+kk-1)*ldx ),
3109 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3110 10 CONTINUE
3111 20 CONTINUE
3112 IF( colrep ) THEN
3113 j = j + inbx
3114 ELSE
3115 j = j + inbx + ( npcol - 1 ) * nbx
3116 END IF
3117*
3118 DO 50 jj = inbx+1, nqall, nbx
3119 jb = min( nqall-jj+1, nbx )
3120*
3121 DO 40 kk = 0, jb-1
3122 DO 30 ll = 0, ib-1
3123 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3124 $ j+kk.GT.jx+n-1 )
3125 $ CALL pcerrset( err, errmax,
3126 $ x( i+ll+(j+kk-1)*ldx ),
3127 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3128 30 CONTINUE
3129 40 CONTINUE
3130*
3131 IF( colrep ) THEN
3132 j = j + nbx
3133 ELSE
3134 j = j + npcol * nbx
3135 END IF
3136*
3137 50 CONTINUE
3138*
3139 ii = ii + ib
3140*
3141 END IF
3142*
3143 icurrow = mod( icurrow + 1, nprow )
3144*
3145 DO 110 i = descx( imb_ ) + 1, descx( m_ ), mbx
3146 ib = min( descx( m_ ) - i + 1, mbx )
3147*
3148 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3149*
3150 IF( mycoldist.EQ.0 ) THEN
3151 j = 1
3152 ELSE
3153 j = descx( inb_ ) + ( mycoldist - 1 ) * nbx + 1
3154 END IF
3155*
3156 jj = 1
3157 jb = min( max( 0, descx( n_ ) - j + 1 ), inbx )
3158 DO 70 kk = 0, jb-1
3159 DO 60 ll = 0, ib-1
3160 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3161 $ j+kk.GT.jx+n-1 )
3162 $ CALL pcerrset( err, errmax,
3163 $ x( i+ll+(j+kk-1)*ldx ),
3164 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3165 60 CONTINUE
3166 70 CONTINUE
3167 IF( colrep ) THEN
3168 j = j + inbx
3169 ELSE
3170 j = j + inbx + ( npcol - 1 ) * nbx
3171 END IF
3172*
3173 DO 100 jj = inbx+1, nqall, nbx
3174 jb = min( nqall-jj+1, nbx )
3175*
3176 DO 90 kk = 0, jb-1
3177 DO 80 ll = 0, ib-1
3178 IF( i+ll.NE.ix .OR. j+kk.LT.jx .OR.
3179 $ j+kk.GT.jx+n-1 )
3180 $ CALL pcerrset( err, errmax,
3181 $ x( i+ll+(j+kk-1)*ldx ),
3182 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3183 80 CONTINUE
3184 90 CONTINUE
3185*
3186 IF( colrep ) THEN
3187 j = j + nbx
3188 ELSE
3189 j = j + npcol * nbx
3190 END IF
3191*
3192 100 CONTINUE
3193*
3194 ii = ii + ib
3195*
3196 END IF
3197*
3198 icurrow = mod( icurrow + 1, nprow )
3199*
3200 110 CONTINUE
3201*
3202 ELSE
3203*
3204* sub( X ) is a column vector
3205*
3206 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3207*
3208 j = 1
3209 IF( myrowdist.EQ.0 ) THEN
3210 i = 1
3211 ELSE
3212 i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3213 END IF
3214 ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3215 jb = min( descx( n_ ), descx( inb_ ) )
3216*
3217 DO 130 kk = 0, jb-1
3218 DO 120 ll = 0, ib-1
3219 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR. i+ll.GT.ix+n-1 )
3220 $ CALL pcerrset( err, errmax,
3221 $ x( i+ll+(j+kk-1)*ldx ),
3222 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3223 120 CONTINUE
3224 130 CONTINUE
3225 IF( rowrep ) THEN
3226 i = i + imbx
3227 ELSE
3228 i = i + imbx + ( nprow - 1 ) * mbx
3229 END IF
3230*
3231 DO 160 ii = imbx+1, mpall, mbx
3232 ib = min( mpall-ii+1, mbx )
3233*
3234 DO 150 kk = 0, jb-1
3235 DO 140 ll = 0, ib-1
3236 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3237 $ i+ll.GT.ix+n-1 )
3238 $ CALL pcerrset( err, errmax,
3239 $ x( i+ll+(j+kk-1)*ldx ),
3240 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3241 140 CONTINUE
3242 150 CONTINUE
3243*
3244 IF( rowrep ) THEN
3245 i = i + mbx
3246 ELSE
3247 i = i + nprow * mbx
3248 END IF
3249*
3250 160 CONTINUE
3251*
3252 jj = jj + jb
3253*
3254 END IF
3255*
3256 icurcol = mod( icurcol + 1, npcol )
3257*
3258 DO 220 j = descx( inb_ ) + 1, descx( n_ ), nbx
3259 jb = min( descx( n_ ) - j + 1, nbx )
3260*
3261 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3262*
3263 IF( myrowdist.EQ.0 ) THEN
3264 i = 1
3265 ELSE
3266 i = descx( imb_ ) + ( myrowdist - 1 ) * mbx + 1
3267 END IF
3268*
3269 ii = 1
3270 ib = min( max( 0, descx( m_ ) - i + 1 ), imbx )
3271 DO 180 kk = 0, jb-1
3272 DO 170 ll = 0, ib-1
3273 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3274 $ i+ll.GT.ix+n-1 )
3275 $ CALL pcerrset( err, errmax,
3276 $ x( i+ll+(j+kk-1)*ldx ),
3277 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3278 170 CONTINUE
3279 180 CONTINUE
3280 IF( rowrep ) THEN
3281 i = i + imbx
3282 ELSE
3283 i = i + imbx + ( nprow - 1 ) * mbx
3284 END IF
3285*
3286 DO 210 ii = imbx+1, mpall, mbx
3287 ib = min( mpall-ii+1, mbx )
3288*
3289 DO 200 kk = 0, jb-1
3290 DO 190 ll = 0, ib-1
3291 IF( j+kk.NE.jx .OR. i+ll.LT.ix .OR.
3292 $ i+ll.GT.ix+n-1 )
3293 $ CALL pcerrset( err, errmax,
3294 $ x( i+ll+(j+kk-1)*ldx ),
3295 $ px( ii+ll+(jj+kk-1)*ldpx ) )
3296 190 CONTINUE
3297 200 CONTINUE
3298*
3299 IF( rowrep ) THEN
3300 i = i + mbx
3301 ELSE
3302 i = i + nprow * mbx
3303 END IF
3304*
3305 210 CONTINUE
3306*
3307 jj = jj + jb
3308*
3309 END IF
3310*
3311 icurcol = mod( icurcol + 1, npcol )
3312*
3313 220 CONTINUE
3314*
3315 END IF
3316*
3317 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3318 $ -1, -1 )
3319*
3320 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3321 info = 1
3322 ELSE IF( errmax.GT.eps ) THEN
3323 info = -1
3324 END IF
3325*
3326 RETURN
3327*
3328* End of PCCHKVOUT
3329*
3330 END
3331 SUBROUTINE pcchkmin( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO )
3332*
3333* -- PBLAS test routine (version 2.0) --
3334* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3335* and University of California, Berkeley.
3336* April 1, 1998
3337*
3338* .. Scalar Arguments ..
3339 INTEGER IA, INFO, JA, M, N
3340 REAL ERRMAX
3341* ..
3342* .. Array Arguments ..
3343 INTEGER DESCA( * )
3344 COMPLEX PA( * ), A( * )
3345* ..
3346*
3347* Purpose
3348* =======
3349*
3350* PCCHKMIN checks that the submatrix sub( PA ) remained unchanged. The
3351* local array entries are compared element by element, and their dif-
3352* ference is tested against 0.0 as well as the epsilon machine. Notice
3353* that this difference should be numerically exactly the zero machine,
3354* but because of the possible fluctuation of some of the data we flag-
3355* ged differently a difference less than twice the epsilon machine. The
3356* largest error is also returned.
3357*
3358* Notes
3359* =====
3360*
3361* A description vector is associated with each 2D block-cyclicly dis-
3362* tributed matrix. This vector stores the information required to
3363* establish the mapping between a matrix entry and its corresponding
3364* process and memory location.
3365*
3366* In the following comments, the character _ should be read as
3367* "of the distributed matrix". Let A be a generic term for any 2D
3368* block cyclicly distributed matrix. Its description vector is DESCA:
3369*
3370* NOTATION STORED IN EXPLANATION
3371* ---------------- --------------- ------------------------------------
3372* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3373* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3374* the NPROW x NPCOL BLACS process grid
3375* A is distributed over. The context
3376* itself is global, but the handle
3377* (the integer value) may vary.
3378* M_A (global) DESCA( M_ ) The number of rows in the distribu-
3379* ted matrix A, M_A >= 0.
3380* N_A (global) DESCA( N_ ) The number of columns in the distri-
3381* buted matrix A, N_A >= 0.
3382* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3383* block of the matrix A, IMB_A > 0.
3384* INB_A (global) DESCA( INB_ ) The number of columns of the upper
3385* left block of the matrix A,
3386* INB_A > 0.
3387* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3388* bute the last M_A-IMB_A rows of A,
3389* MB_A > 0.
3390* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3391* bute the last N_A-INB_A columns of
3392* A, NB_A > 0.
3393* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3394* row of the matrix A is distributed,
3395* NPROW > RSRC_A >= 0.
3396* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3397* first column of A is distributed.
3398* NPCOL > CSRC_A >= 0.
3399* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3400* array storing the local blocks of
3401* the distributed matrix A,
3402* IF( Lc( 1, N_A ) > 0 )
3403* LLD_A >= MAX( 1, Lr( 1, M_A ) )
3404* ELSE
3405* LLD_A >= 1.
3406*
3407* Let K be the number of rows of a matrix A starting at the global in-
3408* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3409* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3410* receive if these K rows were distributed over NPROW processes. If K
3411* is the number of columns of a matrix A starting at the global index
3412* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3413* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3414* these K columns were distributed over NPCOL processes.
3415*
3416* The values of Lr() and Lc() may be determined via a call to the func-
3417* tion PB_NUMROC:
3418* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3419* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3420*
3421* Arguments
3422* =========
3423*
3424* ERRMAX (global output) REAL
3425* On exit, ERRMAX specifies the largest absolute element-wise
3426* difference between sub( A ) and sub( PA ).
3427*
3428* M (global input) INTEGER
3429* On entry, M specifies the number of rows of the submatrix
3430* operand sub( A ). M must be at least zero.
3431*
3432* N (global input) INTEGER
3433* On entry, N specifies the number of columns of the submatrix
3434* operand sub( A ). N must be at least zero.
3435*
3436* A (local input) COMPLEX array
3437* On entry, A is an array of dimension (DESCA( M_ ),*). This
3438* array contains a local copy of the initial entire matrix PA.
3439*
3440* PA (local input) COMPLEX array
3441* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3442* array contains the local entries of the matrix PA.
3443*
3444* IA (global input) INTEGER
3445* On entry, IA specifies A's global row index, which points to
3446* the beginning of the submatrix sub( A ).
3447*
3448* JA (global input) INTEGER
3449* On entry, JA specifies A's global column index, which points
3450* to the beginning of the submatrix sub( A ).
3451*
3452* DESCA (global and local input) INTEGER array
3453* On entry, DESCA is an integer array of dimension DLEN_. This
3454* is the array descriptor for the matrix A.
3455*
3456* INFO (global output) INTEGER
3457* On exit, if INFO = 0, no error has been found,
3458* If INFO > 0, the maximum abolute error found is in (0,eps],
3459* If INFO < 0, the maximum abolute error found is in (eps,+oo).
3460*
3461* -- Written on April 1, 1998 by
3462* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3463*
3464* =====================================================================
3465*
3466* .. Parameters ..
3467 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3468 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3469 $ RSRC_
3470 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3471 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3472 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3473 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3474 REAL ZERO
3475 PARAMETER ( ZERO = 0.0e+0 )
3476* ..
3477* .. Local Scalars ..
3478 LOGICAL COLREP, ROWREP
3479 INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
3480 $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
3481 $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW
3482 REAL ERR, EPS
3483* ..
3484* .. External Subroutines ..
3485 EXTERNAL blacs_gridinfo, pb_infog2l, pcerrset, sgamx2d
3486* ..
3487* .. External Functions ..
3488 REAL PSLAMCH
3489 EXTERNAL pslamch
3490* ..
3491* .. Intrinsic Functions ..
3492 INTRINSIC abs, aimag, max, min, mod, real
3493* ..
3494* .. Executable Statements ..
3495*
3496 info = 0
3497 errmax = zero
3498*
3499* Quick return if posssible
3500*
3501 IF( ( m.EQ.0 ).OR.( n.EQ.0 ) )
3502 $ RETURN
3503*
3504* Start the operations
3505*
3506 ictxt = desca( ctxt_ )
3507 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3508*
3509 eps = pslamch( ictxt, 'eps' )
3510*
3511 CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
3512 $ jja, iarow, iacol )
3513*
3514 ii = iia
3515 jj = jja
3516 lda = desca( m_ )
3517 ldpa = desca( lld_ )
3518 icurrow = iarow
3519 icurcol = iacol
3520 rowrep = ( iarow.EQ.-1 )
3521 colrep = ( iacol.EQ.-1 )
3522*
3523* Handle the first block of column separately
3524*
3525 jb = desca( inb_ ) - ja + 1
3526 IF( jb.LE.0 )
3527 $ jb = ( ( -jb ) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
3528 jb = min( jb, n )
3529 jn = ja + jb - 1
3530*
3531 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3532*
3533 DO 40 h = 0, jb-1
3534 ib = desca( imb_ ) - ia + 1
3535 IF( ib.LE.0 )
3536 $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
3537 ib = min( ib, m )
3538 in = ia + ib - 1
3539 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3540 DO 10 k = 0, ib-1
3541 CALL pcerrset( err, errmax, a( ia+k+(ja+h-1)*lda ),
3542 $ pa( ii+k+(jj+h-1)*ldpa ) )
3543 10 CONTINUE
3544 ii = ii + ib
3545 END IF
3546 icurrow = mod( icurrow+1, nprow )
3547*
3548* Loop over remaining block of rows
3549*
3550 DO 30 i = in+1, ia+m-1, desca( mb_ )
3551 ib = min( desca( mb_ ), ia+m-i )
3552 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3553 DO 20 k = 0, ib-1
3554 CALL pcerrset( err, errmax, a( i+k+(ja+h-1)*lda ),
3555 $ pa( ii+k+(jj+h-1)*ldpa ) )
3556 20 CONTINUE
3557 ii = ii + ib
3558 END IF
3559 icurrow = mod( icurrow+1, nprow )
3560 30 CONTINUE
3561*
3562 ii = iia
3563 icurrow = iarow
3564 40 CONTINUE
3565*
3566 jj = jj + jb
3567*
3568 END IF
3569*
3570 icurcol = mod( icurcol+1, npcol )
3571*
3572* Loop over remaining column blocks
3573*
3574 DO 90 j = jn+1, ja+n-1, desca( nb_ )
3575 jb = min( desca( nb_ ), ja+n-j )
3576 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3577 DO 80 h = 0, jb-1
3578 ib = desca( imb_ ) - ia + 1
3579 IF( ib.LE.0 )
3580 $ ib = ( ( -ib ) / desca( mb_ ) + 1 )*desca( mb_ ) + ib
3581 ib = min( ib, m )
3582 in = ia + ib - 1
3583 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3584 DO 50 k = 0, ib-1
3585 CALL pcerrset( err, errmax, a( ia+k+(j+h-1)*lda ),
3586 $ pa( ii+k+(jj+h-1)*ldpa ) )
3587 50 CONTINUE
3588 ii = ii + ib
3589 END IF
3590 icurrow = mod( icurrow+1, nprow )
3591*
3592* Loop over remaining block of rows
3593*
3594 DO 70 i = in+1, ia+m-1, desca( mb_ )
3595 ib = min( desca( mb_ ), ia+m-i )
3596 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
3597 DO 60 k = 0, ib-1
3598 CALL pcerrset( err, errmax,
3599 $ a( i+k+(j+h-1)*lda ),
3600 $ pa( ii+k+(jj+h-1)*ldpa ) )
3601 60 CONTINUE
3602 ii = ii + ib
3603 END IF
3604 icurrow = mod( icurrow+1, nprow )
3605 70 CONTINUE
3606*
3607 ii = iia
3608 icurrow = iarow
3609 80 CONTINUE
3610*
3611 jj = jj + jb
3612 END IF
3613*
3614 icurcol = mod( icurcol+1, npcol )
3615*
3616 90 CONTINUE
3617*
3618 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3619 $ -1, -1 )
3620*
3621 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3622 info = 1
3623 ELSE IF( errmax.GT.eps ) THEN
3624 info = -1
3625 END IF
3626*
3627 RETURN
3628*
3629* End of PCCHKMIN
3630*
3631 END
3632 SUBROUTINE pcchkmout( M, N, A, PA, IA, JA, DESCA, INFO )
3633*
3634* -- PBLAS test routine (version 2.0) --
3635* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3636* and University of California, Berkeley.
3637* April 1, 1998
3638*
3639* .. Scalar Arguments ..
3640 INTEGER IA, INFO, JA, M, N
3641* ..
3642* .. Array Arguments ..
3643 INTEGER DESCA( * )
3644 COMPLEX A( * ), PA( * )
3645* ..
3646*
3647* Purpose
3648* =======
3649*
3650* PCCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged.
3651* The local array entries are compared element by element, and their
3652* difference is tested against 0.0 as well as the epsilon machine. No-
3653* tice that this difference should be numerically exactly the zero ma-
3654* chine, but because of the possible movement of some of the data we
3655* flagged differently a difference less than twice the epsilon machine.
3656* The largest error is reported.
3657*
3658* Notes
3659* =====
3660*
3661* A description vector is associated with each 2D block-cyclicly dis-
3662* tributed matrix. This vector stores the information required to
3663* establish the mapping between a matrix entry and its corresponding
3664* process and memory location.
3665*
3666* In the following comments, the character _ should be read as
3667* "of the distributed matrix". Let A be a generic term for any 2D
3668* block cyclicly distributed matrix. Its description vector is DESCA:
3669*
3670* NOTATION STORED IN EXPLANATION
3671* ---------------- --------------- ------------------------------------
3672* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
3673* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
3674* the NPROW x NPCOL BLACS process grid
3675* A is distributed over. The context
3676* itself is global, but the handle
3677* (the integer value) may vary.
3678* M_A (global) DESCA( M_ ) The number of rows in the distribu-
3679* ted matrix A, M_A >= 0.
3680* N_A (global) DESCA( N_ ) The number of columns in the distri-
3681* buted matrix A, N_A >= 0.
3682* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
3683* block of the matrix A, IMB_A > 0.
3684* INB_A (global) DESCA( INB_ ) The number of columns of the upper
3685* left block of the matrix A,
3686* INB_A > 0.
3687* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
3688* bute the last M_A-IMB_A rows of A,
3689* MB_A > 0.
3690* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
3691* bute the last N_A-INB_A columns of
3692* A, NB_A > 0.
3693* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
3694* row of the matrix A is distributed,
3695* NPROW > RSRC_A >= 0.
3696* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
3697* first column of A is distributed.
3698* NPCOL > CSRC_A >= 0.
3699* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
3700* array storing the local blocks of
3701* the distributed matrix A,
3702* IF( Lc( 1, N_A ) > 0 )
3703* LLD_A >= MAX( 1, Lr( 1, M_A ) )
3704* ELSE
3705* LLD_A >= 1.
3706*
3707* Let K be the number of rows of a matrix A starting at the global in-
3708* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
3709* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
3710* receive if these K rows were distributed over NPROW processes. If K
3711* is the number of columns of a matrix A starting at the global index
3712* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
3713* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
3714* these K columns were distributed over NPCOL processes.
3715*
3716* The values of Lr() and Lc() may be determined via a call to the func-
3717* tion PB_NUMROC:
3718* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
3719* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
3720*
3721* Arguments
3722* =========
3723*
3724* M (global input) INTEGER
3725* On entry, M specifies the number of rows of the submatrix
3726* sub( PA ). M must be at least zero.
3727*
3728* N (global input) INTEGER
3729* On entry, N specifies the number of columns of the submatrix
3730* sub( PA ). N must be at least zero.
3731*
3732* A (local input) COMPLEX array
3733* On entry, A is an array of dimension (DESCA( M_ ),*). This
3734* array contains a local copy of the initial entire matrix PA.
3735*
3736* PA (local input) COMPLEX array
3737* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
3738* array contains the local entries of the matrix PA.
3739*
3740* IA (global input) INTEGER
3741* On entry, IA specifies A's global row index, which points to
3742* the beginning of the submatrix sub( A ).
3743*
3744* JA (global input) INTEGER
3745* On entry, JA specifies A's global column index, which points
3746* to the beginning of the submatrix sub( A ).
3747*
3748* DESCA (global and local input) INTEGER array
3749* On entry, DESCA is an integer array of dimension DLEN_. This
3750* is the array descriptor for the matrix A.
3751*
3752* INFO (global output) INTEGER
3753* On exit, if INFO = 0, no error has been found,
3754* If INFO > 0, the maximum abolute error found is in (0,eps],
3755* If INFO < 0, the maximum abolute error found is in (eps,+oo).
3756*
3757* -- Written on April 1, 1998 by
3758* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
3759*
3760* =====================================================================
3761*
3762* .. Parameters ..
3763 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
3764 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
3765 $ RSRC_
3766 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
3767 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
3768 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
3769 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
3770 REAL ZERO
3771 PARAMETER ( ZERO = 0.0e+0 )
3772* ..
3773* .. Local Scalars ..
3774 LOGICAL COLREP, ROWREP
3775 INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK,
3776 $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST,
3777 $ NPCOL, NPROW
3778 REAL EPS, ERR, ERRMAX
3779* ..
3780* .. External Subroutines ..
3781 EXTERNAL blacs_gridinfo, pcerrset, sgamx2d
3782* ..
3783* .. External Functions ..
3784 INTEGER PB_NUMROC
3785 REAL PSLAMCH
3786 EXTERNAL PSLAMCH, PB_NUMROC
3787* ..
3788* .. Intrinsic Functions ..
3789 INTRINSIC max, min, mod
3790* ..
3791* .. Executable Statements ..
3792*
3793 info = 0
3794 errmax = zero
3795*
3796* Quick return if possible
3797*
3798 IF( ( desca( m_ ).LE.0 ).OR.( desca( n_ ).LE.0 ) )
3799 $ RETURN
3800*
3801* Start the operations
3802*
3803 ictxt = desca( ctxt_ )
3804 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
3805*
3806 eps = pslamch( ictxt, 'eps' )
3807*
3808 mpall = pb_numroc( desca( m_ ), 1, desca( imb_ ), desca( mb_ ),
3809 $ myrow, desca( rsrc_ ), nprow )
3810*
3811 lda = desca( m_ )
3812 ldpa = desca( lld_ )
3813*
3814 ii = 1
3815 jj = 1
3816 rowrep = ( desca( rsrc_ ).EQ.-1 )
3817 colrep = ( desca( csrc_ ).EQ.-1 )
3818 icurcol = desca( csrc_ )
3819 IF( myrow.EQ.desca( rsrc_ ) .OR. rowrep ) THEN
3820 imba = desca( imb_ )
3821 ELSE
3822 imba = desca( mb_ )
3823 END IF
3824 IF( rowrep ) THEN
3825 myrowdist = 0
3826 ELSE
3827 myrowdist = mod( myrow - desca( rsrc_ ) + nprow, nprow )
3828 END IF
3829*
3830 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3831*
3832 j = 1
3833 IF( myrowdist.EQ.0 ) THEN
3834 i = 1
3835 ELSE
3836 i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3837 END IF
3838 ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3839 jb = min( desca( n_ ), desca( inb_ ) )
3840*
3841 DO 20 kk = 0, jb-1
3842 DO 10 ll = 0, ib-1
3843 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3844 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3845 $ CALL pcerrset( err, errmax, a( i+ll+(j+kk-1)*lda ),
3846 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3847 10 CONTINUE
3848 20 CONTINUE
3849 IF( rowrep ) THEN
3850 i = i + imba
3851 ELSE
3852 i = i + imba + ( nprow - 1 ) * desca( mb_ )
3853 END IF
3854*
3855 DO 50 ii = imba + 1, mpall, desca( mb_ )
3856 ib = min( mpall-ii+1, desca( mb_ ) )
3857*
3858 DO 40 kk = 0, jb-1
3859 DO 30 ll = 0, ib-1
3860 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3861 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3862 $ CALL pcerrset( err, errmax,
3863 $ a( i+ll+(j+kk-1)*lda ),
3864 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3865 30 CONTINUE
3866 40 CONTINUE
3867*
3868 IF( rowrep ) THEN
3869 i = i + desca( mb_ )
3870 ELSE
3871 i = i + nprow * desca( mb_ )
3872 END IF
3873*
3874 50 CONTINUE
3875*
3876 jj = jj + jb
3877*
3878 END IF
3879*
3880 icurcol = mod( icurcol + 1, npcol )
3881*
3882 DO 110 j = desca( inb_ ) + 1, desca( n_ ), desca( nb_ )
3883 jb = min( desca( n_ ) - j + 1, desca( nb_ ) )
3884*
3885 IF( mycol.EQ.icurcol .OR. colrep ) THEN
3886*
3887 IF( myrowdist.EQ.0 ) THEN
3888 i = 1
3889 ELSE
3890 i = desca( imb_ ) + ( myrowdist - 1 ) * desca( mb_ ) + 1
3891 END IF
3892*
3893 ii = 1
3894 ib = min( max( 0, desca( m_ ) - i + 1 ), imba )
3895 DO 70 kk = 0, jb-1
3896 DO 60 ll = 0, ib-1
3897 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3898 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3899 $ CALL pcerrset( err, errmax,
3900 $ a( i+ll+(j+kk-1)*lda ),
3901 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3902 60 CONTINUE
3903 70 CONTINUE
3904 IF( rowrep ) THEN
3905 i = i + imba
3906 ELSE
3907 i = i + imba + ( nprow - 1 ) * desca( mb_ )
3908 END IF
3909*
3910 DO 100 ii = imba+1, mpall, desca( mb_ )
3911 ib = min( mpall-ii+1, desca( mb_ ) )
3912*
3913 DO 90 kk = 0, jb-1
3914 DO 80 ll = 0, ib-1
3915 IF( i+ll.LT.ia .OR. i+ll.GT.ia+m-1 .OR.
3916 $ j+kk.LT.ja .OR. j+kk.GT.ja+n-1 )
3917 $ CALL pcerrset( err, errmax,
3918 $ a( i+ll+(j+kk-1)*lda ),
3919 $ pa( ii+ll+(jj+kk-1)*ldpa ) )
3920 80 CONTINUE
3921 90 CONTINUE
3922*
3923 IF( rowrep ) THEN
3924 i = i + desca( mb_ )
3925 ELSE
3926 i = i + nprow * desca( mb_ )
3927 END IF
3928*
3929 100 CONTINUE
3930*
3931 jj = jj + jb
3932*
3933 END IF
3934*
3935 icurcol = mod( icurcol + 1, npcol )
3936* INSERT MODE
3937 110 CONTINUE
3938*
3939 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, errmax, 1, kk, ll, -1,
3940 $ -1, -1 )
3941*
3942 IF( errmax.GT.zero .AND. errmax.LE.eps ) THEN
3943 info = 1
3944 ELSE IF( errmax.GT.eps ) THEN
3945 info = -1
3946 END IF
3947*
3948 RETURN
3949*
3950* End of PCCHKMOUT
3951*
3952 END
3953 SUBROUTINE pcmprnt( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT,
3954 $ CMATNM )
3955*
3956* -- PBLAS test routine (version 2.0) --
3957* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
3958* and University of California, Berkeley.
3959* April 1, 1998
3960*
3961* .. Scalar Arguments ..
3962 INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT
3963* ..
3964* .. Array Arguments ..
3965 CHARACTER*(*) CMATNM
3966 COMPLEX A( LDA, * )
3967* ..
3968*
3969* Purpose
3970* =======
3971*
3972* PCMPRNT prints to the standard output an array A of size m by n. Only
3973* the process of coordinates ( IRPRNT, ICPRNT ) is printing.
3974*
3975* Arguments
3976* =========
3977*
3978* ICTXT (local input) INTEGER
3979* On entry, ICTXT specifies the BLACS context handle, indica-
3980* ting the global context of the operation. The context itself
3981* is global, but the value of ICTXT is local.
3982*
3983* NOUT (global input) INTEGER
3984* On entry, NOUT specifies the unit number for the output file.
3985* When NOUT is 6, output to screen, when NOUT is 0, output to
3986* stderr. NOUT is only defined for process 0.
3987*
3988* M (global input) INTEGER
3989* On entry, M specifies the number of rows of the matrix A. M
3990* must be at least zero.
3991*
3992* N (global input) INTEGER
3993* On entry, N specifies the number of columns of the matrix A.
3994* N must be at least zero.
3995*
3996* A (local input) COMPLEX array
3997* On entry, A is an array of dimension (LDA,N). The leading m
3998* by n part of this array is printed.
3999*
4000* LDA (local input) INTEGER
4001* On entry, LDA specifies the leading dimension of the local
4002* array A to be printed. LDA must be at least MAX( 1, M ).
4003*
4004* IRPRNT (global input) INTEGER
4005* On entry, IRPRNT specifies the process row coordinate of the
4006* printing process.
4007*
4008* ICPRNT (global input) INTEGER
4009* On entry, ICPRNT specifies the process column coordinate of
4010* the printing process.
4011*
4012* CMATNM (global input) CHARACTER*(*)
4013* On entry, CMATNM specifies the identifier of the matrix to be
4014* printed.
4015*
4016* -- Written on April 1, 1998 by
4017* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4018*
4019* =====================================================================
4020*
4021* .. Local Scalars ..
4022 INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW
4023* ..
4024* .. External Subroutines ..
4025 EXTERNAL BLACS_GRIDINFO
4026* ..
4027* .. Intrinsic Functions ..
4028 INTRINSIC aimag, real
4029* ..
4030* .. Executable Statements ..
4031*
4032* Quick return if possible
4033*
4034 IF( ( m.LE.0 ).OR.( n.LE.0 ) )
4035 $ RETURN
4036*
4037* Get grid parameters
4038*
4039 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4040*
4041 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4042*
4043 WRITE( nout, fmt = * )
4044 DO 20 j = 1, n
4045*
4046 DO 10 i = 1, m
4047*
4048 WRITE( nout, fmt = 9999 ) cmatnm, i, j,
4049 $ real( a( i, j ) ), aimag( a( i, j ) )
4050*
4051 10 CONTINUE
4052*
4053 20 CONTINUE
4054*
4055 END IF
4056*
4057 9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', e16.8, '+i*(',
4058 $ e16.8, ')' )
4059*
4060 RETURN
4061*
4062* End of PCMPRNT
4063*
4064 END
4065 SUBROUTINE pcvprnt( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT,
4066 $ CVECNM )
4067*
4068* -- PBLAS test routine (version 2.0) --
4069* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4070* and University of California, Berkeley.
4071* April 1, 1998
4072*
4073* .. Scalar Arguments ..
4074 INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT
4075* ..
4076* .. Array Arguments ..
4077 CHARACTER*(*) CVECNM
4078 COMPLEX X( * )
4079* ..
4080*
4081* Purpose
4082* =======
4083*
4084* PCVPRNT prints to the standard output an vector x of length n. Only
4085* the process of coordinates ( IRPRNT, ICPRNT ) is printing.
4086*
4087* Arguments
4088* =========
4089*
4090* ICTXT (local input) INTEGER
4091* On entry, ICTXT specifies the BLACS context handle, indica-
4092* ting the global context of the operation. The context itself
4093* is global, but the value of ICTXT is local.
4094*
4095* NOUT (global input) INTEGER
4096* On entry, NOUT specifies the unit number for the output file.
4097* When NOUT is 6, output to screen, when NOUT is 0, output to
4098* stderr. NOUT is only defined for process 0.
4099*
4100* N (global input) INTEGER
4101* On entry, N specifies the length of the vector X. N must be
4102* at least zero.
4103*
4104* X (global input) COMPLEX array
4105* On entry, X is an array of dimension at least
4106* ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen-
4107* ted array X must contain the vector x.
4108*
4109* INCX (global input) INTEGER.
4110* On entry, INCX specifies the increment for the elements of X.
4111* INCX must not be zero.
4112*
4113* IRPRNT (global input) INTEGER
4114* On entry, IRPRNT specifies the process row coordinate of the
4115* printing process.
4116*
4117* ICPRNT (global input) INTEGER
4118* On entry, ICPRNT specifies the process column coordinate of
4119* the printing process.
4120*
4121* CVECNM (global input) CHARACTER*(*)
4122* On entry, CVECNM specifies the identifier of the vector to be
4123* printed.
4124*
4125* -- Written on April 1, 1998 by
4126* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4127*
4128* =====================================================================
4129*
4130* .. Local Scalars ..
4131 INTEGER I, MYCOL, MYROW, NPCOL, NPROW
4132* ..
4133* .. External Subroutines ..
4134 EXTERNAL BLACS_GRIDINFO
4135* ..
4136* .. Intrinsic Functions ..
4137 INTRINSIC aimag, real
4138* ..
4139* .. Executable Statements ..
4140*
4141* Quick return if possible
4142*
4143 IF( n.LE.0 )
4144 $ RETURN
4145*
4146* Get grid parameters
4147*
4148 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4149*
4150 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
4151*
4152 WRITE( nout, fmt = * )
4153 DO 10 i = 1, 1 + ( n-1 )*incx, incx
4154*
4155 WRITE( nout, fmt = 9999 ) cvecnm, i, real( x( i ) ),
4156 $ aimag( x( i ) )
4157*
4158 10 CONTINUE
4159*
4160 END IF
4161*
4162 9999 FORMAT( 1x, a, '(', i6, ')=', e16.8, '+i*(', e16.8, ')' )
4163*
4164 RETURN
4165*
4166* End of PCVPRNT
4167*
4168 END
4169 SUBROUTINE pcmvch( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
4170 $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY,
4171 $ DESCY, INCY, G, ERR, INFO )
4172*
4173* -- PBLAS test routine (version 2.0) --
4174* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4175* and University of California, Berkeley.
4176* April 1, 1998
4177*
4178* .. Scalar Arguments ..
4179 CHARACTER*1 TRANS
4180 INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4181 $ JY, M, N
4182 REAL ERR
4183 COMPLEX ALPHA, BETA
4184* ..
4185* .. Array Arguments ..
4186 INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4187 REAL G( * )
4188 COMPLEX A( * ), PY( * ), X( * ), Y( * )
4189* ..
4190*
4191* Purpose
4192* =======
4193*
4194* PCMVCH checks the results of the computational tests.
4195*
4196* Notes
4197* =====
4198*
4199* A description vector is associated with each 2D block-cyclicly dis-
4200* tributed matrix. This vector stores the information required to
4201* establish the mapping between a matrix entry and its corresponding
4202* process and memory location.
4203*
4204* In the following comments, the character _ should be read as
4205* "of the distributed matrix". Let A be a generic term for any 2D
4206* block cyclicly distributed matrix. Its description vector is DESCA:
4207*
4208* NOTATION STORED IN EXPLANATION
4209* ---------------- --------------- ------------------------------------
4210* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4211* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4212* the NPROW x NPCOL BLACS process grid
4213* A is distributed over. The context
4214* itself is global, but the handle
4215* (the integer value) may vary.
4216* M_A (global) DESCA( M_ ) The number of rows in the distribu-
4217* ted matrix A, M_A >= 0.
4218* N_A (global) DESCA( N_ ) The number of columns in the distri-
4219* buted matrix A, N_A >= 0.
4220* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4221* block of the matrix A, IMB_A > 0.
4222* INB_A (global) DESCA( INB_ ) The number of columns of the upper
4223* left block of the matrix A,
4224* INB_A > 0.
4225* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4226* bute the last M_A-IMB_A rows of A,
4227* MB_A > 0.
4228* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4229* bute the last N_A-INB_A columns of
4230* A, NB_A > 0.
4231* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4232* row of the matrix A is distributed,
4233* NPROW > RSRC_A >= 0.
4234* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4235* first column of A is distributed.
4236* NPCOL > CSRC_A >= 0.
4237* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4238* array storing the local blocks of
4239* the distributed matrix A,
4240* IF( Lc( 1, N_A ) > 0 )
4241* LLD_A >= MAX( 1, Lr( 1, M_A ) )
4242* ELSE
4243* LLD_A >= 1.
4244*
4245* Let K be the number of rows of a matrix A starting at the global in-
4246* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4247* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4248* receive if these K rows were distributed over NPROW processes. If K
4249* is the number of columns of a matrix A starting at the global index
4250* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4251* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4252* these K columns were distributed over NPCOL processes.
4253*
4254* The values of Lr() and Lc() may be determined via a call to the func-
4255* tion PB_NUMROC:
4256* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4257* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4258*
4259* Arguments
4260* =========
4261*
4262* ICTXT (local input) INTEGER
4263* On entry, ICTXT specifies the BLACS context handle, indica-
4264* ting the global context of the operation. The context itself
4265* is global, but the value of ICTXT is local.
4266*
4267* TRANS (global input) CHARACTER*1
4268* On entry, TRANS specifies which matrix-vector product is to
4269* be computed as follows:
4270* If TRANS = 'T',
4271* sub( Y ) = BETA * sub( Y ) + sub( A )**T * sub( X ),
4272* else if TRANS = 'C',
4273* sub( Y ) = BETA * sub( Y ) + sub( A )**H * sub( X ),
4274* otherwise
4275* sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ).
4276*
4277* M (global input) INTEGER
4278* On entry, M specifies the number of rows of the submatrix
4279* operand matrix A. M must be at least zero.
4280*
4281* N (global input) INTEGER
4282* On entry, N specifies the number of columns of the subma-
4283* trix operand matrix A. N must be at least zero.
4284*
4285* ALPHA (global input) COMPLEX
4286* On entry, ALPHA specifies the scalar alpha.
4287*
4288* A (local input) COMPLEX array
4289* On entry, A is an array of dimension (DESCA( M_ ),*). This
4290* array contains a local copy of the initial entire matrix PA.
4291*
4292* IA (global input) INTEGER
4293* On entry, IA specifies A's global row index, which points to
4294* the beginning of the submatrix sub( A ).
4295*
4296* JA (global input) INTEGER
4297* On entry, JA specifies A's global column index, which points
4298* to the beginning of the submatrix sub( A ).
4299*
4300* DESCA (global and local input) INTEGER array
4301* On entry, DESCA is an integer array of dimension DLEN_. This
4302* is the array descriptor for the matrix A.
4303*
4304* X (local input) COMPLEX array
4305* On entry, X is an array of dimension (DESCX( M_ ),*). This
4306* array contains a local copy of the initial entire matrix PX.
4307*
4308* IX (global input) INTEGER
4309* On entry, IX specifies X's global row index, which points to
4310* the beginning of the submatrix sub( X ).
4311*
4312* JX (global input) INTEGER
4313* On entry, JX specifies X's global column index, which points
4314* to the beginning of the submatrix sub( X ).
4315*
4316* DESCX (global and local input) INTEGER array
4317* On entry, DESCX is an integer array of dimension DLEN_. This
4318* is the array descriptor for the matrix X.
4319*
4320* INCX (global input) INTEGER
4321* On entry, INCX specifies the global increment for the
4322* elements of X. Only two values of INCX are supported in
4323* this version, namely 1 and M_X. INCX must not be zero.
4324*
4325* BETA (global input) COMPLEX
4326* On entry, BETA specifies the scalar beta.
4327*
4328* Y (local input/local output) COMPLEX array
4329* On entry, Y is an array of dimension (DESCY( M_ ),*). This
4330* array contains a local copy of the initial entire matrix PY.
4331*
4332* PY (local input) COMPLEX array
4333* On entry, PY is an array of dimension (DESCY( LLD_ ),*). This
4334* array contains the local entries of the matrix PY.
4335*
4336* IY (global input) INTEGER
4337* On entry, IY specifies Y's global row index, which points to
4338* the beginning of the submatrix sub( Y ).
4339*
4340* JY (global input) INTEGER
4341* On entry, JY specifies Y's global column index, which points
4342* to the beginning of the submatrix sub( Y ).
4343*
4344* DESCY (global and local input) INTEGER array
4345* On entry, DESCY is an integer array of dimension DLEN_. This
4346* is the array descriptor for the matrix Y.
4347*
4348* INCY (global input) INTEGER
4349* On entry, INCY specifies the global increment for the
4350* elements of Y. Only two values of INCY are supported in
4351* this version, namely 1 and M_Y. INCY must not be zero.
4352*
4353* G (workspace) REAL array
4354* On entry, G is an array of dimension at least MAX( M, N ). G
4355* is used to compute the gauges.
4356*
4357* ERR (global output) REAL
4358* On exit, ERR specifies the largest error in absolute value.
4359*
4360* INFO (global output) INTEGER
4361* On exit, if INFO <> 0, the result is less than half accurate.
4362*
4363* -- Written on April 1, 1998 by
4364* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4365*
4366* =====================================================================
4367*
4368* .. Parameters ..
4369 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4370 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4371 $ RSRC_
4372 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4373 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4374 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4375 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4376 REAL RZERO, RONE
4377 parameter( rzero = 0.0e+0, rone = 1.0e+0 )
4378 COMPLEX ZERO, ONE
4379 PARAMETER ( ZERO = ( 0.0e+0, 0.0e+0 ),
4380 $ one = ( 1.0e+0, 0.0e+0 ) )
4381* ..
4382* .. Local Scalars ..
4383 LOGICAL COLREP, CTRAN, ROWREP, TRAN
4384 INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX,
4385 $ ioffy, iycol, iyrow, j, jb, jjy, jn, kk, lda,
4386 $ ldpy, ldx, ldy, ml, mycol, myrow, nl, npcol,
4387 $ nprow
4388 REAL EPS, ERRI, GTMP
4389 COMPLEX C, TBETA, YTMP
4390* ..
4391* .. External Subroutines ..
4392 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
4393* ..
4394* .. External Functions ..
4395 LOGICAL LSAME
4396 REAL PSLAMCH
4397 EXTERNAL lsame, pslamch
4398* ..
4399* .. Intrinsic Functions ..
4400 INTRINSIC abs, aimag, conjg, max, min, mod, real, sqrt
4401* ..
4402* .. Statement Functions ..
4403 REAL ABS1
4404 abs1( c ) = abs( real( c ) ) + abs( aimag( c ) )
4405* ..
4406* .. Executable Statements ..
4407*
4408 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4409*
4410 eps = pslamch( ictxt, 'eps' )
4411*
4412 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
4413 tbeta = one
4414 ELSE
4415 tbeta = beta
4416 END IF
4417*
4418 tran = lsame( trans, 'T' )
4419 ctran = lsame( trans, 'C' )
4420 IF( tran.OR.ctran ) THEN
4421 ml = n
4422 nl = m
4423 ELSE
4424 ml = m
4425 nl = n
4426 END IF
4427*
4428 lda = max( 1, desca( m_ ) )
4429 ldx = max( 1, descx( m_ ) )
4430 ldy = max( 1, descy( m_ ) )
4431*
4432* Compute expected result in Y using data in A, X and Y.
4433* Compute gauges in G. This part of the computation is performed
4434* by every process in the grid.
4435*
4436 ioffy = iy + ( jy - 1 ) * ldy
4437 DO 40 i = 1, ml
4438 ytmp = zero
4439 gtmp = rzero
4440 ioffx = ix + ( jx - 1 ) * ldx
4441 IF( tran )THEN
4442 ioffa = ia + ( ja + i - 2 ) * lda
4443 DO 10 j = 1, nl
4444 ytmp = ytmp + a( ioffa ) * x( ioffx )
4445 gtmp = gtmp + abs1( a( ioffa ) ) * abs1( x( ioffx ) )
4446 ioffa = ioffa + 1
4447 ioffx = ioffx + incx
4448 10 CONTINUE
4449 ELSE IF( ctran )THEN
4450 ioffa = ia + ( ja + i - 2 ) * lda
4451 DO 20 j = 1, nl
4452 ytmp = ytmp + conjg( a( ioffa ) ) * x( ioffx )
4453 gtmp = gtmp + abs1( a( ioffa ) ) * abs1( x( ioffx ) )
4454 ioffa = ioffa + 1
4455 ioffx = ioffx + incx
4456 20 CONTINUE
4457 ELSE
4458 ioffa = ia + i - 1 + ( ja - 1 ) * lda
4459 DO 30 j = 1, nl
4460 ytmp = ytmp + a( ioffa ) * x( ioffx )
4461 gtmp = gtmp + abs1( a( ioffa ) ) * abs1( x( ioffx ) )
4462 ioffa = ioffa + lda
4463 ioffx = ioffx + incx
4464 30 CONTINUE
4465 END IF
4466 g( i ) = abs1( alpha )*gtmp + abs1( tbeta )*abs1( y( ioffy ) )
4467 y( ioffy ) = alpha * ytmp + tbeta * y( ioffy )
4468 ioffy = ioffy + incy
4469 40 CONTINUE
4470*
4471* Compute the error ratio for this result.
4472*
4473 err = rzero
4474 info = 0
4475 ldpy = descy( lld_ )
4476 ioffy = iy + ( jy - 1 ) * ldy
4477 CALL pb_infog2l( iy, jy, descy, nprow, npcol, myrow, mycol, iiy,
4478 $ jjy, iyrow, iycol )
4479 icurrow = iyrow
4480 icurcol = iycol
4481 rowrep = ( iyrow.EQ.-1 )
4482 colrep = ( iycol.EQ.-1 )
4483*
4484 IF( incy.EQ.descy( m_ ) ) THEN
4485*
4486* sub( Y ) is a row vector
4487*
4488 jb = descy( inb_ ) - jy + 1
4489 IF( jb.LE.0 )
4490 $ jb = ( ( -jb ) / descy( nb_ ) + 1 ) * descy( nb_ ) + jb
4491 jb = min( jb, ml )
4492 jn = jy + jb - 1
4493*
4494 DO 50 j = jy, jn
4495*
4496 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4497 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4498 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4499 IF( g( j-jy+1 ).NE.rzero )
4500 $ erri = erri / g( j-jy+1 )
4501 err = max( err, erri )
4502 IF( err*sqrt( eps ).GE.rone )
4503 $ info = 1
4504 jjy = jjy + 1
4505 END IF
4506*
4507 ioffy = ioffy + incy
4508*
4509 50 CONTINUE
4510*
4511 icurcol = mod( icurcol+1, npcol )
4512*
4513 DO 70 j = jn+1, jy+ml-1, descy( nb_ )
4514 jb = min( jy+ml-j, descy( nb_ ) )
4515*
4516 DO 60 kk = 0, jb-1
4517*
4518 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4519 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4520 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4521 IF( g( j+kk-jy+1 ).NE.rzero )
4522 $ erri = erri / g( j+kk-jy+1 )
4523 err = max( err, erri )
4524 IF( err*sqrt( eps ).GE.rone )
4525 $ info = 1
4526 jjy = jjy + 1
4527 END IF
4528*
4529 ioffy = ioffy + incy
4530*
4531 60 CONTINUE
4532*
4533 icurcol = mod( icurcol+1, npcol )
4534*
4535 70 CONTINUE
4536*
4537 ELSE
4538*
4539* sub( Y ) is a column vector
4540*
4541 ib = descy( imb_ ) - iy + 1
4542 IF( ib.LE.0 )
4543 $ ib = ( ( -ib ) / descy( mb_ ) + 1 ) * descy( mb_ ) + ib
4544 ib = min( ib, ml )
4545 in = iy + ib - 1
4546*
4547 DO 80 i = iy, in
4548*
4549 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4550 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4551 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) ) / eps
4552 IF( g( i-iy+1 ).NE.rzero )
4553 $ erri = erri / g( i-iy+1 )
4554 err = max( err, erri )
4555 IF( err*sqrt( eps ).GE.rone )
4556 $ info = 1
4557 iiy = iiy + 1
4558 END IF
4559*
4560 ioffy = ioffy + incy
4561*
4562 80 CONTINUE
4563*
4564 icurrow = mod( icurrow+1, nprow )
4565*
4566 DO 100 i = in+1, iy+ml-1, descy( mb_ )
4567 ib = min( iy+ml-i, descy( mb_ ) )
4568*
4569 DO 90 kk = 0, ib-1
4570*
4571 IF( ( myrow.EQ.icurrow .OR. rowrep ) .AND.
4572 $ ( mycol.EQ.icurcol .OR. colrep ) ) THEN
4573 erri = abs( py( iiy+(jjy-1)*ldpy ) - y( ioffy ) )/eps
4574 IF( g( i+kk-iy+1 ).NE.rzero )
4575 $ erri = erri / g( i+kk-iy+1 )
4576 err = max( err, erri )
4577 IF( err*sqrt( eps ).GE.rone )
4578 $ info = 1
4579 iiy = iiy + 1
4580 END IF
4581*
4582 ioffy = ioffy + incy
4583*
4584 90 CONTINUE
4585*
4586 icurrow = mod( icurrow+1, nprow )
4587*
4588 100 CONTINUE
4589*
4590 END IF
4591*
4592* If INFO = 0, all results are at least half accurate.
4593*
4594 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4595 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4596 $ mycol )
4597*
4598 RETURN
4599*
4600* End of PCMVCH
4601*
4602 END
4603 SUBROUTINE pcvmch( ICTXT, TRANS, UPLO, M, N, ALPHA, X, IX, JX,
4604 $ DESCX, INCX, Y, IY, JY, DESCY, INCY, A, PA,
4605 $ IA, JA, DESCA, G, ERR, INFO )
4606*
4607* -- PBLAS test routine (version 2.0) --
4608* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4609* and University of California, Berkeley.
4610* April 1, 1998
4611*
4612* .. Scalar Arguments ..
4613 CHARACTER*1 TRANS, UPLO
4614 INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4615 $ JY, M, N
4616 REAL ERR
4617 COMPLEX ALPHA
4618* ..
4619* .. Array Arguments ..
4620 INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4621 REAL G( * )
4622 COMPLEX A( * ), PA( * ), X( * ), Y( * )
4623* ..
4624*
4625* Purpose
4626* =======
4627*
4628* PCVMCH checks the results of the computational tests.
4629*
4630* Notes
4631* =====
4632*
4633* A description vector is associated with each 2D block-cyclicly dis-
4634* tributed matrix. This vector stores the information required to
4635* establish the mapping between a matrix entry and its corresponding
4636* process and memory location.
4637*
4638* In the following comments, the character _ should be read as
4639* "of the distributed matrix". Let A be a generic term for any 2D
4640* block cyclicly distributed matrix. Its description vector is DESCA:
4641*
4642* NOTATION STORED IN EXPLANATION
4643* ---------------- --------------- ------------------------------------
4644* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
4645* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
4646* the NPROW x NPCOL BLACS process grid
4647* A is distributed over. The context
4648* itself is global, but the handle
4649* (the integer value) may vary.
4650* M_A (global) DESCA( M_ ) The number of rows in the distribu-
4651* ted matrix A, M_A >= 0.
4652* N_A (global) DESCA( N_ ) The number of columns in the distri-
4653* buted matrix A, N_A >= 0.
4654* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
4655* block of the matrix A, IMB_A > 0.
4656* INB_A (global) DESCA( INB_ ) The number of columns of the upper
4657* left block of the matrix A,
4658* INB_A > 0.
4659* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
4660* bute the last M_A-IMB_A rows of A,
4661* MB_A > 0.
4662* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
4663* bute the last N_A-INB_A columns of
4664* A, NB_A > 0.
4665* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
4666* row of the matrix A is distributed,
4667* NPROW > RSRC_A >= 0.
4668* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
4669* first column of A is distributed.
4670* NPCOL > CSRC_A >= 0.
4671* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
4672* array storing the local blocks of
4673* the distributed matrix A,
4674* IF( Lc( 1, N_A ) > 0 )
4675* LLD_A >= MAX( 1, Lr( 1, M_A ) )
4676* ELSE
4677* LLD_A >= 1.
4678*
4679* Let K be the number of rows of a matrix A starting at the global in-
4680* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
4681* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
4682* receive if these K rows were distributed over NPROW processes. If K
4683* is the number of columns of a matrix A starting at the global index
4684* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
4685* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
4686* these K columns were distributed over NPCOL processes.
4687*
4688* The values of Lr() and Lc() may be determined via a call to the func-
4689* tion PB_NUMROC:
4690* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
4691* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
4692*
4693* Arguments
4694* =========
4695*
4696* ICTXT (local input) INTEGER
4697* On entry, ICTXT specifies the BLACS context handle, indica-
4698* ting the global context of the operation. The context itself
4699* is global, but the value of ICTXT is local.
4700*
4701* TRANS (global input) CHARACTER*1
4702* On entry, TRANS specifies the operation to be performed in
4703* the complex cases:
4704* if TRANS = 'C',
4705* sub( A ) := sub( A ) + alpha * sub( X ) * sub( Y )**H,
4706* otherwise
4707* sub( A ) := sub( A ) + alpha * sub( X ) * sub( Y )**T.
4708*
4709* UPLO (global input) CHARACTER*1
4710* On entry, UPLO specifies which part of the submatrix sub( A )
4711* is to be referenced as follows:
4712* If UPLO = 'L', only the lower triangular part,
4713* If UPLO = 'U', only the upper triangular part,
4714* else the entire matrix is to be referenced.
4715*
4716* M (global input) INTEGER
4717* On entry, M specifies the number of rows of the submatrix
4718* operand matrix A. M must be at least zero.
4719*
4720* N (global input) INTEGER
4721* On entry, N specifies the number of columns of the subma-
4722* trix operand matrix A. N must be at least zero.
4723*
4724* ALPHA (global input) COMPLEX
4725* On entry, ALPHA specifies the scalar alpha.
4726*
4727* X (local input) COMPLEX array
4728* On entry, X is an array of dimension (DESCX( M_ ),*). This
4729* array contains a local copy of the initial entire matrix PX.
4730*
4731* IX (global input) INTEGER
4732* On entry, IX specifies X's global row index, which points to
4733* the beginning of the submatrix sub( X ).
4734*
4735* JX (global input) INTEGER
4736* On entry, JX specifies X's global column index, which points
4737* to the beginning of the submatrix sub( X ).
4738*
4739* DESCX (global and local input) INTEGER array
4740* On entry, DESCX is an integer array of dimension DLEN_. This
4741* is the array descriptor for the matrix X.
4742*
4743* INCX (global input) INTEGER
4744* On entry, INCX specifies the global increment for the
4745* elements of X. Only two values of INCX are supported in
4746* this version, namely 1 and M_X. INCX must not be zero.
4747*
4748* Y (local input) COMPLEX array
4749* On entry, Y is an array of dimension (DESCY( M_ ),*). This
4750* array contains a local copy of the initial entire matrix PY.
4751*
4752* IY (global input) INTEGER
4753* On entry, IY specifies Y's global row index, which points to
4754* the beginning of the submatrix sub( Y ).
4755*
4756* JY (global input) INTEGER
4757* On entry, JY specifies Y's global column index, which points
4758* to the beginning of the submatrix sub( Y ).
4759*
4760* DESCY (global and local input) INTEGER array
4761* On entry, DESCY is an integer array of dimension DLEN_. This
4762* is the array descriptor for the matrix Y.
4763*
4764* INCY (global input) INTEGER
4765* On entry, INCY specifies the global increment for the
4766* elements of Y. Only two values of INCY are supported in
4767* this version, namely 1 and M_Y. INCY must not be zero.
4768*
4769* A (local input/local output) COMPLEX array
4770* On entry, A is an array of dimension (DESCA( M_ ),*). This
4771* array contains a local copy of the initial entire matrix PA.
4772*
4773* PA (local input) COMPLEX array
4774* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
4775* array contains the local entries of the matrix PA.
4776*
4777* IA (global input) INTEGER
4778* On entry, IA specifies A's global row index, which points to
4779* the beginning of the submatrix sub( A ).
4780*
4781* JA (global input) INTEGER
4782* On entry, JA specifies A's global column index, which points
4783* to the beginning of the submatrix sub( A ).
4784*
4785* DESCA (global and local input) INTEGER array
4786* On entry, DESCA is an integer array of dimension DLEN_. This
4787* is the array descriptor for the matrix A.
4788*
4789* G (workspace) REAL array
4790* On entry, G is an array of dimension at least MAX( M, N ). G
4791* is used to compute the gauges.
4792*
4793* ERR (global output) REAL
4794* On exit, ERR specifies the largest error in absolute value.
4795*
4796* INFO (global output) INTEGER
4797* On exit, if INFO <> 0, the result is less than half accurate.
4798*
4799* -- Written on April 1, 1998 by
4800* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
4801*
4802* =====================================================================
4803*
4804* .. Parameters ..
4805 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
4806 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
4807 $ RSRC_
4808 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
4809 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
4810 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
4811 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
4812 REAL ZERO, ONE
4813 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
4814* ..
4815* .. Local Scalars ..
4816 LOGICAL COLREP, CTRAN, LOWER, ROWREP, UPPER
4817 INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
4818 $ in, ioffa, ioffx, ioffy, j, jja, kk, lda, ldpa,
4819 $ ldx, ldy, mycol, myrow, npcol, nprow
4820 REAL EPS, ERRI, GTMP
4821 COMPLEX ATMP, C
4822* ..
4823* .. External Subroutines ..
4824 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
4825* ..
4826* .. External Functions ..
4827 LOGICAL LSAME
4828 REAL PSLAMCH
4829 EXTERNAL LSAME, PSLAMCH
4830* ..
4831* .. Intrinsic Functions ..
4832 INTRINSIC abs, aimag, conjg, max, min, mod, real, sqrt
4833* ..
4834* .. Statement Functions ..
4835 REAL ABS1
4836 ABS1( C ) = abs( real( c ) ) + abs( aimag( c ) )
4837* ..
4838* .. Executable Statements ..
4839*
4840 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
4841*
4842 eps = pslamch( ictxt, 'eps' )
4843*
4844 ctran = lsame( trans, 'C' )
4845 upper = lsame( uplo, 'U' )
4846 lower = lsame( uplo, 'L' )
4847*
4848 lda = max( 1, desca( m_ ) )
4849 ldx = max( 1, descx( m_ ) )
4850 ldy = max( 1, descy( m_ ) )
4851*
4852* Compute expected result in A using data in A, X and Y.
4853* Compute gauges in G. This part of the computation is performed
4854* by every process in the grid.
4855*
4856 DO 70 j = 1, n
4857*
4858 ioffy = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
4859*
4860 IF( lower ) THEN
4861 ibeg = j
4862 iend = m
4863 DO 10 i = 1, j-1
4864 g( i ) = zero
4865 10 CONTINUE
4866 ELSE IF( upper ) THEN
4867 ibeg = 1
4868 iend = j
4869 DO 20 i = j+1, m
4870 g( i ) = zero
4871 20 CONTINUE
4872 ELSE
4873 ibeg = 1
4874 iend = m
4875 END IF
4876*
4877 DO 30 i = ibeg, iend
4878*
4879 ioffx = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
4880 ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
4881 IF( ctran ) THEN
4882 atmp = x( ioffx ) * conjg( y( ioffy ) )
4883 ELSE
4884 atmp = x( ioffx ) * y( ioffy )
4885 END IF
4886 gtmp = abs1( x( ioffx ) ) * abs1( y( ioffy ) )
4887 g( i ) = abs1( alpha ) * gtmp + abs1( a( ioffa ) )
4888 a( ioffa ) = alpha * atmp + a( ioffa )
4889*
4890 30 CONTINUE
4891*
4892* Compute the error ratio for this result.
4893*
4894 info = 0
4895 err = zero
4896 ldpa = desca( lld_ )
4897 ioffa = ia + ( ja + j - 2 ) * lda
4898 CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
4899 $ iia, jja, iarow, iacol )
4900 rowrep = ( iarow.EQ.-1 )
4901 colrep = ( iacol.EQ.-1 )
4902*
4903 IF( mycol.EQ.iacol .OR. colrep ) THEN
4904*
4905 icurrow = iarow
4906 ib = desca( imb_ ) - ia + 1
4907 IF( ib.LE.0 )
4908 $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
4909 ib = min( ib, m )
4910 in = ia + ib - 1
4911*
4912 DO 40 i = ia, in
4913*
4914 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4915 erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
4916 IF( g( i-ia+1 ).NE.zero )
4917 $ erri = erri / g( i-ia+1 )
4918 err = max( err, erri )
4919 IF( err*sqrt( eps ).GE.one )
4920 $ info = 1
4921 iia = iia + 1
4922 END IF
4923*
4924 ioffa = ioffa + 1
4925*
4926 40 CONTINUE
4927*
4928 icurrow = mod( icurrow+1, nprow )
4929*
4930 DO 60 i = in+1, ia+m-1, desca( mb_ )
4931 ib = min( ia+m-i, desca( mb_ ) )
4932*
4933 DO 50 kk = 0, ib-1
4934*
4935 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
4936 erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
4937 IF( g( i+kk-ia+1 ).NE.zero )
4938 $ erri = erri / g( i+kk-ia+1 )
4939 err = max( err, erri )
4940 IF( err*sqrt( eps ).GE.one )
4941 $ info = 1
4942 iia = iia + 1
4943 END IF
4944*
4945 ioffa = ioffa + 1
4946*
4947 50 CONTINUE
4948*
4949 icurrow = mod( icurrow+1, nprow )
4950*
4951 60 CONTINUE
4952*
4953 END IF
4954*
4955* If INFO = 0, all results are at least half accurate.
4956*
4957 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
4958 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
4959 $ mycol )
4960 IF( info.NE.0 )
4961 $ GO TO 80
4962*
4963 70 CONTINUE
4964*
4965 80 CONTINUE
4966*
4967 RETURN
4968*
4969* End of PCVMCH
4970*
4971 END
4972 SUBROUTINE pcvmch2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX,
4973 $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA,
4974 $ JA, DESCA, G, ERR, INFO )
4975*
4976* -- PBLAS test routine (version 2.0) --
4977* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
4978* and University of California, Berkeley.
4979* April 1, 1998
4980*
4981* .. Scalar Arguments ..
4982 CHARACTER*1 UPLO
4983 INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX,
4984 $ jy, m, n
4985 REAL ERR
4986 COMPLEX ALPHA
4987* ..
4988* .. Array Arguments ..
4989 INTEGER DESCA( * ), DESCX( * ), DESCY( * )
4990 REAL G( * )
4991 COMPLEX A( * ), PA( * ), X( * ), Y( * )
4992* ..
4993*
4994* Purpose
4995* =======
4996*
4997* PCVMCH2 checks the results of the computational tests.
4998*
4999* Notes
5000* =====
5001*
5002* A description vector is associated with each 2D block-cyclicly dis-
5003* tributed matrix. This vector stores the information required to
5004* establish the mapping between a matrix entry and its corresponding
5005* process and memory location.
5006*
5007* In the following comments, the character _ should be read as
5008* "of the distributed matrix". Let A be a generic term for any 2D
5009* block cyclicly distributed matrix. Its description vector is DESCA:
5010*
5011* NOTATION STORED IN EXPLANATION
5012* ---------------- --------------- ------------------------------------
5013* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5014* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5015* the NPROW x NPCOL BLACS process grid
5016* A is distributed over. The context
5017* itself is global, but the handle
5018* (the integer value) may vary.
5019* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5020* ted matrix A, M_A >= 0.
5021* N_A (global) DESCA( N_ ) The number of columns in the distri-
5022* buted matrix A, N_A >= 0.
5023* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5024* block of the matrix A, IMB_A > 0.
5025* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5026* left block of the matrix A,
5027* INB_A > 0.
5028* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5029* bute the last M_A-IMB_A rows of A,
5030* MB_A > 0.
5031* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5032* bute the last N_A-INB_A columns of
5033* A, NB_A > 0.
5034* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5035* row of the matrix A is distributed,
5036* NPROW > RSRC_A >= 0.
5037* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5038* first column of A is distributed.
5039* NPCOL > CSRC_A >= 0.
5040* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5041* array storing the local blocks of
5042* the distributed matrix A,
5043* IF( Lc( 1, N_A ) > 0 )
5044* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5045* ELSE
5046* LLD_A >= 1.
5047*
5048* Let K be the number of rows of a matrix A starting at the global in-
5049* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5050* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5051* receive if these K rows were distributed over NPROW processes. If K
5052* is the number of columns of a matrix A starting at the global index
5053* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5054* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5055* these K columns were distributed over NPCOL processes.
5056*
5057* The values of Lr() and Lc() may be determined via a call to the func-
5058* tion PB_NUMROC:
5059* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5060* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5061*
5062* Arguments
5063* =========
5064*
5065* ICTXT (local input) INTEGER
5066* On entry, ICTXT specifies the BLACS context handle, indica-
5067* ting the global context of the operation. The context itself
5068* is global, but the value of ICTXT is local.
5069*
5070* UPLO (global input) CHARACTER*1
5071* On entry, UPLO specifies which part of the submatrix sub( A )
5072* is to be referenced as follows:
5073* If UPLO = 'L', only the lower triangular part,
5074* If UPLO = 'U', only the upper triangular part,
5075* else the entire matrix is to be referenced.
5076*
5077* M (global input) INTEGER
5078* On entry, M specifies the number of rows of the submatrix
5079* operand matrix A. M must be at least zero.
5080*
5081* N (global input) INTEGER
5082* On entry, N specifies the number of columns of the subma-
5083* trix operand matrix A. N must be at least zero.
5084*
5085* ALPHA (global input) COMPLEX
5086* On entry, ALPHA specifies the scalar alpha.
5087*
5088* X (local input) COMPLEX array
5089* On entry, X is an array of dimension (DESCX( M_ ),*). This
5090* array contains a local copy of the initial entire matrix PX.
5091*
5092* IX (global input) INTEGER
5093* On entry, IX specifies X's global row index, which points to
5094* the beginning of the submatrix sub( X ).
5095*
5096* JX (global input) INTEGER
5097* On entry, JX specifies X's global column index, which points
5098* to the beginning of the submatrix sub( X ).
5099*
5100* DESCX (global and local input) INTEGER array
5101* On entry, DESCX is an integer array of dimension DLEN_. This
5102* is the array descriptor for the matrix X.
5103*
5104* INCX (global input) INTEGER
5105* On entry, INCX specifies the global increment for the
5106* elements of X. Only two values of INCX are supported in
5107* this version, namely 1 and M_X. INCX must not be zero.
5108*
5109* Y (local input) COMPLEX array
5110* On entry, Y is an array of dimension (DESCY( M_ ),*). This
5111* array contains a local copy of the initial entire matrix PY.
5112*
5113* IY (global input) INTEGER
5114* On entry, IY specifies Y's global row index, which points to
5115* the beginning of the submatrix sub( Y ).
5116*
5117* JY (global input) INTEGER
5118* On entry, JY specifies Y's global column index, which points
5119* to the beginning of the submatrix sub( Y ).
5120*
5121* DESCY (global and local input) INTEGER array
5122* On entry, DESCY is an integer array of dimension DLEN_. This
5123* is the array descriptor for the matrix Y.
5124*
5125* INCY (global input) INTEGER
5126* On entry, INCY specifies the global increment for the
5127* elements of Y. Only two values of INCY are supported in
5128* this version, namely 1 and M_Y. INCY must not be zero.
5129*
5130* A (local input/local output) COMPLEX array
5131* On entry, A is an array of dimension (DESCA( M_ ),*). This
5132* array contains a local copy of the initial entire matrix PA.
5133*
5134* PA (local input) COMPLEX array
5135* On entry, PA is an array of dimension (DESCA( LLD_ ),*). This
5136* array contains the local entries of the matrix PA.
5137*
5138* IA (global input) INTEGER
5139* On entry, IA specifies A's global row index, which points to
5140* the beginning of the submatrix sub( A ).
5141*
5142* JA (global input) INTEGER
5143* On entry, JA specifies A's global column index, which points
5144* to the beginning of the submatrix sub( A ).
5145*
5146* DESCA (global and local input) INTEGER array
5147* On entry, DESCA is an integer array of dimension DLEN_. This
5148* is the array descriptor for the matrix A.
5149*
5150* G (workspace) REAL array
5151* On entry, G is an array of dimension at least MAX( M, N ). G
5152* is used to compute the gauges.
5153*
5154* ERR (global output) REAL
5155* On exit, ERR specifies the largest error in absolute value.
5156*
5157* INFO (global output) INTEGER
5158* On exit, if INFO <> 0, the result is less than half accurate.
5159*
5160* -- Written on April 1, 1998 by
5161* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5162*
5163* =====================================================================
5164*
5165* .. Parameters ..
5166 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5167 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5168 $ RSRC_
5169 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5170 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5171 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5172 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5173 REAL ZERO, ONE
5174 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
5175* ..
5176* .. Local Scalars ..
5177 LOGICAL COLREP, LOWER, ROWREP, UPPER
5178 INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA,
5179 $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J,
5180 $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW,
5181 $ npcol, nprow
5182 REAL EPS, ERRI, GTMP
5183 COMPLEX C, ATMP
5184* ..
5185* .. External Subroutines ..
5186 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5187* ..
5188* .. External Functions ..
5189 LOGICAL LSAME
5190 REAL PSLAMCH
5191 EXTERNAL LSAME, PSLAMCH
5192* ..
5193* .. Intrinsic Functions ..
5194 INTRINSIC abs, aimag, conjg, max, min, mod, real, sqrt
5195* ..
5196* .. Statement Functions ..
5197 REAL ABS1
5198 abs1( c ) = abs( real( c ) ) + abs( aimag( c ) )
5199* ..
5200* .. Executable Statements ..
5201*
5202 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5203*
5204 eps = pslamch( ictxt, 'eps' )
5205*
5206 upper = lsame( uplo, 'U' )
5207 lower = lsame( uplo, 'L' )
5208*
5209 lda = max( 1, desca( m_ ) )
5210 ldx = max( 1, descx( m_ ) )
5211 ldy = max( 1, descy( m_ ) )
5212*
5213* Compute expected result in A using data in A, X and Y.
5214* Compute gauges in G. This part of the computation is performed
5215* by every process in the grid.
5216*
5217 DO 70 j = 1, n
5218*
5219 ioffxj = ix + ( jx - 1 ) * ldx + ( j - 1 ) * incx
5220 ioffyj = iy + ( jy - 1 ) * ldy + ( j - 1 ) * incy
5221*
5222 IF( lower ) THEN
5223 ibeg = j
5224 iend = m
5225 DO 10 i = 1, j-1
5226 g( i ) = zero
5227 10 CONTINUE
5228 ELSE IF( upper ) THEN
5229 ibeg = 1
5230 iend = j
5231 DO 20 i = j+1, m
5232 g( i ) = zero
5233 20 CONTINUE
5234 ELSE
5235 ibeg = 1
5236 iend = m
5237 END IF
5238*
5239 DO 30 i = ibeg, iend
5240 ioffa = ia + i - 1 + ( ja + j - 2 ) * lda
5241 ioffxi = ix + ( jx - 1 ) * ldx + ( i - 1 ) * incx
5242 ioffyi = iy + ( jy - 1 ) * ldy + ( i - 1 ) * incy
5243 atmp = alpha * x( ioffxi ) * conjg( y( ioffyj ) )
5244 atmp = atmp + y( ioffyi ) * conjg( alpha * x( ioffxj ) )
5245 gtmp = abs1( alpha * x( ioffxi ) ) * abs1( y( ioffyj ) )
5246 gtmp = gtmp + abs1( y( ioffyi ) ) *
5247 $ abs1( conjg( alpha * x( ioffxj ) ) )
5248 g( i ) = gtmp + abs1( a( ioffa ) )
5249 a( ioffa ) = a( ioffa ) + atmp
5250*
5251 30 CONTINUE
5252*
5253* Compute the error ratio for this result.
5254*
5255 info = 0
5256 err = zero
5257 ldpa = desca( lld_ )
5258 ioffa = ia + ( ja + j - 2 ) * lda
5259 CALL pb_infog2l( ia, ja+j-1, desca, nprow, npcol, myrow, mycol,
5260 $ iia, jja, iarow, iacol )
5261 rowrep = ( iarow.EQ.-1 )
5262 colrep = ( iacol.EQ.-1 )
5263*
5264 IF( mycol.EQ.iacol .OR. colrep ) THEN
5265*
5266 icurrow = iarow
5267 ib = desca( imb_ ) - ia + 1
5268 IF( ib.LE.0 )
5269 $ ib = ( ( -ib ) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
5270 ib = min( ib, m )
5271 in = ia + ib - 1
5272*
5273 DO 40 i = ia, in
5274*
5275 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5276 erri = abs( pa( iia+(jja-1)*ldpa ) - a( ioffa ) )/eps
5277 IF( g( i-ia+1 ).NE.zero )
5278 $ erri = erri / g( i-ia+1 )
5279 err = max( err, erri )
5280 IF( err*sqrt( eps ).GE.one )
5281 $ info = 1
5282 iia = iia + 1
5283 END IF
5284*
5285 ioffa = ioffa + 1
5286*
5287 40 CONTINUE
5288*
5289 icurrow = mod( icurrow+1, nprow )
5290*
5291 DO 60 i = in+1, ia+m-1, desca( mb_ )
5292 ib = min( ia+m-i, desca( mb_ ) )
5293*
5294 DO 50 kk = 0, ib-1
5295*
5296 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5297 erri = abs( pa( iia+(jja-1)*ldpa )-a( ioffa ) )/eps
5298 IF( g( i+kk-ia+1 ).NE.zero )
5299 $ erri = erri / g( i+kk-ia+1 )
5300 err = max( err, erri )
5301 IF( err*sqrt( eps ).GE.one )
5302 $ info = 1
5303 iia = iia + 1
5304 END IF
5305*
5306 ioffa = ioffa + 1
5307*
5308 50 CONTINUE
5309*
5310 icurrow = mod( icurrow+1, nprow )
5311*
5312 60 CONTINUE
5313*
5314 END IF
5315*
5316* If INFO = 0, all results are at least half accurate.
5317*
5318 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5319 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5320 $ mycol )
5321 IF( info.NE.0 )
5322 $ GO TO 80
5323*
5324 70 CONTINUE
5325*
5326 80 CONTINUE
5327*
5328 RETURN
5329*
5330* End of PCVMCH2
5331*
5332 END
5333 SUBROUTINE pcmmch( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA,
5334 $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
5335 $ JC, DESCC, CT, G, ERR, INFO )
5336*
5337* -- PBLAS test routine (version 2.0) --
5338* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5339* and University of California, Berkeley.
5340* April 1, 1998
5341*
5342* .. Scalar Arguments ..
5343 CHARACTER*1 TRANSA, TRANSB
5344 INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N
5345 REAL ERR
5346 COMPLEX ALPHA, BETA
5347* ..
5348* .. Array Arguments ..
5349 INTEGER DESCA( * ), DESCB( * ), DESCC( * )
5350 REAL G( * )
5351 COMPLEX A( * ), B( * ), C( * ), CT( * ), PC( * )
5352* ..
5353*
5354* Purpose
5355* =======
5356*
5357* PCMMCH checks the results of the computational tests.
5358*
5359* Notes
5360* =====
5361*
5362* A description vector is associated with each 2D block-cyclicly dis-
5363* tributed matrix. This vector stores the information required to
5364* establish the mapping between a matrix entry and its corresponding
5365* process and memory location.
5366*
5367* In the following comments, the character _ should be read as
5368* "of the distributed matrix". Let A be a generic term for any 2D
5369* block cyclicly distributed matrix. Its description vector is DESCA:
5370*
5371* NOTATION STORED IN EXPLANATION
5372* ---------------- --------------- ------------------------------------
5373* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5374* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5375* the NPROW x NPCOL BLACS process grid
5376* A is distributed over. The context
5377* itself is global, but the handle
5378* (the integer value) may vary.
5379* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5380* ted matrix A, M_A >= 0.
5381* N_A (global) DESCA( N_ ) The number of columns in the distri-
5382* buted matrix A, N_A >= 0.
5383* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5384* block of the matrix A, IMB_A > 0.
5385* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5386* left block of the matrix A,
5387* INB_A > 0.
5388* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5389* bute the last M_A-IMB_A rows of A,
5390* MB_A > 0.
5391* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5392* bute the last N_A-INB_A columns of
5393* A, NB_A > 0.
5394* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5395* row of the matrix A is distributed,
5396* NPROW > RSRC_A >= 0.
5397* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5398* first column of A is distributed.
5399* NPCOL > CSRC_A >= 0.
5400* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5401* array storing the local blocks of
5402* the distributed matrix A,
5403* IF( Lc( 1, N_A ) > 0 )
5404* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5405* ELSE
5406* LLD_A >= 1.
5407*
5408* Let K be the number of rows of a matrix A starting at the global in-
5409* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5410* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5411* receive if these K rows were distributed over NPROW processes. If K
5412* is the number of columns of a matrix A starting at the global index
5413* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5414* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5415* these K columns were distributed over NPCOL processes.
5416*
5417* The values of Lr() and Lc() may be determined via a call to the func-
5418* tion PB_NUMROC:
5419* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5420* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5421*
5422* Arguments
5423* =========
5424*
5425* ICTXT (local input) INTEGER
5426* On entry, ICTXT specifies the BLACS context handle, indica-
5427* ting the global context of the operation. The context itself
5428* is global, but the value of ICTXT is local.
5429*
5430* TRANSA (global input) CHARACTER*1
5431* On entry, TRANSA specifies if the matrix operand A is to be
5432* transposed.
5433*
5434* TRANSB (global input) CHARACTER*1
5435* On entry, TRANSB specifies if the matrix operand B is to be
5436* transposed.
5437*
5438* M (global input) INTEGER
5439* On entry, M specifies the number of rows of C.
5440*
5441* N (global input) INTEGER
5442* On entry, N specifies the number of columns of C.
5443*
5444* K (global input) INTEGER
5445* On entry, K specifies the number of columns (resp. rows) of A
5446* when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK,
5447* PxSYR2K, PxHERK and PxHER2K.
5448*
5449* ALPHA (global input) COMPLEX
5450* On entry, ALPHA specifies the scalar alpha.
5451*
5452* A (local input) COMPLEX array
5453* On entry, A is an array of dimension (DESCA( M_ ),*). This
5454* array contains a local copy of the initial entire matrix PA.
5455*
5456* IA (global input) INTEGER
5457* On entry, IA specifies A's global row index, which points to
5458* the beginning of the submatrix sub( A ).
5459*
5460* JA (global input) INTEGER
5461* On entry, JA specifies A's global column index, which points
5462* to the beginning of the submatrix sub( A ).
5463*
5464* DESCA (global and local input) INTEGER array
5465* On entry, DESCA is an integer array of dimension DLEN_. This
5466* is the array descriptor for the matrix A.
5467*
5468* B (local input) COMPLEX array
5469* On entry, B is an array of dimension (DESCB( M_ ),*). This
5470* array contains a local copy of the initial entire matrix PB.
5471*
5472* IB (global input) INTEGER
5473* On entry, IB specifies B's global row index, which points to
5474* the beginning of the submatrix sub( B ).
5475*
5476* JB (global input) INTEGER
5477* On entry, JB specifies B's global column index, which points
5478* to the beginning of the submatrix sub( B ).
5479*
5480* DESCB (global and local input) INTEGER array
5481* On entry, DESCB is an integer array of dimension DLEN_. This
5482* is the array descriptor for the matrix B.
5483*
5484* BETA (global input) COMPLEX
5485* On entry, BETA specifies the scalar beta.
5486*
5487* C (local input/local output) COMPLEX array
5488* On entry, C is an array of dimension (DESCC( M_ ),*). This
5489* array contains a local copy of the initial entire matrix PC.
5490*
5491* PC (local input) COMPLEX array
5492* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5493* array contains the local pieces of the matrix PC.
5494*
5495* IC (global input) INTEGER
5496* On entry, IC specifies C's global row index, which points to
5497* the beginning of the submatrix sub( C ).
5498*
5499* JC (global input) INTEGER
5500* On entry, JC specifies C's global column index, which points
5501* to the beginning of the submatrix sub( C ).
5502*
5503* DESCC (global and local input) INTEGER array
5504* On entry, DESCC is an integer array of dimension DLEN_. This
5505* is the array descriptor for the matrix C.
5506*
5507* CT (workspace) COMPLEX array
5508* On entry, CT is an array of dimension at least MAX(M,N,K). CT
5509* holds a copy of the current column of C.
5510*
5511* G (workspace) REAL array
5512* On entry, G is an array of dimension at least MAX(M,N,K). G
5513* is used to compute the gauges.
5514*
5515* ERR (global output) REAL
5516* On exit, ERR specifies the largest error in absolute value.
5517*
5518* INFO (global output) INTEGER
5519* On exit, if INFO <> 0, the result is less than half accurate.
5520*
5521* -- Written on April 1, 1998 by
5522* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5523*
5524* =====================================================================
5525*
5526* .. Parameters ..
5527 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5528 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5529 $ RSRC_
5530 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5531 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5532 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5533 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5534 REAL RZERO, RONE
5535 PARAMETER ( RZERO = 0.0e+0, rone = 1.0e+0 )
5536 COMPLEX ZERO
5537 PARAMETER ( ZERO = ( 0.0e+0, 0.0e+0 ) )
5538* ..
5539* .. Local Scalars ..
5540 LOGICAL COLREP, CTRANA, CTRANB, ROWREP, TRANA, TRANB
5541 INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA,
5542 $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC,
5543 $ MYCOL, MYROW, NPCOL, NPROW
5544 REAL EPS, ERRI
5545 COMPLEX Z
5546* ..
5547* .. External Subroutines ..
5548 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5549* ..
5550* .. External Functions ..
5551 LOGICAL LSAME
5552 REAL PSLAMCH
5553 EXTERNAL LSAME, PSLAMCH
5554* ..
5555* .. Intrinsic Functions ..
5556 INTRINSIC abs, aimag, conjg, max, min, mod, real, sqrt
5557* ..
5558* .. Statement Functions ..
5559 REAL ABS1
5560 ABS1( Z ) = abs( real( z ) ) + abs( aimag( z ) )
5561* ..
5562* .. Executable Statements ..
5563*
5564 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
5565*
5566 eps = pslamch( ictxt, 'eps' )
5567*
5568 trana = lsame( transa, 'T' ).OR.lsame( transa, 'C' )
5569 tranb = lsame( transb, 'T' ).OR.lsame( transb, 'C' )
5570 ctrana = lsame( transa, 'C' )
5571 ctranb = lsame( transb, 'C' )
5572*
5573 lda = max( 1, desca( m_ ) )
5574 ldb = max( 1, descb( m_ ) )
5575 ldc = max( 1, descc( m_ ) )
5576*
5577* Compute expected result in C using data in A, B and C.
5578* Compute gauges in G. This part of the computation is performed
5579* by every process in the grid.
5580*
5581 DO 240 j = 1, n
5582*
5583 ioffc = ic + ( jc + j - 2 ) * ldc
5584 DO 10 i = 1, m
5585 ct( i ) = zero
5586 g( i ) = rzero
5587 10 CONTINUE
5588*
5589 IF( .NOT.trana .AND. .NOT.tranb ) THEN
5590 DO 30 kk = 1, k
5591 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5592 DO 20 i = 1, m
5593 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5594 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5595 g( i ) = g( i ) + abs( a( ioffa ) ) *
5596 $ abs( b( ioffb ) )
5597 20 CONTINUE
5598 30 CONTINUE
5599 ELSE IF( trana .AND. .NOT.tranb ) THEN
5600 IF( ctrana ) THEN
5601 DO 50 kk = 1, k
5602 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5603 DO 40 i = 1, m
5604 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5605 ct( i ) = ct( i ) + conjg( a( ioffa ) ) *
5606 $ b( ioffb )
5607 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5608 $ abs1( b( ioffb ) )
5609 40 CONTINUE
5610 50 CONTINUE
5611 ELSE
5612 DO 70 kk = 1, k
5613 ioffb = ib + kk - 1 + ( jb + j - 2 ) * ldb
5614 DO 60 i = 1, m
5615 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5616 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5617 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5618 $ abs1( b( ioffb ) )
5619 60 CONTINUE
5620 70 CONTINUE
5621 END IF
5622 ELSE IF( .NOT.trana .AND. tranb ) THEN
5623 IF( ctranb ) THEN
5624 DO 90 kk = 1, k
5625 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5626 DO 80 i = 1, m
5627 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5628 ct( i ) = ct( i ) + a( ioffa ) *
5629 $ conjg( b( ioffb ) )
5630 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5631 $ abs1( b( ioffb ) )
5632 80 CONTINUE
5633 90 CONTINUE
5634 ELSE
5635 DO 110 kk = 1, k
5636 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5637 DO 100 i = 1, m
5638 ioffa = ia + i - 1 + ( ja + kk - 2 ) * lda
5639 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5640 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5641 $ abs1( b( ioffb ) )
5642 100 CONTINUE
5643 110 CONTINUE
5644 END IF
5645 ELSE IF( trana .AND. tranb ) THEN
5646 IF( ctrana ) THEN
5647 IF( ctranb ) THEN
5648 DO 130 kk = 1, k
5649 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5650 DO 120 i = 1, m
5651 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5652 ct( i ) = ct( i ) + conjg( a( ioffa ) ) *
5653 $ conjg( b( ioffb ) )
5654 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5655 $ abs1( b( ioffb ) )
5656 120 CONTINUE
5657 130 CONTINUE
5658 ELSE
5659 DO 150 kk = 1, k
5660 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5661 DO 140 i = 1, m
5662 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5663 ct( i ) = ct( i ) + conjg( a( ioffa ) ) *
5664 $ b( ioffb )
5665 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5666 $ abs1( b( ioffb ) )
5667 140 CONTINUE
5668 150 CONTINUE
5669 END IF
5670 ELSE
5671 IF( ctranb ) THEN
5672 DO 170 kk = 1, k
5673 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5674 DO 160 i = 1, m
5675 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5676 ct( i ) = ct( i ) + a( ioffa ) *
5677 $ conjg( b( ioffb ) )
5678 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5679 $ abs1( b( ioffb ) )
5680 160 CONTINUE
5681 170 CONTINUE
5682 ELSE
5683 DO 190 kk = 1, k
5684 ioffb = ib + j - 1 + ( jb + kk - 2 ) * ldb
5685 DO 180 i = 1, m
5686 ioffa = ia + kk - 1 + ( ja + i - 2 ) * lda
5687 ct( i ) = ct( i ) + a( ioffa ) * b( ioffb )
5688 g( i ) = g( i ) + abs1( a( ioffa ) ) *
5689 $ abs1( b( ioffb ) )
5690 180 CONTINUE
5691 190 CONTINUE
5692 END IF
5693 END IF
5694 END IF
5695*
5696 DO 200 i = 1, m
5697 ct( i ) = alpha*ct( i ) + beta * c( ioffc )
5698 g( i ) = abs1( alpha )*g( i ) +
5699 $ abs1( beta )*abs1( c( ioffc ) )
5700 c( ioffc ) = ct( i )
5701 ioffc = ioffc + 1
5702 200 CONTINUE
5703*
5704* Compute the error ratio for this result.
5705*
5706 err = rzero
5707 info = 0
5708 ldpc = descc( lld_ )
5709 ioffc = ic + ( jc + j - 2 ) * ldc
5710 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
5711 $ iic, jjc, icrow, iccol )
5712 icurrow = icrow
5713 rowrep = ( icrow.EQ.-1 )
5714 colrep = ( iccol.EQ.-1 )
5715*
5716 IF( mycol.EQ.iccol .OR. colrep ) THEN
5717*
5718 ibb = descc( imb_ ) - ic + 1
5719 IF( ibb.LE.0 )
5720 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
5721 ibb = min( ibb, m )
5722 in = ic + ibb - 1
5723*
5724 DO 210 i = ic, in
5725*
5726 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5727 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5728 $ c( ioffc ) ) / eps
5729 IF( g( i-ic+1 ).NE.rzero )
5730 $ erri = erri / g( i-ic+1 )
5731 err = max( err, erri )
5732 IF( err*sqrt( eps ).GE.rone )
5733 $ info = 1
5734 iic = iic + 1
5735 END IF
5736*
5737 ioffc = ioffc + 1
5738*
5739 210 CONTINUE
5740*
5741 icurrow = mod( icurrow+1, nprow )
5742*
5743 DO 230 i = in+1, ic+m-1, descc( mb_ )
5744 ibb = min( ic+m-i, descc( mb_ ) )
5745*
5746 DO 220 kk = 0, ibb-1
5747*
5748 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
5749 erri = abs( pc( iic+(jjc-1)*ldpc ) -
5750 $ c( ioffc ) )/eps
5751 IF( g( i+kk-ic+1 ).NE.rzero )
5752 $ erri = erri / g( i+kk-ic+1 )
5753 err = max( err, erri )
5754 IF( err*sqrt( eps ).GE.rone )
5755 $ info = 1
5756 iic = iic + 1
5757 END IF
5758*
5759 ioffc = ioffc + 1
5760*
5761 220 CONTINUE
5762*
5763 icurrow = mod( icurrow+1, nprow )
5764*
5765 230 CONTINUE
5766*
5767 END IF
5768*
5769* If INFO = 0, all results are at least half accurate.
5770*
5771 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
5772 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
5773 $ mycol )
5774 IF( info.NE.0 )
5775 $ GO TO 250
5776*
5777 240 CONTINUE
5778*
5779 250 CONTINUE
5780*
5781 RETURN
5782*
5783* End of PCMMCH
5784*
5785 END
5786 SUBROUTINE pcmmch1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
5787 $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G,
5788 $ ERR, INFO )
5789*
5790* -- PBLAS test routine (version 2.0) --
5791* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5792* and University of California, Berkeley.
5793* April 1, 1998
5794*
5795* .. Scalar Arguments ..
5796 CHARACTER*1 TRANS, UPLO
5797 INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N
5798 REAL ERR
5799 COMPLEX ALPHA, BETA
5800* ..
5801* .. Array Arguments ..
5802 INTEGER DESCA( * ), DESCC( * )
5803 REAL G( * )
5804 COMPLEX A( * ), C( * ), CT( * ), PC( * )
5805* ..
5806*
5807* Purpose
5808* =======
5809*
5810* PCMMCH1 checks the results of the computational tests.
5811*
5812* Notes
5813* =====
5814*
5815* A description vector is associated with each 2D block-cyclicly dis-
5816* tributed matrix. This vector stores the information required to
5817* establish the mapping between a matrix entry and its corresponding
5818* process and memory location.
5819*
5820* In the following comments, the character _ should be read as
5821* "of the distributed matrix". Let A be a generic term for any 2D
5822* block cyclicly distributed matrix. Its description vector is DESCA:
5823*
5824* NOTATION STORED IN EXPLANATION
5825* ---------------- --------------- ------------------------------------
5826* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
5827* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
5828* the NPROW x NPCOL BLACS process grid
5829* A is distributed over. The context
5830* itself is global, but the handle
5831* (the integer value) may vary.
5832* M_A (global) DESCA( M_ ) The number of rows in the distribu-
5833* ted matrix A, M_A >= 0.
5834* N_A (global) DESCA( N_ ) The number of columns in the distri-
5835* buted matrix A, N_A >= 0.
5836* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
5837* block of the matrix A, IMB_A > 0.
5838* INB_A (global) DESCA( INB_ ) The number of columns of the upper
5839* left block of the matrix A,
5840* INB_A > 0.
5841* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
5842* bute the last M_A-IMB_A rows of A,
5843* MB_A > 0.
5844* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
5845* bute the last N_A-INB_A columns of
5846* A, NB_A > 0.
5847* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
5848* row of the matrix A is distributed,
5849* NPROW > RSRC_A >= 0.
5850* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
5851* first column of A is distributed.
5852* NPCOL > CSRC_A >= 0.
5853* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
5854* array storing the local blocks of
5855* the distributed matrix A,
5856* IF( Lc( 1, N_A ) > 0 )
5857* LLD_A >= MAX( 1, Lr( 1, M_A ) )
5858* ELSE
5859* LLD_A >= 1.
5860*
5861* Let K be the number of rows of a matrix A starting at the global in-
5862* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
5863* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
5864* receive if these K rows were distributed over NPROW processes. If K
5865* is the number of columns of a matrix A starting at the global index
5866* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
5867* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
5868* these K columns were distributed over NPCOL processes.
5869*
5870* The values of Lr() and Lc() may be determined via a call to the func-
5871* tion PB_NUMROC:
5872* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
5873* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
5874*
5875* Arguments
5876* =========
5877*
5878* ICTXT (local input) INTEGER
5879* On entry, ICTXT specifies the BLACS context handle, indica-
5880* ting the global context of the operation. The context itself
5881* is global, but the value of ICTXT is local.
5882*
5883* UPLO (global input) CHARACTER*1
5884* On entry, UPLO specifies which part of C should contain the
5885* result.
5886*
5887* TRANS (global input) CHARACTER*1
5888* On entry, TRANS specifies whether the matrix A has to be
5889* transposed or not before computing the matrix-matrix product.
5890*
5891* N (global input) INTEGER
5892* On entry, N specifies the order the submatrix operand C. N
5893* must be at least zero.
5894*
5895* K (global input) INTEGER
5896* On entry, K specifies the number of columns (resp. rows) of A
5897* when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least
5898* zero.
5899*
5900* ALPHA (global input) COMPLEX
5901* On entry, ALPHA specifies the scalar alpha.
5902*
5903* A (local input) COMPLEX array
5904* On entry, A is an array of dimension (DESCA( M_ ),*). This
5905* array contains a local copy of the initial entire matrix PA.
5906*
5907* IA (global input) INTEGER
5908* On entry, IA specifies A's global row index, which points to
5909* the beginning of the submatrix sub( A ).
5910*
5911* JA (global input) INTEGER
5912* On entry, JA specifies A's global column index, which points
5913* to the beginning of the submatrix sub( A ).
5914*
5915* DESCA (global and local input) INTEGER array
5916* On entry, DESCA is an integer array of dimension DLEN_. This
5917* is the array descriptor for the matrix A.
5918*
5919* BETA (global input) COMPLEX
5920* On entry, BETA specifies the scalar beta.
5921*
5922* C (local input/local output) COMPLEX array
5923* On entry, C is an array of dimension (DESCC( M_ ),*). This
5924* array contains a local copy of the initial entire matrix PC.
5925*
5926* PC (local input) COMPLEX array
5927* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
5928* array contains the local pieces of the matrix PC.
5929*
5930* IC (global input) INTEGER
5931* On entry, IC specifies C's global row index, which points to
5932* the beginning of the submatrix sub( C ).
5933*
5934* JC (global input) INTEGER
5935* On entry, JC specifies C's global column index, which points
5936* to the beginning of the submatrix sub( C ).
5937*
5938* DESCC (global and local input) INTEGER array
5939* On entry, DESCC is an integer array of dimension DLEN_. This
5940* is the array descriptor for the matrix C.
5941*
5942* CT (workspace) COMPLEX array
5943* On entry, CT is an array of dimension at least MAX(M,N,K). CT
5944* holds a copy of the current column of C.
5945*
5946* G (workspace) REAL array
5947* On entry, G is an array of dimension at least MAX(M,N,K). G
5948* is used to compute the gauges.
5949*
5950* ERR (global output) REAL
5951* On exit, ERR specifies the largest error in absolute value.
5952*
5953* INFO (global output) INTEGER
5954* On exit, if INFO <> 0, the result is less than half accurate.
5955*
5956* -- Written on April 1, 1998 by
5957* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
5958*
5959* =====================================================================
5960*
5961* .. Parameters ..
5962 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
5963 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
5964 $ RSRC_
5965 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
5966 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
5967 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
5968 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
5969 REAL RZERO, RONE
5970 PARAMETER ( RZERO = 0.0e+0, rone = 1.0e+0 )
5971 COMPLEX ZERO
5972 PARAMETER ( ZERO = ( 0.0e+0, 0.0e+0 ) )
5973* ..
5974* .. Local Scalars ..
5975 LOGICAL COLREP, HTRAN, NOTRAN, ROWREP, TRAN, UPPER
5976 INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
5977 $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA,
5978 $ ldc, ldpc, mycol, myrow, npcol, nprow
5979 REAL EPS, ERRI
5980 COMPLEX Z
5981* ..
5982* .. External Subroutines ..
5983 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
5984* ..
5985* .. External Functions ..
5986 LOGICAL LSAME
5987 REAL PSLAMCH
5988 EXTERNAL lsame, pslamch
5989* ..
5990* .. Intrinsic Functions ..
5991 INTRINSIC abs, aimag, conjg, max, min, mod, real, sqrt
5992* ..
5993* .. Statement Functions ..
5994 REAL ABS1
5995 abs1( z ) = abs( real( z ) ) + abs( aimag( z ) )
5996* ..
5997* .. Executable Statements ..
5998*
5999 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6000*
6001 eps = pslamch( ictxt, 'eps' )
6002*
6003 upper = lsame( uplo, 'U' )
6004 notran = lsame( trans, 'N' )
6005 tran = lsame( trans, 'T' )
6006 htran = lsame( trans, 'H' )
6007*
6008 lda = max( 1, desca( m_ ) )
6009 ldc = max( 1, descc( m_ ) )
6010*
6011* Compute expected result in C using data in A, B and C.
6012* Compute gauges in G. This part of the computation is performed
6013* by every process in the grid.
6014*
6015 DO 140 j = 1, n
6016*
6017 IF( upper ) THEN
6018 ibeg = 1
6019 iend = j
6020 ELSE
6021 ibeg = j
6022 iend = n
6023 END IF
6024*
6025 DO 10 i = 1, n
6026 ct( i ) = zero
6027 g( i ) = rzero
6028 10 CONTINUE
6029*
6030 IF( notran ) THEN
6031 DO 30 kk = 1, k
6032 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6033 DO 20 i = ibeg, iend
6034 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6035 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
6036 g( i ) = g( i ) + abs1( a( ioffak ) ) *
6037 $ abs1( a( ioffan ) )
6038 20 CONTINUE
6039 30 CONTINUE
6040 ELSE IF( tran ) THEN
6041 DO 50 kk = 1, k
6042 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6043 DO 40 i = ibeg, iend
6044 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6045 ct( i ) = ct( i ) + a( ioffak ) * a( ioffan )
6046 g( i ) = g( i ) + abs1( a( ioffak ) ) *
6047 $ abs1( a( ioffan ) )
6048 40 CONTINUE
6049 50 CONTINUE
6050 ELSE IF( htran ) THEN
6051 DO 70 kk = 1, k
6052 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6053 DO 60 i = ibeg, iend
6054 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6055 ct( i ) = ct( i ) + a( ioffan ) *
6056 $ conjg( a( ioffak ) )
6057 g( i ) = g( i ) + abs1( a( ioffak ) ) *
6058 $ abs1( a( ioffan ) )
6059 60 CONTINUE
6060 70 CONTINUE
6061 ELSE
6062 DO 90 kk = 1, k
6063 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6064 DO 80 i = ibeg, iend
6065 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6066 ct( i ) = ct( i ) + conjg( a( ioffan ) ) * a( ioffak )
6067 g( i ) = g( i ) + abs1( conjg( a( ioffan ) ) ) *
6068 $ abs1( a( ioffak ) )
6069 80 CONTINUE
6070 90 CONTINUE
6071 END IF
6072*
6073 ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
6074*
6075 DO 100 i = ibeg, iend
6076 ct( i ) = alpha*ct( i ) + beta * c( ioffc )
6077 g( i ) = abs1( alpha )*g( i ) +
6078 $ abs1( beta )*abs1( c( ioffc ) )
6079 c( ioffc ) = ct( i )
6080 ioffc = ioffc + 1
6081 100 CONTINUE
6082*
6083* Compute the error ratio for this result.
6084*
6085 err = rzero
6086 info = 0
6087 ldpc = descc( lld_ )
6088 ioffc = ic + ( jc + j - 2 ) * ldc
6089 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
6090 $ iic, jjc, icrow, iccol )
6091 icurrow = icrow
6092 rowrep = ( icrow.EQ.-1 )
6093 colrep = ( iccol.EQ.-1 )
6094*
6095 IF( mycol.EQ.iccol .OR. colrep ) THEN
6096*
6097 ibb = descc( imb_ ) - ic + 1
6098 IF( ibb.LE.0 )
6099 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
6100 ibb = min( ibb, n )
6101 in = ic + ibb - 1
6102*
6103 DO 110 i = ic, in
6104*
6105 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6106 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6107 $ c( ioffc ) ) / eps
6108 IF( g( i-ic+1 ).NE.rzero )
6109 $ erri = erri / g( i-ic+1 )
6110 err = max( err, erri )
6111 IF( err*sqrt( eps ).GE.rone )
6112 $ info = 1
6113 iic = iic + 1
6114 END IF
6115*
6116 ioffc = ioffc + 1
6117*
6118 110 CONTINUE
6119*
6120 icurrow = mod( icurrow+1, nprow )
6121*
6122 DO 130 i = in+1, ic+n-1, descc( mb_ )
6123 ibb = min( ic+n-i, descc( mb_ ) )
6124*
6125 DO 120 kk = 0, ibb-1
6126*
6127 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6128 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6129 $ c( ioffc ) )/eps
6130 IF( g( i+kk-ic+1 ).NE.rzero )
6131 $ erri = erri / g( i+kk-ic+1 )
6132 err = max( err, erri )
6133 IF( err*sqrt( eps ).GE.rone )
6134 $ info = 1
6135 iic = iic + 1
6136 END IF
6137*
6138 ioffc = ioffc + 1
6139*
6140 120 CONTINUE
6141*
6142 icurrow = mod( icurrow+1, nprow )
6143*
6144 130 CONTINUE
6145*
6146 END IF
6147*
6148* If INFO = 0, all results are at least half accurate.
6149*
6150 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6151 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6152 $ mycol )
6153 IF( info.NE.0 )
6154 $ GO TO 150
6155*
6156 140 CONTINUE
6157*
6158 150 CONTINUE
6159*
6160 RETURN
6161*
6162* End of PCMMCH1
6163*
6164 END
6165 SUBROUTINE pcmmch2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA,
6166 $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC,
6167 $ JC, DESCC, CT, G, ERR, INFO )
6168*
6169* -- PBLAS test routine (version 2.0) --
6170* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6171* and University of California, Berkeley.
6172* April 1, 1998
6173*
6174* .. Scalar Arguments ..
6175 CHARACTER*1 TRANS, UPLO
6176 INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N
6177 REAL ERR
6178 COMPLEX ALPHA, BETA
6179* ..
6180* .. Array Arguments ..
6181 INTEGER DESCA( * ), DESCB( * ), DESCC( * )
6182 REAL G( * )
6183 COMPLEX A( * ), B( * ), C( * ), CT( * ),
6184 $ PC( * )
6185* ..
6186*
6187* Purpose
6188* =======
6189*
6190* PCMMCH2 checks the results of the computational tests.
6191*
6192* Notes
6193* =====
6194*
6195* A description vector is associated with each 2D block-cyclicly dis-
6196* tributed matrix. This vector stores the information required to
6197* establish the mapping between a matrix entry and its corresponding
6198* process and memory location.
6199*
6200* In the following comments, the character _ should be read as
6201* "of the distributed matrix". Let A be a generic term for any 2D
6202* block cyclicly distributed matrix. Its description vector is DESCA:
6203*
6204* NOTATION STORED IN EXPLANATION
6205* ---------------- --------------- ------------------------------------
6206* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6207* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6208* the NPROW x NPCOL BLACS process grid
6209* A is distributed over. The context
6210* itself is global, but the handle
6211* (the integer value) may vary.
6212* M_A (global) DESCA( M_ ) The number of rows in the distribu-
6213* ted matrix A, M_A >= 0.
6214* N_A (global) DESCA( N_ ) The number of columns in the distri-
6215* buted matrix A, N_A >= 0.
6216* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6217* block of the matrix A, IMB_A > 0.
6218* INB_A (global) DESCA( INB_ ) The number of columns of the upper
6219* left block of the matrix A,
6220* INB_A > 0.
6221* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6222* bute the last M_A-IMB_A rows of A,
6223* MB_A > 0.
6224* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6225* bute the last N_A-INB_A columns of
6226* A, NB_A > 0.
6227* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6228* row of the matrix A is distributed,
6229* NPROW > RSRC_A >= 0.
6230* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6231* first column of A is distributed.
6232* NPCOL > CSRC_A >= 0.
6233* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6234* array storing the local blocks of
6235* the distributed matrix A,
6236* IF( Lc( 1, N_A ) > 0 )
6237* LLD_A >= MAX( 1, Lr( 1, M_A ) )
6238* ELSE
6239* LLD_A >= 1.
6240*
6241* Let K be the number of rows of a matrix A starting at the global in-
6242* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6243* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6244* receive if these K rows were distributed over NPROW processes. If K
6245* is the number of columns of a matrix A starting at the global index
6246* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6247* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6248* these K columns were distributed over NPCOL processes.
6249*
6250* The values of Lr() and Lc() may be determined via a call to the func-
6251* tion PB_NUMROC:
6252* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6253* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6254*
6255* Arguments
6256* =========
6257*
6258* ICTXT (local input) INTEGER
6259* On entry, ICTXT specifies the BLACS context handle, indica-
6260* ting the global context of the operation. The context itself
6261* is global, but the value of ICTXT is local.
6262*
6263* UPLO (global input) CHARACTER*1
6264* On entry, UPLO specifies which part of C should contain the
6265* result.
6266*
6267* TRANS (global input) CHARACTER*1
6268* On entry, TRANS specifies whether the matrices A and B have
6269* to be transposed or not before computing the matrix-matrix
6270* product.
6271*
6272* N (global input) INTEGER
6273* On entry, N specifies the order the submatrix operand C. N
6274* must be at least zero.
6275*
6276* K (global input) INTEGER
6277* On entry, K specifies the number of columns (resp. rows) of A
6278* and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at
6279* least zero.
6280*
6281* ALPHA (global input) COMPLEX
6282* On entry, ALPHA specifies the scalar alpha.
6283*
6284* A (local input) COMPLEX array
6285* On entry, A is an array of dimension (DESCA( M_ ),*). This
6286* array contains a local copy of the initial entire matrix PA.
6287*
6288* IA (global input) INTEGER
6289* On entry, IA specifies A's global row index, which points to
6290* the beginning of the submatrix sub( A ).
6291*
6292* JA (global input) INTEGER
6293* On entry, JA specifies A's global column index, which points
6294* to the beginning of the submatrix sub( A ).
6295*
6296* DESCA (global and local input) INTEGER array
6297* On entry, DESCA is an integer array of dimension DLEN_. This
6298* is the array descriptor for the matrix A.
6299*
6300* B (local input) COMPLEX array
6301* On entry, B is an array of dimension (DESCB( M_ ),*). This
6302* array contains a local copy of the initial entire matrix PB.
6303*
6304* IB (global input) INTEGER
6305* On entry, IB specifies B's global row index, which points to
6306* the beginning of the submatrix sub( B ).
6307*
6308* JB (global input) INTEGER
6309* On entry, JB specifies B's global column index, which points
6310* to the beginning of the submatrix sub( B ).
6311*
6312* DESCB (global and local input) INTEGER array
6313* On entry, DESCB is an integer array of dimension DLEN_. This
6314* is the array descriptor for the matrix B.
6315*
6316* BETA (global input) COMPLEX
6317* On entry, BETA specifies the scalar beta.
6318*
6319* C (local input/local output) COMPLEX array
6320* On entry, C is an array of dimension (DESCC( M_ ),*). This
6321* array contains a local copy of the initial entire matrix PC.
6322*
6323* PC (local input) COMPLEX array
6324* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6325* array contains the local pieces of the matrix PC.
6326*
6327* IC (global input) INTEGER
6328* On entry, IC specifies C's global row index, which points to
6329* the beginning of the submatrix sub( C ).
6330*
6331* JC (global input) INTEGER
6332* On entry, JC specifies C's global column index, which points
6333* to the beginning of the submatrix sub( C ).
6334*
6335* DESCC (global and local input) INTEGER array
6336* On entry, DESCC is an integer array of dimension DLEN_. This
6337* is the array descriptor for the matrix C.
6338*
6339* CT (workspace) COMPLEX array
6340* On entry, CT is an array of dimension at least MAX(M,N,K). CT
6341* holds a copy of the current column of C.
6342*
6343* G (workspace) REAL array
6344* On entry, G is an array of dimension at least MAX(M,N,K). G
6345* is used to compute the gauges.
6346*
6347* ERR (global output) REAL
6348* On exit, ERR specifies the largest error in absolute value.
6349*
6350* INFO (global output) INTEGER
6351* On exit, if INFO <> 0, the result is less than half accurate.
6352*
6353* -- Written on April 1, 1998 by
6354* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6355*
6356* =====================================================================
6357*
6358* .. Parameters ..
6359 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6360 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6361 $ RSRC_
6362 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6363 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6364 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6365 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6366 REAL RZERO, RONE
6367 PARAMETER ( RZERO = 0.0e+0, rone = 1.0e+0 )
6368 COMPLEX ZERO
6369 PARAMETER ( ZERO = ( 0.0e+0, 0.0e+0 ) )
6370* ..
6371* .. Local Scalars ..
6372 LOGICAL COLREP, HTRAN, NOTRAN, ROWREP, TRAN, UPPER
6373 INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC,
6374 $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J,
6375 $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW,
6376 $ NPCOL, NPROW
6377 REAL EPS, ERRI
6378 COMPLEX Z
6379* ..
6380* .. External Subroutines ..
6381 EXTERNAL blacs_gridinfo, igsum2d, pb_infog2l, sgamx2d
6382* ..
6383* .. External Functions ..
6384 LOGICAL LSAME
6385 REAL PSLAMCH
6386 EXTERNAL lsame, pslamch
6387* ..
6388* .. Intrinsic Functions ..
6389 INTRINSIC abs, aimag, conjg, max, min, mod, real, sqrt
6390* ..
6391* .. Statement Functions ..
6392 REAL ABS1
6393 ABS1( Z ) = abs( real( z ) ) + abs( aimag( z ) )
6394* ..
6395* .. Executable Statements ..
6396*
6397 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6398*
6399 eps = pslamch( ictxt, 'eps' )
6400*
6401 upper = lsame( uplo, 'U' )
6402 htran = lsame( trans, 'H' )
6403 notran = lsame( trans, 'N' )
6404 tran = lsame( trans, 'T' )
6405*
6406 lda = max( 1, desca( m_ ) )
6407 ldb = max( 1, descb( m_ ) )
6408 ldc = max( 1, descc( m_ ) )
6409*
6410* Compute expected result in C using data in A, B and C.
6411* Compute gauges in G. This part of the computation is performed
6412* by every process in the grid.
6413*
6414 DO 140 j = 1, n
6415*
6416 IF( upper ) THEN
6417 ibeg = 1
6418 iend = j
6419 ELSE
6420 ibeg = j
6421 iend = n
6422 END IF
6423*
6424 DO 10 i = 1, n
6425 ct( i ) = zero
6426 g( i ) = rzero
6427 10 CONTINUE
6428*
6429 IF( notran ) THEN
6430 DO 30 kk = 1, k
6431 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6432 ioffbk = ib + j - 1 + ( jb + kk - 2 ) * ldb
6433 DO 20 i = ibeg, iend
6434 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6435 ioffbn = ib + i - 1 + ( jb + kk - 2 ) * ldb
6436 ct( i ) = ct( i ) + alpha * (
6437 $ a( ioffan ) * b( ioffbk ) +
6438 $ b( ioffbn ) * a( ioffak ) )
6439 g( i ) = g( i ) + abs( alpha ) * (
6440 $ abs1( a( ioffan ) ) * abs1( b( ioffbk ) ) +
6441 $ abs1( b( ioffbn ) ) * abs1( a( ioffak ) ) )
6442 20 CONTINUE
6443 30 CONTINUE
6444 ELSE IF( tran ) THEN
6445 DO 50 kk = 1, k
6446 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6447 ioffbk = ib + kk - 1 + ( jb + j - 2 ) * ldb
6448 DO 40 i = ibeg, iend
6449 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6450 ioffbn = ib + kk - 1 + ( jb + i - 2 ) * ldb
6451 ct( i ) = ct( i ) + alpha * (
6452 $ a( ioffan ) * b( ioffbk ) +
6453 $ b( ioffbn ) * a( ioffak ) )
6454 g( i ) = g( i ) + abs( alpha ) * (
6455 $ abs1( a( ioffan ) ) * abs1( b( ioffbk ) ) +
6456 $ abs1( b( ioffbn ) ) * abs1( a( ioffak ) ) )
6457 40 CONTINUE
6458 50 CONTINUE
6459 ELSE IF( htran ) THEN
6460 DO 70 kk = 1, k
6461 ioffak = ia + j - 1 + ( ja + kk - 2 ) * lda
6462 ioffbk = ib + j - 1 + ( jb + kk - 2 ) * ldb
6463 DO 60 i = ibeg, iend
6464 ioffan = ia + i - 1 + ( ja + kk - 2 ) * lda
6465 ioffbn = ib + i - 1 + ( jb + kk - 2 ) * ldb
6466 ct( i ) = ct( i ) +
6467 $ alpha * a( ioffan ) * conjg( b( ioffbk ) ) +
6468 $ b( ioffbn ) * conjg( alpha * a( ioffak ) )
6469 g( i ) = g( i ) + abs1( alpha ) * (
6470 $ abs1( a( ioffan ) ) * abs1( b( ioffbk ) ) +
6471 $ abs1( b( ioffbn ) ) * abs1( a( ioffak ) ) )
6472 60 CONTINUE
6473 70 CONTINUE
6474 ELSE
6475 DO 90 kk = 1, k
6476 ioffak = ia + kk - 1 + ( ja + j - 2 ) * lda
6477 ioffbk = ib + kk - 1 + ( jb + j - 2 ) * ldb
6478 DO 80 i = ibeg, iend
6479 ioffan = ia + kk - 1 + ( ja + i - 2 ) * lda
6480 ioffbn = ib + kk - 1 + ( jb + i - 2 ) * ldb
6481 ct( i ) = ct( i ) +
6482 $ alpha * conjg( a( ioffan ) ) * b( ioffbk ) +
6483 $ conjg( alpha * b( ioffbn ) ) * a( ioffak )
6484 g( i ) = g( i ) + abs1( alpha ) * (
6485 $ abs1( conjg( a( ioffan ) ) * b( ioffbk ) ) +
6486 $ abs1( conjg( b( ioffbn ) ) * a( ioffak ) ) )
6487 80 CONTINUE
6488 90 CONTINUE
6489 END IF
6490*
6491 ioffc = ic + ibeg - 1 + ( jc + j - 2 ) * ldc
6492*
6493 DO 100 i = ibeg, iend
6494 ct( i ) = ct( i ) + beta * c( ioffc )
6495 g( i ) = g( i ) + abs1( beta )*abs1( c( ioffc ) )
6496 c( ioffc ) = ct( i )
6497 ioffc = ioffc + 1
6498 100 CONTINUE
6499*
6500* Compute the error ratio for this result.
6501*
6502 err = rzero
6503 info = 0
6504 ldpc = descc( lld_ )
6505 ioffc = ic + ( jc + j - 2 ) * ldc
6506 CALL pb_infog2l( ic, jc+j-1, descc, nprow, npcol, myrow, mycol,
6507 $ iic, jjc, icrow, iccol )
6508 icurrow = icrow
6509 rowrep = ( icrow.EQ.-1 )
6510 colrep = ( iccol.EQ.-1 )
6511*
6512 IF( mycol.EQ.iccol .OR. colrep ) THEN
6513*
6514 ibb = descc( imb_ ) - ic + 1
6515 IF( ibb.LE.0 )
6516 $ ibb = ( ( -ibb ) / descc( mb_ ) + 1 )*descc( mb_ ) + ibb
6517 ibb = min( ibb, n )
6518 in = ic + ibb - 1
6519*
6520 DO 110 i = ic, in
6521*
6522 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6523 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6524 $ c( ioffc ) ) / eps
6525 IF( g( i-ic+1 ).NE.rzero )
6526 $ erri = erri / g( i-ic+1 )
6527 err = max( err, erri )
6528 IF( err*sqrt( eps ).GE.rone )
6529 $ info = 1
6530 iic = iic + 1
6531 END IF
6532*
6533 ioffc = ioffc + 1
6534*
6535 110 CONTINUE
6536*
6537 icurrow = mod( icurrow+1, nprow )
6538*
6539 DO 130 i = in+1, ic+n-1, descc( mb_ )
6540 ibb = min( ic+n-i, descc( mb_ ) )
6541*
6542 DO 120 kk = 0, ibb-1
6543*
6544 IF( myrow.EQ.icurrow .OR. rowrep ) THEN
6545 erri = abs( pc( iic+(jjc-1)*ldpc ) -
6546 $ c( ioffc ) )/eps
6547 IF( g( i+kk-ic+1 ).NE.rzero )
6548 $ erri = erri / g( i+kk-ic+1 )
6549 err = max( err, erri )
6550 IF( err*sqrt( eps ).GE.rone )
6551 $ info = 1
6552 iic = iic + 1
6553 END IF
6554*
6555 ioffc = ioffc + 1
6556*
6557 120 CONTINUE
6558*
6559 icurrow = mod( icurrow+1, nprow )
6560*
6561 130 CONTINUE
6562*
6563 END IF
6564*
6565* If INFO = 0, all results are at least half accurate.
6566*
6567 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6568 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6569 $ mycol )
6570 IF( info.NE.0 )
6571 $ GO TO 150
6572*
6573 140 CONTINUE
6574*
6575 150 CONTINUE
6576*
6577 RETURN
6578*
6579* End of PCMMCH2
6580*
6581 END
6582 SUBROUTINE pcmmch3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
6583 $ BETA, C, PC, IC, JC, DESCC, ERR, INFO )
6584*
6585* -- PBLAS test routine (version 2.0) --
6586* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6587* and University of California, Berkeley.
6588* April 1, 1998
6589*
6590* .. Scalar Arguments ..
6591 CHARACTER*1 TRANS, UPLO
6592 INTEGER IA, IC, INFO, JA, JC, M, N
6593 REAL ERR
6594 COMPLEX ALPHA, BETA
6595* ..
6596* .. Array Arguments ..
6597 INTEGER DESCA( * ), DESCC( * )
6598 COMPLEX A( * ), C( * ), PC( * )
6599* ..
6600*
6601* Purpose
6602* =======
6603*
6604* PCMMCH3 checks the results of the computational tests.
6605*
6606* Notes
6607* =====
6608*
6609* A description vector is associated with each 2D block-cyclicly dis-
6610* tributed matrix. This vector stores the information required to
6611* establish the mapping between a matrix entry and its corresponding
6612* process and memory location.
6613*
6614* In the following comments, the character _ should be read as
6615* "of the distributed matrix". Let A be a generic term for any 2D
6616* block cyclicly distributed matrix. Its description vector is DESCA:
6617*
6618* NOTATION STORED IN EXPLANATION
6619* ---------------- --------------- ------------------------------------
6620* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
6621* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
6622* the NPROW x NPCOL BLACS process grid
6623* A is distributed over. The context
6624* itself is global, but the handle
6625* (the integer value) may vary.
6626* M_A (global) DESCA( M_ ) The number of rows in the distribu-
6627* ted matrix A, M_A >= 0.
6628* N_A (global) DESCA( N_ ) The number of columns in the distri-
6629* buted matrix A, N_A >= 0.
6630* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
6631* block of the matrix A, IMB_A > 0.
6632* INB_A (global) DESCA( INB_ ) The number of columns of the upper
6633* left block of the matrix A,
6634* INB_A > 0.
6635* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
6636* bute the last M_A-IMB_A rows of A,
6637* MB_A > 0.
6638* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
6639* bute the last N_A-INB_A columns of
6640* A, NB_A > 0.
6641* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
6642* row of the matrix A is distributed,
6643* NPROW > RSRC_A >= 0.
6644* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
6645* first column of A is distributed.
6646* NPCOL > CSRC_A >= 0.
6647* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
6648* array storing the local blocks of
6649* the distributed matrix A,
6650* IF( Lc( 1, N_A ) > 0 )
6651* LLD_A >= MAX( 1, Lr( 1, M_A ) )
6652* ELSE
6653* LLD_A >= 1.
6654*
6655* Let K be the number of rows of a matrix A starting at the global in-
6656* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
6657* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
6658* receive if these K rows were distributed over NPROW processes. If K
6659* is the number of columns of a matrix A starting at the global index
6660* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
6661* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
6662* these K columns were distributed over NPCOL processes.
6663*
6664* The values of Lr() and Lc() may be determined via a call to the func-
6665* tion PB_NUMROC:
6666* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
6667* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
6668*
6669* Arguments
6670* =========
6671*
6672* UPLO (global input) CHARACTER*1
6673* On entry, UPLO specifies which part of C should contain the
6674* result.
6675*
6676* TRANS (global input) CHARACTER*1
6677* On entry, TRANS specifies whether the matrix A has to be
6678* transposed or not before computing the matrix-matrix addi-
6679* tion.
6680*
6681* M (global input) INTEGER
6682* On entry, M specifies the number of rows of C.
6683*
6684* N (global input) INTEGER
6685* On entry, N specifies the number of columns of C.
6686*
6687* ALPHA (global input) COMPLEX
6688* On entry, ALPHA specifies the scalar alpha.
6689*
6690* A (local input) COMPLEX array
6691* On entry, A is an array of dimension (DESCA( M_ ),*). This
6692* array contains a local copy of the initial entire matrix PA.
6693*
6694* IA (global input) INTEGER
6695* On entry, IA specifies A's global row index, which points to
6696* the beginning of the submatrix sub( A ).
6697*
6698* JA (global input) INTEGER
6699* On entry, JA specifies A's global column index, which points
6700* to the beginning of the submatrix sub( A ).
6701*
6702* DESCA (global and local input) INTEGER array
6703* On entry, DESCA is an integer array of dimension DLEN_. This
6704* is the array descriptor for the matrix A.
6705*
6706* BETA (global input) COMPLEX
6707* On entry, BETA specifies the scalar beta.
6708*
6709* C (local input/local output) COMPLEX array
6710* On entry, C is an array of dimension (DESCC( M_ ),*). This
6711* array contains a local copy of the initial entire matrix PC.
6712*
6713* PC (local input) COMPLEX array
6714* On entry, PC is an array of dimension (DESCC( LLD_ ),*). This
6715* array contains the local pieces of the matrix PC.
6716*
6717* IC (global input) INTEGER
6718* On entry, IC specifies C's global row index, which points to
6719* the beginning of the submatrix sub( C ).
6720*
6721* JC (global input) INTEGER
6722* On entry, JC specifies C's global column index, which points
6723* to the beginning of the submatrix sub( C ).
6724*
6725* DESCC (global and local input) INTEGER array
6726* On entry, DESCC is an integer array of dimension DLEN_. This
6727* is the array descriptor for the matrix C.
6728*
6729* ERR (global output) REAL
6730* On exit, ERR specifies the largest error in absolute value.
6731*
6732* INFO (global output) INTEGER
6733* On exit, if INFO <> 0, the result is less than half accurate.
6734*
6735* -- Written on April 1, 1998 by
6736* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6737*
6738* =====================================================================
6739*
6740* .. Parameters ..
6741 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
6742 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
6743 $ RSRC_
6744 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
6745 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
6746 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
6747 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
6748 REAL ZERO
6749 PARAMETER ( ZERO = 0.0e+0 )
6750* ..
6751* .. Local Scalars ..
6752 LOGICAL COLREP, CTRAN, LOWER, NOTRAN, ROWREP, UPPER
6753 INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J,
6754 $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL,
6755 $ NPROW
6756 REAL ERR0, ERRI, PREC
6757* ..
6758* .. External Subroutines ..
6759 EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L,
6760 $ pcerraxpby, sgamx2d
6761* ..
6762* .. External Functions ..
6763 LOGICAL LSAME
6764 REAL PSLAMCH
6765 EXTERNAL LSAME, PSLAMCH
6766* ..
6767* .. Intrinsic Functions ..
6768 INTRINSIC abs, conjg, max
6769* ..
6770* .. Executable Statements ..
6771*
6772 ictxt = descc( ctxt_ )
6773 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
6774*
6775 prec = pslamch( ictxt, 'eps' )
6776*
6777 upper = lsame( uplo, 'U' )
6778 lower = lsame( uplo, 'L' )
6779 notran = lsame( trans, 'N' )
6780 ctran = lsame( trans, 'C' )
6781*
6782* Compute expected result in C using data in A and C. This part of
6783* the computation is performed by every process in the grid.
6784*
6785 info = 0
6786 err = zero
6787*
6788 lda = max( 1, desca( m_ ) )
6789 ldc = max( 1, descc( m_ ) )
6790 ldpc = max( 1, descc( lld_ ) )
6791 rowrep = ( descc( rsrc_ ).EQ.-1 )
6792 colrep = ( descc( csrc_ ).EQ.-1 )
6793*
6794 IF( notran ) THEN
6795*
6796 DO 20 j = jc, jc + n - 1
6797*
6798 ioffc = ic + ( j - 1 ) * ldc
6799 ioffa = ia + ( ja - 1 + j - jc ) * lda
6800*
6801 DO 10 i = ic, ic + m - 1
6802*
6803 IF( upper ) THEN
6804 IF( ( j - jc ).GE.( i - ic ) ) THEN
6805 CALL pcerraxpby( erri, alpha, a( ioffa ), beta,
6806 $ c( ioffc ), prec )
6807 ELSE
6808 erri = zero
6809 END IF
6810 ELSE IF( lower ) THEN
6811 IF( ( j - jc ).LE.( i - ic ) ) THEN
6812 CALL pcerraxpby( erri, alpha, a( ioffa ), beta,
6813 $ c( ioffc ), prec )
6814 ELSE
6815 erri = zero
6816 END IF
6817 ELSE
6818 CALL pcerraxpby( erri, alpha, a( ioffa ), beta,
6819 $ c( ioffc ), prec )
6820 END IF
6821*
6822 CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6823 $ iic, jjc, icrow, iccol )
6824 IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6825 $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6826 err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6827 IF( err0.GT.erri )
6828 $ info = 1
6829 err = max( err, err0 )
6830 END IF
6831*
6832 ioffa = ioffa + 1
6833 ioffc = ioffc + 1
6834*
6835 10 CONTINUE
6836*
6837 20 CONTINUE
6838*
6839 ELSE IF( ctran ) THEN
6840*
6841 DO 40 j = jc, jc + n - 1
6842*
6843 ioffc = ic + ( j - 1 ) * ldc
6844 ioffa = ia + ( j - jc ) + ( ja - 1 ) * lda
6845*
6846 DO 30 i = ic, ic + m - 1
6847*
6848 IF( upper ) THEN
6849 IF( ( j - jc ).GE.( i - ic ) ) THEN
6850 CALL pcerraxpby( erri, alpha, conjg( a( ioffa ) ),
6851 $ beta, c( ioffc ), prec )
6852 ELSE
6853 erri = zero
6854 END IF
6855 ELSE IF( lower ) THEN
6856 IF( ( j - jc ).LE.( i - ic ) ) THEN
6857 CALL pcerraxpby( erri, alpha, conjg( a( ioffa ) ),
6858 $ beta, c( ioffc ), prec )
6859 ELSE
6860 erri = zero
6861 END IF
6862 ELSE
6863 CALL pcerraxpby( erri, alpha, conjg( a( ioffa ) ),
6864 $ beta, c( ioffc ), prec )
6865 END IF
6866*
6867 CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6868 $ iic, jjc, icrow, iccol )
6869 IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6870 $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6871 err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6872 IF( err0.GT.erri )
6873 $ info = 1
6874 err = max( err, err0 )
6875 END IF
6876*
6877 ioffc = ioffc + 1
6878 ioffa = ioffa + lda
6879*
6880 30 CONTINUE
6881*
6882 40 CONTINUE
6883*
6884 ELSE
6885*
6886 DO 60 j = jc, jc + n - 1
6887*
6888 ioffc = ic + ( j - 1 ) * ldc
6889 ioffa = ia + ( j - jc ) + ( ja - 1 ) * lda
6890*
6891 DO 50 i = ic, ic + m - 1
6892*
6893 IF( upper ) THEN
6894 IF( ( j - jc ).GE.( i - ic ) ) THEN
6895 CALL pcerraxpby( erri, alpha, a( ioffa ), beta,
6896 $ c( ioffc ), prec )
6897 ELSE
6898 erri = zero
6899 END IF
6900 ELSE IF( lower ) THEN
6901 IF( ( j - jc ).LE.( i - ic ) ) THEN
6902 CALL pcerraxpby( erri, alpha, a( ioffa ), beta,
6903 $ c( ioffc ), prec )
6904 ELSE
6905 erri = zero
6906 END IF
6907 ELSE
6908 CALL pcerraxpby( erri, alpha, a( ioffa ), beta,
6909 $ c( ioffc ), prec )
6910 END IF
6911*
6912 CALL pb_infog2l( i, j, descc, nprow, npcol, myrow, mycol,
6913 $ iic, jjc, icrow, iccol )
6914 IF( ( myrow.EQ.icrow .OR. rowrep ) .AND.
6915 $ ( mycol.EQ.iccol .OR. colrep ) ) THEN
6916 err0 = abs( pc( iic+(jjc-1)*ldpc )-c( ioffc ) )
6917 IF( err0.GT.erri )
6918 $ info = 1
6919 err = max( err, err0 )
6920 END IF
6921*
6922 ioffc = ioffc + 1
6923 ioffa = ioffa + lda
6924*
6925 50 CONTINUE
6926*
6927 60 CONTINUE
6928*
6929 END IF
6930*
6931* If INFO = 0, all results are at least half accurate.
6932*
6933 CALL igsum2d( ictxt, 'All', ' ', 1, 1, info, 1, -1, mycol )
6934 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, i, j, -1, -1,
6935 $ mycol )
6936*
6937 RETURN
6938*
6939* End of PCMMCH3
6940*
6941 END
6942 SUBROUTINE pcerraxpby( ERRBND, ALPHA, X, BETA, Y, PREC )
6943*
6944* -- PBLAS test routine (version 2.0) --
6945* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6946* and University of California, Berkeley.
6947* April 1, 1998
6948*
6949* .. Scalar Arguments ..
6950 REAL ERRBND, PREC
6951 COMPLEX ALPHA, BETA, X, Y
6952* ..
6953*
6954* Purpose
6955* =======
6956*
6957* PCERRAXPBY serially computes y := beta*y + alpha * x and returns a
6958* scaled relative acceptable error bound on the result.
6959*
6960* Arguments
6961* =========
6962*
6963* ERRBND (global output) REAL
6964* On exit, ERRBND specifies the scaled relative acceptable er-
6965* ror bound.
6966*
6967* ALPHA (global input) COMPLEX
6968* On entry, ALPHA specifies the scalar alpha.
6969*
6970* X (global input) COMPLEX
6971* On entry, X specifies the scalar x to be scaled.
6972*
6973* BETA (global input) COMPLEX
6974* On entry, BETA specifies the scalar beta.
6975*
6976* Y (global input/global output) COMPLEX
6977* On entry, Y specifies the scalar y to be added. On exit, Y
6978* contains the resulting scalar y.
6979*
6980* PREC (global input) REAL
6981* On entry, PREC specifies the machine precision.
6982*
6983* -- Written on April 1, 1998 by
6984* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
6985*
6986* =====================================================================
6987*
6988* .. Parameters ..
6989 REAL ONE, TWO, ZERO
6990 PARAMETER ( ONE = 1.0e+0, two = 2.0e+0,
6991 $ zero = 0.0e+0 )
6992* ..
6993* .. Local Scalars ..
6994 REAL ADDBND, FACT, SUMINEG, SUMIPOS, SUMRNEG,
6995 $ SUMRPOS
6996 COMPLEX TMP
6997* ..
6998* .. Intrinsic Functions ..
6999* ..
7000* .. Executable Statements ..
7001*
7002 SUMIPOS = zero
7003 sumineg = zero
7004 sumrpos = zero
7005 sumrneg = zero
7006 fact = one + two * prec
7007 addbnd = two * two * two * prec
7008*
7009 tmp = alpha * x
7010 IF( real( tmp ).GE.zero ) THEN
7011 sumrpos = sumrpos + real( tmp ) * fact
7012 ELSE
7013 sumrneg = sumrneg - real( tmp ) * fact
7014 END IF
7015 IF( aimag( tmp ).GE.zero ) THEN
7016 sumipos = sumipos + aimag( tmp ) * fact
7017 ELSE
7018 sumineg = sumineg - aimag( tmp ) * fact
7019 END IF
7020*
7021 tmp = beta * y
7022 IF( real( tmp ).GE.zero ) THEN
7023 sumrpos = sumrpos + real( tmp ) * fact
7024 ELSE
7025 sumrneg = sumrneg - real( tmp ) * fact
7026 END IF
7027 IF( aimag( tmp ).GE.zero ) THEN
7028 sumipos = sumipos + aimag( tmp ) * fact
7029 ELSE
7030 sumineg = sumineg - aimag( tmp ) * fact
7031 END IF
7032*
7033 y = ( beta * y ) + ( alpha * x )
7034*
7035 errbnd = addbnd * max( max( sumrpos, sumrneg ),
7036 $ max( sumipos, sumineg ) )
7037*
7038 RETURN
7039*
7040* End of PCERRAXPBY
7041*
7042 END
7043 SUBROUTINE pcipset( TOGGLE, N, A, IA, JA, DESCA )
7044*
7045* -- PBLAS test routine (version 2.0) --
7046* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7047* and University of California, Berkeley.
7048* April 1, 1998
7049*
7050* .. Scalar Arguments ..
7051 CHARACTER*1 TOGGLE
7052 INTEGER IA, JA, N
7053* ..
7054* .. Array Arguments ..
7055 INTEGER DESCA( * )
7056 COMPLEX A( * )
7057* ..
7058*
7059* Purpose
7060* =======
7061*
7062* PCIPSET sets the imaginary part of the diagonal entries of an n by n
7063* matrix sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). This is used to
7064* test the PBLAS routines for complex Hermitian matrices, which are
7065* either not supposed to access or use the imaginary parts of the dia-
7066* gonals, or supposed to set them to zero. The value used to set the
7067* imaginary part of the diagonals depends on the value of TOGGLE.
7068*
7069* Notes
7070* =====
7071*
7072* A description vector is associated with each 2D block-cyclicly dis-
7073* tributed matrix. This vector stores the information required to
7074* establish the mapping between a matrix entry and its corresponding
7075* process and memory location.
7076*
7077* In the following comments, the character _ should be read as
7078* "of the distributed matrix". Let A be a generic term for any 2D
7079* block cyclicly distributed matrix. Its description vector is DESCA:
7080*
7081* NOTATION STORED IN EXPLANATION
7082* ---------------- --------------- ------------------------------------
7083* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7084* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7085* the NPROW x NPCOL BLACS process grid
7086* A is distributed over. The context
7087* itself is global, but the handle
7088* (the integer value) may vary.
7089* M_A (global) DESCA( M_ ) The number of rows in the distribu-
7090* ted matrix A, M_A >= 0.
7091* N_A (global) DESCA( N_ ) The number of columns in the distri-
7092* buted matrix A, N_A >= 0.
7093* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7094* block of the matrix A, IMB_A > 0.
7095* INB_A (global) DESCA( INB_ ) The number of columns of the upper
7096* left block of the matrix A,
7097* INB_A > 0.
7098* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7099* bute the last M_A-IMB_A rows of A,
7100* MB_A > 0.
7101* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7102* bute the last N_A-INB_A columns of
7103* A, NB_A > 0.
7104* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7105* row of the matrix A is distributed,
7106* NPROW > RSRC_A >= 0.
7107* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7108* first column of A is distributed.
7109* NPCOL > CSRC_A >= 0.
7110* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7111* array storing the local blocks of
7112* the distributed matrix A,
7113* IF( Lc( 1, N_A ) > 0 )
7114* LLD_A >= MAX( 1, Lr( 1, M_A ) )
7115* ELSE
7116* LLD_A >= 1.
7117*
7118* Let K be the number of rows of a matrix A starting at the global in-
7119* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7120* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7121* receive if these K rows were distributed over NPROW processes. If K
7122* is the number of columns of a matrix A starting at the global index
7123* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7124* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7125* these K columns were distributed over NPCOL processes.
7126*
7127* The values of Lr() and Lc() may be determined via a call to the func-
7128* tion PB_NUMROC:
7129* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7130* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7131*
7132* Arguments
7133* =========
7134*
7135* TOGGLE (global input) CHARACTER*1
7136* On entry, TOGGLE specifies the set-value to be used as fol-
7137* lows:
7138* If TOGGLE = 'Z' or 'z', the imaginary part of the diago-
7139* nals are set to zero,
7140* If TOGGLE = 'B' or 'b', the imaginary part of the diago-
7141* nals are set to a large value.
7142*
7143* N (global input) INTEGER
7144* On entry, N specifies the order of sub( A ). N must be at
7145* least zero.
7146*
7147* A (local input/local output) pointer to COMPLEX
7148* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7149* at least Lc( 1, JA+N-1 ). Before entry, this array contains
7150* the local entries of the matrix A. On exit, the diagonals of
7151* sub( A ) have been updated as specified by TOGGLE.
7152*
7153* IA (global input) INTEGER
7154* On entry, IA specifies A's global row index, which points to
7155* the beginning of the submatrix sub( A ).
7156*
7157* JA (global input) INTEGER
7158* On entry, JA specifies A's global column index, which points
7159* to the beginning of the submatrix sub( A ).
7160*
7161* DESCA (global and local input) INTEGER array
7162* On entry, DESCA is an integer array of dimension DLEN_. This
7163* is the array descriptor for the matrix A.
7164*
7165* -- Written on April 1, 1998 by
7166* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7167*
7168* =====================================================================
7169*
7170* .. Parameters ..
7171 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7172 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7173 $ RSRC_
7174 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7175 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7176 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7177 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7178 REAL ZERO
7179 PARAMETER ( ZERO = 0.0e+0 )
7180* ..
7181* .. Local Scalars ..
7182 LOGICAL COLREP, GODOWN, GOLEFT, ROWREP
7183 INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW,
7184 $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP,
7185 $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1,
7186 $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC,
7187 $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS,
7188 $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP
7189 REAL ALPHA, ATMP
7190* ..
7191* .. Local Arrays ..
7192 INTEGER DESCA2( DLEN_ )
7193* ..
7194* .. External Subroutines ..
7195 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7196 $ pb_desctrans
7197* ..
7198* .. External Functions ..
7199 LOGICAL LSAME
7200 REAL PSLAMCH
7201 EXTERNAL lsame, pslamch
7202* ..
7203* .. Intrinsic Functions ..
7204 INTRINSIC cmplx, max, min, real
7205* ..
7206* .. Executable Statements ..
7207*
7208* Convert descriptor
7209*
7210 CALL pb_desctrans( desca, desca2 )
7211*
7212* Get grid parameters
7213*
7214 ictxt = desca2( ctxt_ )
7215 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7216*
7217 IF( n.LE.0 )
7218 $ RETURN
7219*
7220 IF( lsame( toggle, 'Z' ) ) THEN
7221 alpha = zero
7222 ELSE IF( lsame( toggle, 'B' ) ) THEN
7223 alpha = pslamch( ictxt, 'Epsilon' )
7224 alpha = alpha / pslamch( ictxt, 'Safe minimum' )
7225 END IF
7226*
7227 CALL pb_ainfog2l( n, n, ia, ja, desca2, nprow, npcol, myrow,
7228 $ mycol, imb1, inb1, np, nq, iia, jja, iarow,
7229 $ iacol, mrrow, mrcol )
7230*
7231 IF( np.LE.0 .OR. nq.LE.0 )
7232 $ RETURN
7233*
7234* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
7235* ILOW, LOW, IUPP, and UPP.
7236*
7237 mb = desca2( mb_ )
7238 nb = desca2( nb_ )
7239 CALL pb_binfo( 0, np, nq, imb1, inb1, mb, nb, mrrow, mrcol,
7240 $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
7241 $ lnbloc, ilow, low, iupp, upp )
7242*
7243 ioffa = iia - 1
7244 joffa = jja - 1
7245 rowrep = ( desca2( rsrc_ ).EQ.-1 )
7246 colrep = ( desca2( csrc_ ).EQ.-1 )
7247 lda = desca2( lld_ )
7248 ldap1 = lda + 1
7249*
7250 IF( rowrep ) THEN
7251 pmb = mb
7252 ELSE
7253 pmb = nprow * mb
7254 END IF
7255 IF( colrep ) THEN
7256 qnb = nb
7257 ELSE
7258 qnb = npcol * nb
7259 END IF
7260*
7261* Handle the first block of rows or columns separately, and update
7262* LCMT00, MBLKS and NBLKS.
7263*
7264 godown = ( lcmt00.GT.iupp )
7265 goleft = ( lcmt00.LT.ilow )
7266*
7267 IF( .NOT.godown .AND. .NOT.goleft ) THEN
7268*
7269* LCMT00 >= ILOW && LCMT00 <= IUPP
7270*
7271 IF( lcmt00.GE.0 ) THEN
7272 ijoffa = ioffa + lcmt00 + ( joffa - 1 ) * lda
7273 DO 10 i = 1, min( inbloc, max( 0, imbloc - lcmt00 ) )
7274 atmp = real( a( ijoffa + i*ldap1 ) )
7275 a( ijoffa + i*ldap1 ) = cmplx( atmp, alpha )
7276 10 CONTINUE
7277 ELSE
7278 ijoffa = ioffa + ( joffa - lcmt00 - 1 ) * lda
7279 DO 20 i = 1, min( imbloc, max( 0, inbloc + lcmt00 ) )
7280 atmp = real( a( ijoffa + i*ldap1 ) )
7281 a( ijoffa + i*ldap1 ) = cmplx( atmp, alpha )
7282 20 CONTINUE
7283 END IF
7284 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7285 godown = .NOT.goleft
7286*
7287 END IF
7288*
7289 IF( godown ) THEN
7290*
7291 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7292 mblks = mblks - 1
7293 ioffa = ioffa + imbloc
7294*
7295 30 CONTINUE
7296 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7297 lcmt00 = lcmt00 - pmb
7298 mblks = mblks - 1
7299 ioffa = ioffa + mb
7300 GO TO 30
7301 END IF
7302*
7303 IF( mblks.LE.0 )
7304 $ RETURN
7305*
7306 lcmt = lcmt00
7307 mblkd = mblks
7308 ioffd = ioffa
7309*
7310 mbloc = mb
7311 40 CONTINUE
7312 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7313 IF( mblkd.EQ.1 )
7314 $ mbloc = lmbloc
7315 IF( lcmt.GE.0 ) THEN
7316 ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
7317 DO 50 i = 1, min( inbloc, max( 0, mbloc - lcmt ) )
7318 atmp = real( a( ijoffa + i*ldap1 ) )
7319 a( ijoffa + i*ldap1 ) = cmplx( atmp, alpha )
7320 50 CONTINUE
7321 ELSE
7322 ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
7323 DO 60 i = 1, min( mbloc, max( 0, inbloc + lcmt ) )
7324 atmp = real( a( ijoffa + i*ldap1 ) )
7325 a( ijoffa + i*ldap1 ) = cmplx( atmp, alpha )
7326 60 CONTINUE
7327 END IF
7328 lcmt00 = lcmt
7329 lcmt = lcmt - pmb
7330 mblks = mblkd
7331 mblkd = mblkd - 1
7332 ioffa = ioffd
7333 ioffd = ioffd + mbloc
7334 GO TO 40
7335 END IF
7336*
7337 lcmt00 = lcmt00 + low - ilow + qnb
7338 nblks = nblks - 1
7339 joffa = joffa + inbloc
7340*
7341 ELSE IF( goleft ) THEN
7342*
7343 lcmt00 = lcmt00 + low - ilow + qnb
7344 nblks = nblks - 1
7345 joffa = joffa + inbloc
7346*
7347 70 CONTINUE
7348 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7349 lcmt00 = lcmt00 + qnb
7350 nblks = nblks - 1
7351 joffa = joffa + nb
7352 GO TO 70
7353 END IF
7354*
7355 IF( nblks.LE.0 )
7356 $ RETURN
7357*
7358 lcmt = lcmt00
7359 nblkd = nblks
7360 joffd = joffa
7361*
7362 nbloc = nb
7363 80 CONTINUE
7364 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7365 IF( nblkd.EQ.1 )
7366 $ nbloc = lnbloc
7367 IF( lcmt.GE.0 ) THEN
7368 ijoffa = ioffa + lcmt + ( joffd - 1 ) * lda
7369 DO 90 i = 1, min( nbloc, max( 0, imbloc - lcmt ) )
7370 atmp = real( a( ijoffa + i*ldap1 ) )
7371 a( ijoffa + i*ldap1 ) = cmplx( atmp, alpha )
7372 90 CONTINUE
7373 ELSE
7374 ijoffa = ioffa + ( joffd - lcmt - 1 ) * lda
7375 DO 100 i = 1, min( imbloc, max( 0, nbloc + lcmt ) )
7376 atmp = real( a( ijoffa + i*ldap1 ) )
7377 a( ijoffa + i*ldap1 ) = cmplx( atmp, alpha )
7378 100 CONTINUE
7379 END IF
7380 lcmt00 = lcmt
7381 lcmt = lcmt + qnb
7382 nblks = nblkd
7383 nblkd = nblkd - 1
7384 joffa = joffd
7385 joffd = joffd + nbloc
7386 GO TO 80
7387 END IF
7388*
7389 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7390 mblks = mblks - 1
7391 ioffa = ioffa + imbloc
7392*
7393 END IF
7394*
7395 nbloc = nb
7396 110 CONTINUE
7397 IF( nblks.GT.0 ) THEN
7398 IF( nblks.EQ.1 )
7399 $ nbloc = lnbloc
7400 120 CONTINUE
7401 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7402 lcmt00 = lcmt00 - pmb
7403 mblks = mblks - 1
7404 ioffa = ioffa + mb
7405 GO TO 120
7406 END IF
7407*
7408 IF( mblks.LE.0 )
7409 $ RETURN
7410*
7411 lcmt = lcmt00
7412 mblkd = mblks
7413 ioffd = ioffa
7414*
7415 mbloc = mb
7416 130 CONTINUE
7417 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7418 IF( mblkd.EQ.1 )
7419 $ mbloc = lmbloc
7420 IF( lcmt.GE.0 ) THEN
7421 ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
7422 DO 140 i = 1, min( nbloc, max( 0, mbloc - lcmt ) )
7423 atmp = real( a( ijoffa + i*ldap1 ) )
7424 a( ijoffa + i*ldap1 ) = cmplx( atmp, alpha )
7425 140 CONTINUE
7426 ELSE
7427 ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
7428 DO 150 i = 1, min( mbloc, max( 0, nbloc + lcmt ) )
7429 atmp = real( a( ijoffa + i*ldap1 ) )
7430 a( ijoffa + i*ldap1 ) = cmplx( atmp, alpha )
7431 150 CONTINUE
7432 END IF
7433 lcmt00 = lcmt
7434 lcmt = lcmt - pmb
7435 mblks = mblkd
7436 mblkd = mblkd - 1
7437 ioffa = ioffd
7438 ioffd = ioffd + mbloc
7439 GO TO 130
7440 END IF
7441*
7442 lcmt00 = lcmt00 + qnb
7443 nblks = nblks - 1
7444 joffa = joffa + nbloc
7445 GO TO 110
7446*
7447 END IF
7448*
7449 RETURN
7450*
7451* End of PCIPSET
7452*
7453 END
7454 REAL function pslamch( ictxt, cmach )
7455*
7456* -- PBLAS test routine (version 2.0) --
7457* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7458* and University of California, Berkeley.
7459* April 1, 1998
7460*
7461* .. Scalar Arguments ..
7462 CHARACTER*1 cmach
7463 INTEGER ictxt
7464* ..
7465*
7466* Purpose
7467* =======
7468*
7469*
7470* .. Local Scalars ..
7471 CHARACTER*1 top
7472 INTEGER idumm
7473 REAL temp
7474* ..
7475* .. External Subroutines ..
7476 EXTERNAL pb_topget, sgamn2d, sgamx2d
7477* ..
7478* .. External Functions ..
7479 LOGICAL lsame
7480 REAL slamch
7481 EXTERNAL lsame, slamch
7482* ..
7483* .. Executable Statements ..
7484*
7485 temp = slamch( cmach )
7486*
7487 IF( lsame( cmach, 'E' ).OR.lsame( cmach, 'S' ).OR.
7488 $ lsame( cmach, 'M' ).OR.lsame( cmach, 'U' ) ) THEN
7489 CALL pb_topget( ictxt, 'Combine', 'All', top )
7490 idumm = 0
7491 CALL sgamx2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
7492 $ idumm, -1, -1, idumm )
7493 ELSE IF( lsame( cmach, 'L' ).OR.lsame( cmach, 'O' ) ) THEN
7494 CALL pb_topget( ictxt, 'Combine', 'All', top )
7495 idumm = 0
7496 CALL sgamn2d( ictxt, 'All', top, 1, 1, temp, 1, idumm,
7497 $ idumm, -1, -1, idumm )
7498 END IF
7499*
7500 pslamch = temp
7501*
7502 RETURN
7503*
7504* End of PSLAMCH
7505*
7506 END
7507 SUBROUTINE pclaset( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA )
7508*
7509* -- PBLAS test routine (version 2.0) --
7510* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7511* and University of California, Berkeley.
7512* April 1, 1998
7513*
7514* .. Scalar Arguments ..
7515 CHARACTER*1 UPLO
7516 INTEGER IA, JA, M, N
7517 COMPLEX ALPHA, BETA
7518* ..
7519* .. Array Arguments ..
7520 INTEGER DESCA( * )
7521 COMPLEX A( * )
7522* ..
7523*
7524* Purpose
7525* =======
7526*
7527* PCLASET initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) deno-
7528* ted by sub( A ) to beta on the diagonal and alpha on the offdiago-
7529* nals.
7530*
7531* Notes
7532* =====
7533*
7534* A description vector is associated with each 2D block-cyclicly dis-
7535* tributed matrix. This vector stores the information required to
7536* establish the mapping between a matrix entry and its corresponding
7537* process and memory location.
7538*
7539* In the following comments, the character _ should be read as
7540* "of the distributed matrix". Let A be a generic term for any 2D
7541* block cyclicly distributed matrix. Its description vector is DESCA:
7542*
7543* NOTATION STORED IN EXPLANATION
7544* ---------------- --------------- ------------------------------------
7545* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
7546* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
7547* the NPROW x NPCOL BLACS process grid
7548* A is distributed over. The context
7549* itself is global, but the handle
7550* (the integer value) may vary.
7551* M_A (global) DESCA( M_ ) The number of rows in the distribu-
7552* ted matrix A, M_A >= 0.
7553* N_A (global) DESCA( N_ ) The number of columns in the distri-
7554* buted matrix A, N_A >= 0.
7555* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
7556* block of the matrix A, IMB_A > 0.
7557* INB_A (global) DESCA( INB_ ) The number of columns of the upper
7558* left block of the matrix A,
7559* INB_A > 0.
7560* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
7561* bute the last M_A-IMB_A rows of A,
7562* MB_A > 0.
7563* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
7564* bute the last N_A-INB_A columns of
7565* A, NB_A > 0.
7566* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
7567* row of the matrix A is distributed,
7568* NPROW > RSRC_A >= 0.
7569* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
7570* first column of A is distributed.
7571* NPCOL > CSRC_A >= 0.
7572* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
7573* array storing the local blocks of
7574* the distributed matrix A,
7575* IF( Lc( 1, N_A ) > 0 )
7576* LLD_A >= MAX( 1, Lr( 1, M_A ) )
7577* ELSE
7578* LLD_A >= 1.
7579*
7580* Let K be the number of rows of a matrix A starting at the global in-
7581* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
7582* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
7583* receive if these K rows were distributed over NPROW processes. If K
7584* is the number of columns of a matrix A starting at the global index
7585* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
7586* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
7587* these K columns were distributed over NPCOL processes.
7588*
7589* The values of Lr() and Lc() may be determined via a call to the func-
7590* tion PB_NUMROC:
7591* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
7592* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
7593*
7594* Arguments
7595* =========
7596*
7597* UPLO (global input) CHARACTER*1
7598* On entry, UPLO specifies the part of the submatrix sub( A )
7599* to be set:
7600* = 'L' or 'l': Lower triangular part is set; the strictly
7601* upper triangular part of sub( A ) is not changed;
7602* = 'U' or 'u': Upper triangular part is set; the strictly
7603* lower triangular part of sub( A ) is not changed;
7604* Otherwise: All of the matrix sub( A ) is set.
7605*
7606* M (global input) INTEGER
7607* On entry, M specifies the number of rows of the submatrix
7608* sub( A ). M must be at least zero.
7609*
7610* N (global input) INTEGER
7611* On entry, N specifies the number of columns of the submatrix
7612* sub( A ). N must be at least zero.
7613*
7614* ALPHA (global input) COMPLEX
7615* On entry, ALPHA specifies the scalar alpha, i.e., the cons-
7616* tant to which the offdiagonal elements are to be set.
7617*
7618* BETA (global input) COMPLEX
7619* On entry, BETA specifies the scalar beta, i.e., the constant
7620* to which the diagonal elements are to be set.
7621*
7622* A (local input/local output) COMPLEX array
7623* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
7624* at least Lc( 1, JA+N-1 ). Before entry, this array contains
7625* the local entries of the matrix A to be set. On exit, the
7626* leading m by n submatrix sub( A ) is set as follows:
7627*
7628* if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N,
7629* if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N,
7630* otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N,
7631* and IA+i.NE.JA+j,
7632* and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N).
7633*
7634* IA (global input) INTEGER
7635* On entry, IA specifies A's global row index, which points to
7636* the beginning of the submatrix sub( A ).
7637*
7638* JA (global input) INTEGER
7639* On entry, JA specifies A's global column index, which points
7640* to the beginning of the submatrix sub( A ).
7641*
7642* DESCA (global and local input) INTEGER array
7643* On entry, DESCA is an integer array of dimension DLEN_. This
7644* is the array descriptor for the matrix A.
7645*
7646* -- Written on April 1, 1998 by
7647* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
7648*
7649* =====================================================================
7650*
7651* .. Parameters ..
7652 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
7653 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
7654 $ RSRC_
7655 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
7656 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
7657 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
7658 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
7659* ..
7660* .. Local Scalars ..
7661 LOGICAL GODOWN, GOLEFT, ISCOLREP, ISROWREP, LOWER,
7662 $ UPPER
7663 INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
7664 $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, JJA,
7665 $ JJMAX, JOFFA, JOFFD, LCMT, LCMT00, LDA, LMBLOC,
7666 $ LNBLOC, LOW, M1, MB, MBLKD, MBLKS, MBLOC, MP,
7667 $ MRCOL, MRROW, MYCOL, MYROW, N1, NB, NBLKD,
7668 $ NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, QNB, TMP1,
7669 $ UPP
7670* ..
7671* .. Local Arrays ..
7672 INTEGER DESCA2( DLEN_ )
7673* ..
7674* .. External Subroutines ..
7675 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
7677* ..
7678* .. External Functions ..
7679 LOGICAL LSAME
7680 EXTERNAL lsame
7681* ..
7682* .. Intrinsic Functions ..
7683 INTRINSIC min
7684* ..
7685* .. Executable Statements ..
7686*
7687 IF( m.EQ.0 .OR. n.EQ.0 )
7688 $ RETURN
7689*
7690* Convert descriptor
7691*
7692 CALL pb_desctrans( desca, desca2 )
7693*
7694* Get grid parameters
7695*
7696 ictxt = desca2( ctxt_ )
7697 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
7698*
7699 CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
7700 $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
7701 $ iacol, mrrow, mrcol )
7702*
7703 IF( mp.LE.0 .OR. nq.LE.0 )
7704 $ RETURN
7705*
7706 isrowrep = ( desca2( rsrc_ ).LT.0 )
7707 iscolrep = ( desca2( csrc_ ).LT.0 )
7708 lda = desca2( lld_ )
7709*
7710 upper = .NOT.( lsame( uplo, 'L' ) )
7711 lower = .NOT.( lsame( uplo, 'U' ) )
7712*
7713 IF( ( ( lower.AND.upper ).AND.( alpha.EQ.beta ) ).OR.
7714 $ ( isrowrep .AND. iscolrep ) ) THEN
7715 IF( ( mp.GT.0 ).AND.( nq.GT.0 ) )
7716 $ CALL pb_claset( uplo, mp, nq, 0, alpha, beta,
7717 $ a( iia + ( jja - 1 ) * lda ), lda )
7718 RETURN
7719 END IF
7720*
7721* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
7722* ILOW, LOW, IUPP, and UPP.
7723*
7724 mb = desca2( mb_ )
7725 nb = desca2( nb_ )
7726 CALL pb_binfo( 0, mp, nq, imb1, inb1, mb, nb, mrrow, mrcol,
7727 $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
7728 $ lnbloc, ilow, low, iupp, upp )
7729*
7730 ioffa = iia - 1
7731 joffa = jja - 1
7732 iimax = ioffa + mp
7733 jjmax = joffa + nq
7734*
7735 IF( isrowrep ) THEN
7736 pmb = mb
7737 ELSE
7738 pmb = nprow * mb
7739 END IF
7740 IF( iscolrep ) THEN
7741 qnb = nb
7742 ELSE
7743 qnb = npcol * nb
7744 END IF
7745*
7746 m1 = mp
7747 n1 = nq
7748*
7749* Handle the first block of rows or columns separately, and update
7750* LCMT00, MBLKS and NBLKS.
7751*
7752 godown = ( lcmt00.GT.iupp )
7753 goleft = ( lcmt00.LT.ilow )
7754*
7755 IF( .NOT.godown .AND. .NOT.goleft ) THEN
7756*
7757* LCMT00 >= ILOW && LCMT00 <= IUPP
7758*
7759 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
7760 godown = .NOT.goleft
7761*
7762 CALL pb_claset( uplo, imbloc, inbloc, lcmt00, alpha, beta,
7763 $ a( iia+joffa*lda ), lda )
7764 IF( godown ) THEN
7765 IF( upper .AND. nq.GT.inbloc )
7766 $ CALL pb_claset( 'All', imbloc, nq-inbloc, 0, alpha,
7767 $ alpha, a( iia+(joffa+inbloc)*lda ), lda )
7768 iia = iia + imbloc
7769 m1 = m1 - imbloc
7770 ELSE
7771 IF( lower .AND. mp.GT.imbloc )
7772 $ CALL pb_claset( 'All', mp-imbloc, inbloc, 0, alpha,
7773 $ alpha, a( iia+imbloc+joffa*lda ), lda )
7774 jja = jja + inbloc
7775 n1 = n1 - inbloc
7776 END IF
7777*
7778 END IF
7779*
7780 IF( godown ) THEN
7781*
7782 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7783 mblks = mblks - 1
7784 ioffa = ioffa + imbloc
7785*
7786 10 CONTINUE
7787 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7788 lcmt00 = lcmt00 - pmb
7789 mblks = mblks - 1
7790 ioffa = ioffa + mb
7791 GO TO 10
7792 END IF
7793*
7794 tmp1 = min( ioffa, iimax ) - iia + 1
7795 IF( upper .AND. tmp1.GT.0 ) THEN
7796 CALL pb_claset( 'All', tmp1, n1, 0, alpha, alpha,
7797 $ a( iia+joffa*lda ), lda )
7798 iia = iia + tmp1
7799 m1 = m1 - tmp1
7800 END IF
7801*
7802 IF( mblks.LE.0 )
7803 $ RETURN
7804*
7805 lcmt = lcmt00
7806 mblkd = mblks
7807 ioffd = ioffa
7808*
7809 mbloc = mb
7810 20 CONTINUE
7811 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
7812 IF( mblkd.EQ.1 )
7813 $ mbloc = lmbloc
7814 CALL pb_claset( uplo, mbloc, inbloc, lcmt, alpha, beta,
7815 $ a( ioffd+1+joffa*lda ), lda )
7816 lcmt00 = lcmt
7817 lcmt = lcmt - pmb
7818 mblks = mblkd
7819 mblkd = mblkd - 1
7820 ioffa = ioffd
7821 ioffd = ioffd + mbloc
7822 GO TO 20
7823 END IF
7824*
7825 tmp1 = m1 - ioffd + iia - 1
7826 IF( lower .AND. tmp1.GT.0 )
7827 $ CALL pb_claset( 'ALL', tmp1, inbloc, 0, alpha, alpha,
7828 $ a( ioffd+1+joffa*lda ), lda )
7829*
7830 tmp1 = ioffa - iia + 1
7831 m1 = m1 - tmp1
7832 n1 = n1 - inbloc
7833 lcmt00 = lcmt00 + low - ilow + qnb
7834 nblks = nblks - 1
7835 joffa = joffa + inbloc
7836*
7837 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7838 $ CALL pb_claset( 'ALL', tmp1, n1, 0, alpha, alpha,
7839 $ a( iia+joffa*lda ), lda )
7840*
7841 iia = ioffa + 1
7842 jja = joffa + 1
7843*
7844 ELSE IF( goleft ) THEN
7845*
7846 lcmt00 = lcmt00 + low - ilow + qnb
7847 nblks = nblks - 1
7848 joffa = joffa + inbloc
7849*
7850 30 CONTINUE
7851 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
7852 lcmt00 = lcmt00 + qnb
7853 nblks = nblks - 1
7854 joffa = joffa + nb
7855 GO TO 30
7856 END IF
7857*
7858 tmp1 = min( joffa, jjmax ) - jja + 1
7859 IF( lower .AND. tmp1.GT.0 ) THEN
7860 CALL pb_claset( 'All', m1, tmp1, 0, alpha, alpha,
7861 $ a( iia+(jja-1)*lda ), lda )
7862 jja = jja + tmp1
7863 n1 = n1 - tmp1
7864 END IF
7865*
7866 IF( nblks.LE.0 )
7867 $ RETURN
7868*
7869 lcmt = lcmt00
7870 nblkd = nblks
7871 joffd = joffa
7872*
7873 nbloc = nb
7874 40 CONTINUE
7875 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
7876 IF( nblkd.EQ.1 )
7877 $ nbloc = lnbloc
7878 CALL pb_claset( uplo, imbloc, nbloc, lcmt, alpha, beta,
7879 $ a( iia+joffd*lda ), lda )
7880 lcmt00 = lcmt
7881 lcmt = lcmt + qnb
7882 nblks = nblkd
7883 nblkd = nblkd - 1
7884 joffa = joffd
7885 joffd = joffd + nbloc
7886 GO TO 40
7887 END IF
7888*
7889 tmp1 = n1 - joffd + jja - 1
7890 IF( upper .AND. tmp1.GT.0 )
7891 $ CALL pb_claset( 'All', imbloc, tmp1, 0, alpha, alpha,
7892 $ a( iia+joffd*lda ), lda )
7893*
7894 tmp1 = joffa - jja + 1
7895 m1 = m1 - imbloc
7896 n1 = n1 - tmp1
7897 lcmt00 = lcmt00 - ( iupp - upp + pmb )
7898 mblks = mblks - 1
7899 ioffa = ioffa + imbloc
7900*
7901 IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
7902 $ CALL pb_claset( 'All', m1, tmp1, 0, alpha, alpha,
7903 $ a( ioffa+1+(jja-1)*lda ), lda )
7904*
7905 iia = ioffa + 1
7906 jja = joffa + 1
7907*
7908 END IF
7909*
7910 nbloc = nb
7911 50 CONTINUE
7912 IF( nblks.GT.0 ) THEN
7913 IF( nblks.EQ.1 )
7914 $ nbloc = lnbloc
7915 60 CONTINUE
7916 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
7917 lcmt00 = lcmt00 - pmb
7918 mblks = mblks - 1
7919 ioffa = ioffa + mb
7920 GO TO 60
7921 END IF
7922*
7923 tmp1 = min( ioffa, iimax ) - iia + 1
7924 IF( upper .AND. tmp1.GT.0 ) THEN
7925 CALL pb_claset( 'All', tmp1, n1, 0, alpha, alpha,
7926 $ a( iia+joffa*lda ), lda )
7927 iia = iia + tmp1
7928 m1 = m1 - tmp1
7929 END IF
7930*
7931 IF( mblks.LE.0 )
7932 $ RETURN
7933*
7934 lcmt = lcmt00
7935 mblkd = mblks
7936 ioffd = ioffa
7937*
7938 mbloc = mb
7939 70 CONTINUE
7940 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
7941 IF( mblkd.EQ.1 )
7942 $ mbloc = lmbloc
7943 CALL pb_claset( uplo, mbloc, nbloc, lcmt, alpha, beta,
7944 $ a( ioffd+1+joffa*lda ), lda )
7945 lcmt00 = lcmt
7946 lcmt = lcmt - pmb
7947 mblks = mblkd
7948 mblkd = mblkd - 1
7949 ioffa = ioffd
7950 ioffd = ioffd + mbloc
7951 GO TO 70
7952 END IF
7953*
7954 tmp1 = m1 - ioffd + iia - 1
7955 IF( lower .AND. tmp1.GT.0 )
7956 $ CALL pb_claset( 'All', tmp1, nbloc, 0, alpha, alpha,
7957 $ a( ioffd+1+joffa*lda ), lda )
7958*
7959 tmp1 = min( ioffa, iimax ) - iia + 1
7960 m1 = m1 - tmp1
7961 n1 = n1 - nbloc
7962 lcmt00 = lcmt00 + qnb
7963 nblks = nblks - 1
7964 joffa = joffa + nbloc
7965*
7966 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
7967 $ CALL pb_claset( 'All', tmp1, n1, 0, alpha, alpha,
7968 $ a( iia+joffa*lda ), lda )
7969*
7970 iia = ioffa + 1
7971 jja = joffa + 1
7972*
7973 GO TO 50
7974*
7975 END IF
7976*
7977 RETURN
7978*
7979* End of PCLASET
7980*
7981 END
7982 SUBROUTINE pclascal( TYPE, M, N, ALPHA, A, IA, JA, DESCA )
7983*
7984* -- PBLAS test routine (version 2.0) --
7985* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7986* and University of California, Berkeley.
7987* April 1, 1998
7988*
7989* .. Scalar Arguments ..
7990 CHARACTER*1 TYPE
7991 INTEGER IA, JA, M, N
7992 COMPLEX ALPHA
7993* ..
7994* .. Array Arguments ..
7995 INTEGER DESCA( * )
7996 COMPLEX A( * )
7997* ..
7998*
7999* Purpose
8000* =======
8001*
8002* PCLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted
8003* by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full,
8004* upper triangular, lower triangular or upper Hessenberg.
8005*
8006* Notes
8007* =====
8008*
8009* A description vector is associated with each 2D block-cyclicly dis-
8010* tributed matrix. This vector stores the information required to
8011* establish the mapping between a matrix entry and its corresponding
8012* process and memory location.
8013*
8014* In the following comments, the character _ should be read as
8015* "of the distributed matrix". Let A be a generic term for any 2D
8016* block cyclicly distributed matrix. Its description vector is DESCA:
8017*
8018* NOTATION STORED IN EXPLANATION
8019* ---------------- --------------- ------------------------------------
8020* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8021* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8022* the NPROW x NPCOL BLACS process grid
8023* A is distributed over. The context
8024* itself is global, but the handle
8025* (the integer value) may vary.
8026* M_A (global) DESCA( M_ ) The number of rows in the distribu-
8027* ted matrix A, M_A >= 0.
8028* N_A (global) DESCA( N_ ) The number of columns in the distri-
8029* buted matrix A, N_A >= 0.
8030* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8031* block of the matrix A, IMB_A > 0.
8032* INB_A (global) DESCA( INB_ ) The number of columns of the upper
8033* left block of the matrix A,
8034* INB_A > 0.
8035* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8036* bute the last M_A-IMB_A rows of A,
8037* MB_A > 0.
8038* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8039* bute the last N_A-INB_A columns of
8040* A, NB_A > 0.
8041* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8042* row of the matrix A is distributed,
8043* NPROW > RSRC_A >= 0.
8044* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8045* first column of A is distributed.
8046* NPCOL > CSRC_A >= 0.
8047* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8048* array storing the local blocks of
8049* the distributed matrix A,
8050* IF( Lc( 1, N_A ) > 0 )
8051* LLD_A >= MAX( 1, Lr( 1, M_A ) )
8052* ELSE
8053* LLD_A >= 1.
8054*
8055* Let K be the number of rows of a matrix A starting at the global in-
8056* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8057* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8058* receive if these K rows were distributed over NPROW processes. If K
8059* is the number of columns of a matrix A starting at the global index
8060* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8061* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8062* these K columns were distributed over NPCOL processes.
8063*
8064* The values of Lr() and Lc() may be determined via a call to the func-
8065* tion PB_NUMROC:
8066* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8067* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8068*
8069* Arguments
8070* =========
8071*
8072* TYPE (global input) CHARACTER*1
8073* On entry, TYPE specifies the type of the input submatrix as
8074* follows:
8075* = 'L' or 'l': sub( A ) is a lower triangular matrix,
8076* = 'U' or 'u': sub( A ) is an upper triangular matrix,
8077* = 'H' or 'h': sub( A ) is an upper Hessenberg matrix,
8078* otherwise sub( A ) is a full matrix.
8079*
8080* M (global input) INTEGER
8081* On entry, M specifies the number of rows of the submatrix
8082* sub( A ). M must be at least zero.
8083*
8084* N (global input) INTEGER
8085* On entry, N specifies the number of columns of the submatrix
8086* sub( A ). N must be at least zero.
8087*
8088* ALPHA (global input) COMPLEX
8089* On entry, ALPHA specifies the scalar alpha.
8090*
8091* A (local input/local output) COMPLEX array
8092* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8093* at least Lc( 1, JA+N-1 ). Before entry, this array contains
8094* the local entries of the matrix A.
8095* On exit, the local entries of this array corresponding to the
8096* to the entries of the submatrix sub( A ) are overwritten by
8097* the local entries of the m by n scaled submatrix.
8098*
8099* IA (global input) INTEGER
8100* On entry, IA specifies A's global row index, which points to
8101* the beginning of the submatrix sub( A ).
8102*
8103* JA (global input) INTEGER
8104* On entry, JA specifies A's global column index, which points
8105* to the beginning of the submatrix sub( A ).
8106*
8107* DESCA (global and local input) INTEGER array
8108* On entry, DESCA is an integer array of dimension DLEN_. This
8109* is the array descriptor for the matrix A.
8110*
8111* -- Written on April 1, 1998 by
8112* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8113*
8114* =====================================================================
8115*
8116* .. Parameters ..
8117 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8118 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8119 $ RSRC_
8120 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8121 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8122 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8123 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8124* ..
8125* .. Local Scalars ..
8126 CHARACTER*1 UPLO
8127 LOGICAL GODOWN, GOLEFT, LOWER, UPPER
8128 INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1,
8129 $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE,
8130 $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00,
8131 $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS,
8132 $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB,
8133 $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB,
8134 $ QNB, TMP1, UPP
8135* ..
8136* .. Local Arrays ..
8137 INTEGER DESCA2( DLEN_ )
8138* ..
8139* .. External Subroutines ..
8140 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
8142* ..
8143* .. External Functions ..
8144 LOGICAL LSAME
8145 INTEGER PB_NUMROC
8146 EXTERNAL lsame, pb_numroc
8147* ..
8148* .. Intrinsic Functions ..
8149 INTRINSIC min
8150* ..
8151* .. Executable Statements ..
8152*
8153* Convert descriptor
8154*
8155 CALL pb_desctrans( desca, desca2 )
8156*
8157* Get grid parameters
8158*
8159 ictxt = desca2( ctxt_ )
8160 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8161*
8162* Quick return if possible
8163*
8164 IF( m.EQ.0 .OR. n.EQ.0 )
8165 $ RETURN
8166*
8167 IF( lsame( TYPE, 'L' ) ) then
8168 itype = 1
8169 uplo = TYPE
8170 upper = .false.
8171 lower = .true.
8172 ioffd = 0
8173 ELSE IF( lsame( TYPE, 'U' ) ) then
8174 itype = 2
8175 uplo = TYPE
8176 upper = .true.
8177 lower = .false.
8178 ioffd = 0
8179 ELSE IF( lsame( TYPE, 'H' ) ) then
8180 itype = 3
8181 uplo = 'U'
8182 upper = .true.
8183 lower = .false.
8184 ioffd = 1
8185 ELSE
8186 itype = 0
8187 uplo = 'A'
8188 upper = .true.
8189 lower = .true.
8190 ioffd = 0
8191 END IF
8192*
8193* Compute local indexes
8194*
8195 IF( itype.EQ.0 ) THEN
8196*
8197* Full matrix
8198*
8199 CALL pb_infog2l( ia, ja, desca2, nprow, npcol, myrow, mycol,
8200 $ iia, jja, iarow, iacol )
8201 mp = pb_numroc( m, ia, desca2( imb_ ), desca2( mb_ ), myrow,
8202 $ desca2( rsrc_ ), nprow )
8203 nq = pb_numroc( n, ja, desca2( inb_ ), desca2( nb_ ), mycol,
8204 $ desca2( csrc_ ), npcol )
8205*
8206 IF( mp.LE.0 .OR. nq.LE.0 )
8207 $ RETURN
8208*
8209 lda = desca2( lld_ )
8210 ioffa = iia + ( jja - 1 ) * lda
8211*
8212 CALL pb_clascal( 'All', mp, nq, 0, alpha, a( ioffa ), lda )
8213*
8214 ELSE
8215*
8216* Trapezoidal matrix
8217*
8218 CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
8219 $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
8220 $ iacol, mrrow, mrcol )
8221*
8222 IF( mp.LE.0 .OR. nq.LE.0 )
8223 $ RETURN
8224*
8225* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC,
8226* LNBLOC, ILOW, LOW, IUPP, and UPP.
8227*
8228 mb = desca2( mb_ )
8229 nb = desca2( nb_ )
8230 lda = desca2( lld_ )
8231*
8232 CALL pb_binfo( ioffd, mp, nq, imb1, inb1, mb, nb, mrrow,
8233 $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
8234 $ lmbloc, lnbloc, ilow, low, iupp, upp )
8235*
8236 m1 = mp
8237 n1 = nq
8238 ioffa = iia - 1
8239 joffa = jja - 1
8240 iimax = ioffa + mp
8241 jjmax = joffa + nq
8242*
8243 IF( desca2( rsrc_ ).LT.0 ) THEN
8244 pmb = mb
8245 ELSE
8246 pmb = nprow * mb
8247 END IF
8248 IF( desca2( csrc_ ).LT.0 ) THEN
8249 qnb = nb
8250 ELSE
8251 qnb = npcol * nb
8252 END IF
8253*
8254* Handle the first block of rows or columns separately, and
8255* update LCMT00, MBLKS and NBLKS.
8256*
8257 godown = ( lcmt00.GT.iupp )
8258 goleft = ( lcmt00.LT.ilow )
8259*
8260 IF( .NOT.godown .AND. .NOT.goleft ) THEN
8261*
8262* LCMT00 >= ILOW && LCMT00 <= IUPP
8263*
8264 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
8265 godown = .NOT.goleft
8266*
8267 CALL pb_clascal( uplo, imbloc, inbloc, lcmt00, alpha,
8268 $ a( iia+joffa*lda ), lda )
8269 IF( godown ) THEN
8270 IF( upper .AND. nq.GT.inbloc )
8271 $ CALL pb_clascal( 'All', imbloc, nq-inbloc, 0, alpha,
8272 $ a( iia+(joffa+inbloc)*lda ), lda )
8273 iia = iia + imbloc
8274 m1 = m1 - imbloc
8275 ELSE
8276 IF( lower .AND. mp.GT.imbloc )
8277 $ CALL pb_clascal( 'All', mp-imbloc, inbloc, 0, alpha,
8278 $ a( iia+imbloc+joffa*lda ), lda )
8279 jja = jja + inbloc
8280 n1 = n1 - inbloc
8281 END IF
8282*
8283 END IF
8284*
8285 IF( godown ) THEN
8286*
8287 lcmt00 = lcmt00 - ( iupp - upp + pmb )
8288 mblks = mblks - 1
8289 ioffa = ioffa + imbloc
8290*
8291 10 CONTINUE
8292 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8293 lcmt00 = lcmt00 - pmb
8294 mblks = mblks - 1
8295 ioffa = ioffa + mb
8296 GO TO 10
8297 END IF
8298*
8299 tmp1 = min( ioffa, iimax ) - iia + 1
8300 IF( upper .AND. tmp1.GT.0 ) THEN
8301 CALL pb_clascal( 'All', tmp1, n1, 0, alpha,
8302 $ a( iia+joffa*lda ), lda )
8303 iia = iia + tmp1
8304 m1 = m1 - tmp1
8305 END IF
8306*
8307 IF( mblks.LE.0 )
8308 $ RETURN
8309*
8310 lcmt = lcmt00
8311 mblkd = mblks
8312 ioffd = ioffa
8313*
8314 mbloc = mb
8315 20 CONTINUE
8316 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
8317 IF( mblkd.EQ.1 )
8318 $ mbloc = lmbloc
8319 CALL pb_clascal( uplo, mbloc, inbloc, lcmt, alpha,
8320 $ a( ioffd+1+joffa*lda ), lda )
8321 lcmt00 = lcmt
8322 lcmt = lcmt - pmb
8323 mblks = mblkd
8324 mblkd = mblkd - 1
8325 ioffa = ioffd
8326 ioffd = ioffd + mbloc
8327 GO TO 20
8328 END IF
8329*
8330 tmp1 = m1 - ioffd + iia - 1
8331 IF( lower .AND. tmp1.GT.0 )
8332 $ CALL pb_clascal( 'All', tmp1, inbloc, 0, alpha,
8333 $ a( ioffd+1+joffa*lda ), lda )
8334*
8335 tmp1 = ioffa - iia + 1
8336 m1 = m1 - tmp1
8337 n1 = n1 - inbloc
8338 lcmt00 = lcmt00 + low - ilow + qnb
8339 nblks = nblks - 1
8340 joffa = joffa + inbloc
8341*
8342 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
8343 $ CALL pb_clascal( 'All', tmp1, n1, 0, alpha,
8344 $ a( iia+joffa*lda ), lda )
8345*
8346 iia = ioffa + 1
8347 jja = joffa + 1
8348*
8349 ELSE IF( goleft ) THEN
8350*
8351 lcmt00 = lcmt00 + low - ilow + qnb
8352 nblks = nblks - 1
8353 joffa = joffa + inbloc
8354*
8355 30 CONTINUE
8356 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
8357 lcmt00 = lcmt00 + qnb
8358 nblks = nblks - 1
8359 joffa = joffa + nb
8360 GO TO 30
8361 END IF
8362*
8363 tmp1 = min( joffa, jjmax ) - jja + 1
8364 IF( lower .AND. tmp1.GT.0 ) THEN
8365 CALL pb_clascal( 'All', m1, tmp1, 0, alpha,
8366 $ a( iia+(jja-1)*lda ), lda )
8367 jja = jja + tmp1
8368 n1 = n1 - tmp1
8369 END IF
8370*
8371 IF( nblks.LE.0 )
8372 $ RETURN
8373*
8374 lcmt = lcmt00
8375 nblkd = nblks
8376 joffd = joffa
8377*
8378 nbloc = nb
8379 40 CONTINUE
8380 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
8381 IF( nblkd.EQ.1 )
8382 $ nbloc = lnbloc
8383 CALL pb_clascal( uplo, imbloc, nbloc, lcmt, alpha,
8384 $ a( iia+joffd*lda ), lda )
8385 lcmt00 = lcmt
8386 lcmt = lcmt + qnb
8387 nblks = nblkd
8388 nblkd = nblkd - 1
8389 joffa = joffd
8390 joffd = joffd + nbloc
8391 GO TO 40
8392 END IF
8393*
8394 tmp1 = n1 - joffd + jja - 1
8395 IF( upper .AND. tmp1.GT.0 )
8396 $ CALL pb_clascal( 'All', imbloc, tmp1, 0, alpha,
8397 $ a( iia+joffd*lda ), lda )
8398*
8399 tmp1 = joffa - jja + 1
8400 m1 = m1 - imbloc
8401 n1 = n1 - tmp1
8402 lcmt00 = lcmt00 - ( iupp - upp + pmb )
8403 mblks = mblks - 1
8404 ioffa = ioffa + imbloc
8405*
8406 IF( lower .AND. m1.GT.0 .AND. tmp1.GT.0 )
8407 $ CALL pb_clascal( 'All', m1, tmp1, 0, alpha,
8408 $ a( ioffa+1+(jja-1)*lda ), lda )
8409*
8410 iia = ioffa + 1
8411 jja = joffa + 1
8412*
8413 END IF
8414*
8415 nbloc = nb
8416 50 CONTINUE
8417 IF( nblks.GT.0 ) THEN
8418 IF( nblks.EQ.1 )
8419 $ nbloc = lnbloc
8420 60 CONTINUE
8421 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
8422 lcmt00 = lcmt00 - pmb
8423 mblks = mblks - 1
8424 ioffa = ioffa + mb
8425 GO TO 60
8426 END IF
8427*
8428 tmp1 = min( ioffa, iimax ) - iia + 1
8429 IF( upper .AND. tmp1.GT.0 ) THEN
8430 CALL pb_clascal( 'All', tmp1, n1, 0, alpha,
8431 $ a( iia+joffa*lda ), lda )
8432 iia = iia + tmp1
8433 m1 = m1 - tmp1
8434 END IF
8435*
8436 IF( mblks.LE.0 )
8437 $ RETURN
8438*
8439 lcmt = lcmt00
8440 mblkd = mblks
8441 ioffd = ioffa
8442*
8443 mbloc = mb
8444 70 CONTINUE
8445 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
8446 IF( mblkd.EQ.1 )
8447 $ mbloc = lmbloc
8448 CALL pb_clascal( uplo, mbloc, nbloc, lcmt, alpha,
8449 $ a( ioffd+1+joffa*lda ), lda )
8450 lcmt00 = lcmt
8451 lcmt = lcmt - pmb
8452 mblks = mblkd
8453 mblkd = mblkd - 1
8454 ioffa = ioffd
8455 ioffd = ioffd + mbloc
8456 GO TO 70
8457 END IF
8458*
8459 tmp1 = m1 - ioffd + iia - 1
8460 IF( lower .AND. tmp1.GT.0 )
8461 $ CALL pb_clascal( 'All', tmp1, nbloc, 0, alpha,
8462 $ a( ioffd+1+joffa*lda ), lda )
8463*
8464 tmp1 = min( ioffa, iimax ) - iia + 1
8465 m1 = m1 - tmp1
8466 n1 = n1 - nbloc
8467 lcmt00 = lcmt00 + qnb
8468 nblks = nblks - 1
8469 joffa = joffa + nbloc
8470*
8471 IF( upper .AND. tmp1.GT.0 .AND. n1.GT.0 )
8472 $ CALL pb_clascal( 'All', tmp1, n1, 0, alpha,
8473 $ a( iia+joffa*lda ), lda )
8474*
8475 iia = ioffa + 1
8476 jja = joffa + 1
8477*
8478 GO TO 50
8479*
8480 END IF
8481*
8482 END IF
8483*
8484 RETURN
8485*
8486* End of PCLASCAL
8487*
8488 END
8489 SUBROUTINE pclagen( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA,
8490 $ DESCA, IASEED, A, LDA )
8491*
8492* -- PBLAS test routine (version 2.0) --
8493* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8494* and University of California, Berkeley.
8495* April 1, 1998
8496*
8497* .. Scalar Arguments ..
8498 LOGICAL inplace
8499 CHARACTER*1 aform, diag
8500 INTEGER ia, iaseed, ja, lda, m, n, offa
8501* ..
8502* .. Array Arguments ..
8503 INTEGER desca( * )
8504 COMPLEX A( LDA, * )
8505* ..
8506*
8507* Purpose
8508* =======
8509*
8510* PCLAGEN generates (or regenerates) a submatrix sub( A ) denoting
8511* A(IA:IA+M-1,JA:JA+N-1).
8512*
8513* Notes
8514* =====
8515*
8516* A description vector is associated with each 2D block-cyclicly dis-
8517* tributed matrix. This vector stores the information required to
8518* establish the mapping between a matrix entry and its corresponding
8519* process and memory location.
8520*
8521* In the following comments, the character _ should be read as
8522* "of the distributed matrix". Let A be a generic term for any 2D
8523* block cyclicly distributed matrix. Its description vector is DESCA:
8524*
8525* NOTATION STORED IN EXPLANATION
8526* ---------------- --------------- ------------------------------------
8527* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8528* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8529* the NPROW x NPCOL BLACS process grid
8530* A is distributed over. The context
8531* itself is global, but the handle
8532* (the integer value) may vary.
8533* M_A (global) DESCA( M_ ) The number of rows in the distribu-
8534* ted matrix A, M_A >= 0.
8535* N_A (global) DESCA( N_ ) The number of columns in the distri-
8536* buted matrix A, N_A >= 0.
8537* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8538* block of the matrix A, IMB_A > 0.
8539* INB_A (global) DESCA( INB_ ) The number of columns of the upper
8540* left block of the matrix A,
8541* INB_A > 0.
8542* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8543* bute the last M_A-IMB_A rows of A,
8544* MB_A > 0.
8545* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8546* bute the last N_A-INB_A columns of
8547* A, NB_A > 0.
8548* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8549* row of the matrix A is distributed,
8550* NPROW > RSRC_A >= 0.
8551* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8552* first column of A is distributed.
8553* NPCOL > CSRC_A >= 0.
8554* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8555* array storing the local blocks of
8556* the distributed matrix A,
8557* IF( Lc( 1, N_A ) > 0 )
8558* LLD_A >= MAX( 1, Lr( 1, M_A ) )
8559* ELSE
8560* LLD_A >= 1.
8561*
8562* Let K be the number of rows of a matrix A starting at the global in-
8563* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8564* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8565* receive if these K rows were distributed over NPROW processes. If K
8566* is the number of columns of a matrix A starting at the global index
8567* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8568* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8569* these K columns were distributed over NPCOL processes.
8570*
8571* The values of Lr() and Lc() may be determined via a call to the func-
8572* tion PB_NUMROC:
8573* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8574* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8575*
8576* Arguments
8577* =========
8578*
8579* INPLACE (global input) LOGICAL
8580* On entry, INPLACE specifies if the matrix should be generated
8581* in place or not. If INPLACE is .TRUE., the local random array
8582* to be generated will start in memory at the local memory lo-
8583* cation A( 1, 1 ), otherwise it will start at the local posi-
8584* tion induced by IA and JA.
8585*
8586* AFORM (global input) CHARACTER*1
8587* On entry, AFORM specifies the type of submatrix to be genera-
8588* ted as follows:
8589* AFORM = 'S', sub( A ) is a symmetric matrix,
8590* AFORM = 'H', sub( A ) is a Hermitian matrix,
8591* AFORM = 'T', sub( A ) is overrwritten with the transpose
8592* of what would normally be generated,
8593* AFORM = 'C', sub( A ) is overwritten with the conjugate
8594* transpose of what would normally be genera-
8595* ted.
8596* AFORM = 'N', a random submatrix is generated.
8597*
8598* DIAG (global input) CHARACTER*1
8599* On entry, DIAG specifies if the generated submatrix is diago-
8600* nally dominant or not as follows:
8601* DIAG = 'D' : sub( A ) is diagonally dominant,
8602* DIAG = 'N' : sub( A ) is not diagonally dominant.
8603*
8604* OFFA (global input) INTEGER
8605* On entry, OFFA specifies the offdiagonal of the underlying
8606* matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma-
8607* trix is symmetric, Hermitian or diagonally dominant. OFFA = 0
8608* specifies the main diagonal, OFFA > 0 specifies a subdiago-
8609* nal, and OFFA < 0 specifies a superdiagonal (see further de-
8610* tails).
8611*
8612* M (global input) INTEGER
8613* On entry, M specifies the global number of matrix rows of the
8614* submatrix sub( A ) to be generated. M must be at least zero.
8615*
8616* N (global input) INTEGER
8617* On entry, N specifies the global number of matrix columns of
8618* the submatrix sub( A ) to be generated. N must be at least
8619* zero.
8620*
8621* IA (global input) INTEGER
8622* On entry, IA specifies A's global row index, which points to
8623* the beginning of the submatrix sub( A ).
8624*
8625* JA (global input) INTEGER
8626* On entry, JA specifies A's global column index, which points
8627* to the beginning of the submatrix sub( A ).
8628*
8629* DESCA (global and local input) INTEGER array
8630* On entry, DESCA is an integer array of dimension DLEN_. This
8631* is the array descriptor for the matrix A.
8632*
8633* IASEED (global input) INTEGER
8634* On entry, IASEED specifies the seed number to generate the
8635* matrix A. IASEED must be at least zero.
8636*
8637* A (local output) COMPLEX array
8638* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8639* at least Lc( 1, JA+N-1 ). On exit, this array contains the
8640* local entries of the randomly generated submatrix sub( A ).
8641*
8642* LDA (local input) INTEGER
8643* On entry, LDA specifies the local leading dimension of the
8644* array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_).
8645* This restriction is however not enforced, and this subroutine
8646* requires only that LDA >= MAX( 1, Mp ) where
8647*
8648* Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ).
8649*
8650* PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW
8651* and NPCOL can be determined by calling the BLACS subroutine
8652* BLACS_GRIDINFO.
8653*
8654* Further Details
8655* ===============
8656*
8657* OFFD is tied to the matrix described by DESCA, as opposed to the
8658* piece that is currently (re)generated. This is a global information
8659* independent from the distribution parameters. Below are examples of
8660* the meaning of OFFD for a global 7 by 5 matrix:
8661*
8662* ---------------------------------------------------------------------
8663* OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4
8664* -------|-------------------------------------------------------------
8665* | | OFFD=-1 | OFFD=0 OFFD=2
8666* | V V
8667* 0 | . d . . . -> d . . . . . . . . .
8668* 1 | . . d . . . d . . . . . . . .
8669* 2 | . . . d . . . d . . -> d . . . .
8670* 3 | . . . . d . . . d . . d . . .
8671* 4 | . . . . . . . . . d . . d . .
8672* 5 | . . . . . . . . . . . . . d .
8673* 6 | . . . . . . . . . . . . . . d
8674* ---------------------------------------------------------------------
8675*
8676* -- Written on April 1, 1998 by
8677* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
8678*
8679* =====================================================================
8680*
8681* .. Parameters ..
8682 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
8683 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
8684 $ RSRC_
8685 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
8686 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
8687 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
8688 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
8689 INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
8690 $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
8691 $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
8692 PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
8693 $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
8694 $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
8695 $ jmp_nqnb = 10, jmp_nqinbloc = 11,
8696 $ jmp_len = 11 )
8697 REAL ZERO
8698 PARAMETER ( ZERO = 0.0e+0 )
8699* ..
8700* .. Local Scalars ..
8701 LOGICAL DIAGDO, SYMM, HERM, NOTRAN
8702 INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK,
8703 $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB,
8704 $ INB1, INBLOC, INBVIR, INFO, IOFFDA, ITMP, IUPP,
8705 $ ivir, jja, jlocblk, jlocoff, jvir, lcmt00,
8706 $ lmbloc, lnbloc, low, maxmn, mb, mblks, mp,
8707 $ mrcol, mrrow, mycdist, mycol, myrdist, myrow,
8708 $ nb, nblks, npcol, nprow, nq, nvir, rsrc, upp
8709 COMPLEX ALPHA
8710* ..
8711* .. Local Arrays ..
8712 INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ),
8713 $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 )
8714* ..
8715* .. External Subroutines ..
8716 EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO,
8720* ..
8721* .. External Functions ..
8722 LOGICAL LSAME
8723 EXTERNAL LSAME
8724* ..
8725* .. Intrinsic Functions ..
8726 INTRINSIC CMPLX, MAX, MIN, REAL
8727* ..
8728* .. Data Statements ..
8729 DATA ( muladd0( i ), i = 1, 4 ) / 20077, 16838,
8730 $ 12345, 0 /
8731* ..
8732* .. Executable Statements ..
8733*
8734* Convert descriptor
8735*
8736 CALL pb_desctrans( desca, desca2 )
8737*
8738* Test the input arguments
8739*
8740 ictxt = desca2( ctxt_ )
8741 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
8742*
8743* Test the input parameters
8744*
8745 info = 0
8746 IF( nprow.EQ.-1 ) THEN
8747 info = -( 1000 + ctxt_ )
8748 ELSE
8749 symm = lsame( aform, 'S' )
8750 herm = lsame( aform, 'H' )
8751 notran = lsame( aform, 'N' )
8752 diagdo = lsame( diag, 'D' )
8753 IF( .NOT.( symm.OR.herm.OR.notran ) .AND.
8754 $ .NOT.( lsame( aform, 'T' ) ) .AND.
8755 $ .NOT.( lsame( aform, 'C' ) ) ) THEN
8756 info = -2
8757 ELSE IF( ( .NOT.diagdo ) .AND.
8758 $ ( .NOT.lsame( diag, 'N' ) ) ) THEN
8759 info = -3
8760 END IF
8761 CALL pb_chkmat( ictxt, m, 5, n, 6, ia, ja, desca2, 10, info )
8762 END IF
8763*
8764 IF( info.NE.0 ) THEN
8765 CALL pxerbla( ictxt, 'PCLAGEN', -info )
8766 RETURN
8767 END IF
8768*
8769* Quick return if possible
8770*
8771 IF( ( m.LE.0 ).OR.( n.LE.0 ) )
8772 $ RETURN
8773*
8774* Start the operations
8775*
8776 mb = desca2( mb_ )
8777 nb = desca2( nb_ )
8778 imb = desca2( imb_ )
8779 inb = desca2( inb_ )
8780 rsrc = desca2( rsrc_ )
8781 csrc = desca2( csrc_ )
8782*
8783* Figure out local information about the distributed matrix operand
8784*
8785 CALL pb_ainfog2l( m, n, ia, ja, desca2, nprow, npcol, myrow,
8786 $ mycol, imb1, inb1, mp, nq, iia, jja, iarow,
8787 $ iacol, mrrow, mrcol )
8788*
8789* Decide where the entries shall be stored in memory
8790*
8791 IF( inplace ) THEN
8792 iia = 1
8793 jja = 1
8794 END IF
8795*
8796* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
8797* ILOW, LOW, IUPP, and UPP.
8798*
8799 ioffda = ja + offa - ia
8800 CALL pb_binfo( ioffda, mp, nq, imb1, inb1, mb, nb, mrrow,
8801 $ mrcol, lcmt00, mblks, nblks, imbloc, inbloc,
8802 $ lmbloc, lnbloc, ilow, low, iupp, upp )
8803*
8804* Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST
8805* This values correspond to the square virtual underlying matrix
8806* of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used
8807* to set up the random sequence. For practical purposes, the size
8808* of this virtual matrix is upper bounded by M_ + N_ - 1.
8809*
8810 itmp = max( 0, -offa )
8811 ivir = ia + itmp
8812 imbvir = imb + itmp
8813 nvir = desca2( m_ ) + itmp
8814*
8815 CALL pb_locinfo( ivir, imbvir, mb, myrow, rsrc, nprow, ilocblk,
8816 $ ilocoff, myrdist )
8817*
8818 itmp = max( 0, offa )
8819 jvir = ja + itmp
8820 inbvir = inb + itmp
8821 nvir = max( max( nvir, desca2( n_ ) + itmp ),
8822 $ desca2( m_ ) + desca2( n_ ) - 1 )
8823*
8824 CALL pb_locinfo( jvir, inbvir, nb, mycol, csrc, npcol, jlocblk,
8825 $ jlocoff, mycdist )
8826*
8827 IF( symm .OR. herm .OR. notran ) THEN
8828*
8829 CALL pb_initjmp( .true., nvir, imbvir, inbvir, imbloc, inbloc,
8830 $ mb, nb, rsrc, csrc, nprow, npcol, 2, jmp )
8831*
8832* Compute constants to jump JMP( * ) numbers in the sequence
8833*
8834 CALL pb_initmuladd( muladd0, jmp, imuladd )
8835*
8836* Compute and set the random value corresponding to A( IA, JA )
8837*
8838 CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8839 $ myrdist, mycdist, nprow, npcol, jmp,
8840 $ imuladd, iran )
8841*
8842 CALL pb_clagen( 'Lower', aform, a( iia, jja ), lda, lcmt00,
8843 $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8844 $ nb, lnbloc, jmp, imuladd )
8845*
8846 END IF
8847*
8848 IF( symm .OR. herm .OR. ( .NOT. notran ) ) THEN
8849*
8850 CALL pb_initjmp( .false., nvir, imbvir, inbvir, imbloc, inbloc,
8851 $ mb, nb, rsrc, csrc, nprow, npcol, 2, jmp )
8852*
8853* Compute constants to jump JMP( * ) numbers in the sequence
8854*
8855 CALL pb_initmuladd( muladd0, jmp, imuladd )
8856*
8857* Compute and set the random value corresponding to A( IA, JA )
8858*
8859 CALL pb_setlocran( iaseed, ilocblk, jlocblk, ilocoff, jlocoff,
8860 $ myrdist, mycdist, nprow, npcol, jmp,
8861 $ imuladd, iran )
8862*
8863 CALL pb_clagen( 'Upper', aform, a( iia, jja ), lda, lcmt00,
8864 $ iran, mblks, imbloc, mb, lmbloc, nblks, inbloc,
8865 $ nb, lnbloc, jmp, imuladd )
8866*
8867 END IF
8868*
8869 IF( diagdo ) THEN
8870*
8871 maxmn = max( desca2( m_ ), desca2( n_ ) )
8872 IF( herm ) THEN
8873 alpha = cmplx( real( 2 * maxmn ), zero )
8874 ELSE
8875 alpha = cmplx( real( maxmn ), real( maxmn ) )
8876 END IF
8877*
8878 IF( ioffda.GE.0 ) THEN
8879 CALL pcladom( inplace, min( max( 0, m-ioffda ), n ), alpha,
8880 $ a, min( ia+ioffda, ia+m-1 ), ja, desca )
8881 ELSE
8882 CALL pcladom( inplace, min( m, max( 0, n+ioffda ) ), alpha,
8883 $ a, ia, min( ja-ioffda, ja+n-1 ), desca )
8884 END IF
8885*
8886 END IF
8887*
8888 RETURN
8889*
8890* End of PCLAGEN
8891*
8892 END
8893 SUBROUTINE pcladom( INPLACE, N, ALPHA, A, IA, JA, DESCA )
8894*
8895* -- PBLAS test routine (version 2.0) --
8896* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8897* and University of California, Berkeley.
8898* April 1, 1998
8899*
8900* .. Scalar Arguments ..
8901 LOGICAL INPLACE
8902 INTEGER IA, JA, N
8903 COMPLEX ALPHA
8904* ..
8905* .. Array Arguments ..
8906 INTEGER DESCA( * )
8907 COMPLEX A( * )
8908* ..
8909*
8910* Purpose
8911* =======
8912*
8913* PCLADOM adds alpha to the diagonal entries of an n by n submatrix
8914* sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ).
8915*
8916* Notes
8917* =====
8918*
8919* A description vector is associated with each 2D block-cyclicly dis-
8920* tributed matrix. This vector stores the information required to
8921* establish the mapping between a matrix entry and its corresponding
8922* process and memory location.
8923*
8924* In the following comments, the character _ should be read as
8925* "of the distributed matrix". Let A be a generic term for any 2D
8926* block cyclicly distributed matrix. Its description vector is DESCA:
8927*
8928* NOTATION STORED IN EXPLANATION
8929* ---------------- --------------- ------------------------------------
8930* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
8931* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
8932* the NPROW x NPCOL BLACS process grid
8933* A is distributed over. The context
8934* itself is global, but the handle
8935* (the integer value) may vary.
8936* M_A (global) DESCA( M_ ) The number of rows in the distribu-
8937* ted matrix A, M_A >= 0.
8938* N_A (global) DESCA( N_ ) The number of columns in the distri-
8939* buted matrix A, N_A >= 0.
8940* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
8941* block of the matrix A, IMB_A > 0.
8942* INB_A (global) DESCA( INB_ ) The number of columns of the upper
8943* left block of the matrix A,
8944* INB_A > 0.
8945* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
8946* bute the last M_A-IMB_A rows of A,
8947* MB_A > 0.
8948* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
8949* bute the last N_A-INB_A columns of
8950* A, NB_A > 0.
8951* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
8952* row of the matrix A is distributed,
8953* NPROW > RSRC_A >= 0.
8954* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
8955* first column of A is distributed.
8956* NPCOL > CSRC_A >= 0.
8957* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
8958* array storing the local blocks of
8959* the distributed matrix A,
8960* IF( Lc( 1, N_A ) > 0 )
8961* LLD_A >= MAX( 1, Lr( 1, M_A ) )
8962* ELSE
8963* LLD_A >= 1.
8964*
8965* Let K be the number of rows of a matrix A starting at the global in-
8966* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
8967* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
8968* receive if these K rows were distributed over NPROW processes. If K
8969* is the number of columns of a matrix A starting at the global index
8970* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
8971* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
8972* these K columns were distributed over NPCOL processes.
8973*
8974* The values of Lr() and Lc() may be determined via a call to the func-
8975* tion PB_NUMROC:
8976* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
8977* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
8978*
8979* Arguments
8980* =========
8981*
8982* INPLACE (global input) LOGICAL
8983* On entry, INPLACE specifies if the matrix should be generated
8984* in place or not. If INPLACE is .TRUE., the local random array
8985* to be generated will start in memory at the local memory lo-
8986* cation A( 1, 1 ), otherwise it will start at the local posi-
8987* tion induced by IA and JA.
8988*
8989* N (global input) INTEGER
8990* On entry, N specifies the global order of the submatrix
8991* sub( A ) to be modified. N must be at least zero.
8992*
8993* ALPHA (global input) COMPLEX
8994* On entry, ALPHA specifies the scalar alpha.
8995*
8996* A (local input/local output) COMPLEX array
8997* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
8998* at least Lc( 1, JA+N-1 ). Before entry, this array contains
8999* the local entries of the matrix A. On exit, the local entries
9000* of this array corresponding to the main diagonal of sub( A )
9001* have been updated.
9002*
9003* IA (global input) INTEGER
9004* On entry, IA specifies A's global row index, which points to
9005* the beginning of the submatrix sub( A ).
9006*
9007* JA (global input) INTEGER
9008* On entry, JA specifies A's global column index, which points
9009* to the beginning of the submatrix sub( A ).
9010*
9011* DESCA (global and local input) INTEGER array
9012* On entry, DESCA is an integer array of dimension DLEN_. This
9013* is the array descriptor for the matrix A.
9014*
9015* -- Written on April 1, 1998 by
9016* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9017*
9018* =====================================================================
9019*
9020* .. Parameters ..
9021 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
9022 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
9023 $ RSRC_
9024 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
9025 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
9026 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
9027 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
9028* ..
9029* .. Local Scalars ..
9030 LOGICAL GODOWN, GOLEFT
9031 INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW,
9032 $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP,
9033 $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1,
9034 $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC,
9035 $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS,
9036 $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP
9037 COMPLEX ATMP
9038* ..
9039* .. Local Scalars ..
9040 INTEGER DESCA2( DLEN_ )
9041* ..
9042* .. External Subroutines ..
9043 EXTERNAL blacs_gridinfo, pb_ainfog2l, pb_binfo,
9044 $ pb_desctrans
9045* ..
9046* .. Intrinsic Functions ..
9047 INTRINSIC abs, aimag, cmplx, max, min, real
9048* ..
9049* .. Executable Statements ..
9050*
9051* Convert descriptor
9052*
9053 CALL pb_desctrans( desca, desca2 )
9054*
9055* Get grid parameters
9056*
9057 ictxt = desca2( ctxt_ )
9058 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
9059*
9060 IF( n.EQ.0 )
9061 $ RETURN
9062*
9063 CALL pb_ainfog2l( n, n, ia, ja, desca2, nprow, npcol, myrow,
9064 $ mycol, imb1, inb1, np, nq, iia, jja, iarow,
9065 $ iacol, mrrow, mrcol )
9066*
9067* Decide where the entries shall be stored in memory
9068*
9069 IF( inplace ) THEN
9070 iia = 1
9071 jja = 1
9072 END IF
9073*
9074* Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC,
9075* ILOW, LOW, IUPP, and UPP.
9076*
9077 mb = desca2( mb_ )
9078 nb = desca2( nb_ )
9079*
9080 CALL pb_binfo( 0, np, nq, imb1, inb1, mb, nb, mrrow, mrcol,
9081 $ lcmt00, mblks, nblks, imbloc, inbloc, lmbloc,
9082 $ lnbloc, ilow, low, iupp, upp )
9083*
9084 ioffa = iia - 1
9085 joffa = jja - 1
9086 lda = desca2( lld_ )
9087 ldap1 = lda + 1
9088*
9089 IF( desca2( rsrc_ ).LT.0 ) THEN
9090 pmb = mb
9091 ELSE
9092 pmb = nprow * mb
9093 END IF
9094 IF( desca2( csrc_ ).LT.0 ) THEN
9095 qnb = nb
9096 ELSE
9097 qnb = npcol * nb
9098 END IF
9099*
9100* Handle the first block of rows or columns separately, and update
9101* LCMT00, MBLKS and NBLKS.
9102*
9103 godown = ( lcmt00.GT.iupp )
9104 goleft = ( lcmt00.LT.ilow )
9105*
9106 IF( .NOT.godown .AND. .NOT.goleft ) THEN
9107*
9108* LCMT00 >= ILOW && LCMT00 <= IUPP
9109*
9110 IF( lcmt00.GE.0 ) THEN
9111 ijoffa = ioffa+lcmt00 + ( joffa - 1 ) * lda
9112 DO 10 i = 1, min( inbloc, max( 0, imbloc - lcmt00 ) )
9113 atmp = a( ijoffa + i*ldap1 )
9114 a( ijoffa + i*ldap1 ) = alpha +
9115 $ cmplx( abs( real( atmp ) ),
9116 $ abs( aimag( atmp ) ) )
9117 10 CONTINUE
9118 ELSE
9119 ijoffa = ioffa + ( joffa - lcmt00 - 1 ) * lda
9120 DO 20 i = 1, min( imbloc, max( 0, inbloc + lcmt00 ) )
9121 atmp = a( ijoffa + i*ldap1 )
9122 a( ijoffa + i*ldap1 ) = alpha +
9123 $ cmplx( abs( real( atmp ) ),
9124 $ abs( aimag( atmp ) ) )
9125 20 CONTINUE
9126 END IF
9127 goleft = ( ( lcmt00 - ( iupp - upp + pmb ) ).LT.ilow )
9128 godown = .NOT.goleft
9129*
9130 END IF
9131*
9132 IF( godown ) THEN
9133*
9134 lcmt00 = lcmt00 - ( iupp - upp + pmb )
9135 mblks = mblks - 1
9136 ioffa = ioffa + imbloc
9137*
9138 30 CONTINUE
9139 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
9140 lcmt00 = lcmt00 - pmb
9141 mblks = mblks - 1
9142 ioffa = ioffa + mb
9143 GO TO 30
9144 END IF
9145*
9146 lcmt = lcmt00
9147 mblkd = mblks
9148 ioffd = ioffa
9149*
9150 mbloc = mb
9151 40 CONTINUE
9152 IF( mblkd.GT.0 .AND. lcmt.GE.ilow ) THEN
9153 IF( mblkd.EQ.1 )
9154 $ mbloc = lmbloc
9155 IF( lcmt.GE.0 ) THEN
9156 ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
9157 DO 50 i = 1, min( inbloc, max( 0, mbloc - lcmt ) )
9158 atmp = a( ijoffa + i*ldap1 )
9159 a( ijoffa + i*ldap1 ) = alpha +
9160 $ cmplx( abs( real( atmp ) ),
9161 $ abs( aimag( atmp ) ) )
9162 50 CONTINUE
9163 ELSE
9164 ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
9165 DO 60 i = 1, min( mbloc, max( 0, inbloc + lcmt ) )
9166 atmp = a( ijoffa + i*ldap1 )
9167 a( ijoffa + i*ldap1 ) = alpha +
9168 $ cmplx( abs( real( atmp ) ),
9169 $ abs( aimag( atmp ) ) )
9170 60 CONTINUE
9171 END IF
9172 lcmt00 = lcmt
9173 lcmt = lcmt - pmb
9174 mblks = mblkd
9175 mblkd = mblkd - 1
9176 ioffa = ioffd
9177 ioffd = ioffd + mbloc
9178 GO TO 40
9179 END IF
9180*
9181 lcmt00 = lcmt00 + low - ilow + qnb
9182 nblks = nblks - 1
9183 joffa = joffa + inbloc
9184*
9185 ELSE IF( goleft ) THEN
9186*
9187 lcmt00 = lcmt00 + low - ilow + qnb
9188 nblks = nblks - 1
9189 joffa = joffa + inbloc
9190*
9191 70 CONTINUE
9192 IF( nblks.GT.0 .AND. lcmt00.LT.low ) THEN
9193 lcmt00 = lcmt00 + qnb
9194 nblks = nblks - 1
9195 joffa = joffa + nb
9196 GO TO 70
9197 END IF
9198*
9199 lcmt = lcmt00
9200 nblkd = nblks
9201 joffd = joffa
9202*
9203 nbloc = nb
9204 80 CONTINUE
9205 IF( nblkd.GT.0 .AND. lcmt.LE.iupp ) THEN
9206 IF( nblkd.EQ.1 )
9207 $ nbloc = lnbloc
9208 IF( lcmt.GE.0 ) THEN
9209 ijoffa = ioffa + lcmt + ( joffd - 1 ) * lda
9210 DO 90 i = 1, min( nbloc, max( 0, imbloc - lcmt ) )
9211 atmp = a( ijoffa + i*ldap1 )
9212 a( ijoffa + i*ldap1 ) = alpha +
9213 $ cmplx( abs( real( atmp ) ),
9214 $ abs( aimag( atmp ) ) )
9215 90 CONTINUE
9216 ELSE
9217 ijoffa = ioffa + ( joffd - lcmt - 1 ) * lda
9218 DO 100 i = 1, min( imbloc, max( 0, nbloc + lcmt ) )
9219 atmp = a( ijoffa + i*ldap1 )
9220 a( ijoffa + i*ldap1 ) = alpha +
9221 $ cmplx( abs( real( atmp ) ),
9222 $ abs( aimag( atmp ) ) )
9223 100 CONTINUE
9224 END IF
9225 lcmt00 = lcmt
9226 lcmt = lcmt + qnb
9227 nblks = nblkd
9228 nblkd = nblkd - 1
9229 joffa = joffd
9230 joffd = joffd + nbloc
9231 GO TO 80
9232 END IF
9233*
9234 lcmt00 = lcmt00 - ( iupp - upp + pmb )
9235 mblks = mblks - 1
9236 ioffa = ioffa + imbloc
9237*
9238 END IF
9239*
9240 nbloc = nb
9241 110 CONTINUE
9242 IF( nblks.GT.0 ) THEN
9243 IF( nblks.EQ.1 )
9244 $ nbloc = lnbloc
9245 120 CONTINUE
9246 IF( mblks.GT.0 .AND. lcmt00.GT.upp ) THEN
9247 lcmt00 = lcmt00 - pmb
9248 mblks = mblks - 1
9249 ioffa = ioffa + mb
9250 GO TO 120
9251 END IF
9252*
9253 lcmt = lcmt00
9254 mblkd = mblks
9255 ioffd = ioffa
9256*
9257 mbloc = mb
9258 130 CONTINUE
9259 IF( mblkd.GT.0 .AND. lcmt.GE.low ) THEN
9260 IF( mblkd.EQ.1 )
9261 $ mbloc = lmbloc
9262 IF( lcmt.GE.0 ) THEN
9263 ijoffa = ioffd + lcmt + ( joffa - 1 ) * lda
9264 DO 140 i = 1, min( nbloc, max( 0, mbloc - lcmt ) )
9265 atmp = a( ijoffa + i*ldap1 )
9266 a( ijoffa + i*ldap1 ) = alpha +
9267 $ cmplx( abs( real( atmp ) ),
9268 $ abs( aimag( atmp ) ) )
9269 140 CONTINUE
9270 ELSE
9271 ijoffa = ioffd + ( joffa - lcmt - 1 ) * lda
9272 DO 150 i = 1, min( mbloc, max( 0, nbloc + lcmt ) )
9273 atmp = a( ijoffa + i*ldap1 )
9274 a( ijoffa + i*ldap1 ) = alpha +
9275 $ cmplx( abs( real( atmp ) ),
9276 $ abs( aimag( atmp ) ) )
9277 150 CONTINUE
9278 END IF
9279 lcmt00 = lcmt
9280 lcmt = lcmt - pmb
9281 mblks = mblkd
9282 mblkd = mblkd - 1
9283 ioffa = ioffd
9284 ioffd = ioffd + mbloc
9285 GO TO 130
9286 END IF
9287*
9288 lcmt00 = lcmt00 + qnb
9289 nblks = nblks - 1
9290 joffa = joffa + nbloc
9291 GO TO 110
9292*
9293 END IF
9294*
9295 RETURN
9296*
9297* End of PCLADOM
9298*
9299 END
9300 SUBROUTINE pb_pclaprnt( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
9301 $ CMATNM, NOUT, WORK )
9302*
9303* -- PBLAS test routine (version 2.0) --
9304* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9305* and University of California, Berkeley.
9306* April 1, 1998
9307*
9308* .. Scalar Arguments ..
9309 INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT
9310* ..
9311* .. Array Arguments ..
9312 CHARACTER*(*) CMATNM
9313 INTEGER DESCA( * )
9314 COMPLEX A( * ), WORK( * )
9315* ..
9316*
9317* Purpose
9318* =======
9319*
9320* PB_PCLAPRNT prints to the standard output a submatrix sub( A ) deno-
9321* ting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and printed by
9322* the process of coordinates (IRPRNT, ICPRNT).
9323*
9324* Notes
9325* =====
9326*
9327* A description vector is associated with each 2D block-cyclicly dis-
9328* tributed matrix. This vector stores the information required to
9329* establish the mapping between a matrix entry and its corresponding
9330* process and memory location.
9331*
9332* In the following comments, the character _ should be read as
9333* "of the distributed matrix". Let A be a generic term for any 2D
9334* block cyclicly distributed matrix. Its description vector is DESCA:
9335*
9336* NOTATION STORED IN EXPLANATION
9337* ---------------- --------------- ------------------------------------
9338* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.
9339* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
9340* the NPROW x NPCOL BLACS process grid
9341* A is distributed over. The context
9342* itself is global, but the handle
9343* (the integer value) may vary.
9344* M_A (global) DESCA( M_ ) The number of rows in the distribu-
9345* ted matrix A, M_A >= 0.
9346* N_A (global) DESCA( N_ ) The number of columns in the distri-
9347* buted matrix A, N_A >= 0.
9348* IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left
9349* block of the matrix A, IMB_A > 0.
9350* INB_A (global) DESCA( INB_ ) The number of columns of the upper
9351* left block of the matrix A,
9352* INB_A > 0.
9353* MB_A (global) DESCA( MB_ ) The blocking factor used to distri-
9354* bute the last M_A-IMB_A rows of A,
9355* MB_A > 0.
9356* NB_A (global) DESCA( NB_ ) The blocking factor used to distri-
9357* bute the last N_A-INB_A columns of
9358* A, NB_A > 0.
9359* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
9360* row of the matrix A is distributed,
9361* NPROW > RSRC_A >= 0.
9362* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
9363* first column of A is distributed.
9364* NPCOL > CSRC_A >= 0.
9365* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
9366* array storing the local blocks of
9367* the distributed matrix A,
9368* IF( Lc( 1, N_A ) > 0 )
9369* LLD_A >= MAX( 1, Lr( 1, M_A ) )
9370* ELSE
9371* LLD_A >= 1.
9372*
9373* Let K be the number of rows of a matrix A starting at the global in-
9374* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
9375* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
9376* receive if these K rows were distributed over NPROW processes. If K
9377* is the number of columns of a matrix A starting at the global index
9378* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
9379* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
9380* these K columns were distributed over NPCOL processes.
9381*
9382* The values of Lr() and Lc() may be determined via a call to the func-
9383* tion PB_NUMROC:
9384* Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
9385* Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
9386*
9387* Arguments
9388* =========
9389*
9390* M (global input) INTEGER
9391* On entry, M specifies the number of rows of the submatrix
9392* sub( A ). M must be at least zero.
9393*
9394* N (global input) INTEGER
9395* On entry, N specifies the number of columns of the submatrix
9396* sub( A ). N must be at least zero.
9397*
9398* A (local input) COMPLEX array
9399* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
9400* at least Lc( 1, JA+N-1 ). Before entry, this array contains
9401* the local entries of the matrix A.
9402*
9403* IA (global input) INTEGER
9404* On entry, IA specifies A's global row index, which points to
9405* the beginning of the submatrix sub( A ).
9406*
9407* JA (global input) INTEGER
9408* On entry, JA specifies A's global column index, which points
9409* to the beginning of the submatrix sub( A ).
9410*
9411* DESCA (global and local input) INTEGER array
9412* On entry, DESCA is an integer array of dimension DLEN_. This
9413* is the array descriptor for the matrix A.
9414*
9415* IRPRNT (global input) INTEGER
9416* On entry, IRPRNT specifies the row index of the printing pro-
9417* cess.
9418*
9419* ICPRNT (global input) INTEGER
9420* On entry, ICPRNT specifies the column index of the printing
9421* process.
9422*
9423* CMATNM (global input) CHARACTER*(*)
9424* On entry, CMATNM is the name of the matrix to be printed.
9425*
9426* NOUT (global input) INTEGER
9427* On entry, NOUT specifies the output unit number. When NOUT is
9428* equal to 6, the submatrix is printed on the screen.
9429*
9430* WORK (local workspace) COMPLEX array
9431* On entry, WORK is a work array of dimension at least equal to
9432* MAX( IMB_A, MB_A ).
9433*
9434* -- Written on April 1, 1998 by
9435* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
9436*
9437* =====================================================================
9438*
9439* .. Parameters ..
9440 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
9441 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
9442 $ RSRC_
9443 PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, dlen_ = 11,
9444 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
9445 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
9446 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
9447* ..
9448* .. Local Scalars ..
9449 INTEGER MYCOL, MYROW, NPCOL, NPROW, PCOL, PROW
9450* ..
9451* .. Local Arrays ..
9452 INTEGER DESCA2( DLEN_ )
9453* ..
9454* .. External Subroutines ..
9455 EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS, PB_PCLAPRN2
9456* ..
9457* .. Executable Statements ..
9458*
9459* Quick return if possible
9460*
9461 IF( ( m.LE.0 ).OR.( n.LE.0 ) )
9462 $ RETURN
9463*
9464* Convert descriptor
9465*
9466 CALL pb_desctrans( desca, desca2 )
9467*
9468 CALL blacs_gridinfo( desca2( ctxt_ ), nprow, npcol, myrow, mycol )
9469*
9470 IF( desca2( rsrc_ ).GE.0 ) THEN
9471 IF( desca2( csrc_ ).GE.0 ) THEN
9472 CALL pb_pclaprn2( m, n, a, ia, ja, desca2, irprnt, icprnt,
9473 $ cmatnm, nout, desca2( rsrc_ ),
9474 $ desca2( csrc_ ), work )
9475 ELSE
9476 DO 10 pcol = 0, npcol - 1
9477 IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
9478 $ WRITE( nout, * ) 'Colum-replicated array -- ' ,
9479 $ 'copy in process column: ', pcol
9480 CALL pb_pclaprn2( m, n, a, ia, ja, desca2, irprnt,
9481 $ icprnt, cmatnm, nout, desca2( rsrc_ ),
9482 $ pcol, work )
9483 10 CONTINUE
9484 END IF
9485 ELSE
9486 IF( desca2( csrc_ ).GE.0 ) THEN
9487 DO 20 prow = 0, nprow - 1
9488 IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
9489 $ WRITE( nout, * ) 'Row-replicated array -- ' ,
9490 $ 'copy in process row: ', prow
9491 CALL pb_pclaprn2( m, n, a, ia, ja, desca2, irprnt,
9492 $ icprnt, cmatnm, nout, prow,
9493 $ desca2( csrc_ ), work )
9494 20 CONTINUE
9495 ELSE
9496 DO 40 prow = 0, nprow - 1
9497 DO 30 pcol = 0, npcol - 1
9498 IF( ( myrow.EQ.irprnt ).AND.( mycol.EQ.icprnt ) )
9499 $ WRITE( nout, * ) 'Replicated array -- ' ,
9500 $ 'copy in process (', prow, ',', pcol, ')'
9501 CALL pb_pclaprn2( m, n, a, ia, ja, desca2, irprnt,
9502 $ icprnt, cmatnm, nout, prow, pcol,
9503 $ work )
9504 30 CONTINUE
9505 40 CONTINUE
9506 END IF
9507 END IF
9508*
9509 RETURN
9510*
9511* End of PB_PCLAPRNT
9512*
9513 END
9514 SUBROUTINE pb_pclaprn2( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT,
9515 $ CMATNM, NOUT, PROW, PCOL, WORK )
9516*
9517* -- PBLAS test routine (version 2.0) --
9518* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9519* and University of California, Berkeley.
9520* April 1, 1998
9521*
9522* .. Scalar Arguments ..
9523 INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT, PCOL, PROW
9524* ..
9525* .. Array Arguments ..
9526 CHARACTER*(*) CMATNM
9527 INTEGER DESCA( * )
9528 COMPLEX A( * ), WORK( * )
9529* ..
9530*
9531* .. Parameters ..
9532 INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_,
9533 $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_,
9534 $ RSRC_
9535 parameter( block_cyclic_2d_inb = 2, dlen_ = 11,
9536 $ dtype_ = 1, ctxt_ = 2, m_ = 3, n_ = 4,
9537 $ imb_ = 5, inb_ = 6, mb_ = 7, nb_ = 8,
9538 $ rsrc_ = 9, csrc_ = 10, lld_ = 11 )
9539* ..
9540* .. Local Scalars ..
9541 LOGICAL AISCOLREP, AISROWREP
9542 INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL,
9543 $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K,
9544 $ LDA, LDW, MYCOL, MYROW, NPCOL, NPROW
9545* ..
9546* .. External Subroutines ..
9547 EXTERNAL blacs_barrier, blacs_gridinfo, cgerv2d,
9548 $ cgesd2d, pb_infog2l
9549* ..
9550* .. Intrinsic Functions ..
9551 INTRINSIC aimag, min, real
9552* ..
9553* .. Executable Statements ..
9554*
9555* Get grid parameters
9556*
9557 ictxt = desca( ctxt_ )
9558 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
9559 CALL pb_infog2l( ia, ja, desca, nprow, npcol, myrow, mycol,
9560 $ iia, jja, iarow, iacol )
9561 ii = iia
9562 jj = jja
9563 IF( desca( rsrc_ ).LT.0 ) THEN
9564 aisrowrep = .true.
9565 iarow = prow
9566 icurrow = prow
9567 ELSE
9568 aisrowrep = .false.
9569 icurrow = iarow
9570 END IF
9571 IF( desca( csrc_ ).LT.0 ) THEN
9572 aiscolrep = .true.
9573 iacol = pcol
9574 icurcol = pcol
9575 ELSE
9576 aiscolrep = .false.
9577 icurcol = iacol
9578 END IF
9579 lda = desca( lld_ )
9580 ldw = max( desca( imb_ ), desca( mb_ ) )
9581*
9582* Handle the first block of column separately
9583*
9584 jb = desca( inb_ ) - ja + 1
9585 IF( jb.LE.0 )
9586 $ jb = ( (-jb) / desca( nb_ ) + 1 ) * desca( nb_ ) + jb
9587 jb = min( jb, n )
9588 jn = ja+jb-1
9589 DO 60 h = 0, jb-1
9590 ib = desca( imb_ ) - ia + 1
9591 IF( ib.LE.0 )
9592 $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
9593 ib = min( ib, m )
9594 in = ia+ib-1
9595 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9596 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9597 DO 10 k = 0, ib-1
9598 WRITE( nout, fmt = 9999 )
9599 $ cmatnm, ia+k, ja+h,
9600 $ real( a(ii+k+(jj+h-1)*lda) ),
9601 $ aimag( a(ii+k+(jj+h-1)*lda) )
9602 10 CONTINUE
9603 END IF
9604 ELSE
9605 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9606 CALL cgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ), lda,
9607 $ irprnt, icprnt )
9608 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9609 CALL cgerv2d( ictxt, ib, 1, work, ldw, icurrow, icurcol )
9610 DO 20 k = 1, ib
9611 WRITE( nout, fmt = 9999 )
9612 $ cmatnm, ia+k-1, ja+h, real( work( k ) ),
9613 $ aimag( work( k ) )
9614 20 CONTINUE
9615 END IF
9616 END IF
9617 IF( myrow.EQ.icurrow )
9618 $ ii = ii + ib
9619 IF( .NOT.aisrowrep )
9620 $ icurrow = mod( icurrow+1, nprow )
9621 CALL blacs_barrier( ictxt, 'All' )
9622*
9623* Loop over remaining block of rows
9624*
9625 DO 50 i = in+1, ia+m-1, desca( mb_ )
9626 ib = min( desca( mb_ ), ia+m-i )
9627 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9628 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9629 DO 30 k = 0, ib-1
9630 WRITE( nout, fmt = 9999 )
9631 $ cmatnm, i+k, ja+h,
9632 $ real( a( ii+k+(jj+h-1)*lda ) ),
9633 $ aimag( a( ii+k+(jj+h-1)*lda ) )
9634 30 CONTINUE
9635 END IF
9636 ELSE
9637 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9638 CALL cgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9639 $ lda, irprnt, icprnt )
9640 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9641 CALL cgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9642 $ icurcol )
9643 DO 40 k = 1, ib
9644 WRITE( nout, fmt = 9999 )
9645 $ cmatnm, i+k-1, ja+h, real( work( k ) ),
9646 $ aimag( work( k ) )
9647 40 CONTINUE
9648 END IF
9649 END IF
9650 IF( myrow.EQ.icurrow )
9651 $ ii = ii + ib
9652 IF( .NOT.aisrowrep )
9653 $ icurrow = mod( icurrow+1, nprow )
9654 CALL blacs_barrier( ictxt, 'All' )
9655 50 CONTINUE
9656*
9657 ii = iia
9658 icurrow = iarow
9659 60 CONTINUE
9660*
9661 IF( mycol.EQ.icurcol )
9662 $ jj = jj + jb
9663 IF( .NOT.aiscolrep )
9664 $ icurcol = mod( icurcol+1, npcol )
9665 CALL blacs_barrier( ictxt, 'All' )
9666*
9667* Loop over remaining column blocks
9668*
9669 DO 130 j = jn+1, ja+n-1, desca( nb_ )
9670 jb = min( desca( nb_ ), ja+n-j )
9671 DO 120 h = 0, jb-1
9672 ib = desca( imb_ )-ia+1
9673 IF( ib.LE.0 )
9674 $ ib = ( (-ib) / desca( mb_ ) + 1 ) * desca( mb_ ) + ib
9675 ib = min( ib, m )
9676 in = ia+ib-1
9677 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9678 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9679 DO 70 k = 0, ib-1
9680 WRITE( nout, fmt = 9999 )
9681 $ cmatnm, ia+k, j+h,
9682 $ real( a( ii+k+(jj+h-1)*lda ) ),
9683 $ aimag( a( ii+k+(jj+h-1)*lda ) )
9684 70 CONTINUE
9685 END IF
9686 ELSE
9687 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9688 CALL cgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9689 $ lda, irprnt, icprnt )
9690 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9691 CALL cgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9692 $ icurcol )
9693 DO 80 k = 1, ib
9694 WRITE( nout, fmt = 9999 )
9695 $ cmatnm, ia+k-1, j+h, real( work( k ) ),
9696 $ aimag( work( k ) )
9697 80 CONTINUE
9698 END IF
9699 END IF
9700 IF( myrow.EQ.icurrow )
9701 $ ii = ii + ib
9702 icurrow = mod( icurrow+1, nprow )
9703 CALL blacs_barrier( ictxt, 'All' )
9704*
9705* Loop over remaining block of rows
9706*
9707 DO 110 i = in+1, ia+m-1, desca( mb_ )
9708 ib = min( desca( mb_ ), ia+m-i )
9709 IF( icurrow.EQ.irprnt .AND. icurcol.EQ.icprnt ) THEN
9710 IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9711 DO 90 k = 0, ib-1
9712 WRITE( nout, fmt = 9999 )
9713 $ cmatnm, i+k, j+h,
9714 $ real( a( ii+k+(jj+h-1)*lda ) ),
9715 $ aimag( a( ii+k+(jj+h-1)*lda ) )
9716 90 CONTINUE
9717 END IF
9718 ELSE
9719 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
9720 CALL cgesd2d( ictxt, ib, 1, a( ii+(jj+h-1)*lda ),
9721 $ lda, irprnt, icprnt )
9722 ELSE IF( myrow.EQ.irprnt .AND. mycol.EQ.icprnt ) THEN
9723 CALL cgerv2d( ictxt, ib, 1, work, ldw, icurrow,
9724 $ icurcol )
9725 DO 100 k = 1, ib
9726 WRITE( nout, fmt = 9999 )
9727 $ cmatnm, i+k-1, j+h, real( work( k ) ),
9728 $ aimag( work( k ) )
9729 100 CONTINUE
9730 END IF
9731 END IF
9732 IF( myrow.EQ.icurrow )
9733 $ ii = ii + ib
9734 IF( .NOT.aisrowrep )
9735 $ icurrow = mod( icurrow+1, nprow )
9736 CALL blacs_barrier( ictxt, 'All' )
9737 110 CONTINUE
9738*
9739 ii = iia
9740 icurrow = iarow
9741 120 CONTINUE
9742*
9743 IF( mycol.EQ.icurcol )
9744 $ jj = jj + jb
9745 IF( .NOT.aiscolrep )
9746 $ icurcol = mod( icurcol+1, npcol )
9747 CALL blacs_barrier( ictxt, 'All' )
9748*
9749 130 CONTINUE
9750*
9751 9999 FORMAT( 1x, a, '(', i6, ',', i6, ')=', e16.8, '+i*(',
9752 $ e16.8, ')' )
9753*
9754 RETURN
9755*
9756* End of PB_PCLAPRN2
9757*
9758 END
9759 SUBROUTINE pb_cfillpad( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL )
9760*
9761* -- PBLAS test routine (version 2.0) --
9762* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9763* and University of California, Berkeley.
9764* April 1, 1998
9765*
9766* .. Scalar Arguments ..
9767 INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9768 COMPLEX CHKVAL
9769* ..
9770* .. Array Arguments ..
9771 COMPLEX A( * )
9772* ..
9773*
9774* Purpose
9775* =======
9776*
9777* PB_CFILLPAD surrounds a two dimensional local array with a guard-zone
9778* initialized to the value CHKVAL. The user may later call the routine
9779* PB_CCHEKPAD to discover if the guardzone has been violated. There are
9780* three guardzones. The first is a buffer of size IPRE that is before
9781* the start of the array. The second is the buffer of size IPOST which
9782* is after the end of the array to be padded. Finally, there is a guard
9783* zone inside every column of the array to be padded, in the elements
9784* of A(M+1:LDA, J).
9785*
9786* Arguments
9787* =========
9788*
9789* ICTXT (local input) INTEGER
9790* On entry, ICTXT specifies the BLACS context handle, indica-
9791* ting the global context of the operation. The context itself
9792* is global, but the value of ICTXT is local.
9793*
9794* M (local input) INTEGER
9795* On entry, M specifies the number of rows in the local array
9796* A. M must be at least zero.
9797*
9798* N (local input) INTEGER
9799* On entry, N specifies the number of columns in the local ar-
9800* ray A. N must be at least zero.
9801*
9802* A (local input/local output) COMPLEX array
9803* On entry, A is an array of dimension (LDA,N). On exit, this
9804* array is the padded array.
9805*
9806* LDA (local input) INTEGER
9807* On entry, LDA specifies the leading dimension of the local
9808* array to be padded. LDA must be at least MAX( 1, M ).
9809*
9810* IPRE (local input) INTEGER
9811* On entry, IPRE specifies the size of the guard zone to put
9812* before the start of the padded array.
9813*
9814* IPOST (local input) INTEGER
9815* On entry, IPOST specifies the size of the guard zone to put
9816* after the end of the padded array.
9817*
9818* CHKVAL (local input) COMPLEX
9819* On entry, CHKVAL specifies the value to pad the array with.
9820*
9821* -- Written on April 1, 1998 by
9822* R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9823*
9824* =====================================================================
9825*
9826* .. Local Scalars ..
9827 INTEGER I, J, K
9828* ..
9829* .. Executable Statements ..
9830*
9831* Put check buffer in front of A
9832*
9833 IF( IPRE.GT.0 ) THEN
9834 DO 10 I = 1, ipre
9835 a( i ) = chkval
9836 10 CONTINUE
9837 ELSE
9838 WRITE( *, fmt = '(A)' )
9839 $ 'WARNING no pre-guardzone in PB_CFILLPAD'
9840 END IF
9841*
9842* Put check buffer in back of A
9843*
9844 IF( ipost.GT.0 ) THEN
9845 j = ipre+lda*n+1
9846 DO 20 i = j, j+ipost-1
9847 a( i ) = chkval
9848 20 CONTINUE
9849 ELSE
9850 WRITE( *, fmt = '(A)' )
9851 $ 'WARNING no post-guardzone in PB_CFILLPAD'
9852 END IF
9853*
9854* Put check buffer in all (LDA-M) gaps
9855*
9856 IF( lda.GT.m ) THEN
9857 k = ipre + m + 1
9858 DO 40 j = 1, n
9859 DO 30 i = k, k + ( lda - m ) - 1
9860 a( i ) = chkval
9861 30 CONTINUE
9862 k = k + lda
9863 40 CONTINUE
9864 END IF
9865*
9866 RETURN
9867*
9868* End of PB_CFILLPAD
9869*
9870 END
9871 SUBROUTINE pb_cchekpad( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST,
9872 $ CHKVAL )
9873*
9874* -- PBLAS test routine (version 2.0) --
9875* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9876* and University of California, Berkeley.
9877* April 1, 1998
9878*
9879* .. Scalar Arguments ..
9880 INTEGER ICTXT, IPOST, IPRE, LDA, M, N
9881 COMPLEX CHKVAL
9882* ..
9883* .. Array Arguments ..
9884 CHARACTER*(*) MESS
9885 COMPLEX A( * )
9886* ..
9887*
9888* Purpose
9889* =======
9890*
9891* PB_CCHEKPAD checks that the padding around a local array has not been
9892* overwritten since the call to PB_CFILLPAD. Three types of errors are
9893* reported:
9894*
9895* 1) Overwrite in pre-guardzone. This indicates a memory overwrite has
9896* occurred in the first IPRE elements which form a buffer before the
9897* beginning of A. Therefore, the error message:
9898* 'Overwrite in pre-guardzone: loc( 5) = 18.00000'
9899* tells that the 5th element of the IPRE long buffer has been overwrit-
9900* ten with the value 18, where it should still have the value CHKVAL.
9901*
9902* 2) Overwrite in post-guardzone. This indicates a memory overwrite has
9903* occurred in the last IPOST elements which form a buffer after the end
9904* of A. Error reports are refered from the end of A. Therefore,
9905* 'Overwrite in post-guardzone: loc( 19) = 24.00000'
9906* tells that the 19th element after the end of A was overwritten with
9907* the value 24, where it should still have the value of CHKVAL.
9908*
9909* 3) Overwrite in lda-m gap. Tells you elements between M and LDA were
9910* overwritten. So,
9911* 'Overwrite in lda-m gap: A( 12, 3) = 22.00000'
9912* tells that the element at the 12th row and 3rd column of A was over-
9913* written with the value of 22, where it should still have the value of
9914* CHKVAL.
9915*
9916* Arguments
9917* =========
9918*
9919* ICTXT (local input) INTEGER
9920* On entry, ICTXT specifies the BLACS context handle, indica-
9921* ting the global context of the operation. The context itself
9922* is global, but the value of ICTXT is local.
9923*
9924* MESS (local input) CHARACTER*(*)
9925* On entry, MESS is a ttring containing a user-defined message.
9926*
9927* M (local input) INTEGER
9928* On entry, M specifies the number of rows in the local array
9929* A. M must be at least zero.
9930*
9931* N (local input) INTEGER
9932* On entry, N specifies the number of columns in the local ar-
9933* ray A. N must be at least zero.
9934*
9935* A (local input) COMPLEX array
9936* On entry, A is an array of dimension (LDA,N).
9937*
9938* LDA (local input) INTEGER
9939* On entry, LDA specifies the leading dimension of the local
9940* array to be padded. LDA must be at least MAX( 1, M ).
9941*
9942* IPRE (local input) INTEGER
9943* On entry, IPRE specifies the size of the guard zone to put
9944* before the start of the padded array.
9945*
9946* IPOST (local input) INTEGER
9947* On entry, IPOST specifies the size of the guard zone to put
9948* after the end of the padded array.
9949*
9950* CHKVAL (local input) COMPLEX
9951* On entry, CHKVAL specifies the value to pad the array with.
9952*
9953*
9954* -- Written on April 1, 1998 by
9955* R. Clint Whaley, University of Tennessee, Knoxville 37996, USA.
9956*
9957* =====================================================================
9958*
9959* .. Local Scalars ..
9960 CHARACTER*1 TOP
9961 INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, NPCOL,
9962 $ NPROW
9963* ..
9964* .. External Subroutines ..
9965 EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_TOPGET
9966* ..
9967* .. Intrinsic Functions ..
9968 INTRINSIC AIMAG, REAL
9969* ..
9970* .. Executable Statements ..
9971*
9972* Get grid parameters
9973*
9974 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
9975 IAM = myrow*npcol + mycol
9976 info = -1
9977*
9978* Check buffer in front of A
9979*
9980 IF( ipre.GT.0 ) THEN
9981 DO 10 i = 1, ipre
9982 IF( a( i ).NE.chkval ) THEN
9983 WRITE( *, fmt = 9998 ) myrow, mycol, mess, ' pre', i,
9984 $ real( a( i ) ), aimag( a( i ) )
9985 info = iam
9986 END IF
9987 10 CONTINUE
9988 ELSE
9989 WRITE( *, fmt = * ) 'WARNING no pre-guardzone in PB_CCHEKPAD'
9990 END IF
9991*
9992* Check buffer after A
9993*
9994 IF( ipost.GT.0 ) THEN
9995 j = ipre+lda*n+1
9996 DO 20 i = j, j+ipost-1
9997 IF( a( i ).NE.chkval ) THEN
9998 WRITE( *, fmt = 9998 ) myrow, mycol, mess, 'post',
9999 $ i-j+1, real( a( i ) ),
10000 $ aimag( a( i ) )
10001 info = iam
10002 END IF
10003 20 CONTINUE
10004 ELSE
10005 WRITE( *, fmt = * )
10006 $ 'WARNING no post-guardzone buffer in PB_CCHEKPAD'
10007 END IF
10008*
10009* Check all (LDA-M) gaps
10010*
10011 IF( lda.GT.m ) THEN
10012 k = ipre + m + 1
10013 DO 40 j = 1, n
10014 DO 30 i = k, k + (lda-m) - 1
10015 IF( a( i ).NE.chkval ) THEN
10016 WRITE( *, fmt = 9997 ) myrow, mycol, mess,
10017 $ i-ipre-lda*(j-1), j, real( a( i ) ),
10018 $ aimag( a( i ) )
10019 info = iam
10020 END IF
10021 30 CONTINUE
10022 k = k + lda
10023 40 CONTINUE
10024 END IF
10025*
10026 CALL pb_topget( ictxt, 'Combine', 'All', top )
10027 CALL igamx2d( ictxt, 'All', top, 1, 1, info, 1, idumm, idumm, -1,
10028 $ 0, 0 )
10029 IF( iam.EQ.0 .AND. info.GE.0 ) THEN
10030 WRITE( *, fmt = 9999 ) info / npcol, mod( info, npcol ), mess
10031 END IF
10032*
10033 9999 FORMAT( '{', i5, ',', i5, '}: Memory overwrite in ', a )
10034 9998 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
10035 $ a4, '-guardzone: loc(', i3, ') = ', g11.4, '+ i*',
10036 $ g11.4 )
10037 9997 FORMAT( '{', i5, ',', i5, '}: ', a, ' memory overwrite in ',
10038 $ 'lda-m gap: loc(', i3, ',', i3, ') = ', g11.4,
10039 $ '+ i*', g11.4 )
10040*
10041 RETURN
10042*
10043* End of PB_CCHEKPAD
10044*
10045 END
10046 SUBROUTINE pb_claset( UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA )
10047*
10048* -- PBLAS test routine (version 2.0) --
10049* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10050* and University of California, Berkeley.
10051* April 1, 1998
10052*
10053* .. Scalar Arguments ..
10054 CHARACTER*1 UPLO
10055 INTEGER IOFFD, LDA, M, N
10056 COMPLEX ALPHA, BETA
10057* ..
10058* .. Array Arguments ..
10059 COMPLEX A( LDA, * )
10060* ..
10061*
10062* Purpose
10063* =======
10064*
10065* PB_CLASET initializes a two-dimensional array A to beta on the diago-
10066* nal specified by IOFFD and alpha on the offdiagonals.
10067*
10068* Arguments
10069* =========
10070*
10071* UPLO (global input) CHARACTER*1
10072* On entry, UPLO specifies which trapezoidal part of the ar-
10073* ray A is to be set as follows:
10074* = 'L' or 'l': Lower triangular part is set; the strictly
10075* upper triangular part of A is not changed,
10076* = 'U' or 'u': Upper triangular part is set; the strictly
10077* lower triangular part of A is not changed,
10078* = 'D' or 'd' Only the diagonal of A is set,
10079* Otherwise: All of the array A is set.
10080*
10081* M (input) INTEGER
10082* On entry, M specifies the number of rows of the array A. M
10083* must be at least zero.
10084*
10085* N (input) INTEGER
10086* On entry, N specifies the number of columns of the array A.
10087* N must be at least zero.
10088*
10089* IOFFD (input) INTEGER
10090* On entry, IOFFD specifies the position of the offdiagonal de-
10091* limiting the upper and lower trapezoidal part of A as follows
10092* (see the notes below):
10093*
10094* IOFFD = 0 specifies the main diagonal A( i, i ),
10095* with i = 1 ... MIN( M, N ),
10096* IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
10097* with i = 1 ... MIN( M-IOFFD, N ),
10098* IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
10099* with i = 1 ... MIN( M, N+IOFFD ).
10100*
10101* ALPHA (input) COMPLEX
10102* On entry, ALPHA specifies the value to which the offdiagonal
10103* array elements are set to.
10104*
10105* BETA (input) COMPLEX
10106* On entry, BETA specifies the value to which the diagonal ar-
10107* ray elements are set to.
10108*
10109* A (input/output) COMPLEX array
10110* On entry, A is an array of dimension (LDA,N). Before entry
10111* with UPLO = 'U' or 'u', the leading m by n part of the array
10112* A must contain the upper trapezoidal part of the matrix as
10113* specified by IOFFD to be set, and the strictly lower trape-
10114* zoidal part of A is not referenced; When IUPLO = 'L' or 'l',
10115* the leading m by n part of the array A must contain the
10116* lower trapezoidal part of the matrix as specified by IOFFD to
10117* be set, and the strictly upper trapezoidal part of A is
10118* not referenced.
10119*
10120* LDA (input) INTEGER
10121* On entry, LDA specifies the leading dimension of the array A.
10122* LDA must be at least max( 1, M ).
10123*
10124* Notes
10125* =====
10126* N N
10127* ---------------------------- -----------
10128* | d | | |
10129* M | d 'U' | | 'U' |
10130* | 'L' 'D' | |d |
10131* | d | M | d |
10132* ---------------------------- | 'D' |
10133* | d |
10134* IOFFD < 0 | 'L' d |
10135* | d|
10136* N | |
10137* ----------- -----------
10138* | d 'U'|
10139* | d | IOFFD > 0
10140* M | 'D' |
10141* | d| N
10142* | 'L' | ----------------------------
10143* | | | 'U' |
10144* | | |d |
10145* | | | 'D' |
10146* | | | d |
10147* | | |'L' d |
10148* ----------- ----------------------------
10149*
10150* -- Written on April 1, 1998 by
10151* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10152*
10153* =====================================================================
10154*
10155* .. Local Scalars ..
10156 INTEGER I, J, JTMP, MN
10157* ..
10158* .. External Functions ..
10159 LOGICAL LSAME
10160 EXTERNAL LSAME
10161* ..
10162* .. Intrinsic Functions ..
10163 INTRINSIC MAX, MIN
10164* ..
10165* .. Executable Statements ..
10166*
10167* Quick return if possible
10168*
10169 IF( M.LE.0 .OR. N.LE.0 )
10170 $ RETURN
10171*
10172* Start the operations
10173*
10174 IF( LSAME( UPLO, 'L' ) ) THEN
10175*
10176* Set the diagonal to BETA and the strictly lower triangular
10177* part of the array to ALPHA.
10178*
10179 mn = max( 0, -ioffd )
10180 DO 20 j = 1, min( mn, n )
10181 DO 10 i = 1, m
10182 a( i, j ) = alpha
10183 10 CONTINUE
10184 20 CONTINUE
10185 DO 40 j = mn + 1, min( m - ioffd, n )
10186 jtmp = j + ioffd
10187 a( jtmp, j ) = beta
10188 DO 30 i = jtmp + 1, m
10189 a( i, j ) = alpha
10190 30 CONTINUE
10191 40 CONTINUE
10192*
10193 ELSE IF( lsame( uplo, 'U' ) ) THEN
10194*
10195* Set the diagonal to BETA and the strictly upper triangular
10196* part of the array to ALPHA.
10197*
10198 mn = min( m - ioffd, n )
10199 DO 60 j = max( 0, -ioffd ) + 1, mn
10200 jtmp = j + ioffd
10201 DO 50 i = 1, jtmp - 1
10202 a( i, j ) = alpha
10203 50 CONTINUE
10204 a( jtmp, j ) = beta
10205 60 CONTINUE
10206 DO 80 j = max( 0, mn ) + 1, n
10207 DO 70 i = 1, m
10208 a( i, j ) = alpha
10209 70 CONTINUE
10210 80 CONTINUE
10211*
10212 ELSE IF( lsame( uplo, 'D' ) ) THEN
10213*
10214* Set the array to BETA on the diagonal.
10215*
10216 DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
10217 a( j + ioffd, j ) = beta
10218 90 CONTINUE
10219*
10220 ELSE
10221*
10222* Set the array to BETA on the diagonal and ALPHA on the
10223* offdiagonal.
10224*
10225 DO 110 j = 1, n
10226 DO 100 i = 1, m
10227 a( i, j ) = alpha
10228 100 CONTINUE
10229 110 CONTINUE
10230 IF( alpha.NE.beta .AND. ioffd.LT.m .AND. ioffd.GT.-n ) THEN
10231 DO 120 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
10232 a( j + ioffd, j ) = beta
10233 120 CONTINUE
10234 END IF
10235*
10236 END IF
10237*
10238 RETURN
10239*
10240* End of PB_CLASET
10241*
10242 END
10243 SUBROUTINE pb_clascal( UPLO, M, N, IOFFD, ALPHA, A, LDA )
10244*
10245* -- PBLAS test routine (version 2.0) --
10246* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10247* and University of California, Berkeley.
10248* April 1, 1998
10249*
10250* .. Scalar Arguments ..
10251 CHARACTER*1 UPLO
10252 INTEGER IOFFD, LDA, M, N
10253 COMPLEX ALPHA
10254* ..
10255* .. Array Arguments ..
10256 COMPLEX A( LDA, * )
10257* ..
10258*
10259* Purpose
10260* =======
10261*
10262* PB_CLASCAL scales a two-dimensional array A by the scalar alpha.
10263*
10264* Arguments
10265* =========
10266*
10267* UPLO (input) CHARACTER*1
10268* On entry, UPLO specifies which trapezoidal part of the ar-
10269* ray A is to be scaled as follows:
10270* = 'L' or 'l': the lower trapezoid of A is scaled,
10271* = 'U' or 'u': the upper trapezoid of A is scaled,
10272* = 'D' or 'd': diagonal specified by IOFFD is scaled,
10273* Otherwise: all of the array A is scaled.
10274*
10275* M (input) INTEGER
10276* On entry, M specifies the number of rows of the array A. M
10277* must be at least zero.
10278*
10279* N (input) INTEGER
10280* On entry, N specifies the number of columns of the array A.
10281* N must be at least zero.
10282*
10283* IOFFD (input) INTEGER
10284* On entry, IOFFD specifies the position of the offdiagonal de-
10285* limiting the upper and lower trapezoidal part of A as follows
10286* (see the notes below):
10287*
10288* IOFFD = 0 specifies the main diagonal A( i, i ),
10289* with i = 1 ... MIN( M, N ),
10290* IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ),
10291* with i = 1 ... MIN( M-IOFFD, N ),
10292* IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ),
10293* with i = 1 ... MIN( M, N+IOFFD ).
10294*
10295* ALPHA (input) COMPLEX
10296* On entry, ALPHA specifies the scalar alpha.
10297*
10298* A (input/output) COMPLEX array
10299* On entry, A is an array of dimension (LDA,N). Before entry
10300* with UPLO = 'U' or 'u', the leading m by n part of the array
10301* A must contain the upper trapezoidal part of the matrix as
10302* specified by IOFFD to be scaled, and the strictly lower tra-
10303* pezoidal part of A is not referenced; When UPLO = 'L' or 'l',
10304* the leading m by n part of the array A must contain the lower
10305* trapezoidal part of the matrix as specified by IOFFD to be
10306* scaled, and the strictly upper trapezoidal part of A is not
10307* referenced. On exit, the entries of the trapezoid part of A
10308* determined by UPLO and IOFFD are scaled.
10309*
10310* LDA (input) INTEGER
10311* On entry, LDA specifies the leading dimension of the array A.
10312* LDA must be at least max( 1, M ).
10313*
10314* Notes
10315* =====
10316* N N
10317* ---------------------------- -----------
10318* | d | | |
10319* M | d 'U' | | 'U' |
10320* | 'L' 'D' | |d |
10321* | d | M | d |
10322* ---------------------------- | 'D' |
10323* | d |
10324* IOFFD < 0 | 'L' d |
10325* | d|
10326* N | |
10327* ----------- -----------
10328* | d 'U'|
10329* | d | IOFFD > 0
10330* M | 'D' |
10331* | d| N
10332* | 'L' | ----------------------------
10333* | | | 'U' |
10334* | | |d |
10335* | | | 'D' |
10336* | | | d |
10337* | | |'L' d |
10338* ----------- ----------------------------
10339*
10340* -- Written on April 1, 1998 by
10341* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10342*
10343* =====================================================================
10344*
10345* .. Local Scalars ..
10346 INTEGER I, J, JTMP, MN
10347* ..
10348* .. External Functions ..
10349 LOGICAL LSAME
10350 EXTERNAL LSAME
10351* ..
10352* .. Intrinsic Functions ..
10353 INTRINSIC MAX, MIN
10354* ..
10355* .. Executable Statements ..
10356*
10357* Quick return if possible
10358*
10359 IF( M.LE.0 .OR. N.LE.0 )
10360 $ RETURN
10361*
10362* Start the operations
10363*
10364 IF( LSAME( UPLO, 'L' ) ) THEN
10365*
10366* Scales the lower triangular part of the array by ALPHA.
10367*
10368 MN = max( 0, -ioffd )
10369 DO 20 j = 1, min( mn, n )
10370 DO 10 i = 1, m
10371 a( i, j ) = alpha * a( i, j )
10372 10 CONTINUE
10373 20 CONTINUE
10374 DO 40 j = mn + 1, min( m - ioffd, n )
10375 DO 30 i = j + ioffd, m
10376 a( i, j ) = alpha * a( i, j )
10377 30 CONTINUE
10378 40 CONTINUE
10379*
10380 ELSE IF( lsame( uplo, 'U' ) ) THEN
10381*
10382* Scales the upper triangular part of the array by ALPHA.
10383*
10384 mn = min( m - ioffd, n )
10385 DO 60 j = max( 0, -ioffd ) + 1, mn
10386 DO 50 i = 1, j + ioffd
10387 a( i, j ) = alpha * a( i, j )
10388 50 CONTINUE
10389 60 CONTINUE
10390 DO 80 j = max( 0, mn ) + 1, n
10391 DO 70 i = 1, m
10392 a( i, j ) = alpha * a( i, j )
10393 70 CONTINUE
10394 80 CONTINUE
10395*
10396 ELSE IF( lsame( uplo, 'D' ) ) THEN
10397*
10398* Scales the diagonal entries by ALPHA.
10399*
10400 DO 90 j = max( 0, -ioffd ) + 1, min( m - ioffd, n )
10401 jtmp = j + ioffd
10402 a( jtmp, j ) = alpha * a( jtmp, j )
10403 90 CONTINUE
10404*
10405 ELSE
10406*
10407* Scales the entire array by ALPHA.
10408*
10409 DO 110 j = 1, n
10410 DO 100 i = 1, m
10411 a( i, j ) = alpha * a( i, j )
10412 100 CONTINUE
10413 110 CONTINUE
10414*
10415 END IF
10416*
10417 RETURN
10418*
10419* End of PB_CLASCAL
10420*
10421 END
10422 SUBROUTINE pb_clagen( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS,
10423 $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB,
10424 $ LNBLOC, JMP, IMULADD )
10425*
10426* -- PBLAS test routine (version 2.0) --
10427* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10428* and University of California, Berkeley.
10429* April 1, 1998
10430*
10431* .. Scalar Arguments ..
10432 CHARACTER*1 UPLO, AFORM
10433 INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC,
10434 $ mb, mblks, nb, nblks
10435* ..
10436* .. Array Arguments ..
10437 INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * )
10438 COMPLEX A( LDA, * )
10439* ..
10440*
10441* Purpose
10442* =======
10443*
10444* PB_CLAGEN locally initializes an array A.
10445*
10446* Arguments
10447* =========
10448*
10449* UPLO (global input) CHARACTER*1
10450* On entry, UPLO specifies whether the lower (UPLO='L') trape-
10451* zoidal part or the upper (UPLO='U') trapezoidal part is to be
10452* generated when the matrix to be generated is symmetric or
10453* Hermitian. For all the other values of AFORM, the value of
10454* this input argument is ignored.
10455*
10456* AFORM (global input) CHARACTER*1
10457* On entry, AFORM specifies the type of submatrix to be genera-
10458* ted as follows:
10459* AFORM = 'S', sub( A ) is a symmetric matrix,
10460* AFORM = 'H', sub( A ) is a Hermitian matrix,
10461* AFORM = 'T', sub( A ) is overrwritten with the transpose
10462* of what would normally be generated,
10463* AFORM = 'C', sub( A ) is overwritten with the conjugate
10464* transpose of what would normally be genera-
10465* ted.
10466* AFORM = 'N', a random submatrix is generated.
10467*
10468* A (local output) COMPLEX array
10469* On entry, A is an array of dimension (LLD_A, *). On exit,
10470* this array contains the local entries of the randomly genera-
10471* ted submatrix sub( A ).
10472*
10473* LDA (local input) INTEGER
10474* On entry, LDA specifies the local leading dimension of the
10475* array A. LDA must be at least one.
10476*
10477* LCMT00 (global input) INTEGER
10478* On entry, LCMT00 is the LCM value specifying the off-diagonal
10479* of the underlying matrix of interest. LCMT00=0 specifies the
10480* main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0
10481* specifies superdiagonals.
10482*
10483* IRAN (local input) INTEGER array
10484* On entry, IRAN is an array of dimension 2 containing respec-
10485* tively the 16-lower and 16-higher bits of the encoding of the
10486* entry of the random sequence corresponding locally to the
10487* first local array entry to generate. Usually, this array is
10488* computed by PB_SETLOCRAN.
10489*
10490* MBLKS (local input) INTEGER
10491* On entry, MBLKS specifies the local number of blocks of rows.
10492* MBLKS is at least zero.
10493*
10494* IMBLOC (local input) INTEGER
10495* On entry, IMBLOC specifies the number of rows (size) of the
10496* local uppest blocks. IMBLOC is at least zero.
10497*
10498* MB (global input) INTEGER
10499* On entry, MB specifies the blocking factor used to partition
10500* the rows of the matrix. MB must be at least one.
10501*
10502* LMBLOC (local input) INTEGER
10503* On entry, LMBLOC specifies the number of rows (size) of the
10504* local lowest blocks. LMBLOC is at least zero.
10505*
10506* NBLKS (local input) INTEGER
10507* On entry, NBLKS specifies the local number of blocks of co-
10508* lumns. NBLKS is at least zero.
10509*
10510* INBLOC (local input) INTEGER
10511* On entry, INBLOC specifies the number of columns (size) of
10512* the local leftmost blocks. INBLOC is at least zero.
10513*
10514* NB (global input) INTEGER
10515* On entry, NB specifies the blocking factor used to partition
10516* the the columns of the matrix. NB must be at least one.
10517*
10518* LNBLOC (local input) INTEGER
10519* On entry, LNBLOC specifies the number of columns (size) of
10520* the local rightmost blocks. LNBLOC is at least zero.
10521*
10522* JMP (local input) INTEGER array
10523* On entry, JMP is an array of dimension JMP_LEN containing the
10524* different jump values used by the random matrix generator.
10525*
10526* IMULADD (local input) INTEGER array
10527* On entry, IMULADD is an array of dimension (4, JMP_LEN). The
10528* jth column of this array contains the encoded initial cons-
10529* tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) )
10530* (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j)
10531* contains respectively the 16-lower and 16-higher bits of the
10532* constant a_j, and IMULADD(3:4,j) contains the 16-lower and
10533* 16-higher bits of the constant c_j.
10534*
10535* -- Written on April 1, 1998 by
10536* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
10537*
10538* =====================================================================
10539*
10540* .. Parameters ..
10541 INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN,
10542 $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB,
10543 $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW
10544 PARAMETER ( JMP_1 = 1, jmp_row = 2, jmp_col = 3,
10545 $ jmp_mb = 4, jmp_imbv = 5, jmp_npmb = 6,
10546 $ jmp_npimbloc = 7, jmp_nb = 8, jmp_inbv = 9,
10547 $ jmp_nqnb = 10, jmp_nqinbloc = 11,
10548 $ jmp_len = 11 )
10549 REAL ZERO
10550 PARAMETER ( ZERO = 0.0e+0 )
10551* ..
10552* .. Local Scalars ..
10553 INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK,
10554 $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP
10555 COMPLEX DUMMY
10556* ..
10557* .. Local Arrays ..
10558 INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 )
10559* ..
10560* .. External Subroutines ..
10561 EXTERNAL PB_JUMPIT
10562* ..
10563* .. External Functions ..
10564 LOGICAL LSAME
10565 REAL PB_SRAND
10566 EXTERNAL lsame, pb_srand
10567* ..
10568* .. Intrinsic Functions ..
10569 INTRINSIC cmplx, max, min, real
10570* ..
10571* .. Executable Statements ..
10572*
10573 DO 10 i = 1, 2
10574 ib1( i ) = iran( i )
10575 ib2( i ) = iran( i )
10576 ib3( i ) = iran( i )
10577 10 CONTINUE
10578*
10579 IF( lsame( aform, 'N' ) ) THEN
10580*
10581* Generate random matrix
10582*
10583 jj = 1
10584*
10585 DO 50 jblk = 1, nblks
10586*
10587 IF( jblk.EQ.1 ) THEN
10588 jb = inbloc
10589 ELSE IF( jblk.EQ.nblks ) THEN
10590 jb = lnbloc
10591 ELSE
10592 jb = nb
10593 END IF
10594*
10595 DO 40 jk = jj, jj + jb - 1
10596*
10597 ii = 1
10598*
10599 DO 30 iblk = 1, mblks
10600*
10601 IF( iblk.EQ.1 ) THEN
10602 ib = imbloc
10603 ELSE IF( iblk.EQ.mblks ) THEN
10604 ib = lmbloc
10605 ELSE
10606 ib = mb
10607 END IF
10608*
10609* Blocks are IB by JB
10610*
10611 DO 20 ik = ii, ii + ib - 1
10612 a( ik, jk ) = cmplx( pb_srand( 0 ), pb_srand( 0 ) )
10613 20 CONTINUE
10614*
10615 ii = ii + ib
10616*
10617 IF( iblk.EQ.1 ) THEN
10618*
10619* Jump IMBLOC + ( NPROW - 1 ) * MB rows
10620*
10621 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
10622 $ ib0 )
10623*
10624 ELSE
10625*
10626* Jump NPROW * MB rows
10627*
10628 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1, ib0 )
10629*
10630 END IF
10631*
10632 ib1( 1 ) = ib0( 1 )
10633 ib1( 2 ) = ib0( 2 )
10634*
10635 30 CONTINUE
10636*
10637* Jump one column
10638*
10639 CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
10640*
10641 ib1( 1 ) = ib0( 1 )
10642 ib1( 2 ) = ib0( 2 )
10643 ib2( 1 ) = ib0( 1 )
10644 ib2( 2 ) = ib0( 2 )
10645*
10646 40 CONTINUE
10647*
10648 jj = jj + jb
10649*
10650 IF( jblk.EQ.1 ) THEN
10651*
10652* Jump INBLOC + ( NPCOL - 1 ) * NB columns
10653*
10654 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
10655*
10656 ELSE
10657*
10658* Jump NPCOL * NB columns
10659*
10660 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
10661*
10662 END IF
10663*
10664 ib1( 1 ) = ib0( 1 )
10665 ib1( 2 ) = ib0( 2 )
10666 ib2( 1 ) = ib0( 1 )
10667 ib2( 2 ) = ib0( 2 )
10668 ib3( 1 ) = ib0( 1 )
10669 ib3( 2 ) = ib0( 2 )
10670*
10671 50 CONTINUE
10672*
10673 ELSE IF( lsame( aform, 'T' ) ) THEN
10674*
10675* Generate the transpose of the matrix that would be normally
10676* generated.
10677*
10678 ii = 1
10679*
10680 DO 90 iblk = 1, mblks
10681*
10682 IF( iblk.EQ.1 ) THEN
10683 ib = imbloc
10684 ELSE IF( iblk.EQ.mblks ) THEN
10685 ib = lmbloc
10686 ELSE
10687 ib = mb
10688 END IF
10689*
10690 DO 80 ik = ii, ii + ib - 1
10691*
10692 jj = 1
10693*
10694 DO 70 jblk = 1, nblks
10695*
10696 IF( jblk.EQ.1 ) THEN
10697 jb = inbloc
10698 ELSE IF( jblk.EQ.nblks ) THEN
10699 jb = lnbloc
10700 ELSE
10701 jb = nb
10702 END IF
10703*
10704* Blocks are IB by JB
10705*
10706 DO 60 jk = jj, jj + jb - 1
10707 a( ik, jk ) = cmplx( pb_srand( 0 ), pb_srand( 0 ) )
10708 60 CONTINUE
10709*
10710 jj = jj + jb
10711*
10712 IF( jblk.EQ.1 ) THEN
10713*
10714* Jump INBLOC + ( NPCOL - 1 ) * NB columns
10715*
10716 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
10717 $ ib0 )
10718*
10719 ELSE
10720*
10721* Jump NPCOL * NB columns
10722*
10723 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1, ib0 )
10724*
10725 END IF
10726*
10727 ib1( 1 ) = ib0( 1 )
10728 ib1( 2 ) = ib0( 2 )
10729*
10730 70 CONTINUE
10731*
10732* Jump one row
10733*
10734 CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
10735*
10736 ib1( 1 ) = ib0( 1 )
10737 ib1( 2 ) = ib0( 2 )
10738 ib2( 1 ) = ib0( 1 )
10739 ib2( 2 ) = ib0( 2 )
10740*
10741 80 CONTINUE
10742*
10743 ii = ii + ib
10744*
10745 IF( iblk.EQ.1 ) THEN
10746*
10747* Jump IMBLOC + ( NPROW - 1 ) * MB rows
10748*
10749 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
10750*
10751 ELSE
10752*
10753* Jump NPROW * MB rows
10754*
10755 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
10756*
10757 END IF
10758*
10759 ib1( 1 ) = ib0( 1 )
10760 ib1( 2 ) = ib0( 2 )
10761 ib2( 1 ) = ib0( 1 )
10762 ib2( 2 ) = ib0( 2 )
10763 ib3( 1 ) = ib0( 1 )
10764 ib3( 2 ) = ib0( 2 )
10765*
10766 90 CONTINUE
10767*
10768 ELSE IF( lsame( aform, 'S' ) ) THEN
10769*
10770* Generate a symmetric matrix
10771*
10772 IF( lsame( uplo, 'L' ) ) THEN
10773*
10774* generate lower trapezoidal part
10775*
10776 jj = 1
10777 lcmtc = lcmt00
10778*
10779 DO 170 jblk = 1, nblks
10780*
10781 IF( jblk.EQ.1 ) THEN
10782 jb = inbloc
10783 low = 1 - inbloc
10784 ELSE IF( jblk.EQ.nblks ) THEN
10785 jb = lnbloc
10786 low = 1 - nb
10787 ELSE
10788 jb = nb
10789 low = 1 - nb
10790 END IF
10791*
10792 DO 160 jk = jj, jj + jb - 1
10793*
10794 ii = 1
10795 lcmtr = lcmtc
10796*
10797 DO 150 iblk = 1, mblks
10798*
10799 IF( iblk.EQ.1 ) THEN
10800 ib = imbloc
10801 upp = imbloc - 1
10802 ELSE IF( iblk.EQ.mblks ) THEN
10803 ib = lmbloc
10804 upp = mb - 1
10805 ELSE
10806 ib = mb
10807 upp = mb - 1
10808 END IF
10809*
10810* Blocks are IB by JB
10811*
10812 IF( lcmtr.GT.upp ) THEN
10813*
10814 DO 100 ik = ii, ii + ib - 1
10815 dummy = cmplx( pb_srand( 0 ),
10816 $ pb_srand( 0 ) )
10817 100 CONTINUE
10818*
10819 ELSE IF( lcmtr.GE.low ) THEN
10820*
10821 jtmp = jk - jj + 1
10822 mnb = max( 0, -lcmtr )
10823*
10824 IF( jtmp.LE.min( mnb, jb ) ) THEN
10825*
10826 DO 110 ik = ii, ii + ib - 1
10827 a( ik, jk ) = cmplx( pb_srand( 0 ),
10828 $ pb_srand( 0 ) )
10829 110 CONTINUE
10830*
10831 ELSE IF( ( jtmp.GE.( mnb + 1 ) ) .AND.
10832 $ ( jtmp.LE.min( ib-lcmtr, jb ) ) ) THEN
10833*
10834 itmp = ii + jtmp + lcmtr - 1
10835*
10836 DO 120 ik = ii, itmp - 1
10837 dummy = cmplx( pb_srand( 0 ),
10838 $ pb_srand( 0 ) )
10839 120 CONTINUE
10840*
10841 DO 130 ik = itmp, ii + ib - 1
10842 a( ik, jk ) = cmplx( pb_srand( 0 ),
10843 $ pb_srand( 0 ) )
10844 130 CONTINUE
10845*
10846 END IF
10847*
10848 ELSE
10849*
10850 DO 140 ik = ii, ii + ib - 1
10851 a( ik, jk ) = cmplx( pb_srand( 0 ),
10852 $ pb_srand( 0 ) )
10853 140 CONTINUE
10854*
10855 END IF
10856*
10857 ii = ii + ib
10858*
10859 IF( iblk.EQ.1 ) THEN
10860*
10861* Jump IMBLOC + ( NPROW - 1 ) * MB rows
10862*
10863 lcmtr = lcmtr - jmp( jmp_npimbloc )
10864 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
10865 $ ib0 )
10866*
10867 ELSE
10868*
10869* Jump NPROW * MB rows
10870*
10871 lcmtr = lcmtr - jmp( jmp_npmb )
10872 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1,
10873 $ ib0 )
10874*
10875 END IF
10876*
10877 ib1( 1 ) = ib0( 1 )
10878 ib1( 2 ) = ib0( 2 )
10879*
10880 150 CONTINUE
10881*
10882* Jump one column
10883*
10884 CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
10885*
10886 ib1( 1 ) = ib0( 1 )
10887 ib1( 2 ) = ib0( 2 )
10888 ib2( 1 ) = ib0( 1 )
10889 ib2( 2 ) = ib0( 2 )
10890*
10891 160 CONTINUE
10892*
10893 jj = jj + jb
10894*
10895 IF( jblk.EQ.1 ) THEN
10896*
10897* Jump INBLOC + ( NPCOL - 1 ) * NB columns
10898*
10899 lcmtc = lcmtc + jmp( jmp_nqinbloc )
10900 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
10901*
10902 ELSE
10903*
10904* Jump NPCOL * NB columns
10905*
10906 lcmtc = lcmtc + jmp( jmp_nqnb )
10907 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
10908*
10909 END IF
10910*
10911 ib1( 1 ) = ib0( 1 )
10912 ib1( 2 ) = ib0( 2 )
10913 ib2( 1 ) = ib0( 1 )
10914 ib2( 2 ) = ib0( 2 )
10915 ib3( 1 ) = ib0( 1 )
10916 ib3( 2 ) = ib0( 2 )
10917*
10918 170 CONTINUE
10919*
10920 ELSE
10921*
10922* generate upper trapezoidal part
10923*
10924 ii = 1
10925 lcmtr = lcmt00
10926*
10927 DO 250 iblk = 1, mblks
10928*
10929 IF( iblk.EQ.1 ) THEN
10930 ib = imbloc
10931 upp = imbloc - 1
10932 ELSE IF( iblk.EQ.mblks ) THEN
10933 ib = lmbloc
10934 upp = mb - 1
10935 ELSE
10936 ib = mb
10937 upp = mb - 1
10938 END IF
10939*
10940 DO 240 ik = ii, ii + ib - 1
10941*
10942 jj = 1
10943 lcmtc = lcmtr
10944*
10945 DO 230 jblk = 1, nblks
10946*
10947 IF( jblk.EQ.1 ) THEN
10948 jb = inbloc
10949 low = 1 - inbloc
10950 ELSE IF( jblk.EQ.nblks ) THEN
10951 jb = lnbloc
10952 low = 1 - nb
10953 ELSE
10954 jb = nb
10955 low = 1 - nb
10956 END IF
10957*
10958* Blocks are IB by JB
10959*
10960 IF( lcmtc.LT.low ) THEN
10961*
10962 DO 180 jk = jj, jj + jb - 1
10963 dummy = cmplx( pb_srand( 0 ), pb_srand( 0 ) )
10964 180 CONTINUE
10965*
10966 ELSE IF( lcmtc.LE.upp ) THEN
10967*
10968 itmp = ik - ii + 1
10969 mnb = max( 0, lcmtc )
10970*
10971 IF( itmp.LE.min( mnb, ib ) ) THEN
10972*
10973 DO 190 jk = jj, jj + jb - 1
10974 a( ik, jk ) = cmplx( pb_srand( 0 ),
10975 $ pb_srand( 0 ) )
10976 190 CONTINUE
10977*
10978 ELSE IF( ( itmp.GE.( mnb + 1 ) ) .AND.
10979 $ ( itmp.LE.min( jb+lcmtc, ib ) ) ) THEN
10980*
10981 jtmp = jj + itmp - lcmtc - 1
10982*
10983 DO 200 jk = jj, jtmp - 1
10984 dummy = cmplx( pb_srand( 0 ),
10985 $ pb_srand( 0 ) )
10986 200 CONTINUE
10987*
10988 DO 210 jk = jtmp, jj + jb - 1
10989 a( ik, jk ) = cmplx( pb_srand( 0 ),
10990 $ pb_srand( 0 ) )
10991 210 CONTINUE
10992*
10993 END IF
10994*
10995 ELSE
10996*
10997 DO 220 jk = jj, jj + jb - 1
10998 a( ik, jk ) = cmplx( pb_srand( 0 ),
10999 $ pb_srand( 0 ) )
11000 220 CONTINUE
11001*
11002 END IF
11003*
11004 jj = jj + jb
11005*
11006 IF( jblk.EQ.1 ) THEN
11007*
11008* Jump INBLOC + ( NPCOL - 1 ) * NB columns
11009*
11010 lcmtc = lcmtc + jmp( jmp_nqinbloc )
11011 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
11012 $ ib0 )
11013*
11014 ELSE
11015*
11016* Jump NPCOL * NB columns
11017*
11018 lcmtc = lcmtc + jmp( jmp_nqnb )
11019 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1,
11020 $ ib0 )
11021*
11022 END IF
11023*
11024 ib1( 1 ) = ib0( 1 )
11025 ib1( 2 ) = ib0( 2 )
11026*
11027 230 CONTINUE
11028*
11029* Jump one row
11030*
11031 CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
11032*
11033 ib1( 1 ) = ib0( 1 )
11034 ib1( 2 ) = ib0( 2 )
11035 ib2( 1 ) = ib0( 1 )
11036 ib2( 2 ) = ib0( 2 )
11037*
11038 240 CONTINUE
11039*
11040 ii = ii + ib
11041*
11042 IF( iblk.EQ.1 ) THEN
11043*
11044* Jump IMBLOC + ( NPROW - 1 ) * MB rows
11045*
11046 lcmtr = lcmtr - jmp( jmp_npimbloc )
11047 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
11048*
11049 ELSE
11050*
11051* Jump NPROW * MB rows
11052*
11053 lcmtr = lcmtr - jmp( jmp_npmb )
11054 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
11055*
11056 END IF
11057*
11058 ib1( 1 ) = ib0( 1 )
11059 ib1( 2 ) = ib0( 2 )
11060 ib2( 1 ) = ib0( 1 )
11061 ib2( 2 ) = ib0( 2 )
11062 ib3( 1 ) = ib0( 1 )
11063 ib3( 2 ) = ib0( 2 )
11064*
11065 250 CONTINUE
11066*
11067 END IF
11068*
11069 ELSE IF( lsame( aform, 'C' ) ) THEN
11070*
11071* Generate the conjugate transpose of the matrix that would be
11072* normally generated.
11073*
11074 ii = 1
11075*
11076 DO 290 iblk = 1, mblks
11077*
11078 IF( iblk.EQ.1 ) THEN
11079 ib = imbloc
11080 ELSE IF( iblk.EQ.mblks ) THEN
11081 ib = lmbloc
11082 ELSE
11083 ib = mb
11084 END IF
11085*
11086 DO 280 ik = ii, ii + ib - 1
11087*
11088 jj = 1
11089*
11090 DO 270 jblk = 1, nblks
11091*
11092 IF( jblk.EQ.1 ) THEN
11093 jb = inbloc
11094 ELSE IF( jblk.EQ.nblks ) THEN
11095 jb = lnbloc
11096 ELSE
11097 jb = nb
11098 END IF
11099*
11100* Blocks are IB by JB
11101*
11102 DO 260 jk = jj, jj + jb - 1
11103 a( ik, jk ) = cmplx( pb_srand( 0 ),
11104 $ -pb_srand( 0 ) )
11105 260 CONTINUE
11106*
11107 jj = jj + jb
11108*
11109 IF( jblk.EQ.1 ) THEN
11110*
11111* Jump INBLOC + ( NPCOL - 1 ) * NB columns
11112*
11113 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
11114 $ ib0 )
11115*
11116 ELSE
11117*
11118* Jump NPCOL * NB columns
11119*
11120 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1,
11121 $ ib0 )
11122*
11123 END IF
11124*
11125 ib1( 1 ) = ib0( 1 )
11126 ib1( 2 ) = ib0( 2 )
11127*
11128 270 CONTINUE
11129*
11130* Jump one row
11131*
11132 CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
11133*
11134 ib1( 1 ) = ib0( 1 )
11135 ib1( 2 ) = ib0( 2 )
11136 ib2( 1 ) = ib0( 1 )
11137 ib2( 2 ) = ib0( 2 )
11138*
11139 280 CONTINUE
11140*
11141 ii = ii + ib
11142*
11143 IF( iblk.EQ.1 ) THEN
11144*
11145* Jump IMBLOC + ( NPROW - 1 ) * MB rows
11146*
11147 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
11148*
11149 ELSE
11150*
11151* Jump NPROW * MB rows
11152*
11153 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
11154*
11155 END IF
11156*
11157 ib1( 1 ) = ib0( 1 )
11158 ib1( 2 ) = ib0( 2 )
11159 ib2( 1 ) = ib0( 1 )
11160 ib2( 2 ) = ib0( 2 )
11161 ib3( 1 ) = ib0( 1 )
11162 ib3( 2 ) = ib0( 2 )
11163*
11164 290 CONTINUE
11165*
11166 ELSE IF( lsame( aform, 'H' ) ) THEN
11167*
11168* Generate a Hermitian matrix
11169*
11170 IF( lsame( uplo, 'L' ) ) THEN
11171*
11172* generate lower trapezoidal part
11173*
11174 jj = 1
11175 lcmtc = lcmt00
11176*
11177 DO 370 jblk = 1, nblks
11178*
11179 IF( jblk.EQ.1 ) THEN
11180 jb = inbloc
11181 low = 1 - inbloc
11182 ELSE IF( jblk.EQ.nblks ) THEN
11183 jb = lnbloc
11184 low = 1 - nb
11185 ELSE
11186 jb = nb
11187 low = 1 - nb
11188 END IF
11189*
11190 DO 360 jk = jj, jj + jb - 1
11191*
11192 ii = 1
11193 lcmtr = lcmtc
11194*
11195 DO 350 iblk = 1, mblks
11196*
11197 IF( iblk.EQ.1 ) THEN
11198 ib = imbloc
11199 upp = imbloc - 1
11200 ELSE IF( iblk.EQ.mblks ) THEN
11201 ib = lmbloc
11202 upp = mb - 1
11203 ELSE
11204 ib = mb
11205 upp = mb - 1
11206 END IF
11207*
11208* Blocks are IB by JB
11209*
11210 IF( lcmtr.GT.upp ) THEN
11211*
11212 DO 300 ik = ii, ii + ib - 1
11213 dummy = cmplx( pb_srand( 0 ),
11214 $ pb_srand( 0 ) )
11215 300 CONTINUE
11216*
11217 ELSE IF( lcmtr.GE.low ) THEN
11218*
11219 jtmp = jk - jj + 1
11220 mnb = max( 0, -lcmtr )
11221*
11222 IF( jtmp.LE.min( mnb, jb ) ) THEN
11223*
11224 DO 310 ik = ii, ii + ib - 1
11225 a( ik, jk ) = cmplx( pb_srand( 0 ),
11226 $ pb_srand( 0 ) )
11227 310 CONTINUE
11228*
11229 ELSE IF( ( jtmp.GE.( mnb + 1 ) ) .AND.
11230 $ ( jtmp.LE.min( ib-lcmtr, jb ) ) ) THEN
11231*
11232 itmp = ii + jtmp + lcmtr - 1
11233*
11234 DO 320 ik = ii, itmp - 1
11235 dummy = cmplx( pb_srand( 0 ),
11236 $ pb_srand( 0 ) )
11237 320 CONTINUE
11238*
11239 IF( itmp.LE.( ii + ib - 1 ) ) THEN
11240 dummy = cmplx( pb_srand( 0 ),
11241 $ -pb_srand( 0 ) )
11242 a( itmp, jk ) = cmplx( real( dummy ),
11243 $ zero )
11244 END IF
11245*
11246 DO 330 ik = itmp + 1, ii + ib - 1
11247 a( ik, jk ) = cmplx( pb_srand( 0 ),
11248 $ pb_srand( 0 ) )
11249 330 CONTINUE
11250*
11251 END IF
11252*
11253 ELSE
11254*
11255 DO 340 ik = ii, ii + ib - 1
11256 a( ik, jk ) = cmplx( pb_srand( 0 ),
11257 $ pb_srand( 0 ) )
11258 340 CONTINUE
11259*
11260 END IF
11261*
11262 ii = ii + ib
11263*
11264 IF( iblk.EQ.1 ) THEN
11265*
11266* Jump IMBLOC + ( NPROW - 1 ) * MB rows
11267*
11268 lcmtr = lcmtr - jmp( jmp_npimbloc )
11269 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib1,
11270 $ ib0 )
11271*
11272 ELSE
11273*
11274* Jump NPROW * MB rows
11275*
11276 lcmtr = lcmtr - jmp( jmp_npmb )
11277 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib1,
11278 $ ib0 )
11279*
11280 END IF
11281*
11282 ib1( 1 ) = ib0( 1 )
11283 ib1( 2 ) = ib0( 2 )
11284*
11285 350 CONTINUE
11286*
11287* Jump one column
11288*
11289 CALL pb_jumpit( imuladd( 1, jmp_col ), ib2, ib0 )
11290*
11291 ib1( 1 ) = ib0( 1 )
11292 ib1( 2 ) = ib0( 2 )
11293 ib2( 1 ) = ib0( 1 )
11294 ib2( 2 ) = ib0( 2 )
11295*
11296 360 CONTINUE
11297*
11298 jj = jj + jb
11299*
11300 IF( jblk.EQ.1 ) THEN
11301*
11302* Jump INBLOC + ( NPCOL - 1 ) * NB columns
11303*
11304 lcmtc = lcmtc + jmp( jmp_nqinbloc )
11305 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib3, ib0 )
11306*
11307 ELSE
11308*
11309* Jump NPCOL * NB columns
11310*
11311 lcmtc = lcmtc + jmp( jmp_nqnb )
11312 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib3, ib0 )
11313*
11314 END IF
11315*
11316 ib1( 1 ) = ib0( 1 )
11317 ib1( 2 ) = ib0( 2 )
11318 ib2( 1 ) = ib0( 1 )
11319 ib2( 2 ) = ib0( 2 )
11320 ib3( 1 ) = ib0( 1 )
11321 ib3( 2 ) = ib0( 2 )
11322*
11323 370 CONTINUE
11324*
11325 ELSE
11326*
11327* generate upper trapezoidal part
11328*
11329 ii = 1
11330 lcmtr = lcmt00
11331*
11332 DO 450 iblk = 1, mblks
11333*
11334 IF( iblk.EQ.1 ) THEN
11335 ib = imbloc
11336 upp = imbloc - 1
11337 ELSE IF( iblk.EQ.mblks ) THEN
11338 ib = lmbloc
11339 upp = mb - 1
11340 ELSE
11341 ib = mb
11342 upp = mb - 1
11343 END IF
11344*
11345 DO 440 ik = ii, ii + ib - 1
11346*
11347 jj = 1
11348 lcmtc = lcmtr
11349*
11350 DO 430 jblk = 1, nblks
11351*
11352 IF( jblk.EQ.1 ) THEN
11353 jb = inbloc
11354 low = 1 - inbloc
11355 ELSE IF( jblk.EQ.nblks ) THEN
11356 jb = lnbloc
11357 low = 1 - nb
11358 ELSE
11359 jb = nb
11360 low = 1 - nb
11361 END IF
11362*
11363* Blocks are IB by JB
11364*
11365 IF( lcmtc.LT.low ) THEN
11366*
11367 DO 380 jk = jj, jj + jb - 1
11368 dummy = cmplx( pb_srand( 0 ),
11369 $ -pb_srand( 0 ) )
11370 380 CONTINUE
11371*
11372 ELSE IF( lcmtc.LE.upp ) THEN
11373*
11374 itmp = ik - ii + 1
11375 mnb = max( 0, lcmtc )
11376*
11377 IF( itmp.LE.min( mnb, ib ) ) THEN
11378*
11379 DO 390 jk = jj, jj + jb - 1
11380 a( ik, jk ) = cmplx( pb_srand( 0 ),
11381 $ -pb_srand( 0 ) )
11382 390 CONTINUE
11383*
11384 ELSE IF( ( itmp.GE.( mnb + 1 ) ) .AND.
11385 $ ( itmp.LE.min( jb+lcmtc, ib ) ) ) THEN
11386*
11387 jtmp = jj + itmp - lcmtc - 1
11388*
11389 DO 400 jk = jj, jtmp - 1
11390 dummy = cmplx( pb_srand( 0 ),
11391 $ -pb_srand( 0 ) )
11392 400 CONTINUE
11393*
11394 IF( jtmp.LE.( jj + jb - 1 ) ) THEN
11395 dummy = cmplx( pb_srand( 0 ),
11396 $ -pb_srand( 0 ) )
11397 a( ik, jtmp ) = cmplx( real( dummy ),
11398 $ zero )
11399 END IF
11400*
11401 DO 410 jk = jtmp + 1, jj + jb - 1
11402 a( ik, jk ) = cmplx( pb_srand( 0 ),
11403 $ -pb_srand( 0 ) )
11404 410 CONTINUE
11405*
11406 END IF
11407*
11408 ELSE
11409*
11410 DO 420 jk = jj, jj + jb - 1
11411 a( ik, jk ) = cmplx( pb_srand( 0 ),
11412 $ -pb_srand( 0 ) )
11413 420 CONTINUE
11414*
11415 END IF
11416*
11417 jj = jj + jb
11418*
11419 IF( jblk.EQ.1 ) THEN
11420*
11421* Jump INBLOC + ( NPCOL - 1 ) * NB columns
11422*
11423 lcmtc = lcmtc + jmp( jmp_nqinbloc )
11424 CALL pb_jumpit( imuladd( 1, jmp_nqinbloc ), ib1,
11425 $ ib0 )
11426*
11427 ELSE
11428*
11429* Jump NPCOL * NB columns
11430*
11431 lcmtc = lcmtc + jmp( jmp_nqnb )
11432 CALL pb_jumpit( imuladd( 1, jmp_nqnb ), ib1,
11433 $ ib0 )
11434*
11435 END IF
11436*
11437 ib1( 1 ) = ib0( 1 )
11438 ib1( 2 ) = ib0( 2 )
11439*
11440 430 CONTINUE
11441*
11442* Jump one row
11443*
11444 CALL pb_jumpit( imuladd( 1, jmp_row ), ib2, ib0 )
11445*
11446 ib1( 1 ) = ib0( 1 )
11447 ib1( 2 ) = ib0( 2 )
11448 ib2( 1 ) = ib0( 1 )
11449 ib2( 2 ) = ib0( 2 )
11450*
11451 440 CONTINUE
11452*
11453 ii = ii + ib
11454*
11455 IF( iblk.EQ.1 ) THEN
11456*
11457* Jump IMBLOC + ( NPROW - 1 ) * MB rows
11458*
11459 lcmtr = lcmtr - jmp( jmp_npimbloc )
11460 CALL pb_jumpit( imuladd( 1, jmp_npimbloc ), ib3, ib0 )
11461*
11462 ELSE
11463*
11464* Jump NPROW * MB rows
11465*
11466 lcmtr = lcmtr - jmp( jmp_npmb )
11467 CALL pb_jumpit( imuladd( 1, jmp_npmb ), ib3, ib0 )
11468*
11469 END IF
11470*
11471 ib1( 1 ) = ib0( 1 )
11472 ib1( 2 ) = ib0( 2 )
11473 ib2( 1 ) = ib0( 1 )
11474 ib2( 2 ) = ib0( 2 )
11475 ib3( 1 ) = ib0( 1 )
11476 ib3( 2 ) = ib0( 2 )
11477*
11478 450 CONTINUE
11479*
11480 END IF
11481*
11482 END IF
11483*
11484 RETURN
11485*
11486* End of PB_CLAGEN
11487*
11488 END
11489 REAL function pb_srand( idumm )
11490*
11491* -- PBLAS test routine (version 2.0) --
11492* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
11493* and University of California, Berkeley.
11494* April 1, 1998
11495*
11496* .. Scalar Arguments ..
11497 INTEGER idumm
11498* ..
11499*
11500* Purpose
11501* =======
11502*
11503* PB_SRAND generates the next number in the random sequence. This func-
11504* tion ensures that this number will be in the interval ( -1.0, 1.0 ).
11505*
11506* Arguments
11507* =========
11508*
11509* IDUMM (local input) INTEGER
11510* This argument is ignored, but necessary to a FORTRAN 77 func-
11511* tion.
11512*
11513* Further Details
11514* ===============
11515*
11516* On entry, the array IRAND stored in the common block RANCOM contains
11517* the information (2 integers) required to generate the next number in
11518* the sequence X( n ). This number is computed as
11519*
11520* X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
11521*
11522* where the constant d is the largest 32 bit positive integer. The
11523* array IRAND is then updated for the generation of the next number
11524* X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
11525* The constants a and c should have been preliminarily stored in the
11526* array IACS as 2 pairs of integers. The initial set up of IRAND and
11527* IACS is performed by the routine PB_SETRAN.
11528*
11529* -- Written on April 1, 1998 by
11530* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
11531*
11532* =====================================================================
11533*
11534* .. Parameters ..
11535 REAL one, two
11536 PARAMETER ( one = 1.0e+0, two = 2.0e+0 )
11537* ..
11538* .. External Functions ..
11539 REAL pb_sran
11540 EXTERNAL pb_sran
11541* ..
11542* .. Executable Statements ..
11543*
11544 pb_srand = one - two * pb_sran( idumm )
11545*
11546 RETURN
11547*
11548* End of PB_SRAND
11549*
11550 END
11551 REAL function pb_sran( idumm )
11552*
11553* -- PBLAS test routine (version 2.0) --
11554* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
11555* and University of California, Berkeley.
11556* April 1, 1998
11557*
11558* .. Scalar Arguments ..
11559 INTEGER idumm
11560* ..
11561*
11562* Purpose
11563* =======
11564*
11565* PB_SRAN generates the next number in the random sequence.
11566*
11567* Arguments
11568* =========
11569*
11570* IDUMM (local input) INTEGER
11571* This argument is ignored, but necessary to a FORTRAN 77 func-
11572* tion.
11573*
11574* Further Details
11575* ===============
11576*
11577* On entry, the array IRAND stored in the common block RANCOM contains
11578* the information (2 integers) required to generate the next number in
11579* the sequence X( n ). This number is computed as
11580*
11581* X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d,
11582*
11583* where the constant d is the largest 32 bit positive integer. The
11584* array IRAND is then updated for the generation of the next number
11585* X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c.
11586* The constants a and c should have been preliminarily stored in the
11587* array IACS as 2 pairs of integers. The initial set up of IRAND and
11588* IACS is performed by the routine PB_SETRAN.
11589*
11590* -- Written on April 1, 1998 by
11591* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
11592*
11593* =====================================================================
11594*
11595* .. Parameters ..
11596 REAL divfac, pow16
11597 PARAMETER ( divfac = 2.147483648e+9,
11598 $ pow16 = 6.5536e+4 )
11599* ..
11600* .. Local Arrays ..
11601 INTEGER j( 2 )
11602* ..
11603* .. External Subroutines ..
11604 EXTERNAL pb_ladd, pb_lmul
11605* ..
11606* .. Intrinsic Functions ..
11607 INTRINSIC real
11608* ..
11609* .. Common Blocks ..
11610 INTEGER iacs( 4 ), irand( 2 )
11611 common /rancom/ irand, iacs
11612* ..
11613* .. Save Statements ..
11614 SAVE /rancom/
11615* ..
11616* .. Executable Statements ..
11617*
11618 pb_sran = ( real( irand( 1 ) ) + pow16 * real( irand( 2 ) ) ) /
11619 $ divfac
11620*
11621 CALL pb_lmul( irand, iacs, j )
11622 CALL pb_ladd( j, iacs( 3 ), irand )
11623*
11624 RETURN
11625*
11626* End of PB_SRAN
11627*
11628 END
float cmplx[2]
Definition pblas.h:136
subroutine pb_ainfog2l(m, n, i, j, desc, nprow, npcol, myrow, mycol, imb1, inb1, mp, nq, ii, jj, prow, pcol, rprow, rpcol)
Definition pblastst.f:2023
subroutine pb_descset2(desc, m, n, imb, inb, mb, nb, rsrc, csrc, ctxt, lld)
Definition pblastst.f:3172
subroutine pb_ladd(j, k, i)
Definition pblastst.f:4480
subroutine pb_binfo(offd, m, n, imb1, inb1, mb, nb, mrrow, mrcol, lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, iupp, upp)
Definition pblastst.f:3577
subroutine pb_setran(iran, iac)
Definition pblastst.f:4759
subroutine pb_infog2l(i, j, desc, nprow, npcol, myrow, mycol, ii, jj, prow, pcol)
Definition pblastst.f:1673
subroutine pb_locinfo(i, inb, nb, myroc, srcproc, nprocs, ilocblk, ilocoff, mydist)
Definition pblastst.f:3910
subroutine pchkpbe(ictxt, nout, sname, infot)
Definition pblastst.f:1084
subroutine pb_chkmat(ictxt, m, mpos0, n, npos0, ia, ja, desca, dpos0, info)
Definition pblastst.f:2742
subroutine pb_lmul(k, j, i)
Definition pblastst.f:4559
subroutine pb_jump(k, muladd, irann, iranm, ima)
Definition pblastst.f:4648
subroutine pb_setlocran(seed, ilocblk, jlocblk, ilocoff, jlocoff, myrdist, mycdist, nprow, npcol, jmp, imuladd, iran)
Definition pblastst.f:4302
subroutine pb_initmuladd(muladd0, jmp, imuladd)
Definition pblastst.f:4196
subroutine pb_desctrans(descin, descout)
Definition pblastst.f:2964
subroutine pb_initjmp(colmaj, nvir, imbvir, inbvir, imbloc, inbloc, mb, nb, rsrc, csrc, nprow, npcol, stride, jmp)
Definition pblastst.f:4045
subroutine pb_jumpit(muladd, irann, iranm)
Definition pblastst.f:4822
subroutine pb_pclaprn2(m, n, a, ia, ja, desca, irprnt, icprnt, cmatnm, nout, prow, pcol, work)
Definition pcblastst.f:9516
subroutine pcmmch1(ictxt, uplo, trans, n, k, alpha, a, ia, ja, desca, beta, c, pc, ic, jc, descc, ct, g, err, info)
Definition pcblastst.f:5789
subroutine pcmmch(ictxt, transa, transb, m, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, pc, ic, jc, descc, ct, g, err, info)
Definition pcblastst.f:5336
subroutine pclagen(inplace, aform, diag, offa, m, n, ia, ja, desca, iaseed, a, lda)
Definition pcblastst.f:8491
subroutine pcchkvout(n, x, px, ix, jx, descx, incx, info)
Definition pcblastst.f:2876
subroutine pcchkvin(errmax, n, x, px, ix, jx, descx, incx, info)
Definition pcblastst.f:2582
subroutine pclascal(type, m, n, alpha, a, ia, ja, desca)
Definition pcblastst.f:7983
subroutine pcipset(toggle, n, a, ia, ja, desca)
Definition pcblastst.f:7044
subroutine pcladom(inplace, n, alpha, a, ia, ja, desca)
Definition pcblastst.f:8894
subroutine pb_cchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcblastst.f:9873
subroutine pcvecee(ictxt, nout, subptr, scode, sname)
Definition pcblastst.f:936
subroutine pcerrset(err, errmax, xtrue, x)
Definition pcblastst.f:2460
subroutine pcmatee(ictxt, nout, subptr, scode, sname)
Definition pcblastst.f:1190
subroutine pb_pclaprnt(m, n, a, ia, ja, desca, irprnt, icprnt, cmatnm, nout, work)
Definition pcblastst.f:9302
subroutine pcchkmat(ictxt, nout, subptr, scode, sname, argnam, argpos)
Definition pcblastst.f:1677
subroutine pcchkmout(m, n, a, pa, ia, ja, desca, info)
Definition pcblastst.f:3633
subroutine pcchkmin(errmax, m, n, a, pa, ia, ja, desca, info)
Definition pcblastst.f:3332
subroutine pcmprnt(ictxt, nout, m, n, a, lda, irprnt, icprnt, cmatnm)
Definition pcblastst.f:3955
subroutine pcoptee(ictxt, nout, subptr, scode, sname)
Definition pcblastst.f:2
subroutine pcvmch(ictxt, trans, uplo, m, n, alpha, x, ix, jx, descx, incx, y, iy, jy, descy, incy, a, pa, ia, ja, desca, g, err, info)
Definition pcblastst.f:4606
subroutine pcchkopt(ictxt, nout, subptr, scode, sname, argnam, argpos)
Definition pcblastst.f:266
subroutine pb_clagen(uplo, aform, a, lda, lcmt00, iran, mblks, imbloc, mb, lmbloc, nblks, inbloc, nb, lnbloc, jmp, imuladd)
real function pb_sran(idumm)
subroutine pcdimee(ictxt, nout, subptr, scode, sname)
Definition pcblastst.f:455
subroutine pb_clascal(uplo, m, n, ioffd, alpha, a, lda)
subroutine pcmmch2(ictxt, uplo, trans, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, pc, ic, jc, descc, ct, g, err, info)
Definition pcblastst.f:6168
subroutine pb_cfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcblastst.f:9760
subroutine pcmmch3(uplo, trans, m, n, alpha, a, ia, ja, desca, beta, c, pc, ic, jc, descc, err, info)
Definition pcblastst.f:6584
subroutine pcvmch2(ictxt, uplo, m, n, alpha, x, ix, jx, descx, incx, y, iy, jy, descy, incy, a, pa, ia, ja, desca, g, err, info)
Definition pcblastst.f:4975
subroutine pcerraxpby(errbnd, alpha, x, beta, y, prec)
Definition pcblastst.f:6943
subroutine pccallsub(subptr, scode)
Definition pcblastst.f:2183
subroutine pcchkdim(ictxt, nout, subptr, scode, sname, argnam, argpos)
Definition pcblastst.f:759
subroutine pcmvch(ictxt, trans, m, n, alpha, a, ia, ja, desca, x, ix, jx, descx, incx, beta, y, py, iy, jy, descy, incy, g, err, info)
Definition pcblastst.f:4172
subroutine pb_claset(uplo, m, n, ioffd, alpha, beta, a, lda)
subroutine pcvprnt(ictxt, nout, n, x, incx, irprnt, icprnt, cvecnm)
Definition pcblastst.f:4067
real function pb_srand(idumm)
subroutine pclaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pcblastst.f:7508
real function pslamch(ictxt, cmach)
Definition pcblastst.f:7455
subroutine pcsetpblas(ictxt)
Definition pcblastst.f:1478
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
logical function lsame(ca, cb)
Definition tools.f:1724
real function slamch(cmach)
Definition tools.f:867