LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
iparam2stage.F
Go to the documentation of this file.
1*> \brief \b IPARAM2STAGE
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download IPARAM2STAGE + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
22* NI, NBI, IBI, NXI )
23* #if defined(_OPENMP)
24* use omp_lib
25* #endif
26* IMPLICIT NONE
27*
28* .. Scalar Arguments ..
29* CHARACTER*( * ) NAME, OPTS
30* INTEGER ISPEC, NI, NBI, IBI, NXI
31*
32*> \par Purpose:
33* =============
34*>
35*> \verbatim
36*>
37*> This program sets problem and machine dependent parameters
38*> useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
39*> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
40*> and related subroutines for eigenvalue problems.
41*> It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
42*> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
43*> with a direct conversion ISPEC + 16.
44*> \endverbatim
45*
46* Arguments:
47* ==========
48*
49*> \param[in] ISPEC
50*> \verbatim
51*> ISPEC is integer scalar
52*> ISPEC specifies which tunable parameter IPARAM2STAGE should
53*> return.
54*>
55*> ISPEC=17: the optimal blocksize nb for the reduction to
56*> BAND
57*>
58*> ISPEC=18: the optimal blocksize ib for the eigenvectors /
59*> singular vectors update routine
60*>
61*> ISPEC=19: The length of the array that stores the Householder
62*> representation for the second stage
63*> Band to Tridiagonal or Bidiagonal
64*>
65*> ISPEC=20: The workspace needed for the routine in input.
66*>
67*> ISPEC=21: For future release.
68*> \endverbatim
69*>
70*> \param[in] NAME
71*> \verbatim
72*> NAME is character string
73*> Name of the calling subroutine
74*> \endverbatim
75*>
76*> \param[in] OPTS
77*> \verbatim
78*> OPTS is CHARACTER*(*)
79*> The character options to the subroutine NAME, concatenated
80*> into a single character string. For example, UPLO = 'U',
81*> TRANS = 'T', and DIAG = 'N' for a triangular routine would
82*> be specified as OPTS = 'UTN'.
83*> \endverbatim
84*>
85*> \param[in] NI
86*> \verbatim
87*> NI is INTEGER which is the size of the matrix
88*> \endverbatim
89*>
90*> \param[in] NBI
91*> \verbatim
92*> NBI is INTEGER, the block size used in the reduction
93*> (e.g., the size of the band), needed to compute workspace
94*> and LHOUS2.
95*> \endverbatim
96*>
97*> \param[in] IBI
98*> \verbatim
99*> IBI is INTEGER which represents the IB of the reduction,
100*> needed to compute workspace and LHOUS2.
101*> \endverbatim
102*>
103*> \param[in] NXI
104*> \verbatim
105*> NXI is INTEGER needed in the future release.
106*> \endverbatim
107*
108* Authors:
109* ========
110*
111*> \author Univ. of Tennessee
112*> \author Univ. of California Berkeley
113*> \author Univ. of Colorado Denver
114*> \author NAG Ltd.
115*
116*> \ingroup auxOTHERauxiliary
117*
118*> \par Further Details:
119* =====================
120*>
121*> \verbatim
122*>
123*> Implemented by Azzam Haidar.
124*>
125*> All details are available in the technical report and the SC11 and SC13 papers.
126*>
127*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
128*> Parallel reduction to condensed forms for symmetric eigenvalue problems
129*> using aggregated fine-grained and memory-aware kernels. In Proceedings
130*> of 2011 International Conference for High Performance Computing,
131*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
132*> Article 8 , 11 pages.
133*> http://doi.acm.org/10.1145/2063384.2063394
134*>
135*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
136*> An improved parallel singular value algorithm and its implementation
137*> for multicore hardware, In Proceedings of 2013 International Conference
138*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
139*> Denver, Colorado, USA, 2013.
140*> Article 90, 12 pages.
141*> http://doi.acm.org/10.1145/2503210.2503292
142*>
143*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
144*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
145*> calculations based on fine-grained memory aware tasks.
146*> International Journal of High Performance Computing Applications.
147*> Volume 28 Issue 2, Pages 196-209, May 2014.
148*> http://hpc.sagepub.com/content/28/2/196
149*>
150*> \endverbatim
151*>
152* =====================================================================
*
*  IPARAM2STAGE returns one tunable parameter of the two-stage
*  reduction routines, selected by ISPEC:
*     17  block size KD for the reduction to band form
*     18  inner block size IB
*     19  LHOUS, length of the second-stage Householder storage
*     20  LWORK, workspace length for the routine named in NAME
*     21  reserved; NXI is returned unchanged
*
*  Arguments:
*     ISPEC  (in) parameter selector, 17 <= ISPEC <= 21
*     NAME   (in) name of the calling routine; character 1 is the
*                 precision (S, D, C, Z), characters 4:6 the
*                 algorithm (TRD or BRD), characters 8:12 the stage
*     OPTS   (in) option characters; only OPTS(1:1) is read, and
*                 only when ISPEC = 19
*     NI     (in) matrix size, used for ISPEC = 19 and 20
*     NBI    (in) band width / block size, used for ISPEC = 20
*     IBI    (in) inner block size, used for ISPEC = 19
*     NXI    (in) returned as is for ISPEC = 21
*
*  Result: the requested value, or -1 when
*     - ISPEC is outside 17..21,
*     - ISPEC is not 19 and NAME(1:1) is not S, D, C or Z,
*     - ISPEC = 19 and the computed LHOUS is negative,
*     - ISPEC = 20 and NAME does not designate a known
*       algorithm/stage pair.
*
      INTEGER FUNCTION iparam2stage( ISPEC, NAME, OPTS,
     $                               NI, NBI, IBI, NXI )
#if defined(_OPENMP)
      use omp_lib
#endif
      IMPLICIT NONE
*
*  -- LAPACK auxiliary routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     .. Scalar Arguments ..
      CHARACTER*( * )    name, opts
      INTEGER            ispec, ni, nbi, ibi, nxi
*
*  ================================================================
*     ..
*     .. Local Scalars ..
      INTEGER            i, ic, iz, kd, ib, lhous, lwork, nthreads,
     $                   factoptnb, qroptnb, lqoptnb
      LOGICAL            rprec, cprec, known
      CHARACTER          prec*1, algo*3, stag*5, subnam*12, vect*1
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          char, ichar, max
*     ..
*     .. External Functions ..
      INTEGER            ilaenv
      EXTERNAL           ilaenv
*     ..
*     .. Executable Statements ..
*
*     Invalid value for ISPEC
*
      IF( ( ispec.LT.17 ) .OR. ( ispec.GT.21 ) ) THEN
         iparam2stage = -1
         RETURN
      ENDIF
*
*     Get the number of threads of a parallel region (1 when built
*     without OpenMP).  NTHREADS is shared, so only one thread of
*     the team stores it: SINGLE avoids concurrent writes.
*
      nthreads = 1
#if defined(_OPENMP)
!$OMP PARALLEL
!$OMP SINGLE
      nthreads = omp_get_num_threads()
!$OMP END SINGLE
!$OMP END PARALLEL
#endif
*
      IF( ispec.NE.19 ) THEN
*
*        Convert NAME to upper case if the first character is lower
*        case.  The character set is recognised from the code of 'Z'.
*
         iparam2stage = -1
         subnam = name
         ic = ichar( subnam( 1: 1 ) )
         iz = ichar( 'Z' )
         IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
*
*           ASCII character set
*
            IF( ic.GE.97 .AND. ic.LE.122 ) THEN
               subnam( 1: 1 ) = char( ic-32 )
               DO 100 i = 2, 12
                  ic = ichar( subnam( i: i ) )
                  IF( ic.GE.97 .AND. ic.LE.122 )
     $               subnam( i: i ) = char( ic-32 )
  100          CONTINUE
            END IF
*
         ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
*
*           EBCDIC character set
*
            IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
     $          ( ic.GE.145 .AND. ic.LE.153 ) .OR.
     $          ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
               subnam( 1: 1 ) = char( ic+64 )
               DO 110 i = 2, 12
                  ic = ichar( subnam( i: i ) )
                  IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
     $                ( ic.GE.145 .AND. ic.LE.153 ) .OR.
     $                ( ic.GE.162 .AND. ic.LE.169 ) )
     $               subnam( i: i ) = char( ic+64 )
  110          CONTINUE
            END IF
*
         ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
*
*           Prime machines:  ASCII+128
*
            IF( ic.GE.225 .AND. ic.LE.250 ) THEN
               subnam( 1: 1 ) = char( ic-32 )
               DO 120 i = 2, 12
                  ic = ichar( subnam( i: i ) )
                  IF( ic.GE.225 .AND. ic.LE.250 )
     $               subnam( i: i ) = char( ic-32 )
  120          CONTINUE
            END IF
         END IF
*
*        Split the name into precision, algorithm and stage.
*
         prec  = subnam( 1: 1 )
         algo  = subnam( 4: 6 )
         stag  = subnam( 8:12 )
         rprec = prec.EQ.'S' .OR. prec.EQ.'D'
         cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
*
*        Invalid value for PRECISION
*
         IF( .NOT.( rprec .OR. cprec ) ) THEN
            iparam2stage = -1
            RETURN
         ENDIF
      ENDIF
*
      IF( ( ispec.EQ.17 ) .OR. ( ispec.EQ.18 ) ) THEN
*
*        ISPEC = 17, 18:  block sizes KD, IB.
*        They could also depend on N; for now they depend only on
*        the thread count and on real versus complex precision.
*
         IF( nthreads.GT.4 ) THEN
            IF( cprec ) THEN
               kd = 128
               ib = 32
            ELSE
               kd = 160
               ib = 40
            ENDIF
         ELSE IF( nthreads.GT.1 ) THEN
*           Same values for real and complex precision.
            kd = 64
            ib = 32
         ELSE
            IF( cprec ) THEN
               kd = 16
               ib = 16
            ELSE
               kd = 32
               ib = 16
            ENDIF
         ENDIF
         IF( ispec.EQ.17 ) iparam2stage = kd
         IF( ispec.EQ.18 ) iparam2stage = ib
*
      ELSE IF( ispec.EQ.19 ) THEN
*
*        ISPEC = 19:
*        LHOUS, length of the Householder representation
*        matrix (V,T) of the second stage.
*
         vect = opts( 1: 1 )
         IF( vect.EQ.'N' ) THEN
            lhous = max( 1, 4*ni )
         ELSE
*           Original author's note: this value is not correct, the
*           proper one needs ALGO and the stage-2 routine.
            lhous = max( 1, 4*ni ) + ibi
         ENDIF
         IF( lhous.GE.0 ) THEN
            iparam2stage = lhous
         ELSE
            iparam2stage = -1
         ENDIF
*
      ELSE IF( ispec.EQ.20 ) THEN
*
*        ISPEC = 20: (21 for future use)
*        LWORK length of the workspace for
*        either or both stages for TRD and BRD.
*        TRD:
*          TRD_stage 1: = LT + LW + LS1 + LS2
*                       = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
*                         where LDT=LDS2=KD
*                       = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
*          TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
*          TRD_both   : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
*                       = N*KD + N*max(KD+1,FACTOPTNB)
*                         + max(2*KD*KD, KD*NTHREADS)
*                         + (KD+1)*N
*
         lwork = -1
         subnam( 1: 1 ) = prec
         subnam( 2: 6 ) = 'GEQRF'
         qroptnb = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
         subnam( 2: 6 ) = 'GELQF'
         lqoptnb = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
*        Could be QR or LQ for TRD and the max for BRD
         factoptnb = max( qroptnb, lqoptnb )
*
*        KNOWN records whether ALGO/STAG matched a supported pair,
*        so that an unsupported name is reported as -1 instead of
*        being turned into a workspace of 1 by the clamp below.
*
         known = .true.
         IF( algo.EQ.'TRD' ) THEN
            IF( stag.EQ.'2STAG' ) THEN
               lwork = ni*nbi + ni*max( nbi+1, factoptnb )
     $               + max( 2*nbi*nbi, nbi*nthreads )
     $               + ( nbi+1 )*ni
            ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
               lwork = ni*nbi + ni*max( nbi, factoptnb )
     $               + 2*nbi*nbi
            ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
               lwork = ( 2*nbi+1 )*ni + nbi*nthreads
            ELSE
               known = .false.
            ENDIF
         ELSE IF( algo.EQ.'BRD' ) THEN
            IF( stag.EQ.'2STAG' ) THEN
               lwork = 2*ni*nbi + ni*max( nbi+1, factoptnb )
     $               + max( 2*nbi*nbi, nbi*nthreads )
     $               + ( nbi+1 )*ni
            ELSE IF( stag.EQ.'GE2GB' ) THEN
               lwork = ni*nbi + ni*max( nbi, factoptnb )
     $               + 2*nbi*nbi
            ELSE IF( stag.EQ.'GB2BD' ) THEN
               lwork = ( 3*nbi+1 )*ni + nbi*nthreads
            ELSE
               known = .false.
            ENDIF
         ELSE
            known = .false.
         ENDIF
*
*        A supported routine always gets a workspace of at least 1.
*
         IF( known ) THEN
            iparam2stage = max( 1, lwork )
         ELSE
            iparam2stage = -1
         ENDIF
*
      ELSE IF( ispec.EQ.21 ) THEN
*
*        ISPEC = 21 for future use
*
         iparam2stage = nxi
      ENDIF
*
*     ==== End of IPARAM2STAGE ====
*
      END
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: ilaenv.f:162
integer function iparam2stage(ISPEC, NAME, OPTS, NI, NBI, IBI, NXI)
IPARAM2STAGE
Definition: iparam2stage.F:155