LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
iparam2stage.F
Go to the documentation of this file.
1*> \brief \b IPARAM2STAGE
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download IPARAM2STAGE + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
22* NI, NBI, IBI, NXI )
23* #if defined(_OPENMP)
24* use omp_lib
25* #endif
26* IMPLICIT NONE
27*
28* .. Scalar Arguments ..
29* CHARACTER*( * ) NAME, OPTS
30* INTEGER ISPEC, NI, NBI, IBI, NXI
31*
32*> \par Purpose:
33* =============
34*>
35*> \verbatim
36*>
37*> This program sets problem and machine dependent parameters
38*> useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
39*> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
40*> and related subroutines for eigenvalue problems.
41*> It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
42*> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
43*> with a direct conversion ISPEC + 16.
44*> \endverbatim
45*
46* Arguments:
47* ==========
48*
49*> \param[in] ISPEC
50*> \verbatim
51*> ISPEC is integer scalar
52*> ISPEC specifies which tunable parameter IPARAM2STAGE should
53*> return.
54*>
55*> ISPEC=17: the optimal blocksize nb for the reduction to
56*> BAND
57*>
58*> ISPEC=18: the optimal blocksize ib for the eigenvectors
59*> singular vectors update routine
60*>
61*> ISPEC=19: The length of the array that store the Housholder
62*> representation for the second stage
63*> Band to Tridiagonal or Bidiagonal
64*>
65*> ISPEC=20: The workspace needed for the routine in input.
66*>
67*> ISPEC=21: For future release.
68*> \endverbatim
69*>
70*> \param[in] NAME
71*> \verbatim
72*> NAME is character string
73*> Name of the calling subroutine
74*> \endverbatim
75*>
76*> \param[in] OPTS
77*> \verbatim
78*> OPTS is CHARACTER*(*)
79*> The character options to the subroutine NAME, concatenated
80*> into a single character string. For example, UPLO = 'U',
81*> TRANS = 'T', and DIAG = 'N' for a triangular routine would
82*> be specified as OPTS = 'UTN'.
83*> \endverbatim
84*>
85*> \param[in] NI
86*> \verbatim
87*> NI is INTEGER which is the size of the matrix
88*> \endverbatim
89*>
90*> \param[in] NBI
91*> \verbatim
92*> NBI is INTEGER which is the used in the reduction,
93*> (e.g., the size of the band), needed to compute workspace
94*> and LHOUS2.
95*> \endverbatim
96*>
97*> \param[in] IBI
98*> \verbatim
99*> IBI is INTEGER which represent the IB of the reduction,
100*> needed to compute workspace and LHOUS2.
101*> \endverbatim
102*>
103*> \param[in] NXI
104*> \verbatim
105*> NXI is INTEGER needed in the future release.
106*> \endverbatim
107*
108* Authors:
109* ========
110*
111*> \author Univ. of Tennessee
112*> \author Univ. of California Berkeley
113*> \author Univ. of Colorado Denver
114*> \author NAG Ltd.
115*
116*> \ingroup iparam2stage
117*
118*> \par Further Details:
119* =====================
120*>
121*> \verbatim
122*>
123*> Implemented by Azzam Haidar.
124*>
125*> All detail are available on technical report, SC11, SC13 papers.
126*>
127*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
128*> Parallel reduction to condensed forms for symmetric eigenvalue problems
129*> using aggregated fine-grained and memory-aware kernels. In Proceedings
130*> of 2011 International Conference for High Performance Computing,
131*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
132*> Article 8 , 11 pages.
133*> http://doi.acm.org/10.1145/2063384.2063394
134*>
135*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
136*> An improved parallel singular value algorithm and its implementation
137*> for multicore hardware, In Proceedings of 2013 International Conference
138*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
139*> Denver, Colorado, USA, 2013.
140*> Article 90, 12 pages.
141*> http://doi.acm.org/10.1145/2503210.2503292
142*>
143*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
144*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
145*> calculations based on fine-grained memory aware tasks.
146*> International Journal of High Performance Computing Applications.
147*> Volume 28 Issue 2, Pages 196-209, May 2014.
148*> http://hpc.sagepub.com/content/28/2/196
149*>
150*> \endverbatim
151*>
152* =====================================================================
153 INTEGER FUNCTION iparam2stage( ISPEC, NAME, OPTS,
154 $ NI, NBI, IBI, NXI )
155#if defined(_OPENMP)
156 use omp_lib
157#endif
158 IMPLICIT NONE
159*
160* -- LAPACK auxiliary routine --
161* -- LAPACK is a software package provided by Univ. of Tennessee, --
162* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163*
164* .. Scalar Arguments ..
165 CHARACTER*( * ) name, opts
166 INTEGER ispec, ni, nbi, ibi, nxi
167*
168* ================================================================
169* ..
170* .. Local Scalars ..
171 INTEGER i, ic, iz, kd, ib, lhous, lwork, nthreads,
172 $ factoptnb, qroptnb, lqoptnb
173 LOGICAL rprec, cprec
174 CHARACTER prec*1, algo*3, stag*5, subnam*12, vect*1
175* ..
176* .. Intrinsic Functions ..
177 INTRINSIC char, ichar, max
178* ..
179* .. External Functions ..
180 INTEGER ilaenv
181 LOGICAL lsame
182 EXTERNAL ilaenv, lsame
183* ..
184* .. Executable Statements ..
185*
186* Invalid value for ISPEC
187*
188 IF( (ispec.LT.17).OR.(ispec.GT.21) ) THEN
189 iparam2stage = -1
190 RETURN
191 ENDIF
192*
193* Get the number of threads
194*
195 nthreads = 1
196#if defined(_OPENMP)
197!$OMP PARALLEL
198 nthreads = omp_get_num_threads()
199!$OMP END PARALLEL
200#endif
201* WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
202*
203 IF( ispec .NE. 19 ) THEN
204*
205* Convert NAME to upper case if the first character is lower case.
206*
207 iparam2stage = -1
208 subnam = name
209 ic = ichar( subnam( 1: 1 ) )
210 iz = ichar( 'Z' )
211 IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
212*
213* ASCII character set
214*
215 IF( ic.GE.97 .AND. ic.LE.122 ) THEN
216 subnam( 1: 1 ) = char( ic-32 )
217 DO 100 i = 2, 12
218 ic = ichar( subnam( i: i ) )
219 IF( ic.GE.97 .AND. ic.LE.122 )
220 $ subnam( i: i ) = char( ic-32 )
221 100 CONTINUE
222 END IF
223*
224 ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
225*
226* EBCDIC character set
227*
228 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
229 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
230 $ ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
231 subnam( 1: 1 ) = char( ic+64 )
232 DO 110 i = 2, 12
233 ic = ichar( subnam( i: i ) )
234 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
235 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
236 $ ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
237 $ i ) = char( ic+64 )
238 110 CONTINUE
239 END IF
240*
241 ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
242*
243* Prime machines: ASCII+128
244*
245 IF( ic.GE.225 .AND. ic.LE.250 ) THEN
246 subnam( 1: 1 ) = char( ic-32 )
247 DO 120 i = 2, 12
248 ic = ichar( subnam( i: i ) )
249 IF( ic.GE.225 .AND. ic.LE.250 )
250 $ subnam( i: i ) = char( ic-32 )
251 120 CONTINUE
252 END IF
253 END IF
254*
255 prec = subnam( 1: 1 )
256 algo = subnam( 4: 6 )
257 stag = subnam( 8:12 )
258 rprec = prec.EQ.'S' .OR. prec.EQ.'D'
259 cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
260*
261* Invalid value for PRECISION
262*
263 IF( .NOT.( rprec .OR. cprec ) ) THEN
264 iparam2stage = -1
265 RETURN
266 ENDIF
267 ENDIF
268* WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
269* $ ' ALGO ',ALGO,' STAGE ',STAG
270*
271*
272 IF (( ispec .EQ. 17 ) .OR. ( ispec .EQ. 18 )) THEN
273*
274* ISPEC = 17, 18: block size KD, IB
275* Could be also dependent from N but for now it
276* depend only on sequential or parallel
277*
278 IF( nthreads.GT.4 ) THEN
279 IF( cprec ) THEN
280 kd = 128
281 ib = 32
282 ELSE
283 kd = 160
284 ib = 40
285 ENDIF
286 ELSE IF( nthreads.GT.1 ) THEN
287 IF( cprec ) THEN
288 kd = 64
289 ib = 32
290 ELSE
291 kd = 64
292 ib = 32
293 ENDIF
294 ELSE
295 IF( cprec ) THEN
296 kd = 16
297 ib = 16
298 ELSE
299 kd = 32
300 ib = 16
301 ENDIF
302 ENDIF
303 IF( ispec.EQ.17 ) iparam2stage = kd
304 IF( ispec.EQ.18 ) iparam2stage = ib
305*
306 ELSE IF ( ispec .EQ. 19 ) THEN
307*
308* ISPEC = 19:
309* LHOUS length of the Houselholder representation
310* matrix (V,T) of the second stage. should be >= 1.
311*
312* Will add the VECT OPTION HERE next release
313 vect = opts(1:1)
314 IF( lsame( vect, 'N' ) ) THEN
315 lhous = max( 1, 4*ni )
316 ELSE
317* This is not correct, it need to call the ALGO and the stage2
318 lhous = max( 1, 4*ni ) + ibi
319 ENDIF
320 IF( lhous.GE.0 ) THEN
321 iparam2stage = lhous
322 ELSE
323 iparam2stage = -1
324 ENDIF
325*
326 ELSE IF ( ispec .EQ. 20 ) THEN
327*
328* ISPEC = 20: (21 for future use)
329* LWORK length of the workspace for
330* either or both stages for TRD and BRD. should be >= 1.
331* TRD:
332* TRD_stage 1: = LT + LW + LS1 + LS2
333* = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
334* where LDT=LDS2=KD
335* = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
336* TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
337* TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
338* = N*KD + N*max(KD+1,FACTOPTNB)
339* + max(2*KD*KD, KD*NTHREADS)
340* + (KD+1)*N
341 lwork = -1
342 subnam(1:1) = prec
343 subnam(2:6) = 'GEQRF'
344 qroptnb = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
345 subnam(2:6) = 'GELQF'
346 lqoptnb = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
347* Could be QR or LQ for TRD and the max for BRD
348 factoptnb = max(qroptnb, lqoptnb)
349 IF( algo.EQ.'TRD' ) THEN
350 IF( stag.EQ.'2STAG' ) THEN
351 lwork = ni*nbi + ni*max(nbi+1,factoptnb)
352 $ + max(2*nbi*nbi, nbi*nthreads)
353 $ + (nbi+1)*ni
354 ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
355 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
356 ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
357 lwork = (2*nbi+1)*ni + nbi*nthreads
358 ENDIF
359 ELSE IF( algo.EQ.'BRD' ) THEN
360 IF( stag.EQ.'2STAG' ) THEN
361 lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)
362 $ + max(2*nbi*nbi, nbi*nthreads)
363 $ + (nbi+1)*ni
364 ELSE IF( stag.EQ.'GE2GB' ) THEN
365 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
366 ELSE IF( stag.EQ.'GB2BD' ) THEN
367 lwork = (3*nbi+1)*ni + nbi*nthreads
368 ENDIF
369 ENDIF
370 lwork = max( 1, lwork )
371
372 IF( lwork.GT.0 ) THEN
373 iparam2stage = lwork
374 ELSE
375 iparam2stage = -1
376 ENDIF
377*
378 ELSE IF ( ispec .EQ. 21 ) THEN
379*
380* ISPEC = 21 for future use
381 iparam2stage = nxi
382 ENDIF
383*
384* ==== End of IPARAM2STAGE ====
385*
386 END
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
integer function iparam2stage(ispec, name, opts, ni, nbi, ibi, nxi)
IPARAM2STAGE
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48