LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
iparam2stage.F
Go to the documentation of this file.
1*> \brief \b IPARAM2STAGE
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download IPARAM2STAGE + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
20* NI, NBI, IBI, NXI )
21* #if defined(_OPENMP)
22* use omp_lib
23* #endif
24* IMPLICIT NONE
25*
26* .. Scalar Arguments ..
27* CHARACTER*( * ) NAME, OPTS
28* INTEGER ISPEC, NI, NBI, IBI, NXI
29*
30*> \par Purpose:
31* =============
32*>
33*> \verbatim
34*>
35*> This program sets problem and machine dependent parameters
36*> useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
37*> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
38*> and related subroutines for eigenvalue problems.
39*> It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
40*> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
41*> with a direct conversion ISPEC + 16.
42*> \endverbatim
43*
44* Arguments:
45* ==========
46*
47*> \param[in] ISPEC
48*> \verbatim
49*> ISPEC is integer scalar
50*> ISPEC specifies which tunable parameter IPARAM2STAGE should
51*> return.
52*>
53*> ISPEC=17: the optimal blocksize nb for the reduction to
54*> BAND
55*>
56*> ISPEC=18: the optimal blocksize ib for the eigenvectors
57*> singular vectors update routine
58*>
59*> ISPEC=19: The length of the array that store the Housholder
60*> representation for the second stage
61*> Band to Tridiagonal or Bidiagonal
62*>
63*> ISPEC=20: The workspace needed for the routine in input.
64*>
65*> ISPEC=21: For future release.
66*> \endverbatim
67*>
68*> \param[in] NAME
69*> \verbatim
70*> NAME is character string
71*> Name of the calling subroutine
72*> \endverbatim
73*>
74*> \param[in] OPTS
75*> \verbatim
76*> OPTS is CHARACTER*(*)
77*> The character options to the subroutine NAME, concatenated
78*> into a single character string. For example, UPLO = 'U',
79*> TRANS = 'T', and DIAG = 'N' for a triangular routine would
80*> be specified as OPTS = 'UTN'.
81*> \endverbatim
82*>
83*> \param[in] NI
84*> \verbatim
85*> NI is INTEGER which is the size of the matrix
86*> \endverbatim
87*>
88*> \param[in] NBI
89*> \verbatim
90*> NBI is INTEGER which is the used in the reduction,
91*> (e.g., the size of the band), needed to compute workspace
92*> and LHOUS2.
93*> \endverbatim
94*>
95*> \param[in] IBI
96*> \verbatim
97*> IBI is INTEGER which represent the IB of the reduction,
98*> needed to compute workspace and LHOUS2.
99*> \endverbatim
100*>
101*> \param[in] NXI
102*> \verbatim
103*> NXI is INTEGER needed in the future release.
104*> \endverbatim
105*
106* Authors:
107* ========
108*
109*> \author Univ. of Tennessee
110*> \author Univ. of California Berkeley
111*> \author Univ. of Colorado Denver
112*> \author NAG Ltd.
113*
114*> \ingroup iparam2stage
115*
116*> \par Further Details:
117* =====================
118*>
119*> \verbatim
120*>
121*> Implemented by Azzam Haidar.
122*>
123*> All detail are available on technical report, SC11, SC13 papers.
124*>
125*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
126*> Parallel reduction to condensed forms for symmetric eigenvalue problems
127*> using aggregated fine-grained and memory-aware kernels. In Proceedings
128*> of 2011 International Conference for High Performance Computing,
129*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
130*> Article 8 , 11 pages.
131*> http://doi.acm.org/10.1145/2063384.2063394
132*>
133*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
134*> An improved parallel singular value algorithm and its implementation
135*> for multicore hardware, In Proceedings of 2013 International Conference
136*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
137*> Denver, Colorado, USA, 2013.
138*> Article 90, 12 pages.
139*> http://doi.acm.org/10.1145/2503210.2503292
140*>
141*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
142*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
143*> calculations based on fine-grained memory aware tasks.
144*> International Journal of High Performance Computing Applications.
145*> Volume 28 Issue 2, Pages 196-209, May 2014.
146*> http://hpc.sagepub.com/content/28/2/196
147*>
148*> \endverbatim
149*>
150* =====================================================================
151 INTEGER FUNCTION iparam2stage( ISPEC, NAME, OPTS,
152 $ NI, NBI, IBI, NXI )
153#if defined(_OPENMP)
154 use omp_lib
155#endif
156 IMPLICIT NONE
157*
158* -- LAPACK auxiliary routine --
159* -- LAPACK is a software package provided by Univ. of Tennessee, --
160* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
161*
162* .. Scalar Arguments ..
163 CHARACTER*( * ) name, opts
164 INTEGER ispec, ni, nbi, ibi, nxi
165*
166* ================================================================
167* ..
168* .. Local Scalars ..
169 INTEGER i, ic, iz, kd, ib, lhous, lwork, nthreads,
170 $ factoptnb, qroptnb, lqoptnb
171 LOGICAL rprec, cprec
172 CHARACTER prec*1, algo*3, stag*5, subnam*12, vect*1
173* ..
174* .. Intrinsic Functions ..
175 INTRINSIC char, ichar, max
176* ..
177* .. External Functions ..
178 INTEGER ilaenv
179 LOGICAL lsame
180 EXTERNAL ilaenv, lsame
181* ..
182* .. Executable Statements ..
183*
184* Invalid value for ISPEC
185*
186 IF( (ispec.LT.17).OR.(ispec.GT.21) ) THEN
187 iparam2stage = -1
188 RETURN
189 ENDIF
190*
191* Get the number of threads
192*
193 nthreads = 1
194#if defined(_OPENMP)
195!$OMP PARALLEL
196 nthreads = omp_get_num_threads()
197!$OMP END PARALLEL
198#endif
199* WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
200*
201 IF( ispec .NE. 19 ) THEN
202*
203* Convert NAME to upper case if the first character is lower case.
204*
205 iparam2stage = -1
206 subnam = name
207 ic = ichar( subnam( 1: 1 ) )
208 iz = ichar( 'Z' )
209 IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
210*
211* ASCII character set
212*
213 IF( ic.GE.97 .AND. ic.LE.122 ) THEN
214 subnam( 1: 1 ) = char( ic-32 )
215 DO 100 i = 2, 12
216 ic = ichar( subnam( i: i ) )
217 IF( ic.GE.97 .AND. ic.LE.122 )
218 $ subnam( i: i ) = char( ic-32 )
219 100 CONTINUE
220 END IF
221*
222 ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
223*
224* EBCDIC character set
225*
226 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
227 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
228 $ ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
229 subnam( 1: 1 ) = char( ic+64 )
230 DO 110 i = 2, 12
231 ic = ichar( subnam( i: i ) )
232 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
233 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
234 $ ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
235 $ i ) = char( ic+64 )
236 110 CONTINUE
237 END IF
238*
239 ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
240*
241* Prime machines: ASCII+128
242*
243 IF( ic.GE.225 .AND. ic.LE.250 ) THEN
244 subnam( 1: 1 ) = char( ic-32 )
245 DO 120 i = 2, 12
246 ic = ichar( subnam( i: i ) )
247 IF( ic.GE.225 .AND. ic.LE.250 )
248 $ subnam( i: i ) = char( ic-32 )
249 120 CONTINUE
250 END IF
251 END IF
252*
253 prec = subnam( 1: 1 )
254 algo = subnam( 4: 6 )
255 stag = subnam( 8:12 )
256 rprec = prec.EQ.'S' .OR. prec.EQ.'D'
257 cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
258*
259* Invalid value for PRECISION
260*
261 IF( .NOT.( rprec .OR. cprec ) ) THEN
262 iparam2stage = -1
263 RETURN
264 ENDIF
265 ENDIF
266* WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
267* $ ' ALGO ',ALGO,' STAGE ',STAG
268*
269*
270 IF (( ispec .EQ. 17 ) .OR. ( ispec .EQ. 18 )) THEN
271*
272* ISPEC = 17, 18: block size KD, IB
273* Could be also dependent from N but for now it
274* depend only on sequential or parallel
275*
276 IF( nthreads.GT.4 ) THEN
277 IF( cprec ) THEN
278 kd = 128
279 ib = 32
280 ELSE
281 kd = 160
282 ib = 40
283 ENDIF
284 ELSE IF( nthreads.GT.1 ) THEN
285 IF( cprec ) THEN
286 kd = 64
287 ib = 32
288 ELSE
289 kd = 64
290 ib = 32
291 ENDIF
292 ELSE
293 IF( cprec ) THEN
294 kd = 16
295 ib = 16
296 ELSE
297 kd = 32
298 ib = 16
299 ENDIF
300 ENDIF
301 IF( ispec.EQ.17 ) iparam2stage = kd
302 IF( ispec.EQ.18 ) iparam2stage = ib
303*
304 ELSE IF ( ispec .EQ. 19 ) THEN
305*
306* ISPEC = 19:
307* LHOUS length of the Houselholder representation
308* matrix (V,T) of the second stage. should be >= 1.
309*
310* Will add the VECT OPTION HERE next release
311 vect = opts(1:1)
312 IF( lsame( vect, 'N' ) ) THEN
313 lhous = max( 1, 4*ni )
314 ELSE
315* This is not correct, it need to call the ALGO and the stage2
316 lhous = max( 1, 4*ni ) + ibi
317 ENDIF
318 IF( lhous.GE.0 ) THEN
319 iparam2stage = lhous
320 ELSE
321 iparam2stage = -1
322 ENDIF
323*
324 ELSE IF ( ispec .EQ. 20 ) THEN
325*
326* ISPEC = 20: (21 for future use)
327* LWORK length of the workspace for
328* either or both stages for TRD and BRD. should be >= 1.
329* TRD:
330* TRD_stage 1: = LT + LW + LS1 + LS2
331* = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
332* where LDT=LDS2=KD
333* = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
334* TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
335* TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
336* = N*KD + N*max(KD+1,FACTOPTNB)
337* + max(2*KD*KD, KD*NTHREADS)
338* + (KD+1)*N
339 lwork = -1
340 subnam(1:1) = prec
341 subnam(2:6) = 'GEQRF'
342 qroptnb = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
343 subnam(2:6) = 'GELQF'
344 lqoptnb = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
345* Could be QR or LQ for TRD and the max for BRD
346 factoptnb = max(qroptnb, lqoptnb)
347 IF( algo.EQ.'TRD' ) THEN
348 IF( stag.EQ.'2STAG' ) THEN
349 lwork = ni*nbi + ni*max(nbi+1,factoptnb)
350 $ + max(2*nbi*nbi, nbi*nthreads)
351 $ + (nbi+1)*ni
352 ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
353 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
354 ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
355 lwork = (2*nbi+1)*ni + nbi*nthreads
356 ENDIF
357 ELSE IF( algo.EQ.'BRD' ) THEN
358 IF( stag.EQ.'2STAG' ) THEN
359 lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)
360 $ + max(2*nbi*nbi, nbi*nthreads)
361 $ + (nbi+1)*ni
362 ELSE IF( stag.EQ.'GE2GB' ) THEN
363 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
364 ELSE IF( stag.EQ.'GB2BD' ) THEN
365 lwork = (3*nbi+1)*ni + nbi*nthreads
366 ENDIF
367 ENDIF
368 lwork = max( 1, lwork )
369
370 IF( lwork.GT.0 ) THEN
371 iparam2stage = lwork
372 ELSE
373 iparam2stage = -1
374 ENDIF
375*
376 ELSE IF ( ispec .EQ. 21 ) THEN
377*
378* ISPEC = 21 for future use
379 iparam2stage = nxi
380 ENDIF
381*
382* ==== End of IPARAM2STAGE ====
383*
384 END
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:160
integer function iparam2stage(ispec, name, opts, ni, nbi, ibi, nxi)
IPARAM2STAGE
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48