LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ iparam2stage()

integer function iparam2stage ( integer ispec,
character*( * ) name,
character*( * ) opts,
integer ni,
integer nbi,
integer ibi,
integer nxi )

IPARAM2STAGE

Download IPARAM2STAGE + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!>      This program sets problem and machine dependent parameters
!>      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
!>      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
!>      and related subroutines for eigenvalue problems.
!>      It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
!>      It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
!>      with a direct conversion ISPEC + 16.
!> 
Parameters
[in]ISPEC
!>          ISPEC is integer scalar
!>              ISPEC specifies which tunable parameter IPARAM2STAGE should
!>              return.
!>
!>              ISPEC=17: the optimal blocksize nb for the reduction to
!>                        BAND
!>
!>              ISPEC=18: the optimal blocksize ib for the eigenvectors
!>                        singular vectors update routine
!>
!>              ISPEC=19: The length of the array that store the Housholder
!>                        representation for the second stage
!>                        Band to Tridiagonal or Bidiagonal
!>
!>              ISPEC=20: The workspace needed for the routine in input.
!>
!>              ISPEC=21: For future release.
!> 
[in]NAME
!>          NAME is character string
!>               Name of the calling subroutine
!> 
[in]OPTS
!>          OPTS is CHARACTER*(*)
!>          The character options to the subroutine NAME, concatenated
!>          into a single character string.  For example, UPLO = 'U',
!>          TRANS = 'T', and DIAG = 'N' for a triangular routine would
!>          be specified as OPTS = 'UTN'.
!> 
[in]NI
!>          NI is INTEGER which is the size of the matrix
!> 
[in]NBI
!>          NBI is INTEGER which is the used in the reduction,
!>          (e.g., the size of the band), needed to compute workspace
!>          and LHOUS2.
!> 
[in]IBI
!>          IBI is INTEGER which represent the IB of the reduction,
!>          needed to compute workspace and LHOUS2.
!> 
[in]NXI
!>          NXI is INTEGER needed in the future release.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  Implemented by Azzam Haidar.
!>
!>  All detail are available on technical report, SC11, SC13 papers.
!>
!>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
!>  Parallel reduction to condensed forms for symmetric eigenvalue problems
!>  using aggregated fine-grained and memory-aware kernels. In Proceedings
!>  of 2011 International Conference for High Performance Computing,
!>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
!>  Article 8 , 11 pages.
!>  http://doi.acm.org/10.1145/2063384.2063394
!>
!>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
!>  An improved parallel singular value algorithm and its implementation
!>  for multicore hardware, In Proceedings of 2013 International Conference
!>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
!>  Denver, Colorado, USA, 2013.
!>  Article 90, 12 pages.
!>  http://doi.acm.org/10.1145/2503210.2503292
!>
!>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
!>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
!>  calculations based on fine-grained memory aware tasks.
!>  International Journal of High Performance Computing Applications.
!>  Volume 28 Issue 2, Pages 196-209, May 2014.
!>  http://hpc.sagepub.com/content/28/2/196
!>
!> 

Definition at line 151 of file iparam2stage.F.

153#if defined(_OPENMP)
154 use omp_lib
155#endif
156 IMPLICIT NONE
157*
158* -- LAPACK auxiliary routine --
159* -- LAPACK is a software package provided by Univ. of Tennessee, --
160* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
161*
162* .. Scalar Arguments ..
163 CHARACTER*( * ) NAME, OPTS
164 INTEGER ISPEC, NI, NBI, IBI, NXI
165*
166* ================================================================
167* ..
168* .. Local Scalars ..
169 INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
170 $ FACTOPTNB, QROPTNB, LQOPTNB
171 LOGICAL RPREC, CPREC
172 CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
173* ..
174* .. Intrinsic Functions ..
175 INTRINSIC char, ichar, max
176* ..
177* .. External Functions ..
178 INTEGER ILAENV
179 LOGICAL LSAME
180 EXTERNAL ilaenv, lsame
181* ..
182* .. Executable Statements ..
183*
184* Invalid value for ISPEC
185*
186 IF( (ispec.LT.17).OR.(ispec.GT.21) ) THEN
187 iparam2stage = -1
188 RETURN
189 ENDIF
190*
191* Get the number of threads
192*
193 nthreads = 1
194#if defined(_OPENMP)
195!$OMP PARALLEL
196 nthreads = omp_get_num_threads()
197!$OMP END PARALLEL
198#endif
199* WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
200*
201 IF( ispec .NE. 19 ) THEN
202*
203* Convert NAME to upper case if the first character is lower case.
204*
205 iparam2stage = -1
206 subnam = name
207 ic = ichar( subnam( 1: 1 ) )
208 iz = ichar( 'Z' )
209 IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
210*
211* ASCII character set
212*
213 IF( ic.GE.97 .AND. ic.LE.122 ) THEN
214 subnam( 1: 1 ) = char( ic-32 )
215 DO 100 i = 2, 12
216 ic = ichar( subnam( i: i ) )
217 IF( ic.GE.97 .AND. ic.LE.122 )
218 $ subnam( i: i ) = char( ic-32 )
219 100 CONTINUE
220 END IF
221*
222 ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
223*
224* EBCDIC character set
225*
226 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
227 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
228 $ ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
229 subnam( 1: 1 ) = char( ic+64 )
230 DO 110 i = 2, 12
231 ic = ichar( subnam( i: i ) )
232 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
233 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
234 $ ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
235 $ i ) = char( ic+64 )
236 110 CONTINUE
237 END IF
238*
239 ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
240*
241* Prime machines: ASCII+128
242*
243 IF( ic.GE.225 .AND. ic.LE.250 ) THEN
244 subnam( 1: 1 ) = char( ic-32 )
245 DO 120 i = 2, 12
246 ic = ichar( subnam( i: i ) )
247 IF( ic.GE.225 .AND. ic.LE.250 )
248 $ subnam( i: i ) = char( ic-32 )
249 120 CONTINUE
250 END IF
251 END IF
252*
253 prec = subnam( 1: 1 )
254 algo = subnam( 4: 6 )
255 stag = subnam( 8:12 )
256 rprec = prec.EQ.'S' .OR. prec.EQ.'D'
257 cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
258*
259* Invalid value for PRECISION
260*
261 IF( .NOT.( rprec .OR. cprec ) ) THEN
262 iparam2stage = -1
263 RETURN
264 ENDIF
265 ENDIF
266* WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
267* $ ' ALGO ',ALGO,' STAGE ',STAG
268*
269*
270 IF (( ispec .EQ. 17 ) .OR. ( ispec .EQ. 18 )) THEN
271*
272* ISPEC = 17, 18: block size KD, IB
273* Could be also dependent from N but for now it
274* depend only on sequential or parallel
275*
276 IF( nthreads.GT.4 ) THEN
277 IF( cprec ) THEN
278 kd = 128
279 ib = 32
280 ELSE
281 kd = 160
282 ib = 40
283 ENDIF
284 ELSE IF( nthreads.GT.1 ) THEN
285 IF( cprec ) THEN
286 kd = 64
287 ib = 32
288 ELSE
289 kd = 64
290 ib = 32
291 ENDIF
292 ELSE
293 IF( cprec ) THEN
294 kd = 16
295 ib = 16
296 ELSE
297 kd = 32
298 ib = 16
299 ENDIF
300 ENDIF
301 IF( ispec.EQ.17 ) iparam2stage = kd
302 IF( ispec.EQ.18 ) iparam2stage = ib
303*
304 ELSE IF ( ispec .EQ. 19 ) THEN
305*
306* ISPEC = 19:
307* LHOUS length of the Houselholder representation
308* matrix (V,T) of the second stage. should be >= 1.
309*
310* Will add the VECT OPTION HERE next release
311 vect = opts(1:1)
312 IF( lsame( vect, 'N' ) ) THEN
313 lhous = max( 1, 4*ni )
314 ELSE
315* This is not correct, it need to call the ALGO and the stage2
316 lhous = max( 1, 4*ni ) + ibi
317 ENDIF
318 IF( lhous.GE.0 ) THEN
319 iparam2stage = lhous
320 ELSE
321 iparam2stage = -1
322 ENDIF
323*
324 ELSE IF ( ispec .EQ. 20 ) THEN
325*
326* ISPEC = 20: (21 for future use)
327* LWORK length of the workspace for
328* either or both stages for TRD and BRD. should be >= 1.
329* TRD:
330* TRD_stage 1: = LT + LW + LS1 + LS2
331* = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
332* where LDT=LDS2=KD
333* = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
334* TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
335* TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
336* = N*KD + N*max(KD+1,FACTOPTNB)
337* + max(2*KD*KD, KD*NTHREADS)
338* + (KD+1)*N
339 lwork = -1
340 subnam(1:1) = prec
341 subnam(2:6) = 'GEQRF'
342 qroptnb = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
343 subnam(2:6) = 'GELQF'
344 lqoptnb = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
345* Could be QR or LQ for TRD and the max for BRD
346 factoptnb = max(qroptnb, lqoptnb)
347 IF( algo.EQ.'TRD' ) THEN
348 IF( stag.EQ.'2STAG' ) THEN
349 lwork = ni*nbi + ni*max(nbi+1,factoptnb)
350 $ + max(2*nbi*nbi, nbi*nthreads)
351 $ + (nbi+1)*ni
352 ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
353 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
354 ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
355 lwork = (2*nbi+1)*ni + nbi*nthreads
356 ENDIF
357 ELSE IF( algo.EQ.'BRD' ) THEN
358 IF( stag.EQ.'2STAG' ) THEN
359 lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)
360 $ + max(2*nbi*nbi, nbi*nthreads)
361 $ + (nbi+1)*ni
362 ELSE IF( stag.EQ.'GE2GB' ) THEN
363 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
364 ELSE IF( stag.EQ.'GB2BD' ) THEN
365 lwork = (3*nbi+1)*ni + nbi*nthreads
366 ENDIF
367 ENDIF
368 lwork = max( 1, lwork )
369
370 IF( lwork.GT.0 ) THEN
371 iparam2stage = lwork
372 ELSE
373 iparam2stage = -1
374 ENDIF
375*
376 ELSE IF ( ispec .EQ. 21 ) THEN
377*
378* ISPEC = 21 for future use
379 iparam2stage = nxi
380 ENDIF
381*
382* ==== End of IPARAM2STAGE ====
383*
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:160
integer function iparam2stage(ispec, name, opts, ni, nbi, ibi, nxi)
IPARAM2STAGE
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
Here is the call graph for this function:
Here is the caller graph for this function: