LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ iparam2stage()

integer function iparam2stage ( integer  ISPEC,
character*( * )  NAME,
character*( * )  OPTS,
integer  NI,
integer  NBI,
integer  IBI,
integer  NXI 
)

IPARAM2STAGE

Download IPARAM2STAGE + dependencies [TGZ] [ZIP] [TXT]

Purpose:
      This program sets problem and machine dependent parameters
      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD 
      and related subroutines for eigenvalue problems. 
      It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
      It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
      with a direct conversion ISPEC + 16.
Parameters
[in]ISPEC
          ISPEC is integer scalar
              ISPEC specifies which tunable parameter IPARAM2STAGE should
              return.

              ISPEC=17: the optimal blocksize nb for the reduction to
                        BAND

              ISPEC=18: the optimal blocksize ib for the eigenvectors
                        singular vectors update routine

              ISPEC=19: The length of the array that store the Housholder 
                        representation for the second stage 
                        Band to Tridiagonal or Bidiagonal

              ISPEC=20: The workspace needed for the routine in input.

              ISPEC=21: For future release.
[in]NAME
          NAME is character string
               Name of the calling subroutine
[in]OPTS
          OPTS is CHARACTER*(*)
          The character options to the subroutine NAME, concatenated
          into a single character string.  For example, UPLO = 'U',
          TRANS = 'T', and DIAG = 'N' for a triangular routine would
          be specified as OPTS = 'UTN'.
[in]NI
          NI is INTEGER which is the size of the matrix
[in]NBI
          NBI is INTEGER which is the used in the reduciton, 
          (e.g., the size of the band), needed to compute workspace
          and LHOUS2.
[in]IBI
          IBI is INTEGER which represent the IB of the reduciton,
          needed to compute workspace and LHOUS2.
[in]NXI
          NXI is INTEGER needed in the future release.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
  Implemented by Azzam Haidar.

  All detail are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation 
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196 

Definition at line 153 of file iparam2stage.F.

155#if defined(_OPENMP)
156 use omp_lib
157#endif
158 IMPLICIT NONE
159*
160* -- LAPACK auxiliary routine --
161* -- LAPACK is a software package provided by Univ. of Tennessee, --
162* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163*
164* .. Scalar Arguments ..
165 CHARACTER*( * ) NAME, OPTS
166 INTEGER ISPEC, NI, NBI, IBI, NXI
167*
168* ================================================================
169* ..
170* .. Local Scalars ..
171 INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
172 $ FACTOPTNB, QROPTNB, LQOPTNB
173 LOGICAL RPREC, CPREC
174 CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
175* ..
176* .. Intrinsic Functions ..
177 INTRINSIC char, ichar, max
178* ..
179* .. External Functions ..
180 INTEGER ILAENV
181 EXTERNAL ilaenv
182* ..
183* .. Executable Statements ..
184*
185* Invalid value for ISPEC
186*
187 IF( (ispec.LT.17).OR.(ispec.GT.21) ) THEN
188 iparam2stage = -1
189 RETURN
190 ENDIF
191*
192* Get the number of threads
193*
194 nthreads = 1
195#if defined(_OPENMP)
196!$OMP PARALLEL
197 nthreads = omp_get_num_threads()
198!$OMP END PARALLEL
199#endif
200* WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
201*
202 IF( ispec .NE. 19 ) THEN
203*
204* Convert NAME to upper case if the first character is lower case.
205*
206 iparam2stage = -1
207 subnam = name
208 ic = ichar( subnam( 1: 1 ) )
209 iz = ichar( 'Z' )
210 IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
211*
212* ASCII character set
213*
214 IF( ic.GE.97 .AND. ic.LE.122 ) THEN
215 subnam( 1: 1 ) = char( ic-32 )
216 DO 100 i = 2, 12
217 ic = ichar( subnam( i: i ) )
218 IF( ic.GE.97 .AND. ic.LE.122 )
219 $ subnam( i: i ) = char( ic-32 )
220 100 CONTINUE
221 END IF
222*
223 ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
224*
225* EBCDIC character set
226*
227 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
228 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
229 $ ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
230 subnam( 1: 1 ) = char( ic+64 )
231 DO 110 i = 2, 12
232 ic = ichar( subnam( i: i ) )
233 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
234 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
235 $ ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
236 $ i ) = char( ic+64 )
237 110 CONTINUE
238 END IF
239*
240 ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
241*
242* Prime machines: ASCII+128
243*
244 IF( ic.GE.225 .AND. ic.LE.250 ) THEN
245 subnam( 1: 1 ) = char( ic-32 )
246 DO 120 i = 2, 12
247 ic = ichar( subnam( i: i ) )
248 IF( ic.GE.225 .AND. ic.LE.250 )
249 $ subnam( i: i ) = char( ic-32 )
250 120 CONTINUE
251 END IF
252 END IF
253*
254 prec = subnam( 1: 1 )
255 algo = subnam( 4: 6 )
256 stag = subnam( 8:12 )
257 rprec = prec.EQ.'S' .OR. prec.EQ.'D'
258 cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
259*
260* Invalid value for PRECISION
261*
262 IF( .NOT.( rprec .OR. cprec ) ) THEN
263 iparam2stage = -1
264 RETURN
265 ENDIF
266 ENDIF
267* WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
268* $ ' ALGO ',ALGO,' STAGE ',STAG
269*
270*
271 IF (( ispec .EQ. 17 ) .OR. ( ispec .EQ. 18 )) THEN
272*
273* ISPEC = 17, 18: block size KD, IB
274* Could be also dependent from N but for now it
275* depend only on sequential or parallel
276*
277 IF( nthreads.GT.4 ) THEN
278 IF( cprec ) THEN
279 kd = 128
280 ib = 32
281 ELSE
282 kd = 160
283 ib = 40
284 ENDIF
285 ELSE IF( nthreads.GT.1 ) THEN
286 IF( cprec ) THEN
287 kd = 64
288 ib = 32
289 ELSE
290 kd = 64
291 ib = 32
292 ENDIF
293 ELSE
294 IF( cprec ) THEN
295 kd = 16
296 ib = 16
297 ELSE
298 kd = 32
299 ib = 16
300 ENDIF
301 ENDIF
302 IF( ispec.EQ.17 ) iparam2stage = kd
303 IF( ispec.EQ.18 ) iparam2stage = ib
304*
305 ELSE IF ( ispec .EQ. 19 ) THEN
306*
307* ISPEC = 19:
308* LHOUS length of the Houselholder representation
309* matrix (V,T) of the second stage. should be >= 1.
310*
311* Will add the VECT OPTION HERE next release
312 vect = opts(1:1)
313 IF( vect.EQ.'N' ) THEN
314 lhous = max( 1, 4*ni )
315 ELSE
316* This is not correct, it need to call the ALGO and the stage2
317 lhous = max( 1, 4*ni ) + ibi
318 ENDIF
319 IF( lhous.GE.0 ) THEN
320 iparam2stage = lhous
321 ELSE
322 iparam2stage = -1
323 ENDIF
324*
325 ELSE IF ( ispec .EQ. 20 ) THEN
326*
327* ISPEC = 20: (21 for future use)
328* LWORK length of the workspace for
329* either or both stages for TRD and BRD. should be >= 1.
330* TRD:
331* TRD_stage 1: = LT + LW + LS1 + LS2
332* = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
333* where LDT=LDS2=KD
334* = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
335* TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
336* TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
337* = N*KD + N*max(KD+1,FACTOPTNB)
338* + max(2*KD*KD, KD*NTHREADS)
339* + (KD+1)*N
340 lwork = -1
341 subnam(1:1) = prec
342 subnam(2:6) = 'GEQRF'
343 qroptnb = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
344 subnam(2:6) = 'GELQF'
345 lqoptnb = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
346* Could be QR or LQ for TRD and the max for BRD
347 factoptnb = max(qroptnb, lqoptnb)
348 IF( algo.EQ.'TRD' ) THEN
349 IF( stag.EQ.'2STAG' ) THEN
350 lwork = ni*nbi + ni*max(nbi+1,factoptnb)
351 $ + max(2*nbi*nbi, nbi*nthreads)
352 $ + (nbi+1)*ni
353 ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
354 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
355 ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
356 lwork = (2*nbi+1)*ni + nbi*nthreads
357 ENDIF
358 ELSE IF( algo.EQ.'BRD' ) THEN
359 IF( stag.EQ.'2STAG' ) THEN
360 lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)
361 $ + max(2*nbi*nbi, nbi*nthreads)
362 $ + (nbi+1)*ni
363 ELSE IF( stag.EQ.'GE2GB' ) THEN
364 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
365 ELSE IF( stag.EQ.'GB2BD' ) THEN
366 lwork = (3*nbi+1)*ni + nbi*nthreads
367 ENDIF
368 ENDIF
369 lwork = max( 1, lwork )
370
371 IF( lwork.GT.0 ) THEN
372 iparam2stage = lwork
373 ELSE
374 iparam2stage = -1
375 ENDIF
376*
377 ELSE IF ( ispec .EQ. 21 ) THEN
378*
379* ISPEC = 21 for future use
380 iparam2stage = nxi
381 ENDIF
382*
383* ==== End of IPARAM2STAGE ====
384*
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: ilaenv.f:162
integer function iparam2stage(ISPEC, NAME, OPTS, NI, NBI, IBI, NXI)
IPARAM2STAGE
Definition: iparam2stage.F:155
Here is the call graph for this function:
Here is the caller graph for this function: