◆ iparam2stage()

integer function iparam2stage	(	integer	ispec,
		character(*)	name,
		character(*)	opts,
		integer	ni,
		integer	nbi,
		integer	ibi,
		integer	nxi )

IPARAM2STAGE

Download IPARAM2STAGE + dependencies [TGZ] [ZIP] [TXT]

Purpose:

!>
!>      This program sets problem and machine dependent parameters
!>      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
!>      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
!>      and related subroutines for eigenvalue problems.
!>      It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
!>      It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
!>      with a direct conversion ISPEC + 16.
!>

Parameters

[in]	ISPEC	!> ISPEC is integer scalar !> ISPEC specifies which tunable parameter IPARAM2STAGE should !> return. !> !> ISPEC=17: the optimal blocksize nb for the reduction to !> BAND !> !> ISPEC=18: the optimal blocksize ib for the eigenvectors / !> singular vectors update routine !> !> ISPEC=19: The length of the array that stores the Householder !> representation for the second stage !> Band to Tridiagonal or Bidiagonal !> !> ISPEC=20: The workspace needed for the routine in input. !> !> ISPEC=21: For future release. !>
[in]	NAME	!> NAME is character string !> Name of the calling subroutine !>
[in]	OPTS	!> OPTS is CHARACTER*(*) !> The character options to the subroutine NAME, concatenated !> into a single character string. For example, UPLO = 'U', !> TRANS = 'T', and DIAG = 'N' for a triangular routine would !> be specified as OPTS = 'UTN'. !>
[in]	NI	!> NI is INTEGER which is the size of the matrix !>
[in]	NBI	!> NBI is INTEGER which is the blocksize used in the reduction !> (e.g., the size of the band), needed to compute workspace !> and LHOUS2. !>
[in]	IBI	!> IBI is INTEGER which represents the IB of the reduction, !> needed to compute workspace and LHOUS2. !>
[in]	NXI	!> NXI is INTEGER needed in the future release. !>

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Further Details:

!>
!>  Implemented by Azzam Haidar.
!>
!>  All details are available in the technical report and the SC11 and SC13 papers.
!>
!>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
!>  Parallel reduction to condensed forms for symmetric eigenvalue problems
!>  using aggregated fine-grained and memory-aware kernels. In Proceedings
!>  of 2011 International Conference for High Performance Computing,
!>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
!>  Article 8 , 11 pages.
!>  http://doi.acm.org/10.1145/2063384.2063394
!>
!>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
!>  An improved parallel singular value algorithm and its implementation
!>  for multicore hardware, In Proceedings of 2013 International Conference
!>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
!>  Denver, Colorado, USA, 2013.
!>  Article 90, 12 pages.
!>  http://doi.acm.org/10.1145/2503210.2503292
!>
!>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
!>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
!>  calculations based on fine-grained memory aware tasks.
!>  International Journal of High Performance Computing Applications.
!>  Volume 28 Issue 2, Pages 196-209, May 2014.
!>  http://hpc.sagepub.com/content/28/2/196
!>
!>

Definition at line 151 of file iparam2stage.F.

#if defined(_OPENMP)
      use omp_lib
#endif
      IMPLICIT NONE
*
*  -- LAPACK auxiliary routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     Summary of what the statements below do.
*     The result is returned through the function name IPARAM2STAGE
*     and is selected by ISPEC:
*       ISPEC = 17   block size KD, chosen from the thread count and
*                    from real versus complex precision
*       ISPEC = 18   block size IB, chosen the same way
*       ISPEC = 19   LHOUS, computed from NI, IBI and OPTS(1:1)
*       ISPEC = 20   LWORK, computed from NI, NBI, the thread count
*                    and two block sizes queried from ILAENV
*       ISPEC = 21   NXI, returned unchanged
*     The value -1 is returned when ISPEC is outside 17..21, and,
*     for every ISPEC other than 19, when the first character of
*     NAME (after the case conversion below) is not S, D, C or Z.
*
*     .. Scalar Arguments ..
      CHARACTER*( * )    NAME, OPTS
      INTEGER            ISPEC, NI, NBI, IBI, NXI
*
*  ================================================================
*     ..
*     .. Local Scalars ..
      INTEGER            I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
     $                   FACTOPTNB, QROPTNB, LQOPTNB
      LOGICAL            RPREC, CPREC
      CHARACTER          PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          char, ichar, max
*     ..
*     .. External Functions ..
      INTEGER            ILAENV
      LOGICAL            LSAME
      EXTERNAL           ilaenv, lsame
*     ..
*     .. Executable Statements ..
*
*     Invalid value for ISPEC
*
      IF( (ispec.LT.17).OR.(ispec.GT.21) ) THEN
          iparam2stage = -1
          RETURN
      ENDIF
*
*     Get the number of threads
*
*     Without _OPENMP defined NTHREADS stays 1.  With it, a parallel
*     region is opened and each thread of the team assigns the team
*     size to NTHREADS.  No data-sharing clause is given, so the
*     OpenMP default applies to NTHREADS and all threads store the
*     same value.
*
      nthreads = 1
#if defined(_OPENMP)
!$OMP PARALLEL
      nthreads = omp_get_num_threads()
!$OMP END PARALLEL
#endif
*      WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
*
*     The name is parsed for every ISPEC except 19.  For ISPEC = 19
*     the variables PREC, ALGO, STAG, RPREC and CPREC are left
*     unset; the ISPEC = 19 branch further down does not read them.
*
      IF( ispec .NE. 19 ) THEN
*
*        Convert NAME to upper case if the first character is lower case.
*
*        NAME is copied into the 12-character SUBNAM by character
*        assignment.  IZ, the code of the letter Z, is used to
*        recognise the character set.  In each character-set branch
*        characters 2 to 12 are examined only when the first
*        character was found to be lower case.
*
         iparam2stage = -1
         subnam = name
         ic = ichar( subnam( 1: 1 ) )
         iz = ichar( 'Z' )
         IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
*
*           ASCII character set
*
            IF( ic.GE.97 .AND. ic.LE.122 ) THEN
               subnam( 1: 1 ) = char( ic-32 )
               DO 100 i = 2, 12
                  ic = ichar( subnam( i: i ) )
                  IF( ic.GE.97 .AND. ic.LE.122 )
     $               subnam( i: i ) = char( ic-32 )
  100          CONTINUE
            END IF
*
         ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
*
*           EBCDIC character set
*
            IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
     $          ( ic.GE.145 .AND. ic.LE.153 ) .OR.
     $          ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
               subnam( 1: 1 ) = char( ic+64 )
               DO 110 i = 2, 12
                  ic = ichar( subnam( i: i ) )
                  IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
     $                ( ic.GE.145 .AND. ic.LE.153 ) .OR.
     $                ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
     $                i ) = char( ic+64 )
  110          CONTINUE
            END IF
*
         ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
*
*           Prime machines:  ASCII+128
*
            IF( ic.GE.225 .AND. ic.LE.250 ) THEN
               subnam( 1: 1 ) = char( ic-32 )
               DO 120 i = 2, 12
                 ic = ichar( subnam( i: i ) )
                 IF( ic.GE.225 .AND. ic.LE.250 )
     $             subnam( i: i ) = char( ic-32 )
  120          CONTINUE
            END IF
         END IF
*
*        Split the upper-cased name by fixed character positions:
*        character 1 is the precision, characters 4 to 6 the
*        algorithm and characters 8 to 12 the stage.  As an
*        illustration, a name spelled DSYTRD_SY2SB yields PREC = D,
*        ALGO = TRD and STAG = SY2SB.
*
         prec  = subnam( 1: 1 )
         algo  = subnam( 4: 6 )
         stag  = subnam( 8:12 )
         rprec = prec.EQ.'S' .OR. prec.EQ.'D'
         cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
*
*        Invalid value for PRECISION
*
         IF( .NOT.( rprec .OR. cprec ) ) THEN
             iparam2stage = -1
             RETURN
         ENDIF
      ENDIF
*      WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
*     $           '   ALGO ',ALGO,'    STAGE ',STAG
*
*
      IF (( ispec .EQ. 17 ) .OR. ( ispec .EQ. 18 )) THEN
*
*     ISPEC = 17, 18:  block size KD, IB
*     Could also depend on N, but for now it
*     depends only on sequential versus parallel execution
*
*     Three tiers are distinguished: more than 4 threads, 2 to 4
*     threads, and a single thread.  Within a tier the complex
*     precisions (CPREC) may get different values from the real ones.
*
         IF( nthreads.GT.4 ) THEN
            IF( cprec ) THEN
               kd = 128
               ib = 32
            ELSE
               kd = 160
               ib = 40
            ENDIF
         ELSE IF( nthreads.GT.1 ) THEN
            IF( cprec ) THEN
               kd = 64
               ib = 32
            ELSE
               kd = 64
               ib = 32
            ENDIF
         ELSE
            IF( cprec ) THEN
               kd = 16
               ib = 16
            ELSE
               kd = 32
               ib = 16
            ENDIF
         ENDIF
         IF( ispec.EQ.17 ) iparam2stage = kd
         IF( ispec.EQ.18 ) iparam2stage = ib
*
      ELSE IF ( ispec .EQ. 19 ) THEN
*
*     ISPEC = 19:
*     LHOUS length of the Householder representation
*     matrix (V,T) of the second stage. should be >= 1.
*
*     Only the first character of OPTS is examined here.  In the N
*     branch LHOUS is always at least 1; the -1 result below can
*     only come from the ELSE branch with a sufficiently negative
*     IBI (integer overflow aside).
*
*     Will add the VECT OPTION HERE next release
         vect  = opts(1:1)
         IF( lsame( vect, 'N' ) ) THEN
            lhous = max( 1, 4*ni )
         ELSE
*           This is not correct, it needs to call the ALGO and the stage2
            lhous = max( 1, 4*ni ) + ibi
         ENDIF
         IF( lhous.GE.0 ) THEN
            iparam2stage = lhous
         ELSE
            iparam2stage = -1
         ENDIF
*
      ELSE IF ( ispec .EQ. 20 ) THEN
*
*     ISPEC = 20: (21 for future use)
*     LWORK length of the workspace for
*     either or both stages for TRD and BRD. should be >= 1.
*     TRD:
*     TRD_stage 1: = LT + LW + LS1 + LS2
*                  = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
*                    where LDT=LDS2=KD
*                  = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
*     TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
*     TRD_both   : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
*                  = N*KD + N*max(KD+1,FACTOPTNB)
*                    + max(2*KD*KD, KD*NTHREADS)
*                    + (KD+1)*N
*
*     In the code below NI plays the role of N and NBI the role of
*     KD in the formulas above.  Only characters 1 to 6 of SUBNAM
*     are rewritten before each ILAENV call, so characters 7 to 12
*     of the original name are still present in the string passed.
*     NOTE(review): ILAENV with first argument 1 is presumably the
*     optimal block size query - confirm against the ILAENV
*     documentation.
*
         lwork        = -1
         subnam(1:1)  = prec
         subnam(2:6)  = 'GEQRF'
         qroptnb      = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
         subnam(2:6)  = 'GELQF'
         lqoptnb      = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
*        Could be QR or LQ for TRD and the max for BRD
         factoptnb    = max(qroptnb, lqoptnb)
         IF( algo.EQ.'TRD' ) THEN
            IF( stag.EQ.'2STAG' ) THEN
               lwork = ni*nbi + ni*max(nbi+1,factoptnb)
     $              + max(2*nbi*nbi, nbi*nthreads)
     $              + (nbi+1)*ni
            ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
               lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
            ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
               lwork = (2*nbi+1)*ni + nbi*nthreads
            ENDIF
         ELSE IF( algo.EQ.'BRD' ) THEN
            IF( stag.EQ.'2STAG' ) THEN
               lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)
     $              + max(2*nbi*nbi, nbi*nthreads)
     $              + (nbi+1)*ni
            ELSE IF( stag.EQ.'GE2GB' ) THEN
               lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
            ELSE IF( stag.EQ.'GB2BD' ) THEN
               lwork = (3*nbi+1)*ni + nbi*nthreads
            ENDIF
         ENDIF
*
*        When ALGO and STAG match none of the cases above LWORK is
*        still -1 here, and the MAX turns it into 1.  After the MAX
*        LWORK is always at least 1, so the ELSE branch returning -1
*        below cannot be taken.
*        NOTE(review): confirm whether an unrecognised ALGO or STAG
*        is meant to return 1 rather than -1.
*
         lwork = max( 1, lwork )
 
         IF( lwork.GT.0 ) THEN
            iparam2stage = lwork
         ELSE
            iparam2stage = -1
         ENDIF
*
      ELSE IF ( ispec .EQ. 21 ) THEN
*
*     ISPEC = 21 for future use
*     NXI is returned as is; note that the precision check on NAME
*     above has already been applied for this ISPEC.
         iparam2stage = nxi
      ENDIF
*
*     ==== End of IPARAM2STAGE ====
*

Here is the call graph for this function:

Here is the caller graph for this function: