LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ chb2st_kernels()

subroutine chb2st_kernels ( character  UPLO,
logical  WANTZ,
integer  TTYPE,
integer  ST,
integer  ED,
integer  SWEEP,
integer  N,
integer  NB,
integer  IB,
complex, dimension( lda, * )  A,
integer  LDA,
complex, dimension( * )  V,
complex, dimension( * )  TAU,
integer  LDVT,
complex, dimension( * )  WORK 
)

CHB2ST_KERNELS

Download CHB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
          Eigenvalue/Eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is COMPLEX array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is COMPLEX array, dimension 2*n if eigenvalues only are
          requested or to be queried for vectors.
[out]TAU
          TAU is COMPLEX array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[out]WORK
          WORK is COMPLEX array. Workspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 167 of file chb2st_kernels.f.

170*
171 IMPLICIT NONE
172*
173* -- LAPACK computational routine --
174* -- LAPACK is a software package provided by Univ. of Tennessee, --
175* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176*
177* .. Scalar Arguments ..
178 CHARACTER UPLO
179 LOGICAL WANTZ
180 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
181* ..
182* .. Array Arguments ..
183 COMPLEX A( LDA, * ), V( * ),
184 $ TAU( * ), WORK( * )
185* ..
186*
187* =====================================================================
188*
189* .. Parameters ..
190 COMPLEX ZERO, ONE
191 parameter( zero = ( 0.0e+0, 0.0e+0 ),
192 $ one = ( 1.0e+0, 0.0e+0 ) )
193* ..
194* .. Local Scalars ..
195 LOGICAL UPPER
196 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
197 $ DPOS, OFDPOS, AJETER
198 COMPLEX CTMP
199* ..
200* .. External Subroutines ..
201 EXTERNAL clarfg, clarfx, clarfy
202* ..
203* .. Intrinsic Functions ..
204 INTRINSIC conjg, mod
205* .. External Functions ..
206 LOGICAL LSAME
207 EXTERNAL lsame
208* ..
209* ..
210* .. Executable Statements ..
211*
212 ajeter = ib + ldvt
213 upper = lsame( uplo, 'U' )
214
215 IF( upper ) THEN
216 dpos = 2 * nb + 1
217 ofdpos = 2 * nb
218 ELSE
219 dpos = 1
220 ofdpos = 2
221 ENDIF
222
223*
224* Upper case
225*
226 IF( upper ) THEN
227*
228 IF( wantz ) THEN
229 vpos = mod( sweep-1, 2 ) * n + st
230 taupos = mod( sweep-1, 2 ) * n + st
231 ELSE
232 vpos = mod( sweep-1, 2 ) * n + st
233 taupos = mod( sweep-1, 2 ) * n + st
234 ENDIF
235*
236 IF( ttype.EQ.1 ) THEN
237 lm = ed - st + 1
238*
239 v( vpos ) = one
240 DO 10 i = 1, lm-1
241 v( vpos+i ) = conjg( a( ofdpos-i, st+i ) )
242 a( ofdpos-i, st+i ) = zero
243 10 CONTINUE
244 ctmp = conjg( a( ofdpos, st ) )
245 CALL clarfg( lm, ctmp, v( vpos+1 ), 1,
246 $ tau( taupos ) )
247 a( ofdpos, st ) = ctmp
248*
249 lm = ed - st + 1
250 CALL clarfy( uplo, lm, v( vpos ), 1,
251 $ conjg( tau( taupos ) ),
252 $ a( dpos, st ), lda-1, work)
253 ENDIF
254*
255 IF( ttype.EQ.3 ) THEN
256*
257 lm = ed - st + 1
258 CALL clarfy( uplo, lm, v( vpos ), 1,
259 $ conjg( tau( taupos ) ),
260 $ a( dpos, st ), lda-1, work)
261 ENDIF
262*
263 IF( ttype.EQ.2 ) THEN
264 j1 = ed+1
265 j2 = min( ed+nb, n )
266 ln = ed-st+1
267 lm = j2-j1+1
268 IF( lm.GT.0) THEN
269 CALL clarfx( 'Left', ln, lm, v( vpos ),
270 $ conjg( tau( taupos ) ),
271 $ a( dpos-nb, j1 ), lda-1, work)
272*
273 IF( wantz ) THEN
274 vpos = mod( sweep-1, 2 ) * n + j1
275 taupos = mod( sweep-1, 2 ) * n + j1
276 ELSE
277 vpos = mod( sweep-1, 2 ) * n + j1
278 taupos = mod( sweep-1, 2 ) * n + j1
279 ENDIF
280*
281 v( vpos ) = one
282 DO 30 i = 1, lm-1
283 v( vpos+i ) =
284 $ conjg( a( dpos-nb-i, j1+i ) )
285 a( dpos-nb-i, j1+i ) = zero
286 30 CONTINUE
287 ctmp = conjg( a( dpos-nb, j1 ) )
288 CALL clarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
289 a( dpos-nb, j1 ) = ctmp
290*
291 CALL clarfx( 'Right', ln-1, lm, v( vpos ),
292 $ tau( taupos ),
293 $ a( dpos-nb+1, j1 ), lda-1, work)
294 ENDIF
295 ENDIF
296*
297* Lower case
298*
299 ELSE
300*
301 IF( wantz ) THEN
302 vpos = mod( sweep-1, 2 ) * n + st
303 taupos = mod( sweep-1, 2 ) * n + st
304 ELSE
305 vpos = mod( sweep-1, 2 ) * n + st
306 taupos = mod( sweep-1, 2 ) * n + st
307 ENDIF
308*
309 IF( ttype.EQ.1 ) THEN
310 lm = ed - st + 1
311*
312 v( vpos ) = one
313 DO 20 i = 1, lm-1
314 v( vpos+i ) = a( ofdpos+i, st-1 )
315 a( ofdpos+i, st-1 ) = zero
316 20 CONTINUE
317 CALL clarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
318 $ tau( taupos ) )
319*
320 lm = ed - st + 1
321*
322 CALL clarfy( uplo, lm, v( vpos ), 1,
323 $ conjg( tau( taupos ) ),
324 $ a( dpos, st ), lda-1, work)
325
326 ENDIF
327*
328 IF( ttype.EQ.3 ) THEN
329 lm = ed - st + 1
330*
331 CALL clarfy( uplo, lm, v( vpos ), 1,
332 $ conjg( tau( taupos ) ),
333 $ a( dpos, st ), lda-1, work)
334
335 ENDIF
336*
337 IF( ttype.EQ.2 ) THEN
338 j1 = ed+1
339 j2 = min( ed+nb, n )
340 ln = ed-st+1
341 lm = j2-j1+1
342*
343 IF( lm.GT.0) THEN
344 CALL clarfx( 'Right', lm, ln, v( vpos ),
345 $ tau( taupos ), a( dpos+nb, st ),
346 $ lda-1, work)
347*
348 IF( wantz ) THEN
349 vpos = mod( sweep-1, 2 ) * n + j1
350 taupos = mod( sweep-1, 2 ) * n + j1
351 ELSE
352 vpos = mod( sweep-1, 2 ) * n + j1
353 taupos = mod( sweep-1, 2 ) * n + j1
354 ENDIF
355*
356 v( vpos ) = one
357 DO 40 i = 1, lm-1
358 v( vpos+i ) = a( dpos+nb+i, st )
359 a( dpos+nb+i, st ) = zero
360 40 CONTINUE
361 CALL clarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
362 $ tau( taupos ) )
363*
364 CALL clarfx( 'Left', lm, ln-1, v( vpos ),
365 $ conjg( tau( taupos ) ),
366 $ a( dpos+nb-1, st+1 ), lda-1, work)
367
368 ENDIF
369 ENDIF
370 ENDIF
371*
372 RETURN
373*
374* End of CHB2ST_KERNELS
375*
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine clarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
CLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: clarfx.f:119
subroutine clarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
CLARFY
Definition: clarfy.f:108
subroutine clarfg(N, ALPHA, X, INCX, TAU)
CLARFG generates an elementary reflector (Householder matrix).
Definition: clarfg.f:106
Here is the call graph for this function:
Here is the caller graph for this function: