LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ chb2st_kernels()

subroutine chb2st_kernels ( character  uplo,
logical  wantz,
integer  ttype,
integer  st,
integer  ed,
integer  sweep,
integer  n,
integer  nb,
integer  ib,
complex, dimension( lda, * )  a,
integer  lda,
complex, dimension( * )  v,
complex, dimension( * )  tau,
integer  ldvt,
complex, dimension( * )  work 
)

CHB2ST_KERNELS

Download CHB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
          Eigenvalue/Eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is COMPLEX array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is COMPLEX array, dimension 2*n if eigenvalues only are
          requested or to be queried for vectors.
[out]TAU
          TAU is COMPLEX array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[out]WORK
          WORK is COMPLEX array. Workspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 169 of file chb2st_kernels.f.

172*
173 IMPLICIT NONE
174*
175* -- LAPACK computational routine --
176* -- LAPACK is a software package provided by Univ. of Tennessee, --
177* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
178*
179* .. Scalar Arguments ..
180 CHARACTER UPLO
181 LOGICAL WANTZ
182 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
183* ..
184* .. Array Arguments ..
185 COMPLEX A( LDA, * ), V( * ),
186 $ TAU( * ), WORK( * )
187* ..
188*
189* =====================================================================
190*
191* .. Parameters ..
192 COMPLEX ZERO, ONE
193 parameter( zero = ( 0.0e+0, 0.0e+0 ),
194 $ one = ( 1.0e+0, 0.0e+0 ) )
195* ..
196* .. Local Scalars ..
197 LOGICAL UPPER
198 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
199 $ DPOS, OFDPOS, AJETER
200 COMPLEX CTMP
201* ..
202* .. External Subroutines ..
203 EXTERNAL clarfg, clarfx, clarfy
204* ..
205* .. Intrinsic Functions ..
206 INTRINSIC conjg, mod
207* .. External Functions ..
208 LOGICAL LSAME
209 EXTERNAL lsame
210* ..
211* ..
212* .. Executable Statements ..
213*
214 ajeter = ib + ldvt
215 upper = lsame( uplo, 'U' )
216
217 IF( upper ) THEN
218 dpos = 2 * nb + 1
219 ofdpos = 2 * nb
220 ELSE
221 dpos = 1
222 ofdpos = 2
223 ENDIF
224
225*
226* Upper case
227*
228 IF( upper ) THEN
229*
230 IF( wantz ) THEN
231 vpos = mod( sweep-1, 2 ) * n + st
232 taupos = mod( sweep-1, 2 ) * n + st
233 ELSE
234 vpos = mod( sweep-1, 2 ) * n + st
235 taupos = mod( sweep-1, 2 ) * n + st
236 ENDIF
237*
238 IF( ttype.EQ.1 ) THEN
239 lm = ed - st + 1
240*
241 v( vpos ) = one
242 DO 10 i = 1, lm-1
243 v( vpos+i ) = conjg( a( ofdpos-i, st+i ) )
244 a( ofdpos-i, st+i ) = zero
245 10 CONTINUE
246 ctmp = conjg( a( ofdpos, st ) )
247 CALL clarfg( lm, ctmp, v( vpos+1 ), 1,
248 $ tau( taupos ) )
249 a( ofdpos, st ) = ctmp
250*
251 lm = ed - st + 1
252 CALL clarfy( uplo, lm, v( vpos ), 1,
253 $ conjg( tau( taupos ) ),
254 $ a( dpos, st ), lda-1, work)
255 ENDIF
256*
257 IF( ttype.EQ.3 ) THEN
258*
259 lm = ed - st + 1
260 CALL clarfy( uplo, lm, v( vpos ), 1,
261 $ conjg( tau( taupos ) ),
262 $ a( dpos, st ), lda-1, work)
263 ENDIF
264*
265 IF( ttype.EQ.2 ) THEN
266 j1 = ed+1
267 j2 = min( ed+nb, n )
268 ln = ed-st+1
269 lm = j2-j1+1
270 IF( lm.GT.0) THEN
271 CALL clarfx( 'Left', ln, lm, v( vpos ),
272 $ conjg( tau( taupos ) ),
273 $ a( dpos-nb, j1 ), lda-1, work)
274*
275 IF( wantz ) THEN
276 vpos = mod( sweep-1, 2 ) * n + j1
277 taupos = mod( sweep-1, 2 ) * n + j1
278 ELSE
279 vpos = mod( sweep-1, 2 ) * n + j1
280 taupos = mod( sweep-1, 2 ) * n + j1
281 ENDIF
282*
283 v( vpos ) = one
284 DO 30 i = 1, lm-1
285 v( vpos+i ) =
286 $ conjg( a( dpos-nb-i, j1+i ) )
287 a( dpos-nb-i, j1+i ) = zero
288 30 CONTINUE
289 ctmp = conjg( a( dpos-nb, j1 ) )
290 CALL clarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
291 a( dpos-nb, j1 ) = ctmp
292*
293 CALL clarfx( 'Right', ln-1, lm, v( vpos ),
294 $ tau( taupos ),
295 $ a( dpos-nb+1, j1 ), lda-1, work)
296 ENDIF
297 ENDIF
298*
299* Lower case
300*
301 ELSE
302*
303 IF( wantz ) THEN
304 vpos = mod( sweep-1, 2 ) * n + st
305 taupos = mod( sweep-1, 2 ) * n + st
306 ELSE
307 vpos = mod( sweep-1, 2 ) * n + st
308 taupos = mod( sweep-1, 2 ) * n + st
309 ENDIF
310*
311 IF( ttype.EQ.1 ) THEN
312 lm = ed - st + 1
313*
314 v( vpos ) = one
315 DO 20 i = 1, lm-1
316 v( vpos+i ) = a( ofdpos+i, st-1 )
317 a( ofdpos+i, st-1 ) = zero
318 20 CONTINUE
319 CALL clarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
320 $ tau( taupos ) )
321*
322 lm = ed - st + 1
323*
324 CALL clarfy( uplo, lm, v( vpos ), 1,
325 $ conjg( tau( taupos ) ),
326 $ a( dpos, st ), lda-1, work)
327
328 ENDIF
329*
330 IF( ttype.EQ.3 ) THEN
331 lm = ed - st + 1
332*
333 CALL clarfy( uplo, lm, v( vpos ), 1,
334 $ conjg( tau( taupos ) ),
335 $ a( dpos, st ), lda-1, work)
336
337 ENDIF
338*
339 IF( ttype.EQ.2 ) THEN
340 j1 = ed+1
341 j2 = min( ed+nb, n )
342 ln = ed-st+1
343 lm = j2-j1+1
344*
345 IF( lm.GT.0) THEN
346 CALL clarfx( 'Right', lm, ln, v( vpos ),
347 $ tau( taupos ), a( dpos+nb, st ),
348 $ lda-1, work)
349*
350 IF( wantz ) THEN
351 vpos = mod( sweep-1, 2 ) * n + j1
352 taupos = mod( sweep-1, 2 ) * n + j1
353 ELSE
354 vpos = mod( sweep-1, 2 ) * n + j1
355 taupos = mod( sweep-1, 2 ) * n + j1
356 ENDIF
357*
358 v( vpos ) = one
359 DO 40 i = 1, lm-1
360 v( vpos+i ) = a( dpos+nb+i, st )
361 a( dpos+nb+i, st ) = zero
362 40 CONTINUE
363 CALL clarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
364 $ tau( taupos ) )
365*
366 CALL clarfx( 'Left', lm, ln-1, v( vpos ),
367 $ conjg( tau( taupos ) ),
368 $ a( dpos+nb-1, st+1 ), lda-1, work)
369
370 ENDIF
371 ENDIF
372 ENDIF
373*
374 RETURN
375*
376* End of CHB2ST_KERNELS
377*
subroutine clarfg(n, alpha, x, incx, tau)
CLARFG generates an elementary reflector (Householder matrix).
Definition clarfg.f:106
subroutine clarfx(side, m, n, v, tau, c, ldc, work)
CLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition clarfx.f:119
subroutine clarfy(uplo, n, v, incv, tau, c, ldc, work)
CLARFY
Definition clarfy.f:108
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
Here is the call graph for this function:
Here is the caller graph for this function: