LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ zhb2st_kernels()

subroutine zhb2st_kernels ( character  uplo,
logical  wantz,
integer  ttype,
integer  st,
integer  ed,
integer  sweep,
integer  n,
integer  nb,
integer  ib,
complex*16, dimension( lda, * )  a,
integer  lda,
complex*16, dimension( * )  v,
complex*16, dimension( * )  tau,
integer  ldvt,
complex*16, dimension( * )  work 
)

ZHB2ST_KERNELS

Download ZHB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
          Eigenvalue/Eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is COMPLEX*16 array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is COMPLEX*16 array, dimension 2*n if eigenvalues only are
          requested or to be queried for vectors.
[out]TAU
          TAU is COMPLEX*16 array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[out]WORK
          WORK is COMPLEX*16 array. Workspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 169 of file zhb2st_kernels.f.

172*
173 IMPLICIT NONE
174*
175* -- LAPACK computational routine --
176* -- LAPACK is a software package provided by Univ. of Tennessee, --
177* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
178*
179* .. Scalar Arguments ..
180 CHARACTER UPLO
181 LOGICAL WANTZ
182 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
183* ..
184* .. Array Arguments ..
185 COMPLEX*16 A( LDA, * ), V( * ),
186 $ TAU( * ), WORK( * )
187* ..
188*
189* =====================================================================
190*
191* .. Parameters ..
192 COMPLEX*16 ZERO, ONE
193 parameter( zero = ( 0.0d+0, 0.0d+0 ),
194 $ one = ( 1.0d+0, 0.0d+0 ) )
195* ..
196* .. Local Scalars ..
197 LOGICAL UPPER
198 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
199 $ DPOS, OFDPOS, AJETER
200 COMPLEX*16 CTMP
201* ..
202* .. External Subroutines ..
203 EXTERNAL zlarfg, zlarfx, zlarfy
204* ..
205* .. Intrinsic Functions ..
206 INTRINSIC dconjg, mod
207* .. External Functions ..
208 LOGICAL LSAME
209 EXTERNAL lsame
210* ..
211* ..
212* .. Executable Statements ..
213*
214 ajeter = ib + ldvt
215 upper = lsame( uplo, 'U' )
216
217 IF( upper ) THEN
218 dpos = 2 * nb + 1
219 ofdpos = 2 * nb
220 ELSE
221 dpos = 1
222 ofdpos = 2
223 ENDIF
224
225*
226* Upper case
227*
228 IF( upper ) THEN
229*
230 IF( wantz ) THEN
231 vpos = mod( sweep-1, 2 ) * n + st
232 taupos = mod( sweep-1, 2 ) * n + st
233 ELSE
234 vpos = mod( sweep-1, 2 ) * n + st
235 taupos = mod( sweep-1, 2 ) * n + st
236 ENDIF
237*
238 IF( ttype.EQ.1 ) THEN
239 lm = ed - st + 1
240*
241 v( vpos ) = one
242 DO 10 i = 1, lm-1
243 v( vpos+i ) = dconjg( a( ofdpos-i, st+i ) )
244 a( ofdpos-i, st+i ) = zero
245 10 CONTINUE
246 ctmp = dconjg( a( ofdpos, st ) )
247 CALL zlarfg( lm, ctmp, v( vpos+1 ), 1,
248 $ tau( taupos ) )
249 a( ofdpos, st ) = ctmp
250*
251 lm = ed - st + 1
252 CALL zlarfy( uplo, lm, v( vpos ), 1,
253 $ dconjg( tau( taupos ) ),
254 $ a( dpos, st ), lda-1, work)
255 ENDIF
256*
257 IF( ttype.EQ.3 ) THEN
258*
259 lm = ed - st + 1
260 CALL zlarfy( uplo, lm, v( vpos ), 1,
261 $ dconjg( tau( taupos ) ),
262 $ a( dpos, st ), lda-1, work)
263 ENDIF
264*
265 IF( ttype.EQ.2 ) THEN
266 j1 = ed+1
267 j2 = min( ed+nb, n )
268 ln = ed-st+1
269 lm = j2-j1+1
270 IF( lm.GT.0) THEN
271 CALL zlarfx( 'Left', ln, lm, v( vpos ),
272 $ dconjg( tau( taupos ) ),
273 $ a( dpos-nb, j1 ), lda-1, work)
274*
275 IF( wantz ) THEN
276 vpos = mod( sweep-1, 2 ) * n + j1
277 taupos = mod( sweep-1, 2 ) * n + j1
278 ELSE
279 vpos = mod( sweep-1, 2 ) * n + j1
280 taupos = mod( sweep-1, 2 ) * n + j1
281 ENDIF
282*
283 v( vpos ) = one
284 DO 30 i = 1, lm-1
285 v( vpos+i ) =
286 $ dconjg( a( dpos-nb-i, j1+i ) )
287 a( dpos-nb-i, j1+i ) = zero
288 30 CONTINUE
289 ctmp = dconjg( a( dpos-nb, j1 ) )
290 CALL zlarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
291 a( dpos-nb, j1 ) = ctmp
292*
293 CALL zlarfx( 'Right', ln-1, lm, v( vpos ),
294 $ tau( taupos ),
295 $ a( dpos-nb+1, j1 ), lda-1, work)
296 ENDIF
297 ENDIF
298*
299* Lower case
300*
301 ELSE
302*
303 IF( wantz ) THEN
304 vpos = mod( sweep-1, 2 ) * n + st
305 taupos = mod( sweep-1, 2 ) * n + st
306 ELSE
307 vpos = mod( sweep-1, 2 ) * n + st
308 taupos = mod( sweep-1, 2 ) * n + st
309 ENDIF
310*
311 IF( ttype.EQ.1 ) THEN
312 lm = ed - st + 1
313*
314 v( vpos ) = one
315 DO 20 i = 1, lm-1
316 v( vpos+i ) = a( ofdpos+i, st-1 )
317 a( ofdpos+i, st-1 ) = zero
318 20 CONTINUE
319 CALL zlarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
320 $ tau( taupos ) )
321*
322 lm = ed - st + 1
323*
324 CALL zlarfy( uplo, lm, v( vpos ), 1,
325 $ dconjg( tau( taupos ) ),
326 $ a( dpos, st ), lda-1, work)
327
328 ENDIF
329*
330 IF( ttype.EQ.3 ) THEN
331 lm = ed - st + 1
332*
333 CALL zlarfy( uplo, lm, v( vpos ), 1,
334 $ dconjg( tau( taupos ) ),
335 $ a( dpos, st ), lda-1, work)
336
337 ENDIF
338*
339 IF( ttype.EQ.2 ) THEN
340 j1 = ed+1
341 j2 = min( ed+nb, n )
342 ln = ed-st+1
343 lm = j2-j1+1
344*
345 IF( lm.GT.0) THEN
346 CALL zlarfx( 'Right', lm, ln, v( vpos ),
347 $ tau( taupos ), a( dpos+nb, st ),
348 $ lda-1, work)
349*
350 IF( wantz ) THEN
351 vpos = mod( sweep-1, 2 ) * n + j1
352 taupos = mod( sweep-1, 2 ) * n + j1
353 ELSE
354 vpos = mod( sweep-1, 2 ) * n + j1
355 taupos = mod( sweep-1, 2 ) * n + j1
356 ENDIF
357*
358 v( vpos ) = one
359 DO 40 i = 1, lm-1
360 v( vpos+i ) = a( dpos+nb+i, st )
361 a( dpos+nb+i, st ) = zero
362 40 CONTINUE
363 CALL zlarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
364 $ tau( taupos ) )
365*
366 CALL zlarfx( 'Left', lm, ln-1, v( vpos ),
367 $ dconjg( tau( taupos ) ),
368 $ a( dpos+nb-1, st+1 ), lda-1, work)
369
370 ENDIF
371 ENDIF
372 ENDIF
373*
374 RETURN
375*
376* End of ZHB2ST_KERNELS
377*
subroutine zlarfg(n, alpha, x, incx, tau)
ZLARFG generates an elementary reflector (Householder matrix).
Definition zlarfg.f:106
subroutine zlarfx(side, m, n, v, tau, c, ldc, work)
ZLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition zlarfx.f:119
subroutine zlarfy(uplo, n, v, incv, tau, c, ldc, work)
ZLARFY
Definition zlarfy.f:108
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
Here is the call graph for this function:
Here is the caller graph for this function: