LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ ssb2st_kernels()

subroutine ssb2st_kernels ( character uplo,
logical wantz,
integer ttype,
integer st,
integer ed,
integer sweep,
integer n,
integer nb,
integer ib,
real, dimension( lda, * ) a,
integer lda,
real, dimension( * ) v,
real, dimension( * ) tau,
integer ldvt,
real, dimension( * ) work )

SSB2ST_KERNELS

Download SSB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST
!> subroutine.
!> 
Parameters
[in]UPLO
!>          UPLO is CHARACTER*1
!> 
[in]WANTZ
!>          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
!>          Eigenvalue/Eigenvectors.
!> 
[in]TTYPE
!>          TTYPE is INTEGER
!> 
[in]ST
!>          ST is INTEGER
!>          internal parameter for indices.
!> 
[in]ED
!>          ED is INTEGER
!>          internal parameter for indices.
!> 
[in]SWEEP
!>          SWEEP is INTEGER
!>          internal parameter for indices.
!> 
[in]N
!>          N is INTEGER. The order of the matrix A.
!> 
[in]NB
!>          NB is INTEGER. The size of the band.
!> 
[in]IB
!>          IB is INTEGER.
!> 
[in,out]A
!>          A is REAL array. A pointer to the matrix A.
!> 
[in]LDA
!>          LDA is INTEGER. The leading dimension of the matrix A.
!> 
[out]V
!>          V is REAL array, dimension 2*n if eigenvalues only are
!>          requested or to be queried for vectors.
!> 
[out]TAU
!>          TAU is REAL array, dimension (2*n).
!>          The scalar factors of the Householder reflectors are stored
!>          in this array.
!> 
[in]LDVT
!>          LDVT is INTEGER.
!> 
[out]WORK
!>          WORK is REAL array. Workspace of size nb.
!> 
Further Details:
!>
!>  Implemented by Azzam Haidar.
!>
!>  All details are available on technical report, SC11, SC13 papers.
!>
!>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
!>  Parallel reduction to condensed forms for symmetric eigenvalue problems
!>  using aggregated fine-grained and memory-aware kernels. In Proceedings
!>  of 2011 International Conference for High Performance Computing,
!>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
!>  Article 8 , 11 pages.
!>  http://doi.acm.org/10.1145/2063384.2063394
!>
!>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
!>  An improved parallel singular value algorithm and its implementation
!>  for multicore hardware, In Proceedings of 2013 International Conference
!>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
!>  Denver, Colorado, USA, 2013.
!>  Article 90, 12 pages.
!>  http://doi.acm.org/10.1145/2503210.2503292
!>
!>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
!>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
!>  calculations based on fine-grained memory aware tasks.
!>  International Journal of High Performance Computing Applications.
!>  Volume 28 Issue 2, Pages 196-209, May 2014.
!>  http://hpc.sagepub.com/content/28/2/196
!>
!> 

Definition at line 168 of file ssb2st_kernels.f.

171*
172 IMPLICIT NONE
173*
174* -- LAPACK computational routine --
175* -- LAPACK is a software package provided by Univ. of Tennessee, --
176* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
177*
178* .. Scalar Arguments ..
179 CHARACTER UPLO
180 LOGICAL WANTZ
181 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
182* ..
183* .. Array Arguments ..
184 REAL A( LDA, * ), V( * ),
185 $ TAU( * ), WORK( * )
186* ..
187*
188* =====================================================================
189*
190* .. Parameters ..
191 REAL ZERO, ONE
192 parameter( zero = 0.0e+0,
193 $ one = 1.0e+0 )
194* ..
195* .. Local Scalars ..
196 LOGICAL UPPER
197 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
198 $ DPOS, OFDPOS, AJETER
199 REAL CTMP
200* ..
201* .. External Subroutines ..
202 EXTERNAL slarfg, slarfx, slarfy
203* ..
204* .. Intrinsic Functions ..
205 INTRINSIC mod
206* .. External Functions ..
207 LOGICAL LSAME
208 EXTERNAL lsame
209* ..
210* ..
211* .. Executable Statements ..
212*
213 ajeter = ib + ldvt
214 upper = lsame( uplo, 'U' )
215
216 IF( upper ) THEN
217 dpos = 2 * nb + 1
218 ofdpos = 2 * nb
219 ELSE
220 dpos = 1
221 ofdpos = 2
222 ENDIF
223
224*
225* Upper case
226*
227 IF( upper ) THEN
228*
229 IF( wantz ) THEN
230 vpos = mod( sweep-1, 2 ) * n + st
231 taupos = mod( sweep-1, 2 ) * n + st
232 ELSE
233 vpos = mod( sweep-1, 2 ) * n + st
234 taupos = mod( sweep-1, 2 ) * n + st
235 ENDIF
236*
237 IF( ttype.EQ.1 ) THEN
238 lm = ed - st + 1
239*
240 v( vpos ) = one
241 DO 10 i = 1, lm-1
242 v( vpos+i ) = ( a( ofdpos-i, st+i ) )
243 a( ofdpos-i, st+i ) = zero
244 10 CONTINUE
245 ctmp = ( a( ofdpos, st ) )
246 CALL slarfg( lm, ctmp, v( vpos+1 ), 1,
247 $ tau( taupos ) )
248 a( ofdpos, st ) = ctmp
249*
250 lm = ed - st + 1
251 CALL slarfy( uplo, lm, v( vpos ), 1,
252 $ ( tau( taupos ) ),
253 $ a( dpos, st ), lda-1, work)
254 ENDIF
255*
256 IF( ttype.EQ.3 ) THEN
257*
258 lm = ed - st + 1
259 CALL slarfy( uplo, lm, v( vpos ), 1,
260 $ ( tau( taupos ) ),
261 $ a( dpos, st ), lda-1, work)
262 ENDIF
263*
264 IF( ttype.EQ.2 ) THEN
265 j1 = ed+1
266 j2 = min( ed+nb, n )
267 ln = ed-st+1
268 lm = j2-j1+1
269 IF( lm.GT.0) THEN
270 CALL slarfx( 'Left', ln, lm, v( vpos ),
271 $ ( tau( taupos ) ),
272 $ a( dpos-nb, j1 ), lda-1, work)
273*
274 IF( wantz ) THEN
275 vpos = mod( sweep-1, 2 ) * n + j1
276 taupos = mod( sweep-1, 2 ) * n + j1
277 ELSE
278 vpos = mod( sweep-1, 2 ) * n + j1
279 taupos = mod( sweep-1, 2 ) * n + j1
280 ENDIF
281*
282 v( vpos ) = one
283 DO 30 i = 1, lm-1
284 v( vpos+i ) =
285 $ ( a( dpos-nb-i, j1+i ) )
286 a( dpos-nb-i, j1+i ) = zero
287 30 CONTINUE
288 ctmp = ( a( dpos-nb, j1 ) )
289 CALL slarfg( lm, ctmp, v( vpos+1 ), 1,
290 $ tau( taupos ) )
291 a( dpos-nb, j1 ) = ctmp
292*
293 CALL slarfx( 'Right', ln-1, lm, v( vpos ),
294 $ tau( taupos ),
295 $ a( dpos-nb+1, j1 ), lda-1, work)
296 ENDIF
297 ENDIF
298*
299* Lower case
300*
301 ELSE
302*
303 IF( wantz ) THEN
304 vpos = mod( sweep-1, 2 ) * n + st
305 taupos = mod( sweep-1, 2 ) * n + st
306 ELSE
307 vpos = mod( sweep-1, 2 ) * n + st
308 taupos = mod( sweep-1, 2 ) * n + st
309 ENDIF
310*
311 IF( ttype.EQ.1 ) THEN
312 lm = ed - st + 1
313*
314 v( vpos ) = one
315 DO 20 i = 1, lm-1
316 v( vpos+i ) = a( ofdpos+i, st-1 )
317 a( ofdpos+i, st-1 ) = zero
318 20 CONTINUE
319 CALL slarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
320 $ tau( taupos ) )
321*
322 lm = ed - st + 1
323*
324 CALL slarfy( uplo, lm, v( vpos ), 1,
325 $ ( tau( taupos ) ),
326 $ a( dpos, st ), lda-1, work)
327
328 ENDIF
329*
330 IF( ttype.EQ.3 ) THEN
331 lm = ed - st + 1
332*
333 CALL slarfy( uplo, lm, v( vpos ), 1,
334 $ ( tau( taupos ) ),
335 $ a( dpos, st ), lda-1, work)
336
337 ENDIF
338*
339 IF( ttype.EQ.2 ) THEN
340 j1 = ed+1
341 j2 = min( ed+nb, n )
342 ln = ed-st+1
343 lm = j2-j1+1
344*
345 IF( lm.GT.0) THEN
346 CALL slarfx( 'Right', lm, ln, v( vpos ),
347 $ tau( taupos ), a( dpos+nb, st ),
348 $ lda-1, work)
349*
350 IF( wantz ) THEN
351 vpos = mod( sweep-1, 2 ) * n + j1
352 taupos = mod( sweep-1, 2 ) * n + j1
353 ELSE
354 vpos = mod( sweep-1, 2 ) * n + j1
355 taupos = mod( sweep-1, 2 ) * n + j1
356 ENDIF
357*
358 v( vpos ) = one
359 DO 40 i = 1, lm-1
360 v( vpos+i ) = a( dpos+nb+i, st )
361 a( dpos+nb+i, st ) = zero
362 40 CONTINUE
363 CALL slarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
364 $ tau( taupos ) )
365*
366 CALL slarfx( 'Left', lm, ln-1, v( vpos ),
367 $ ( tau( taupos ) ),
368 $ a( dpos+nb-1, st+1 ), lda-1, work)
369
370 ENDIF
371 ENDIF
372 ENDIF
373*
374 RETURN
375*
376* End of SSB2ST_KERNELS
377*
subroutine slarfg(n, alpha, x, incx, tau)
SLARFG generates an elementary reflector (Householder matrix).
Definition slarfg.f:104
subroutine slarfx(side, m, n, v, tau, c, ldc, work)
SLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition slarfx.f:118
subroutine slarfy(uplo, n, v, incv, tau, c, ldc, work)
SLARFY
Definition slarfy.f:108
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
Here is the call graph for this function:
Here is the caller graph for this function: