LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
zhb2st_kernels.f
Go to the documentation of this file.
1*> \brief \b ZHB2ST_KERNELS
2*
3* @precisions fortran z -> s d c
4*
5* =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8* http://www.netlib.org/lapack/explore-html/
9*
10*> Download ZHB2ST_KERNELS + dependencies
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhb2st_kernels.f">
12*> [TGZ]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhb2st_kernels.f">
14*> [ZIP]</a>
15*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhb2st_kernels.f">
16*> [TXT]</a>
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE,
22* ST, ED, SWEEP, N, NB, IB,
23* A, LDA, V, TAU, LDVT, WORK)
24*
25* IMPLICIT NONE
26*
27* .. Scalar Arguments ..
28* CHARACTER UPLO
29* LOGICAL WANTZ
30* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
31* ..
32* .. Array Arguments ..
33* COMPLEX*16 A( LDA, * ), V( * ),
34* TAU( * ), WORK( * )
35*
36*> \par Purpose:
37* =============
38*>
39*> \verbatim
40*>
41*> ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST
42*> subroutine.
43*> \endverbatim
44*
45* Arguments:
46* ==========
47*
48*> \param[in] UPLO
49*> \verbatim
50*> UPLO is CHARACTER*1
51*> \endverbatim
52*>
53*> \param[in] WANTZ
54*> \verbatim
55*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
56*> Eigenvalue/Eigenvectors.
57*> \endverbatim
58*>
59*> \param[in] TTYPE
60*> \verbatim
61*> TTYPE is INTEGER
62*> \endverbatim
63*>
64*> \param[in] ST
65*> \verbatim
66*> ST is INTEGER
67*> internal parameter for indices.
68*> \endverbatim
69*>
70*> \param[in] ED
71*> \verbatim
72*> ED is INTEGER
73*> internal parameter for indices.
74*> \endverbatim
75*>
76*> \param[in] SWEEP
77*> \verbatim
78*> SWEEP is INTEGER
79*> internal parameter for indices.
80*> \endverbatim
81*>
82*> \param[in] N
83*> \verbatim
84*> N is INTEGER. The order of the matrix A.
85*> \endverbatim
86*>
87*> \param[in] NB
88*> \verbatim
89*> NB is INTEGER. The size of the band.
90*> \endverbatim
91*>
92*> \param[in] IB
93*> \verbatim
94*> IB is INTEGER.
95*> \endverbatim
96*>
97*> \param[in, out] A
98*> \verbatim
99*> A is COMPLEX*16 array. A pointer to the matrix A.
100*> \endverbatim
101*>
102*> \param[in] LDA
103*> \verbatim
104*> LDA is INTEGER. The leading dimension of the matrix A.
105*> \endverbatim
106*>
107*> \param[out] V
108*> \verbatim
109*> V is COMPLEX*16 array, dimension 2*n if eigenvalues only are
110*> requested or to be queried for vectors.
111*> \endverbatim
112*>
113*> \param[out] TAU
114*> \verbatim
115*> TAU is COMPLEX*16 array, dimension (2*n).
116*> The scalar factors of the Householder reflectors are stored
117*> in this array.
118*> \endverbatim
119*>
120*> \param[in] LDVT
121*> \verbatim
122*> LDVT is INTEGER.
123*> \endverbatim
124*>
125*> \param[out] WORK
126*> \verbatim
127*> WORK is COMPLEX*16 array. Workspace of size nb.
128*> \endverbatim
129*>
130*> \par Further Details:
131* =====================
132*>
133*> \verbatim
134*>
135*> Implemented by Azzam Haidar.
136*>
137*> All details are available on technical report, SC11, SC13 papers.
138*>
139*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
140*> Parallel reduction to condensed forms for symmetric eigenvalue problems
141*> using aggregated fine-grained and memory-aware kernels. In Proceedings
142*> of 2011 International Conference for High Performance Computing,
143*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
144*> Article 8 , 11 pages.
145*> http://doi.acm.org/10.1145/2063384.2063394
146*>
147*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
148*> An improved parallel singular value algorithm and its implementation
149*> for multicore hardware, In Proceedings of 2013 International Conference
150*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
151*> Denver, Colorado, USA, 2013.
152*> Article 90, 12 pages.
153*> http://doi.acm.org/10.1145/2503210.2503292
154*>
155*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
156*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
157*> calculations based on fine-grained memory aware tasks.
158*> International Journal of High Performance Computing Applications.
159*> Volume 28 Issue 2, Pages 196-209, May 2014.
160*> http://hpc.sagepub.com/content/28/2/196
161*>
162*> \endverbatim
163*>
164*> \ingroup hb2st_kernels
165*>
166* =====================================================================
167 SUBROUTINE zhb2st_kernels( UPLO, WANTZ, TTYPE,
168 $ ST, ED, SWEEP, N, NB, IB,
169 $ A, LDA, V, TAU, LDVT, WORK)
170*
171 IMPLICIT NONE
172*
173* -- LAPACK computational routine --
174* -- LAPACK is a software package provided by Univ. of Tennessee, --
175* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176*
177* .. Scalar Arguments ..
178 CHARACTER UPLO
179 LOGICAL WANTZ
180 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
181* ..
182* .. Array Arguments ..
183 COMPLEX*16 A( LDA, * ), V( * ),
184 $ TAU( * ), WORK( * )
185* ..
186*
187* =====================================================================
188*
189* .. Parameters ..
190 COMPLEX*16 ZERO, ONE
191 PARAMETER ( ZERO = ( 0.0d+0, 0.0d+0 ),
192 $ one = ( 1.0d+0, 0.0d+0 ) )
193* ..
194* .. Local Scalars ..
195 LOGICAL UPPER
196 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
197 $ dpos, ofdpos, ajeter
198 COMPLEX*16 CTMP
199* ..
200* .. External Subroutines ..
201 EXTERNAL zlarfg, zlarfx, zlarfy
202* ..
203* .. Intrinsic Functions ..
204 INTRINSIC dconjg, mod
205* .. External Functions ..
206 LOGICAL LSAME
207 EXTERNAL LSAME
208* ..
209* ..
210* .. Executable Statements ..
211*
212 ajeter = ib + ldvt
213 upper = lsame( uplo, 'U' )
214
215 IF( upper ) THEN
216 dpos = 2 * nb + 1
217 ofdpos = 2 * nb
218 ELSE
219 dpos = 1
220 ofdpos = 2
221 ENDIF
222
223*
224* Upper case
225*
226 IF( upper ) THEN
227*
228 IF( wantz ) THEN
229 vpos = mod( sweep-1, 2 ) * n + st
230 taupos = mod( sweep-1, 2 ) * n + st
231 ELSE
232 vpos = mod( sweep-1, 2 ) * n + st
233 taupos = mod( sweep-1, 2 ) * n + st
234 ENDIF
235*
236 IF( ttype.EQ.1 ) THEN
237 lm = ed - st + 1
238*
239 v( vpos ) = one
240 DO 10 i = 1, lm-1
241 v( vpos+i ) = dconjg( a( ofdpos-i, st+i ) )
242 a( ofdpos-i, st+i ) = zero
243 10 CONTINUE
244 ctmp = dconjg( a( ofdpos, st ) )
245 CALL zlarfg( lm, ctmp, v( vpos+1 ), 1,
246 $ tau( taupos ) )
247 a( ofdpos, st ) = ctmp
248*
249 lm = ed - st + 1
250 CALL zlarfy( uplo, lm, v( vpos ), 1,
251 $ dconjg( tau( taupos ) ),
252 $ a( dpos, st ), lda-1, work)
253 ENDIF
254*
255 IF( ttype.EQ.3 ) THEN
256*
257 lm = ed - st + 1
258 CALL zlarfy( uplo, lm, v( vpos ), 1,
259 $ dconjg( tau( taupos ) ),
260 $ a( dpos, st ), lda-1, work)
261 ENDIF
262*
263 IF( ttype.EQ.2 ) THEN
264 j1 = ed+1
265 j2 = min( ed+nb, n )
266 ln = ed-st+1
267 lm = j2-j1+1
268 IF( lm.GT.0) THEN
269 CALL zlarfx( 'Left', ln, lm, v( vpos ),
270 $ dconjg( tau( taupos ) ),
271 $ a( dpos-nb, j1 ), lda-1, work)
272*
273 IF( wantz ) THEN
274 vpos = mod( sweep-1, 2 ) * n + j1
275 taupos = mod( sweep-1, 2 ) * n + j1
276 ELSE
277 vpos = mod( sweep-1, 2 ) * n + j1
278 taupos = mod( sweep-1, 2 ) * n + j1
279 ENDIF
280*
281 v( vpos ) = one
282 DO 30 i = 1, lm-1
283 v( vpos+i ) =
284 $ dconjg( a( dpos-nb-i, j1+i ) )
285 a( dpos-nb-i, j1+i ) = zero
286 30 CONTINUE
287 ctmp = dconjg( a( dpos-nb, j1 ) )
288 CALL zlarfg( lm, ctmp, v( vpos+1 ), 1,
289 $ tau( taupos ) )
290 a( dpos-nb, j1 ) = ctmp
291*
292 CALL zlarfx( 'Right', ln-1, lm, v( vpos ),
293 $ tau( taupos ),
294 $ a( dpos-nb+1, j1 ), lda-1, work)
295 ENDIF
296 ENDIF
297*
298* Lower case
299*
300 ELSE
301*
302 IF( wantz ) THEN
303 vpos = mod( sweep-1, 2 ) * n + st
304 taupos = mod( sweep-1, 2 ) * n + st
305 ELSE
306 vpos = mod( sweep-1, 2 ) * n + st
307 taupos = mod( sweep-1, 2 ) * n + st
308 ENDIF
309*
310 IF( ttype.EQ.1 ) THEN
311 lm = ed - st + 1
312*
313 v( vpos ) = one
314 DO 20 i = 1, lm-1
315 v( vpos+i ) = a( ofdpos+i, st-1 )
316 a( ofdpos+i, st-1 ) = zero
317 20 CONTINUE
318 CALL zlarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
319 $ tau( taupos ) )
320*
321 lm = ed - st + 1
322*
323 CALL zlarfy( uplo, lm, v( vpos ), 1,
324 $ dconjg( tau( taupos ) ),
325 $ a( dpos, st ), lda-1, work)
326
327 ENDIF
328*
329 IF( ttype.EQ.3 ) THEN
330 lm = ed - st + 1
331*
332 CALL zlarfy( uplo, lm, v( vpos ), 1,
333 $ dconjg( tau( taupos ) ),
334 $ a( dpos, st ), lda-1, work)
335
336 ENDIF
337*
338 IF( ttype.EQ.2 ) THEN
339 j1 = ed+1
340 j2 = min( ed+nb, n )
341 ln = ed-st+1
342 lm = j2-j1+1
343*
344 IF( lm.GT.0) THEN
345 CALL zlarfx( 'Right', lm, ln, v( vpos ),
346 $ tau( taupos ), a( dpos+nb, st ),
347 $ lda-1, work)
348*
349 IF( wantz ) THEN
350 vpos = mod( sweep-1, 2 ) * n + j1
351 taupos = mod( sweep-1, 2 ) * n + j1
352 ELSE
353 vpos = mod( sweep-1, 2 ) * n + j1
354 taupos = mod( sweep-1, 2 ) * n + j1
355 ENDIF
356*
357 v( vpos ) = one
358 DO 40 i = 1, lm-1
359 v( vpos+i ) = a( dpos+nb+i, st )
360 a( dpos+nb+i, st ) = zero
361 40 CONTINUE
362 CALL zlarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
363 $ tau( taupos ) )
364*
365 CALL zlarfx( 'Left', lm, ln-1, v( vpos ),
366 $ dconjg( tau( taupos ) ),
367 $ a( dpos+nb-1, st+1 ), lda-1, work)
368
369 ENDIF
370 ENDIF
371 ENDIF
372*
373 RETURN
374*
375* End of ZHB2ST_KERNELS
376*
377 END
subroutine zhb2st_kernels(uplo, wantz, ttype, st, ed, sweep, n, nb, ib, a, lda, v, tau, ldvt, work)
ZHB2ST_KERNELS
subroutine zlarfg(n, alpha, x, incx, tau)
ZLARFG generates an elementary reflector (Householder matrix).
Definition zlarfg.f:104
subroutine zlarfx(side, m, n, v, tau, c, ldc, work)
ZLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition zlarfx.f:117
subroutine zlarfy(uplo, n, v, incv, tau, c, ldc, work)
ZLARFY
Definition zlarfy.f:108