LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
zhb2st_kernels.f
Go to the documentation of this file.
1*> \brief \b ZHB2ST_KERNELS
2*
3* @precisions fortran z -> s d c
4*
5* =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8* http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download ZHB2ST_KERNELS + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhb2st_kernels.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhb2st_kernels.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhb2st_kernels.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20* Definition:
21* ===========
22*
23* SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24* ST, ED, SWEEP, N, NB, IB,
25* A, LDA, V, TAU, LDVT, WORK)
26*
27* IMPLICIT NONE
28*
29* .. Scalar Arguments ..
30* CHARACTER UPLO
31* LOGICAL WANTZ
32* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33* ..
34* .. Array Arguments ..
35* COMPLEX*16 A( LDA, * ), V( * ),
36* TAU( * ), WORK( * )
37*
38*> \par Purpose:
39* =============
40*>
41*> \verbatim
42*>
43*> ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST
44*> subroutine.
45*> \endverbatim
46*
47* Arguments:
48* ==========
49*
50*> \param[in] UPLO
51*> \verbatim
52*> UPLO is CHARACTER*1
53*> \endverbatim
54*>
55*> \param[in] WANTZ
56*> \verbatim
57*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
58*> Eigenvalue/Eigenvectors.
59*> \endverbatim
60*>
61*> \param[in] TTYPE
62*> \verbatim
63*> TTYPE is INTEGER
64*> \endverbatim
65*>
66*> \param[in] ST
67*> \verbatim
68*> ST is INTEGER
69*> internal parameter for indices.
70*> \endverbatim
71*>
72*> \param[in] ED
73*> \verbatim
74*> ED is INTEGER
75*> internal parameter for indices.
76*> \endverbatim
77*>
78*> \param[in] SWEEP
79*> \verbatim
80*> SWEEP is INTEGER
81*> internal parameter for indices.
82*> \endverbatim
83*>
84*> \param[in] N
85*> \verbatim
86*> N is INTEGER. The order of the matrix A.
87*> \endverbatim
88*>
89*> \param[in] NB
90*> \verbatim
91*> NB is INTEGER. The size of the band.
92*> \endverbatim
93*>
94*> \param[in] IB
95*> \verbatim
96*> IB is INTEGER.
97*> \endverbatim
98*>
99*> \param[in, out] A
100*> \verbatim
101*> A is COMPLEX*16 array. A pointer to the matrix A.
102*> \endverbatim
103*>
104*> \param[in] LDA
105*> \verbatim
106*> LDA is INTEGER. The leading dimension of the matrix A.
107*> \endverbatim
108*>
109*> \param[out] V
110*> \verbatim
111*> V is COMPLEX*16 array, dimension 2*n if eigenvalues only are
112*> requested or to be queried for vectors.
113*> \endverbatim
114*>
115*> \param[out] TAU
116*> \verbatim
117*> TAU is COMPLEX*16 array, dimension (2*n).
118*> The scalar factors of the Householder reflectors are stored
119*> in this array.
120*> \endverbatim
121*>
122*> \param[in] LDVT
123*> \verbatim
124*> LDVT is INTEGER.
125*> \endverbatim
126*>
127*> \param[out] WORK
128*> \verbatim
129*> WORK is COMPLEX*16 array. Workspace of size nb.
130*> \endverbatim
131*>
132*> \par Further Details:
133* =====================
134*>
135*> \verbatim
136*>
137*> Implemented by Azzam Haidar.
138*>
139*> All details are available on technical report, SC11, SC13 papers.
140*>
141*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
142*> Parallel reduction to condensed forms for symmetric eigenvalue problems
143*> using aggregated fine-grained and memory-aware kernels. In Proceedings
144*> of 2011 International Conference for High Performance Computing,
145*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
146*> Article 8 , 11 pages.
147*> http://doi.acm.org/10.1145/2063384.2063394
148*>
149*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
150*> An improved parallel singular value algorithm and its implementation
151*> for multicore hardware, In Proceedings of 2013 International Conference
152*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
153*> Denver, Colorado, USA, 2013.
154*> Article 90, 12 pages.
155*> http://doi.acm.org/10.1145/2503210.2503292
156*>
157*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
158*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
159*> calculations based on fine-grained memory aware tasks.
160*> International Journal of High Performance Computing Applications.
161*> Volume 28 Issue 2, Pages 196-209, May 2014.
162*> http://hpc.sagepub.com/content/28/2/196
163*>
164*> \endverbatim
165*>
166*> \ingroup hb2st_kernels
167*>
168* =====================================================================
169 SUBROUTINE zhb2st_kernels( UPLO, WANTZ, TTYPE,
170 $ ST, ED, SWEEP, N, NB, IB,
171 $ A, LDA, V, TAU, LDVT, WORK)
172*
173 IMPLICIT NONE
174*
175* -- LAPACK computational routine --
176* -- LAPACK is a software package provided by Univ. of Tennessee, --
177* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
178*
179* .. Scalar Arguments ..
180 CHARACTER UPLO
181 LOGICAL WANTZ
182 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
183* ..
184* .. Array Arguments ..
185 COMPLEX*16 A( LDA, * ), V( * ),
186 $ TAU( * ), WORK( * )
187* ..
188*
189* =====================================================================
190*
191* .. Parameters ..
192 COMPLEX*16 ZERO, ONE
193 PARAMETER ( ZERO = ( 0.0d+0, 0.0d+0 ),
194 $ one = ( 1.0d+0, 0.0d+0 ) )
195* ..
196* .. Local Scalars ..
197 LOGICAL UPPER
198 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
199 $ dpos, ofdpos, ajeter
200 COMPLEX*16 CTMP
201* ..
202* .. External Subroutines ..
203 EXTERNAL zlarfg, zlarfx, zlarfy
204* ..
205* .. Intrinsic Functions ..
206 INTRINSIC dconjg, mod
207* .. External Functions ..
208 LOGICAL LSAME
209 EXTERNAL LSAME
210* ..
211* ..
212* .. Executable Statements ..
213*
214 ajeter = ib + ldvt
215 upper = lsame( uplo, 'U' )
216
217 IF( upper ) THEN
218 dpos = 2 * nb + 1
219 ofdpos = 2 * nb
220 ELSE
221 dpos = 1
222 ofdpos = 2
223 ENDIF
224
225*
226* Upper case
227*
228 IF( upper ) THEN
229*
230 IF( wantz ) THEN
231 vpos = mod( sweep-1, 2 ) * n + st
232 taupos = mod( sweep-1, 2 ) * n + st
233 ELSE
234 vpos = mod( sweep-1, 2 ) * n + st
235 taupos = mod( sweep-1, 2 ) * n + st
236 ENDIF
237*
238 IF( ttype.EQ.1 ) THEN
239 lm = ed - st + 1
240*
241 v( vpos ) = one
242 DO 10 i = 1, lm-1
243 v( vpos+i ) = dconjg( a( ofdpos-i, st+i ) )
244 a( ofdpos-i, st+i ) = zero
245 10 CONTINUE
246 ctmp = dconjg( a( ofdpos, st ) )
247 CALL zlarfg( lm, ctmp, v( vpos+1 ), 1,
248 $ tau( taupos ) )
249 a( ofdpos, st ) = ctmp
250*
251 lm = ed - st + 1
252 CALL zlarfy( uplo, lm, v( vpos ), 1,
253 $ dconjg( tau( taupos ) ),
254 $ a( dpos, st ), lda-1, work)
255 ENDIF
256*
257 IF( ttype.EQ.3 ) THEN
258*
259 lm = ed - st + 1
260 CALL zlarfy( uplo, lm, v( vpos ), 1,
261 $ dconjg( tau( taupos ) ),
262 $ a( dpos, st ), lda-1, work)
263 ENDIF
264*
265 IF( ttype.EQ.2 ) THEN
266 j1 = ed+1
267 j2 = min( ed+nb, n )
268 ln = ed-st+1
269 lm = j2-j1+1
270 IF( lm.GT.0) THEN
271 CALL zlarfx( 'Left', ln, lm, v( vpos ),
272 $ dconjg( tau( taupos ) ),
273 $ a( dpos-nb, j1 ), lda-1, work)
274*
275 IF( wantz ) THEN
276 vpos = mod( sweep-1, 2 ) * n + j1
277 taupos = mod( sweep-1, 2 ) * n + j1
278 ELSE
279 vpos = mod( sweep-1, 2 ) * n + j1
280 taupos = mod( sweep-1, 2 ) * n + j1
281 ENDIF
282*
283 v( vpos ) = one
284 DO 30 i = 1, lm-1
285 v( vpos+i ) =
286 $ dconjg( a( dpos-nb-i, j1+i ) )
287 a( dpos-nb-i, j1+i ) = zero
288 30 CONTINUE
289 ctmp = dconjg( a( dpos-nb, j1 ) )
290 CALL zlarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
291 a( dpos-nb, j1 ) = ctmp
292*
293 CALL zlarfx( 'Right', ln-1, lm, v( vpos ),
294 $ tau( taupos ),
295 $ a( dpos-nb+1, j1 ), lda-1, work)
296 ENDIF
297 ENDIF
298*
299* Lower case
300*
301 ELSE
302*
303 IF( wantz ) THEN
304 vpos = mod( sweep-1, 2 ) * n + st
305 taupos = mod( sweep-1, 2 ) * n + st
306 ELSE
307 vpos = mod( sweep-1, 2 ) * n + st
308 taupos = mod( sweep-1, 2 ) * n + st
309 ENDIF
310*
311 IF( ttype.EQ.1 ) THEN
312 lm = ed - st + 1
313*
314 v( vpos ) = one
315 DO 20 i = 1, lm-1
316 v( vpos+i ) = a( ofdpos+i, st-1 )
317 a( ofdpos+i, st-1 ) = zero
318 20 CONTINUE
319 CALL zlarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
320 $ tau( taupos ) )
321*
322 lm = ed - st + 1
323*
324 CALL zlarfy( uplo, lm, v( vpos ), 1,
325 $ dconjg( tau( taupos ) ),
326 $ a( dpos, st ), lda-1, work)
327
328 ENDIF
329*
330 IF( ttype.EQ.3 ) THEN
331 lm = ed - st + 1
332*
333 CALL zlarfy( uplo, lm, v( vpos ), 1,
334 $ dconjg( tau( taupos ) ),
335 $ a( dpos, st ), lda-1, work)
336
337 ENDIF
338*
339 IF( ttype.EQ.2 ) THEN
340 j1 = ed+1
341 j2 = min( ed+nb, n )
342 ln = ed-st+1
343 lm = j2-j1+1
344*
345 IF( lm.GT.0) THEN
346 CALL zlarfx( 'Right', lm, ln, v( vpos ),
347 $ tau( taupos ), a( dpos+nb, st ),
348 $ lda-1, work)
349*
350 IF( wantz ) THEN
351 vpos = mod( sweep-1, 2 ) * n + j1
352 taupos = mod( sweep-1, 2 ) * n + j1
353 ELSE
354 vpos = mod( sweep-1, 2 ) * n + j1
355 taupos = mod( sweep-1, 2 ) * n + j1
356 ENDIF
357*
358 v( vpos ) = one
359 DO 40 i = 1, lm-1
360 v( vpos+i ) = a( dpos+nb+i, st )
361 a( dpos+nb+i, st ) = zero
362 40 CONTINUE
363 CALL zlarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
364 $ tau( taupos ) )
365*
366 CALL zlarfx( 'Left', lm, ln-1, v( vpos ),
367 $ dconjg( tau( taupos ) ),
368 $ a( dpos+nb-1, st+1 ), lda-1, work)
369
370 ENDIF
371 ENDIF
372 ENDIF
373*
374 RETURN
375*
376* End of ZHB2ST_KERNELS
377*
378 END
subroutine zhb2st_kernels(uplo, wantz, ttype, st, ed, sweep, n, nb, ib, a, lda, v, tau, ldvt, work)
ZHB2ST_KERNELS
subroutine zlarfg(n, alpha, x, incx, tau)
ZLARFG generates an elementary reflector (Householder matrix).
Definition zlarfg.f:106
subroutine zlarfx(side, m, n, v, tau, c, ldc, work)
ZLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition zlarfx.f:119
subroutine zlarfy(uplo, n, v, incv, tau, c, ldc, work)
ZLARFY
Definition zlarfy.f:108