LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
chetrd_hb2st.F
Go to the documentation of this file.
1*> \brief \b CHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download CHETRD_HB2ST + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd_hb2st.f">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd_hb2st.f">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd_hb2st.f">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
20* D, E, HOUS, LHOUS, WORK, LWORK, INFO )
21*
22* #if defined(_OPENMP)
23* use omp_lib
24* #endif
25*
26* IMPLICIT NONE
27*
28* .. Scalar Arguments ..
29* CHARACTER STAGE1, UPLO, VECT
30* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO
31* ..
32* .. Array Arguments ..
33* REAL D( * ), E( * )
34* COMPLEX AB( LDAB, * ), HOUS( * ), WORK( * )
35* ..
36*
37*
38*> \par Purpose:
39* =============
40*>
41*> \verbatim
42*>
43*> CHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric
44*> tridiagonal form T by a unitary similarity transformation:
45*> Q**H * A * Q = T.
46*> \endverbatim
47*
48* Arguments:
49* ==========
50*
51*> \param[in] STAGE1
52*> \verbatim
53*> STAGE1 is CHARACTER*1
54*> = 'N': "No": to mention that the stage 1 of the reduction
55*> from dense to band using the chetrd_he2hb routine
56*> was not called before this routine to reproduce AB.
57*> In other term this routine is called as standalone.
58*> = 'Y': "Yes": to mention that the stage 1 of the
59*> reduction from dense to band using the chetrd_he2hb
60*> routine has been called to produce AB (e.g., AB is
61*> the output of chetrd_he2hb.
62*> \endverbatim
63*>
64*> \param[in] VECT
65*> \verbatim
66*> VECT is CHARACTER*1
67*> = 'N': No need for the Housholder representation,
68*> and thus LHOUS is of size max(1, 4*N);
69*> = 'V': the Householder representation is needed to
70*> either generate or to apply Q later on,
71*> then LHOUS is to be queried and computed.
72*> (NOT AVAILABLE IN THIS RELEASE).
73*> \endverbatim
74*>
75*> \param[in] UPLO
76*> \verbatim
77*> UPLO is CHARACTER*1
78*> = 'U': Upper triangle of A is stored;
79*> = 'L': Lower triangle of A is stored.
80*> \endverbatim
81*>
82*> \param[in] N
83*> \verbatim
84*> N is INTEGER
85*> The order of the matrix A. N >= 0.
86*> \endverbatim
87*>
88*> \param[in] KD
89*> \verbatim
90*> KD is INTEGER
91*> The number of superdiagonals of the matrix A if UPLO = 'U',
92*> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
93*> \endverbatim
94*>
95*> \param[in,out] AB
96*> \verbatim
97*> AB is COMPLEX array, dimension (LDAB,N)
98*> On entry, the upper or lower triangle of the Hermitian band
99*> matrix A, stored in the first KD+1 rows of the array. The
100*> j-th column of A is stored in the j-th column of the array AB
101*> as follows:
102*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
103*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
104*> On exit, the diagonal elements of AB are overwritten by the
105*> diagonal elements of the tridiagonal matrix T; if KD > 0, the
106*> elements on the first superdiagonal (if UPLO = 'U') or the
107*> first subdiagonal (if UPLO = 'L') are overwritten by the
108*> off-diagonal elements of T; the rest of AB is overwritten by
109*> values generated during the reduction.
110*> \endverbatim
111*>
112*> \param[in] LDAB
113*> \verbatim
114*> LDAB is INTEGER
115*> The leading dimension of the array AB. LDAB >= KD+1.
116*> \endverbatim
117*>
118*> \param[out] D
119*> \verbatim
120*> D is REAL array, dimension (N)
121*> The diagonal elements of the tridiagonal matrix T.
122*> \endverbatim
123*>
124*> \param[out] E
125*> \verbatim
126*> E is REAL array, dimension (N-1)
127*> The off-diagonal elements of the tridiagonal matrix T:
128*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
129*> \endverbatim
130*>
131*> \param[out] HOUS
132*> \verbatim
133*> HOUS is COMPLEX array, dimension (MAX(1,LHOUS))
134*> Stores the Householder representation.
135*> \endverbatim
136*>
137*> \param[in] LHOUS
138*> \verbatim
139*> LHOUS is INTEGER
140*> The dimension of the array HOUS.
141*> If N = 0 or KD <= 1, LHOUS >= 1, else LHOUS = MAX(1, dimension).
142*>
143*> If LWORK = -1, or LHOUS = -1,
144*> then a query is assumed; the routine
145*> only calculates the optimal size of the HOUS array, returns
146*> this value as the first entry of the HOUS array, and no error
147*> message related to LHOUS is issued by XERBLA.
148*> LHOUS = MAX(1, dimension) where
149*> dimension = 4*N if VECT='N'
150*> not available now if VECT='H'
151*> \endverbatim
152*>
153*> \param[out] WORK
154*> \verbatim
155*> WORK is COMPLEX array, dimension (MAX(1,LWORK)).
156*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
157*> \endverbatim
158*>
159*> \param[in] LWORK
160*> \verbatim
161*> LWORK is INTEGER
162*> The dimension of the array WORK.
163*> If N = 0 or KD <= 1, LWORK >= 1, else LWORK = MAX(1, dimension).
164*>
165*> If LWORK = -1, or LHOUS = -1,
166*> then a workspace query is assumed; the routine
167*> only calculates the optimal size of the WORK array, returns
168*> this value as the first entry of the WORK array, and no error
169*> message related to LWORK is issued by XERBLA.
170*> LWORK = MAX(1, dimension) where
171*> dimension = (2KD+1)*N + KD*NTHREADS
172*> where KD is the blocking size of the reduction,
173*> FACTOPTNB is the blocking used by the QR or LQ
174*> algorithm, usually FACTOPTNB=128 is a good choice
175*> NTHREADS is the number of threads used when
176*> openMP compilation is enabled, otherwise =1.
177*> \endverbatim
178*>
179*> \param[out] INFO
180*> \verbatim
181*> INFO is INTEGER
182*> = 0: successful exit
183*> < 0: if INFO = -i, the i-th argument had an illegal value
184*> \endverbatim
185*
186* Authors:
187* ========
188*
189*> \author Univ. of Tennessee
190*> \author Univ. of California Berkeley
191*> \author Univ. of Colorado Denver
192*> \author NAG Ltd.
193*
194*> \ingroup hetrd_hb2st
195*
196*> \par Further Details:
197* =====================
198*>
199*> \verbatim
200*>
201*> Implemented by Azzam Haidar.
202*>
203*> All details are available on technical report, SC11, SC13 papers.
204*>
205*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
206*> Parallel reduction to condensed forms for symmetric eigenvalue problems
207*> using aggregated fine-grained and memory-aware kernels. In Proceedings
208*> of 2011 International Conference for High Performance Computing,
209*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
210*> Article 8 , 11 pages.
211*> http://doi.acm.org/10.1145/2063384.2063394
212*>
213*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
214*> An improved parallel singular value algorithm and its implementation
215*> for multicore hardware, In Proceedings of 2013 International Conference
216*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
217*> Denver, Colorado, USA, 2013.
218*> Article 90, 12 pages.
219*> http://doi.acm.org/10.1145/2503210.2503292
220*>
221*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
222*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
223*> calculations based on fine-grained memory aware tasks.
224*> International Journal of High Performance Computing Applications.
225*> Volume 28 Issue 2, Pages 196-209, May 2014.
226*> http://hpc.sagepub.com/content/28/2/196
227*>
228*> \endverbatim
229*>
230* =====================================================================
231 SUBROUTINE chetrd_hb2st( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
232 $ D, E, HOUS, LHOUS, WORK, LWORK, INFO )
233*
234*
235#if defined(_OPENMP)
236 use omp_lib
237#endif
238*
239 IMPLICIT NONE
240*
241* -- LAPACK computational routine --
242* -- LAPACK is a software package provided by Univ. of Tennessee, --
243* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
244*
245* .. Scalar Arguments ..
246 CHARACTER STAGE1, UPLO, VECT
247 INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
248* ..
249* .. Array Arguments ..
250 REAL D( * ), E( * )
251 COMPLEX AB( LDAB, * ), HOUS( * ), WORK( * )
252* ..
253*
254* =====================================================================
255*
256* .. Parameters ..
257 REAL RZERO
258 COMPLEX ZERO, ONE
259 parameter( rzero = 0.0e+0,
260 $ zero = ( 0.0e+0, 0.0e+0 ),
261 $ one = ( 1.0e+0, 0.0e+0 ) )
262* ..
263* .. Local Scalars ..
264 LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
265 INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
266 $ ed, stind, edind, blklastind, colpt, thed,
267 $ stepercol, grsiz, thgrsiz, thgrnb, thgrid,
268 $ nbtiles, ttype, tid, nthreads,
269 $ abdpos, abofdpos, dpos, ofdpos, awpos,
270 $ inda, indw, apos, sizea, lda, indv, indtau,
271 $ sicev, sizetau, ldv, lhmin, lwmin
272 REAL ABSTMP
273 COMPLEX TMP
274* ..
275* .. External Subroutines ..
276 EXTERNAL chb2st_kernels, clacpy,
277 $ claset, xerbla
278* ..
279* .. Intrinsic Functions ..
280 INTRINSIC min, max, ceiling, real
281* ..
282* .. External Functions ..
283 LOGICAL LSAME
284 INTEGER ILAENV2STAGE
285 REAL SROUNDUP_LWORK
286 EXTERNAL lsame, ilaenv2stage, sroundup_lwork
287* ..
288* .. Executable Statements ..
289*
290* Determine the minimal workspace size required.
291* Test the input parameters
292*
293 info = 0
294 afters1 = lsame( stage1, 'Y' )
295 wantq = lsame( vect, 'V' )
296 upper = lsame( uplo, 'U' )
297 lquery = ( lwork.EQ.-1 ) .OR. ( lhous.EQ.-1 )
298*
299* Determine the block size, the workspace size and the hous size.
300*
301 ib = ilaenv2stage( 2, 'CHETRD_HB2ST', vect, n, kd,
302 $ -1, -1 )
303 IF( n.EQ.0 .OR. kd.LE.1 ) THEN
304 lhmin = 1
305 lwmin = 1
306 ELSE
307 lhmin = ilaenv2stage( 3, 'CHETRD_HB2ST', vect, n, kd, ib,
308 $ -1 )
309 lwmin = ilaenv2stage( 4, 'CHETRD_HB2ST', vect, n, kd, ib,
310 $ -1 )
311 END IF
312*
313 IF( .NOT.afters1 .AND. .NOT.lsame( stage1, 'N' ) ) THEN
314 info = -1
315 ELSE IF( .NOT.lsame( vect, 'N' ) ) THEN
316 info = -2
317 ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
318 info = -3
319 ELSE IF( n.LT.0 ) THEN
320 info = -4
321 ELSE IF( kd.LT.0 ) THEN
322 info = -5
323 ELSE IF( ldab.LT.(kd+1) ) THEN
324 info = -7
325 ELSE IF( lhous.LT.lhmin .AND. .NOT.lquery ) THEN
326 info = -11
327 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
328 info = -13
329 END IF
330*
331 IF( info.EQ.0 ) THEN
332 hous( 1 ) = sroundup_lwork( lhmin )
333 work( 1 ) = sroundup_lwork( lwmin )
334 END IF
335*
336 IF( info.NE.0 ) THEN
337 CALL xerbla( 'CHETRD_HB2ST', -info )
338 RETURN
339 ELSE IF( lquery ) THEN
340 RETURN
341 END IF
342*
343* Quick return if possible
344*
345 IF( n.EQ.0 ) THEN
346 hous( 1 ) = 1
347 work( 1 ) = 1
348 RETURN
349 END IF
350*
351* Determine pointer position
352*
353 ldv = kd + ib
354 sizetau = 2 * n
355 sicev = 2 * n
356 indtau = 1
357 indv = indtau + sizetau
358 lda = 2 * kd + 1
359 sizea = lda * n
360 inda = 1
361 indw = inda + sizea
362 nthreads = 1
363 tid = 0
364*
365 IF( upper ) THEN
366 apos = inda + kd
367 awpos = inda
368 dpos = apos + kd
369 ofdpos = dpos - 1
370 abdpos = kd + 1
371 abofdpos = kd
372 ELSE
373 apos = inda
374 awpos = inda + kd + 1
375 dpos = apos
376 ofdpos = dpos + 1
377 abdpos = 1
378 abofdpos = 2
379
380 ENDIF
381*
382* Case KD=0:
383* The matrix is diagonal. We just copy it (convert to "real" for
384* complex because D is double and the imaginary part should be 0)
385* and store it in D. A sequential code here is better or
386* in a parallel environment it might need two cores for D and E
387*
388 IF( kd.EQ.0 ) THEN
389 DO 30 i = 1, n
390 d( i ) = real( ab( abdpos, i ) )
391 30 CONTINUE
392 DO 40 i = 1, n-1
393 e( i ) = rzero
394 40 CONTINUE
395*
396 hous( 1 ) = 1
397 work( 1 ) = 1
398 RETURN
399 END IF
400*
401* Case KD=1:
402* The matrix is already Tridiagonal. We have to make diagonal
403* and offdiagonal elements real, and store them in D and E.
404* For that, for real precision just copy the diag and offdiag
405* to D and E while for the COMPLEX case the bulge chasing is
406* performed to convert the hermetian tridiagonal to symmetric
407* tridiagonal. A simpler conversion formula might be used, but then
408* updating the Q matrix will be required and based if Q is generated
409* or not this might complicate the story.
410*
411 IF( kd.EQ.1 ) THEN
412 DO 50 i = 1, n
413 d( i ) = real( ab( abdpos, i ) )
414 50 CONTINUE
415*
416* make off-diagonal elements real and copy them to E
417*
418 IF( upper ) THEN
419 DO 60 i = 1, n - 1
420 tmp = ab( abofdpos, i+1 )
421 abstmp = abs( tmp )
422 ab( abofdpos, i+1 ) = abstmp
423 e( i ) = abstmp
424 IF( abstmp.NE.rzero ) THEN
425 tmp = tmp / abstmp
426 ELSE
427 tmp = one
428 END IF
429 IF( i.LT.n-1 )
430 $ ab( abofdpos, i+2 ) = ab( abofdpos, i+2 )*tmp
431C IF( WANTZ ) THEN
432C CALL CSCAL( N, CONJG( TMP ), Q( 1, I+1 ), 1 )
433C END IF
434 60 CONTINUE
435 ELSE
436 DO 70 i = 1, n - 1
437 tmp = ab( abofdpos, i )
438 abstmp = abs( tmp )
439 ab( abofdpos, i ) = abstmp
440 e( i ) = abstmp
441 IF( abstmp.NE.rzero ) THEN
442 tmp = tmp / abstmp
443 ELSE
444 tmp = one
445 END IF
446 IF( i.LT.n-1 )
447 $ ab( abofdpos, i+1 ) = ab( abofdpos, i+1 )*tmp
448C IF( WANTQ ) THEN
449C CALL CSCAL( N, TMP, Q( 1, I+1 ), 1 )
450C END IF
451 70 CONTINUE
452 ENDIF
453*
454 hous( 1 ) = 1
455 work( 1 ) = 1
456 RETURN
457 END IF
458*
459* Main code start here.
460* Reduce the hermitian band of A to a tridiagonal matrix.
461*
462 thgrsiz = n
463 grsiz = 1
464 shift = 3
465 nbtiles = ceiling( real(n)/real(kd) )
466 stepercol = ceiling( real(shift)/real(grsiz) )
467 thgrnb = ceiling( real(n-1)/real(thgrsiz) )
468*
469 CALL clacpy( "A", kd+1, n, ab, ldab, work( apos ), lda )
470 CALL claset( "A", kd, n, zero, zero, work( awpos ), lda )
471*
472*
473* openMP parallelisation start here
474*
475#if defined(_OPENMP)
476!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
477!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
478!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
479!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
480!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
481!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
482!$OMP MASTER
483#endif
484*
485* main bulge chasing loop
486*
487 DO 100 thgrid = 1, thgrnb
488 stt = (thgrid-1)*thgrsiz+1
489 thed = min( (stt + thgrsiz -1), (n-1))
490 DO 110 i = stt, n-1
491 ed = min( i, thed )
492 IF( stt.GT.ed ) EXIT
493 DO 120 m = 1, stepercol
494 st = stt
495 DO 130 sweepid = st, ed
496 DO 140 k = 1, grsiz
497 myid = (i-sweepid)*(stepercol*grsiz)
498 $ + (m-1)*grsiz + k
499 IF ( myid.EQ.1 ) THEN
500 ttype = 1
501 ELSE
502 ttype = mod( myid, 2 ) + 2
503 ENDIF
504
505 IF( ttype.EQ.2 ) THEN
506 colpt = (myid/2)*kd + sweepid
507 stind = colpt-kd+1
508 edind = min(colpt,n)
509 blklastind = colpt
510 ELSE
511 colpt = ((myid+1)/2)*kd + sweepid
512 stind = colpt-kd+1
513 edind = min(colpt,n)
514 IF( ( stind.GE.edind-1 ).AND.
515 $ ( edind.EQ.n ) ) THEN
516 blklastind = n
517 ELSE
518 blklastind = 0
519 ENDIF
520 ENDIF
521*
522* Call the kernel
523*
524#if defined(_OPENMP) && _OPENMP >= 201307
525 IF( ttype.NE.1 ) THEN
526!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
527!$OMP$ DEPEND(in:WORK(MYID-1))
528!$OMP$ DEPEND(out:WORK(MYID))
529 tid = omp_get_thread_num()
530 CALL chb2st_kernels(
531 $ uplo, wantq, ttype,
532 $ stind, edind, sweepid, n, kd, ib,
533 $ work( inda ), lda,
534 $ hous( indv ), hous( indtau ), ldv,
535 $ work( indw + tid*kd ) )
536!$OMP END TASK
537 ELSE
538!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
539!$OMP$ DEPEND(out:WORK(MYID))
540 tid = omp_get_thread_num()
541 CALL chb2st_kernels(
542 $ uplo, wantq, ttype,
543 $ stind, edind, sweepid, n, kd, ib,
544 $ work( inda ), lda,
545 $ hous( indv ), hous( indtau ), ldv,
546 $ work( indw + tid*kd ) )
547!$OMP END TASK
548 ENDIF
549#else
550 CALL chb2st_kernels(
551 $ uplo, wantq, ttype,
552 $ stind, edind, sweepid, n, kd, ib,
553 $ work( inda ), lda,
554 $ hous( indv ), hous( indtau ), ldv,
555 $ work( indw ) )
556#endif
557 IF ( blklastind.GE.(n-1) ) THEN
558 stt = stt + 1
559 EXIT
560 ENDIF
561 140 CONTINUE
562 130 CONTINUE
563 120 CONTINUE
564 110 CONTINUE
565 100 CONTINUE
566*
567#if defined(_OPENMP)
568!$OMP END MASTER
569!$OMP END PARALLEL
570#endif
571*
572* Copy the diagonal from A to D. Note that D is REAL thus only
573* the Real part is needed, the imaginary part should be zero.
574*
575 DO 150 i = 1, n
576 d( i ) = real( work( dpos+(i-1)*lda ) )
577 150 CONTINUE
578*
579* Copy the off diagonal from A to E. Note that E is REAL thus only
580* the Real part is needed, the imaginary part should be zero.
581*
582 IF( upper ) THEN
583 DO 160 i = 1, n-1
584 e( i ) = real( work( ofdpos+i*lda ) )
585 160 CONTINUE
586 ELSE
587 DO 170 i = 1, n-1
588 e( i ) = real( work( ofdpos+(i-1)*lda ) )
589 170 CONTINUE
590 ENDIF
591*
592 work( 1 ) = sroundup_lwork( lwmin )
593 RETURN
594*
595* End of CHETRD_HB2ST
596*
597 END
598
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine chb2st_kernels(uplo, wantz, ttype, st, ed, sweep, n, nb, ib, a, lda, v, tau, ldvt, work)
CHB2ST_KERNELS
subroutine chetrd_hb2st(stage1, vect, uplo, n, kd, ab, ldab, d, e, hous, lhous, work, lwork, info)
CHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T
subroutine clacpy(uplo, m, n, a, lda, b, ldb)
CLACPY copies all or part of one two-dimensional array to another.
Definition clacpy.f:101
subroutine claset(uplo, m, n, alpha, beta, a, lda)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition claset.f:104