LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
ssytrd_sb2st.F
Go to the documentation of this file.
1*> \brief \b SSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SSYTRD_SB2ST + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssytrd_sb2st.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssytrd_sb2st.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssytrd_sb2st.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE SSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
22* D, E, HOUS, LHOUS, WORK, LWORK, INFO )
23*
24* #if defined(_OPENMP)
25* use omp_lib
26* #endif
27*
28* IMPLICIT NONE
29*
30* .. Scalar Arguments ..
31* CHARACTER STAGE1, UPLO, VECT
32* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO
33* ..
34* .. Array Arguments ..
35* REAL D( * ), E( * )
36* REAL AB( LDAB, * ), HOUS( * ), WORK( * )
37* ..
38*
39*
40*> \par Purpose:
41* =============
42*>
43*> \verbatim
44*>
45*> SSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric
46*> tridiagonal form T by a orthogonal similarity transformation:
47*> Q**T * A * Q = T.
48*> \endverbatim
49*
50* Arguments:
51* ==========
52*
53*> \param[in] STAGE1
54*> \verbatim
55*> STAGE1 is CHARACTER*1
56*> = 'N': "No": to mention that the stage 1 of the reduction
57*> from dense to band using the ssytrd_sy2sb routine
58*> was not called before this routine to reproduce AB.
59*> In other term this routine is called as standalone.
60*> = 'Y': "Yes": to mention that the stage 1 of the
61*> reduction from dense to band using the ssytrd_sy2sb
62*> routine has been called to produce AB (e.g., AB is
63*> the output of ssytrd_sy2sb.
64*> \endverbatim
65*>
66*> \param[in] VECT
67*> \verbatim
68*> VECT is CHARACTER*1
69*> = 'N': No need for the Housholder representation,
70*> and thus LHOUS is of size max(1, 4*N);
71*> = 'V': the Householder representation is needed to
72*> either generate or to apply Q later on,
73*> then LHOUS is to be queried and computed.
74*> (NOT AVAILABLE IN THIS RELEASE).
75*> \endverbatim
76*>
77*> \param[in] UPLO
78*> \verbatim
79*> UPLO is CHARACTER*1
80*> = 'U': Upper triangle of A is stored;
81*> = 'L': Lower triangle of A is stored.
82*> \endverbatim
83*>
84*> \param[in] N
85*> \verbatim
86*> N is INTEGER
87*> The order of the matrix A. N >= 0.
88*> \endverbatim
89*>
90*> \param[in] KD
91*> \verbatim
92*> KD is INTEGER
93*> The number of superdiagonals of the matrix A if UPLO = 'U',
94*> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
95*> \endverbatim
96*>
97*> \param[in,out] AB
98*> \verbatim
99*> AB is REAL array, dimension (LDAB,N)
100*> On entry, the upper or lower triangle of the symmetric band
101*> matrix A, stored in the first KD+1 rows of the array. The
102*> j-th column of A is stored in the j-th column of the array AB
103*> as follows:
104*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
105*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
106*> On exit, the diagonal elements of AB are overwritten by the
107*> diagonal elements of the tridiagonal matrix T; if KD > 0, the
108*> elements on the first superdiagonal (if UPLO = 'U') or the
109*> first subdiagonal (if UPLO = 'L') are overwritten by the
110*> off-diagonal elements of T; the rest of AB is overwritten by
111*> values generated during the reduction.
112*> \endverbatim
113*>
114*> \param[in] LDAB
115*> \verbatim
116*> LDAB is INTEGER
117*> The leading dimension of the array AB. LDAB >= KD+1.
118*> \endverbatim
119*>
120*> \param[out] D
121*> \verbatim
122*> D is REAL array, dimension (N)
123*> The diagonal elements of the tridiagonal matrix T.
124*> \endverbatim
125*>
126*> \param[out] E
127*> \verbatim
128*> E is REAL array, dimension (N-1)
129*> The off-diagonal elements of the tridiagonal matrix T:
130*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
131*> \endverbatim
132*>
133*> \param[out] HOUS
134*> \verbatim
135*> HOUS is REAL array, dimension LHOUS, that
136*> store the Householder representation.
137*> \endverbatim
138*>
139*> \param[in] LHOUS
140*> \verbatim
141*> LHOUS is INTEGER
142*> The dimension of the array HOUS. LHOUS = MAX(1, dimension)
143*> If LWORK = -1, or LHOUS=-1,
144*> then a query is assumed; the routine
145*> only calculates the optimal size of the HOUS array, returns
146*> this value as the first entry of the HOUS array, and no error
147*> message related to LHOUS is issued by XERBLA.
148*> LHOUS = MAX(1, dimension) where
149*> dimension = 4*N if VECT='N'
150*> not available now if VECT='H'
151*> \endverbatim
152*>
153*> \param[out] WORK
154*> \verbatim
155*> WORK is REAL array, dimension LWORK.
156*> \endverbatim
157*>
158*> \param[in] LWORK
159*> \verbatim
160*> LWORK is INTEGER
161*> The dimension of the array WORK. LWORK = MAX(1, dimension)
162*> If LWORK = -1, or LHOUS=-1,
163*> then a workspace query is assumed; the routine
164*> only calculates the optimal size of the WORK array, returns
165*> this value as the first entry of the WORK array, and no error
166*> message related to LWORK is issued by XERBLA.
167*> LWORK = MAX(1, dimension) where
168*> dimension = (2KD+1)*N + KD*NTHREADS
169*> where KD is the blocking size of the reduction,
170*> FACTOPTNB is the blocking used by the QR or LQ
171*> algorithm, usually FACTOPTNB=128 is a good choice
172*> NTHREADS is the number of threads used when
173*> openMP compilation is enabled, otherwise =1.
174*> \endverbatim
175*>
176*> \param[out] INFO
177*> \verbatim
178*> INFO is INTEGER
179*> = 0: successful exit
180*> < 0: if INFO = -i, the i-th argument had an illegal value
181*> \endverbatim
182*
183* Authors:
184* ========
185*
186*> \author Univ. of Tennessee
187*> \author Univ. of California Berkeley
188*> \author Univ. of Colorado Denver
189*> \author NAG Ltd.
190*
191*> \ingroup hetrd_hb2st
192*
193*> \par Further Details:
194* =====================
195*>
196*> \verbatim
197*>
198*> Implemented by Azzam Haidar.
199*>
200*> All details are available on technical report, SC11, SC13 papers.
201*>
202*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
203*> Parallel reduction to condensed forms for symmetric eigenvalue problems
204*> using aggregated fine-grained and memory-aware kernels. In Proceedings
205*> of 2011 International Conference for High Performance Computing,
206*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
207*> Article 8 , 11 pages.
208*> http://doi.acm.org/10.1145/2063384.2063394
209*>
210*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
211*> An improved parallel singular value algorithm and its implementation
212*> for multicore hardware, In Proceedings of 2013 International Conference
213*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
214*> Denver, Colorado, USA, 2013.
215*> Article 90, 12 pages.
216*> http://doi.acm.org/10.1145/2503210.2503292
217*>
218*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
219*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
220*> calculations based on fine-grained memory aware tasks.
221*> International Journal of High Performance Computing Applications.
222*> Volume 28 Issue 2, Pages 196-209, May 2014.
223*> http://hpc.sagepub.com/content/28/2/196
224*>
225*> \endverbatim
226*>
227* =====================================================================
228 SUBROUTINE ssytrd_sb2st( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
229 $ D, E, HOUS, LHOUS, WORK, LWORK, INFO )
230*
231#if defined(_OPENMP)
232 use omp_lib
233#endif
234*
235 IMPLICIT NONE
236*
237* -- LAPACK computational routine --
238* -- LAPACK is a software package provided by Univ. of Tennessee, --
239* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
240*
241* .. Scalar Arguments ..
242 CHARACTER STAGE1, UPLO, VECT
243 INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
244* ..
245* .. Array Arguments ..
246 REAL D( * ), E( * )
247 REAL AB( LDAB, * ), HOUS( * ), WORK( * )
248* ..
249*
250* =====================================================================
251*
252* .. Parameters ..
253 REAL RZERO
254 REAL ZERO, ONE
255 parameter( rzero = 0.0e+0,
256 $ zero = 0.0e+0,
257 $ one = 1.0e+0 )
258* ..
259* .. Local Scalars ..
260 LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
261 INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
262 $ ed, stind, edind, blklastind, colpt, thed,
263 $ stepercol, grsiz, thgrsiz, thgrnb, thgrid,
264 $ nbtiles, ttype, tid, nthreads, debug,
265 $ abdpos, abofdpos, dpos, ofdpos, awpos,
266 $ inda, indw, apos, sizea, lda, indv, indtau,
267 $ sisev, sizetau, ldv, lhmin, lwmin
268* ..
269* .. External Subroutines ..
271* ..
272* .. Intrinsic Functions ..
273 INTRINSIC min, max, ceiling, real
274* ..
275* .. External Functions ..
276 LOGICAL LSAME
277 INTEGER ILAENV2STAGE
278 REAL SROUNDUP_LWORK
279 EXTERNAL lsame, ilaenv2stage, sroundup_lwork
280* ..
281* .. Executable Statements ..
282*
283* Determine the minimal workspace size required.
284* Test the input parameters
285*
286 debug = 0
287 info = 0
288 afters1 = lsame( stage1, 'Y' )
289 wantq = lsame( vect, 'V' )
290 upper = lsame( uplo, 'U' )
291 lquery = ( lwork.EQ.-1 ) .OR. ( lhous.EQ.-1 )
292*
293* Determine the block size, the workspace size and the hous size.
294*
295 ib = ilaenv2stage( 2, 'SSYTRD_SB2ST', vect, n, kd, -1, -1 )
296 lhmin = ilaenv2stage( 3, 'SSYTRD_SB2ST', vect, n, kd, ib, -1 )
297 lwmin = ilaenv2stage( 4, 'SSYTRD_SB2ST', vect, n, kd, ib, -1 )
298*
299 IF( .NOT.afters1 .AND. .NOT.lsame( stage1, 'N' ) ) THEN
300 info = -1
301 ELSE IF( .NOT.lsame( vect, 'N' ) ) THEN
302 info = -2
303 ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
304 info = -3
305 ELSE IF( n.LT.0 ) THEN
306 info = -4
307 ELSE IF( kd.LT.0 ) THEN
308 info = -5
309 ELSE IF( ldab.LT.(kd+1) ) THEN
310 info = -7
311 ELSE IF( lhous.LT.lhmin .AND. .NOT.lquery ) THEN
312 info = -11
313 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
314 info = -13
315 END IF
316*
317 IF( info.EQ.0 ) THEN
318 hous( 1 ) = lhmin
319 work( 1 ) = sroundup_lwork(lwmin)
320 END IF
321*
322 IF( info.NE.0 ) THEN
323 CALL xerbla( 'SSYTRD_SB2ST', -info )
324 RETURN
325 ELSE IF( lquery ) THEN
326 RETURN
327 END IF
328*
329* Quick return if possible
330*
331 IF( n.EQ.0 ) THEN
332 hous( 1 ) = 1
333 work( 1 ) = 1
334 RETURN
335 END IF
336*
337* Determine pointer position
338*
339 ldv = kd + ib
340 sizetau = 2 * n
341 sisev = 2 * n
342 indtau = 1
343 indv = indtau + sizetau
344 lda = 2 * kd + 1
345 sizea = lda * n
346 inda = 1
347 indw = inda + sizea
348 nthreads = 1
349 tid = 0
350*
351 IF( upper ) THEN
352 apos = inda + kd
353 awpos = inda
354 dpos = apos + kd
355 ofdpos = dpos - 1
356 abdpos = kd + 1
357 abofdpos = kd
358 ELSE
359 apos = inda
360 awpos = inda + kd + 1
361 dpos = apos
362 ofdpos = dpos + 1
363 abdpos = 1
364 abofdpos = 2
365
366 ENDIF
367*
368* Case KD=0:
369* The matrix is diagonal. We just copy it (convert to "real" for
370* real because D is double and the imaginary part should be 0)
371* and store it in D. A sequential code here is better or
372* in a parallel environment it might need two cores for D and E
373*
374 IF( kd.EQ.0 ) THEN
375 DO 30 i = 1, n
376 d( i ) = ( ab( abdpos, i ) )
377 30 CONTINUE
378 DO 40 i = 1, n-1
379 e( i ) = rzero
380 40 CONTINUE
381*
382 hous( 1 ) = 1
383 work( 1 ) = 1
384 RETURN
385 END IF
386*
387* Case KD=1:
388* The matrix is already Tridiagonal. We have to make diagonal
389* and offdiagonal elements real, and store them in D and E.
390* For that, for real precision just copy the diag and offdiag
391* to D and E while for the COMPLEX case the bulge chasing is
392* performed to convert the hermetian tridiagonal to symmetric
393* tridiagonal. A simpler conversion formula might be used, but then
394* updating the Q matrix will be required and based if Q is generated
395* or not this might complicate the story.
396*
397 IF( kd.EQ.1 ) THEN
398 DO 50 i = 1, n
399 d( i ) = ( ab( abdpos, i ) )
400 50 CONTINUE
401*
402 IF( upper ) THEN
403 DO 60 i = 1, n-1
404 e( i ) = ( ab( abofdpos, i+1 ) )
405 60 CONTINUE
406 ELSE
407 DO 70 i = 1, n-1
408 e( i ) = ( ab( abofdpos, i ) )
409 70 CONTINUE
410 ENDIF
411*
412 hous( 1 ) = 1
413 work( 1 ) = 1
414 RETURN
415 END IF
416*
417* Main code start here.
418* Reduce the symmetric band of A to a tridiagonal matrix.
419*
420 thgrsiz = n
421 grsiz = 1
422 shift = 3
423 nbtiles = ceiling( real(n)/real(kd) )
424 stepercol = ceiling( real(shift)/real(grsiz) )
425 thgrnb = ceiling( real(n-1)/real(thgrsiz) )
426*
427 CALL slacpy( "A", kd+1, n, ab, ldab, work( apos ), lda )
428 CALL slaset( "A", kd, n, zero, zero, work( awpos ), lda )
429*
430*
431* openMP parallelisation start here
432*
433#if defined(_OPENMP)
434!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
435!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
436!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
437!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
438!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
439!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
440!$OMP MASTER
441#endif
442*
443* main bulge chasing loop
444*
445 DO 100 thgrid = 1, thgrnb
446 stt = (thgrid-1)*thgrsiz+1
447 thed = min( (stt + thgrsiz -1), (n-1))
448 DO 110 i = stt, n-1
449 ed = min( i, thed )
450 IF( stt.GT.ed ) EXIT
451 DO 120 m = 1, stepercol
452 st = stt
453 DO 130 sweepid = st, ed
454 DO 140 k = 1, grsiz
455 myid = (i-sweepid)*(stepercol*grsiz)
456 $ + (m-1)*grsiz + k
457 IF ( myid.EQ.1 ) THEN
458 ttype = 1
459 ELSE
460 ttype = mod( myid, 2 ) + 2
461 ENDIF
462
463 IF( ttype.EQ.2 ) THEN
464 colpt = (myid/2)*kd + sweepid
465 stind = colpt-kd+1
466 edind = min(colpt,n)
467 blklastind = colpt
468 ELSE
469 colpt = ((myid+1)/2)*kd + sweepid
470 stind = colpt-kd+1
471 edind = min(colpt,n)
472 IF( ( stind.GE.edind-1 ).AND.
473 $ ( edind.EQ.n ) ) THEN
474 blklastind = n
475 ELSE
476 blklastind = 0
477 ENDIF
478 ENDIF
479*
480* Call the kernel
481*
482#if defined(_OPENMP) && _OPENMP >= 201307
483 IF( ttype.NE.1 ) THEN
484!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
485!$OMP$ DEPEND(in:WORK(MYID-1))
486!$OMP$ DEPEND(out:WORK(MYID))
487 tid = omp_get_thread_num()
488 CALL ssb2st_kernels( uplo, wantq, ttype,
489 $ stind, edind, sweepid, n, kd, ib,
490 $ work( inda ), lda,
491 $ hous( indv ), hous( indtau ), ldv,
492 $ work( indw + tid*kd ) )
493!$OMP END TASK
494 ELSE
495!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
496!$OMP$ DEPEND(out:WORK(MYID))
497 tid = omp_get_thread_num()
498 CALL ssb2st_kernels( uplo, wantq, ttype,
499 $ stind, edind, sweepid, n, kd, ib,
500 $ work( inda ), lda,
501 $ hous( indv ), hous( indtau ), ldv,
502 $ work( indw + tid*kd ) )
503!$OMP END TASK
504 ENDIF
505#else
506 CALL ssb2st_kernels( uplo, wantq, ttype,
507 $ stind, edind, sweepid, n, kd, ib,
508 $ work( inda ), lda,
509 $ hous( indv ), hous( indtau ), ldv,
510 $ work( indw ) )
511#endif
512 IF ( blklastind.GE.(n-1) ) THEN
513 stt = stt + 1
514 EXIT
515 ENDIF
516 140 CONTINUE
517 130 CONTINUE
518 120 CONTINUE
519 110 CONTINUE
520 100 CONTINUE
521*
522#if defined(_OPENMP)
523!$OMP END MASTER
524!$OMP END PARALLEL
525#endif
526*
527* Copy the diagonal from A to D. Note that D is REAL thus only
528* the Real part is needed, the imaginary part should be zero.
529*
530 DO 150 i = 1, n
531 d( i ) = ( work( dpos+(i-1)*lda ) )
532 150 CONTINUE
533*
534* Copy the off diagonal from A to E. Note that E is REAL thus only
535* the Real part is needed, the imaginary part should be zero.
536*
537 IF( upper ) THEN
538 DO 160 i = 1, n-1
539 e( i ) = ( work( ofdpos+i*lda ) )
540 160 CONTINUE
541 ELSE
542 DO 170 i = 1, n-1
543 e( i ) = ( work( ofdpos+(i-1)*lda ) )
544 170 CONTINUE
545 ENDIF
546*
547 hous( 1 ) = lhmin
548 work( 1 ) = sroundup_lwork(lwmin)
549 RETURN
550*
551* End of SSYTRD_SB2ST
552*
553 END
554
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine ssb2st_kernels(uplo, wantz, ttype, st, ed, sweep, n, nb, ib, a, lda, v, tau, ldvt, work)
SSB2ST_KERNELS
subroutine ssytrd_sb2st(stage1, vect, uplo, n, kd, ab, ldab, d, e, hous, lhous, work, lwork, info)
SSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:103
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:110