LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ ssytrd_sb2st()

subroutine ssytrd_sb2st ( character  STAGE1,
character  VECT,
character  UPLO,
integer  N,
integer  KD,
real, dimension( ldab, * )  AB,
integer  LDAB,
real, dimension( * )  D,
real, dimension( * )  E,
real, dimension( * )  HOUS,
integer  LHOUS,
real, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

SSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T

Download SSYTRD_SB2ST + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric
 tridiagonal form T by a orthogonal similarity transformation:
 Q**T * A * Q = T.
Parameters
[in]STAGE1
          STAGE1 is CHARACTER*1
          = 'N':  "No": to mention that the stage 1 of the reduction  
                  from dense to band using the ssytrd_sy2sb routine
                  was not called before this routine to reproduce AB. 
                  In other term this routine is called as standalone. 
          = 'Y':  "Yes": to mention that the stage 1 of the 
                  reduction from dense to band using the ssytrd_sy2sb 
                  routine has been called to produce AB (e.g., AB is
                  the output of ssytrd_sy2sb.
[in]VECT
          VECT is CHARACTER*1
          = 'N':  No need for the Housholder representation, 
                  and thus LHOUS is of size max(1, 4*N);
          = 'V':  the Householder representation is needed to 
                  either generate or to apply Q later on, 
                  then LHOUS is to be queried and computed.
                  (NOT AVAILABLE IN THIS RELEASE).
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in]KD
          KD is INTEGER
          The number of superdiagonals of the matrix A if UPLO = 'U',
          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
[in,out]AB
          AB is REAL array, dimension (LDAB,N)
          On entry, the upper or lower triangle of the symmetric band
          matrix A, stored in the first KD+1 rows of the array.  The
          j-th column of A is stored in the j-th column of the array AB
          as follows:
          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
          On exit, the diagonal elements of AB are overwritten by the
          diagonal elements of the tridiagonal matrix T; if KD > 0, the
          elements on the first superdiagonal (if UPLO = 'U') or the
          first subdiagonal (if UPLO = 'L') are overwritten by the
          off-diagonal elements of T; the rest of AB is overwritten by
          values generated during the reduction.
[in]LDAB
          LDAB is INTEGER
          The leading dimension of the array AB.  LDAB >= KD+1.
[out]D
          D is REAL array, dimension (N)
          The diagonal elements of the tridiagonal matrix T.
[out]E
          E is REAL array, dimension (N-1)
          The off-diagonal elements of the tridiagonal matrix T:
          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
[out]HOUS
          HOUS is REAL array, dimension LHOUS, that
          store the Householder representation.
[in]LHOUS
          LHOUS is INTEGER
          The dimension of the array HOUS. LHOUS = MAX(1, dimension)
          If LWORK = -1, or LHOUS=-1,
          then a query is assumed; the routine
          only calculates the optimal size of the HOUS array, returns
          this value as the first entry of the HOUS array, and no error
          message related to LHOUS is issued by XERBLA.
          LHOUS = MAX(1, dimension) where
          dimension = 4*N if VECT='N'
          not available now if VECT='H'     
[out]WORK
          WORK is REAL array, dimension LWORK.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK. LWORK = MAX(1, dimension)
          If LWORK = -1, or LHOUS=-1,
          then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
          LWORK = MAX(1, dimension) where
          dimension   = (2KD+1)*N + KD*NTHREADS
          where KD is the blocking size of the reduction,
          FACTOPTNB is the blocking used by the QR or LQ
          algorithm, usually FACTOPTNB=128 is a good choice
          NTHREADS is the number of threads used when
          openMP compilation is enabled, otherwise =1.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation 
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196 

Definition at line 228 of file ssytrd_sb2st.F.

230*
231#if defined(_OPENMP)
232 use omp_lib
233#endif
234*
235 IMPLICIT NONE
236*
237* -- LAPACK computational routine --
238* -- LAPACK is a software package provided by Univ. of Tennessee, --
239* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
240*
241* .. Scalar Arguments ..
242 CHARACTER STAGE1, UPLO, VECT
243 INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
244* ..
245* .. Array Arguments ..
246 REAL D( * ), E( * )
247 REAL AB( LDAB, * ), HOUS( * ), WORK( * )
248* ..
249*
250* =====================================================================
251*
252* .. Parameters ..
253 REAL RZERO
254 REAL ZERO, ONE
255 parameter( rzero = 0.0e+0,
256 $ zero = 0.0e+0,
257 $ one = 1.0e+0 )
258* ..
259* .. Local Scalars ..
260 LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
261 INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
262 $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
263 $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
264 $ NBTILES, TTYPE, TID, NTHREADS, DEBUG,
265 $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS,
266 $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
267 $ SISEV, SIZETAU, LDV, LHMIN, LWMIN
268* ..
269* .. External Subroutines ..
271* ..
272* .. Intrinsic Functions ..
273 INTRINSIC min, max, ceiling, real
274* ..
275* .. External Functions ..
276 LOGICAL LSAME
277 INTEGER ILAENV2STAGE
278 EXTERNAL lsame, ilaenv2stage
279* ..
280* .. Executable Statements ..
281*
282* Determine the minimal workspace size required.
283* Test the input parameters
284*
285 debug = 0
286 info = 0
287 afters1 = lsame( stage1, 'Y' )
288 wantq = lsame( vect, 'V' )
289 upper = lsame( uplo, 'U' )
290 lquery = ( lwork.EQ.-1 ) .OR. ( lhous.EQ.-1 )
291*
292* Determine the block size, the workspace size and the hous size.
293*
294 ib = ilaenv2stage( 2, 'SSYTRD_SB2ST', vect, n, kd, -1, -1 )
295 lhmin = ilaenv2stage( 3, 'SSYTRD_SB2ST', vect, n, kd, ib, -1 )
296 lwmin = ilaenv2stage( 4, 'SSYTRD_SB2ST', vect, n, kd, ib, -1 )
297*
298 IF( .NOT.afters1 .AND. .NOT.lsame( stage1, 'N' ) ) THEN
299 info = -1
300 ELSE IF( .NOT.lsame( vect, 'N' ) ) THEN
301 info = -2
302 ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
303 info = -3
304 ELSE IF( n.LT.0 ) THEN
305 info = -4
306 ELSE IF( kd.LT.0 ) THEN
307 info = -5
308 ELSE IF( ldab.LT.(kd+1) ) THEN
309 info = -7
310 ELSE IF( lhous.LT.lhmin .AND. .NOT.lquery ) THEN
311 info = -11
312 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
313 info = -13
314 END IF
315*
316 IF( info.EQ.0 ) THEN
317 hous( 1 ) = lhmin
318 work( 1 ) = lwmin
319 END IF
320*
321 IF( info.NE.0 ) THEN
322 CALL xerbla( 'SSYTRD_SB2ST', -info )
323 RETURN
324 ELSE IF( lquery ) THEN
325 RETURN
326 END IF
327*
328* Quick return if possible
329*
330 IF( n.EQ.0 ) THEN
331 hous( 1 ) = 1
332 work( 1 ) = 1
333 RETURN
334 END IF
335*
336* Determine pointer position
337*
338 ldv = kd + ib
339 sizetau = 2 * n
340 sisev = 2 * n
341 indtau = 1
342 indv = indtau + sizetau
343 lda = 2 * kd + 1
344 sizea = lda * n
345 inda = 1
346 indw = inda + sizea
347 nthreads = 1
348 tid = 0
349*
350 IF( upper ) THEN
351 apos = inda + kd
352 awpos = inda
353 dpos = apos + kd
354 ofdpos = dpos - 1
355 abdpos = kd + 1
356 abofdpos = kd
357 ELSE
358 apos = inda
359 awpos = inda + kd + 1
360 dpos = apos
361 ofdpos = dpos + 1
362 abdpos = 1
363 abofdpos = 2
364
365 ENDIF
366*
367* Case KD=0:
368* The matrix is diagonal. We just copy it (convert to "real" for
369* real because D is double and the imaginary part should be 0)
370* and store it in D. A sequential code here is better or
371* in a parallel environment it might need two cores for D and E
372*
373 IF( kd.EQ.0 ) THEN
374 DO 30 i = 1, n
375 d( i ) = ( ab( abdpos, i ) )
376 30 CONTINUE
377 DO 40 i = 1, n-1
378 e( i ) = rzero
379 40 CONTINUE
380*
381 hous( 1 ) = 1
382 work( 1 ) = 1
383 RETURN
384 END IF
385*
386* Case KD=1:
387* The matrix is already Tridiagonal. We have to make diagonal
388* and offdiagonal elements real, and store them in D and E.
389* For that, for real precision just copy the diag and offdiag
390* to D and E while for the COMPLEX case the bulge chasing is
391* performed to convert the hermetian tridiagonal to symmetric
392* tridiagonal. A simpler conversion formula might be used, but then
393* updating the Q matrix will be required and based if Q is generated
394* or not this might complicate the story.
395*
396 IF( kd.EQ.1 ) THEN
397 DO 50 i = 1, n
398 d( i ) = ( ab( abdpos, i ) )
399 50 CONTINUE
400*
401 IF( upper ) THEN
402 DO 60 i = 1, n-1
403 e( i ) = ( ab( abofdpos, i+1 ) )
404 60 CONTINUE
405 ELSE
406 DO 70 i = 1, n-1
407 e( i ) = ( ab( abofdpos, i ) )
408 70 CONTINUE
409 ENDIF
410*
411 hous( 1 ) = 1
412 work( 1 ) = 1
413 RETURN
414 END IF
415*
416* Main code start here.
417* Reduce the symmetric band of A to a tridiagonal matrix.
418*
419 thgrsiz = n
420 grsiz = 1
421 shift = 3
422 nbtiles = ceiling( real(n)/real(kd) )
423 stepercol = ceiling( real(shift)/real(grsiz) )
424 thgrnb = ceiling( real(n-1)/real(thgrsiz) )
425*
426 CALL slacpy( "A", kd+1, n, ab, ldab, work( apos ), lda )
427 CALL slaset( "A", kd, n, zero, zero, work( awpos ), lda )
428*
429*
430* openMP parallelisation start here
431*
432#if defined(_OPENMP)
433!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
434!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
435!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
436!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
437!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
438!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
439!$OMP MASTER
440#endif
441*
442* main bulge chasing loop
443*
444 DO 100 thgrid = 1, thgrnb
445 stt = (thgrid-1)*thgrsiz+1
446 thed = min( (stt + thgrsiz -1), (n-1))
447 DO 110 i = stt, n-1
448 ed = min( i, thed )
449 IF( stt.GT.ed ) EXIT
450 DO 120 m = 1, stepercol
451 st = stt
452 DO 130 sweepid = st, ed
453 DO 140 k = 1, grsiz
454 myid = (i-sweepid)*(stepercol*grsiz)
455 $ + (m-1)*grsiz + k
456 IF ( myid.EQ.1 ) THEN
457 ttype = 1
458 ELSE
459 ttype = mod( myid, 2 ) + 2
460 ENDIF
461
462 IF( ttype.EQ.2 ) THEN
463 colpt = (myid/2)*kd + sweepid
464 stind = colpt-kd+1
465 edind = min(colpt,n)
466 blklastind = colpt
467 ELSE
468 colpt = ((myid+1)/2)*kd + sweepid
469 stind = colpt-kd+1
470 edind = min(colpt,n)
471 IF( ( stind.GE.edind-1 ).AND.
472 $ ( edind.EQ.n ) ) THEN
473 blklastind = n
474 ELSE
475 blklastind = 0
476 ENDIF
477 ENDIF
478*
479* Call the kernel
480*
481#if defined(_OPENMP) && _OPENMP >= 201307
482 IF( ttype.NE.1 ) THEN
483!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
484!$OMP$ DEPEND(in:WORK(MYID-1))
485!$OMP$ DEPEND(out:WORK(MYID))
486 tid = omp_get_thread_num()
487 CALL ssb2st_kernels( uplo, wantq, ttype,
488 $ stind, edind, sweepid, n, kd, ib,
489 $ work( inda ), lda,
490 $ hous( indv ), hous( indtau ), ldv,
491 $ work( indw + tid*kd ) )
492!$OMP END TASK
493 ELSE
494!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
495!$OMP$ DEPEND(out:WORK(MYID))
496 tid = omp_get_thread_num()
497 CALL ssb2st_kernels( uplo, wantq, ttype,
498 $ stind, edind, sweepid, n, kd, ib,
499 $ work( inda ), lda,
500 $ hous( indv ), hous( indtau ), ldv,
501 $ work( indw + tid*kd ) )
502!$OMP END TASK
503 ENDIF
504#else
505 CALL ssb2st_kernels( uplo, wantq, ttype,
506 $ stind, edind, sweepid, n, kd, ib,
507 $ work( inda ), lda,
508 $ hous( indv ), hous( indtau ), ldv,
509 $ work( indw + tid*kd ) )
510#endif
511 IF ( blklastind.GE.(n-1) ) THEN
512 stt = stt + 1
513 EXIT
514 ENDIF
515 140 CONTINUE
516 130 CONTINUE
517 120 CONTINUE
518 110 CONTINUE
519 100 CONTINUE
520*
521#if defined(_OPENMP)
522!$OMP END MASTER
523!$OMP END PARALLEL
524#endif
525*
526* Copy the diagonal from A to D. Note that D is REAL thus only
527* the Real part is needed, the imaginary part should be zero.
528*
529 DO 150 i = 1, n
530 d( i ) = ( work( dpos+(i-1)*lda ) )
531 150 CONTINUE
532*
533* Copy the off diagonal from A to E. Note that E is REAL thus only
534* the Real part is needed, the imaginary part should be zero.
535*
536 IF( upper ) THEN
537 DO 160 i = 1, n-1
538 e( i ) = ( work( ofdpos+i*lda ) )
539 160 CONTINUE
540 ELSE
541 DO 170 i = 1, n-1
542 e( i ) = ( work( ofdpos+(i-1)*lda ) )
543 170 CONTINUE
544 ENDIF
545*
546 hous( 1 ) = lhmin
547 work( 1 ) = lwmin
548 RETURN
549*
550* End of SSYTRD_SB2ST
551*
integer function ilaenv2stage(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV2STAGE
Definition: ilaenv2stage.f:149
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:110
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:103
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine ssb2st_kernels(UPLO, WANTZ, TTYPE, ST, ED, SWEEP, N, NB, IB, A, LDA, V, TAU, LDVT, WORK)
SSB2ST_KERNELS
Here is the call graph for this function:
Here is the caller graph for this function: