From ejr@cs.berkeley.edu Mon Mar 23 15:54:17 2009 Date: Mon, 23 Mar 2009 15:46:15 -0600 From: Jason Riedy To: Sven Hammarling , Yozo Hida , "daniel.kressner@sam.math.ethz.ch" , Julie Langou , "Langou, Julien" Cc: Jason Riedy Subject: [PATCH 2/2] Use the vector scanning routines in xLARFG and xLARFP. If the vector is short, we save on scaling and a little on the two-norm. The two-norm routines already skip zeros within the vector. Also use the ZERO parameter for clearing the vector in {S,D}LARFP. Signed-off-by: Jason Riedy --- SRC/clarfg.f | 14 ++++++++------ SRC/clarfp.f | 18 ++++++++++-------- SRC/dlarfg.f | 14 ++++++++------ SRC/dlarfp.f | 18 ++++++++++-------- SRC/slarfg.f | 14 ++++++++------ SRC/slarfp.f | 18 ++++++++++-------- SRC/zlarfg.f | 14 ++++++++------ SRC/zlarfp.f | 18 ++++++++++-------- 8 files changed, 72 insertions(+), 56 deletions(-) diff --git a/SRC/clarfg.f b/SRC/clarfg.f index 695dd61..8d3f7b1 100644 --- a/SRC/clarfg.f +++ b/SRC/clarfg.f @@ -63,13 +63,14 @@ PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. - INTEGER J, KNT + INTEGER J, KNT, LASTJ REAL ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Functions .. REAL SCNRM2, SLAMCH, SLAPY3 COMPLEX CLADIV - EXTERNAL SCNRM2, SLAMCH, SLAPY3, CLADIV + INTEGER ILACLV + EXTERNAL SCNRM2, SLAMCH, SLAPY3, CLADIV, ILACLV * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, REAL, SIGN @@ -84,7 +85,8 @@ RETURN END IF * - XNORM = SCNRM2( N-1, X, INCX ) + LASTJ = ILACLV( N-1, X, INCX ) + XNORM = SCNRM2( LASTJ, X, INCX ) ALPHR = REAL( ALPHA ) ALPHI = AIMAG( ALPHA ) * @@ -108,7 +110,7 @@ * 10 CONTINUE KNT = KNT + 1 - CALL CSSCAL( N-1, RSAFMN, X, INCX ) + CALL CSSCAL( LASTJ, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN @@ -117,13 +119,13 @@ * * New BETA is at most 1, at least SAFMIN * - XNORM = SCNRM2( N-1, X, INCX ) + XNORM = SCNRM2( LASTJ, X, INCX ) ALPHA = CMPLX( ALPHR, ALPHI ) BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) END IF TAU = CMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA ) - CALL CSCAL( N-1, ALPHA, X, INCX ) + CALL CSCAL( LASTJ, ALPHA, X, INCX ) * * If ALPHA is subnormal, it may lose relative accuracy * diff --git a/SRC/clarfp.f b/SRC/clarfp.f index 938ae78..e46f7c5 100644 --- a/SRC/clarfp.f +++ b/SRC/clarfp.f @@ -63,13 +63,14 @@ PARAMETER ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. - INTEGER J, KNT + INTEGER J, KNT, LASTJ REAL ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Functions .. REAL SCNRM2, SLAMCH, SLAPY3, SLAPY2 COMPLEX CLADIV - EXTERNAL SCNRM2, SLAMCH, SLAPY3, SLAPY2, CLADIV + INTEGER ILACLV + EXTERNAL SCNRM2, SLAMCH, SLAPY3, SLAPY2, CLADIV, ILACLV * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, REAL, SIGN @@ -84,7 +85,8 @@ RETURN END IF * - XNORM = SCNRM2( N-1, X, INCX ) + LASTJ = ILACLV( N-1, X, INCX ) + XNORM = SCNRM2( LASTJ, X, INCX ) ALPHR = REAL( ALPHA ) ALPHI = AIMAG( ALPHA ) * @@ -102,7 +104,7 @@ ! However, the application routines rely on explicit ! zero checks when TAU.ne.ZERO, and we must clear X. TAU = TWO - DO J = 1, N-1 + DO J = 1, LASTJ X( 1 + (J-1)*INCX ) = ZERO END DO ALPHA = -ALPHA @@ -111,7 +113,7 @@ ! Only "reflecting" the diagonal entry to be real and non-negative. XNORM = SLAPY2( ALPHR, ALPHI ) TAU = CMPLX( ONE - ALPHR / XNORM, -ALPHI / XNORM ) - DO J = 1, N-1 + DO J = 1, LASTJ X( 1 + (J-1)*INCX ) = ZERO END DO ALPHA = XNORM @@ -131,7 +133,7 @@ * 10 CONTINUE KNT = KNT + 1 - CALL CSSCAL( N-1, RSAFMN, X, INCX ) + CALL CSSCAL( LASTJ, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN @@ -140,7 +142,7 @@ * * New BETA is at most 1, at least SAFMIN * - XNORM = SCNRM2( N-1, X, INCX ) + XNORM = SCNRM2( LASTJ, X, INCX ) ALPHA = CMPLX( ALPHR, ALPHI ) BETA = SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) END IF @@ -155,7 +157,7 @@ ALPHA = CMPLX( -ALPHR, ALPHI ) END IF ALPHA = CLADIV( CMPLX( ONE ), ALPHA ) - CALL CSCAL( N-1, ALPHA, X, INCX ) + CALL CSCAL( LASTJ, ALPHA, X, INCX ) * * If BETA is subnormal, it may lose relative accuracy * diff --git a/SRC/dlarfg.f b/SRC/dlarfg.f index 0071149..e312848 100644 --- a/SRC/dlarfg.f +++ b/SRC/dlarfg.f @@ -63,12 +63,13 @@ PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. - INTEGER J, KNT + INTEGER J, KNT, LASTJ DOUBLE PRECISION BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2 - EXTERNAL DLAMCH, DLAPY2, DNRM2 + INTEGER ILADLV + EXTERNAL DLAMCH, DLAPY2, DNRM2, ILADLV * .. * .. Intrinsic Functions .. INTRINSIC ABS, SIGN @@ -83,7 +84,8 @@ RETURN END IF * - XNORM = DNRM2( N-1, X, INCX ) + LASTJ = ILADLV( N-1, X, INCX ) + XNORM = DNRM2( LASTJ, X, INCX ) * IF( XNORM.EQ.ZERO ) THEN * @@ -104,7 +106,7 @@ RSAFMN = ONE / SAFMIN 10 CONTINUE KNT = KNT + 1 - CALL DSCAL( N-1, RSAFMN, X, INCX ) + CALL DSCAL( LASTJ, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN IF( ABS( BETA ).LT.SAFMIN ) @@ -112,11 +114,11 @@ * * New BETA is at most 1, at least SAFMIN * - XNORM = DNRM2( N-1, X, INCX ) + XNORM = DNRM2( LASTJ, X, INCX ) BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) END IF TAU = ( BETA-ALPHA ) / BETA - CALL DSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) + CALL DSCAL( LASTJ, ONE / ( ALPHA-BETA ), X, INCX ) * * If ALPHA is subnormal, it may lose relative accuracy * diff --git a/SRC/dlarfp.f b/SRC/dlarfp.f index 95ac70e..3bc3377 100644 --- a/SRC/dlarfp.f +++ b/SRC/dlarfp.f @@ -63,12 +63,13 @@ PARAMETER ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. - INTEGER J, KNT + INTEGER J, KNT, LASTJ DOUBLE PRECISION BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2 - EXTERNAL DLAMCH, DLAPY2, DNRM2 + INTEGER ILADLV + EXTERNAL DLAMCH, DLAPY2, DNRM2, ILADLV * .. * .. Intrinsic Functions .. INTRINSIC ABS, SIGN @@ -83,7 +84,8 @@ RETURN END IF * - XNORM = DNRM2( N-1, X, INCX ) + LASTJ = ILADLV( N-1, X, INCX ) + XNORM = DNRM2( LASTJ, X, INCX ) * IF( XNORM.EQ.ZERO ) THEN * @@ -98,8 +100,8 @@ ! However, the application routines rely on explicit ! zero checks when TAU.ne.ZERO, and we must clear X. TAU = TWO - DO J = 1, N-1 - X( 1 + (J-1)*INCX ) = 0 + DO J = 1, LASTJ + X( 1 + (J-1)*INCX ) = ZERO END DO ALPHA = -ALPHA END IF @@ -117,7 +119,7 @@ RSAFMN = ONE / SAFMIN 10 CONTINUE KNT = KNT + 1 - CALL DSCAL( N-1, RSAFMN, X, INCX ) + CALL DSCAL( LASTJ, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN IF( ABS( BETA ).LT.SAFMIN ) @@ -125,7 +127,7 @@ * * New BETA is at most 1, at least SAFMIN * - XNORM = DNRM2( N-1, X, INCX ) + XNORM = DNRM2( LASTJ, X, INCX ) BETA = SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) END IF ALPHA = ALPHA + BETA @@ -137,7 +139,7 @@ TAU = ALPHA / BETA ALPHA = -ALPHA END IF - CALL DSCAL( N-1, ONE / ALPHA, X, INCX ) + CALL DSCAL( LASTJ, ONE / ALPHA, X, INCX ) * * If BETA is subnormal, it may lose relative accuracy * diff --git a/SRC/slarfg.f b/SRC/slarfg.f index b5f905c..8945f60 100644 --- a/SRC/slarfg.f +++ b/SRC/slarfg.f @@ -63,12 +63,13 @@ PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. - INTEGER J, KNT + INTEGER J, KNT, LASTJ REAL BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Functions .. REAL SLAMCH, SLAPY2, SNRM2 - EXTERNAL SLAMCH, SLAPY2, SNRM2 + INTEGER ILASLV + EXTERNAL SLAMCH, SLAPY2, SNRM2, ILASLV * .. * .. Intrinsic Functions .. INTRINSIC ABS, SIGN @@ -83,7 +84,8 @@ RETURN END IF * - XNORM = SNRM2( N-1, X, INCX ) + LASTJ = ILASLV( N-1, X, INCX ) + XNORM = SNRM2( LASTJ, X, INCX ) * IF( XNORM.EQ.ZERO ) THEN * @@ -104,7 +106,7 @@ RSAFMN = ONE / SAFMIN 10 CONTINUE KNT = KNT + 1 - CALL SSCAL( N-1, RSAFMN, X, INCX ) + CALL SSCAL( LASTJ, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN IF( ABS( BETA ).LT.SAFMIN ) @@ -112,11 +114,11 @@ * * New BETA is at most 1, at least SAFMIN * - XNORM = SNRM2( N-1, X, INCX ) + XNORM = SNRM2( LASTJ, X, INCX ) BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) END IF TAU = ( BETA-ALPHA ) / BETA - CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) + CALL SSCAL( LASTJ, ONE / ( ALPHA-BETA ), X, INCX ) * * If ALPHA is subnormal, it may lose relative accuracy * diff --git a/SRC/slarfp.f b/SRC/slarfp.f index af6ab63..1b60434 100644 --- a/SRC/slarfp.f +++ b/SRC/slarfp.f @@ -63,12 +63,13 @@ PARAMETER ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. - INTEGER J, KNT + INTEGER J, KNT, LASTJ REAL BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Functions .. REAL SLAMCH, SLAPY2, SNRM2 - EXTERNAL SLAMCH, SLAPY2, SNRM2 + INTEGER ILASLV + EXTERNAL SLAMCH, SLAPY2, SNRM2, ILASLV * .. * .. Intrinsic Functions .. INTRINSIC ABS, SIGN @@ -83,7 +84,8 @@ RETURN END IF * - XNORM = SNRM2( N-1, X, INCX ) + LASTJ = ILASLV( N-1, X, INCX ) + XNORM = SNRM2( LASTJ, X, INCX ) * IF( XNORM.EQ.ZERO ) THEN * @@ -98,8 +100,8 @@ ! However, the application routines rely on explicit ! zero checks when TAU.ne.ZERO, and we must clear X. TAU = TWO - DO J = 1, N-1 - X( 1 + (J-1)*INCX ) = 0 + DO J = 1, LASTJ + X( 1 + (J-1)*INCX ) = ZERO END DO ALPHA = -ALPHA END IF @@ -117,7 +119,7 @@ RSAFMN = ONE / SAFMIN 10 CONTINUE KNT = KNT + 1 - CALL SSCAL( N-1, RSAFMN, X, INCX ) + CALL SSCAL( LASTJ, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN IF( ABS( BETA ).LT.SAFMIN ) @@ -125,7 +127,7 @@ * * New BETA is at most 1, at least SAFMIN * - XNORM = SNRM2( N-1, X, INCX ) + XNORM = SNRM2( LASTJ, X, INCX ) BETA = SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) END IF ALPHA = ALPHA + BETA @@ -137,7 +139,7 @@ TAU = ALPHA / BETA ALPHA = -ALPHA END IF - CALL SSCAL( N-1, ONE / ALPHA, X, INCX ) + CALL SSCAL( LASTJ, ONE / ALPHA, X, INCX ) * * If BETA is subnormal, it may lose relative accuracy * diff --git a/SRC/zlarfg.f b/SRC/zlarfg.f index 1df60ca..5745ae4 100644 --- a/SRC/zlarfg.f +++ b/SRC/zlarfg.f @@ -63,13 +63,14 @@ PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. - INTEGER J, KNT + INTEGER J, KNT, LASTJ DOUBLE PRECISION ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DLAPY3, DZNRM2 COMPLEX*16 ZLADIV - EXTERNAL DLAMCH, DLAPY3, DZNRM2, ZLADIV + INTEGER ILAZLV + EXTERNAL DLAMCH, DLAPY3, DZNRM2, ZLADIV, ILAZLV * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DIMAG, SIGN @@ -84,7 +85,8 @@ RETURN END IF * - XNORM = DZNRM2( N-1, X, INCX ) + LASTJ = ILAZLV( N-1, X, INCX ) + XNORM = DZNRM2( LASTJ, X, INCX ) ALPHR = DBLE( ALPHA ) ALPHI = DIMAG( ALPHA ) * @@ -108,7 +110,7 @@ * 10 CONTINUE KNT = KNT + 1 - CALL ZDSCAL( N-1, RSAFMN, X, INCX ) + CALL ZDSCAL( LASTJ, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN @@ -117,13 +119,13 @@ * * New BETA is at most 1, at least SAFMIN * - XNORM = DZNRM2( N-1, X, INCX ) + XNORM = DZNRM2( LASTJ, X, INCX ) ALPHA = DCMPLX( ALPHR, ALPHI ) BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) END IF TAU = DCMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA ) - CALL ZSCAL( N-1, ALPHA, X, INCX ) + CALL ZSCAL( LASTJ, ALPHA, X, INCX ) * * If ALPHA is subnormal, it may lose relative accuracy * diff --git a/SRC/zlarfp.f b/SRC/zlarfp.f index 13be100..48a5836 100644 --- a/SRC/zlarfp.f +++ b/SRC/zlarfp.f @@ -63,13 +63,14 @@ PARAMETER ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. - INTEGER J, KNT + INTEGER J, KNT, LASTJ DOUBLE PRECISION ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DLAPY3, DLAPY2, DZNRM2 COMPLEX*16 ZLADIV - EXTERNAL DLAMCH, DLAPY3, DLAPY2, DZNRM2, ZLADIV + INTEGER ILAZLV + EXTERNAL DLAMCH, DLAPY3, DLAPY2, DZNRM2, ZLADIV, ILAZLV * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DIMAG, SIGN @@ -84,7 +85,8 @@ RETURN END IF * - XNORM = DZNRM2( N-1, X, INCX ) + LASTJ = ILAZLV( N-1, X, INCX ) + XNORM = DZNRM2( LASTJ, X, INCX ) ALPHR = DBLE( ALPHA ) ALPHI = DIMAG( ALPHA ) * @@ -102,7 +104,7 @@ ! However, the application routines rely on explicit ! zero checks when TAU.ne.ZERO, and we must clear X. TAU = TWO - DO J = 1, N-1 + DO J = 1, LASTJ X( 1 + (J-1)*INCX ) = ZERO END DO ALPHA = -ALPHA @@ -111,7 +113,7 @@ ! Only "reflecting" the diagonal entry to be real and non-negative. XNORM = DLAPY2( ALPHR, ALPHI ) TAU = DCMPLX( ONE - ALPHR / XNORM, -ALPHI / XNORM ) - DO J = 1, N-1 + DO J = 1, LASTJ X( 1 + (J-1)*INCX ) = ZERO END DO ALPHA = XNORM @@ -131,7 +133,7 @@ * 10 CONTINUE KNT = KNT + 1 - CALL ZDSCAL( N-1, RSAFMN, X, INCX ) + CALL ZDSCAL( LASTJ, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN @@ -140,7 +142,7 @@ * * New BETA is at most 1, at least SAFMIN * - XNORM = DZNRM2( N-1, X, INCX ) + XNORM = DZNRM2( LASTJ, X, INCX ) ALPHA = DCMPLX( ALPHR, ALPHI ) BETA = SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) END IF @@ -155,7 +157,7 @@ ALPHA = DCMPLX( -ALPHR, ALPHI ) END IF ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA ) - CALL ZSCAL( N-1, ALPHA, X, INCX ) + CALL ZSCAL( LASTJ, ALPHA, X, INCX ) * * If BETA is subnormal, it may lose relative accuracy * -- 1.6.2.251.gb2e4c