#!/bin/sh # From bokg@cs.umu.se Tue Oct 30 10:37:01 1990 # This is a shell archive (produced by shar 3.49) # To extract the files from this archive, save it to a file, remove # everything above the "!/bin/sh" line above, and type "sh file_name". # # made 10/30/1990 14:39 UTC by bokg@mariehem # Source directory /home/gandalf/infbeh/bokg/dbl3pol # # existing files will NOT be overwritten unless -c is specified # # This shar contains: # length mode name # ------ ---------- ------------------------------------------ # 2751 -rw------- README # 4426 -rw------- cld.f # 16829 -rw------- dgemm.f # 19467 -rw------- dsymm.f # 18830 -rw------- dsyr2k.f # 21321 -rw------- dsyrk.f # 43449 -rw------- dtrmm.f # 45939 -rw------- dtrsm.f # 2431 -rw------- lsame.f # 1134 -rw------- xerbla.f # # ============= README ============== if test -f 'README' -a X"$1" != X"-c"; then echo 'x - skipping README (File already exists)' else echo 'x - extracting README (Text)' sed 's/^X//' << 'SHAR_EOF' > 'README' && ************************************************************************ * * Double Precision level-3 BLAS for IBM 3090 VF (V1.R0). * * These implementations are structured, blocked and tuned for * the IBM 3090 VF. Apart from the level-3 BLA subroutines * DGEMM, DSYMM, DSYRK, DSYR2K, DTRMM and DTRSM this set also * contains some internal subroutines and functions. The functions * LSAME and CLD, the subroutines UMM001 - UMM008 and USM001 - * USM008 used by DTRMM and DTRSM respectively and the subroutine * XERBLA are not supposed to be called by user programs. DTRMM and * DTRSM also uses the named common blocks UCMN01 and UCMN02. * There are three parameters in the subroutines that may need to * be changed VSS, EFF and EFF2 in order to get high performance. * VSS is the Vector Section Size (length of the vector-registers). * VSS*EFF double precision words (8 byte/word) should correspond * to 3/4 of the size of a cache memory unit. VSS*EFF2 double * precision words should correspond to 1/2 of the size of a cache. * Change VSS, EFF and EFF2 to the appropriate values for the * machine you are using. The values are assigned in PARAMETER * statements in the subroutines DGEMM, DSYMM, DSYRK and DSYR2K * and in UMM001 - UMM008 and USM001 - USM008 for DTRMM and DTRSM * respectively. EFF2 is not present in all of these subroutines. * The default values used if no changes are made corresponds to * the IBM 3090E VF having vector section size 128 and cache size * 64 kbyte: * VSS = 128, EFF = 48 and EFF2 = 32. * Compile with IBM VS FORTRAN Version 2.3 or later releases. Use * the compiler options vector() and opt(3). For convenience the * non-standard Fortran 77 @PROCESS-statements that activates vector * directives precede subroutines that use vector directives. * * For more information see: * Ling P. "A set of High Performance Level-3 BLAS Structured and * Tuned for the IBM 3090 VF and Implemented in Fortran 77", * Report UMINF-179.90, Inst. of Information Processing, * Univ. of Umea, S-901 87 Umea, Sweden, May 1990. * * Kagstrom B. and Ling P. "Level 2 and 3 BLAS Routines for * IBM 3090 VF: Implementation and Experiences", in J. Dongarra * I. Duff, P. Gaffney and S. McKee (eds), Vector and * Parallel Computing, Ellis Horwood, 1989, pp 229-240 * * Address: Institute of Information Processing * University of Umea * S-901 87 UMEA, SWEDEN * Email: pol@cs.umu.se or bokg@cs.umu.se or * na.kagstrom@na-net.staford.edu ************************************************************************ SHAR_EOF chmod 0600 README || echo 'restore of README failed' Wc_c="`wc -c < 'README'`" test 2751 -eq "$Wc_c" || echo 'README: original size 2751, current size' "$Wc_c" fi # ============= cld.f ============== if test -f 'cld.f' -a X"$1" != X"-c"; then echo 'x - skipping cld.f (File already exists)' else echo 'x - extracting cld.f (Text)' sed 's/^X//' << 'SHAR_EOF' > 'cld.f' && X LOGICAL FUNCTION CLD( LD ) * .. Scalar Arguments .. X INTEGER LD * .. * * Purpose * ======= * * CLD tries to determin if the leading dimension LD of a matrix is * "critical" and thereby may cause reduced performance (execution * speed). Certain leading dimensions are critical due to the data * storage scheme, the mapping of data in cache and main memory. * CLD returns .true. if LD is a critical leading dimension. * * Parameters * ========== * * LD - LOGICAL*4 * On entry, LD specify the leading dimension of a matrix. * Unchanged on exit. * * * * CLD : Critical leading dimension. * LD : Leading dimension of matrix. * LOLIM : Lower limit for CLD:s. * SETSZ : Set size, size of a cache-set (number of words in DP). * HLFSET : SETSZ/2. * MSETSZ : Multipple of SETSZ. * MINOFF : Minimum offset from MSETSZ for a multipple of a non-CLD. * TMP : Temporary variable. * * .. Local Variables .. X INTEGER MSETSZ, TMP * .. Intrinsic Functions .. X INTRINSIC MIN * .. Parameters ( for IBM 3090 E, VSS = 128, DOUBLE PRECISION ) .. X INTEGER LOLIM, SETSZ, HLFSET, MINOFF X PARAMETER ( LOLIM = 128, MINOFF = 6 ) X PARAMETER ( SETSZ = 2048, HLFSET = SETSZ/2 ) * * .. Executable Statements .. * X CLD = .FALSE. X IF( LD.LT.LOLIM ) RETURN X MSETSZ = SETSZ X IF( LD.GT.MSETSZ ) MSETSZ = ( ( LD+HLFSET )/SETSZ ) * SETSZ X TMP = MSETSZ/LD X CLD = ( MIN( MSETSZ - TMP*LD, ( TMP+1 )*LD - MSETSZ ).LT.MINOFF) * X RETURN * * End of CLD * X END SHAR_EOF chmod 0600 cld.f || echo 'restore of cld.f failed' Wc_c="`wc -c < 'cld.f'`" test 4426 -eq "$Wc_c" || echo 'cld.f: original size 4426, current size' "$Wc_c" fi # ============= dgemm.f ============== if test -f 'dgemm.f' -a X"$1" != X"-c"; then echo 'x - skipping dgemm.f (File already exists)' else echo 'x - extracting dgemm.f (Text)' sed 's/^X//' << 'SHAR_EOF' > 'dgemm.f' && @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE DGEMM( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, X + BETA, C, LDC ) * .. Scalar Arguments .. X CHARACTER*1 TRANSA, TRANSB X INTEGER M, N, K, LDA, LDB, LDC X DOUBLE PRECISION ALPHA, BETA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) * .. * * Purpose * ======= * * DGEMM performs one of the matrix-matrix operations * * C := alpha*op( A )*op( B ) + beta*C, * * where op( X ) is one of * * op( X ) = X or op( X ) = X', * * alpha and beta are scalars, and A, B and C are matrices, with op( A ) * an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. * * Parameters * ========== * * TRANSA - CHARACTER*1. * On entry, TRANSA specifies the form of op( A ) to be used in * the matrix multiplication as follows: * * TRANSA = 'N' or 'n', op( A ) = A. * * TRANSA = 'T' or 't', op( A ) = A'. * * TRANSA = 'C' or 'c', op( A ) = A'. * * Unchanged on exit. * * TRANSB - CHARACTER*1. * On entry, TRANSB specifies the form of op( B ) to be used in * the matrix multiplication as follows: * * TRANSB = 'N' or 'n', op( B ) = B. * * TRANSB = 'T' or 't', op( B ) = B'. * * TRANSB = 'C' or 'c', op( B ) = B'. * * Unchanged on exit. * * M - INTEGER. * On entry, M specifies the number of rows of the matrix * op( A ) and of the matrix C. M must be at least zero. * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the number of columns of the matrix * op( B ) and the number of columns of the matrix C. N must be * at least zero. * Unchanged on exit. * * K - INTEGER. * On entry, K specifies the number of columns of the matrix * op( A ) and the number of rows of the matrix op( B ). K must * be at least zero. * Unchanged on exit. * * ALPHA - DOUBLE PRECISION. * On entry, ALPHA specifies the scalar alpha. * Unchanged on exit. * * A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is * k when TRANSA = 'N' or 'n', and is m otherwise. * Before entry with TRANSA = 'N' or 'n', the leading m by k * part of the array A must contain the matrix A, otherwise * the leading k by m part of the array A must contain the * matrix A. * Unchanged on exit. * * LDA - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. When TRANSA = 'N' or 'n' then * LDA must be at least max( 1, m ), otherwise LDA must be at * least max( 1, k ). * Unchanged on exit. * * B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is * n when TRANSB = 'N' or 'n', and is k otherwise. * Before entry with TRANSB = 'N' or 'n', the leading k by n * part of the array B must contain the matrix B, otherwise * the leading n by k part of the array B must contain the * matrix B. * Unchanged on exit. * * LDB - INTEGER. * On entry, LDB specifies the first dimension of B as declared * in the calling (sub) program. When TRANSB = 'N' or 'n' then * LDB must be at least max( 1, k ), otherwise LDB must be at * least max( 1, n ). * Unchanged on exit. * * BETA - DOUBLE PRECISION. * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then C need not be set on input. * Unchanged on exit. * * C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). * Before entry, the leading m by n part of the array C must * contain the matrix C, except when beta is zero, in which * case C need not be set on entry. * On exit, the array C is overwritten by the m by n matrix * ( alpha*op( A )*op( B ) + beta*C ). * * LDC - INTEGER. * On entry, LDC specifies the first dimension of C as declared * in the calling (sub) program. LDC must be at least * max( 1, m ). * Unchanged on exit. * * * Level 3 Blas routine. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * -- Restructured, blocked and tuned for IBM 3090 VF in August-1989. * Per Ling, * Institute of Information Processing, University of Umea, Sweden. * V1.R0. * * * .. External Functions .. X LOGICAL LSAME, CLD X EXTERNAL LSAME, CLD * .. External Subroutines .. X EXTERNAL XERBLA * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Local Scalars .. X LOGICAL NOTA, NOTB X LOGICAL CLDA X INTEGER INFO, NCOLA, NROWA, NROWB X INTEGER I, II, J, JJ, L, LL X DOUBLE PRECISION TEMP * .. Parameters .. X DOUBLE PRECISION ONE , ZERO X PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) X INTEGER VSS, EFF, EFF2 X PARAMETER ( VSS = 128, EFF = 48, EFF2 = 32 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the Effective Cache Size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) * .. * .. Executable Statements .. * * Set NOTA and NOTB as true if A and B respectively are not * transposed and set NROWA, NCOLA and NROWB as the number of rows * and columns of A and the number of rows of B respectively. * X NOTA = LSAME( TRANSA, 'N' ) X NOTB = LSAME( TRANSB, 'N' ) X IF( NOTA )THEN X NROWA = M X NCOLA = K X ELSE X NROWA = K X NCOLA = M X END IF X IF( NOTB )THEN X NROWB = K X ELSE X NROWB = N X END IF * * Test the input parameters. * X INFO = 0 X IF( ( .NOT.NOTA ).AND. X + ( .NOT.LSAME( TRANSA, 'C' ) ).AND. X + ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN X INFO = 1 X ELSE IF( ( .NOT.NOTB ).AND. X + ( .NOT.LSAME( TRANSB, 'C' ) ).AND. X + ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN X INFO = 2 X ELSE IF( M .LT.0 )THEN X INFO = 3 X ELSE IF( N .LT.0 )THEN X INFO = 4 X ELSE IF( K .LT.0 )THEN X INFO = 5 X ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN X INFO = 8 X ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN X INFO = 10 X ELSE IF( LDC.LT.MAX( 1, M ) )THEN X INFO = 13 X END IF X IF( INFO.NE.0 )THEN X CALL XERBLA( 'DGEMM ', INFO ) X RETURN X END IF * * Quick return if possible. * X IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. X + ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) X + RETURN * * And if alpha.eq.zero. * X IF( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) )THEN X IF( BETA.EQ.ZERO )THEN X DO 20, I = 1, M X DO 10, J = 1, N X C( I, J ) = ZERO X 10 CONTINUE X 20 CONTINUE X ELSE X DO 40, I = 1, M X DO 30, J = 1, N X C( I, J ) = BETA*C( I, J ) X 30 CONTINUE X 40 CONTINUE X END IF X RETURN X END IF * * Start the operations. * X IF( NOTB )THEN X IF( NOTA )THEN * * Form C := alpha*A*B + beta*C. * X DO 120, LL = 1, K, EFF X DO 110, II = 1, M, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 60, I = 1, MIN( VSS, M-II+1 ) X DO 50, J = N, 1, -1 X C( I+II-1, J ) = BETA*C( I+II-1, J ) X 50 CONTINUE X 60 CONTINUE X ENDIF X DO 100, I = 1, MIN( VSS, M-II+1 ) X DO 70, L = 1, MIN( EFF, K-LL+1 ) X T1( I, L ) = ALPHA*A( I+II-1, L+LL-1 ) X 70 CONTINUE X DO 90, J = 1, N X TEMP = C( I+II-1, J ) X DO 80, L = 1, MIN( EFF, K-LL+1 ) X TEMP = TEMP + B( L+LL-1, J )*T1( I, L ) X 80 CONTINUE X C( I+II-1, J ) = TEMP X 90 CONTINUE X 100 CONTINUE X 110 CONTINUE X 120 CONTINUE X ELSE * * Form C := alpha*A'*B + beta*C. * X CLDA = CLD( LDA ) X DO 230, LL = 1, K, EFF X DO 220, II = 1, M, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 140, I = 1, MIN( VSS, M-II+1 ) X DO 130, J = N, 1, -1 X C( I+II-1, J ) = BETA*C( I+II-1, J ) X 130 CONTINUE X 140 CONTINUE X ENDIF X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 160, L = 1, MIN( EFF, K-LL+1 ) X DO 150, I = 1, MIN( VSS, M-II+1 ) X T1( I, L ) = ALPHA*A( L+LL-1, I+II-1 ) X 150 CONTINUE X 160 CONTINUE X ELSE C*VDIR: PREFER VECTOR X DO 180, I = 1, MIN( VSS, M-II+1 ) X DO 170, L = 1, MIN( EFF, K-LL+1 ) X T1( I, L ) = ALPHA*A( L+LL-1, I+II-1 ) X 170 CONTINUE X 180 CONTINUE X END IF X DO 210, I = 1, MIN( VSS, M-II+1 ) X DO 200, J = 1, N X TEMP = C( I+II-1, J ) X DO 190, L = 1, MIN( EFF, K-LL+1 ) X TEMP = TEMP + B( L+LL-1, J )*T1( I, L ) X 190 CONTINUE X C( I+II-1, J ) = TEMP X 200 CONTINUE X 210 CONTINUE X 220 CONTINUE X 230 CONTINUE X END IF X ELSE X IF( NOTA )THEN * * Form C := alpha*A*B' + beta*C. * X IF( CLD( LDB ) )THEN X DO 350, LL = 1, K, EFF X DO 340, II = 1, M, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 250, I = 1, MIN( VSS, M-II+1 ) X DO 240, J = N, 1, -1 X C( I+II-1, J ) = BETA*C( I+II-1, J ) X 240 CONTINUE X 250 CONTINUE X END IF X DO 270, I = 1, MIN( VSS, M-II+1 ) X DO 260, L = 1, MIN( EFF, K-LL+1 ) X T1( I, L ) = ALPHA*A( I+II-1, L+LL-1 ) X 260 CONTINUE X 270 CONTINUE X DO 330, JJ = 1, N, EFF C*VDIR: PREFER VECTOR X DO 290, J = 1, MIN( EFF, N-JJ+1 ) X DO 280, L = 1, MIN( EFF, K-LL+1 ) X T2( L, J ) = B( J+JJ-1, L+LL-1 ) X 280 CONTINUE X 290 CONTINUE X DO 320, I = 1, MIN( VSS, M-II+1 ) X DO 310, J = 1, MIN( EFF, N-JJ+1 ) X TEMP = C( I+II-1, J+JJ-1 ) X DO 300, L = 1, MIN( EFF, K-LL+1 ) X TEMP = TEMP + T2( L, J )*T1( I, L ) X 300 CONTINUE X C( I+II-1, J+JJ-1 ) = TEMP X 310 CONTINUE X 320 CONTINUE X 330 CONTINUE X 340 CONTINUE X 350 CONTINUE X ELSE X DO 440, LL = 1, K, EFF2 X DO 430, II = 1, M, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 370, I = 1, MIN( VSS, M-II+1 ) X DO 360, J = N, 1, -1 X C( I+II-1, J ) = BETA*C( I+II-1, J ) X 360 CONTINUE X 370 CONTINUE X ENDIF X DO 390, I = 1, MIN( VSS, M-II+1 ) X DO 380, L = 1, MIN( EFF2, K-LL+1 ) X T1( I, L ) = ALPHA*A( I+II-1, L+LL-1 ) X 380 CONTINUE X 390 CONTINUE X DO 420, I = 1, MIN( VSS, M-II+1 ) X DO 410, J = 1, N X TEMP = C( I+II-1, J ) X DO 400, L = 1, MIN( EFF2, K-LL+1 ) X TEMP = TEMP + B( J, L+LL-1 )*T1( I, L ) X 400 CONTINUE X C( I+II-1, J ) = TEMP X 410 CONTINUE X 420 CONTINUE X 430 CONTINUE X 440 CONTINUE X END IF X ELSE * * Form C := alpha*A'*B' + beta*C, using * C := beta*C + T1', T1 := alpha*B*A. * X IF( CLD( LDB ) )THEN X DO 560, LL = 1, K, EFF X DO 550, JJ = 1, N, VSS X DO 460, J = 1, MIN( VSS, N-JJ+1 ) X DO 450, L = 1, MIN( EFF, K-LL+1 ) X T2( J, L ) = ALPHA*B( J+JJ-1, L+LL-1 ) X 450 CONTINUE X 460 CONTINUE X DO 540, II = 1, M, EFF X DO 490, J = 1, MIN( VSS, N-JJ+1 ) X DO 480, I = 1, MIN( EFF, M-II+1 ) X TEMP = ZERO X DO 470, L = 1, MIN( EFF, K-LL+1 ) X TEMP = TEMP + X + A( L+LL-1, I+II-1 )*T2( J, L ) X 470 CONTINUE X T1( J, I ) = TEMP X 480 CONTINUE X 490 CONTINUE X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 510, I = 1, MIN( EFF, M-II+1 ) X DO 500, J = 1, MIN( VSS, N-JJ+1 ) X C( I+II-1, J+JJ-1 ) = X + BETA*C( I+II-1, J+JJ-1 ) + T1( J, I ) X 500 CONTINUE X 510 CONTINUE X ELSE X DO 530, I = 1, MIN( EFF, M-II+1 ) X DO 520, J = 1, MIN( VSS, N-JJ+1 ) X C( I+II-1, J+JJ-1 ) = X + C( I+II-1, J+JJ-1 ) + T1( J, I ) X 520 CONTINUE X 530 CONTINUE X ENDIF X 540 CONTINUE X 550 CONTINUE X 560 CONTINUE X ELSE X CLDA = CLD( LDA ) X DO 670, LL = 1, K, EFF2 X DO 660, II = 1, M, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 580, I = 1, MIN( VSS, M-II+1 ) X DO 570, J = N, 1, -1 X C( I+II-1, J ) = BETA*C( I+II-1, J ) X 570 CONTINUE X 580 CONTINUE X ENDIF X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 600, L = 1, MIN( EFF2, K-LL+1 ) X DO 590, I = 1, MIN( VSS, M-II+1 ) X T1( I, L ) = ALPHA*A( L+LL-1, I+II-1 ) X 590 CONTINUE X 600 CONTINUE X ELSE C*VDIR: PREFER VECTOR X DO 620, I = 1, MIN( VSS, M-II+1 ) X DO 610, L = 1, MIN( EFF2, K-LL+1 ) X T1( I, L ) = ALPHA*A( L+LL-1, I+II-1 ) X 610 CONTINUE X 620 CONTINUE X END IF X DO 650, I = 1, MIN( VSS, M-II+1 ) X DO 640, J = 1, N X TEMP = C( I+II-1, J ) X DO 630, L = 1, MIN( EFF2, K-LL+1 ) X TEMP = TEMP + B( J, L+LL-1 )*T1( I, L ) X 630 CONTINUE X C( I+II-1, J ) = TEMP X 640 CONTINUE X 650 CONTINUE X 660 CONTINUE X 670 CONTINUE X END IF X END IF X END IF * X RETURN * * End of DGEMM . * X END SHAR_EOF chmod 0600 dgemm.f || echo 'restore of dgemm.f failed' Wc_c="`wc -c < 'dgemm.f'`" test 16829 -eq "$Wc_c" || echo 'dgemm.f: original size 16829, current size' "$Wc_c" fi # ============= dsymm.f ============== if test -f 'dsymm.f' -a X"$1" != X"-c"; then echo 'x - skipping dsymm.f (File already exists)' else echo 'x - extracting dsymm.f (Text)' sed 's/^X//' << 'SHAR_EOF' > 'dsymm.f' && @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE DSYMM( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, X + BETA, C, LDC ) * .. Scalar Arguments .. X CHARACTER*1 SIDE, UPLO X INTEGER M, N, LDA, LDB, LDC X DOUBLE PRECISION ALPHA, BETA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) * .. * * Purpose * ======= * * DSYMM performs one of the matrix-matrix operations * * C := alpha*A*B + beta*C, * * or * * C := alpha*B*A + beta*C, * * where alpha and beta are scalars, A is a symmetric matrix and B and * C are m by n matrices. * * Parameters * ========== * * SIDE - CHARACTER*1. * On entry, SIDE specifies whether the symmetric matrix A * appears on the left or right in the operation as follows: * * SIDE = 'L' or 'l' C := alpha*A*B + beta*C, * * SIDE = 'R' or 'r' C := alpha*B*A + beta*C, * * Unchanged on exit. * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the upper or lower * triangular part of the symmetric matrix A is to be * referenced as follows: * * UPLO = 'U' or 'u' Only the upper triangular part of the * symmetric matrix is to be referenced. * * UPLO = 'L' or 'l' Only the lower triangular part of the * symmetric matrix is to be referenced. * * Unchanged on exit. * * M - INTEGER. * On entry, M specifies the number of rows of the matrix C. * M must be at least zero. * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the number of columns of the matrix C. * N must be at least zero. * Unchanged on exit. * * ALPHA - DOUBLE PRECISION. * On entry, ALPHA specifies the scalar alpha. * Unchanged on exit. * * A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is * m when SIDE = 'L' or 'l' and is n otherwise. * Before entry with SIDE = 'L' or 'l', the m by m part of * the array A must contain the symmetric matrix, such that * when UPLO = 'U' or 'u', the leading m by m upper triangular * part of the array A must contain the upper triangular part * of the symmetric matrix and the strictly lower triangular * part of A is not referenced, and when UPLO = 'L' or 'l', * the leading m by m lower triangular part of the array A * must contain the lower triangular part of the symmetric * matrix and the strictly upper triangular part of A is not * referenced. * Before entry with SIDE = 'R' or 'r', the n by n part of * the array A must contain the symmetric matrix, such that * when UPLO = 'U' or 'u', the leading n by n upper triangular * part of the array A must contain the upper triangular part * of the symmetric matrix and the strictly lower triangular * part of A is not referenced, and when UPLO = 'L' or 'l', * the leading n by n lower triangular part of the array A * must contain the lower triangular part of the symmetric * matrix and the strictly upper triangular part of A is not * referenced. * Unchanged on exit. * * LDA - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. When SIDE = 'L' or 'l' then * LDA must be at least max( 1, m ), otherwise LDA must be at * least max( 1, n ). * Unchanged on exit. * * B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). * Before entry, the leading m by n part of the array B must * contain the matrix B. * Unchanged on exit. * * LDB - INTEGER. * On entry, LDB specifies the first dimension of B as declared * in the calling (sub) program. LDB must be at least * max( 1, m ). * Unchanged on exit. * * BETA - DOUBLE PRECISION. * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then C need not be set on input. * Unchanged on exit. * * C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). * Before entry, the leading m by n part of the array C must * contain the matrix C, except when beta is zero, in which * case C need not be set on entry. * On exit, the array C is overwritten by the m by n updated * matrix. * * LDC - INTEGER. * On entry, LDC specifies the first dimension of C as declared * in the calling (sub) program. LDC must be at least * max( 1, m ). * Unchanged on exit. * * * Level 3 Blas routine. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * -- Restructured, blocked and tuned for IBM 3090 VF in August-1989. * Per Ling, * Institute of Information Processing, University of Umea, Sweden. * V1.R0. * * * .. External Functions .. X LOGICAL LSAME, CLD X EXTERNAL LSAME, CLD * .. External Subroutines .. X EXTERNAL XERBLA * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Local Scalars .. X LOGICAL UPPER, LSIDE, CLDA X INTEGER INFO, NROWA X INTEGER I, II, IIKK, J, JJ, K, KK X DOUBLE PRECISION TEMP * .. Parameters .. X DOUBLE PRECISION ONE , ZERO X PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the Effective Cache Size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) * .. * .. Executable Statements .. * * Set NROWA as the number of rows of A. * X UPPER = LSAME( UPLO, 'U' ) X LSIDE = LSAME( SIDE, 'L' ) X IF( LSIDE )THEN X NROWA = M X ELSE X NROWA = N X END IF * * Test the input parameters. * X INFO = 0 X IF( ( .NOT.LSIDE ).AND. X + ( .NOT.LSAME( SIDE, 'R' ) ) )THEN X INFO = 1 X ELSE IF( ( .NOT.UPPER ).AND. X + ( .NOT.LSAME( UPLO, 'L' ) ) )THEN X INFO = 2 X ELSE IF( M .LT.0 )THEN X INFO = 3 X ELSE IF( N .LT.0 )THEN X INFO = 4 X ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN X INFO = 7 X ELSE IF( LDB.LT.MAX( 1, M ) )THEN X INFO = 9 X ELSE IF( LDC.LT.MAX( 1, M ) )THEN X INFO = 12 X END IF X IF( INFO.NE.0 )THEN X CALL XERBLA( 'DSYMM ', INFO ) X RETURN X END IF * * Quick return if possible. * X IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. X + ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) X + RETURN * * And when alpha.eq.zero. * X IF( ALPHA.EQ.ZERO )THEN X IF( BETA.EQ.ZERO )THEN X DO 20, I = 1, M X DO 10, J = 1, N X C( I, J ) = ZERO X 10 CONTINUE X 20 CONTINUE X ELSE X DO 40, I = 1, M X DO 30, J = 1, N X C( I, J ) = BETA*C( I, J ) X 30 CONTINUE X 40 CONTINUE X END IF X RETURN X ENDIF * * Start the operations. * X IF( LSIDE )THEN X CLDA = CLD( LDA ) X IF( UPPER )THEN * * Form C := alpha*A*B + beta*C. * The upper triangular part of A is referenced. * X DO 210, KK = 1, M, EFF X DO 200, II = 1, M, VSS * * C := beta*C. * X IF( ( KK.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 60, I = II, MIN( II+VSS-1, M ) X DO 50, J = 1, N X C( I, J ) = BETA*C( I, J ) X 50 CONTINUE X 60 CONTINUE X END IF * * Copy a rectangular block of alpha*A to T1. * X IF( II.GE.KK+EFF-1 )THEN X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 80, K = KK, MIN( KK+EFF-1, M ) X DO 70, I = II, MIN( II+VSS-1, M ) X T1( I-II+1, K-KK+1 ) = ALPHA*A( K, I ) X 70 CONTINUE X 80 CONTINUE X ELSE X DO 100, I = II, MIN( II+VSS-1, M ) X DO 90, K = KK, MIN( KK+EFF-1, M ) X T1( I-II+1, K-KK+1 ) = ALPHA*A( K, I ) X 90 CONTINUE X 100 CONTINUE X END IF X ELSE IF( ( II.LT.KK+EFF-1 ).AND. X + ( II+VSS-1.GT.KK ) )THEN X IIKK = MAX( II, KK ) X DO 120, K = IIKK, MIN( KK+EFF-1, M ) X DO 110, I = II, MIN( II+VSS-1, K-1) X T1( I-II+1, K-KK+1 ) = ALPHA*A( I, K ) X 110 CONTINUE X 120 CONTINUE X DO 140, K = IIKK, MIN( II+VSS-1, M ) C*VDIR: PREFER VECTOR X DO 130, I = KK, MIN( KK+EFF-1, K ) X T1( K-II+1, I-KK+1 ) = ALPHA*A( I, K ) X 130 CONTINUE X 140 CONTINUE X ELSE IF( II+VSS-1.LE.KK )THEN X DO 160, I = II, MIN( II+VSS-1, M ) X DO 150, K = KK, MIN( KK+EFF-1, M ) X T1( I-II+1, K-KK+1 ) = ALPHA*A( I, K ) X 150 CONTINUE X 160 CONTINUE X END IF X DO 190 I = II, MIN( II+VSS-1, M ) X DO 180, J = 1, N X TEMP = C( I, J ) X DO 170, K = KK, MIN( KK+EFF-1, M ) X TEMP = TEMP + B( K, J )*T1( I-II+1, K-KK+1 ) X 170 CONTINUE X C( I, J ) = TEMP X 180 CONTINUE X 190 CONTINUE X 200 CONTINUE X 210 CONTINUE X ELSE * * Form C := alpha*A*B + beta*C. * The lower triangular part of A is referenced. * X DO 380, KK = M, 1, -EFF X DO 370, II = M, 1, -VSS * * C := beta*C. * X IF( ( KK.EQ.M ).AND.( BETA.NE.ONE ) )THEN X DO 230, I = MAX( II-VSS+1, 1 ), II X DO 220, J = 1, N X C( I, J ) = BETA*C( I, J ) X 220 CONTINUE X 230 CONTINUE X END IF * * Copy a rectangular block of alpha*A to T1. * X IF( II.LE.KK-EFF+1 )THEN X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 250, K = MAX( KK-EFF+1, 1 ), KK X DO 240, I = MAX( II-VSS+1, 1 ), II X T1( I-II+VSS, K-KK+EFF ) = ALPHA*A( K, I ) X 240 CONTINUE X 250 CONTINUE X ELSE X DO 270, I = MAX( II-VSS+1, 1 ), II X DO 260, K = MAX( KK-EFF+1, 1 ), KK X T1( I-II+VSS, K-KK+EFF ) = ALPHA*A( K, I ) X 260 CONTINUE X 270 CONTINUE X END IF X ELSE IF( ( II.GT.KK-EFF+1 ).AND. X + ( II-VSS+1.LT.KK ) )THEN X IIKK = MIN( II, KK ) X DO 290, K = MAX( KK-EFF+1, 1 ), IIKK X DO 280, I = MAX( II-VSS+1, K+1 ), II X T1( I-II+VSS, K-KK+EFF ) = ALPHA*A( I, K ) X 280 CONTINUE X 290 CONTINUE X DO 310, K = MAX( II-VSS+1, 1 ), IIKK C*VDIR: PREFER VECTOR X DO 300, I = MAX( KK-EFF+1, K ), KK X T1( K-II+VSS, I-KK+EFF ) = ALPHA*A( I, K ) X 300 CONTINUE X 310 CONTINUE X ELSE IF( II-VSS+1.GE.KK )THEN X DO 330, I = MAX( II-VSS+1, 1 ), II X DO 320, K = MAX( KK-EFF+1, 1 ), KK X T1( I-II+VSS, K-KK+EFF ) = ALPHA*A( I, K ) X 320 CONTINUE X 330 CONTINUE X END IF X DO 360 I = MAX( II-VSS+1, 1 ), II X DO 350, J = 1, N X TEMP = C( I, J ) X DO 340, K = MAX( KK-EFF+1, 1 ), KK X TEMP = TEMP + B( K, J )* X + T1( I-II+VSS, K-KK+EFF ) X 340 CONTINUE X C( I, J ) = TEMP X 350 CONTINUE X 360 CONTINUE X 370 CONTINUE X 380 CONTINUE X END IF X ELSE X IF( UPPER )THEN * * Form C := alpha*B*A + beta*C. * The upper triangular part of A is referenced. * X DO 550, KK = 1, N, EFF X DO 540, II = 1, M, VSS X IF( ( KK.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 400, I = II, MIN( II+VSS-1, M ) X DO 390, J = 1, N X C( I, J ) = BETA*C( I, J ) X 390 CONTINUE X 400 CONTINUE X END IF X DO 420, I = II, MIN( II+VSS-1, M ) X DO 410, K = KK, MIN( KK+EFF-1, N ) X T1( I-II+1, K-KK+1 ) = ALPHA*B( I, K ) X 410 CONTINUE X 420 CONTINUE X DO 530, JJ = 1, N, EFF * * Copy a rectangular block of A to T2. * X IF( JJ.LT.KK )THEN C*VDIR: PREFER VECTOR X DO 440, J = JJ, MIN( JJ+EFF-1, N ) X DO 430, K = KK, MIN( KK+EFF-1, N ) X T2( K-KK+1, J-JJ+1 ) = A( J, K ) X 430 CONTINUE X 440 CONTINUE X ELSE IF( JJ.EQ.KK )THEN C*VDIR: PREFER VECTOR X DO 460, J = JJ, MIN( JJ+EFF-1, N ) X T2( J-JJ+1, J-JJ+1 ) = A( J, J ) C*VDIR: IGNORE RECRDEPS C*VDIR: PREFER VECTOR X DO 450, K = KK, J-1 X T2( K-KK+1, J-JJ+1 ) = A( K, J ) X T2( J-JJ+1, K-KK+1 ) = X + T2( K-KK+1, J-JJ+1 ) X 450 CONTINUE X 460 CONTINUE X END IF * * Perform the matrix multiplication. * X IF( JJ.GT.KK )THEN X DO 490, I = II, MIN( II+VSS-1, M ) X DO 480, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = C( I, J ) X DO 470, K = KK, MIN( KK+EFF-1, N ) X TEMP = TEMP + A( K, J )* X + T1( I-II+1, K-KK+1 ) X 470 CONTINUE X C( I, J ) = TEMP X 480 CONTINUE X 490 CONTINUE X ELSE X DO 520, I = II, MIN( II+VSS-1, M ) X DO 510, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = C( I, J ) X DO 500, K = KK, MIN( KK+EFF-1, N ) X TEMP = TEMP + T2( K-KK+1, J-JJ+1 )* X + T1( I-II+1, K-KK+1 ) X 500 CONTINUE X C( I, J ) = TEMP X 510 CONTINUE X 520 CONTINUE X END IF X 530 CONTINUE X 540 CONTINUE X 550 CONTINUE X ELSE * * Form C := alpha*B*A + beta*C. * The lower triangular part of A is referenced. * X DO 720, KK = 1, N, EFF X DO 710, II = 1, M, VSS X IF( ( KK.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 570, I = II, MIN( II+VSS-1, M ) X DO 560, J = 1, N X C( I, J ) = BETA*C( I, J ) X 560 CONTINUE X 570 CONTINUE X END IF X DO 590, I = II, MIN( II+VSS-1, M ) X DO 580, K = KK, MIN( KK+EFF-1, N ) X T1( I-II+1, K-KK+1 ) = ALPHA*B( I, K ) X 580 CONTINUE X 590 CONTINUE X DO 700, JJ = 1, N, EFF * * Copy a rectangular block of A to T2. * X IF( JJ.GT.KK )THEN C*VDIR: PREFER VECTOR X DO 610, J = JJ, MIN( JJ+EFF-1, N ) X DO 600, K = KK, MIN( KK+EFF-1, N ) X T2( K-KK+1, J-JJ+1 ) = A( J, K ) X 600 CONTINUE X 610 CONTINUE X ELSE IF( JJ.EQ.KK )THEN C*VDIR: PREFER VECTOR X DO 630, J = JJ, MIN( JJ+EFF-1, N ) X T2( J-JJ+1, J-JJ+1 ) = A( J, J ) C*VDIR: IGNORE RECRDEPS C*VDIR: PREFER VECTOR X DO 620, K = J+1, MIN( KK+EFF-1, N ) X T2( K-KK+1, J-JJ+1 ) = A( K, J ) X T2( J-JJ+1, K-KK+1 ) = X + T2( K-KK+1, J-JJ+1 ) X 620 CONTINUE X 630 CONTINUE X END IF * * Perform the matrix multiplication. * X IF( JJ.LT.KK )THEN X DO 660, I = II, MIN( II+VSS-1, M ) X DO 650, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = C( I, J ) X DO 640, K = KK, MIN( KK+EFF-1, N ) X TEMP = TEMP + A( K, J )* X + T1( I-II+1, K-KK+1 ) X 640 CONTINUE X C( I, J ) = TEMP X 650 CONTINUE X 660 CONTINUE X ELSE X DO 690, I = II, MIN( II+VSS-1, M ) X DO 680, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = C( I, J ) X DO 670, K = KK, MIN( KK+EFF-1, N ) X TEMP = TEMP + T2( K-KK+1, J-JJ+1 )* X + T1( I-II+1, K-KK+1 ) X 670 CONTINUE X C( I, J ) = TEMP X 680 CONTINUE X 690 CONTINUE X END IF X 700 CONTINUE X 710 CONTINUE X 720 CONTINUE X END IF X END IF * X RETURN * * End of DSYMM . * X END SHAR_EOF chmod 0600 dsymm.f || echo 'restore of dsymm.f failed' Wc_c="`wc -c < 'dsymm.f'`" test 19467 -eq "$Wc_c" || echo 'dsymm.f: original size 19467, current size' "$Wc_c" fi # ============= dsyr2k.f ============== if test -f 'dsyr2k.f' -a X"$1" != X"-c"; then echo 'x - skipping dsyr2k.f (File already exists)' else echo 'x - extracting dsyr2k.f (Text)' sed 's/^X//' << 'SHAR_EOF' > 'dsyr2k.f' && @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE DSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, X + BETA, C, LDC ) * .. Scalar Arguments .. X CHARACTER*1 UPLO, TRANS X INTEGER N, K, LDA, LDB, LDC X DOUBLE PRECISION ALPHA, BETA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) * .. * * Purpose * ======= * * DSYR2K performs one of the symmetric rank 2k operations * * C := alpha*A*B' + alpha*B*A' + beta*C, * * or * * C := alpha*A'*B + alpha*B'*A + beta*C, * * where alpha and beta are scalars, C is an n by n symmetric matrix * and A and B are n by k matrices in the first case and k by n * matrices in the second case. * * Parameters * ========== * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the upper or lower * triangular part of the array C is to be referenced as * follows: * * UPLO = 'U' or 'u' Only the upper triangular part of C * is to be referenced. * * UPLO = 'L' or 'l' Only the lower triangular part of C * is to be referenced. * * Unchanged on exit. * * TRANS - CHARACTER*1. * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + * beta*C. * * TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + * beta*C. * * TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + * beta*C. * * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the order of the matrix C. N must be * at least zero. * Unchanged on exit. * * K - INTEGER. * On entry with TRANS = 'N' or 'n', K specifies the number * of columns of the matrices A and B, and on entry with * TRANS = 'T' or 't' or 'C' or 'c', K specifies the number * of rows of the matrices A and B. K must be at least zero. * Unchanged on exit. * * ALPHA - DOUBLE PRECISION. * On entry, ALPHA specifies the scalar alpha. * Unchanged on exit. * * A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is * k when TRANS = 'N' or 'n', and is n otherwise. * Before entry with TRANS = 'N' or 'n', the leading n by k * part of the array A must contain the matrix A, otherwise * the leading k by n part of the array A must contain the * matrix A. * Unchanged on exit. * * LDA - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. When TRANS = 'N' or 'n' * then LDA must be at least max( 1, n ), otherwise LDA must * be at least max( 1, k ). * Unchanged on exit. * * B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is * k when TRANS = 'N' or 'n', and is n otherwise. * Before entry with TRANS = 'N' or 'n', the leading n by k * part of the array B must contain the matrix B, otherwise * the leading k by n part of the array B must contain the * matrix B. * Unchanged on exit. * * LDB - INTEGER. * On entry, LDB specifies the first dimension of B as declared * in the calling (sub) program. When TRANS = 'N' or 'n' * then LDB must be at least max( 1, n ), otherwise LDB must * be at least max( 1, k ). * Unchanged on exit. * * BETA - DOUBLE PRECISION. * On entry, BETA specifies the scalar beta. * Unchanged on exit. * * C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). * Before entry with UPLO = 'U' or 'u', the leading n by n * upper triangular part of the array C must contain the upper * triangular part of the symmetric matrix and the strictly * lower triangular part of C is not referenced. On exit, the * upper triangular part of the array C is overwritten by the * upper triangular part of the updated matrix. * Before entry with UPLO = 'L' or 'l', the leading n by n * lower triangular part of the array C must contain the lower * triangular part of the symmetric matrix and the strictly * upper triangular part of C is not referenced. On exit, the * lower triangular part of the array C is overwritten by the * lower triangular part of the updated matrix. * * LDC - INTEGER. * On entry, LDC specifies the first dimension of C as declared * in the calling (sub) program. LDC must be at least * max( 1, n ). * Unchanged on exit. * * * Level 3 Blas routine. * * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * -- Restructured, blocked and tuned for IBM 3090 VF in August-1989. * Per Ling, * Institute of Information Processing, University of Umea, Sweden. * V1.R0. * * * .. External Functions .. X LOGICAL LSAME, CLD X EXTERNAL LSAME, CLD * .. External Subroutines .. X EXTERNAL XERBLA * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Local Scalars .. X LOGICAL UPPER, NOTR, CLDA X INTEGER INFO, NROWA X INTEGER I, II, J, JJ, L, LL X DOUBLE PRECISION TEMP * .. Parameters .. X DOUBLE PRECISION ONE , ZERO X PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ), T3( VSS, EFF ) * .. * .. Executable Statements .. * * Test the input parameters. * X UPPER = LSAME( UPLO, 'U' ) X NOTR = LSAME( TRANS, 'N' ) X IF( NOTR )THEN X NROWA = N X ELSE X NROWA = K X END IF * X INFO = 0 X IF( ( .NOT.UPPER ).AND. X + ( .NOT.LSAME( UPLO , 'L' ) ) )THEN X INFO = 1 X ELSE IF( ( .NOT.NOTR ).AND. X + ( .NOT.LSAME( TRANS, 'T' ) ).AND. X + ( .NOT.LSAME( TRANS, 'C' ) ) )THEN X INFO = 2 X ELSE IF( N .LT.0 )THEN X INFO = 3 X ELSE IF( K .LT.0 )THEN X INFO = 4 X ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN X INFO = 7 X ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN X INFO = 9 X ELSE IF( LDC.LT.MAX( 1, N ) )THEN X INFO = 12 X END IF X IF( INFO.NE.0 )THEN X CALL XERBLA( 'DSYR2K', INFO ) X RETURN X END IF * * Quick return if possible. * X IF( ( N.EQ.0 ).OR. X + ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) X + RETURN * * And when alpha.eq.zero or k.eq.0. * X IF( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) )THEN X IF( UPPER )THEN X IF( BETA.EQ.ZERO )THEN X DO 20, J = 1, N X DO 10, I = 1, J X C( I, J ) = ZERO X 10 CONTINUE X 20 CONTINUE X ELSE X DO 40, J = 1, N X DO 30, I = 1, J X C( I, J ) = BETA*C( I, J ) X 30 CONTINUE X 40 CONTINUE X END IF X ELSE X IF( BETA.EQ.ZERO )THEN X DO 60, J = 1, N X DO 50, I = J, N X C( I, J ) = ZERO X 50 CONTINUE X 60 CONTINUE X ELSE X DO 80, J = 1, N X DO 70, I = J, N X C( I, J ) = BETA*C( I, J ) X 70 CONTINUE X 80 CONTINUE X END IF X END IF X RETURN X END IF * * Start the operations. * X CLDA = CLD( LDA ) X IF( UPPER )THEN X DO 370, LL = K, 1, -EFF X DO 360, II = 1, N, VSS * * Copy a rectangular block of alpha*A or alpha*A' to T1. * X IF( NOTR )THEN X DO 100, I = II, MIN( II+VSS-1, N ) X DO 90, L = MAX( LL-EFF+1, 1 ), LL X T1( I-II+1, L-LL+EFF ) = ALPHA*A( I, L ) X 90 CONTINUE X 100 CONTINUE X ELSE X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 120, L = MAX( LL-EFF+1, 1 ), LL X DO 110, I = II, MIN( II+VSS-1, N ) X T1( I-II+1, L-LL+EFF ) = ALPHA*A( L, I ) X 110 CONTINUE X 120 CONTINUE X ELSE C*VDIR: PREFER VECTOR X DO 140, I = II, MIN( II+VSS-1, N ) X DO 130, L = MAX( LL-EFF+1, 1 ), LL X T1( I-II+1, L-LL+EFF ) = ALPHA*A( L, I ) X 130 CONTINUE X 140 CONTINUE X END IF X END IF X DO 350, JJ = N, 1, -EFF * * Form T3 := alpha*A*B' or T3 := alpha*A'*B. * X IF( NOTR )THEN C*VDIR: PREFER VECTOR X DO 160, J = MAX( JJ-EFF+1, 1 ), JJ X DO 150, L = MAX( LL-EFF+1, 1 ), LL X T2( L-LL+EFF, J-JJ+EFF ) = B( J, L ) X 150 CONTINUE X 160 CONTINUE X DO 190, I = II, MIN( II+VSS-1, N ) X DO 180, J = MAX( JJ-EFF+1, 1 ), JJ X TEMP = ZERO X DO 170, L = MAX( LL-EFF+1, 1 ), LL X TEMP = TEMP + T2( L-LL+EFF, J-JJ+EFF )* X + T1( I-II+1, L-LL+EFF ) X 170 CONTINUE X T3( I-II+1, J-JJ+EFF ) = TEMP X 180 CONTINUE X 190 CONTINUE X ELSE X DO 220, I = II, MIN( II+VSS-1, N ) X DO 210, J = MAX( JJ-EFF+1, 1 ), JJ X TEMP = ZERO X DO 200, L = MAX( LL-EFF+1, 1 ), LL X TEMP = TEMP + B( L, J )* X + T1( I-II+1, L-LL+EFF ) X 200 CONTINUE X T3( I-II+1, J-JJ+EFF ) = TEMP X 210 CONTINUE X 220 CONTINUE X END IF * * Form C := alpha*A*B' + alpha*B*A' + beta*C * or C := alpha*A'*B + alpha*B'*A + beta*C * by C := T3 + T3' + beta*C. * X IF( II+VSS-1.LT.JJ-EFF+1 )THEN X IF( ( LL.LT.K ).OR.( BETA.EQ.ONE ) )THEN X DO 240, I = II, MIN( II+VSS-1, N ) X DO 230, J = MAX( JJ-EFF+1, 1 ), JJ X C( I, J ) = C( I, J ) + X + T3( I-II+1, J-JJ+EFF ) X 230 CONTINUE X 240 CONTINUE X ELSE X DO 260, I = II, MIN( II+VSS-1, N ) X DO 250, J = MAX( JJ-EFF+1, 1 ), JJ X C( I, J ) = BETA*C( I, J ) + X + T3( I-II+1, J-JJ+EFF ) X 250 CONTINUE X 260 CONTINUE X END IF X ELSE IF( II.GT.JJ )THEN C*VDIR: PREFER VECTOR X DO 280, J = MAX( JJ-EFF+1, 1 ), JJ X DO 270, I = II, MIN( II+VSS-1, N ) X C( J, I ) = C( J, I ) + X + T3( I-II+1, J-JJ+EFF ) X 270 CONTINUE X 280 CONTINUE X ELSE X IF( ( LL.LT.K ).OR.( BETA.EQ.ONE ) )THEN X DO 300, J = MAX( JJ-EFF+1, II, 1 ), JJ C*VDIR: PREFER VECTOR X DO 290, I = II, MIN( II+VSS-1, J ) X C( I, J ) = C( I, J ) + X + T3( I-II+1, J-JJ+EFF ) X 290 CONTINUE X 300 CONTINUE X ELSE X DO 320, J = MAX( JJ-EFF+1, II, 1 ), JJ C*VDIR: PREFER VECTOR X DO 310, I = II, MIN( II+VSS-1, J ) X C( I, J ) = BETA*C( I, J ) + X + T3( I-II+1, J-JJ+EFF ) X 310 CONTINUE X 320 CONTINUE X END IF X DO 340, I = II, MIN( II+VSS-1, N ) C*VDIR: PREFER VECTOR X DO 330, J = MAX( JJ-EFF+1, 1 ), MIN( I, JJ ) X C( J, I ) = C( J, I ) + X + T3( I-II+1, J-JJ+EFF ) X 330 CONTINUE X 340 CONTINUE X END IF X 350 CONTINUE X 360 CONTINUE X 370 CONTINUE X ELSE * * If C is lower triangular. * X DO 660, LL = 1, K, EFF X DO 650, II = N, 1, -VSS * * Copy a rectangular block of alpha*A or alpha*A' to T1. * X IF( NOTR )THEN X DO 390, I = MAX( II-VSS+1, 1 ), II X DO 380, L = LL, MIN( LL+EFF-1, K ) X T1( I-II+VSS, L-LL+1 ) = ALPHA*A( I, L ) X 380 CONTINUE X 390 CONTINUE X ELSE X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 410, L = LL, MIN( LL+EFF-1, K ) X DO 400, I = MAX( II-VSS+1, 1 ), II X T1( I-II+VSS, L-LL+1 ) = ALPHA*A( L, I ) X 400 CONTINUE X 410 CONTINUE X ELSE X DO 430, I = MAX( II-VSS+1, 1 ), II X DO 420, L = LL, MIN( LL+EFF-1, K ) X T1( I-II+VSS, L-LL+1 ) = ALPHA*A( L, I ) X 420 CONTINUE X 430 CONTINUE X END IF X END IF X DO 640, JJ = 1, N, EFF * * Form T3 := alpha*A*B' or T3 := alpha*A'*B. * X IF( NOTR )THEN C*VDIR: PREFER VECTOR X DO 450, J = JJ, MIN( JJ+EFF-1, N ) X DO 440, L = LL, MIN( LL+EFF-1, K ) X T2( L-LL+1, J-JJ+1 ) = B( J, L ) X 440 CONTINUE X 450 CONTINUE X DO 480, I = MAX( II-VSS+1, 1 ), II X DO 470, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = ZERO X DO 460, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + T2( L-LL+1, J-JJ+1 )* X + T1( I-II+VSS, L-LL+1 ) X 460 CONTINUE X T3( I-II+VSS, J-JJ+1 ) = TEMP X 470 CONTINUE X 480 CONTINUE X ELSE X DO 510, I = MAX( II-VSS+1, 1 ), II X DO 500, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = ZERO X DO 490, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + B( L, J )* X + T1( I-II+VSS, L-LL+1 ) X 490 CONTINUE X T3( I-II+VSS, J-JJ+1 ) = TEMP X 500 CONTINUE X 510 CONTINUE X END IF * * Form C := alpha*A*B' + alpha*B*A' + beta*C * or C := alpha*A'*B + alpha*B'*A + beta*C * by C := T3 + T3' + beta*C. * X IF( II-VSS+1.GT.JJ+EFF-1 )THEN X IF( ( LL.GT.1 ).OR.( BETA.EQ.ONE ) )THEN X DO 530, I = MAX( II-VSS+1, 1 ), II X DO 520, J = JJ, MIN( JJ+EFF-1, N ) X C( I, J ) = C( I, J ) + X + T3( I-II+VSS, J-JJ+1 ) X 520 CONTINUE X 530 CONTINUE X ELSE X DO 550, I = MAX( II-VSS+1, 1 ), II X DO 540, J = JJ, MIN( JJ+EFF-1, N ) X C( I, J ) = BETA*C( I, J ) + X + T3( I-II+VSS, J-JJ+1 ) X 540 CONTINUE X 550 CONTINUE X END IF X ELSE IF( II.LT.JJ )THEN C*VDIR: PREFER VECTOR X DO 570, J = JJ, MIN( JJ+EFF-1, N ) X DO 560, I = MAX( II-VSS+1, 1 ), II X C( J, I ) = C( J, I ) + X + T3( I-II+VSS, J-JJ+1 ) X 560 CONTINUE X 570 CONTINUE X ELSE X IF( ( LL.GT.1 ).OR.( BETA.EQ.ONE ) )THEN X DO 590, J = JJ, MIN( JJ+EFF-1, II, N ) C*VDIR: PREFER VECTOR X DO 580, I = MAX( II-VSS+1, J ), II X C( I, J ) = C( I, J ) + X + T3( I-II+VSS, J-JJ+1 ) X 580 CONTINUE X 590 CONTINUE X ELSE X DO 610, J = JJ, MIN( JJ+EFF-1, II, N ) C*VDIR: PREFER VECTOR X DO 600, I = MAX( II-VSS+1, J ), II X C( I, J ) = BETA*C( I, J ) + X + T3( I-II+VSS, J-JJ+1 ) X 600 CONTINUE X 610 CONTINUE X END IF X DO 630, I = MAX( II-VSS+1, 1 ), II C*VDIR: PREFER VECTOR X DO 620, J = MAX( I, JJ ), MIN( JJ+EFF-1, N ) X C( J, I ) = C( J, I ) + X + T3( I-II+VSS, J-JJ+1 ) X 620 CONTINUE X 630 CONTINUE X END IF X 640 CONTINUE X 650 CONTINUE X 660 CONTINUE X END IF * X RETURN * * End of DSYR2K. * X END SHAR_EOF chmod 0600 dsyr2k.f || echo 'restore of dsyr2k.f failed' Wc_c="`wc -c < 'dsyr2k.f'`" test 18830 -eq "$Wc_c" || echo 'dsyr2k.f: original size 18830, current size' "$Wc_c" fi # ============= dsyrk.f ============== if test -f 'dsyrk.f' -a X"$1" != X"-c"; then echo 'x - skipping dsyrk.f (File already exists)' else echo 'x - extracting dsyrk.f (Text)' sed 's/^X//' << 'SHAR_EOF' > 'dsyrk.f' && @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE DSYRK( UPLO, TRANS, N, K, ALPHA, A, LDA, X + BETA, C, LDC ) * .. Scalar Arguments .. X CHARACTER*1 UPLO, TRANS X INTEGER N, K, LDA, LDC X DOUBLE PRECISION ALPHA, BETA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), C( LDC, * ) * .. * * Purpose * ======= * * DSYRK performs one of the symmetric rank k operations * * C := alpha*A*A' + beta*C, * * or * * C := alpha*A'*A + beta*C, * * where alpha and beta are scalars, C is an n by n symmetric matrix * and A is an n by k matrix in the first case and a k by n matrix * in the second case. * * Parameters * ========== * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the upper or lower * triangular part of the array C is to be referenced as * follows: * * UPLO = 'U' or 'u' Only the upper triangular part of C * is to be referenced. * * UPLO = 'L' or 'l' Only the lower triangular part of C * is to be referenced. * * Unchanged on exit. * * TRANS - CHARACTER*1. * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. * * TRANS = 'T' or 't' C := alpha*A'*A + beta*C. * * TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. * * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the order of the matrix C. N must be * at least zero. * Unchanged on exit. * * K - INTEGER. * On entry with TRANS = 'N' or 'n', K specifies the number * of columns of the matrix A, and on entry with * TRANS = 'T' or 't' or 'C' or 'c', K specifies the number * of rows of the matrix A. K must be at least zero. * Unchanged on exit. * * ALPHA - DOUBLE PRECISION. * On entry, ALPHA specifies the scalar alpha. * Unchanged on exit. * * A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is * k when TRANS = 'N' or 'n', and is n otherwise. * Before entry with TRANS = 'N' or 'n', the leading n by k * part of the array A must contain the matrix A, otherwise * the leading k by n part of the array A must contain the * matrix A. * Unchanged on exit. * * LDA - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. When TRANS = 'N' or 'n' * then LDA must be at least max( 1, n ), otherwise LDA must * be at least max( 1, k ). * Unchanged on exit. * * BETA - DOUBLE PRECISION. * On entry, BETA specifies the scalar beta. * Unchanged on exit. * * C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). * Before entry with UPLO = 'U' or 'u', the leading n by n * upper triangular part of the array C must contain the upper * triangular part of the symmetric matrix and the strictly * lower triangular part of C is not referenced. On exit, the * upper triangular part of the array C is overwritten by the * upper triangular part of the updated matrix. * Before entry with UPLO = 'L' or 'l', the leading n by n * lower triangular part of the array C must contain the lower * triangular part of the symmetric matrix and the strictly * upper triangular part of C is not referenced. On exit, the * lower triangular part of the array C is overwritten by the * lower triangular part of the updated matrix. * * LDC - INTEGER. * On entry, LDC specifies the first dimension of C as declared * in the calling (sub) program. LDC must be at least * max( 1, n ). * Unchanged on exit. * * * Level 3 Blas routine. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * -- Restructured, blocked and tuned for IBM 3090 VF in August-1989. * Per Ling, * Institute of Information Processing, University of Umea, Sweden. * V1.R0. * * * .. External Functions .. X LOGICAL LSAME, CLD X EXTERNAL LSAME, CLD * .. External Subroutines .. X EXTERNAL XERBLA * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Local Scalars .. X LOGICAL UPPER, NOTR, CLDA X INTEGER INFO, NROWA X INTEGER I, II, J, JJ, L, LL X DOUBLE PRECISION TEMP * .. Parameters .. X DOUBLE PRECISION ONE , ZERO X PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) X INTEGER VSS, EFF, EFF2 X PARAMETER ( VSS = 128, EFF = 48, EFF2 = 32 ) * .. * .. Additional Local Variables used in this tuned implementation .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the Effective Cache Size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) * .. * .. Executable Statements .. * * Test the input parameters. * X UPPER = LSAME( UPLO, 'U' ) X NOTR = LSAME( TRANS, 'N' ) X IF( NOTR )THEN X NROWA = N X ELSE X NROWA = K X END IF * X INFO = 0 X IF( ( .NOT.UPPER ).AND. X + ( .NOT.LSAME( UPLO , 'L' ) ) )THEN X INFO = 1 X ELSE IF( ( .NOT.NOTR ).AND. X + ( .NOT.LSAME( TRANS, 'T' ) ).AND. X + ( .NOT.LSAME( TRANS, 'C' ) ) )THEN X INFO = 2 X ELSE IF( N .LT.0 )THEN X INFO = 3 X ELSE IF( K .LT.0 )THEN X INFO = 4 X ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN X INFO = 7 X ELSE IF( LDC.LT.MAX( 1, N ) )THEN X INFO = 10 X END IF X IF( INFO.NE.0 )THEN X CALL XERBLA( 'DSYRK ', INFO ) X RETURN X END IF * * Quick return if possible. * X IF( ( N.EQ.0 ).OR. X + ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) X + RETURN * * And when alpha.eq.zero. * X IF( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) )THEN X IF( UPPER )THEN X IF( BETA.EQ.ZERO )THEN X DO 20, J = 1, N X DO 10, I = 1, J X C( I, J ) = ZERO X 10 CONTINUE X 20 CONTINUE X ELSE X DO 40, J = 1, N X DO 30, I = 1, J X C( I, J ) = BETA*C( I, J ) X 30 CONTINUE X 40 CONTINUE X END IF X ELSE X IF( BETA.EQ.ZERO )THEN X DO 60, J = 1, N X DO 50, I = J, N X C( I, J ) = ZERO X 50 CONTINUE X 60 CONTINUE X ELSE X DO 80, J = 1, N X DO 70, I = J, N X C( I, J ) = BETA*C( I, J ) X 70 CONTINUE X 80 CONTINUE X END IF X END IF X RETURN X END IF * * Start the operations. * X CLDA = CLD( LDA ) X IF( NOTR )THEN X IF( UPPER )THEN * * Form C := alpha*A*A' + beta*C. Notr, Upper. * X IF( CLDA )THEN X DO 230, LL = 1, K, EFF X DO 220, II = 1, N, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 100, J = II, N C*VDIR: PREFER VECTOR X DO 90, I = II, MIN( II+VSS-1, J ) X C( I, J ) = BETA*C( I, J ) X 90 CONTINUE X 100 CONTINUE X ENDIF X DO 120, I = II, MIN( II+VSS-1, N ) X DO 110, L = LL, MIN( LL+EFF-1, K ) X T1( I-II+1, L-LL+1 ) = ALPHA*A( I, L ) X 110 CONTINUE X 120 CONTINUE X DO 210, JJ = II, N, EFF C*VDIR: PREFER VECTOR X DO 140, J = JJ, MIN( JJ+EFF-1, N ) X DO 130, L = LL, MIN( LL+EFF-1, K ) X T2( L-LL+1, J-JJ+1 ) = A( J, L ) X 130 CONTINUE X 140 CONTINUE X IF( JJ.LT.II+VSS-1 )THEN X DO 170, J = JJ, MIN( JJ+EFF-1, N ) C*VDIR: PREFER VECTOR X DO 160, I = II, MIN( II+VSS-1, J ) X TEMP = C( I, J ) X DO 150, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + T2( L-LL+1, J-JJ+1 )* X + T1( I-II+1, L-LL+1 ) X 150 CONTINUE X C( I, J ) = TEMP X 160 CONTINUE X 170 CONTINUE X ELSE X DO 200, I = II, MIN( II+VSS-1, N ) X DO 190, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = C( I, J ) X DO 180, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + T2( L-LL+1, J-JJ+1 )* X + T1( I-II+1, L-LL+1 ) X 180 CONTINUE X C( I, J ) = TEMP X 190 CONTINUE X 200 CONTINUE X ENDIF X 210 CONTINUE X 220 CONTINUE X 230 CONTINUE X ELSE X DO 360, LL = 1, K, EFF2 X DO 350, II = 1, N, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 250, J = II, N X DO 240, I = II, MIN( II+VSS-1, J ) X C( I, J ) = BETA*C( I, J ) X 240 CONTINUE X 250 CONTINUE X ENDIF X DO 270, I = II, MIN( II+VSS-1, N ) X DO 260, L = LL, MIN( LL+EFF2-1, K ) X T1( I-II+1, L-LL+1 ) = ALPHA*A( I, L ) X 260 CONTINUE X 270 CONTINUE X DO 340, JJ = II, N, EFF2 X IF( JJ.LT.II+VSS-1 )THEN X DO 300, J = JJ, MIN( JJ+EFF2-1, N ) X DO 290, I = II, MIN( II+VSS-1, J ) X TEMP = C( I, J ) X DO 280, L = LL, MIN( LL+EFF2-1, K ) X TEMP = TEMP + A( J, L )* X + T1( I-II+1, L-LL+1 ) X 280 CONTINUE X C( I, J ) = TEMP X 290 CONTINUE X 300 CONTINUE X ELSE X DO 330, I = II, MIN( II+VSS-1, N ) X DO 320, J = JJ, MIN( JJ+EFF2-1, N ) X TEMP = C( I, J ) X DO 310, L = LL, MIN( LL+EFF2-1, K ) X TEMP = TEMP + A( J, L )* X + T1( I-II+1, L-LL+1 ) X 310 CONTINUE X C( I, J ) = TEMP X 320 CONTINUE X 330 CONTINUE X ENDIF X 340 CONTINUE X 350 CONTINUE X 360 CONTINUE X END IF X ELSE * * Form C := alpha*A*A' + beta*C. Notr, Lower. * X IF( CLDA )THEN X DO 510, LL = 1, K, EFF X DO 500, II = 1, N, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 380, J = 1, MIN( II+VSS-1, N ) X DO 370, I = MAX( II, J ), MIN( II+VSS-1, N ) X C( I, J ) = BETA*C( I, J ) X 370 CONTINUE X 380 CONTINUE X ENDIF X DO 400, I = II, MIN( II+VSS-1, N ) X DO 390, L = LL, MIN( LL+EFF-1, K ) X T1( I-II+1, L-LL+1 ) = ALPHA*A( I, L ) X 390 CONTINUE X 400 CONTINUE X DO 490, JJ = 1, MIN( II+VSS-1, N ), EFF C*VDIR: PREFER VECTOR X DO 420, J = JJ, MIN( JJ+EFF-1, II+VSS-1, N ) X DO 410, L = LL, MIN( LL+EFF-1, K ) X T2( L-LL+1, J-JJ+1 ) = A( J, L ) X 410 CONTINUE X 420 CONTINUE X IF( JJ+EFF-1.GT.II )THEN X DO 450, J = JJ, MIN( JJ+EFF-1, II+VSS-1, N ) X DO 440, I = MAX( II, J ), X + MIN( II+VSS-1, N ) X TEMP = C( I, J ) X DO 430, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + T2( L-LL+1, J-JJ+1 )* X + T1( I-II+1, L-LL+1 ) X 430 CONTINUE X C( I, J ) = TEMP X 440 CONTINUE X 450 CONTINUE X ELSE X DO 480, I = II, MIN( II+VSS-1, N ) X DO 470, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = C( I, J ) X DO 460, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + T2( L-LL+1, J-JJ+1 )* X + T1( I-II+1, L-LL+1 ) X 460 CONTINUE X C( I, J ) = TEMP X 470 CONTINUE X 480 CONTINUE X ENDIF X 490 CONTINUE X 500 CONTINUE X 510 CONTINUE X ELSE X DO 640, LL = 1, K, EFF2 X DO 630, II = 1, N, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 530, J = 1, MIN( II+VSS-1, N ) X DO 520, I = MAX( II, J ), MIN( II+VSS-1, N ) X C( I, J ) = BETA*C( I, J ) X 520 CONTINUE X 530 CONTINUE X ENDIF X DO 550, I = II, MIN( II+VSS-1, N ) X DO 540, L = LL, MIN( LL+EFF2-1, K ) X T1( I-II+1, L-LL+1 ) = ALPHA*A( I, L ) X 540 CONTINUE X 550 CONTINUE X DO 620, JJ = 1, MIN( II+VSS-1, N ), EFF2 X IF( JJ+EFF2-1.GT.II )THEN X DO 580, J = JJ, MIN( JJ+EFF2-1, II+VSS-1, N ) X DO 570, I = MAX( II, J ), X + MIN( II+VSS-1, N ) X TEMP = C( I, J ) X DO 560, L = LL, MIN( LL+EFF2-1, K ) X TEMP = TEMP + A( J, L )* X + T1( I-II+1, L-LL+1 ) X 560 CONTINUE X C( I, J ) = TEMP X 570 CONTINUE X 580 CONTINUE X ELSE X DO 610, I = II, MIN( II+VSS-1, N ) X DO 600, J = JJ, MIN( JJ+EFF2-1, N ) X TEMP = C( I, J ) X DO 590, L = LL, MIN( LL+EFF2-1, K ) X TEMP = TEMP + A( J, L )* X + T1( I-II+1, L-LL+1 ) X 590 CONTINUE X C( I, J ) = TEMP X 600 CONTINUE X 610 CONTINUE X ENDIF X 620 CONTINUE X 630 CONTINUE X 640 CONTINUE X END IF X END IF X ELSE X IF( UPPER )THEN * * Form C := alpha*A'*A + beta*C, Trans, Upper. * X DO 790, LL = 1, K, EFF X DO 780, II = 1, N, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 660, J = II, N X DO 650, I = II, MIN( II+VSS-1, J ) X C( I, J ) = BETA*C( I, J ) X 650 CONTINUE X 660 CONTINUE X ENDIF X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 680, L = LL, MIN( LL+EFF-1, K ) X DO 670, I = II, MIN( II+VSS-1, N ) X T1( I-II+1, L-LL+1 ) = ALPHA*A( L, I ) X 670 CONTINUE X 680 CONTINUE X ELSE X DO 700, I = II, MIN( II+VSS-1, N ) X DO 690, L = LL, MIN( LL+EFF-1, K ) X T1( I-II+1, L-LL+1 ) = ALPHA*A( L, I ) X 690 CONTINUE X 700 CONTINUE X END IF X DO 770, JJ = II, N, EFF X IF( JJ.LT.II+VSS-1 )THEN X DO 730, J = JJ, MIN( JJ+EFF-1, N ) X DO 720, I = II, MIN( II+VSS-1, J ) X TEMP = C( I, J ) X DO 710, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + A( L, J )* X + T1( I-II+1, L-LL+1 ) X 710 CONTINUE X C( I, J ) = TEMP X 720 CONTINUE X 730 CONTINUE X ELSE X DO 760, I = II, MIN( II+VSS-1, N ) X DO 750, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = C( I, J ) X DO 740, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + A( L, J )* X + T1( I-II+1, L-LL+1 ) X 740 CONTINUE X C( I, J ) = TEMP X 750 CONTINUE X 760 CONTINUE X ENDIF X 770 CONTINUE X 780 CONTINUE X 790 CONTINUE X ELSE * * Form C := alpha*A'*A + beta*C, Trans, Lower. * X DO 940, LL = 1, K, EFF X DO 930, II = 1, N, VSS X IF( ( LL.EQ.1 ).AND.( BETA.NE.ONE ) )THEN X DO 810, J = 1, MIN( II+VSS-1, N ) X DO 800, I = MAX( II, J ), MIN( II+VSS-1, N ) X C( I, J ) = BETA*C( I, J ) X 800 CONTINUE X 810 CONTINUE X ENDIF X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 830, L = LL, MIN( LL+EFF-1, K ) X DO 820, I = II, MIN( II+VSS-1, N ) X T1( I-II+1, L-LL+1 ) = ALPHA*A( L, I ) X 820 CONTINUE X 830 CONTINUE X ELSE X DO 850, I = II, MIN( II+VSS-1, N ) X DO 840, L = LL, MIN( LL+EFF-1, K ) X T1( I-II+1, L-LL+1 ) = ALPHA*A( L, I ) X 840 CONTINUE X 850 CONTINUE X END IF X DO 920, JJ = 1, MIN( II+VSS-1, N ), EFF X IF( JJ+EFF-1.GT.II )THEN X DO 880, J = JJ, MIN( JJ+EFF-1, II+VSS-1, N ) X DO 870, I = MAX( II, J ), MIN( II+VSS-1, N ) X TEMP = C( I, J ) X DO 860, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + A( L, J )* X + T1( I-II+1, L-LL+1 ) X 860 CONTINUE X C( I, J ) = TEMP X 870 CONTINUE X 880 CONTINUE X ELSE X DO 910, I = II, MIN( II+VSS-1, N ) X DO 900, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = C( I, J ) X DO 890, L = LL, MIN( LL+EFF-1, K ) X TEMP = TEMP + A( L, J )* X + T1( I-II+1, L-LL+1 ) X 890 CONTINUE X C( I, J ) = TEMP X 900 CONTINUE X 910 CONTINUE X ENDIF X 920 CONTINUE X 930 CONTINUE X 940 CONTINUE X END IF X END IF * X RETURN * * End of DSYRK . * X END SHAR_EOF chmod 0600 dsyrk.f || echo 'restore of dsyrk.f failed' Wc_c="`wc -c < 'dsyrk.f'`" test 21321 -eq "$Wc_c" || echo 'dsyrk.f: original size 21321, current size' "$Wc_c" fi # ============= dtrmm.f ============== if test -f 'dtrmm.f' -a X"$1" != X"-c"; then echo 'x - skipping dtrmm.f (File already exists)' else echo 'x - extracting dtrmm.f (Text)' sed 's/^X//' << 'SHAR_EOF' > 'dtrmm.f' && X SUBROUTINE DTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, X + B, LDB ) * .. Scalar Arguments .. X CHARACTER*1 SIDE, UPLO, TRANSA, DIAG X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DTRMM performs one of the matrix-matrix operations * * B := alpha*op( A )*B, or B := alpha*B*op( A ), * * where alpha is a scalar, B is an m by n matrix, A is a unit, or * non-unit, upper or lower triangular matrix and op( A ) is one of * * op( A ) = A or op( A ) = A'. * * Parameters * ========== * * SIDE - CHARACTER*1. * On entry, SIDE specifies whether op( A ) multiplies B from * the left or right as follows: * * SIDE = 'L' or 'l' B := alpha*op( A )*B. * * SIDE = 'R' or 'r' B := alpha*B*op( A ). * * Unchanged on exit. * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the matrix A is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * Unchanged on exit. * * TRANSA - CHARACTER*1. * On entry, TRANSA specifies the form of op( A ) to be used in * the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( A ) = A. * * TRANSA = 'T' or 't' op( A ) = A'. * * TRANSA = 'C' or 'c' op( A ) = A'. * * Unchanged on exit. * * DIAG - CHARACTER*1. * On entry, DIAG specifies whether or not A is unit triangular * as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit * triangular. * * Unchanged on exit. * * M - INTEGER. * On entry, M specifies the number of rows of B. M must be at * least zero. * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the number of columns of B. N must be * at least zero. * Unchanged on exit. * * ALPHA - DOUBLE PRECISION. * On entry, ALPHA specifies the scalar alpha. When alpha is * zero then A is not referenced and B need not be set before * entry. * Unchanged on exit. * * A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m * when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. * Before entry with UPLO = 'U' or 'u', the leading k by k * upper triangular part of the array A must contain the upper * triangular matrix and the strictly lower triangular part of * A is not referenced. * Before entry with UPLO = 'L' or 'l', the leading k by k * lower triangular part of the array A must contain the lower * triangular matrix and the strictly upper triangular part of * A is not referenced. * Note that when DIAG = 'U' or 'u', the diagonal elements of * A are not referenced either, but are assumed to be unity. * Unchanged on exit. * * LDA - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. When SIDE = 'L' or 'l' then * LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' * then LDA must be at least max( 1, n ). * Unchanged on exit. * * B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). * Before entry, the leading m by n part of the array B must * contain the matrix B, and on exit is overwritten by the * transformed matrix. * * LDB - INTEGER. * On entry, LDB specifies the first dimension of B as declared * in the calling (sub) program. LDB must be at least * max( 1, m ). * Unchanged on exit. * * * Level 3 Blas routine. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * -- Restructured, blocked and tuned for IBM 3090 VF in August-1989. * Per Ling, * Institute of Information Processing, University of Umea, Sweden. * V1.R0. * * * .. Local Scalars .. X INTEGER I, J, INFO, NROWA X LOGICAL LSIDE, UPPER, NOTR, NOUNIT * .. External Functions .. X LOGICAL LSAME X EXTERNAL LSAME * .. External Subroutines .. X EXTERNAL XERBLA X EXTERNAL UMM001, UMM002, UMM003, UMM004 X EXTERNAL UMM005, UMM006, UMM007, UMM008 * .. Parameters .. X DOUBLE PRECISION ZERO X PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Executable Statements .. * * Test the input parameters. * X LSIDE = LSAME( SIDE, 'L' ) X UPPER = LSAME( UPLO, 'U' ) X NOTR = LSAME( TRANSA, 'N' ) X NOUNIT = LSAME( DIAG, 'N' ) X IF( LSIDE )THEN X NROWA = M X ELSE X NROWA = N X END IF X INFO = 0 X IF( ( .NOT.LSIDE ).AND. X + ( .NOT.LSAME( SIDE , 'R' ) ) )THEN X INFO = 1 X ELSE IF( ( .NOT.UPPER ).AND. X + ( .NOT.LSAME( UPLO , 'L' ) ) )THEN X INFO = 2 X ELSE IF( ( .NOT.NOTR ).AND. X + ( .NOT.LSAME( TRANSA, 'T' ) ).AND. X + ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN X INFO = 3 X ELSE IF( ( .NOT.NOUNIT ).AND. X + ( .NOT.LSAME( DIAG , 'U' ) ) )THEN X INFO = 4 X ELSE IF( M.LT.0 )THEN X INFO = 5 X ELSE IF( N.LT.0 )THEN X INFO = 6 X ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN X INFO = 9 X ELSE IF( LDB.LT.MAX( 1, M ) )THEN X INFO = 11 X END IF X IF( INFO.NE.0 )THEN X CALL XERBLA( 'DTRMM ', INFO ) X RETURN X END IF * * Quick return if possible. * X IF( ( M.EQ.0 ).OR.( N.EQ.0 ) ) X + RETURN * * And when alpha.eq.zero. * X IF( ALPHA.EQ.ZERO )THEN X DO 20, I = 1, M X DO 10, J = 1, N X B( I, J ) = ZERO X 10 CONTINUE X 20 CONTINUE X RETURN X END IF * * Start the operations. * X IF( LSIDE )THEN * * Form B := alpha*op( A )*B. * X IF( UPPER )THEN X IF( NOTR )THEN * Left, Upper, No transpose X CALL UMM001( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X ELSE * Left, Upper, Transpose X CALL UMM002( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X END IF X ELSE X IF( NOTR )THEN * Left, Lower, No transpose X CALL UMM003( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X ELSE * Left, Lower, Transpose X CALL UMM004( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X END IF X END IF * X ELSE * * Form B := alpha*B*op( A ). * X IF( UPPER )THEN X IF( NOTR )THEN * Right, Upper, No transpose X CALL UMM005( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X ELSE * Right, Upper, Transpose X CALL UMM006( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X END IF X ELSE X IF( NOTR )THEN * Right, Lower, No transpose X CALL UMM007( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X ELSE * Right, Lower, Transpose X CALL UMM008( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X END IF X END IF X END IF * X RETURN * * End of DTRMM . * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE UMM001( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * UMM001 performs the matrix operation X = alpha*A*B where X and B are * m by n matrices, A is a unit, or non-unit, non-transposed upper * triangular matrix. A appears on the left side of B. Alpha is scalar. * The matrix B is overwritten by X. * * Left, Upper, No transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, K, KK X LOGICAL CLDB X DOUBLE PRECISION TEMP * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MIN * .. Parameters .. X DOUBLE PRECISION ZERO X PARAMETER ( ZERO = 0.0D+0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN01/ T1, T2 * .. * .. Executable Statements .. * X CLDB = CLD( LDB ) X DO 200, KK = 1, M, EFF X IF( KK.NE.1 )THEN X DO 50, II = 1, KK-1, VSS * * Computation using rectangular blocks of A. * C*VDIR: IGNORE RECRDEPS C*VDIR: PREFER VECTOR X DO 40, I = II, MIN( II+VSS-1, KK-1 ) X DO 10, K = KK, MIN( KK+EFF-1, M ) X T1( I-II+1, K-KK+1 ) = ALPHA*A( I, K ) X 10 CONTINUE X DO 30, J = 1, N X TEMP = B( I, J ) X DO 20, K = KK, MIN( KK+EFF-1, M ) X TEMP = TEMP + B( K, J )*T1( I-II+1, K-KK+1 ) X 20 CONTINUE X B( I, J ) = TEMP X 30 CONTINUE X 40 CONTINUE X 50 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X IF( NOUNIT )THEN X DO 70, K = KK, MIN( KK+EFF-1, M ) X DO 60, I = KK, K X T2( K-KK+1, I-KK+1 ) = ALPHA*A( I, K ) X 60 CONTINUE X 70 CONTINUE X ELSE X DO 90, K = KK, MIN( KK+EFF-1, M ) X DO 80, I = KK, K-1 X T2( K-KK+1, I-KK+1 ) = ALPHA*A( I, K ) X 80 CONTINUE X T2( K-KK+1, K-KK+1 ) = ALPHA X 90 CONTINUE X END IF X DO 190, JJ = 1, N, VSS X IF( CLDB )THEN C*VDIR: PREFER VECTOR X DO 110, K = KK, MIN( KK+EFF-1, M ) X DO 100, J = JJ, MIN( JJ+VSS-1, N ) X T1( J-JJ+1, K-KK+1 ) = B( K, J ) X 100 CONTINUE X 110 CONTINUE X ELSE X DO 130, J = JJ, MIN( JJ+VSS-1, N ) X DO 120, K = KK, MIN( KK+EFF-1, M ) X T1( J-JJ+1, K-KK+1 ) = B( K, J ) X 120 CONTINUE X 130 CONTINUE X END IF X DO 160, I = KK, MIN( KK+EFF-1, M ) X DO 150, J = JJ, MIN( JJ+VSS-1, N ) X TEMP = ZERO X DO 140, K = I, MIN( KK+EFF-1, M ) X TEMP = TEMP + T2( K-KK+1, I-KK+1 )* X + T1( J-JJ+1, K-KK+1 ) X 140 CONTINUE X T1( J-JJ+1, I-KK+1 ) = TEMP X 150 CONTINUE X 160 CONTINUE X DO 180, K = KK, MIN( KK+EFF-1, M ) X DO 170, J = JJ, MIN( JJ+VSS-1, N ) X B( K, J ) = T1( J-JJ+1, K-KK+1 ) X 170 CONTINUE X 180 CONTINUE X 190 CONTINUE X 200 CONTINUE * X RETURN * * End of UMM001 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE UMM002( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * UMM002 performs the matrix operation X = alpha*A*B where X and B are * m by n matrices, A is a unit, or non-unit, transposed upper * triangular matrix. A appears on the left side of B. Alpha is scalar. * The matrix B is overwritten by X. * * Left, Upper, Transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, K, KK X LOGICAL CLDA, CLDB X DOUBLE PRECISION TEMP * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MAX * .. Parameters .. X DOUBLE PRECISION ZERO X PARAMETER ( ZERO = 0.0D+0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN01/ T1, T2 * .. * .. Executable Statements .. * X CLDA = CLD( LDA ) X CLDB = CLD( LDB ) X DO 220, KK = M, 1, -EFF X IF( KK.NE.M )THEN X DO 80, II = M, KK+1, -VSS * * Computation using rectangular blocks of A. * X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 20, K = MAX( KK-EFF+1, 1 ), KK X DO 10, I = MAX( II-VSS+1, KK+1 ), II X T1( I-II+VSS, K-KK+EFF ) = ALPHA*A( K, I ) X 10 CONTINUE X 20 CONTINUE X ELSE X DO 40, I = MAX( II-VSS+1, KK+1 ), II X DO 30, K = MAX( KK-EFF+1, 1 ), KK X T1( I-II+VSS, K-KK+EFF ) = ALPHA*A( K, I ) X 30 CONTINUE X 40 CONTINUE X END IF C*VDIR: IGNORE RECRDEPS X DO 70, I = MAX( II-VSS+1, KK+1 ), II X DO 60, J = 1, N X TEMP = B( I, J ) X DO 50, K = MAX( KK-EFF+1, 1 ), KK X TEMP = TEMP + B( K, J )*T1( I-II+VSS, K-KK+EFF ) X 50 CONTINUE X B( I, J ) = TEMP X 60 CONTINUE X 70 CONTINUE X 80 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X DO 210, JJ = N, 1, -VSS X IF( CLDB )THEN C*VDIR: PREFER VECTOR X DO 100, K = MAX( KK-EFF+1, 1 ), KK X DO 90, J = MAX( JJ-VSS+1, 1 ), JJ X T1( J-JJ+VSS, K-KK+EFF ) = ALPHA*B( K, J ) X 90 CONTINUE X 100 CONTINUE X ELSE X DO 120, J = MAX( JJ-VSS+1, 1 ), JJ X DO 110, K = MAX( KK-EFF+1, 1 ), KK X T1( J-JJ+VSS, K-KK+EFF ) = ALPHA*B( K, J ) X 110 CONTINUE X 120 CONTINUE X END IF X IF( NOUNIT )THEN X DO 150, I = KK, MAX( KK-EFF+1, 1 ), -1 X DO 140, J = MAX( JJ-VSS+1, 1 ), JJ X TEMP = ZERO X DO 130, K = MAX( KK-EFF+1, 1 ), I X TEMP = TEMP + A( K, I )*T1( J-JJ+VSS, K-KK+EFF ) X 130 CONTINUE X T1( J-JJ+VSS, I-KK+EFF ) = TEMP X 140 CONTINUE X 150 CONTINUE X ELSE X DO 180, I = KK, MAX( KK-EFF+2, 2 ), -1 X DO 170, J = MAX( JJ-VSS+1, 1 ), JJ X TEMP = T1( J-JJ+VSS, I-KK+EFF ) X DO 160, K = MAX( KK-EFF+1, 1 ), I-1 X TEMP = TEMP + A( K, I )*T1( J-JJ+VSS, K-KK+EFF ) X 160 CONTINUE X T1( J-JJ+VSS, I-KK+EFF ) = TEMP X 170 CONTINUE X 180 CONTINUE X END IF C*VDIR: PREFER VECTOR X DO 200, K = MAX( KK-EFF+1, 1 ), KK X DO 190, J = MAX( JJ-VSS+1, 1 ), JJ X B( K, J ) = T1( J-JJ+VSS, K-KK+EFF ) X 190 CONTINUE X 200 CONTINUE X 210 CONTINUE X 220 CONTINUE * X RETURN * * End of UMM002 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE UMM003( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * UMM003 performs the matrix operation X = alpha*A*B where X and B are * m by n matrices, A is a unit, or non-unit, non-transposed lower * triangular matrix. A appears on the left side of B. Alpha is scalar. * The matrix B is overwritten by X. * * Left, Lower, No transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, K, KK X LOGICAL CLDB X DOUBLE PRECISION TEMP * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MAX * .. Parameters .. X DOUBLE PRECISION ZERO X PARAMETER ( ZERO = 0.0D+0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN01/ T1, T2 * .. * .. Executable Statements .. * X CLDB = CLD( LDB ) X DO 200, KK = M, 1, -EFF X IF( KK.NE.M )THEN X DO 50, II = M, KK+1, -VSS * * Computation using rectangular blocks of A. * C*VDIR: IGNORE RECRDEPS C*VDIR: PREFER VECTOR X DO 40, I = MAX( II-VSS+1, KK+1 ), II X DO 10, K = MAX( KK-EFF+1, 1 ), KK X T1( I-II+VSS, K-KK+EFF ) = ALPHA*A( I, K ) X 10 CONTINUE X DO 30, J = 1, N X TEMP = B( I, J ) X DO 20, K = MAX( KK-EFF+1, 1 ), KK X TEMP = TEMP + B( K, J )*T1( I-II+VSS, K-KK+EFF ) X 20 CONTINUE X B( I, J ) = TEMP X 30 CONTINUE X 40 CONTINUE X 50 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X IF( NOUNIT )THEN X DO 70, K = MAX( KK-EFF+1, 1 ), KK C*VDIR: PREFER VECTOR X DO 60, I = K, KK X T2( K-KK+EFF, I-KK+EFF ) = ALPHA*A( I, K ) X 60 CONTINUE X 70 CONTINUE X ELSE X DO 90, K = MAX( KK-EFF+1, 1 ), KK X T2( K-KK+EFF, K-KK+EFF ) = ALPHA C*VDIR: PREFER VECTOR X DO 80, I = K+1, KK X T2( K-KK+EFF, I-KK+EFF ) = ALPHA*A( I, K ) X 80 CONTINUE X 90 CONTINUE X END IF X DO 190, JJ = N, 1, -VSS X IF( CLDB )THEN C*VDIR: PREFER VECTOR X DO 110, K = MAX( KK-EFF+1, 1 ), KK X DO 100, J = MAX( JJ-VSS+1, 1 ), JJ X T1( J-JJ+VSS, K-KK+EFF ) = B( K, J ) X 100 CONTINUE X 110 CONTINUE X ELSE X DO 130, J = MAX( JJ-VSS+1, 1 ), JJ X DO 120, K = MAX( KK-EFF+1, 1 ), KK X T1( J-JJ+VSS, K-KK+EFF ) = B( K, J ) X 120 CONTINUE X 130 CONTINUE X END IF X DO 160, I = KK, MAX( KK-EFF+1, 1 ), -1 X DO 150, J = MAX( JJ-VSS+1, 1 ), JJ X TEMP = ZERO X DO 140, K = MAX( KK-EFF+1, 1 ), I X TEMP = TEMP + T2( K-KK+EFF, I-KK+EFF )* X + T1( J-JJ+VSS, K-KK+EFF ) X 140 CONTINUE X T1( J-JJ+VSS, I-KK+EFF ) = TEMP X 150 CONTINUE X 160 CONTINUE C*VDIR: PREFER VECTOR X DO 180, K = MAX( KK-EFF+1, 1 ), KK X DO 170, J = MAX( JJ-VSS+1, 1 ), JJ X B( K, J ) = T1( J-JJ+VSS, K-KK+EFF ) X 170 CONTINUE X 180 CONTINUE X 190 CONTINUE X 200 CONTINUE * X RETURN * * End of UMM003 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE UMM004( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * UMM004 performs the matrix operation X = alpha*A*B where X and B are * m by n matrices, A is a unit, or non-unit, transposed lower * triangular matrix. A appears on the left side of B. Alpha is scalar. * The matrix B is overwritten by X. * * Left, Lower, Transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, K, KK X LOGICAL CLDA, CLDB X DOUBLE PRECISION TEMP * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MIN * .. Parameters .. X DOUBLE PRECISION ZERO X PARAMETER ( ZERO = 0.0D+0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN01/ T1, T2 * .. * .. Executable Statements .. * X CLDA = CLD( LDA ) X CLDB = CLD( LDB ) X DO 220, KK = 1, M, EFF X IF( KK.NE.1 )THEN X DO 80, II = 1, KK-1, VSS * * Computation using rectangular blocks of A. * X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 20, K = KK, MIN( KK+EFF-1, M ) X DO 10, I = II, MIN( II+VSS-1, KK-1 ) X T1( I-II+1, K-KK+1 ) = ALPHA*A( K, I ) X 10 CONTINUE X 20 CONTINUE X ELSE X DO 40, I = II, MIN( II+VSS-1, KK-1 ) X DO 30, K = KK, MIN( KK+EFF-1, M ) X T1( I-II+1, K-KK+1 ) = ALPHA*A( K, I ) X 30 CONTINUE X 40 CONTINUE X END IF C*VDIR: IGNORE RECRDEPS X DO 70, I = II, MIN( II+VSS-1, KK-1 ) X DO 60, J = 1, N X TEMP = B( I, J ) X DO 50, K = KK, MIN( KK+EFF-1, M ) X TEMP = TEMP + B( K, J )*T1( I-II+1, K-KK+1 ) X 50 CONTINUE X B( I, J ) = TEMP X 60 CONTINUE X 70 CONTINUE X 80 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X DO 210, JJ = 1, N, VSS X IF( CLDB )THEN C*VDIR: PREFER VECTOR X DO 100, K = KK, MIN( KK+EFF-1, M ) X DO 90, J = JJ, MIN( JJ+VSS-1, N ) X T1( J-JJ+1, K-KK+1 ) = ALPHA*B( K, J ) X 90 CONTINUE X 100 CONTINUE X ELSE X DO 120, J = JJ, MIN( JJ+VSS-1, N ) X DO 110, K = KK, MIN( KK+EFF-1, M ) X T1( J-JJ+1, K-KK+1 ) = ALPHA*B( K, J ) X 110 CONTINUE X 120 CONTINUE X END IF X IF( NOUNIT )THEN X DO 150, I = KK, MIN( KK+EFF-1, M ) X DO 140, J = JJ, MIN( JJ+VSS-1, N ) X TEMP = ZERO X DO 130, K = I, MIN( KK+EFF-1, M ) X TEMP = TEMP + A( K, I )*T1( J-JJ+1, K-KK+1 ) X 130 CONTINUE X T1( J-JJ+1, I-KK+1 ) = TEMP X 140 CONTINUE X 150 CONTINUE X ELSE X DO 180, I = KK, MIN( KK+EFF-2, M-1 ) X DO 170, J = JJ, MIN( JJ+VSS-1, N ) X TEMP = T1( J-JJ+1, I-KK+1 ) X DO 160, K = I+1, MIN( KK+EFF-1, M ) X TEMP = TEMP + A( K, I )*T1( J-JJ+1, K-KK+1 ) X 160 CONTINUE X T1( J-JJ+1, I-KK+1 ) = TEMP X 170 CONTINUE X 180 CONTINUE X END IF X DO 200, K = KK, MIN( KK+EFF-1, M ) X DO 190, J = JJ, MIN( JJ+VSS-1, N ) X B( K, J ) = T1( J-JJ+1, K-KK+1 ) X 190 CONTINUE X 200 CONTINUE X 210 CONTINUE X 220 CONTINUE * X RETURN * * End of UMM004 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE UMM005( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * UMM005 performs the matrix operation X = alpha*B*A where X and B are * m by n matrices, A is a unit, or non-unit, non-transposed upper * triangular matrix. A appears on the right side of B. Alpha is scalar. * The matrix B is overwritten by X. * * Right, Upper, No transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, K X DOUBLE PRECISION TEMP * .. Intrinsic Functions .. X INTRINSIC MAX * .. Parameters .. X DOUBLE PRECISION ONE, ZERO X PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN01/ T1, T2 * .. * .. Executable Statements .. * X DO 130, JJ = N, 1, -EFF X DO 120, II = M, 1, -VSS X DO 20, I = MAX( II-VSS+1, 1 ), II X DO 10, J = MAX( JJ-EFF+1, 1 ), JJ X T1( I-II+VSS, J-JJ+EFF ) = ALPHA*B( I, J ) X 10 CONTINUE X 20 CONTINUE X IF( JJ.NE.N )THEN * * Computation using rectangular blocks of A. * C*VDIR: IGNORE RECRDEPS X DO 50, I = MAX( II-VSS+1, 1 ), II X DO 40, K = JJ+1, N X TEMP = B( I, K ) X DO 30, J = MAX( JJ-EFF+1, 1 ), JJ X TEMP = TEMP + A( J, K )*T1( I-II+VSS, J-JJ+EFF ) X 30 CONTINUE X B( I, K ) = TEMP X 40 CONTINUE X 50 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X IF( NOUNIT )THEN X DO 80, K = MAX( JJ-EFF+1, 1 ), JJ X DO 70, I = MAX( II-VSS+1, 1 ), II X TEMP = ZERO X DO 60, J = MAX( JJ-EFF+1, 1 ), K X TEMP = TEMP + A( J, K )*T1( I-II+VSS, J-JJ+EFF ) X 60 CONTINUE X B( I, K ) = TEMP X 70 CONTINUE X 80 CONTINUE X ELSE X DO 110, K = MAX( JJ-EFF+1, 1 ), JJ X DO 100, I = MAX( II-VSS+1, 1 ), II X TEMP = T1( I-II+VSS, K-JJ+EFF ) X DO 90, J = MAX( JJ-EFF+1, 1 ), K-1 X TEMP = TEMP + A( J, K )*T1( I-II+VSS, J-JJ+EFF ) X 90 CONTINUE X B( I, K ) = TEMP X 100 CONTINUE X 110 CONTINUE X END IF X 120 CONTINUE X 130 CONTINUE * X RETURN * * End of UMM005 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE UMM006( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * UMM006 performs the matrix operation X = alpha*B*A where X and B are * m by n matrices, A is a unit, or non-unit, transposed upper * triangular matrix. A appears on the right side of B. Alpha is scalar. * The matrix B is overwritten by X. * * Right, Upper, Transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, K, KK X DOUBLE PRECISION TEMP * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MIN * .. Parameters .. X DOUBLE PRECISION ZERO X PARAMETER ( ZERO = 0.0D+0 ) X INTEGER VSS, EFF, EFF2 X PARAMETER ( VSS = 128, EFF = 48, EFF2 = 32 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN01/ T1, T2 * .. * .. Executable Statements .. * X IF( CLD( LDA ) )THEN X DO 200, JJ = 1, N, EFF X DO 190, II = 1, M, VSS X DO 20, I = II, MIN( II+VSS-1, M ) X DO 10, J = JJ, MIN( JJ+EFF-1, N ) X T1( I-II+1, J-JJ+1 ) = ALPHA*B( I, J ) X 10 CONTINUE X 20 CONTINUE X IF( JJ.NE.1 )THEN * * Computation using rectangular blocks of A. * X DO 80, KK = 1, JJ, EFF C*VDIR: PREFER VECTOR X DO 40, K = KK, MIN( KK+EFF-1, JJ-1 ) X DO 30, J = JJ, MIN( JJ+EFF-1, N ) X T2( J-JJ+1, K-KK+1 ) = A( K, J ) X 30 CONTINUE X 40 CONTINUE C*VDIR: PREFER VECTOR X DO 70, I = II, MIN( II+VSS-1, M ) X DO 60, K = KK, MIN( KK+EFF-1, JJ-1 ) X TEMP = B( I, K ) X DO 50, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = TEMP + T2( J-JJ+1, K-KK+1 )* X + T1( I-II+1, J-JJ+1 ) X 50 CONTINUE X B( I, K ) = TEMP X 60 CONTINUE X 70 CONTINUE X 80 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X IF( NOUNIT )THEN X DO 100, J = JJ, MIN( JJ+EFF-1, N ) C*VDIR: PREFER VECTOR X DO 90, K = JJ, J X T2( J-JJ+1, K-JJ+1 ) = A( K, J ) X 90 CONTINUE X 100 CONTINUE X DO 130, K = JJ, MIN( JJ+EFF-1, N ) C*VDIR: PREFER VECTOR X DO 120, I = II, MIN( II+VSS-1, M ) X TEMP = ZERO X DO 110, J = K, MIN( JJ+EFF-1, N ) X TEMP = TEMP + T2( J-JJ+1, K-JJ+1 )* X + T1( I-II+1, J-JJ+1 ) X 110 CONTINUE X B( I, K ) = TEMP X 120 CONTINUE X 130 CONTINUE X ELSE X DO 150, J = JJ+1, MIN( JJ+EFF-1, N ) C*VDIR: PREFER VECTOR X DO 140, K = JJ, J-1 X T2( J-JJ+1, K-JJ+1 ) = A( K, J ) X 140 CONTINUE X 150 CONTINUE X DO 180, K = JJ, MIN( JJ+EFF-1, N ) C*VDIR: PREFER VECTOR X DO 170, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, K-JJ+1 ) X DO 160, J = K+1, MIN( JJ+EFF-1, N ) X TEMP = TEMP + T2( J-JJ+1, K-JJ+1 )* X + T1( I-II+1, J-JJ+1 ) X 160 CONTINUE X B( I, K ) = TEMP X 170 CONTINUE X 180 CONTINUE X END IF X 190 CONTINUE X 200 CONTINUE X ELSE X DO 340, JJ = 1, N, EFF2 X DO 330, II = 1, M, VSS X DO 220, I = II, MIN( II+VSS-1, M ) X DO 210, J = JJ, MIN( JJ+EFF2-1, N ) X T1( I-II+1, J-JJ+1 ) = ALPHA*B( I, J ) X 210 CONTINUE X 220 CONTINUE X IF( JJ.NE.1 )THEN * * Computation using rectangular blocks of A. * X DO 260, KK = 1, JJ, EFF2 C*VDIR: PREFER VECTOR X DO 250, I = II, MIN( II+VSS-1, M ) X DO 240, K = KK, MIN( KK+EFF2-1, JJ-1 ) X TEMP = B( I, K ) X DO 230, J = JJ, MIN( JJ+EFF2-1, N ) X TEMP = TEMP + A( K, J )* X + T1( I-II+1, J-JJ+1 ) X 230 CONTINUE X B( I, K ) = TEMP X 240 CONTINUE X 250 CONTINUE X 260 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X IF( NOUNIT )THEN X DO 290, K = JJ, MIN( JJ+EFF2-1, N ) C*VDIR: PREFER VECTOR X DO 280, I = II, MIN( II+VSS-1, M ) X TEMP = ZERO X DO 270, J = K, MIN( JJ+EFF2-1, N ) X TEMP = TEMP + A( K, J )*T1( I-II+1, J-JJ+1 ) X 270 CONTINUE X B( I, K ) = TEMP X 280 CONTINUE X 290 CONTINUE X ELSE X DO 320, K = JJ, MIN( JJ+EFF2-1, N ) C*VDIR: PREFER VECTOR X DO 310, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, K-JJ+1 ) X DO 300, J = K+1, MIN( JJ+EFF2-1, N ) X TEMP = TEMP + A( K, J )*T1( I-II+1, J-JJ+1 ) X 300 CONTINUE X B( I, K ) = TEMP X 310 CONTINUE X 320 CONTINUE X END IF X 330 CONTINUE X 340 CONTINUE X END IF * X RETURN * * End of UMM006 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE UMM007( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * UMM007 performs the matrix operation X = alpha*B*A where X and B are * m by n matrices, A is a unit, or non-unit, non-transposed lower * triangular matrix. A appears on the right side of B. Alpha is scalar. * The matrix B is overwritten by X. * * Right, Lower, No transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, K X DOUBLE PRECISION TEMP * .. Intrinsic Functions .. X INTRINSIC MIN * .. Parameters .. X DOUBLE PRECISION ZERO X PARAMETER ( ZERO = 0.0D+0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Array .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN01/ T1, T2 * .. * .. Executable Statements .. * X DO 130, JJ = 1, N, EFF X DO 120, II = 1, M, VSS X DO 20, I = II, MIN( II+VSS-1, M ) X DO 10, J = JJ, MIN( JJ+EFF-1, N ) X T1( I-II+1, J-JJ+1 ) = ALPHA*B( I, J ) X 10 CONTINUE X 20 CONTINUE X IF( JJ.NE.1 )THEN * * Computation using rectangular blocks of A. * C*VDIR: PREFER VECTOR X DO 50, I = II, MIN( II+VSS-1, M ) X DO 40, K = 1, JJ-1 X TEMP = B( I, K ) X DO 30, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = TEMP + A( J, K )*T1( I-II+1, J-JJ+1 ) X 30 CONTINUE X B( I, K ) = TEMP X 40 CONTINUE X 50 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X IF( NOUNIT )THEN X DO 80, K = JJ, MIN( JJ+EFF-1, N ) C*VDIR: PREFER VECTOR X DO 70, I = II, MIN( II+VSS-1, M ) X TEMP = ZERO X DO 60, J = K, MIN( JJ+EFF-1, N ) X TEMP = TEMP + A( J, K )*T1( I-II+1, J-JJ+1 ) X 60 CONTINUE X B( I, K ) = TEMP X 70 CONTINUE X 80 CONTINUE X ELSE X DO 110, K = JJ, MIN( JJ+EFF-1, N ) C*VDIR: PREFER VECTOR X DO 100, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, K-JJ+1 ) X DO 90, J = K+1, MIN( JJ+EFF-1, N ) X TEMP = TEMP + A( J, K )*T1( I-II+1, J-JJ+1 ) X 90 CONTINUE X B( I, K ) = TEMP X 100 CONTINUE X 110 CONTINUE X END IF X 120 CONTINUE X 130 CONTINUE * X RETURN * * End of UMM007 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE UMM008( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * UMM008 performs the matrix operation X = alpha*B*A where X and B are * m by n matrices, A is a unit, or non-unit, transposed lower * triangular matrix. A appears on the right side of B. Alpha is scalar. * The matrix B is overwritten by X. * * Right, Lower, Transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, K, KK X DOUBLE PRECISION TEMP * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MAX * .. Parameters .. X DOUBLE PRECISION ONE, ZERO X PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) X INTEGER VSS, EFF, EFF2 X PARAMETER ( VSS = 128, EFF = 48, EFF2 = 32 ) * .. Local Arrays .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN01/ T1, T2 * .. * .. Executable Statements .. * X IF( CLD( LDA ) )THEN X DO 200, JJ = N, 1, -EFF X DO 190, II = M, 1, -VSS X DO 20, I = MAX( II-VSS+1, 1 ), II X DO 10, J = MAX( JJ-EFF+1, 1 ), JJ X T1( I-II+VSS, J-JJ+EFF ) = ALPHA*B( I, J ) X 10 CONTINUE X 20 CONTINUE X IF( JJ.NE.N )THEN * * Computation using rectangular blocks of A. * X DO 80, KK = N, JJ, -EFF C*VDIR: PREFER VECTOR X DO 40, K = MAX( KK-EFF+1, JJ+1 ), KK X DO 30, J = MAX( JJ-EFF+1, 1 ), JJ X T2( J-JJ+EFF, K-KK+EFF ) = A( K, J ) X 30 CONTINUE X 40 CONTINUE C*VDIR: IGNORE RECRDEPS X DO 70, I = MAX( II-VSS+1, 1 ), II X DO 60, K = MAX( KK-EFF+1, JJ+1 ), KK X TEMP = B( I, K ) X DO 50, J = MAX( JJ-EFF+1, 1 ), JJ X TEMP = TEMP + T2( J-JJ+EFF, K-KK+EFF )* X + T1( I-II+VSS, J-JJ+EFF ) X 50 CONTINUE X B( I, K ) = TEMP X 60 CONTINUE X 70 CONTINUE X 80 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X IF( NOUNIT )THEN X DO 100, J = MAX( JJ-EFF+1, 1 ), JJ X DO 90, K = J, JJ X T2( J-JJ+EFF, K-JJ+EFF ) = A( K, J ) X 90 CONTINUE X 100 CONTINUE X DO 130, K = MAX( JJ-EFF+1, 1 ), JJ X DO 120, I = MAX( II-VSS+1, 1 ), II X TEMP = ZERO X DO 110, J = MAX( JJ-EFF+1, 1 ), K X TEMP = TEMP + T2( J-JJ+EFF, K-JJ+EFF )* X + T1( I-II+VSS, J-JJ+EFF ) X 110 CONTINUE X B( I, K ) = TEMP X 120 CONTINUE X 130 CONTINUE X ELSE X DO 150, J = MAX( JJ-EFF+1, 1 ), JJ-1 X DO 140, K = J+1, JJ X T2( J-JJ+EFF, K-JJ+EFF ) = A( K, J ) X 140 CONTINUE X 150 CONTINUE X DO 180, K = MAX( JJ-EFF+1, 1 ), JJ X DO 170, I = MAX( II-VSS+1, 1 ), II X TEMP = T1( I-II+VSS, K-JJ+EFF ) X DO 160, J = MAX( JJ-EFF+1, 1 ), K-1 X TEMP = TEMP + T2( J-JJ+EFF, K-JJ+EFF )* X + T1( I-II+VSS, J-JJ+EFF ) X 160 CONTINUE X B( I, K ) = TEMP X 170 CONTINUE X 180 CONTINUE X END IF X 190 CONTINUE X 200 CONTINUE X ELSE X DO 340, JJ = N, 1, -EFF2 X DO 330, II = M, 1, -VSS X DO 220, I = MAX( II-VSS+1, 1 ), II X DO 210, J = MAX( JJ-EFF2+1, 1 ), JJ X T1( I-II+VSS, J-JJ+EFF2 ) = ALPHA*B( I, J ) X 210 CONTINUE X 220 CONTINUE X IF( JJ.NE.N )THEN * * Computation using rectangular blocks of A. * X DO 260, KK = N, JJ, -EFF2 C*VDIR: IGNORE RECRDEPS X DO 250, I = MAX( II-VSS+1, 1 ), II X DO 240, K = MAX( KK-EFF2+1, JJ+1 ), KK X TEMP = B( I, K ) X DO 230, J = MAX( JJ-EFF2+1, 1 ), JJ X TEMP = TEMP + A( K, J )* X + T1( I-II+VSS, J-JJ+EFF2 ) X 230 CONTINUE X B( I, K ) = TEMP X 240 CONTINUE X 250 CONTINUE X 260 CONTINUE X END IF * * Computation using triangular diagonal-blocks of A. * X IF( NOUNIT )THEN X DO 290, K = MAX( JJ-EFF2+1, 1 ), JJ X DO 280, I = MAX( II-VSS+1, 1 ), II X TEMP = ZERO X DO 270, J = MAX( JJ-EFF2+1, 1 ), K X TEMP = TEMP + A( K, J )* X + T1( I-II+VSS, J-JJ+EFF2 ) X 270 CONTINUE X B( I, K ) = TEMP X 280 CONTINUE X 290 CONTINUE X ELSE X DO 320, K = MAX( JJ-EFF2+1, 1 ), JJ X DO 310, I = MAX( II-VSS+1, 1 ), II X TEMP = T1( I-II+VSS, K-JJ+EFF2 ) X DO 300, J = MAX( JJ-EFF2+1, 1 ), K-1 X TEMP = TEMP + A( K, J )* X + T1( I-II+VSS, J-JJ+EFF2 ) X 300 CONTINUE X B( I, K ) = TEMP X 310 CONTINUE X 320 CONTINUE X END IF X 330 CONTINUE X 340 CONTINUE X END IF * X RETURN * * End of UMM008 * X END SHAR_EOF chmod 0600 dtrmm.f || echo 'restore of dtrmm.f failed' Wc_c="`wc -c < 'dtrmm.f'`" test 43449 -eq "$Wc_c" || echo 'dtrmm.f: original size 43449, current size' "$Wc_c" fi # ============= dtrsm.f ============== if test -f 'dtrsm.f' -a X"$1" != X"-c"; then echo 'x - skipping dtrsm.f (File already exists)' else echo 'x - extracting dtrsm.f (Text)' sed 's/^X//' << 'SHAR_EOF' > 'dtrsm.f' && X SUBROUTINE DTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, X + B, LDB ) * .. Scalar Arguments .. X CHARACTER*1 SIDE, UPLO, TRANSA, DIAG X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DTRSM solves one of the matrix equations * * op( A )*X = alpha*B, or X*op( A ) = alpha*B, * * where alpha is a scalar, X and B are m by n matrices, A is a unit, or * non-unit, upper or lower triangular matrix and op( A ) is one of * * op( A ) = A or op( A ) = A'. * * The matrix X is overwritten on B. * * Parameters * ========== * * SIDE - CHARACTER*1. * On entry, SIDE specifies whether op( A ) appears on the left * or right of X as follows: * * SIDE = 'L' or 'l' op( A )*X = alpha*B. * * SIDE = 'R' or 'r' X*op( A ) = alpha*B. * * Unchanged on exit. * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the matrix A is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * Unchanged on exit. * * TRANSA - CHARACTER*1. * On entry, TRANSA specifies the form of op( A ) to be used in * the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( A ) = A. * * TRANSA = 'T' or 't' op( A ) = A'. * * TRANSA = 'C' or 'c' op( A ) = A'. * * Unchanged on exit. * * DIAG - CHARACTER*1. * On entry, DIAG specifies whether or not A is unit triangular * as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit * triangular. * * Unchanged on exit. * * M - INTEGER. * On entry, M specifies the number of rows of B. M must be at * least zero. * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the number of columns of B. N must be * at least zero. * Unchanged on exit. * * ALPHA - DOUBLE PRECISION. * On entry, ALPHA specifies the scalar alpha. When alpha is * zero then A is not referenced and B need not be set before * entry. * Unchanged on exit. * * A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m * when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. * Before entry with UPLO = 'U' or 'u', the leading k by k * upper triangular part of the array A must contain the upper * triangular matrix and the strictly lower triangular part of * A is not referenced. * Before entry with UPLO = 'L' or 'l', the leading k by k * lower triangular part of the array A must contain the lower * triangular matrix and the strictly upper triangular part of * A is not referenced. * Note that when DIAG = 'U' or 'u', the diagonal elements of * A are not referenced either, but are assumed to be unity. * Unchanged on exit. * * LDA - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. When SIDE = 'L' or 'l' then * LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' * then LDA must be at least max( 1, n ). * Unchanged on exit. * * B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). * Before entry, the leading m by n part of the array B must * contain the right-hand side matrix B, and on exit is * overwritten by the solution matrix X. * * LDB - INTEGER. * On entry, LDB specifies the first dimension of B as declared * in the calling (sub) program. LDB must be at least * max( 1, m ). * Unchanged on exit. * * * Level 3 Blas routine. * * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * -- Restructured, blocked and tuned for IBM 3090 VF in August-1989. * Per Ling, * Institute of Information Processing, University of Umea, Sweden. * V1.R0. * * * .. Local Scalars .. X INTEGER I, J, INFO, NROWA X LOGICAL LSIDE, UPPER, NOTR, NOUNIT * .. External Functions .. X LOGICAL LSAME X EXTERNAL LSAME * .. External Subroutines .. X EXTERNAL XERBLA X EXTERNAL USM001, USM002, USM003, USM004 X EXTERNAL USM005, USM006, USM007, USM008 * .. Parameters .. X DOUBLE PRECISION ZERO X PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Executable Statements .. * * Test the input parameters. * X LSIDE = LSAME( SIDE, 'L' ) X UPPER = LSAME( UPLO, 'U' ) X NOTR = LSAME( TRANSA, 'N' ) X NOUNIT = LSAME( DIAG, 'N' ) X IF( LSIDE )THEN X NROWA = M X ELSE X NROWA = N X END IF X INFO = 0 X IF( ( .NOT.LSIDE ).AND. X + ( .NOT.LSAME( SIDE , 'R' ) ) )THEN X INFO = 1 X ELSE IF( ( .NOT.UPPER ).AND. X + ( .NOT.LSAME( UPLO , 'L' ) ) )THEN X INFO = 2 X ELSE IF( ( .NOT.NOTR ).AND. X + ( .NOT.LSAME( TRANSA, 'T' ) ).AND. X + ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN X INFO = 3 X ELSE IF( ( .NOT.NOUNIT ).AND. X + ( .NOT.LSAME( DIAG , 'U' ) ) )THEN X INFO = 4 X ELSE IF( M.LT.0 )THEN X INFO = 5 X ELSE IF( N.LT.0 )THEN X INFO = 6 X ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN X INFO = 9 X ELSE IF( LDB.LT.MAX( 1, M ) )THEN X INFO = 11 X END IF X IF( INFO.NE.0 )THEN X CALL XERBLA( 'DTRSM ', INFO ) X RETURN X END IF * * Quick return if possible. * X IF( ( M.EQ.0 ).OR.( N.EQ.0 ) ) X + RETURN * * And when alpha.eq.zero. * X IF( ALPHA.EQ.ZERO )THEN X DO 20, I = 1, M X DO 10, J = 1, N X B( I, J ) = ZERO X 10 CONTINUE X 20 CONTINUE X RETURN X END IF * * Start the operations. * X IF( LSIDE )THEN * * Solve op( A )*X = alpha*B. * X IF( UPPER )THEN X IF( NOTR )THEN * Left, Upper, No transpose X CALL USM001( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X ELSE * Left, Upper, Transpose X CALL USM002( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X END IF X ELSE X IF( NOTR )THEN * Left, Lower, No transpose X CALL USM003( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X ELSE * Left, Lower, Transpose X CALL USM004( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X END IF X END IF * X ELSE * * Solve X*op( A ) = alpha*B. * X IF( UPPER )THEN X IF( NOTR )THEN * Right, Upper, No transpose X CALL USM005( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X ELSE * Right, Upper, Transpose X CALL USM006( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X END IF X ELSE X IF( NOTR )THEN * Right, Lower, No transpose X CALL USM007( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X ELSE * Right, Lower, Transpose X CALL USM008( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) X END IF X END IF X END IF * X RETURN * * End of DTRSM . * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE USM001( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * USM001 solves the matrix equation A*X = alpha*B where X and B are * m by n matrices, A appears on the left side of X, A is a unit, or * non-unit, non-transposed upper triangular matrix. Alpha is scalar. * The matrix B is overwritten by X. * * Left, Upper, No transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, P, PP X LOGICAL CLDB X DOUBLE PRECISION TEMP, IALPHA, IA * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Parameters .. X DOUBLE PRECISION ONE X PARAMETER ( ONE = 1.0D0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Array .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN02/ T1, T2 * .. * .. Executable Statements .. * X IALPHA = ONE/ALPHA X CLDB = CLD( LDB ) X DO 190, JJ = M, 1, -EFF * * Computation using triangular diagonal-blocks of A. * X DO 130, PP = 1, N, VSS X IF( CLDB )THEN C*VDIR: PREFER VECTOR X DO 20, J = MAX( JJ-EFF+1, 1 ), JJ X DO 10, P = PP, MIN( PP+VSS-1, N ) X T1( P-PP+1, J-JJ+EFF ) = ALPHA*B( J, P ) X 10 CONTINUE X 20 CONTINUE X ELSE C*VDIR: PREFER VECTOR X DO 40, P = PP, MIN( PP+VSS-1, N ) X DO 30, J = MAX( JJ-EFF+1, 1 ), JJ X T1( P-PP+1, J-JJ+EFF ) = ALPHA*B( J, P ) X 30 CONTINUE X 40 CONTINUE X END IF X IF( NOUNIT )THEN X DO 70, I = JJ, MAX( JJ-EFF+1, 1 ), -1 X IA = ONE/A( I, I ) X DO 60, P = PP, MIN( PP+VSS-1, N ) X TEMP = T1( P-PP+1, I-JJ+EFF ) X DO 50, J = I+1, JJ X TEMP = TEMP - A( I, J )*T1( P-PP+1, J-JJ+EFF ) X 50 CONTINUE X T1( P-PP+1, I-JJ+EFF ) = TEMP*IA X 60 CONTINUE X 70 CONTINUE X ELSE X DO 100, I = JJ-1, MAX( JJ-EFF+1, 1 ), -1 X DO 90, P = PP, MIN( PP+VSS-1, N ) X TEMP = T1( P-PP+1, I-JJ+EFF ) X DO 80, J = I+1, JJ X TEMP = TEMP - A( I, J )*T1( P-PP+1, J-JJ+EFF ) X 80 CONTINUE X T1( P-PP+1, I-JJ+EFF ) = TEMP X 90 CONTINUE X 100 CONTINUE X END IF X DO 120, J = MAX( JJ-EFF+1, 1 ), JJ X DO 110, P = PP, MIN( PP+VSS-1, N ) X B( J, P ) = T1( P-PP+1, J-JJ+EFF ) X 110 CONTINUE X 120 CONTINUE X 130 CONTINUE * * Computation using rectangular blocks of A. * X IF( JJ-EFF+1.GT.1 )THEN X DO 180, II = JJ-EFF, 1, -VSS C*VDIR: IGNORE RECRDEPS X DO 170, I = MAX( II-VSS+1, 1 ), II X DO 140, J = MAX( JJ-EFF+1, 1 ), JJ X T1( I-II+VSS, J-JJ+EFF ) = IALPHA*A( I, J ) X 140 CONTINUE X DO 160 P = 1, N X TEMP = B( I, P ) X DO 150 J = MAX( JJ-EFF+1, 1 ), JJ X TEMP = TEMP - B( J, P )*T1( I-II+VSS, J-JJ+EFF ) X 150 CONTINUE X B( I, P ) = TEMP X 160 CONTINUE X 170 CONTINUE X 180 CONTINUE X END IF X 190 CONTINUE * X RETURN * * End of USM001 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE USM002( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * USM002 solves the matrix equation A*X = alpha*B where X and B are * m by n matrices, A appears on the left side of X, A is a unit, or * non-unit, transposed upper triangular matrix. Alpha is a scalar. * The matrix B is overwritten by X. * * Left, Upper, Transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, P, PP X LOGICAL CLDA, CLDB X DOUBLE PRECISION TEMP, IALPHA, IA * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Parameters .. X DOUBLE PRECISION ONE X PARAMETER ( ONE = 1.0D0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Array .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN02/ T1, T2 * .. * .. Executable Statements .. * X IALPHA = ONE/ALPHA X CLDA = CLD( LDA ) X CLDB = CLD( LDB ) X DO 220, JJ = 1, M, EFF * * Computation using triangular diagonal-blocks of A. * X DO 130, PP = 1, N, VSS X IF( CLDB )THEN C*VDIR: PREFER VECTOR X DO 20, J = JJ, MIN( JJ+EFF-1, M ) X DO 10, P = PP, MIN( PP+VSS-1, N ) X T1( P-PP+1, J-JJ+1 ) = ALPHA*B( J, P ) X 10 CONTINUE X 20 CONTINUE X ELSE X DO 40, P = PP, MIN( PP+VSS-1, N ) X DO 30, J = JJ, MIN( JJ+EFF-1, M ) X T1( P-PP+1, J-JJ+1 ) = ALPHA*B( J, P ) X 30 CONTINUE X 40 CONTINUE X END IF X IF( NOUNIT )THEN X DO 70, I = JJ, MIN( JJ+EFF-1, M ) X IA = ONE/A( I, I ) X DO 60, P = PP, MIN( PP+VSS-1, N ) X TEMP = T1( P-PP+1, I-JJ+1 ) X DO 50, J = JJ, I-1 X TEMP = TEMP - A( J, I )*T1( P-PP+1, J-JJ+1 ) X 50 CONTINUE X T1( P-PP+1, I-JJ+1 ) = TEMP*IA X 60 CONTINUE X 70 CONTINUE X ELSE X DO 100, I = JJ+1, MIN( JJ+EFF-1, M ) X DO 90, P = PP, MIN( PP+VSS-1, N ) X TEMP = T1( P-PP+1, I-JJ+1 ) X DO 80, J = JJ, I-1 X TEMP = TEMP - A( J, I )*T1( P-PP+1, J-JJ+1 ) X 80 CONTINUE X T1( P-PP+1, I-JJ+1 ) = TEMP X 90 CONTINUE X 100 CONTINUE X END IF X DO 120, J = JJ, MIN( JJ+EFF-1, M ) X DO 110, P = PP, MIN( PP+VSS-1, N ) X B( J, P ) = T1( P-PP+1, J-JJ+1 ) X 110 CONTINUE X 120 CONTINUE X 130 CONTINUE * * Computation using rectangular blocks of A. * X IF( JJ+EFF-1.LT.M )THEN X DO 210, II = JJ+EFF, M, VSS X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 150, J = JJ, MIN( JJ+EFF-1, M ) X DO 140, I = II, MIN( II+VSS-1, M ) X T1( I-II+1, J-JJ+1 ) = IALPHA*A( J, I ) X 140 CONTINUE X 150 CONTINUE X ELSE X DO 170, I = II, MIN( II+VSS-1, M ) X DO 160, J = JJ, MIN( JJ+EFF-1, M ) X T1( I-II+1, J-JJ+1 ) = IALPHA*A( J, I ) X 160 CONTINUE X 170 CONTINUE X END IF C*VDIR: IGNORE RECRDEPS X DO 200, I = II, MIN( II+VSS-1, M ) X DO 190, P = 1, N X TEMP = B( I, P ) X DO 180, J = JJ, MIN( JJ+EFF-1, M ) X TEMP = TEMP - B( J, P )*T1( I-II+1, J-JJ+1 ) X 180 CONTINUE X B( I, P ) = TEMP X 190 CONTINUE X 200 CONTINUE X 210 CONTINUE X END IF X 220 CONTINUE * X RETURN * * End of USM002 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE USM003( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * USM003 solves the matrix equation A*X = alpha*B where X and B are * m by n matrices, A appears on the left side of X, A is a unit, or * non-unit, non-transposed lower triangular matrix. Alpha is a scalar. * The matrix B is overwritten by X. * * Left, Lower, No transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, P, PP X LOGICAL CLDB X DOUBLE PRECISION TEMP, IALPHA, IA * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Parameters .. X DOUBLE PRECISION ONE X PARAMETER ( ONE = 1.0D0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Array .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN02/ T1, T2 * .. * .. Executable Statements .. * X IALPHA = ONE/ALPHA X CLDB = CLD( LDB ) X DO 190, JJ = 1, M, EFF * * Computation using triangular diagonal-blocks of A. * X DO 130, PP = 1, N, VSS X IF( CLDB )THEN C*VDIR: PREFER VECTOR X DO 20, J = JJ, MIN( JJ+EFF-1, M ) X DO 10, P = PP, MIN( PP+VSS-1, N ) X T1( P-PP+1, J-JJ+1 ) = ALPHA*B( J, P ) X 10 CONTINUE X 20 CONTINUE X ELSE X DO 40, P = PP, MIN( PP+VSS-1, N ) X DO 30, J = JJ, MIN( JJ+EFF-1, M ) X T1( P-PP+1, J-JJ+1 ) = ALPHA*B( J, P ) X 30 CONTINUE X 40 CONTINUE X END IF X IF( NOUNIT )THEN X DO 70, I = JJ, MIN( JJ+EFF-1, M ) X IA = ONE/A( I, I ) X DO 60, P = PP, MIN( PP+VSS-1, N ) X TEMP = T1( P-PP+1, I-JJ+1 ) X DO 50, J = JJ, I-1 X TEMP = TEMP - A( I, J )*T1( P-PP+1, J-JJ+1 ) X 50 CONTINUE X T1( P-PP+1, I-JJ+1 ) = TEMP*IA X 60 CONTINUE X 70 CONTINUE X ELSE X DO 100, I = JJ+1, MIN( JJ+EFF-1, M ) X DO 90, P = PP, MIN( PP+VSS-1, N ) X TEMP = T1( P-PP+1, I-JJ+1 ) X DO 80, J = JJ, I-1 X TEMP = TEMP - A( I, J )*T1( P-PP+1, J-JJ+1 ) X 80 CONTINUE X T1( P-PP+1, I-JJ+1 ) = TEMP X 90 CONTINUE X 100 CONTINUE X END IF X DO 120 J = JJ, MIN( JJ+EFF-1, M ) X DO 110 P = PP, MIN( PP+VSS-1, N ) X B( J, P ) = T1( P-PP+1, J-JJ+1 ) X 110 CONTINUE X 120 CONTINUE X 130 CONTINUE * * Computation using rectangular blocks of A. * X IF( JJ+EFF-1.LT.M )THEN X DO 180, II = JJ+EFF, M, VSS C*VDIR: IGNORE RECRDEPS X DO 170, I = II, MIN( II+VSS-1, M ) X DO 140, J = JJ, MIN( JJ+EFF-1, M ) X T1( I-II+1, J-JJ+1 ) = IALPHA*A( I, J ) X 140 CONTINUE X DO 160, P = 1, N X TEMP = B( I, P ) X DO 150, J = JJ, MIN( JJ+EFF-1, M ) X TEMP = TEMP - B( J, P )*T1( I-II+1, J-JJ+1 ) X 150 CONTINUE X B( I, P ) = TEMP X 160 CONTINUE X 170 CONTINUE X 180 CONTINUE X END IF X 190 CONTINUE * X RETURN * * End of USM003 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE USM004( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * USM004 solves the matrix equation A*X = alpha*B where X and B are * m by n matrices, A appears on the left side of X, A is a unit, or * non-unit, transposed lower triangular matrix. Alpha is a scalar. * The matrix B is overwritten by X. * * Left, Lower, Transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, P, PP X LOGICAL CLDA, CLDB X DOUBLE PRECISION TEMP, IALPHA, IA * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Parameters .. X DOUBLE PRECISION ONE X PARAMETER ( ONE = 1.0D0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Array .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN02/ T1, T2 * .. * .. Executable Statements .. * X IALPHA = ONE/ALPHA X CLDA = CLD( LDA ) X CLDB = CLD( LDB ) X DO 230, JJ = M, 1, -EFF * * Computation using triangular diagonal-blocks of A. * X DO 130, PP = 1, N, VSS X IF( CLDB )THEN C*VDIR: PREFER VECTOR X DO 20, J = MAX( JJ-EFF+1, 1 ), JJ X DO 10, P = PP, MIN( PP+VSS-1, N ) X T1( P-PP+1, J-JJ+EFF ) = ALPHA*B( J, P ) X 10 CONTINUE X 20 CONTINUE X ELSE C*VDIR: PREFER VECTOR X DO 40, P = PP, MIN( PP+VSS-1, N ) X DO 30, J = MAX( JJ-EFF+1, 1 ), JJ X T1( P-PP+1, J-JJ+EFF ) = ALPHA*B( J, P ) X 30 CONTINUE X 40 CONTINUE X END IF X IF( NOUNIT )THEN X DO 70, I = JJ, MAX( JJ-EFF+1, 1 ), -1 X IA = ONE/A( I, I ) X DO 60, P = PP, MIN( PP+VSS-1, N ) X TEMP = T1( P-PP+1, I-JJ+EFF ) X DO 50, J = I+1, JJ X TEMP = TEMP - A( J, I )*T1( P-PP+1, J-JJ+EFF ) X 50 CONTINUE X T1( P-PP+1, I-JJ+EFF ) = TEMP*IA X 60 CONTINUE X 70 CONTINUE X ELSE X DO 100, I = JJ-1, MAX( JJ-EFF+1, 1 ), -1 X DO 90, P = PP, MIN( PP+VSS-1, N ) X TEMP = T1( P-PP+1, I-JJ+EFF ) X DO 80, J = I+1, JJ X TEMP = TEMP - A( J, I )*T1( P-PP+1, J-JJ+EFF ) X 80 CONTINUE X T1( P-PP+1, I-JJ+EFF ) = TEMP X 90 CONTINUE X 100 CONTINUE X END IF X DO 120, J = MAX( JJ-EFF+1, 1 ), JJ X DO 110, P = PP, MIN( PP+VSS-1, N ) X B( J, P ) = T1( P-PP+1, J-JJ+EFF ) X 110 CONTINUE X 120 CONTINUE X 130 CONTINUE * * Computation using rectangular blocks of A. * X IF( JJ-EFF+1.GT.1 )THEN X DO 220, II = JJ-EFF, 1, -VSS X IF( CLDA )THEN C*VDIR: PREFER VECTOR X DO 160, J = MAX( JJ-EFF+1, 1 ), JJ X DO 150, I = MAX( II-VSS+1, 1 ), II X T1( I-II+VSS, J-JJ+EFF ) = IALPHA*A( J, I ) X 150 CONTINUE X 160 CONTINUE X ELSE X DO 180, I = MAX( II-VSS+1, 1 ), II X DO 170, J = MAX( JJ-EFF+1, 1 ), JJ X T1( I-II+VSS, J-JJ+EFF ) = IALPHA*A( J, I ) X 170 CONTINUE X 180 CONTINUE X END IF C*VDIR: IGNORE RECRDEPS X DO 210, I = MAX( II-VSS+1, 1 ), II X DO 200, P = 1, N X TEMP = B( I, P ) X DO 190, J = MAX( JJ-EFF+1, 1 ), JJ X TEMP = TEMP - B( J, P )*T1( I-II+VSS, J-JJ+EFF ) X 190 CONTINUE X B( I, P ) = TEMP X 200 CONTINUE X 210 CONTINUE X 220 CONTINUE X END IF X 230 CONTINUE * X RETURN * * End of USM004 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE USM005( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * USM005 solves the matrix equation X*A = alpha*B where X and B are * m by n matrices, A appears on the right side of X, A is a unit, or * non-unit, non-transposed upper triangular matrix. Alpha is a scalar. * The matrix B is overwritten by X. * * Right, Upper, No transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, P, PP X DOUBLE PRECISION TEMP, IA * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Parameters .. X DOUBLE PRECISION ONE X PARAMETER ( ONE = 1.0D0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Array .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN02/ T1, T2 * .. * .. Executable Statements .. * X DO 160, JJ = 1, N, EFF X DO 150, II = 1, M, VSS X DO 20, I = II, MIN( II+VSS-1, M ) X DO 10, J = JJ, MIN( JJ+EFF-1, N ) X T1( I-II+1, J-JJ+1 ) = B( I, J ) X 10 CONTINUE X 20 CONTINUE X DO 120, PP = JJ, N, EFF * * Computation using triangular diagonal-blocks of A. * X IF( NOUNIT .AND. PP.EQ.JJ )THEN X DO 50, P = PP, MIN( PP+EFF-1, N ) X IA = ONE/A( P, P ) X DO 40, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+1 ) X DO 30, J = JJ, P-1 X TEMP = TEMP - A( J, P )*T1( I-II+1, J-JJ+1 ) X 30 CONTINUE X T1( I-II+1, P-PP+1 ) = TEMP*IA X 40 CONTINUE X 50 CONTINUE X ELSE IF( PP.EQ.JJ )THEN X DO 80, P = PP+1, MIN( PP+EFF-1, N ) X DO 70, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+1 ) X DO 60, J = JJ, P-1 X TEMP = TEMP - A( J, P )*T1( I-II+1, J-JJ+1 ) X 60 CONTINUE X T1( I-II+1, P-PP+1 ) = TEMP X 70 CONTINUE X 80 CONTINUE X ELSE * * Computation using rectangular blocks of A. * C*VDIR: IGNORE RECRDEPS X DO 110, I = II, MIN( II+VSS-1, M ) X DO 100, P = PP, MIN( PP+EFF-1, N ) X TEMP = B( I, P ) X DO 90, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = TEMP - A( J, P )*T1( I-II+1, J-JJ+1 ) X 90 CONTINUE X B( I, P ) = TEMP X 100 CONTINUE X 110 CONTINUE X ENDIF X 120 CONTINUE X DO 140, I = II, MIN( II+VSS-1, M ) X DO 130, J = JJ, MIN( JJ+EFF-1, N ) X B( I, J ) = ALPHA*T1( I-II+1, J-JJ+1 ) X 130 CONTINUE X 140 CONTINUE X 150 CONTINUE X 160 CONTINUE * X RETURN * * End of USM005 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE USM006( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * USM006 solves the matrix equation X*A = alpha*B where X and B are * m by n matrices, A appears on the right side of X, A is a unit, or * non-unit, transposed upper triangular matrix. Alpha is a scalar. * The matrix B is overwritten by X. * * Right, Upper, Transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, P, PP X DOUBLE PRECISION TEMP, IA * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Parameters .. X DOUBLE PRECISION ONE X PARAMETER ( ONE = 1.0D0 ) X INTEGER VSS, EFF, EFF2 X PARAMETER ( VSS = 128, EFF = 48, EFF2 = 32 ) * .. Local Array .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN02/ T1, T2 * .. * .. Executable Statements .. * X IF( CLD( LDA ) )THEN X DO 220, JJ = N, 1, -EFF X DO 210, II = 1, M, VSS X DO 20, I = II, MIN( II+VSS-1, M ) X DO 10, J = MAX( JJ-EFF+1, 1 ), JJ X T1( I-II+1, J-JJ+EFF ) = B( I, J ) X 10 CONTINUE X 20 CONTINUE X DO 180, PP = JJ, 1, -EFF X IF( NOUNIT .AND. PP.EQ.JJ )THEN * * Computation using triangular diagonal-blocks of A. * X DO 40, J = MAX( JJ-EFF+1, 1 ), JJ C*VDIR: PREFER VECTOR X DO 30, P = MAX( PP-EFF+1, 1 ), J X T2( J-JJ+EFF, P-PP+EFF ) = A( P, J ) X 30 CONTINUE X 40 CONTINUE X DO 70, P = PP, MAX( PP-EFF+1, 1 ), -1 X IA = ONE/T2( P-PP+EFF, P-PP+EFF ) X DO 60, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+EFF ) X DO 50, J = P+1, JJ X TEMP = TEMP - T2( J-JJ+EFF, P-PP+EFF )* X + T1( I-II+1, J-JJ+EFF ) X 50 CONTINUE X T1( I-II+1, P-PP+EFF ) = TEMP*IA X 60 CONTINUE X 70 CONTINUE X ELSE IF( PP.EQ.JJ )THEN X DO 90, J = MAX( JJ-EFF+2, 2 ), JJ C*VDIR: PREFER VECTOR X DO 80, P = MAX( PP-EFF+1, 1 ), J-1 X T2( J-JJ+EFF, P-PP+EFF ) = A( P, J ) X 80 CONTINUE X 90 CONTINUE X DO 120, P = PP-1, MAX( PP-EFF+1, 1 ), -1 X DO 110, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+EFF ) X DO 100, J = P+1, JJ X TEMP = TEMP - T2( J-JJ+EFF, P-PP+EFF )* X + T1( I-II+1, J-JJ+EFF ) X 100 CONTINUE X T1( I-II+1, P-PP+EFF ) = TEMP X 110 CONTINUE X 120 CONTINUE X ELSE * * Computation using rectangular blocks of A. * C*VDIR: PREFER VECTOR X DO 140, P = MAX( PP-EFF+1, 1 ), PP X DO 130, J = MAX( JJ-EFF+1, 1 ), JJ X T2( J-JJ+EFF, P-PP+EFF ) = A( P, J ) X 130 CONTINUE X 140 CONTINUE X DO 170, I = II, MIN( II+VSS-1, M ) X DO 160, P = MAX( PP-EFF+1, 1 ), PP X TEMP = B( I, P ) X DO 150, J = MAX( JJ-EFF+1, 1 ), JJ X TEMP = TEMP - T2( J-JJ+EFF, P-PP+EFF )* X + T1( I-II+1, J-JJ+EFF ) X 150 CONTINUE X B( I, P ) = TEMP X 160 CONTINUE X 170 CONTINUE X ENDIF X 180 CONTINUE X DO 200, I = II, MIN( II+VSS-1, M ) X DO 190, J = MAX( JJ-EFF+1, 1 ), JJ X B( I, J ) = ALPHA*T1( I-II+1, J-JJ+EFF ) X 190 CONTINUE X 200 CONTINUE X 210 CONTINUE X 220 CONTINUE X ELSE X DO 380, JJ = N, 1, -EFF2 X DO 370, II = 1, M, VSS X DO 240, I = II, MIN( II+VSS-1, M ) X DO 230, J = MAX( JJ-EFF2+1, 1 ), JJ X T1( I-II+1, J-JJ+EFF2 ) = B( I, J ) X 230 CONTINUE X 240 CONTINUE X DO 340, PP = JJ, 1, -EFF2 X IF( NOUNIT .AND. PP.EQ.JJ )THEN * * Computation using triangular diagonal-blocks of A. * X DO 270, P = PP, MAX( PP-EFF2+1, 1 ), -1 X IA = ONE/A( P, P ) X DO 260, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+EFF2 ) X DO 250, J = P+1, JJ X TEMP = TEMP - A( P, J )* X + T1( I-II+1, J-JJ+EFF2 ) X 250 CONTINUE X T1( I-II+1, P-PP+EFF2 ) = TEMP*IA X 260 CONTINUE X 270 CONTINUE X ELSE IF( PP.EQ.JJ )THEN X DO 300, P = PP-1, MAX( PP-EFF2+1, 1 ), -1 X DO 290, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+EFF2 ) X DO 280, J = P+1, JJ X TEMP = TEMP - A( P, J )* X + T1( I-II+1, J-JJ+EFF2 ) X 280 CONTINUE X T1( I-II+1, P-PP+EFF2 ) = TEMP X 290 CONTINUE X 300 CONTINUE X ELSE * * Computation using rectangular blocks of A. * X DO 330, I = II, MIN( II+VSS-1, M ) X DO 320, P = MAX( PP-EFF2+1, 1 ), PP X TEMP = B( I, P ) X DO 310, J = MAX( JJ-EFF2+1, 1 ), JJ X TEMP = TEMP - A( P, J )* X + T1( I-II+1, J-JJ+EFF2 ) X 310 CONTINUE X B( I, P ) = TEMP X 320 CONTINUE X 330 CONTINUE X ENDIF X 340 CONTINUE X DO 360, I = II, MIN( II+VSS-1, M ) X DO 350, J = MAX( JJ-EFF2+1, 1 ), JJ X B( I, J ) = ALPHA*T1( I-II+1, J-JJ+EFF2 ) X 350 CONTINUE X 360 CONTINUE X 370 CONTINUE X 380 CONTINUE X END IF * X RETURN * * End of USM006 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE USM007( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * USM007 solves the matrix equation X*A = alpha*B where X and B are * m by n matrices, A appears on the right side of X, A is a unit, or * non-unit, non-transposed lower triangular matrix. Alpha is a scalar. * The matrix B is overwritten by X. * * Right, Lower, No transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, P, PP X DOUBLE PRECISION TEMP, IA * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Parameters .. X DOUBLE PRECISION ONE X PARAMETER ( ONE = 1.0D0 ) X INTEGER VSS, EFF X PARAMETER ( VSS = 128, EFF = 48 ) * .. Local Array .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN02/ T1, T2 * .. * .. Executable Statements .. * X DO 160, JJ = N, 1, -EFF X DO 150, II = 1, M, VSS X DO 20, I = II, MIN( II+VSS-1, M ) X DO 10, J = MAX( JJ-EFF+1, 1 ), JJ X T1( I-II+1, J-JJ+EFF ) = B( I, J ) X 10 CONTINUE X 20 CONTINUE X DO 120, PP = JJ, 1, -EFF X IF( NOUNIT .AND. PP.EQ.JJ )THEN * * Computation using triangular diagonal-blocks of A. * X DO 50, P = PP, MAX( PP-EFF+1, 1 ), -1 X IA = ONE/A( P, P ) X DO 40, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+EFF ) X DO 30, J = P+1, JJ X TEMP = TEMP - A( J, P )* X + T1( I-II+1, J-JJ+EFF ) X 30 CONTINUE X T1( I-II+1, P-PP+EFF ) = TEMP*IA X 40 CONTINUE X 50 CONTINUE X ELSE IF( PP.EQ.JJ )THEN X DO 80, P = PP-1, MAX( PP-EFF+1, 1 ), -1 X DO 70, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+EFF ) X DO 60, J = P+1, JJ X TEMP = TEMP - A( J, P )* X + T1( I-II+1, J-JJ+EFF ) X 60 CONTINUE X T1( I-II+1, P-PP+EFF ) = TEMP X 70 CONTINUE X 80 CONTINUE X ELSE * * Computation using rectangular blocks of A. * X DO 110, I = II, MIN( II+VSS-1, M ) X DO 100, P = MAX( PP-EFF+1, 1 ), PP X TEMP = B( I, P ) X DO 90, J = MAX( JJ-EFF+1, 1 ), JJ X TEMP = TEMP - A( J, P )* X + T1( I-II+1, J-JJ+EFF ) X 90 CONTINUE X B( I, P ) = TEMP X 100 CONTINUE X 110 CONTINUE X ENDIF X 120 CONTINUE X DO 140, I = II, MIN( II+VSS-1, M ) X DO 130, J = MAX( JJ-EFF+1, 1 ), JJ X B( I, J ) = ALPHA*T1( I-II+1, J-JJ+EFF ) X 130 CONTINUE X 140 CONTINUE X 150 CONTINUE X 160 CONTINUE * X RETURN * * End of USM007 * X END @PROCESS DIRECTIVE('*VDIR:') X SUBROUTINE USM008( NOUNIT, M, N, ALPHA, A, LDA, B, LDB ) * .. Scalar Arguments .. X LOGICAL NOUNIT X INTEGER M, N, LDA, LDB X DOUBLE PRECISION ALPHA * .. Array Arguments .. X DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * USM008 solves the matrix equation X*A = alpha*B where X and B are * m by n matrices, A appears on the right side of X, A is a unit, or * non-unit, transposed lower triangular matrix. Alpha is a scalar. * The matrix B is overwritten by X. * * Right, Lower, Transpose * * .. Local Scalars .. X INTEGER I, II, J, JJ, P, PP X DOUBLE PRECISION TEMP, IA * .. External Functions .. X LOGICAL CLD X EXTERNAL CLD * .. Intrinsic Functions .. X INTRINSIC MIN, MAX * .. Parameters .. X DOUBLE PRECISION ONE X PARAMETER ( ONE = 1.0D0 ) X INTEGER VSS, EFF, EFF2 X PARAMETER ( VSS = 128, EFF = 48, EFF2 = 32 ) * .. Local Array .. * VSS is the Vector Section Size. ( VSS*EFF ) double precision words * should correspond to the effective cache size. X DOUBLE PRECISION T1( VSS, EFF ), T2( VSS, EFF ) X COMMON /UCMN02/ T1, T2 * .. * .. Executable Statements .. * X IF( CLD( LDA ) )THEN X DO 220, JJ = 1, N, EFF X DO 210, II = 1, M, VSS X DO 20, I = II, MIN( II+VSS-1, M ) X DO 10, J = JJ, MIN( JJ+EFF-1, N ) X T1( I-II+1, J-JJ+1 ) = B( I, J ) X 10 CONTINUE X 20 CONTINUE X DO 180, PP = JJ, N, EFF X IF( NOUNIT .AND. PP.EQ.JJ )THEN * * Computation using triangular diagonal-blocks of A. * X DO 40, J = JJ, MIN( JJ+EFF-1, N ) C*VDIR: PREFER VECTOR X DO 30, P = J, MIN( PP+EFF-1, N ) X T2( J-JJ+1, P-PP+1 ) = A( P, J ) X 30 CONTINUE X 40 CONTINUE X DO 70, P = PP, MIN( PP+EFF-1, N ) X IA = ONE/T2( P-PP+1, P-PP+1 ) X DO 60, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+1 ) X DO 50, J = JJ, P-1 X TEMP = TEMP - T2( J-JJ+1, P-PP+1 )* X + T1( I-II+1, J-JJ+1 ) X 50 CONTINUE X T1( I-II+1, P-PP+1 ) = TEMP*IA X 60 CONTINUE X 70 CONTINUE X ELSE IF( PP.EQ.JJ )THEN X DO 90, J = JJ, MIN( JJ+EFF-2, N-1 ) C*VDIR: PREFER VECTOR X DO 80, P = J+1, MIN( PP+EFF-1, N ) X T2( J-JJ+1, P-PP+1 ) = A( P, J ) X 80 CONTINUE X 90 CONTINUE X DO 120, P = PP+1, MIN( PP+EFF-1, N ) X DO 110, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+1 ) X DO 100, J = JJ, P-1 X TEMP = TEMP - T2( J-JJ+1, P-PP+1 )* X + T1( I-II+1, J-JJ+1 ) X 100 CONTINUE X T1( I-II+1, P-PP+1 ) = TEMP X 110 CONTINUE X 120 CONTINUE X ELSE * * Computation using rectangular blocks of A. * C*VDIR: PREFER VECTOR X DO 140, P = PP, MIN( PP+EFF-1, N ) X DO 130, J = JJ, MIN( JJ+EFF-1, N ) X T2( J-JJ+1, P-PP+1 ) = A( P, J ) X 130 CONTINUE X 140 CONTINUE X DO 170, I = II, MIN( II+VSS-1, M ) X DO 160, P = PP, MIN( PP+EFF-1, N ) X TEMP = B( I, P ) X DO 150, J = JJ, MIN( JJ+EFF-1, N ) X TEMP = TEMP - T2( J-JJ+1, P-PP+1 )* X + T1( I-II+1, J-JJ+1 ) X 150 CONTINUE X B( I, P ) = TEMP X 160 CONTINUE X 170 CONTINUE X ENDIF X 180 CONTINUE X DO 200, I = II, MIN( II+VSS-1, M ) X DO 190, J = JJ, MIN( JJ+EFF-1, N ) X B( I, J ) = ALPHA*T1( I-II+1, J-JJ+1 ) X 190 CONTINUE X 200 CONTINUE X 210 CONTINUE X 220 CONTINUE X ELSE X DO 380, JJ = 1, N, EFF2 X DO 370, II = 1, M, VSS X DO 240, I = II, MIN( II+VSS-1, M ) X DO 230, J = JJ, MIN( JJ+EFF2-1, N ) X T1( I-II+1, J-JJ+1 ) = B( I, J ) X 230 CONTINUE X 240 CONTINUE X DO 340, PP = JJ, N, EFF2 X IF( NOUNIT .AND. PP.EQ.JJ )THEN * * Computation using triangular diagonal-blocks of A. * X DO 270, P = PP, MIN( PP+EFF2-1, N ) X IA = ONE/A( P, P ) X DO 260, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+1 ) X DO 250, J = JJ, P-1 X TEMP = TEMP - A( P, J )* X + T1( I-II+1, J-JJ+1 ) X 250 CONTINUE X T1( I-II+1, P-PP+1 ) = TEMP*IA X 260 CONTINUE X 270 CONTINUE X ELSE IF( PP.EQ.JJ )THEN X DO 300, P = PP+1, MIN( PP+EFF2-1, N ) X DO 290, I = II, MIN( II+VSS-1, M ) X TEMP = T1( I-II+1, P-PP+1 ) X DO 280, J = JJ, P-1 X TEMP = TEMP - A( P, J )* X + T1( I-II+1, J-JJ+1 ) X 280 CONTINUE X T1( I-II+1, P-PP+1 ) = TEMP X 290 CONTINUE X 300 CONTINUE X ELSE * * Computation using rectangular blocks of A. * X DO 330, I = II, MIN( II+VSS-1, M ) X DO 320, P = PP, MIN( PP+EFF2-1, N ) X TEMP = B( I, P ) X DO 310, J = JJ, MIN( JJ+EFF2-1, N ) X TEMP = TEMP - A( P, J )* X + T1( I-II+1, J-JJ+1 ) X 310 CONTINUE X B( I, P ) = TEMP X 320 CONTINUE X 330 CONTINUE X ENDIF X 340 CONTINUE X DO 360, I = II, MIN( II+VSS-1, M ) X DO 350, J = JJ, MIN( JJ+EFF2-1, N ) X B( I, J ) = ALPHA*T1( I-II+1, J-JJ+1 ) X 350 CONTINUE X 360 CONTINUE X 370 CONTINUE X 380 CONTINUE X END IF * X RETURN * * End of USM008 * X END SHAR_EOF chmod 0600 dtrsm.f || echo 'restore of dtrsm.f failed' Wc_c="`wc -c < 'dtrsm.f'`" test 45939 -eq "$Wc_c" || echo 'dtrsm.f: original size 45939, current size' "$Wc_c" fi # ============= lsame.f ============== if test -f 'lsame.f' -a X"$1" != X"-c"; then echo 'x - skipping lsame.f (File already exists)' else echo 'x - extracting lsame.f (Text)' sed 's/^X//' << 'SHAR_EOF' > 'lsame.f' && X LOGICAL FUNCTION LSAME( CA, CB ) * * -- LAPACK auxiliary routine (preliminary version) -- * Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, * Courant Institute, NAG Ltd., and Rice University * March 26, 1990 * * .. Scalar Arguments .. X CHARACTER CA, CB * .. * * Purpose * ======= * * LSAME returns .TRUE. if CA is the same letter as CB regardless of * case. * * This version of the routine is only correct for ASCII code. * Installers must modify the routine for other character-codes. * * For EBCDIC systems the constant IOFF must be changed to -64. * For CDC systems using 6-12 bit representations, the system- * specific code in comments must be activated. * **** Modified for EBCDIC, IOFF = -64 **** * * Arguments * ========= * * CA (input) CHARACTER*1 * CB (input) CHARACTER*1 * CA and CB specify the single characters to be compared. * * * .. Parameters .. X INTEGER IOFF X PARAMETER ( IOFF = -64 ) * .. * .. Intrinsic Functions .. X INTRINSIC ICHAR * .. * .. Executable Statements .. * * Test if the characters are equal * X LSAME = CA.EQ.CB * * Now test for equivalence * X IF( .NOT.LSAME ) THEN X LSAME = ICHAR( CA ) - IOFF.EQ.ICHAR( CB ) X END IF X IF( .NOT.LSAME ) THEN X LSAME = ICHAR( CA ).EQ.ICHAR( CB ) - IOFF X END IF * X RETURN * * The following comments contain code for CDC systems using 6-12 bit * representations. * * .. Parameters .. * INTEGER ICIRFX * PARAMETER ( ICIRFX=62 ) * .. Scalar arguments .. * CHARACTER*1 CB * .. Array arguments .. * CHARACTER*1 CA(*) * .. Local scalars .. * INTEGER IVAL * .. Intrinsic functions .. * INTRINSIC ICHAR, CHAR * .. Executable statements .. * * See if the first character in string CA equals string CB. * * * LSAME = CA(1) .EQ. CB .AND. CA(1) .NE. CHAR(ICIRFX) * * IF (LSAME) RETURN * * The characters are not identical. Now check them for equivalence. * Look for the 'escape' character, circumflex, followed by the * letter. * * IVAL = ICHAR(CA(2)) * IF (IVAL.GE.ICHAR('A') .AND. IVAL.LE.ICHAR('Z')) THEN * LSAME = CA(1) .EQ. CHAR(ICIRFX) .AND. CA(2) .EQ. CB * END IF * * RETURN * * End of LSAME * X END SHAR_EOF chmod 0600 lsame.f || echo 'restore of lsame.f failed' Wc_c="`wc -c < 'lsame.f'`" test 2431 -eq "$Wc_c" || echo 'lsame.f: original size 2431, current size' "$Wc_c" fi # ============= xerbla.f ============== if test -f 'xerbla.f' -a X"$1" != X"-c"; then echo 'x - skipping xerbla.f (File already exists)' else echo 'x - extracting xerbla.f (Text)' sed 's/^X//' << 'SHAR_EOF' > 'xerbla.f' && X SUBROUTINE XERBLA( SRNAME, INFO ) * * -- LAPACK auxiliary routine (preliminary version) -- * Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, * Courant Institute, NAG Ltd., and Rice University * March 26, 1990 * * .. Scalar Arguments .. X CHARACTER*6 SRNAME X INTEGER INFO * .. * * Purpose * ======= * * XERBLA is an error handler for the LAPACK routines. * It is called by an LAPACK routine if an input parameter has an * invalid value. A message is printed and execution stops. * * Installers may consider modifying the STOP statement in order to * call system-specific exception-handling facilities. * * Arguments * ========= * * SRNAME (input) CHARACTER*6 * The name of the routine which called XERBLA. * * INFO (input) INTEGER * The position of the invalid parameter in the parameter list * of the calling routine. * * X WRITE( *, FMT = 9999 )SRNAME, INFO * X STOP * X 9999 FORMAT( ' ** On entry to ', A6, ' parameter number ', I2, ' had ', X $ 'an illegal value' ) * * End of XERBLA * X END SHAR_EOF chmod 0600 xerbla.f || echo 'restore of xerbla.f failed' Wc_c="`wc -c < 'xerbla.f'`" test 1134 -eq "$Wc_c" || echo 'xerbla.f: original size 1134, current size' "$Wc_c" fi exit 0