% Raw LaTeX placed ahead of the generated bibliography; the hyphenation
% exception list is currently empty.
@preamble{"\hyphenation{}"}
% Acknowledgement text identifying the bibliography maintainer; entries pull
% it in through their acknowledgement field.
@String{ack-nhfb = "Nelson H. F. Beebe,
University of Utah,
Department of Mathematics, 110 LCB,
155 S 1400 E RM 233,
Salt Lake City, UT 84112-0090, USA,
Tel: +1 801 581 5254,
FAX: +1 801 581 4148,
e-mail: \path|beebe@math.utah.edu|,
\path|beebe@acm.org|,
\path|beebe@computer.org| (Internet),
URL: \path|http://www.math.utah.edu/~beebe/|"}
% Institution names and their postal addresses.  Entries reference these
% macros unquoted from their institution and address fields.
@string{inst-ANL-MCS      = {Mathematics and Computer Science Division, Argonne National Laboratory}}
@string{inst-ANL-MCS:adr  = {9700 South Cass Avenue, Argonne, IL 60439-4801, USA}}
@string{inst-INRIA        = {INRIA (Institut National de Recherche en Informatique et en Automatique)}}
@string{inst-INRIA:adr    = {Rocquencourt, France}}
@string{inst-UTK-CS       = {Department of Computer Science, University of Tennessee, Knoxville}}
@string{inst-UTK-CS:adr   = {Knoxville, TN 37996, USA}}
@string{inst-UCB-EECS     = {Department of Electrical Engineering and Computer Science, University of California, Berkeley}}
@string{inst-UCB-EECS:adr = {Berkeley, CA, USA}}
% Journal-title macros (BIT through Concurrency: Practice and Experience).
@string{j-BIT          = {BIT}}
@string{j-BIT-NUM-MATH = {BIT Numerical Mathematics}}
@string{j-CCPE         = {Concurrency and Computation: Prac\-tice and Experience}}
@string{j-CPE          = {Concurrency: Prac\-tice and Experience}}
% Journal-title macro for ETNA.  Stored as the full title, like every other
% journal macro in this file, so that abbreviation is left to the style.
@string{j-ETNA = {Electronic Transactions on Numerical Analysis}}
% Journal-title macros (IEEE Transactions through J. Comput. Appl. Math.).
@string{j-IEEE-TRANS-COMPUT          = {IEEE Transactions on Computers}}
@string{j-IEEE-TRANS-PAR-DIST-SYS    = {IEEE Transactions on Parallel and Distributed Systems}}
@string{j-IJHPCA                     = {The International Journal of High Performance Computing Applications}}
@string{j-IMA-J-NUMER-ANAL           = {IMA Journal of Numerical Analysis}}
@string{j-INT-J-HIGH-SPEED-COMPUTING = {International Journal of High Speed Computing (IJHSC)}}
@string{j-J-COMPUT-APPL-MATH         = {Journal of Computational and Applied Mathematics}}
% Journal-title macro.  The title is given in title case, matching the other
% journal macros; journal names are normally printed exactly as stored.
@string{j-J-NUM-LIN-ALG-APPL = {Journal of Numerical Linear Algebra with Applications}}
% Journal-title macros (J. Parallel Distrib. Comput. through ACM TOMS).
@string{j-J-PAR-DIST-COMP          = {Journal of Parallel and Distributed Computing}}
@string{j-LECT-NOTES-COMP-SCI      = {Lecture Notes in Computer Science}}
@string{j-LINEAR-ALGEBRA-APPL      = {Linear Algebra and its Applications}}
@string{j-NUM-MATH                 = {Numerische Mathematik}}
@string{j-NUMER-ALGORITHMS         = {Numerical Algorithms}}
@string{j-PARALLEL-COMPUTING       = {Parallel Computing}}
@string{j-PARALLEL-DIST-COMP-PRACT = {Parallel and Distributed Computing Practices}}
@string{j-PROC-IEEE                = {Proceedings of the IEEE}}
@string{j-SCI-PROG                 = {Scientific Programming}}
@string{j-SIAM-J-MAT-ANA-APPL      = {SIAM Journal on Matrix Analysis and Applications}}
@string{j-SIAM-J-NUMER-ANAL        = {SIAM Journal on Numerical Analysis}}
@string{j-SIAM-J-SCI-COMP          = {SIAM Journal on Scientific Computing}}
@string{j-SUPERCOMPUTER            = {Supercomputer}}
@string{j-TOMS                     = {ACM Transactions on Mathematical Software}}
% Publisher names, each paired with an ":adr" macro holding its address,
% followed by the series-title macro.
@string{pub-ACM           = {ACM Press}}
@string{pub-ACM:adr       = {New York, NY 10036, USA}}
@string{pub-CAMBRIDGE     = {Cambridge University Press}}
@string{pub-CAMBRIDGE:adr = {Cambridge, UK}}
@string{pub-ELSEVIER      = {Elsevier}}
@string{pub-ELSEVIER:adr  = {Amsterdam, The Netherlands}}
@string{pub-IEEE          = {IEEE Computer Society Press}}
@string{pub-IEEE:adr      = {1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA}}
@string{pub-KLUWER        = {Kluwer Academic Publishers}}
@string{pub-KLUWER:adr    = {Dordrecht, The Netherlands}}
@string{pub-LONGMAN       = {Longman Scientific and Technical}}
@string{pub-LONGMAN:adr   = {Harlow, Essex, UK}}
@string{pub-SIAM          = {Society for Industrial and Applied Mathematics}}
@string{pub-SIAM:adr      = {Philadelphia, PA, USA}}
@string{pub-SV            = {Spring{\-}er-Ver{\-}lag}}
@string{pub-SV:adr        = {Berlin, Germany~/ Heidelberg, Germany~/ London, UK~/ etc.}}
@string{ser-LNCS          = {Lecture Notes in Computer Science}}
% LAPACK Working Note 01 (ANL MCS-TM-97).
@techreport{Demmel:1987:PDL,
  author          = {J. Demmel and J. Dongarra and J. {Du Croz} and A. Greenbaum
                     and S. Hammarling and D. Sorensen},
  title           = {Prospectus for the Development of a Linear Algebra Library
                     for High-Performance Computers},
  type            = {LAPACK Working Note},
  number          = {01},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = sep,
  year            = {1987},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-97, September 1987.},
  url             = {http://www.netlib.org/lapack/lawns/lawn01.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn01.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 02 (ANL MCS-TM-99); the note cross-cites the
% published version.
@techreport{Dongarra:1987:BRM,
  author          = {J. Dongarra and S. Hammarling and D. Sorensen},
  title           = {Block Reduction of Matrices to Condensed Forms for
                     Eigenvalue Computations},
  type            = {LAPACK Working Note},
  number          = {02},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = sep,
  year            = {1987},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-99, September 1987. Published in
                     \cite{Dongarra:1989:BRM}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn02.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn02.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 03 (ANL MCS-TM-110).
@techreport{Demmel:1988:CSS,
  author          = {J. Demmel and W. Kahan},
  title           = {Computing Small Singular Values of Bidiagonal Matrices
                     with Guaranteed High Relative Accuracy},
  type            = {LAPACK Working Note},
  number          = {03},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = feb,
  year            = {1988},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-110, February 1988.},
  url             = {http://www.netlib.org/lapack/lawns/lawn03.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn03.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 04 (ANL MCS-TM-111).
@techreport{Demmel:1988:GDS,
  author          = {J. Demmel and J. {Du Croz} and S. Hammarling and D.
                     Sorensen},
  title           = {Guidelines for the Design of Symmetric Eigenroutines,
                     {SVD}, and Iterative Refinement and Condition
                     Estimation for Linear Systems},
  type            = {LAPACK Working Note},
  number          = {04},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = mar,
  year            = {1988},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-111, March 1988.},
  url             = {http://www.netlib.org/lapack/lawns/lawn04.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn04.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 05 (ANL MCS-TM-38).
@techreport{Bischof:1988:PC,
  author          = {C. Bischof and J. Demmel and J. Dongarra and J. {Du Croz}
                     and A. Greenbaum and S. Hammarling and D. Sorensen},
  title           = {Provisional Contents},
  type            = {LAPACK Working Note},
  number          = {05},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = sep,
  year            = {1988},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-38, September 1988.},
  url             = {http://www.netlib.org/lapack/lawns/lawn05.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn05.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 06 (ANL MCS-TM-120); the note cross-cites the
% published version.
@techreport{Brewer:1988:TAAa,
  author          = {O. Brewer and J. Dongarra and D. Sorensen},
  title           = {Tools to Aid in the Analysis of Memory Access Patterns
                     for {FORTRAN} Programs},
  type            = {LAPACK Working Note},
  number          = {06},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = jun,
  year            = {1988},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-120, June 1988. Published in
                     \cite{Brewer:1988:TAAb}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn06.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn06.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 07 (ANL MCS-TM-126); the note cross-cites the
% published version.
@techreport{Barlow:1988:CAE,
  author          = {J. Barlow and J. Demmel},
  title           = {Computing Accurate Eigensystems of Scaled Diagonally
                     Dominant Matrices},
  type            = {LAPACK Working Note},
  number          = {07},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = dec,
  year            = {1988},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-126, December 1988. Published in
                     \cite{Barlow:1990:CAE}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn07.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn07.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 08 (ANL MCS-TM-127); the note cross-cites the
% published version.
@techreport{Bai:1989:BIHa,
  author          = {Z. Bai and J. Demmel},
  title           = {On a Block Implementation of {Hessenberg} Multishift
                     {$ Q R $} Iteration},
  type            = {LAPACK Working Note},
  number          = {08},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = jan,
  year            = {1989},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-127, January 1989. Published in
                     \cite{Bai:1989:BIHb}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn08.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn08.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 09 (ANL MCS-P69-0389).
@techreport{Demmel:1989:TMG,
  author          = {J. Demmel and A. McKenney},
  title           = {A Test Matrix Generation Suite},
  type            = {LAPACK Working Note},
  number          = {09},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = mar,
  year            = {1989},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-P69-0389, March 1989.},
  url             = {http://www.netlib.org/lapack/lawns/lawn09.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn09.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 10 (ANL MCS-TM-130).
@techreport{Anderson:1989:ITI,
  author          = {E. Anderson and J. Dongarra},
  title           = {Installing and Testing the Initial Release of {LAPACK}
                     --- {Unix} and Non-{Unix} Versions},
  type            = {LAPACK Working Note},
  number          = {10},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = may,
  year            = {1989},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-130, May 1989.},
  url             = {http://www.netlib.org/lapack/lawns/lawn10.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn10.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 11 (ANL MCS-TM-133); the note cross-cites the
% published version.
@techreport{Deift:1989:BSV,
  author          = {P. Deift and J. Demmel and L.-C. Li and C. Tomei},
  title           = {The Bidiagonal Singular Value Decomposition and
                     {Hamiltonian} Mechanics},
  type            = {LAPACK Working Note},
  number          = {11},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = aug,
  year            = {1989},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-133, August 1989. Published in
                     \cite{Deift:1991:BSV}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn11.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn11.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 12 (ANL MCS-TM-134).
% The title is stored in title case like the other entries (styles can
% downcase but cannot restore capitals), and the note ends with a period
% like its siblings.
% NOTE(review): xxnote says the report is not available at the Web site,
% yet url fields are present -- confirm which is current.
@techreport{Mayes:1989:BCF,
  author          = {P. Mayes and G. Radicati},
  title           = {Banded {Cholesky} Factorization Using Level 3 {BLAS}},
  type            = {LAPACK Working Note},
  number          = {12},
  institution     = inst-ANL-MCS,
  address         = inst-ANL-MCS:adr,
  month           = aug,
  year            = {1989},
  bibdate         = {Sat Apr 23 06:29:27 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {ANL, MCS-TM-134, August 1989.},
  url             = {http://www.netlib.org/lapack/lawns/lawn12.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn12.pdf},
  acknowledgement = ack-nhfb,
  xxnote          = {Not available at Web site.},
}
% LAPACK Working Note 13 (UT-CS-89-86).
@techreport{Bai:1989:CNE,
  author          = {Z. Bai and J. Demmel and A. McKenney},
  title           = {On the Conditioning of the Nonsymmetric Eigenproblem:
                     Theory and Software},
  type            = {LAPACK Working Note},
  number          = {13},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1989},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-89-86, October 1989.},
  url             = {http://www.netlib.org/lapack/lawns/lawn13.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn13.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 14 (UT-CS-89-87).
@techreport{Demmel:1989:FPE,
  author          = {J. Demmel},
  title           = {On Floating Point Errors in {Cholesky}},
  type            = {LAPACK Working Note},
  number          = {14},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1989},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-89-87, October 1989.},
  url             = {http://www.netlib.org/lapack/lawns/lawn14.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn14.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 15 (UT-CS-89-88); the note cross-cites the
% published version.
% The second author's surname carries an acute accent; it is written as a
% BibTeX special character so that sorting and labels treat it as one letter.
@techreport{Demmel:1989:JMM,
  author          = {J. Demmel and K. Veseli{\'c}},
  title           = {{Jacobi}'s Method is More Accurate than {$ Q R $}},
  type            = {LAPACK Working Note},
  number          = {15},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1989},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-89-88, October 1989. Published in
                     \cite{Demmel:1992:JMM}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn15.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn15.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 16 (UT-CS-89-89); the note records that it was
% replaced by a later working note.
@techreport{Anderson:1989:RIR,
  author          = {E. Anderson and J. Dongarra},
  title           = {Results from the Initial Release of {LAPACK}},
  type            = {LAPACK Working Note},
  number          = {16},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = nov,
  year            = {1989},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-89-89, November 1989. (Replaced by LAWN 41 or
                     81!!)},
  url             = {http://www.netlib.org/lapack/lawns/lawn16.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn16.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 17 (UT-CS-89-92).
@techreport{Greenbaum:1989:EQQ,
  author          = {A. Greenbaum and J. Dongarra},
  title           = {Experiments with {QR\slash QL} Methods for the
                     Symmetric Tridiagonal Eigenproblem},
  type            = {LAPACK Working Note},
  number          = {17},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = nov,
  year            = {1989},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-89-92, November 1989.},
  url             = {http://www.netlib.org/lapack/lawns/lawn17.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn17.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 18 (UT-CS-90-101).
@techreport{Anderson:1990:IGL,
  author          = {E. Anderson and J. Dongarra},
  title           = {Implementation Guide for {LAPACK}},
  type            = {LAPACK Working Note},
  number          = {18},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = apr,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-101, April 1990.},
  url             = {http://www.netlib.org/lapack/lawns/lawn18.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn18.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 19 (UT-CS-90-103).
@techreport{Anderson:1990:EBA,
  author          = {E. Anderson and J. Dongarra},
  title           = {Evaluating Block Algorithm Variants in {LAPACK}},
  type            = {LAPACK Working Note},
  number          = {19},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = apr,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-103, April 1990.},
  url             = {http://www.netlib.org/lapack/lawns/lawn19.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn19.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 20 (UT-CS-90-105); the note cross-cites the
% published version.
@techreport{Anderson:1990:LPLa,
  author          = {E. Anderson and Z. Bai and C. Bischof and J. Demmel and
                     J. Dongarra and J. {Du Croz} and A. Greenbaum and S.
                     Hammarling and A. McKenney and D. Sorensen},
  title           = {{LAPACK}: {A} Portable Linear Algebra Library for
                     High-Performance Computers},
  type            = {LAPACK Working Note},
  number          = {20},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-105, May 1990. Published in
                     \cite{Anderson:1990:LPLb}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn20.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn20.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 21.  The report number in the note uses the same
% "UT-CS-yy-nnn" form as the other University of Tennessee entries.
% NOTE(review): xxnote says the report is not available at the Web site,
% yet url fields are present -- confirm which is current.
@techreport{Croz:1990:FBM,
  author          = {Jeremy {Du Croz} and Peter Mayes and Giuseppe Radicati},
  title           = {Factorizations of Band Matrices Using Level 3 {BLAS}},
  type            = {LAPACK Working Note},
  number          = {21},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jul,
  year            = {1990},
  bibdate         = {Sat Apr 23 06:32:16 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-109, July 1990.},
  url             = {http://www.netlib.org/lapack/lawns/lawn21.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn21.pdf},
  acknowledgement = ack-nhfb,
  remark          = {Published in \cite[pp.~222--231]{Burkhart:1990:CVI}.},
  xxnote          = {Not available at Web site.},
}
% LAPACK Working Note 22 (UT-CS-90-110); the note cross-cites the
% published version.
@techreport{Demmel:1990:SBA,
  author          = {J. Demmel and N. Higham},
  title           = {Stability of Block Algorithms with Fast Level 3 {BLAS}},
  type            = {LAPACK Working Note},
  number          = {22},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jul,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-110, July 1990. Published in
                     \cite{Demmel:1992:SBA}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn22.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn22.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 23 (UT-CS-90-113); the note cross-cites the
% published version.
@techreport{Demmel:1990:IEB,
  author          = {J. Demmel and N. Higham},
  title           = {Improved Error Bounds for Underdetermined System
                     Solvers},
  type            = {LAPACK Working Note},
  number          = {23},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-113, August 1990. Published in
                     \cite{Demmel:1993:IEB}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn23.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn23.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 24 (UT-CS-90-115).
@techreport{Dongarra:1990:LBF,
  author          = {J. Dongarra and S. Ostrouchov},
  title           = {{LAPACK} Block Factorization Algorithms on the {Intel
                     iPSC\slash 860}},
  type            = {LAPACK Working Note},
  number          = {24},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-115, October, 1990.},
  url             = {http://www.netlib.org/lapack/lawns/lawn24.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn24.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 25 (UT-CS-90-117); the note cross-cites the
% published version.
@techreport{Dongarra:1990:NCC,
  author          = {J. Dongarra and S. Hammarling and J. Wilkinson},
  title           = {Numerical Considerations in Computing Invariant
                     Subspaces},
  type            = {LAPACK Working Note},
  number          = {25},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-117, October, 1990. Published in
                     \cite{Dongarra:1992:NCC}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn25.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn25.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 26 (UT-CS-90-118).
@techreport{Anderson:1990:PEL,
  author          = {E. Anderson and C. Bischof and J. Demmel and J. Dongarra
                     and J. {Du Croz} and S. Hammarling and W. Kahan},
  title           = {Prospectus for an Extension to {LAPACK}: {A} Portable
                     Linear Algebra Library for High-Performance Computers},
  type            = {LAPACK Working Note},
  number          = {26},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  pages           = {10},
  month           = nov,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-118, November 1990.},
  url             = {http://www.netlib.org/lapack/lawns/lawn26.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn26.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 27 (UT-CS-90-119); the note cross-cites the
% published version.
@techreport{DuCroz:1990:SMM,
  author          = {J. {Du Croz} and N. Higham},
  title           = {Stability of Methods for Matrix Inversion},
  type            = {LAPACK Working Note},
  number          = {27},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-119, October, 1990. Published in
                     \cite{Croz:1992:SMM}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn27.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn27.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 28 (UT-CS-90-122); the note cross-cites the
% published version.
@techreport{Dongarra:1990:IRS,
  author          = {J. Dongarra and P. Mayes and G. Radicati},
  title           = {The {IBM RISC System\slash 6000} and Linear Algebra
                     Operations},
  type            = {LAPACK Working Note},
  number          = {28},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = dec,
  year            = {1990},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-90-122, December 1990. Published in
                     \cite{Dongarra:1991:IRS}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn28.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn28.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 29 (UT-CS-91-129); the note cross-cites the
% published version.
@techreport{vandeGeijn:1991:GCO,
  author          = {R. van de Geijn},
  title           = {On Global Combine Operations},
  type            = {LAPACK Working Note},
  number          = {29},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = apr,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-129, April 1991. Published in
                     \cite{vandeGeijn:1994:GCO}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn29.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn29.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 30 (UT-CS-91-130); the note cross-cites the
% published version.
@techreport{Dongarra:1991:RCF,
  author          = {J. Dongarra and R. van de Geijn},
  title           = {Reduction to Condensed Form for the Eigenvalue Problem
                     on Distributed Memory Architectures},
  type            = {LAPACK Working Note},
  number          = {30},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = apr,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-130, April 1991. Published in
                     \cite{Dongarra:1992:RCFb}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn30.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn30.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 31 (UT-CS-91-131); the note cross-cites the
% published version.
@techreport{Anderson:1991:GQF,
  author          = {E. Anderson and Z. Bai and J. Dongarra},
  title           = {Generalized {$ Q R $} Factorization and its
                     Applications},
  type            = {LAPACK Working Note},
  number          = {31},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = apr,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-131, April 1991. Published in
                     \cite{Anderson:1992:GFA}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn31.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn31.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 32 (UT-CS-91-132); the note cross-cites the
% published version.
@techreport{Bischof:1991:GIC,
  author          = {C. Bischof and P. T. P. Tang},
  title           = {Generalized Incremental Condition Estimation},
  type            = {LAPACK Working Note},
  number          = {32},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-132, May 1991. Published in
                     \cite{Bischof:1992:GIC}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn32.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn32.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 33 (UT-CS-91-133).
@techreport{Bischof:1991:RIC,
  author          = {C. Bischof and P. T. P. Tang},
  title           = {Robust Incremental Condition Estimation},
  type            = {LAPACK Working Note},
  number          = {33},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-133, May 1991.},
  url             = {http://www.netlib.org/lapack/lawns/lawn33.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn33.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 34 (UT-CS-91-134).
@techreport{Dongarra:1991:WB,
  author          = {J. J. Dongarra},
  title           = {Workshop on the {BLACS}},
  type            = {LAPACK Working Note},
  number          = {34},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-134, May 1991.},
  url             = {http://www.netlib.org/lapack/lawns/lawn34.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn34.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 35 (UT-CS-91-138).
% The title is stored in title case like the other entries in this file;
% styles can downcase a title but cannot restore lost capitals.
@techreport{Anderson:1991:IGL,
  author          = {E. Anderson and J. Dongarra and S. Ostrouchov},
  title           = {Implementation Guide for {LAPACK}},
  type            = {LAPACK Working Note},
  number          = {35},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-138, August 1991.},
  url             = {http://www.netlib.org/lapack/lawns/lawn35.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn35.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 36 (UT-CS-91-142).
% The title is stored in title case like the other entries in this file;
% styles can downcase a title but cannot restore lost capitals.
@techreport{Anderson:1991:RTS,
  author          = {E. Anderson},
  title           = {Robust Triangular Solvers},
  type            = {LAPACK Working Note},
  number          = {36},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-142, August, 1991.},
  url             = {http://www.netlib.org/lapack/lawns/lawn36.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn36.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 37; the note cross-cites the published version.
% NOTE(review): the report number UT-CS-91-138 in the note is also the one
% recorded for LAPACK Working Note 35 -- confirm against the source index.
@techreport{Dongarra:1991:TDB,
  author          = {Jack J. Dongarra and Robert A. van de Geijn},
  title           = {Two Dimensional Basic Linear Algebra Communication
                     Subprograms},
  type            = {LAPACK Working Note},
  number          = {37},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-138, October, 1991. Published in
                     \cite{Dongarra:1993:TDB}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn37.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn37.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 38 (UT-CS-91-139).
@techreport{Bai:1991:DAC,
  author          = {Zhaojun Bai and James W. Demmel},
  title           = {On a Direct Algorithm for Computing Invariant
                     Subspaces with Specified Eigenvalues},
  type            = {LAPACK Working Note},
  number          = {38},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = nov,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-139, November, 1991.},
  url             = {http://www.netlib.org/lapack/lawns/lawn38.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn38.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 39 (UT-CS-91-141); the note cross-cites the
% published version.
@techreport{Demmel:1991:DPH,
  author          = {James Demmel and Jack Dongarra and W. Kahan},
  title           = {On Designing Portable High Performance Numerical
                     Libraries},
  type            = {LAPACK Working Note},
  number          = {39},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jul,
  year            = {1991},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-91-141, July, 1991. Published in
                     \cite{Demmel:1992:DPH}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn39.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn39.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 40 (UT-CS-92-149).
@techreport{Demmel:1992:BLF,
  author          = {James Demmel and Nick Higham and Rob Schreiber},
  title           = {Block {$ L U $} Factorization},
  type            = {LAPACK Working Note},
  number          = {40},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = feb,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-149, February 1992.},
  url             = {http://www.netlib.org/lapack/lawns/lawn40.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn40.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 41 (UT-CS-92-151).
@techreport{Blackford:1992:IGL,
  author          = {Susan Blackford and Jack Dongarra},
  title           = {Installation Guide for {LAPACK}},
  type            = {LAPACK Working Note},
  number          = {41},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = mar,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-151, March, 1992.},
  url             = {http://www.netlib.org/lapack/lawns/lawn41.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn41.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 42 (UT-CS-92-153); the note cross-cites the
% published version.
@techreport{Higham:1992:PTB,
  author          = {Nick Higham},
  title           = {Perturbation Theory and Backward Error for
                     {$ A X - X B = C $}},
  type            = {LAPACK Working Note},
  number          = {42},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = apr,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-153, April, 1992. Published in
                     \cite{Higham:1993:PTB}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn42.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn42.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 43 (UT-CS-92-155); the note cross-cites the
% published version.
@techreport{Dongarra:1992:LSD,
  author          = {Jack Dongarra and Robert van de Geijn and David Walker},
  title           = {A Look at Scalable Dense Linear Algebra Libraries},
  type            = {LAPACK Working Note},
  number          = {43},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = apr,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-155, April, 1992. Published in
                     \cite{Dongarra:1992:LASb}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn43.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn43.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 44 (UT-CS-92-156); the note cross-cites the
% published version.
@techreport{Anderson:1992:PLP,
  author          = {Edward Anderson and Jack Dongarra},
  title           = {Performance of {LAPACK}: {A} Portable Library of
                     Numerical Linear Algebra Routines},
  type            = {LAPACK Working Note},
  number          = {44},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-156, May 1992. Published in
                     \cite{Anderson:1993:PLP}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn44.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn44.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 45 (UT-CS-92-162).
@techreport{Demmel:1992:III,
  author          = {J. Demmel},
  title           = {The Inherent Inaccuracy of Implicit Tridiagonal
                     {$ Q R $}},
  type            = {LAPACK Working Note},
  number          = {45},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-162, May 1992.},
  url             = {http://www.netlib.org/lapack/lawns/lawn45.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn45.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 46 (UT-CS-92-163); the note cross-cites the
% published version.
@techreport{Bai:1992:CGS,
  author          = {Z. Bai and J. Demmel},
  title           = {Computing the Generalized Singular Value
                     Decomposition},
  type            = {LAPACK Working Note},
  number          = {46},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-163, May 1992. Published in
                     \cite{Bai:1993:CGS}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn46.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn46.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 47 (UT-CS-92-164).
@techreport{Demmel:1992:OPN,
  author          = {J. Demmel},
  title           = {Open Problems in Numerical Linear Algebra},
  type            = {LAPACK Working Note},
  number          = {47},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-164, May 1992.},
  url             = {http://www.netlib.org/lapack/lawns/lawn47.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn47.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 48 (UT-CS-92-166); the note cross-cites the
% published version.
@techreport{Demmel:1992:CAS,
  author          = {J. Demmel and W. Gragg},
  title           = {On Computing Accurate Singular Values and Eigenvalues
                     of Matrices with Acyclic Graphs},
  type            = {LAPACK Working Note},
  number          = {48},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-166, May 1992. Published in
                     \cite{Demmel:1993:CAS}.},
  url             = {http://www.netlib.org/lapack/lawns/lawn48.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn48.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 49 (UT-CS-92-167).
@techreport{Demmel:1992:SFP,
  author          = {J. Demmel},
  title           = {A Specification for Floating Point Parallel Prefix},
  type            = {LAPACK Working Note},
  number          = {49},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-167, May 1992.},
  url             = {http://www.netlib.org/lapack/lawns/lawn49.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn49.pdf},
  acknowledgement = ack-nhfb,
}
% LAPACK Working Note 50 (UT-CS-92-169).
@techreport{Eijkhout:1992:DSD,
  author          = {Victor Eijkhout},
  title           = {Distributed Sparse Data Structures for Linear Algebra
                     Operations},
  type            = {LAPACK Working Note},
  number          = {50},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-169, May 1992.},
  url             = {http://www.netlib.org/lapack/lawns/lawn50.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn50.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 51 (UT-CS-92-170).
@TechReport{Eijkhout:1992:QPC,
  author          = {Victor Eijkhout},
  title           = {Qualitative Properties of the Conjugate Gradient and
                     {Lanczos} Methods in a Matrix Framework},
  type            = {LAPACK Working Note},
  number          = {51},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-170, May 1992.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn51.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn51.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 52 (UT-CS-92-178); note cites published version Heath:1995:CPN.
@TechReport{Heath:1992:CPN,
  author          = {Michael T. Heath and Padma Raghavan},
  title           = {A {Cartesian} Parallel Nested Dissection Algorithm},
  type            = {LAPACK Working Note},
  number          = {52},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jun,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-178, June 1992. Published in \cite{Heath:1995:CPN}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn52.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn52.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 53 (UT-CS-92-179); note cites published version Demmel:1993:TPN.
@TechReport{Demmel:1992:TPN,
  author          = {J. W. Demmel},
  title           = {Trading Off Parallelism and Numerical Stability},
  type            = {LAPACK Working Note},
  number          = {53},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jun,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-179, June 1992. Published in \cite{Demmel:1993:TPN}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn53.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn53.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 54 (UT-CS-92-182); note cites published version Bai:1993:SDB.
@TechReport{Bai:1992:SDB,
  author          = {Z. Bai and J. W. Demmel},
  title           = {On Swapping Diagonal Blocks in Real {Schur} Form},
  type            = {LAPACK Working Note},
  number          = {54},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-182, October 1992. Published in \cite{Bai:1993:SDB}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn54.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn54.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 55 (UT-CS-92-181); note cites published version Choi:1992:SSLb.
%%% The title reads "A Scalable Linear Algebra Library ..."; the word
%%% "Library" was missing from this entry.
@TechReport{Choi:1992:SSLa,
  author          = {J. Choi and J. Dongarra and R. Pozo and D. Walker},
  title           = {{ScaLAPACK}: {A} Scalable Linear Algebra Library for
                     Distributed Memory Concurrent Computers},
  type            = {LAPACK Working Note},
  number          = {55},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = nov,
  year            = {1992},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-92-181, November 1992. Published in
                     \cite{Choi:1992:SSLb}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn55.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn55.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 56 (UT-CS-93-185).
@TechReport{DAzevedo:1993:RCC,
  author          = {E. F. D'Azevedo and V. L. Eijkhout and C. H. Romine},
  title           = {Reducing Communication Costs in the Conjugate Gradient
                     Algorithm on Distributed Memory Multiprocessors},
  type            = {LAPACK Working Note},
  number          = {56},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jan,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-185, January 1993.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn56.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn56.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 57 (UT-CS-93-187); note cites published version Choi:1994:PPU.
@TechReport{Choi:1993:PPU,
  author          = {Jaeyoung Choi and Jack J. Dongarra and David W. Walker},
  title           = {{PUMMA}: {Parallel Universal Matrix Multiplication
                     Algorithms} on Distributed Memory Concurrent Computers},
  type            = {LAPACK Working Note},
  number          = {57},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-187, May 1993. Published in \cite{Choi:1994:PPU}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn57.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn57.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 58 (UT-CS-93-188).
%%% The title ends in the plural "Computers"; this entry had the
%%% ungrammatical singular "High Performance Computer".
@TechReport{Dongarra:1993:DLA,
  author          = {Jack Dongarra and David Walker},
  title           = {The Design of Linear Algebra Libraries for High
                     Performance Computers},
  type            = {LAPACK Working Note},
  number          = {58},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jun,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-188, June 1993.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn58.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn58.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 59 (UT-CS-93-192); note cites published version Demmel:1994:FNA.
@TechReport{Demmel:1993:FNA,
  author          = {James W. Demmel and Xiaoye Li},
  title           = {Faster Numerical Algorithms via Exception Handling},
  type            = {LAPACK Working Note},
  number          = {59},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = mar,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-192, March 1993. Published in
                     \cite{Demmel:1994:FNA}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn59.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn59.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 60; note cites published version Demmel:1993:PNLb.
%%% NOTE(review): the report number UT-CS-93-192 in the note is also the
%%% one given for LAWN 59 (Demmel:1993:FNA) -- confirm against the UTK
%%% report series which of the two is correct.
@TechReport{Demmel:1993:PNLa,
  author          = {James W. Demmel and Michael T. Heath and Henk A. van der
                     Vorst},
  title           = {Parallel Numerical Linear Algebra},
  type            = {LAPACK Working Note},
  number          = {60},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = mar,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-192, March 1993. Published in
                     \cite{Demmel:1993:PNLb}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn60.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn60.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 61 (UT-CS-93-200).
@TechReport{Dongarra:1993:OOD,
  author          = {J. Dongarra and R. Pozo and D. Walker},
  title           = {An Object Oriented Design for High Performance Linear
                     Algebra on Distributed Memory Architectures},
  type            = {LAPACK Working Note},
  number          = {61},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-200, August 1993.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn61.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn61.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 62 (UT-CS-93-201).
@TechReport{Heath:1993:DSS,
  author          = {Michael T. Heath and Padma Raghavan},
  title           = {Distributed Solution of Sparse Linear Systems},
  type            = {LAPACK Working Note},
  number          = {62},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-201, August 1993.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn62.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn62.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 63 (UT-CS-93-202).
@TechReport{Heath:1993:LPS,
  author          = {Michael T. Heath and Padma Raghavan},
  title           = {Line and Plane Separators},
  type            = {LAPACK Working Note},
  number          = {63},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-202, August 1993.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn63.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn63.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 64 (UT-CS-93-203); note cites published version Raghavan:1995:DSG.
@TechReport{Raghavan:1993:DSG,
  author          = {Padma Raghavan},
  title           = {Distributed Sparse {Gaussian} Elimination and Orthogonal
                     Factorization},
  type            = {LAPACK Working Note},
  number          = {64},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-203, August 1993. Published in
                     \cite{Raghavan:1995:DSG}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn64.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn64.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 65 (UT-CS-93-215); note cites published version Choi:1994:PMT.
@TechReport{Choi:1993:PMT,
  author          = {Jaeyoung Choi and Jack J. Dongarra and David W. Walker},
  title           = {Parallel Matrix Transpose Algorithms on Distributed
                     Memory Concurrent Computers},
  type            = {LAPACK Working Note},
  number          = {65},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = nov,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-215, November, 1993. Published in
                     \cite{Choi:1994:PMT}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn65.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn65.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 66 (UT-CS-93-216).
@TechReport{Eijkhout:1993:CPI,
  author          = {Victor Eijkhout},
  title           = {A Characterization of Polynomial Iterative Methods},
  type            = {LAPACK Working Note},
  number          = {66},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = nov,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-216, November 1993.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn66.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn66.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 67 (UT-CS-93-218).
@TechReport{Desprez:1993:PCF,
  author          = {F. Desprez and J. Dongarra and B. Tourancheau},
  title           = {Performance Complexity of {$ L U $} Factorization with
                     Efficient Pipelining and Overlap on a Multiprocessor},
  type            = {LAPACK Working Note},
  number          = {67},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = dec,
  year            = {1993},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-93-218, December, 1993.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn67.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn67.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 68 (UT-CS-94-221).
@TechReport{Berry:1994:HPA,
  author          = {Michael W. Berry and Jack J. Dongarra and Youngbae Kim},
  title           = {A Highly Parallel Algorithm for the Reduction of a
                     Nonsymmetric Matrix to Block Upper-{Hessenberg} Form},
  type            = {LAPACK Working Note},
  number          = {68},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = feb,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-221, February 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn68.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn68.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 69 (UT-CS-94-225).
@TechReport{Rutter:1994:SIC,
  author          = {J. Rutter},
  title           = {A Serial Implementation of {Cuppen}'s Divide and Conquer
                     Algorithm for the Symmetric Eigenvalue Problem},
  type            = {LAPACK Working Note},
  number          = {69},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = mar,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-225, March 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn69.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn69.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 70 (UT-CS-94-228); note cites published version Demmel:1995:CSB.
@TechReport{Demmel:1994:CPB,
  author          = {James Demmel and Inderjit Dhillon and Huan Ren},
  title           = {On the Correctness of Parallel Bisection in Floating
                     Point},
  type            = {LAPACK Working Note},
  number          = {70},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = mar,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-228, March 1994. Published in
                     \cite{Demmel:1995:CSB}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn70.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn70.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 71 (UT-CS-94-231).
@TechReport{Dongarra:1994:IRP,
  author          = {Jack Dongarra and Michael Kolatis},
  title           = {{IBM RS\slash 6000-550 \& -590} Performance for Selected
                     Routines in {ESSL}},
  type            = {LAPACK Working Note},
  number          = {71},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = apr,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-231, April 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn71.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn71.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 72; note cites published version Lehoucq:1996:CEU.
%%% NOTE(review): the report number UT-CS-94-233 carries a 94 prefix while
%%% month/year say October 1995 -- presumably a revision date; confirm.
@TechReport{Lehoucq:1995:CEU,
  author          = {R. Lehoucq},
  title           = {The Computation of Elementary Unitary Matrices},
  type            = {LAPACK Working Note},
  number          = {72},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-233, October 1995. Published in
                     \cite{Lehoucq:1996:CEU}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn72.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn72.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 73 (UT-CS-94-234).
@TechReport{Whaley:1994:BLA,
  author          = {R. Clint Whaley},
  title           = {Basic Linear Algebra Communication Subprograms: Analysis
                     and Implementation Across Multiple Parallel
                     Architectures},
  type            = {LAPACK Working Note},
  number          = {73},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-234, May 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn73.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn73.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 74 (UT-CS-94-236); note cites published version Dongarra:1994:SMLb.
@TechReport{Dongarra:1994:SMLa,
  author          = {J. Dongarra and A. Lumsdaine and X. Niu and R. Pozo and
                     K. Remington},
  title           = {A Sparse Matrix Library in {C++} for High Performance
                     Architectures},
  type            = {LAPACK Working Note},
  number          = {74},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jul,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-236, July 1994. Published in
                     \cite{Dongarra:1994:SMLb}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn74.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn74.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 75 (UT-CS-94-237); note cites published version Kaagstrom:1996:LSA.
%%% The title refers to the "Separation" between regular matrix pairs;
%%% this entry had the typo "Separating".
@TechReport{Kaagstrom:1994:LSA,
  author          = {Bo K{\aa}gstr{\"o}m and Peter Poromaa},
  title           = {{LAPACK}-Style Algorithms and Software for Solving the
                     Generalized {Sylvester} Equation and Estimating the
                     Separation Between Regular Matrix Pairs},
  type            = {LAPACK Working Note},
  number          = {75},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jul,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-237, July 1994. Published in
                     \cite{Kaagstrom:1996:LSA}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn75.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn75.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 76 (UT-CS-94-239); note cites published version Barrett:1996:ABI.
@TechReport{Barrett:1994:ABI,
  author          = {Richard Barrett and Michael Berry and Jack Dongarra and
                     Victor Eijkhout and Charles Romine},
  title           = {Algorithmic Bombardment for the Iterative Solution of
                     Linear Systems: {A} Poly-Iterative Approach},
  type            = {LAPACK Working Note},
  number          = {76},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-239, August, 1994. Published in
                     \cite{Barrett:1996:ABI}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn76.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn76.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 77 (UT-CS-94-240).
@TechReport{Eijkhout:1994:BCD,
  author          = {Victor Eijkhout and Roldan Pozo},
  title           = {Basic Concepts for Distributed Sparse Linear Algebra
                     Operations},
  type            = {LAPACK Working Note},
  number          = {77},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-240, August, 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn77.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn77.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 78 (UT-CS-94-241).
%%% Title stored in Title Case like the other entries in this file, so
%%% that styles which do not recase titles render it consistently.
@TechReport{Eijkhout:1994:CVC,
  author          = {Victor Eijkhout},
  title           = {Computational Variants of the {CGS} and {BiCGstab}
                     Methods},
  type            = {LAPACK Working Note},
  number          = {78},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-241, August, 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn78.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn78.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 79 (UT-CS-94-244); note cites published version Henry:1996:PAU.
@TechReport{Henry:1994:PQA,
  author          = {Greg Henry and Robert van de Geijn},
  title           = {Parallelizing the {$ Q R $} Algorithm for the
                     Unsymmetric Algebraic Eigenvalue Problem: Myths and
                     Reality},
  type            = {LAPACK Working Note},
  number          = {79},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-244, August, 1994. Published in
                     \cite{Henry:1996:PAU}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn79.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn79.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 80 (UT-CS-94-246); note cites published version Choi:1996:DIS.
@TechReport{Choi:1994:DIS,
  author          = {J. Choi and J. J. Dongarra and S. Ostrouchov and A. P.
                     Petitet and D. W. Walker and R. C. Whaley},
  title           = {The Design and Implementation of the {ScaLAPACK}
                     {$ L U $}, {$ Q R $}, and {Cholesky} Factorization
                     Routines},
  type            = {LAPACK Working Note},
  number          = {80},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = sep,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-246, September, 1994. Published in
                     \cite{Choi:1996:DIS}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn80.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn80.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 81 (UT-CS-94-249).
@TechReport{Blackford:1994:QIG,
  author          = {S. Blackford and J. Dongarra},
  title           = {Quick Installation Guide for {LAPACK} on {Unix} Systems},
  type            = {LAPACK Working Note},
  number          = {81},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = sep,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-249, September, 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn81.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn81.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 82 (UT-CS-94-250).
@TechReport{Dongarra:1994:CCI,
  author          = {J. Dongarra and M. Kolatis},
  title           = {Call Conversion Interface ({CCI}) for {LAPACK\slash
                     ESSL}},
  type            = {LAPACK Working Note},
  number          = {82},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-250, August, 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn82.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn82.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 83 (UT-CS-94-251); note cites published version Li:1997:RPB.
@TechReport{Li:1994:RPB,
  author          = {Ren-Cang Li},
  title           = {Relative Perturbation Bounds for the Unitary Polar
                     Factor},
  type            = {LAPACK Working Note},
  number          = {83},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = sep,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-251, September, 1994. Published in
                     \cite{Li:1997:RPB}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn83.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn83.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 84 (UT-CS-94-252); note cites published version Li:1998:RPT.
@TechReport{Li:1994:RPTa,
  author          = {Ren-Cang Li},
  title           = {Relative Perturbation Theory: ({I}) Eigenvalue
                     Variations},
  type            = {LAPACK Working Note},
  number          = {84},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = sep,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-252, September, 1994. Published in
                     \cite{Li:1998:RPT}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn84.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn84.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 85 (UT-CS-94-253); note cites published version Li:1999:RPT.
@TechReport{Li:1994:RPTb,
  author          = {Ren-Cang Li},
  title           = {Relative Perturbation Theory: ({II}) Eigenspace
                     Variations},
  type            = {LAPACK Working Note},
  number          = {85},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = sep,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-253, September, 1994. Published in
                     \cite{Li:1999:RPT}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn85.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn85.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 86 (UT-CS-94-254).
@TechReport{Demmel:1994:PFE,
  author          = {J. Demmel and K. Stanley},
  title           = {The Performance of Finding Eigenvalues and Eigenvectors
                     of Dense Symmetric Matrices on Distributed Memory
                     Computers},
  type            = {LAPACK Working Note},
  number          = {86},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = sep,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-254, September, 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn86.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn86.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 87 (UT-CS-94-255); note cites published version Kaagstrom:1996:CES.
%%% The matrix pair is typeset as braced math, matching how other titles
%%% in this file write {$ L U $} and {$ Q R $}.
@TechReport{Kaagstrom:1994:CES,
  author          = {B. K{\aa}gstr{\"o}m and P. Poromaa},
  title           = {Computing Eigenspaces with Specified Eigenvalues of a
                     Regular Matrix Pair {$ (A, B) $} and Condition
                     Estimation: Theory, Algorithms and Software},
  type            = {LAPACK Working Note},
  number          = {87},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = sep,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-255, September, 1994. Published in
                     \cite{Kaagstrom:1996:CES}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn87.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn87.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 88 (UT-CS-94-257).
@TechReport{Gu:1994:ECS,
  author          = {Ming Gu and James Demmel and Inderjit Dhillon},
  title           = {Efficient Computation of the Singular Value
                     Decomposition with Applications to Least Squares
                     Problems},
  type            = {LAPACK Working Note},
  number          = {88},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-257, October, 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn88.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn88.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 89 (UT-CS-94-260).
@TechReport{Li:1994:SSE,
  author          = {Ren-Cang Li},
  title           = {Solving Secular Equations Stably and Efficiently},
  type            = {LAPACK Working Note},
  number          = {89},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = nov,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-260, November, 1994.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn89.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn89.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 90 (UT-CS-94-268); note cites published version Plank:1995:ADC.
@TechReport{Plank:1994:ABD,
  author          = {J. S. Plank and Y. Kim and J. J. Dongarra},
  title           = {Algorithm-Based Diskless Checkpointing for Fault
                     Tolerant Matrix Operations},
  type            = {LAPACK Working Note},
  number          = {90},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = dec,
  year            = {1994},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-94-268, December 1994. Published in
                     \cite{Plank:1995:ADC}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn90.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn90.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 91 (UT-CS-95-273); note cites published version Bai:1997:SDN.
@TechReport{Bai:1995:SDN,
  author          = {Z. Bai and J. Demmel and J. Dongarra and A. Petitet and
                     H. Robinson and K. Stanley},
  title           = {The Spectral Decomposition of Nonsymmetric Matrices on
                     Distributed Memory Parallel Computers},
  type            = {LAPACK Working Note},
  number          = {91},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jan,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-273, January 1995. Published in
                     \cite{Bai:1997:SDN}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn91.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn91.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 92 (UT-CS-95-275); note cites two published
%%% versions, Choi:1994:DPD and Choi:1995:DPDb.
@TechReport{Choi:1995:DPDa,
  author          = {J. Choi and J. Dongarra and D. Walker},
  title           = {The Design of a Parallel Dense Linear Algebra Software
                     Library: Reduction to {Hessenberg}, Tridiagonal, and
                     Bidiagonal Form},
  type            = {LAPACK Working Note},
  number          = {92},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = feb,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-275, February 1995. Published in
                     \cite{Choi:1994:DPD,Choi:1995:DPDb}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn92.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn92.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 93; dated by its August 31, 2001 update (Version 1.7).
@TechReport{Choi:2001:IGS,
  author          = {J. Choi and J. Demmel and I. Dhillon and J. Dongarra and
                     S. Ostrouchov and A. Petitet and K. Stanley and D.
                     Walker and R. C. Whaley},
  title           = {Installation Guide for {ScaLAPACK}},
  type            = {LAPACK Working Note},
  number          = {93},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {31},
  month           = aug,
  year            = {2001},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {Updated August 31, 2001 (Version 1.7).},
  URL             = {http://www.netlib.org/lapack/lawns/lawn93.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn93.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 94; dated by its May 5, 1997 update (Version 1.1).
@TechReport{Dongarra:1997:UGB,
  author          = {J. Dongarra and R. C. Whaley},
  title           = {A User's Guide to the {BLACS v1.1}},
  type            = {LAPACK Working Note},
  number          = {94},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {5},
  month           = may,
  year            = {1997},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {Updated May 5, 1997 (Version 1.1).},
  URL             = {http://www.netlib.org/lapack/lawns/lawn94.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn94.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 95 (UT-CS-95-283); note cites published version Blackford:1996:SPL.
@TechReport{Choi:1995:SPL,
  author          = {J. Choi and J. Demmel and I. Dhillon and J. Dongarra and
                     S. Ostrouchov and A. Petitet and K. Stanley and D.
                     Walker and R. C. Whaley},
  title           = {{ScaLAPACK}: {A} Portable Linear Algebra Library for
                     Distributed Memory Computers --- Design Issues and
                     Performance},
  type            = {LAPACK Working Note},
  number          = {95},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = mar,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-283, March 1995. Published in
                     \cite{Blackford:1996:SPL}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn95.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn95.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 96 (UT-CS-95-286); note cites published version vandeGeijn:1997:SSU.
@TechReport{vandeGeijn:1995:SSU,
  author          = {R. A. van de Geijn and J. Watts},
  title           = {{SUMMA}: {Scalable Universal Matrix Multiplication
                     Algorithm}},
  type            = {LAPACK Working Note},
  number          = {96},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = apr,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-286, April 1995. Published in
                     \cite{vandeGeijn:1997:SSU}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn96.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn96.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 97 (UT-CS-95-289).
%%% The third author is Katherine Yelick, so the initial is "K."; this
%%% entry previously gave it as "D. Yelick".
@TechReport{Chakrabarti:1995:MBM,
  author          = {S. Chakrabarti and J. Demmel and K. Yelick},
  title           = {Modeling the Benefits of Mixed Data and Task
                     Parallelism},
  type            = {LAPACK Working Note},
  number          = {97},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-289, May 1995.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn97.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn97.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 98 (UT-CS-95-290).
@TechReport{Dongarra:1995:LVH,
  author          = {J. Dongarra and R. Pozo and D. Walker},
  title           = {{LAPACK++ V. 1.0}: High Performance Linear Algebra
                     Users' Guide},
  type            = {LAPACK Working Note},
  number          = {98},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-290, May 1995.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn98.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn98.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 99 (UT-CS-95-291).
@TechReport{Dongarra:1995:RCI,
  author          = {J. Dongarra and V. Eijkhout and A. Kalhan},
  title           = {Reverse Communication Interface for Linear Algebra
                     Templates for Iterative Methods},
  type            = {LAPACK Working Note},
  number          = {99},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-291, May 1995.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn99.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn99.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 100 (UT-CS-95-292).
%%% The note must point at the published version, not back at this
%%% entry's own key (which is what it previously cited).
%%% NOTE(review): Choi:1996:PSP is presumed to be the key of the published
%%% paper (PARA'95 proceedings, printed 1996) -- confirm that this key
%%% exists elsewhere in the file.
@TechReport{Choi:1995:PSP,
  author          = {J. Choi and J. Dongarra and S. Ostrouchov and A. Petitet
                     and D. Walker and R. C. Whaley},
  title           = {A Proposal for a Set of Parallel Basic Linear Algebra
                     Subprograms},
  type            = {LAPACK Working Note},
  number          = {100},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-292, May 1995. Published in
                     \cite{Choi:1996:PSP}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn100.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn100.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 101 (UT-CS-95-295); note cites published version Dongarra:1996:PFI.
@TechReport{Dongarra:1995:PFI,
  author          = {J. J. Dongarra and J. {Du Croz} and S. Hammarling and J.
                     Wa{\'s}niewski and A. Zemla},
  title           = {A Proposal for a {Fortran 90} Interface for {LAPACK}},
  type            = {LAPACK Working Note},
  number          = {101},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jul,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-295, July 1995. Published in
                     \cite{Dongarra:1996:PFI}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn101.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn101.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 102 (UT-CS-95-303).
@TechReport{Dongarra:1995:IVI,
  author          = {J. Dongarra and A. Lumsdaine and R. Pozo and K.
                     Remington},
  title           = {{IML++ v. 1.2}: Iterative Methods Library Reference
                     Guide},
  type            = {LAPACK Working Note},
  number          = {102},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-303, August 1995.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn102.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn102.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 103 (UT-CS-95-304); note cites published version Demmel:1999:SAS.
@TechReport{Demmel:1995:SAS,
  author          = {J. W. Demmel and S. C. Eisenstat and J. R. Gilbert and
                     X. S. Li and J. W. H. Liu},
  title           = {A Supernodal Approach to Sparse Partial Pivoting},
  type            = {LAPACK Working Note},
  number          = {103},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = sep,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-304, September 1995. Published in
                     \cite{Demmel:1999:SAS}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn103.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn103.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 104 (UT-CS-95-308); note cites published version Higham:1997:IRL.
@TechReport{Higham:1995:IRL,
  author          = {N. J. Higham},
  title           = {Iterative Refinement and {LAPACK}},
  type            = {LAPACK Working Note},
  number          = {104},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-308, October 1995. Published in
                     \cite{Higham:1997:IRL}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn104.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn104.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 105 (UT-CS-95-309); note cites published version Higham:1997:SDP.
@TechReport{Higham:1995:SDP,
  author          = {N. J. Higham},
  title           = {Stability of the Diagonal Pivoting Method with Partial
                     Pivoting},
  type            = {LAPACK Working Note},
  number          = {105},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {1995},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-95-309, October 1995. Published in
                     \cite{Higham:1997:SDP}.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn105.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn105.pdf},
  acknowledgement = ack-nhfb,
}
@TechReport{Bai:1995:TLAa,
author = "Z. Bai and D. Day and J. Demmel and J. Dongarra and M.
Gu and A. Ruhe and H. van der Vorst",
title = "Templates for Linear Algebra Problems",
type = "LAPACK Working Note",
number = "106",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = oct,
year = "1995",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-95-311, October 1995. Published in
\cite{Bai:1995:TLAb}.",
URL = "http://www.netlib.org/lapack/lawns/lawn106.ps;
http://www.netlib.org/lapack/lawnspdf/lawn106.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Kaagstrom:1995:GBLa,
author = "B. K{\aa}gstr{\"o}m and P. Ling and C. {Van Loan}",
title = "{GEMM}-Based Level 3 {BLAS}: High-Performance Model
Implementations and Performance Evaluation Benchmark",
type = "LAPACK Working Note",
number = "107",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = nov,
year = "1995",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-95-315, November 1995. Published in
\cite{Kaagstrom:1998:GBL}.",
URL = "http://www.netlib.org/lapack/lawns/lawn107.ps;
http://www.netlib.org/lapack/lawnspdf/lawn107.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Kaagstrom:1995:GBLb,
author = "B. K{\aa}gstr{\"o}m and P. Ling and C. {Van Loan}",
title = "{GEMM}-Based Level 3 {BLAS}: Installation, Tuning and
Use of the Model Implementations and the Performance
Evaluation Benchmark",
type = "LAPACK Working Note",
number = "108",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = nov,
year = "1995",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-95-316, November 1995.",
URL = "http://www.netlib.org/lapack/lawns/lawn108.ps;
http://www.netlib.org/lapack/lawnspdf/lawn108.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:1995:BTW,
author = "J. Dongarra and S. Hammarling and S. Ostrouchov",
title = "{BLAS} Technical Workshop",
type = "LAPACK Working Note",
number = "109",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = nov,
year = "1995",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-95-317, November 1995.",
URL = "http://www.netlib.org/lapack/lawns/lawn109.ps;
http://www.netlib.org/lapack/lawnspdf/lawn109.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:1996:KCP,
author = "J. J. Dongarra and S. Hammarling and D. W. Walker",
title = "Key Concepts For Parallel Out-Of-Core {$ L U $}
Factorization",
type = "LAPACK Working Note",
number = "110",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = apr,
year = "1996",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-96-324, April 1996. Published in
\cite{Dongarra:1997:KCPb}.",
URL = "http://www.netlib.org/lapack/lawns/lawn110.ps;
http://www.netlib.org/lapack/lawnspdf/lawn110.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Bilmes:1996:OMM,
author = "J. Bilmes and K. Asanovic and J. Demmel and D. Lam and
C.-W. Chin",
title = "Optimizing Matrix Multiply using {PHiPAC}: a Portable,
High-Performance, {ANSI C} Coding Methodology",
type = "LAPACK Working Note",
number = "111",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = may,
year = "1996",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-96-326, May 1996.",
URL = "http://www.netlib.org/lapack/lawns/lawn111.ps;
http://www.netlib.org/lapack/lawnspdf/lawn111.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Blackford:1996:PEDa,
author = "L. S. Blackford and A. Cleary and J. Demmel and I.
Dhillon and J. Dongarra and S. Hammarling and A.
Petitet and H. Ren and K. Stanley and R. C. Whaley",
title = "Practical Experience in the Dangers of Heterogeneous
Computing",
type = "LAPACK Working Note",
number = "112",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = jul,
year = "1996",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-96-330, July 1996. Published in
\cite{Blackford:1996:PEDb,Blackford:1997:PEN}.",
URL = "http://www.netlib.org/lapack/lawns/lawn112.ps;
http://www.netlib.org/lapack/lawnspdf/lawn112.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Quintana-Orti:1996:BPA,
author = "G. Quintana-Orti and E. S. Quintana-Orti and A.
Petitet",
title = "Block-Partitioned Algorithms for Solving the Linear
Least Squares Problem",
type = "LAPACK Working Note",
number = "113",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = jul,
year = "1996",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-96-333, July 1996.",
URL = "http://www.netlib.org/lapack/lawns/lawn113.ps;
http://www.netlib.org/lapack/lawnspdf/lawn113.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Quintana-Orti:1996:BVQ,
author = "G. Quintana-Orti and X. Sun and C. Bischof",
title = "A {BLAS-3} Version of the {$ Q R $} Factorization with
Column Pivoting",
type = "LAPACK Working Note",
number = "114",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = aug,
year = "1996",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-96-334, August 1996.",
URL = "http://www.netlib.org/lapack/lawns/lawn114.ps;
http://www.netlib.org/lapack/lawnspdf/lawn114.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Ren:1996:EAI,
author = "H. Ren",
title = "On the Error Analysis and Implementation of Some
Eigenvalue Decomposition and Singular Value
Decomposition Algorithms",
type = "LAPACK Working Note",
number = "115",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = sep,
year = "1996",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-96-336, September 1996.",
URL = "http://www.netlib.org/lapack/lawns/lawn115.ps;
http://www.netlib.org/lapack/lawnspdf/lawn115.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Sidani:1996:PMD,
author = "M. Sidani and B. Harrod",
title = "Parallel Matrix Distributions: Have we been doing it
all right?",
type = "LAPACK Working Note",
number = "116",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = nov,
year = "1996",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-96-340, November 1996.",
URL = "http://www.netlib.org/lapack/lawns/lawn116.ps;
http://www.netlib.org/lapack/lawnspdf/lawn116.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Blackford:1996:FIL,
author = "L. Susan Blackford and Jack J. Dongarra and Jeremy {Du
Croz} and Sven Hammarling and Jerzy Wa{\'s}niewski",
title = "A {Fortran 90} Interface for {LAPACK}",
type = "LAPACK Working Note",
number = "117",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = dec,
year = "1996",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-96-341, December 1996.",
URL = "http://www.netlib.org/lapack/lawns/lawn117.ps;
http://www.netlib.org/lapack/lawnspdf/lawn117.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:1997:DIP,
author = "J. J. Dongarra and E. F. D'Azevedo",
title = "The Design and Implementation of the Parallel
Out-of-core {ScaLAPACK} {$ L U $}, {$ Q R $}, and
{Cholesky} Factorization Routines",
type = "LAPACK Working Note",
number = "118",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = jan,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-347, January 1997. Published in
\cite{DAzevedo:2000:DIP}.",
URL = "http://www.netlib.org/lapack/lawns/lawn118.ps;
http://www.netlib.org/lapack/lawnspdf/lawn118.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Demmel:1997:CSV,
author = "James Demmel and Ming Gu and Stanley Eisenstat and
Ivan Slapni{\v{c}}ar and Kre{\v{s}}imir Veseli{\'c} and
Zlatko Drma{\v{c}}",
title = "Computing the Singular Value Decomposition with High
Relative Accuracy",
type = "LAPACK Working Note",
number = "119",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = feb,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-348, February 1997. Published in
\cite{Demmel:1999:CSV}.",
URL = "http://www.netlib.org/lapack/lawns/lawn119.ps;
http://www.netlib.org/lapack/lawnspdf/lawn119.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Desprez:1997:SBC,
author = "F. Desprez and J. Dongarra and A. Petitet and C.
Randriamaro and Y. Robert",
title = "Scheduling Block-Cyclic Array Redistribution",
type = "LAPACK Working Note",
number = "120",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = feb,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-349, February 1997. Published in
\cite{Desprez:1998:SBA,Desprez:1998:SBC}.",
URL = "http://www.netlib.org/lapack/lawns/lawn120.ps;
http://www.netlib.org/lapack/lawnspdf/lawn120.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Henry:1997:PIN,
author = "G. Henry and D. Watkins and J. Dongarra",
title = "A Parallel Implementation of the Nonsymmetric {$ Q R
$} Algorithm for Distributed Memory Architectures",
type = "LAPACK Working Note",
number = "121",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = mar,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-352, March 1997. Published in
\cite{Henry:2002:PIN}.",
URL = "http://www.netlib.org/lapack/lawns/lawn121.ps;
http://www.netlib.org/lapack/lawnspdf/lawn121.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Ahues:1997:NDC,
author = "M. Ahues and F. Tisseur",
title = "A New Deflation Criterion for the {$ Q R $}
Algorithm",
type = "LAPACK Working Note",
number = "122",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = mar,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-353, March 1997.",
URL = "http://www.netlib.org/lapack/lawns/lawn122.ps;
http://www.netlib.org/lapack/lawnspdf/lawn122.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Bai:1997:TMC,
author = "Z. Bai and D. Day and J. Demmel and J. Dongarra",
title = "A Test Matrix Collection for Non-{Hermitian}
Eigenvalue Problems",
type = "LAPACK Working Note",
number = "123",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = mar,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-355, March 1997.",
URL = "http://www.netlib.org/lapack/lawns/lawn123.ps;
http://www.netlib.org/lapack/lawnspdf/lawn123.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Demmel:1997:APS,
author = "J. Demmel and J. Gilbert and X. Li",
title = "An Asynchronous Parallel Supernodal Algorithm for
Sparse {Gaussian} Elimination",
type = "LAPACK Working Note",
number = "124",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = apr,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-357, April 1997. Published in
\cite{Demmel:1999:APS}.",
URL = "http://www.netlib.org/lapack/lawns/lawn124.ps;
http://www.netlib.org/lapack/lawnspdf/lawn124.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Cleary:1997:ISD,
author = "A. Cleary and J. Dongarra",
title = "Implementation in {ScaLAPACK} of Divide-and-Conquer
Algorithms for Banded and Tridiagonal Linear Systems",
type = "LAPACK Working Note",
number = "125",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = apr,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-358, April 1997.",
URL = "http://www.netlib.org/lapack/lawns/lawn125.ps;
http://www.netlib.org/lapack/lawnspdf/lawn125.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Anderson:1997:PIL,
author = "E. Anderson and M. Fahey",
title = "Performance Improvements to {LAPACK} for the {Cray
Scientific Library}",
type = "LAPACK Working Note",
number = "126",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = apr,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-359, April 1997.",
URL = "http://www.netlib.org/lapack/lawns/lawn126.ps;
http://www.netlib.org/lapack/lawnspdf/lawn126.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Li:1997:SGE,
author = "X. Li",
title = "Sparse {Gaussian} Elimination on High Performance
Computers",
type = "LAPACK Working Note",
number = "127",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = jun,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-368, June 1997.",
URL = "http://www.netlib.org/lapack/lawns/lawn127.ps;
http://www.netlib.org/lapack/lawnspdf/lawn127.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Petitet:1997:ARM,
author = "A. Petitet",
title = "Algorithmic Redistribution Methods for Block Cyclic
Decompositions",
type = "LAPACK Working Note",
number = "128",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = jul,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-371, July 1997. Published in
\cite{Petitet:1999:ARM}.",
URL = "http://www.netlib.org/lapack/lawns/lawn128.ps;
http://www.netlib.org/lapack/lawnspdf/lawn128.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Choi:1997:NPM,
author = "J. Choi",
title = "A New Parallel Matrix Multiplication Algorithm on
Distributed-Memory Concurrent Computers",
type = "LAPACK Working Note",
number = "129",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = sep,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-369, September 1997. Published in
\cite{Choi:1998:NPM}.",
URL = "http://www.netlib.org/lapack/lawns/lawn129.ps;
http://www.netlib.org/lapack/lawnspdf/lawn129.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Demmel:1997:ASS,
author = "J. Demmel",
title = "Accurate {SVDs} of Structured Matrices",
type = "LAPACK Working Note",
number = "130",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = oct,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-375, October 1997.",
URL = "http://www.netlib.org/lapack/lawns/lawn130.ps;
http://www.netlib.org/lapack/lawnspdf/lawn130.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Whaley:1997:ATL,
author = "R. Whaley and J. Dongarra",
title = "Automatically Tuned Linear Algebra Software",
type = "LAPACK Working Note",
number = "131",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = dec,
year = "1997",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-97-366, December 1997. Published in
\cite{Whaley:1998:ATL}.",
URL = "http://www.netlib.org/lapack/lawns/lawn131.ps;
http://www.netlib.org/lapack/lawnspdf/lawn131.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Tisseur:1998:PDC,
author = "F. Tisseur and J. Dongarra",
title = "Parallelizing the Divide and Conquer Algorithm for the
Symmetric Tridiagonal Eigenvalue Problem on Distributed
Memory Architectures",
type = "LAPACK Working Note",
number = "132",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = mar,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-382, March 1998.",
URL = "http://www.netlib.org/lapack/lawns/lawn132.ps;
http://www.netlib.org/lapack/lawnspdf/lawn132.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Petitet:1998:ARM,
author = "A. Petitet and J. Dongarra",
title = "Algorithmic Redistribution Methods for Block Cyclic
Distributions",
type = "LAPACK Working Note",
number = "133",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = mar,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-383, March 1998. Published in
\cite{Petitet:1999:ARM}.",
URL = "http://www.netlib.org/lapack/lawns/lawn133.ps;
http://www.netlib.org/lapack/lawnspdf/lawn133.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Wasniewski:1998:HPL,
author = "J. Wa{\'s}niewski and J. Dongarra",
title = "High Performance Linear Algebra Package ---
{LAPACK90}",
type = "LAPACK Working Note",
number = "134",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = apr,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-384, April 1998. Published in
\cite{Dongarra:1998:HPL}.",
URL = "http://www.netlib.org/lapack/lawns/lawn134.ps;
http://www.netlib.org/lapack/lawnspdf/lawn134.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{DAzevedo:1998:PSE,
author = "E. D'Azevedo and J. Dongarra",
title = "Packed Storage Extensions for {ScaLAPACK}",
type = "LAPACK Working Note",
number = "135",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = apr,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-385, April 1998.",
URL = "http://www.netlib.org/lapack/lawns/lawn135.ps;
http://www.netlib.org/lapack/lawnspdf/lawn135.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Blackford:1998:SEP,
author = "L. S. Blackford and R. C. Whaley",
title = "{ScaLAPACK} Evaluation and Performance at the {DoD}
{MSRCs}",
type = "LAPACK Working Note",
number = "136",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = apr,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-388, April 1998.",
URL = "http://www.netlib.org/lapack/lawns/lawn136.ps;
http://www.netlib.org/lapack/lawnspdf/lawn136.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Blackford:1998:IGD,
author = "L. S. Blackford and J. J. Dongarra and C. A.
Papadopoulos and R. C. Whaley",
title = "Installation Guide and Design of the {HPF 1.1}
interface to {ScaLAPACK}, {SLHPF}",
type = "LAPACK Working Note",
number = "137",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = aug,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-396, August 1998.",
URL = "http://www.netlib.org/lapack/lawns/lawn137.ps;
http://www.netlib.org/lapack/lawnspdf/lawn137.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:1998:TSL,
author = "J. Dongarra and W. Owczarz and J. Wa{\'s}niewski and
P. Yalamov",
title = "Testing Software for {LAPACK90}",
type = "LAPACK Working Note",
number = "138",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = sep,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-401, Sept 1998.",
URL = "http://www.netlib.org/lapack/lawns/lawn138.ps;
http://www.netlib.org/lapack/lawnspdf/lawn138.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Petitet:1998:NLA,
author = "A. Petitet and H. Casanova and J. Dongarra and Y.
Robert and R. C. Whaley",
title = "A Numerical Linear Algebra Problem Solving Environment
Designer's Perspective",
type = "LAPACK Working Note",
number = "139",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = oct,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-405, Oct 1998. Published in
\cite{Petitet:1999:NLA,Petitet:2000:PDS}.",
URL = "http://www.netlib.org/lapack/lawns/lawn139.ps;
http://www.netlib.org/lapack/lawnspdf/lawn139.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Casanova:1998:NVD,
author = "H. Casanova and J. Dongarra",
title = "{NetSolve version 1.2}: Design and Implementation",
type = "LAPACK Working Note",
number = "140",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = nov,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-406, Nov 1998.",
URL = "http://www.netlib.org/lapack/lawns/lawn140.ps;
http://www.netlib.org/lapack/lawnspdf/lawn140.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Eijkhout:1998:OIL,
author = "Victor Eijkhout",
title = "Overview of Iterative Linear System Solver Packages",
type = "LAPACK Working Note",
number = "141",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = dec,
year = "1998",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-98-411, Dec 1998.",
URL = "http://www.netlib.org/lapack/lawns/lawn141.ps;
http://www.netlib.org/lapack/lawnspdf/lawn141.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Arbenz:1999:CPSa,
author = "P. Arbenz and A. Cleary and J. Dongarra and M.
Hegland",
title = "A Comparison of Parallel Solvers for Diagonally
Dominant and General Narrow-Banded Linear Systems",
type = "LAPACK Working Note",
number = "142",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = feb,
year = "1999",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-99-414, Feb 1999. Published in
\cite{Arbenz:1999:CPSc}.",
URL = "http://www.netlib.org/lapack/lawns/lawn142.ps;
http://www.netlib.org/lapack/lawnspdf/lawn142.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Arbenz:1999:CPSb,
author = "P. Arbenz and A. Cleary and J. Dongarra and M.
Hegland",
title = "A Comparison of Parallel Solvers for Diagonally
Dominant and General Narrow-Banded Linear Systems
{II}",
type = "LAPACK Working Note",
number = "143",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = may,
year = "1999",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-99-415, May 1999.",
URL = "http://www.netlib.org/lapack/lawns/lawn143.ps;
http://www.netlib.org/lapack/lawnspdf/lawn143.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Eijkhout:1999:EPI,
author = "V. Eijkhout",
title = "On the Existence Problem of Incomplete Factorisation
Methods",
type = "LAPACK Working Note",
number = "144",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = dec,
year = "1999",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-99-435, Dec 1999.",
URL = "http://www.netlib.org/lapack/lawns/lawn144.ps;
http://www.netlib.org/lapack/lawnspdf/lawn144.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Eijkhout:1999:WMI,
author = "V. Eijkhout",
title = "The `weighted modification' incomplete factorisation
method",
type = "LAPACK Working Note",
number = "145",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = dec,
year = "1999",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-99-436, Dec 1999.",
URL = "http://www.netlib.org/lapack/lawns/lawn145.ps;
http://www.netlib.org/lapack/lawnspdf/lawn145.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Andersen:2000:RFC,
author = "B. Andersen and F. Gustavson and J. Wa{\'s}niewski",
title = "A recursive formulation of {Cholesky} factorization of
a matrix in packed storage",
type = "LAPACK Working Note",
number = "146",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = may,
year = "2000",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-00-441, May 2000. Published in
\cite{Andersen:2001:RFC}.",
URL = "http://www.netlib.org/lapack/lawns/lawn146.ps;
http://www.netlib.org/lapack/lawnspdf/lawn146.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Whaley:2000:AEO,
author = "R. C. Whaley and A. Petitet and J. Dongarra",
title = "Automated Empirical Optimization of Software and the
{ATLAS Project}",
type = "LAPACK Working Note",
number = "147",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = sep,
year = "2000",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-00-448, September 2000. Published in
\cite{Whaley:2001:AEO}.",
URL = "http://www.netlib.org/lapack/lawns/lawn147.ps;
http://www.netlib.org/lapack/lawnspdf/lawn147.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Bindel:2000:CGR,
author = "D. Bindel and J. Demmel and W. Kahan and O. Marques",
title = "On Computing {Givens} rotations reliably and
efficiently",
type = "LAPACK Working Note",
number = "148",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = oct,
year = "2000",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-00-449, October 2000. Published in
\cite{Bindel:2002:CGR}.",
URL = "http://www.netlib.org/lapack/lawns/lawn148.ps;
http://www.netlib.org/lapack/lawnspdf/lawn148.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Li:2000:DIT,
author = "X. Li and J. Demmel and D. Bailey and G. Henry and Y.
Hida and J. Iskandar and W. Kahan and A. Kapur and M.
Martin and T. Tung and D. J. Yoo",
title = "Design, Implementation and Testing of Extended and
Mixed Precision {BLAS}",
type = "LAPACK Working Note",
number = "149",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = oct,
year = "2000",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-00-451, October 2000. Published in
\cite{Li:2002:DIT}.",
URL = "http://www.netlib.org/lapack/lawns/lawn149.ps;
http://www.netlib.org/lapack/lawnspdf/lawn149.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Anderson:2000:DPR,
author = "E. Anderson",
title = "Discontinuous Plane Rotations and the Symmetric
Eigenvalue Problem",
type = "LAPACK Working Note",
number = "150",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = dec,
year = "2000",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-00-454, December 2000.",
URL = "http://www.netlib.org/lapack/lawns/lawn150.ps;
http://www.netlib.org/lapack/lawnspdf/lawn150.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Eijkhout:2001:ADM,
author = "V. Eijkhout",
title = "Automatic Determination of Matrix-Blocks",
type = "LAPACK Working Note",
number = "151",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = apr,
year = "2001",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-01-458, April 2001.",
URL = "http://www.netlib.org/lapack/lawns/lawn151.ps;
http://www.netlib.org/lapack/lawnspdf/lawn151.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Cheng:2001:ILB,
author = "S. Cheng and N. Higham",
title = "Implementation for {LAPACK} of a Block Algorithm for
Matrix {$1$}-Norm Estimation",
type = "LAPACK Working Note",
number = "152",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = aug,
year = "2001",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-01-470, August 2001.",
URL = "http://www.netlib.org/lapack/lawns/lawn152.ps;
http://www.netlib.org/lapack/lawnspdf/lawn152.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Fahey:2001:NCP,
author = "M. Fahey",
title = "New Complex Parallel Eigenvalue and Eigenvector
Routines",
type = "LAPACK Working Note",
number = "153",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = aug,
year = "2001",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-01-471, August 2001.",
URL = "http://www.netlib.org/lapack/lawns/lawn153.ps;
http://www.netlib.org/lapack/lawnspdf/lawn153.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Dhillon:2002:OER,
author = "Inderjit S. Dhillon and Beresford N. Parlett",
title = "Orthogonal Eigenvectors and Relative Gaps",
type = "LAPACK Working Note",
number = "154",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = aug,
year = "2002",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-02-474, August 2002. Published in
\cite{Dhillon:2004:OER}.",
URL = "http://www.netlib.org/lapack/lawns/lawn154.ps;
http://www.netlib.org/lapack/lawnspdf/lawn154.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Parlett:2002:IDA,
author = "Beresford N. Parlett and Osni A. Marques",
title = "An implementation of the {$ d q d s $} algorithm
(positive case)",
type = "LAPACK Working Note",
number = "155",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = aug,
year = "2002",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "LBNL-43726, UT-CS-02-475, August 2002. Published in
\cite{Parlett:2000:IAP}.",
URL = "http://www.netlib.org/lapack/lawns/lawn155.ps;
http://www.netlib.org/lapack/lawnspdf/lawn155.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Eijkhout:2002:PAO,
author = "Victor Eijkhout",
title = "Polynomial acceleration of optimised multi-grid
smoothers; basic theory",
type = "LAPACK Working Note",
number = "156",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = aug,
year = "2002",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-02-477, August 2002.",
URL = "http://www.netlib.org/lapack/lawns/lawn156.ps;
http://www.netlib.org/lapack/lawnspdf/lawn156.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:2002:SAN,
author = "Jack Dongarra and Victor Eijkhout",
title = "Self-adapting Numerical Software for Next Generation
Applications",
type = "LAPACK Working Note",
number = "157",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = aug,
year = "2002",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-02-484, August 2002. Published in
\cite{Dongarra:2003:SANb}.",
URL = "http://www.netlib.org/lapack/lawns/lawn157.ps;
http://www.netlib.org/lapack/lawnspdf/lawn157.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Anderson:2002:LFE,
author = "Edward Anderson",
title = "{LAPACK3E} --- {A} {Fortran 90}-enhanced version of
{LAPACK}",
type = "LAPACK Working Note",
number = "158",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = dec,
year = "2002",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-02-497, December 2002.",
URL = "http://www.netlib.org/lapack/lawns/lawn158.ps;
http://www.netlib.org/lapack/lawnspdf/lawn158.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:2003:FCA,
author = "Jack Dongarra and Victor Eijkhout",
title = "Finite-choice algorithm optimization in {Conjugate
Gradients}",
type = "LAPACK Working Note",
number = "159",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = jan,
year = "2003",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-03-502, January 2003.",
URL = "http://www.netlib.org/lapack/lawns/lawn159.ps;
http://www.netlib.org/lapack/lawnspdf/lawn159.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Chen:2003:SAS,
  author          = {Zizhong Chen and Jack Dongarra and Piotr Luszczek and
                     Kenneth Roche},
  title           = {Self Adapting Software for Numerical Linear Algebra
                     and {LAPACK} for Clusters},
  type            = {LAPACK Working Note},
  number          = {160},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jan,
  year            = {2003},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-03-499, January 2003},
  URL             = {http://www.netlib.org/lapack/lawns/lawn160.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn160.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAWN 161.  The report designation in the note field is UT-CS-04-522,
%%% February 2004, so month and year follow it.  The citation key keeps
%%% its original year tag so that existing citations continue to resolve.
@TechReport{Lucas:2003:LSC,
author = "Craig Lucas",
title = "{LAPACK}-Style Codes for Level 2 and 3 Pivoted
{Cholesky} Factorizations",
type = "LAPACK Working Note",
number = "161",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = feb,
year = "2004",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-04-522, February 2004",
URL = "http://www.netlib.org/lapack/lawns/lawn161.ps;
http://www.netlib.org/lapack/lawnspdf/lawn161.pdf",
acknowledgement = ack-nhfb,
}
%%% LAWN 162.  The month agrees with the report designation recorded in
%%% the note field (UT-CS-04-541, December 2004).
@TechReport{Dhillon:2004:DIM,
author = "Inderjit S. Dhillon and Beresford N. Parlett and
Christof V{\"o}mel",
title = "The Design and Implementation of the {MRRR}
Algorithm",
type = "LAPACK Working Note",
number = "162",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = dec,
year = "2004",
bibdate = "Fri Apr 22 17:06:37 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-04-541, December, 2004.",
URL = "http://www.netlib.org/lapack/lawns/lawn162.ps;
http://www.netlib.org/lapack/lawnspdf/lawn162.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Parlett:2004:HMA,
  author          = {Beresford N. Parlett and Christof V{\"o}mel},
  title           = {How the {MRRR} Algorithm Can Fail on Tight Eigenvalue
                     Clusters},
  type            = {LAPACK Working Note},
  number          = {163},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  pages           = {15},
  month           = dec,
  year            = {2004},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-04-542, December, 2004.},
  URL             = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2004/CSD-04-1367.pdf;
                     http://www.netlib.org/lapack/lawns/lawn163.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn163.pdf},
  acknowledgement = ack-nhfb,
}
@TechReport{Demmel:2005:LPR,
  author          = {Jim Demmel and Jack Dongarra},
  title           = {{LAPACK 2005} Prospectus: Reliable and Scalable
                     Software for Linear Algebra Computations on High End
                     Computers},
  type            = {LAPACK Working Note},
  number          = {164},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = feb,
  year            = {2005},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-05-546, February 2005.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn164.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn164.pdf},
  acknowledgement = ack-nhfb,
}
@TechReport{Demmel:2005:EBE,
  author          = {James Demmel and Yozo Hida and W. Kahan and Xiaoye S.
                     Li and Soni Mukherjee and E. Jason Riedy},
  title           = {Error Bounds from Extra Precise Iterative Refinement},
  type            = {LAPACK Working Note},
  number          = {165},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = feb,
  year            = {2005},
  bibdate         = {Fri Apr 22 17:06:37 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-05-547, February 2005.},
  URL             = {http://www.netlib.org/lapack/lawns/lawn165.ps;
                     http://www.netlib.org/lapack/lawnspdf/lawn165.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAWN 166.  The note records the parallel issue of this report in the
%%% UCB//CSD technical report series.
@TechReport{Willems:2005:CBS,
author = "Paul R. Willems and Bruno Lang and Christof
V{\"o}mel",
title = "Computing the Bidiagonal {SVD} Using Multiple
Relatively Robust Representations",
type = "LAPACK Working Note",
number = "166",
institution = "Computer Science Division, University of California,
Berkeley",
address = "Berkeley, CA, USA",
pages = "20",
day = "29",
month = aug,
year = "2005",
MRclass = "15A18, 65-04, 65F15",
bibdate = "Mon Mar 20 12:30:00 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "Also issued as Technical Report UCB//CSD-05-1376",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn166.pdf",
abstract = "We describe the design and implementation of a new
algorithm for computing the singular value
decomposition of a real bidiagonal matrix. This
algorithm uses ideas developed by Gro{\ss}er and Lang
that extend Parlett's and Dhillon's MRRR algorithm for
the tridiagonal symmetric eigenproblem. One key feature
of our new implementation is, that $k$ singular
triplets can be computed using only {$ O(n k) $}
storage units and floating point operations, where $n$
is the dimension of the matrix. The algorithm will be
made available as routine xBDSCR in the upcoming new
release of the LAPACK library.",
acknowledgement = ack-nhfb,
keywords = "Bidiagonal Singular Value Decomposition; Coupling
Relations; LAPACK library; MRRR algorithm; Tridiagonal
Symmetric Eigenproblem",
}
%%% LAWN 167.  Paragraph breaks in the abstract are marked explicitly
%%% with \par, because BibTeX collapses line breaks inside a field
%%% value into single spaces.
@TechReport{Marques:2005:SCM,
author = "Osni A. Marques and Beresford N. Parlett and Christof
V{\"o}mel",
title = "Subset Computations with the {MRRR} Algorithm",
type = "LAPACK Working Note",
number = "167",
institution = "Computer Science Division, University of California,
Berkeley",
address = "Berkeley, CA, USA",
pages = "9",
day = "26",
month = sep,
year = "2005",
bibdate = "Mon Mar 20 12:30:00 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "Also issued as Technical Report UCB//CSD-05-1392",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn167.pdf",
abstract = "The main advantage of inverse iteration over the QR
algorithm and Divide \& Conquer for the symmetric
tridiagonal eigenproblem is that subsets of eigenpairs
can be computed at reduced cost.\par
The MRRR algorithm (MRRR = Multiple Relatively Robust
Representations) is a clever variant of inverse
iteration without the need for reorthogonalization.
{\tt stegr}, the current version of MRRR in LAPACK 3.0,
does not allow for subset computations. The next
release of {\tt stegr} is designed to compute a
(sub-)set of $k$ eigenpairs with {$ O(k n) $}
operations.\par
Because of the special way in which eigenvectors are
computed, MRRR subset computations are more complicated
than when using inverse iteration. Unlike the latter,
MRRR sometimes cannot ignore the unwanted part of the
spectrum.\par
We describe the problems with what we call false
singletons. These are eigenvalues that appear to be
isolated with respect to the wanted eigenvalues but in
fact belong to a tight cluster of unwanted eigenvalues.
This paper analyzes these complications and ways to
deal with them.",
acknowledgement = ack-nhfb,
keywords = "false singleton; Multiple relatively robust
representations; numerically orthogonal eigenvectors;
subset computation; symmetric tridiagonal matrix",
}
%%% LAWN 168.  MRclass is a bare comma-separated list of classification
%%% codes, written without terminal punctuation.
@TechReport{Antonelli:2005:PSP,
author = "Dominic Antonelli and Christof V{\"o}mel",
title = "{PDSYEVR}. {ScaLAPACK}'s Parallel {MRRR} Algorithm for
the Symmetric Eigenvalue Problem",
type = "LAPACK Working Note",
number = "168",
institution = "Computer Science Division, University of California,
Berkeley",
address = "Berkeley, CA, USA",
pages = "18",
day = "29",
month = aug,
year = "2005",
MRclass = "65F15, 65Y15",
bibdate = "Mon Mar 20 12:30:00 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "Also issued as Technical Report UCB//CSD-05-1399.",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn168.pdf",
abstract = "In the 90s, Dhillon and Parlett devised a new
algorithm (Multiple Relatively Robust Representations,
MRRR) for computing numerically orthogonal eigenvectors
of a symmetric tridiagonal matrix {$T$} with {$ O(n^2)
$} cost. In this paper, we describe the design of
PDSYEVR, a ScaLAPACK implementation of the MRRR
algorithm to compute the eigenpairs in parallel. It
represents a substantial improvement over the symmetric
eigensolver PDSYEVX that is currently in ScaLAPACK and
is going to be part of the next ScaLAPACK release.",
acknowledgement = ack-nhfb,
keywords = "design; implementation; Multiple relatively robust
representations; numerical software; parallel
computation; ScaLAPACK; symmetric eigenvalue problem",
}
%%% LAWN 169 (part I of two).  The address carries no terminal period:
%%% bibliography styles supply their own punctuation after the field.
@TechReport{Drmac:2005:NFA,
author = "Zlatko Drma{\v{c}} and Kre{\v{s}}imir Veseli{\'c}",
title = "New Fast and Accurate {Jacobi} {SVD} Algorithm: {I}",
type = "LAPACK Working Note",
number = "169",
institution = "Department of Mathematics, University of Zagreb",
address = "Bijeni{\v{c}}ka 30, 10000 Zagreb, Croatia",
pages = "39",
day = "30",
month = aug,
year = "2005",
MRclass = "15A09, 15A12, 15A18, 15A23, 65F15, 65F22, 65F35",
bibdate = "Mon Mar 20 12:30:00 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn169.pdf",
abstract = "This paper is the result of contrived efforts to break
the barrier between numerical accuracy and run time
efficiency in computing the fundamental decomposition
of numerical linear algebra the singular value
decomposition (SVD) of a general dense matrix. It is an
unfortunate fact that the numerically most accurate one
sided Jacobi SVD algorithm is several times slower than
generally less accurate bidiagonalization based methods
such as the QR or the divide and conquer algorithm.
Despite its sound numerical qualities, the Jacobi SVD
is not included in the state of the art matrix
computation libraries and it is even considered
obsolete by some leading researches. Our quest for a
highly accurate and efficient SVD algorithm has led us
to a new, superior variant of the Jacobi algorithm. The
new algorithm has inherited all good high accuracy
properties, and it outperforms not only the best
implementations of the one sided Jacobi algorithm but
also the QR algorithm. Moreover, it seems that the
potential of the new approach is yet to be fully
exploited.",
acknowledgement = ack-nhfb,
keywords = "eigenvalues; Jacobi method; singular value
decomposition",
}
%%% LAWN 170 (part II of two).  The address carries no terminal period:
%%% bibliography styles supply their own punctuation after the field.
@TechReport{Drmac:2005:NFAb,
author = "Zlatko Drma{\v{c}} and Kre{\v{s}}imir Veseli{\'c}",
title = "New Fast and Accurate {Jacobi} {SVD} Algorithm: {II}",
type = "LAPACK Working Note",
number = "170",
institution = "Department of Mathematics, University of Zagreb",
address = "Bijeni{\v{c}}ka 30, 10000 Zagreb, Croatia",
pages = "25",
day = "30",
month = aug,
year = "2005",
bibdate = "Mon Mar 20 12:30:00 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn170.pdf",
abstract = "This paper presents new implementation of one sided
Jacobi SVD for triangular matrices and its use as the
core routine in a new preconditioned Jacobi SVD
algorithm, recently proposed by the authors. New pivot
strategy exploits the triangular form and uses the fact
that the input triangular matrix is the result of rank
revealing QR factorization. If used in the
preconditioned Jacobi SVD algorithm, described in the
first part of this report, it delivers superior
performance leading to the currently fastest method for
computing SVD decomposition with high relative
accuracy. Furthermore, the efficiency of the new
algorithm is comparable to the less accurate
bidiagonalization based methods. The paper also
discusses underflow issues in floating point
implementation, and shows how to use perturbation
theory to fix the imperfectness of machine arithmetic
on some systems.",
acknowledgement = ack-nhfb,
keywords = "eigenvalues; Jacobi method; singular value
decomposition; underflow",
}
%%% LAWN 171.  Neither the address nor the MRclass code list carries a
%%% terminal period; bibliography styles add punctuation themselves.
@TechReport{Kressner:2006:BAR,
author = "Daniel Kressner",
title = "Block Algorithms for Reordering Standard and
Generalized {Schur} Forms",
type = "LAPACK Working Note",
number = "171",
institution = "Department of Mathematics, University of Zagreb",
address = "Bijeni{\v{c}}ka 30, 10000 Zagreb, Croatia",
pages = "11",
day = "17",
month = feb,
year = "2006",
MRclass = "65F15, 65Y20",
bibdate = "Mon Mar 20 12:30:00 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn171.pdf",
abstract = "Block algorithms for reordering a selected set of
eigenvalues in a standard or generalized Schur form are
proposed. Efficiency is achieved by delaying orthogonal
transformations and (optionally) making use of level 3
BLAS operations. Numerical experiments demonstrate that
existing algorithms, as currently implemented in
LAPACK, are outperformed by up to a factor of four.",
acknowledgement = ack-nhfb,
keywords = "deflating subspace; invariant subspace; reordering;
Schur form",
}
%%% LAWN 172.  The Sturm count in the abstract refers to the shifted
%%% factorization T - sigma I = L D L^T.  Paragraph breaks in the
%%% abstract are marked explicitly with \par, because BibTeX collapses
%%% line breaks inside a field value into single spaces.
@TechReport{Marques:2005:BIF,
author = "Osni A. Marques and E. Jason Riedy and Christof
V{\"o}mel",
title = "Benefits of {IEEE-754} Features in Modern Symmetric
Tridiagonal Eigensolvers",
type = "LAPACK Working Note",
number = "172",
institution = "Computer Science Division, University of California,
Berkeley",
address = "Berkeley, CA, USA",
pages = "22",
day = "30",
month = sep,
year = "2005",
MRclass = "15A18, 15A23",
bibdate = "Mon Mar 20 12:18:56 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "Also issued as Technical Report UCB//CSD-05-1414.",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn172.pdf",
abstract = "Bisection is one of the most common methods used to
compute the eigenvalues of symmetric tridiagonal
matrices. Bisection relies on the Sturm count: for a
given shift $ \sigma $, the number of negative pivots
in the factorization {$ T - \sigma I = L D L^T $} equals
the number of eigenvalues of {$T$} that are smaller
than $ \sigma $. In IEEE-754 arithmetic, the value $
\infty $ permits the computation to continue past a
zero pivot, producing a correct Sturm count when {$T$}
is unreduced. Demmel and Li showed in the 90s that
using $ \infty $ rather than testing for zero pivots
within the loop could improve performance significantly
on certain architectures.\par
When eigenvalues are to be computed to high relative
accuracy, it is often preferable to work with {$ L D
L^T $} factorizations instead of the original
tridiagonal {$T$}, see for example the MRRR algorithm.
In these cases, the Sturm count has to be computed from
{$ L D L^T $}. The differential stationary and
progressive qds algorithms are the methods of
choice.\par
While it seems trivial to replace {$T$} by {$ L D L^T
$}, in reality these algorithms are more complicated:
in IEEE-754 arithmetic, a zero pivot produces an
overflow, followed by an invalid exception (NaN), that
renders the Sturm count incorrect.\par
We present alternative, safe formulations that are
guaranteed to produce the correct result.\par
Benchmarking these algorithms on a variety of platforms
shows that the original formulation without tests is
always faster provided no exception occurs. The
transforms see speed-ups of up to $ 2.6 \times $ over
the careful formulations.\par
Tests on industrial matrices show that encountering
exceptions in practice is rare. This leads to the
following design: First, compute the Sturm count by the
fast but unsafe algorithm. Then, if an exception
occurred, recompute the count by a safe, slower
alternative. The new Sturm count algorithms improve the
speed of bisection by up to $ 2 \times $ on our test
matrices. Furthermore, unlike the traditional
tiny-pivot substitution, proper use of IEEE-754
features provides a careful formulation that imposes no
input range restrictions.",
acknowledgement = ack-nhfb,
keywords = "differential qds algorithms; IEEE-754 arithmetic;
IEEE-754 performance; LAPACK; MRRR algorithm; NaN
arithmetic",
}
@TechReport{Kaagstrom:2006:MVQ,
  author          = {Bo K{\aa}gstr{\"o}m and Daniel Kressner},
  title           = {Multishift Variants of the {$ Q Z $} Algorithm with
                     Aggressive Early Deflation},
  type            = {LAPACK Working Note},
  number          = {173},
  institution     = {Department of Computing Science, Ume{\aa} University},
  address         = {Ume{\aa}, Sweden},
  pages           = {42},
  day             = {20},
  month           = feb,
  year            = {2006},
  MRclass         = {65F15, 15A18, 15A22, 47A75},
  bibdate         = {Mon Mar 20 12:30:00 2006},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {Also appeared as technical report UMINF-05.11},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn173.pdf},
  abstract        = {New variants of the QZ algorithm for solving the
                     generalized eigenvalue problem are proposed. An
                     extension of the small-bulge multishift QR algorithm is
                     developed, which chases chains of many small bulges
                     instead of only one bulge in each QZ iteration. This
                     allows the effective use of level 3 BLAS operations,
                     which in turn can provide efficient utilization of high
                     performance computing systems with deep memory
                     hierarchies. Moreover, an extension of the aggressive
                     early deflation strategy is proposed, which can
                     identify and deflate converged eigenvalues long before
                     classic deflation strategies would. Consequently, the
                     number of overall QZ iterations needed until
                     convergence is considerably reduced. As a third
                     ingredient, we reconsider the deflation of infinite
                     eigenvalues and present a new deflation algorithm,
                     which is particularly effective in the presence of a
                     large number of infinite eigenvalues. Combining all
                     these developments, our implementation significantly
                     improves existing implementations of the QZ algorithm.
                     This is demonstrated by numerical experiments with
                     random matrix pairs as well as with matrix pairs
                     arising from various applications.},
  acknowledgement = ack-nhfb,
  keywords        = {aggressive early deflation; blocked algorithms;
                     Generalized eigenvalue problem; generalized Schur form;
                     multishifts; QZ algorithm},
}
%%% LAWN 174.  Institution and address are parallel semicolon-separated
%%% lists, one item per affiliation.  In the abstract, \_GEMV is the
%%% BLAS matrix-vector multiply routine name with the precision prefix
%%% left open.
@TechReport{Howell:2005:CEB,
author = "G. W. Howell and J. W. Demmel and C. T. Fulton and S.
Hammarling and K. Marmol",
title = "Cache Efficient Bidiagonalization Using {BLAS 2.5}
Operators",
type = "LAPACK Working Note",
number = "174",
institution = "North Carolina State University; University of
California, Berkeley; Florida Institute of Technology;
Numerical Algorithms Group; Harris Corporation",
address = "Raleigh, NC 27697, USA; Berkeley, CA 94720, USA;
Melbourne, FL 32901, USA; Oxford, UK; Melbourne, FL
32901, USA",
pages = "39",
day = "1",
month = nov,
year = "2005",
bibdate = "Mon Mar 20 12:30:00 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn174.pdf",
abstract = "In this paper we reorganize the sequence of operations
for Householder bidiagonalization of a general $ m
\times n $ matrix, so that two (\_GEMV) vector-matrix
multiplications can be done with one pass of the
unreduced trailing part of the matrix through cache.
Two new BLAS 2.5 operations approximately cut in half
the transfer of data from main memory to cache. We give
detailed algorithm descriptions and compare timings
with the current LAPACK bidiagonalization algorithm.",
acknowledgement = ack-nhfb,
}
@TechReport{Langou:2006:EPB,
  author          = {Julie Langou and Julien Langou and Piotr Luszczek and
                     Jakub Kurzak and Alfredo Buttari and Jack Dongarra},
  title           = {Exploiting the Performance of 32 bit Floating Point
                     Arithmetic in Obtaining 64 bit Accuracy (Revisiting
                     Iterative Refinement for Linear Systems)},
  type            = {LAPACK Working Note},
  number          = {175},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  pages           = {17},
  month           = jun,
  year            = {2006},
  bibdate         = {Mon Oct 09 12:05:43 2006},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn175.pdf;
                     http://www.netlib.org/lapack/lawnspdf/lawn175.ps},
  abstract        = {Recent versions of microprocessors exhibit performance
                     characteristics for 32 bit floating point arithmetic
                     (single precision) that is substantially higher than 64
                     bit floating point arithmetic (double precision).
                     Examples include the Intel's Pentium IV and M
                     processors, AMD's Opteron architectures and the IBM's
                     Cell Broad Engine processor. When working in single
                     precision, floating point operations can be performed
                     up to two times faster on the Pentium and up to ten
                     times faster on the Cell over double precision. The
                     performance enhancements in these architectures are
                     derived by accessing extensions to the basic
                     architecture, such as SSE2 in the case of the Pentium
                     and the vector functions on the IBM Cell. The
                     motivation for this paper is to exploit single
                     precision operations whenever possible and resort to
                     double precision at critical stages while attempting to
                     provide the full double precision results. The results
                     described here are fairly general and can be applied to
                     various problems in linear algebra such as solving
                     large sparse systems, using direct or iterative methods
                     and some eigenvalue problems. There are limitations to
                     the success of this process, such as when the
                     conditioning of the problem exceeds the reciprocal of
                     the accuracy of the single precision computations. In
                     that case the double precision algorithm should be
                     used.},
  acknowledgement = ack-nhfb,
}
@TechReport{Drmac:2006:FRR,
  author          = {Zlatko Drma{\v{c}} and Zvonimir Bujanovi{\'c}},
  title           = {On the failure of rank revealing {$ Q R $}
                     factorization software --- a case study},
  type            = {LAPACK Working Note},
  number          = {176},
  institution     = {Department of Mathematics, University of Zagreb},
  address         = {Bijeni{\v{c}}ka 30, 10000 Zagreb, Croatia},
  pages           = {27},
  day             = {2},
  month           = jun,
  year            = {2006},
  bibdate         = {Mon Oct 09 12:05:43 2006},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn176.pdf;
                     http://www.netlib.org/lapack/lawnspdf/lawn176.ps},
  abstract        = {This note reports an unexpected and rather erratic
                     behavior of the LAPACK software implementation of the
                     QR factorization with Businger--Golub column pivoting.
                     It is shown that, due to finite precision arithmetic,
                     software implementation of the factorization can
                     catastrophically fail to produce triangular factor with
                     the structure characteristic to the Businger--Golub
                     pivot strategy. The failure of current state of the art
                     software, and a proposed alternative implementations
                     are analyzed in detail.},
  acknowledgement = ack-nhfb,
}
@TechReport{Kurzak:2006:IMP,
  author          = {Jakub Kurzak and Jack Dongarra},
  title           = {Implementation of the Mixed-Precision High Performance
                     {LINPACK} Benchmark on the {CELL Processor}},
  type            = {LAPACK Working Note},
  number          = {177},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  pages           = {12},
  month           = sep,
  year            = {2006},
  bibdate         = {Mon Oct 09 12:05:43 2006},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {Also available as UT-CS-06-580.},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn177.pdf;
                     http://www.netlib.org/lapack/lawnspdf/lawn177.ps},
  abstract        = {This paper describes the design concepts behind
                     implementations of mixed-precision linear algebra
                     routines targeted for the Cell processor. It describes
                     in detail the implementation of code to solve linear
                     system of equations using Gaussian elimination in
                     single precision with iterative refinement of the
                     solution to the full double precision accuracy. By
                     utilizing this approach the algorithm achieves close to
                     an order of magnitude higher performance on the Cell
                     processor than the performance offered by the standard
                     double precision algorithm. Effectively the code is an
                     implementation of the high performance LINPACK
                     benchmark, since it meets all the requirements
                     concerning the problem being solved and the numerical
                     properties of the solution.},
  acknowledgement = ack-nhfb,
}
@TechReport{Kurzak:2006:ILA,
  author          = {Jakub Kurzak and Jack Dongarra},
  title           = {Implementing Linear Algebra Routines on Multi-Core
                     Processors with Pipelining and a Look Ahead},
  type            = {LAPACK Working Note},
  number          = {178},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  pages           = {11},
  month           = sep,
  year            = {2006},
  bibdate         = {Mon Oct 09 12:05:43 2006},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {Also available as UT-CS-06-581.},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn178.pdf;
                     http://www.netlib.org/lapack/lawnspdf/lawn178.ps},
  abstract        = {Linear algebra algorithms commonly encapsulate
                     parallelism in Basic Linear Algebra Subroutines (BLAS).
                     This solution relies on the fork-join model of parallel
                     execution, which may result in suboptimal performance
                     on current and future generations of multi-core
                     processors. To overcome the shortcomings of this
                     approach a pipelined model of parallel execution is
                     presented, and the idea of the look ahead is utilized
                     in order to suppress the negative effects of sequential
                     formulation of the algorithms. Application to one-sided
                     matrix factorizations, LU, Cholesky and QR, is
                     described. Shared memory implementation using POSIX
                     threads is presented.},
  acknowledgement = ack-nhfb,
  keywords        = {linear algebra; look ahead; multi-core processors;
                     pipelining},
}
@TechReport{Baboulin:2006:PTS,
  author          = {Marc Baboulin and Luc Giraud and Serge Gratton and
                     Julien Langou},
  title           = {Parallel tools for solving incremental dense least
                     squares problems. Application to space geodesy},
  type            = {LAPACK Working Note},
  number          = {179},
  institution     = {CERFACS},
  address         = {42 avenue Gaspard Coriolis, 31057 Toulouse Cedex,
                     France},
  month           = sep,
  year            = {2006},
  bibdate         = {Mon Oct 09 12:05:43 2006},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {Also available as UT-CS-06-582.},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn179.pdf;
                     http://www.netlib.org/lapack/lawnspdf/lawn179.ps},
  abstract        = {We present a parallel distributed solver that enables
                     us to solve incremental dense least squares arising in
                     some parameter estimation problems. This solver is
                     based on ScaLAPACK [8] and PBLAS [9] kernel routines.
                     In the incremental process, the observations are
                     collected periodically and the solver updates the
                     solution with new observations using a QR factorization
                     algorithm. It uses a recently defined distributed
                     packed format [3] that handles symmetric or triangular
                     matrices in ScaLAPACK-based implementations. We provide
                     performance analysis on IBM pSeries 690. We also
                     present an example of application in the area of space
                     geodesy for gravity field computations with some
                     experimental results.},
  acknowledgement = ack-nhfb,
  keywords        = {dense linear algebra; gravity field computation;
                     parallel distributed algorithms; QR factorization;
                     ScaLAPACK; scientific computing},
}
@TechReport{Buttari:2006:UMP,
  author          = {Alfredo Buttari and Jack J. Dongarra and Jakub Kurzak
                     and Piotr Luszczek and Stanimire Tomov},
  title           = {Using Mixed Precision for Sparse Matrix Computations
                     to Enhance the Performance while Achieving 64-bit
                     Accuracy},
  type            = {LAPACK Working Note},
  number          = {180},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {22},
  month           = oct,
  year            = {2006},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-06-584},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn180.pdf},
  abstract        = {By using a combination of 32-bit and 64-bit floating
                     point arithmetic the performance of many sparse linear
                     algebra algorithms can be significantly enhanced while
                     maintaining the 64-bit accuracy of the resulting
                     solution. These ideas can be applied to sparse
                     multifrontal and supernodal direct techniques, and
                     sparse iterative techniques such as Krylov subspace
                     methods. The approach presented here can apply not only
                     to conventional processors but also to exotic
                     technologies such as Field Programmable Gate Arrays
                     (FPGA), Graphical Processing Units (GPU), and the Cell
                     BE processor.},
  acknowledgement = ack-nhfb,
}
@TechReport{Demmel:2007:PNL,
  author          = {James W. Demmel and Jack J. Dongarra and Beresford N.
                     Parlett and William Kahan and Ming Gu and David S.
                     Bindel and Yozo Hida and Xiaoye S. Li and Osni A.
                     Marques and E. Jason Riedy and Christof V{\"o}mel and
                     Julien Langou and Piotr Luszczek and Jakub Kurzak and
                     Alfredo Buttari and Julie Langou and Stanimire Tomov},
  title           = {Prospectus for the Next {LAPACK} and {ScaLAPACK}
                     Libraries},
  type            = {LAPACK Working Note},
  number          = {181},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {11},
  month           = mar,
  year            = {2007},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-07-592},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn181.pdf},
  acknowledgement = ack-nhfb,
}
@TechReport{Demmel:2007:TIL,
  author          = {James W. Demmel and Osni A. Marques and Beresford N.
                     Parlett and Christof V{\"o}mel},
  title           = {A Testing Infrastructure for {LAPACK}'s Symmetric
                     Eigensolvers},
  type            = {LAPACK Working Note},
  number          = {182},
  institution     = inst-UCB-EECS,
  address         = inst-UCB-EECS:adr,
  month           = apr,
  year            = {2007},
  MRclass         = {15A18, 15A23},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn182.pdf},
  abstract        = {LAPACK is often mentioned as a positive example of a
                     software library that encapsulates complex, robust, and
                     widely used numerical algorithms for a wide range of
                     applications. At installation time, the user has the
                     option of running a (limited) number of test cases to
                     verify the integrity of the installation process. On
                     the algorithm developer's side, however, more
                     exhaustive tests are usually performed to study
                     algorithm behavior on a variety of problem settings and
                     also computer architectures. In this process, difficult
                     test cases need to be found that reflect particular
                     challenges of an application or push algorithms to
                     extreme behavior. These tests are then assembled into a
                     comprehensive collection, therefore making it possible
                     for any new or competing algorithm to be stressed in a
                     similar way. This note describes such an infrastructure
                     for exhaustively testing the symmetric tridiagonal
                     eigensolvers implemented in LAPACK. It consists of two
                     parts: a selection of carefully chosen test matrices
                     with particular idiosyncrasies and a portable testing
                     framework that allows easy testing and data processing.
                     The tester facilitates experiments with algorithmic
                     choices, parameter and threshold studies, and
                     performance comparisons on different architectures.},
  acknowledgement = ack-nhfb,
}
@TechReport{Demmel:2007:PAL,
  author          = {James W. Demmel and Osni A. Marques and Beresford N.
                     Parlett and Christof V{\"o}mel},
  title           = {Performance and Accuracy of {LAPACK}'s Symmetric
                     Tridiagonal Eigensolvers},
  type            = {LAPACK Working Note},
  number          = {183},
  institution     = inst-UCB-EECS,
  address         = inst-UCB-EECS:adr,
  month           = apr,
  year            = {2007},
  MRclass         = {15A18, 15A23},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn183.pdf},
  abstract        = {We compare four algorithms from the latest LAPACK 3.1
                     release for computing eigenpairs of a symmetric
                     tridiagonal matrix. These include QR iteration,
                     bisection and inverse iteration (BI), the
                     Divide-and-Conquer method (DC), and the method of
                     Multiple Relatively Robust Representations
                     (MR).\par
                     Our evaluation considers speed and accuracy when
                     computing all eigenpairs, and additionally subset
                     computations. Using a variety of carefully selected
                     test problems, our study includes a variety of today's
                     computer architectures.\par
                     Our conclusions can be summarized as follows. (1) DC
                     and MR are generally much faster than QR and BI on
                     large matrices. (2) MR almost always does the fewest
                     floating point operations, but at a lower MFlop rate
                     than all the other algorithms. (3) The exact
                     performance of MR and DC strongly depends on the matrix
                     at hand. (4) DC and QR are the most accurate algorithms
                     with observed accuracy {$ O(\sqrt {n} \epsilon) $}. The
                     accuracy of BI and MR is generally {$ O(n \epsilon) $}.
                     (5) MR is preferable to BI for subset computations.},
  acknowledgement = ack-nhfb,
}
@TechReport{Kurzak:2007:SSL,
  author          = {Jakub Kurzak and Alfredo Buttari and Jack J.
                     Dongarra},
  title           = {Solving Systems of Linear Equations on the {CELL}
                     Processor Using {Cholesky} Factorization},
  type            = {LAPACK Working Note},
  number          = {184},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = may,
  year            = {2007},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note            = {UT-CS-07-596},
  URL             = {http://www.netlib.org/lapack/lawnspdf/lawn184.pdf},
  abstract        = {The STI CELL processor introduces pioneering solutions
                     in processor architecture. At the same time it presents
                     new challenges for the development of numerical
                     algorithms. One is effective exploitation of the
                     differential between the speed of single and double
                     precision arithmetic; the other is efficient
                     parallelization between the short vector SIMD cores. In
                     this work, the first challenge is addressed by
                     utilizing a mixed-precision algorithm for the solution
                     of a dense symmetric positive definite system of linear
                     equations, which delivers double precision accuracy,
                     while performing the bulk of the work in single
                     precision. The second challenge is approached by
                     introducing much finer granularity of parallelization
                     than has been used for other architectures and using a
                     lightweight decentralized synchronization. The
                     implementation of the computationally intensive
                     sections gets within 90 percent of peak floating point
                     performance, while the implementation of the memory
                     intensive sections reaches within 90 percent of peak
                     memory bandwidth. On a single CELL processor, the
                     algorithm achieves over 170 Gflop/s when solving a
                     symmetric positive definite system of linear equation
                     in single precision and over 150 Gflop/s when
                     delivering the result in double precision accuracy.},
  acknowledgement = ack-nhfb,
  keywords        = {CELL BE; Cholesky factorization; iterative refinement;
                     mixed-precision algorithms},
}
%%% LAPACK Working Note 185 (UT-CS-07-597).
@TechReport{Buttari:2007:LPH,
  author =       {Alfredo Buttari and Jack J. Dongarra and Jakub
                 Kurzak},
  title =        {Limitations of the {PlayStation 3} for High
                 Performance Cluster Computing},
  type =         {LAPACK Working Note},
  number =       {185},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        may,
  year =         {2007},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn185.pdf},
  acknowledgement = ack-nhfb,
  note =         {UT-CS-07-597},
}
%%% LAPACK Working Note 186; the note field cites the published version.
@TechReport{Demmel:2007:FLAa,
  author =       {James W. Demmel and Ioana Dumitriu and Olga Holtz},
  title =        {Fast Linear Algebra is Stable},
  type =         {LAPACK Working Note},
  number =       {186},
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          {18},
  month =        may,
  year =         {2007},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note =         {Published in \cite{Demmel:2007:FLAb}.},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn186.pdf},
  abstract =     {In [23] we showed that a large class of fast recursive
                 matrix multiplication algorithms is stable in a
                 normwise sense, and that in fact if multiplication of
                 $n$-by-$n$ matrices can be done by any algorithm in {$
                 O(n^{\omega + \eta }) $} operations for any $ \eta > 0
                 $, then it can be done stably in {$ O(n^{\omega + \eta
                 }) $} operations for any $ \eta > 0 $. Here we extend
                 this result to show that essentially all standard
                 linear algebra operations, including LU decomposition,
                 QR decomposition, linear equation solving, matrix
                 inversion, solving least squares problems,
                 (generalized) eigenvalue problems and the singular
                 value decomposition can also be done stably (in a
                 normwise sense) in {$ O(n^{\omega + \eta }) $}
                 operations.},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 187: implementation notes for the LAPACK 3.1 xHSEQR
%%% routines.  Words in the abstract are kept whole; do not carry
%%% line-break hyphens from the PDF into the text.
@TechReport{Byers:2007:LXT,
  author =       {Ralph Byers},
  title =        {{LAPACK 3.1 xHSEQR}: Tuning and Implementation Notes
                 on the Small Bulge Multi-shift {$ Q R $} Algorithm with
                 Aggressive Early Deflation},
  type =         {LAPACK Working Note},
  number =       {187},
  institution =  {Department of Mathematics, University of Kansas},
  address =      {Lawrence, Kansas 66045, USA},
  month =        may,
  year =         {2007},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn187.pdf},
  abstract =     {This note documents implementation details of the
                 small bulge, multi-shift QR algorithm with aggressive
                 early deflation that appears as LAPACK version 3.1
                 programs CHSEQR, DHSEQR, SHSEQR and ZHSEQR and the
                 subroutines they call. These codes calculate
                 eigenvalues and optionally a Schur factorization of a
                 Hessenberg matrix. They do the bulk of the work
                 required to calculate eigenvalues and optionally
                 eigenvectors of a general non-symmetric matrix. This
                 report is intended to provide some guidance for setting
                 the machine dependent tuning parameters, to help
                 maintainers to identify and correct problems, and to
                 help developers improve upon this implementation.},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 188; the note field cites the published version.
@TechReport{Demmel:2007:EPI,
  author =       {James W. Demmel and Yozo Hida and Xiaoye S. Li and E.
                 Jason Riedy},
  title =        {Extra-precise Iterative Refinement for Overdetermined
                 Least Squares Problems},
  type =         {LAPACK Working Note},
  number =       {188},
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          {30},
  month =        may,
  year =         {2007},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note =         {Published in \cite{Demmel:2009:EPI}.},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn188.pdf},
  abstract =     {We present the algorithm, error bounds, and numerical
                 results for extra-precise iterative refinement applied
                 to overdetermined linear least squares (LLS) problems.
                 We apply our linear system refinement algorithm to
                 Bj{\"o}rck's augmented linear system formulation of an
                 LLS problem. Our algorithm reduces the forward normwise
                 and componentwise errors to {$ O(\epsilon) $} unless
                 the system is too ill conditioned. In contrast to
                 linear systems, we provide two separate error bounds
                 for the solution $x$ and the residual $r$. The
                 refinement algorithm requires only limited use of extra
                 precision and adds only {$ O(m n) $} work to the {$ O(m
                 n^2) $} cost of QR factorization for problems of size
                 $m$-by-$n$. The extra precision calculation is
                 facilitated by the new extended-precision BLAS standard
                 in a portable way, and the refinement algorithm will be
                 included in a future release of LAPACK and can be
                 extended to the other types of least squares
                 problems.},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 189 (UT-CS-08-609): SGEMM kernels for the CELL SPE.
%%% The first kernel multiplies by the transpose of B, so the formula in
%%% the abstract carries an explicit superscript (B^T).
@TechReport{Alvaro:2008:FSS,
  author =       {Wesley Alvaro and Jakub Kurzak and Jack J. Dongarra},
  title =        {Fast and Small Short Vector {SIMD} Matrix
                 Multiplication Kernels for the {CELL} Processor},
  type =         {LAPACK Working Note},
  number =       {189},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jan,
  year =         {2008},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn189.pdf},
  abstract =     {Matrix multiplication is one of the most common
                 numerical operations, especially in the area of dense
                 linear algebra, where it forms the core of many
                 important algorithms, including solvers of linear
                 systems of equations, least square problems, and
                 singular and eigenvalue computations. The STI CELL
                 processor exceeds the capabilities of any other
                 processor available today in terms of peak single
                 precision, floating point performance. In order to
                 fully exploit the potential of the CELL processor for a
                 wide range of numerical algorithms, fast implementation
                 of the matrix multiplication operation is essential.
                 The crucial component is the matrix multiplication
                 kernel crafted for the short vector Single Instruction
                 Multiple Data architecture of the Synergistic
                 Processing Element of the CELL processor. In this
                 paper, single precision matrix multiplication kernels
                 are presented implementing the {$ C = C - A \times B^T
                 $} operation and the {$ C = C - A \times B $} operation
                 for matrices of size $ 64 \times 64 $ elements. For the
                 latter case, the performance of 25.55 Gflop/s is
                 reported, or 99.80 percent of the peak, using as little
                 as 5.9 KB of storage for code and auxiliary data
                 structures.},
  acknowledgement = ack-nhfb,
  keywords =     {CELL BE; matrix multiplication; SGEMM; short vector
                 SIMD; SPE},
  note =         {UT-CS-08-609},
}
%%% LAPACK Working Note 190 (UT-CS-07-598); the note field cites the
%%% published version.
@TechReport{Buttari:2007:PTQ,
  author =       {Alfredo Buttari and Julien Langou and Jakub Kurzak and
                 Jack J. Dongarra},
  title =        {Parallel Tiled {$ Q R $} Factorization for Multicore
                 Architectures},
  type =         {LAPACK Working Note},
  number =       {190},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jul,
  year =         {2007},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note =         {UT-CS-07-598. Published in \cite{Buttari:2008:PTF}.},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn190.pdf},
  abstract =     {As multicore systems continue to gain ground in the
                 High Performance Computing world, linear algebra
                 algorithms have to be reformulated or new algorithms
                 have to be developed in order to take advantage of the
                 architectural features on these new processors. Fine
                 grain parallelism becomes a major requirement and
                 introduces the necessity of loose synchronization in
                 the parallel execution of an operation. This paper
                 presents an algorithm for the QR factorization where
                 the operations can be represented as a sequence of
                 small tasks that operate on square blocks of data.
                 These tasks can be dynamically scheduled for execution
                 based on the dependencies among them and on the
                 availability of computational resources. This may
                 result in an out of order execution of the tasks which
                 will completely hide the presence of intrinsically
                 sequential tasks in the factorization. Performance
                 comparisons are presented with the LAPACK algorithm for
                 QR factorization where parallelism can only be
                 exploited at the level of the BLAS operations.},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 191 (UT-CS-07-600): tiled Cholesky, LU and QR.
@TechReport{Buttari:2007:CPT,
  author =       {Alfredo Buttari and Julien Langou and Jakub Kurzak and
                 Jack J. Dongarra},
  title =        {A Class of Parallel Tiled Linear Algebra Algorithms
                 for Multicore Architectures},
  type =         {LAPACK Working Note},
  number =       {191},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        sep,
  year =         {2007},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn191.pdf},
  abstract =     {As multicore systems continue to gain ground in the
                 High Performance Computing world, linear algebra
                 algorithms have to be reformulated or new algorithms
                 have to be developed in order to take advantage of the
                 architectural features on these new processors. Fine
                 grain parallelism becomes a major requirement and
                 introduces the necessity of loose synchronization in
                 the parallel execution of an operation. This paper
                 presents an algorithm for the Cholesky, LU and QR
                 factorization where the operations can be represented
                 as a sequence of small tasks that operate on square
                 blocks of data. These tasks can be dynamically
                 scheduled for execution based on the dependencies among
                 them and on the availability of computational
                 resources. This may result in an out of order execution
                 of the tasks which will completely hide the presence of
                 intrinsically sequential tasks in the factorization.
                 Performance comparisons are presented with the LAPACK
                 algorithms where parallelism can only be exploited at
                 the level of the BLAS operations and vendor
                 implementations.},
  acknowledgement = ack-nhfb,
  note =         {UT-CS-07-600},
}
%%% LAPACK Working Note 192.
@TechReport{Granat:2007:PER,
  author =       {Robert Granat and Bo K{\aa}gstr{\"o}m and Daniel
                 Kressner},
  title =        {Parallel eigenvalue reordering in real {Schur} forms},
  type =         {LAPACK Working Note},
  number =       {192},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        sep,
  year =         {2007},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn192.pdf},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 193 (UT-CS-07-604): conditioning of linear least
%%% squares solution components.
@TechReport{Baboulin:2007:CCC,
  author =       {Marc Baboulin and Jack J. Dongarra and Serge Gratton
                 and Julien Langou},
  title =        {Computing the Conditioning of the Components of a
                 Linear Least Squares Solution},
  type =         {LAPACK Working Note},
  number =       {193},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        sep,
  year =         {2007},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn193.pdf},
  abstract =     {In this paper, we address the accuracy of the results
                 for the overdetermined full rank linear least squares
                 problem. We recall theoretical results obtained in [2]
                 on conditioning of the least squares solution and the
                 components of the solution when the matrix
                 perturbations are measured in Frobenius or spectral
                 norms. Then we define computable estimates for these
                 condition numbers and we interpret them in terms of
                 statistical quantities. In particular, we show that, in
                 the classical linear statistical model, the ratio of
                 the variance of one component of the solution by the
                 variance of the right-hand side is exactly the
                 condition number of this solution component when
                 perturbations on the right-hand side are considered. We
                 also provide fragment codes using LAPACK [1] routines
                 to compute the variance-covariance matrix and the least
                 squares conditioning and we give the corresponding
                 computational cost. Finally we present a small
                 historical numerical example that was used by Laplace
                 [19] for computing the mass of Jupiter and experiments
                 from the space industry with real physical data.},
  acknowledgement = ack-nhfb,
  keywords =     {condition number; LAPACK; Linear least squares;
                 parameter estimation; ScaLAPACK; statistical linear
                 least squares; variance-covariance matrix},
  note =         {UT-CS-07-604},
}
%%% LAPACK Working Note 194: refined MRRR representation tree.
%%% The factorization name is typeset as math with an explicit transpose
%%% superscript, braced like the other upper-case math in these abstracts.
@TechReport{Vomel:2007:RRT,
  author =       {Christof V{\"o}mel},
  title =        {A Refined Representation Tree for {MRRR}},
  type =         {LAPACK Working Note},
  number =       {194},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        nov,
  year =         {2007},
  MRclass =      {65F15, 65Y15},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn194.pdf},
  abstract =     {In order to compute orthogonal eigenvectors of a
                 symmetric tridiagonal matrix without Gram--Schmidt
                 orthogonalization, the MRRR algorithm finds a shifted
                 {$ L D L^T $} factorization (representation) for each
                 eigenvalue such that the local eigenvalue is a
                 singleton, that is defined to high relative accuracy
                 and has a large relative gap.\par
                 MRRR's representation tree describes how, by successive
                 shifting and refinement, each eigenvalue becomes
                 relatively isolated. Its shape plays a crucial role for
                 complexity: deeper trees are associated with more
                 eigenvalue refinement to resolve clustering of
                 eigenvalues.\par
                 Motivated by recently observed deteriorating complexity
                 of the LAPACK 3.1 MRRR kernels for certain matrices of
                 large dimension, we here re-examine and refine the
                 representation tree concept.\par
                 We first describe the discovery of what we call a
                 spectrum peeling problem: even though the matrix at
                 hand might not have a spectrum with clusters within
                 clusters, the representation tree might still contain a
                 long chain of large nodes.\par
                 We then formulate a refined proposal for the
                 representation tree that aims at avoiding the
                 unwarranted work while preserving tight accuracy bounds
                 where possible. The trade-off between performance and
                 accuracy in our solution is discussed by practical
                 examples.},
  acknowledgement = ack-nhfb,
  keywords =     {complexity; LAPACK; MRRR; Multiple relatively robust
                 representations; representation tree; ScaLAPACK;
                 spectrum peeling},
}
%%% LAPACK Working Note 195: ScaLAPACK's parallel MRRR algorithm.
%%% A matrix is an element (\in) of R^{n x n}, not a subset of it.
@TechReport{Vomel:2007:SMA,
  author =       {Christof V{\"o}mel},
  title =        {{ScaLAPACK}'s {MRRR} Algorithm},
  type =         {LAPACK Working Note},
  number =       {195},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        nov,
  year =         {2007},
  MRclass =      {65F15, 65Y15},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn195.pdf},
  abstract =     {The sequential algorithm of Multiple Relatively Robust
                 Representations, MRRR, can compute numerically
                 orthogonal eigenvectors of an unreduced symmetric
                 tridiagonal matrix {$ T \in R^{n \times n} $} with
                 {$ O(n^2) $} cost.\par
                 This paper describes the design of ScaLAPACK's parallel
                 MRRR algorithm. One emphasis is on the critical role of
                 the representation tree in achieving both numerical
                 accuracy and parallel scalability. A second point
                 concerns the favorable properties of this code: subset
                 computation, the use of static memory, and
                 scalability.\par
                 Unlike ScaLAPACK's Divide \& Conquer and QR, MRRR can
                 compute subsets of eigenpairs at reduced cost. And in
                 contrast to inverse iteration which can fail, it is
                 guaranteed to produce a numerically satisfactory answer
                 while maintaining memory scalability.\par
                 ParEig, the parallel MRRR algorithm for PLAPACK, uses
                 dynamic memory allocation. This is avoided by our code
                 at marginal additional cost. We also use a different
                 representation tree criterion that allows for more
                 accurate computation of the eigenvectors but can make
                 parallelization more difficult.},
  acknowledgement = ack-nhfb,
  keywords =     {multiple relatively robust representations; numerical
                 software; ScaLAPACK; Symmetric eigenproblem},
}
%%% LAPACK Working Note 196: convergence of block cyclic Jacobi methods.
%%% \times is a math-mode macro, so the rotation size in the abstract is
%%% written inside $...$; in running text it raises a LaTeX error.
@TechReport{Drmac:2007:GCP,
  author =       {Zlatko Drma{\v{c}}},
  title =        {A global convergence proof of cyclic {Jacobi} methods
                 with block rotations},
  type =         {LAPACK Working Note},
  number =       {196},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          {10},
  month =        dec,
  year =         {2007},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn196.pdf},
  abstract =     {This paper introduces a globally convergent block
                 (column- and row-) cyclic Jacobi method for
                 diagonalization of Hermitian matrices and for
                 computation of the singular value decomposition of
                 general matrices. It is shown that a block rotation
                 (generalization of the Jacobi's $ 2 \times 2 $ rotation)
                 must be computed and implemented in a particular way to
                 guarantee global convergence. This solves a long
                 standing open problem of convergence of block cyclic
                 Jacobi methods. The proof includes the convergence of
                 the eigenspaces in the general case of multiple
                 eigenvalues.},
  acknowledgement = ack-nhfb,
}
%%% LAPACK Working Note 197 (UCB/EECS-2007-179): bisection eigenvalue
%%% algorithm on a GPU.  The ucbnumber field identifies this as a UC
%%% Berkeley EECS report, so institution/address use the UCB macros.
%%% NOTE(review): the speedup figure and the possessive in the abstract
%%% were repaired from mis-encoded characters; confirm against the PDF.
@TechReport{Volkov:2008:UGA,
  author =       {Vasily Volkov and James W. Demmel},
  title =        {Using {GPUs} to Accelerate the Bisection Algorithm for
                 Finding Eigenvalues of Symmetric Tridiagonal Matrices},
  type =         {LAPACK Working Note},
  number =       {197},
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  month =        jan,
  year =         {2008},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn197.pdf},
  abstract =     {Graphical Processing Units (GPUs) potentially promise
                 widespread and inexpensive high performance
                 computation. However, architectural limitations (only
                 some operations and memory access patterns can be
                 performed quickly, partial support for IEEE floating
                 point arithmetic) make it necessary to change existing
                 algorithms to attain high performance and correctness.
                 Here we show how to make the bisection algorithm for
                 eigenvalues of symmetric tridiagonal matrices (sstebz
                 from LAPACK) run both fast and correctly on an ATI
                 Radeon X1900 GPU. Our fastest algorithm takes up to
                 $ 156 \times $ less time than Intel's Math Kernel
                 Library version of sstebz running on the CPU, but does
                 so by doing many redundant floating point operations
                 compared to the CPU version. We use an automatic tuning
                 procedure analogous to ATLAS or PHiPAC to decide the
                 optimal redundancy. Correctness despite partial IEEE
                 floating point semantics required explicitly adding 0
                 in the inner loop. The problems and solutions discussed
                 here are of interest on other GPU architectures.},
  acknowledgement = ack-nhfb,
  ucbnumber =    {UCB/EECS-2007-179},
}
%%% LAPACK Working Note 198: blocked Hessenberg-triangular reduction.
%%% The university name belongs to the issuing institution; the address
%%% field holds only the postal location.
@TechReport{Kaagstrom:2008:BAR,
  author =       {Bo K{\aa}gstr{\"o}m and Daniel Kressner and Enrique S.
                 Quintana-Orti and Gregorio Quintana-Orti},
  title =        {Blocked Algorithms for the Reduction to
                 {Hessenberg}-Triangular Form Revisited},
  type =         {LAPACK Working Note},
  number =       {198},
  institution =  {Department of Computing Science and HPC2N, Ume{\aa}
                 University},
  address =      {S-901 Ume{\aa}, Sweden},
  month =        feb,
  year =         {2008},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn198.pdf},
  abstract =     {We present two variants of Moler and Stewart's
                 algorithm for reducing a matrix pair to
                 Hessenberg-triangular (HT) form with increased data
                 locality in the access to the matrices. In one of these
                 variants, a careful reorganization and accumulation of
                 Givens rotations enables the use of efficient level 3
                 BLAS. Experimental results on four different
                 architectures, representative of current high
                 performance processors, compare the performances of the
                 new variants with those of the implementation of Moler
                 and Stewart's algorithm in subroutine DGGHRD from
                 LAPACK, Dackland and K{\aa}gstr{\"o}m's two-stage
                 algorithm for the HT form, and a modified version of
                 the latter which requires considerably less flops.},
  acknowledgement = ack-nhfb,
  keywords =     {blocked algorithms; Generalized eigenvalue problems;
                 Hessenberg-triangular form; high-performance computing;
                 level 3 BLAS; orthogonal transformations; QZ
                 algorithm},
}
%%% LAPACK Working Note 199 (UT-CS-08-614): Rectangular Full Packed Format.
@TechReport{Gustavson:2008:RFP,
  author =       {Fred G. Gustavson and Jerzy Wasniewski and Jack J.
                 Dongarra and Julien Langou},
  title =        {Rectangular Full Packed Format for {Cholesky}'s
                 Algorithm: Factorization, Solution and Inversion},
  type =         {LAPACK Working Note},
  number =       {199},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        apr,
  year =         {2008},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn199.pdf},
  abstract =     {We describe a new data format for storing triangular,
                 symmetric, and Hermitian matrices called RFPF
                 (Rectangular Full Packed Format). The standard two
                 dimensional arrays of Fortran and C (also known as full
                 format) that are used to represent triangular and
                 symmetric matrices waste nearly half of the storage
                 space but provide high performance via the use of Level
                 3 BLAS. Standard packed format arrays fully utilize
                 storage (array space) but provide low performance as
                 there is no Level 3 packed BLAS. We combine the good
                 features of packed and full storage using RFPF to
                 obtain high performance via using Level 3 BLAS as RFPF
                 is a standard full format representation. Also, RFPF
                 requires exactly the same minimal storage as packed
                 format. Each LAPACK full and/or packed triangular,
                 symmetric, and Hermitian routine becomes a single new
                 RFPF routine based on eight possible data layouts of
                 RFPF. This new RFPF routine usually consists of two
                 calls to the corresponding LAPACK full format routine
                 and two calls to Level 3 BLAS routines. This means no
                 new software is required. As examples, we present
                 LAPACK routines for Cholesky factorization, Cholesky
                 solution and Cholesky inverse computation in RFPF to
                 illustrate this new work and to describe its
                 performance on several commonly used computer
                 platforms. Performance of LAPACK full routines using
                 RFPF versus LAPACK full routines using standard format
                 for both serial and SMP parallel processing is about
                 the same while using half the storage. Performance
                 gains are roughly one to a factor of 43 for serial and
                 one to a factor of 97 for SMP parallel times faster
                 using vendor LAPACK full routines with RFPF than with
                 using vendor and/or reference packed routines.},
  acknowledgement = ack-nhfb,
  keywords =     {Algorithms; BLAS; Linear Algebra Libraries;
                 Performance},
  subject =      {G.1.3 [Numerical Analysis]: Numerical Linear Algebra -
                 Linear Systems (symmetric and Hermitian); G.4
                 [Mathematics of Computing]: Mathematical Software},
  note =         {UT-CS-08-614},
}
%%% LAPACK Working Note 200 (UT-CS-08-615): dense linear algebra on
%%% multicore and GPU architectures.  The authors' keyword list is stored
%%% in the keywords field rather than as a trailing abstract sentence.
@TechReport{Baboulin:2008:SID,
  author =       {Marc Baboulin and Jack J. Dongarra and Stanimire
                 Tomov},
  title =        {Some Issues in Dense Linear Algebra for Multicore and
                 Special Purpose Architectures},
  type =         {LAPACK Working Note},
  number =       {200},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        may,
  year =         {2008},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn200.pdf},
  abstract =     {We address some key issues in designing dense linear
                 algebra (DLA) algorithms that are common for both
                 multi/many-cores and special purpose architectures (in
                 particular GPUs). We present them in the context of an
                 LU factorization algorithm, where randomization
                 techniques are used as an alternative to pivoting. This
                 approach yields an algorithm based entirely on a
                 collection of small Level 3 BLAS type computational
                 tasks, which has emerged as a common goal in designing
                 DLA algorithms for new architectures. Other common
                 trends, also considered here, are block asynchronous
                 task execution and ``Block'' layouts for the data
                 associated with the separate tasks. We present
                 numerical results and other specific experiments with
                 DLA algorithms on NVIDIA GPUs using CUDA. The GPU
                 results are also of interest themselves as we show a
                 performance of up to 160 Gflop/s on a single Quadro FX
                 5600 card.},
  acknowledgement = ack-nhfb,
  keywords =     {dense linear algebra; graphic process units; LU
                 factorization; multicore processors; parallel
                 algorithms},
  note =         {UT-CS-08-615},
}
%%% LAPACK Working Note 201 (UT-CS-08-616).
@TechReport{Kurzak:2008:QFC,
  author =       {Jakub Kurzak and Jack J. Dongarra},
  title =        {{$ Q R $} Factorization for the {CELL} Processor},
  type =         {LAPACK Working Note},
  number =       {201},
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        may,
  year =         {2008},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn201.pdf},
  abstract =     {The QR factorization is one of the most important
                 operations in dense linear algebra, offering a
                 numerically stable method for solving linear systems of
                 equations including overdetermined and underdetermined
                 systems. Classic implementation of the QR factorization
                 suffers from performance limitations due to the use of
                 matrix-vector type operations in the phase of panel
                 factorization. These limitations can be remedied by
                 using the idea of updating of QR factorization,
                 rendering an algorithm, which is much more scalable and
                 much more suitable for implementation on a multi-core
                 processor. It is demonstrated how the potential of the
                 CELL processor can be utilized to the fullest by
                 employing the new algorithmic approach and successfully
                 exploiting the capabilities of the CELL processor in
                 terms of Instruction Level Parallelism and Thread-Level
                 Parallelism.},
  acknowledgement = ack-nhfb,
  keywords =     {CELL processor; linear algebra; matrix factorization;
                 multi-core; numerical algorithms},
  note =         {UT-CS-08-616},
}
%%% LAPACK Working Note 202 (UCB/EECS-2008-49).
@TechReport{Volkov:2008:LQC,
  author =       {Vasily Volkov and James W. Demmel},
  title =        {{$ L U $}, {$ Q R $} and {Cholesky} Factorizations
                 using Vector Capabilities of {GPUs}},
  type =         {LAPACK Working Note},
  number =       {202},
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  month =        may,
  year =         {2008},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn202.pdf},
  abstract =     {We present performance results for dense linear
                 algebra using the 8-series NVIDIA GPUs. Our
                 matrix-matrix multiply routine (GEMM) runs 60\% faster
                 than the vendor implementation in CUBLAS 1.1 and
                 approaches the peak of hardware capabilities. Our LU,
                 QR and Cholesky factorizations achieve up to 80--90\%
                 of the peak GEMM rate. Our parallel LU running on two
                 GPUs achieves up to $ \approx $300 Gflop/s. These
                 results are accomplished by challenging the accepted
                 view of the GPU architecture and programming
                 guidelines. We argue that modern GPUs should be viewed
                 as multithreaded multicore vector units. We exploit
                 blocking similarly to vector computers and
                 heterogeneity of the system by computing both on GPU
                 and CPU. This study includes detailed benchmarking of
                 the GPU memory system that reveals sizes and latencies
                 of caches and TLB. We present a couple of algorithmic
                 optimizations aimed at increasing parallelism and
                 regularity in the problem that provide us with slightly
                 higher performance.},
  acknowledgement = ack-nhfb,
  ucbnumber =    {UCB/EECS-2008-49},
}
%%% LAPACK Working Note 203 (UCB/EECS-2008-76).
@TechReport{Demmel:2008:NND,
  author =       {James W. Demmel and Yozo Hida and Mark F. Hoemmen and
                 E. Jason Riedy},
  title =        {Non-Negative Diagonals and High Performance on
                 Low-Profile Matrices from Householder {$ Q R $}},
  type =         {LAPACK Working Note},
  number =       {203},
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          {30},
  month =        may,
  year =         {2008},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.netlib.org/lapack/lawnspdf/lawn203.pdf},
  abstract =     {The Householder reflections used in LAPACK's {$ Q R $}
                 factorization leave positive and negative real entries
                 along {$R$}'s diagonal. This is sufficient for most
                 applications of {$ Q R $} factorizations, but a few
                 require that {$R$} have a non-negative diagonal. This
                 note provides a new Householder generation routine to
                 produce a non-negative diagonal. Additionally, we find
                 that scanning for trailing zeros in the generated
                 reflections leads to large performance improvements
                 when applying reflections with many trailing zeros.
                 Factoring low-profile matrices, those with non-zero
                 entries mostly near the diagonal (e.g. band matrices),
                 now requires far fewer operations. For example, {$ Q R
                 $} factorization of matrices with profile width $b$
                 that are stored densely in an $ n \times n $ matrix
                 improves from {$ O(n^3) $} to {$ O(n^2 + n b^2) $}.},
  acknowledgement = ack-nhfb,
  ucbnumber =    {UCB/EECS-2008-76},
}
%%% LAPACK Working Note 204 (UCB/EECS-2008-89): communication-optimal QR
%%% and LU.  Every \times in the abstract sits inside $...$ because it is
%%% a math-mode macro; bare \times in running text is a LaTeX error.
%%% NOTE(review): the relation in "optimized for m >> n" was reconstructed
%%% (the symbol was lost in extraction); confirm against the PDF.
@TechReport{Demmel:2008:COP,
  author =       {James W. Demmel and Laura Grigori and Mark F. Hoemmen
                 and Julien Langou},
  title =        {Communication-optimal parallel and sequential {$ Q R
                 $} and {$ L U $} factorizations},
  type =         {LAPACK Working Note},
  number =       {204},
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  month =        aug,
  year =         {2008},
  bibdate =      {Fri Apr 24 12:25:43 2009},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  URL =          {http://www.eecs.berkeley.edu/Pubs/TechRpts/2008/EECS-2008-89.html;
                 http://www.netlib.org/lapack/lawnspdf/lawn204.pdf},
  abstract =     {We present parallel and sequential dense QR
                 factorization algorithms that are both optimal (up to
                 polylogarithmic factors) in the amount of communication
                 they perform, and just as stable as Householder QR. Our
                 first algorithm, Tall Skinny QR (TSQR), factors $ m
                 \times n $ matrices in a one-dimensional (1-D) block
                 cyclic row layout, and is optimized for $ m \gg n $.
                 Our second algorithm, CAQR (Communication-Avoiding QR),
                 factors general rectangular matrices distributed in a
                 two-dimensional block cyclic layout. It invokes TSQR
                 for each block column factorization.\par
                 The new algorithms are superior in both theory and
                 practice. We have extended known lower bounds on
                 communication for sequential and parallel matrix
                 multiplication to provide latency lower bounds, and
                 show these bounds apply to the LU and QR
                 decompositions. We not only show that our QR algorithms
                 attain these lower bounds (up to polylogarithmic
                 factors), but that existing LAPACK and ScaLAPACK
                 algorithms perform asymptotically more communication.
                 We also point out recent LU algorithms in the
                 literature that attain at least some of these lower
                 bounds.\par
                 Both TSQR and CAQR have asymptotically lower latency
                 cost in the parallel case, and asymptotically lower
                 latency and bandwidth costs in the sequential case. In
                 practice, we have implemented parallel TSQR on several
                 machines, with speedups of up to $ 6.7 \times $ on 16
                 processors of a Pentium III cluster, and up to $ 4
                 \times $ on 32 processors of a BlueGene/L. We have also
                 implemented sequential TSQR on a laptop for matrices
                 that do not fit in DRAM, so that slow memory is disk.
                 Our out-of-DRAM implementation was as little as $ 2
                 \times $ slower than the predicted runtime as though
                 DRAM were infinite.\par
                 We have also modeled the performance of our parallel
                 CAQR algorithm, yielding predicted speedups over
                 ScaLAPACK's PDGEQRF of up to $ 9.7 \times $ on an IBM
                 Power5, up to $ 22.9 \times $ on a model Petascale
                 machine, and up to $ 5.3 \times $ on a model of the
                 Grid.},
  acknowledgement = ack-nhfb,
  ucbnumber =    {UCB/EECS-2008-89},
}
@TechReport{Bosilca:2008:ABF,
  author          = {Bosilca, George and Delmas, Remi and Dongarra, Jack J.
                     and Langou, Julien},
  title           = {Algorithmic Based Fault Tolerance Applied to High
                     Performance Computing},
  type            = {LAPACK Working Note},
  number          = {205},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {23},
  month           = may,
  year            = {2008},
  note            = {UT-CS-08-620},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn205.pdf},
  abstract        = {We present a new approach to fault tolerance for High
                     Performance Computing system. Our approach is based on
                     a careful adaptation of the Algorithmic Based Fault
                     Tolerance technique (Huang and Abraham, 1984) to the
                     need of parallel distributed computation. We obtain a
                     strongly scalable mechanism for fault tolerance. We can
                     also detect and correct errors (bit-flip) on the fly of
                     a computation. To assess the viability of our approach,
                     we have developed a fault tolerant matrix-matrix
                     multiplication subroutine and we propose some models to
                     predict its running time. Our parallel fault-tolerant
                     matrix-matrix multiplication scores 1.4 TFLOPS on 484
                     processors (cluster {\tt jacquard.nersc.gov}) and
                     returns a correct result while one process failure has
                     happened. This represents 65\% of the machine peak
                     efficiency and less than 12\% overhead with respect to
                     the fastest failure-free implementation. We predict
                     (and have observed) that, as we increase the processor
                     count, the overhead of the fault tolerance drops
                     significantly.},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:2008:PLB,
  author          = {Dongarra, Jack J. and Langou, Julien},
  title           = {The Problem with the {Linpack} Benchmark Matrix
                     Generator},
  type            = {LAPACK Working Note},
  number          = {206},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {12},
  month           = jun,
  year            = {2008},
  note            = {Version 1; version 2 is dated 18 September 2008.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn206.pdf},
  abstract        = {We characterize the matrix sizes for which the Linpack
                     Benchmark 1.0 matrix generator constructs a matrix with
                     identical columns.},
  ucdenvernumber  = {UCD-CCM-271},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Baboulin:2008:UDT,
  author          = {Baboulin, Marc and Gratton, Serge},
  title           = {Using dual techniques to derive componentwise and
                     mixed condition numbers for a linear functional of a
                     linear least squares solution},
  type            = {LAPACK Working Note},
  number          = {207},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = aug,
  year            = {2008},
  note            = {UT-CS-08-622},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn207.pdf},
  mrclass         = {65F35},
  abstract        = {We prove duality results for adjoint operators and
                     product norms in the framework of Euclidean spaces. We
                     show how these results can be used to derive condition
                     numbers especially when perturbations on data are
                     measured componentwise relatively to the original data.
                     We apply this technique to obtain formulas for
                     componentwise and mixed condition numbers for a linear
                     functional of a linear least squares solution. These
                     expressions are closed when perturbations of the
                     solution are measured using a componentwise norm or the
                     infinity norm and we get an upper bound for the
                     Euclidean norm.},
  keywords        = {adjoint operator; componentwise perturbations;
                     condition number; Dual norm; linear least squares},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Ltaief:2008:PBH,
author = "Hatem Ltaief and Jakub Kurzak and Jack Dongarra",
title = "Parallel Block {Hessenberg} Reduction using
Algorithms-By-Tiles for Multicore Architectures
Revisited",
type = "LAPACK Working Note",
number = "208",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = aug,
year = "2008",
bibdate = "Fri Apr 24 12:25:43 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn208.pdf",
abstract = "The objective of this paper is to extend and redesign
the block matrix reduction applied for the family of
two-sided factorizations, introduced by Dongarra et al.
[9], to the context of multicore architectures using
algorithms-by-tiles. In particular, the Block
Hessenberg Reduction is very often used as a
pre-processing step in solving dense linear algebra
problems, such as the standard eigenvalue problem.
Although expensive, orthogonal transformations are
commonly used for this reduction because they guarantee
stability, as opposed to Gaussian Elimination. Two
versions of the Block Hessenberg Reduction are
presented in this paper, the first one with Householder
reflectors and the second one with Givens rotations. A
short investigation on variants of Fast Givens
Rotations is also mentioned. Furthermore, in the last
Top500 list from June 2008, 98\% of the fastest
parallel systems in the world are based on multicores.
The emerging petascale systems consisting of hundreds
of thousands of cores have exacerbated the problem even
more and it becomes judicious to efficiently integrate
existing or new numerical linear algebra algorithms
suitable for such hardware. By exploiting the concepts
of algorithms-by-tiles in the multicore environment
(i.e., high level of parallelism with fine granularity
and high performance data representation combined with
a dynamic data driven execution), the Block Hessenberg
Reduction presented here achieves 72\% of the DGEMM
peak on a $ 12000 \times 12000 $ matrix with 16 Intel
Tigerton 2.4 GHz processors.",
acknowledgement = ack-nhfb,
note = "UT-CS-08-624",
}
@TechReport{Ltaief:2008:PBT,
author = "Hatem Ltaief and Jakub Kurzak and Jack Dongarra",
title = "Parallel Band Two-Sided Matrix Bidiagonalization for
Multicore Architectures",
type = "LAPACK Working Note",
number = "209",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = oct,
year = "2008",
bibdate = "Fri Apr 24 12:25:43 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn209.pdf",
abstract = "The objective of this paper is to extend, in the
context of multicore architectures, the concepts of
algorithms-by-tiles [Buttari et al., 2007] for
Cholesky, LU, QR factorizations to the family of
two-sided factorizations. In particular, the bidiagonal
reduction of a general, dense matrix is very often used
as a pre-processing step for calculating the singular
value decomposition. Furthermore, in the last Top500
list from June 2008, 98\% of the fastest parallel
systems in the world were based on multicores. The
manycore trend has increasingly exacerbated the
problem, and it becomes critical to efficiently
integrate existing or new numerical linear algebra
algorithms suitable for such hardware. By exploiting
the concept of algorithms-by-tiles in the multicore
environment (i.e., high level of parallelism with fine
granularity and high performance data representation
combined with a dynamic data driven execution), the
band bidiagonal reduction presented here achieves 94
Gflop/s on a $ 12000 \times 12000 $ matrix with 16 Intel
Tigerton 2.4 GHz processors.",
acknowledgement = ack-nhfb,
note = "UT-CS-08-631",
}
@TechReport{Tomov:2008:TDL,
author = "Stanimire Tomov and Jack Dongarra and Marc Baboulin",
title = "Towards Dense Linear Algebra for Hybrid {GPU}
Accelerated Manycore Systems",
type = "LAPACK Working Note",
number = "210",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = oct,
year = "2008",
bibdate = "Fri Apr 24 12:25:43 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn210.pdf",
abstract = "If multicore is a disruptive technology, try to
imagine hybrid multicore systems enhanced with
accelerators! This is happening today as accelerators,
in particular Graphics Processing Units (GPUs), are
steadily making their way into the high performance
computing (HPC) world. We highlight the trends leading
to the idea of hybrid manycore/GPU systems, and we
present a set of techniques that can be used to
efficiently program them. The presentation is in the
context of Dense Linear Algebra (DLA), a major building
block for many scientific computing applications. We
motivate the need for new algorithms that would split
the computation in a way that would fully exploit the
power that each of the hybrid components offers. As the
area of hybrid multicore/GPU computing is still in its
infancy, we also argue for its importance in view of
what future architectures may look like. We therefore
envision the need for a DLA library similar to LAPACK
but for hybrid manycore/GPU systems. We illustrate the
main ideas with an LU-factorization algorithm where
particular techniques are used to reduce the amount of
pivoting, resulting in an algorithm achieving up to 388
GFlop/s for single and up to 99.4 GFlop/s for double
precision factorization on a hybrid Intel Xeon (2x4
cores @ 2.33 GHz) --- NVIDIA GeForce GTX 280 (240
cores @ 1.30 GHz) system.",
acknowledgement = ack-nhfb,
keywords = "dense linear algebra; graphics processing units;
hybrid computing; LU factorization; multicore
processors; parallel algorithms",
note = "UT-CS-08-632",
}
@TechReport{Gustavson:2008:LCK,
  author          = {Gustavson, Fred G. and Wasniewski, Jerzy and
                     Dongarra, Jack},
  title           = {Level-3 {Cholesky} kernel subroutine of a fully
                     portable High Performance minimal storage hybrid format
                     {Cholesky} algorithm},
  type            = {LAPACK Working Note},
  number          = {211},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = dec,
  year            = {2008},
  note            = {UT-CS-08-634},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn211.pdf},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Li:2009:NAT,
  author          = {Li, Yinan and Dongarra, Jack and Tomov, Stanimire},
  title           = {A Note on Auto-tuning {GEMM} for {GPUs}},
  type            = {LAPACK Working Note},
  number          = {212},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = jan,
  year            = {2009},
  note            = {UT-CS-09-635},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn212.pdf},
  abstract        = {The development of high performance dense linear
                     algebra (DLA) critically depends on highly optimized
                     BLAS, and especially on the matrix multiplication
                     routine (GEMM). This is especially true for Graphics
                     Processing Units (GPUs), as evidenced by recently
                     published results on DLA for GPUs that rely on highly
                     optimized GEMM [13, 11]. However, the current best GEMM
                     performance, e.g. of up to 375 GFlop/s in single
                     precision and of up to 75 GFlop/s in double precision
                     arithmetic on NVIDIA's GTX 280, is difficult to
                     achieve. The development involves extensive GPU
                     knowledge and even backward engineering to understand
                     some undocumented insides about the architecture that
                     have been of key importance in the development [12]. In
                     this paper, we describe some GPU GEMM auto-tuning
                     optimization techniques that allow us to keep up with
                     changing hardware by rapidly reusing, rather than
                     reinventing, the existing ideas. Auto-tuning, as we
                     show in this paper, is a very practical solution where
                     in addition to getting an easy portability, we can
                     often get substantial speedups even on current GPUs
                     (e.g. up to 27\% in certain cases for both single and
                     double precision GEMMs on the GTX 280).},
  keywords        = {Auto-tuning; dense linear algebra; GPUs; matrix
                     multiply},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Kurzak:2009:SLA,
author = "Jakub Kurzak and Hatem Ltaief and Jack Dongarra and
Rosa M. Badia",
title = "Scheduling Linear Algebra Operations on Multicore
Processors",
type = "LAPACK Working Note",
number = "213",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = feb,
year = "2009",
bibdate = "Fri Apr 24 12:25:43 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn213.pdf",
abstract = "State-of-the-art dense linear algebra software, such
as the LAPACK and ScaLAPACK libraries, suffer
performance losses on multicore processors due to their
inability to fully exploit thread-level parallelism. At
the same time the coarse-grain dataflow model gains
popularity as a paradigm for programming multicore
architectures. This work looks at implementing classic
dense linear algebra workloads, Cholesky factorization,
QR factorization and LU factorization, using dynamic
data-driven execution. Two emerging approaches to
implementing coarse-grain dataflow are examined, the
model of nested parallelism, represented by the Cilk
framework, and the model of parallelism expressed
through an arbitrary Direct Acyclic Graph, represented
by the SMP Superscalar framework. Performance and
coding effort are analyzed and compared against code
manually parallelized at the thread level.",
acknowledgement = ack-nhfb,
keywords = "Cholesky; factorization; linear algebra; LU;
multicore; QR; scheduling; task graph",
note = "UT-CS-09-636",
}
@TechReport{Kurzak:2009:STS,
  author          = {Kurzak, Jakub and Ltaief, Hatem and Dongarra, Jack},
  title           = {Scheduling Two-sided Transformations using
                     Algorithms-by-Tiles on Multicore Architectures},
  type            = {LAPACK Working Note},
  number          = {214},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = feb,
  year            = {2009},
  note            = {UT-CS-09-637},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn214.pdf},
  abstract        = {The objective of this paper is to describe, in the
                     context of multicore architectures, different scheduler
                     implementations for the two-sided linear algebra
                     transformations, in particular the Hessenberg and
                     Bidiagonal reductions which are the first steps for the
                     standard eigenvalue problems and the singular value
                     decompositions respectively. State-of-the-art dense
                     linear algebra software, such as the LAPACK and
                     ScaLAPACK libraries, suffer performance losses on
                     multicore processors due to their inability to fully
                     exploit thread-level parallelism. At the same time the
                     coarse-grain dataflow model gains popularity as a
                     paradigm for programming multicore architectures. By
                     using the concepts of algorithms-by-tiles [Buttari et
                     al., 2007] along with efficient mechanisms for
                     data-driven execution, these two-sided reductions
                     achieve high performance computing. The main drawback
                     of the algorithms-by-tiles approach for two-sided
                     transformations is that the full reduction can not be
                     obtained in one stage. Other methods have to be
                     considered to further reduce the band matrices to the
                     required forms.},
  bibdate         = {Fri Apr 24 12:25:43 2009},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Ballard:2009:COP,
author = "Grey Ballard and James Demmel and Olga Holtz and Oded
Schwartz",
title = "Communication-optimal Parallel and Sequential
{Cholesky} decomposition",
type = "LAPACK Working Note",
number = "215",
institution = inst-UCB-EECS,
address = inst-UCB-EECS:adr,
day = "13",
month = feb,
year = "2009",
bibdate = "Fri Apr 24 12:25:43 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn215.pdf",
abstract = "Numerical algorithms have two kinds of costs:
arithmetic and communication, by which we mean either
moving data between levels of a memory hierarchy (in
the sequential case) or over a network connecting
processors (in the parallel case). Communication costs
often dominate arithmetic costs, so it is of interest
to design algorithms minimizing communication. In this
paper we first extend known lower bounds on the
communication cost (both for bandwidth and for latency)
of conventional ({$ O(n^3) $}) matrix multiplication to
Cholesky factorization, which is used for solving dense
symmetric positive definite linear systems. Second, we
compare the cost of various Cholesky decomposition
implementations to this lower bound, and draw the
following conclusions:\par
\begin{itemize} \item ``Naive'' sequential algorithms
for Cholesky attain neither the bandwidth nor latency
lower bounds. \item The sequential blocked algorithm in
LAPACK (with the right block size), as well as various
recursive algorithms [AP00, GJ01, AGW01, ST04], and one
based on work of Toledo [Tol97], can attain the
bandwidth lower bound. \item The LAPACK algorithm can
also attain the latency bound if used with blocked data
structures rather than column-wise or row-wise matrix
data structures, though the Toledo algorithm cannot.
\item The recursive sequential algorithm due to [AP00]
attains the bandwidth and latency lower bounds at every
level of a multi-level memory hierarchy, in a
`cache-oblivious' way. \item The parallel
implementation of Cholesky in the ScaLAPACK library
(again with the right block-size) attains both the
bandwidth and latency lower bounds to within a
polylogarithmic factor. \end{itemize}
Combined with prior results in [DGHL08a, DGHL08b,
DGX08] this gives a complete set of
communication-optimal algorithms for {$ O(n^3) $}
implementations of three basic factorizations of dense
linear algebra: LU with pivoting, QR and Cholesky. But
it goes beyond this prior work on sequential LU and QR
by optimizing communication for any number of levels of
memory hierarchy.",
acknowledgement = ack-nhfb,
ucbnumber = "UCB/EECS-2009-29",
}
@TechReport{Granat:2009:NPQ,
author = "Robert Granat and Bo K{\aa}gstr{\"o}m and Daniel
Kressner",
title = "A novel parallel {$ Q R $} algorithm for hybrid
distributed memory {HPC} systems",
type = "LAPACK Working Note",
number = "216",
institution = "Department of Computing Science and HPC2N, Ume{\aa}
University",
address = "S-901 Ume{\aa}, Sweden",
month = apr,
year = "2009",
bibdate = "Fri Apr 24 12:25:43 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn216.pdf",
abstract = "A novel variant of the parallel QR algorithm for
solving dense nonsymmetric eigenvalue problems on
hybrid distributed high performance computing (HPC)
systems is presented. For this purpose, we introduce
the concept of multi-window bulge chain chasing and
parallelize aggressive early deflation. The
multi-window approach ensures that most computations
when chasing chains of bulges are performed in level 3
BLAS operations, while the aim of aggressive early
deflation is to speed up the convergence of the QR
algorithm. Mixed MPI-OpenMP coding techniques are
utilized for porting the codes to distributed memory
platforms with multithreaded nodes, such as multicore
processors. Numerous numerical experiments confirm the
superior performance of our parallel QR algorithm in
comparison with the existing ScaLAPACK code, leading to
an implementation that is one to two orders of
magnitude faster for sufficiently large problems,
including a number of examples from applications.",
acknowledgement = ack-nhfb,
keywords = "aggressive early deflation; bulge chasing; Eigenvalue
problem; hybrid distributed memory systems; level 3
performance; multishift; nonsymmetric QR algorithm;
parallel algorithms; parallel computations",
note = "UMINF-09.06",
}
@TechReport{Agullo:2009:CSO,
  author          = {Agullo, Emmanuel and Hadri, Bilel and Ltaief, Hatem and
                     Dongarra, Jack},
  title           = {Comparative Study of One-Sided Factorizations with
                     Multiple Software Packages on Multi-Core Hardware},
  type            = {LAPACK Working Note},
  number          = {217},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {28},
  month           = apr,
  year            = {2009},
  note            = {UT-CS-09-640.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn217.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Ballard:2009:MCL,
  author          = {Ballard, Grey and Demmel, James and Holtz, Olga and
                     Schwartz, Oded},
  title           = {Minimizing Communication in Linear Algebra},
  type            = {LAPACK Working Note},
  number          = {218},
  institution     = inst-UCB-EECS,
  address         = inst-UCB-EECS:adr,
  day             = {15},
  month           = may,
  year            = {2009},
  note            = {UCB/EECS-2009-62},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn218.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Tomov:2009:ARU,
  author          = {Tomov, Stanimire and Dongarra, Jack},
  title           = {Accelerating the reduction to upper {Hessenberg} form
                     through hybrid {GPU}-based computing},
  type            = {LAPACK Working Note},
  number          = {219},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {24},
  month           = may,
  year            = {2009},
  note            = {UT-CS-09-642.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn219.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Kurzak:2009:FDS,
  author          = {Kurzak, Jakub and Dongarra, Jack},
  title           = {Fully Dynamic Scheduler for Numerical Computing on
                     Multicore Processors},
  type            = {LAPACK Working Note},
  number          = {220},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {4},
  month           = jun,
  year            = {2009},
  note            = {UT-CS-09-643.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn220.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Song:2009:DTS,
  author          = {Song, Fengguang and YarKhan, Asim and Dongarra, Jack},
  title           = {Dynamic Task Scheduling for Linear Algebra Algorithms
                     on Distributed-Memory Multicore Systems},
  type            = {LAPACK Working Note},
  number          = {221},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {13},
  month           = apr,
  year            = {2009},
  note            = {UT-CS-09-638.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn221.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Hadri:2009:EPT,
  author          = {Hadri, Bilel and Ltaief, Hatem and Agullo, Emmanuel and
                     Dongarra, Jack},
  title           = {Enhancing Parallelism of Tile {$ Q R $} Factorization
                     for Multicore Architectures},
  type            = {LAPACK Working Note},
  number          = {222},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {4},
  month           = sep,
  year            = {2009},
  note            = {UT-CS-09-645.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn222.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Ltaief:2009:SHP,
  author          = {Ltaief, Hatem and Tomov, Stanimire and Nath, Rajib and
                     Du, Peng and Dongarra, Jack},
  title           = {A Scalable High Performant {Cholesky} Factorization
                     for Multicore with {GPU} Accelerators},
  type            = {LAPACK Working Note},
  number          = {223},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {25},
  month           = nov,
  year            = {2009},
  note            = {UT-CS-09-646.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn223.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Agullo:2010:QFT,
  author          = {Agullo, Emmanuel and Coti, Camille and Dongarra, Jack
                     and Herault, Thomas and Langou, Julien},
  title           = {{$ Q R $} Factorization of Tall and Skinny Matrices in
                     a Grid Computing Environment},
  type            = {LAPACK Working Note},
  number          = {224},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {6},
  month           = apr,
  year            = {2010},
  note            = {UT-CS-10-651. Published in the Proceedings of IPDPS
                     2010: 24th IEEE International Parallel and Distributed
                     Processing Symposium Atlanta GA April 2010.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn224.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Tomov:2010:DLA,
  author          = {Tomov, Stanimire and Nath, Rajib and Ltaief, Hatem and
                     Dongarra, Jack},
  title           = {Dense Linear Algebra Solvers for Multicore with {GPU}
                     Accelerators},
  type            = {LAPACK Working Note},
  number          = {225},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {18},
  month           = apr,
  year            = {2010},
  note            = {UT-CS-09-649. Published in the Proceedings of IPDPS
                     2010: 24th IEEE International Parallel and Distributed
                     Processing Symposium Atlanta GA April 2010.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn225.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Grigori:2010:CCO,
  author          = {Grigori, Laura and Demmel, James W. and Xiang, Hua},
  title           = {{CALU}: a communication optimal {$ L U $}
                     factorization algorithm},
  type            = {LAPACK Working Note},
  number          = {226},
  institution     = inst-UCB-EECS,
  address         = inst-UCB-EECS:adr,
  day             = {15},
  month           = mar,
  year            = {2010},
  note            = {UCB/EECS-2010-29. Submitted to SIAM Journal on Matrix
                     Analysis and Applications (SIMAX).},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn226.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Nath:2010:IMG,
  author          = {Nath, Rajib and Tomov, Stanimire and Dongarra, Jack},
  title           = {An Improved {MAGMA GEMM} for {Fermi GPUs}},
  type            = {LAPACK Working Note},
  number          = {227},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {29},
  month           = jul,
  year            = {2010},
  note            = {UT-CS-10-655.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn227.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Du:2010:COT,
  author          = {Du, Peng and Weber, Rick and Luszczek, Piotr and
                     Tomov, Stanimire and Peterson, Gregory and
                     Dongarra, Jack},
  title           = {From {CUDA} to {OpenCL}: Towards a
                     Performance-portable Solution for Multi-platform {GPU}
                     Programming},
  type            = {LAPACK Working Note},
  number          = {228},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {6},
  month           = sep,
  year            = {2010},
  note            = {UT-CS-10-656.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn228.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Kurzak:2010:ITF,
  author          = {Kurzak, Jakub and Nath, Rajib and Du, Peng and
                     Dongarra, Jack},
  title           = {An Implementation of the Tile {$ Q R $} Factorization
                     for a {GPU} and Multiple {CPUs}},
  type            = {LAPACK Working Note},
  number          = {229},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {15},
  month           = sep,
  year            = {2010},
  note            = {UT-CS-10-657. Submitted to PARA'10},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn229.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Agullo:2010:FCB,
  author          = {Agullo, Emmanuel and Augonnet, Cedric and
                     Dongarra, Jack and Ltaief, Hatem and Namyst, Raymond
                     and Thibault, Samuel and Tomov, Stanimire},
  title           = {Faster, Cheaper, Better --- a Hybridization
                     Methodology to Develop Linear Algebra Software for
                     {GPUs}},
  type            = {LAPACK Working Note},
  number          = {230},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {15},
  month           = sep,
  year            = {2010},
  note            = {UT-CS-10-658. To appear in GPU Computing GEMs, vol.
                     2},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn230.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Bosilca:2010:DGD,
  author          = {Bosilca, G. and Bouteiller, A. and Danalis, A. and
                     Herault, T. and Lemarinier, P. and Dongarra, J.},
  title           = {{DAGuE}: {A} generic distributed {DAG} engine for high
                     performance computing},
  type            = {LAPACK Working Note},
  number          = {231},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {15},
  month           = sep,
  year            = {2010},
  note            = {UT-CS-10-659.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn231.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Bosilca:2010:DMT,
author = "G. Bosilca and A. Bouteiller and A. Danalis and M.
Faverge and H. Haidar and T. Herault and J. Kurzak and
J. Langou and P. Lemarinier and H. Ltaief and P.
Luszczek and A. YarKhan and J. Dongarra",
title = "Distributed-Memory Task Execution and Dependence
Tracking within {DAGuE} and the {DPLASMA Project}",
type = "LAPACK Working Note",
number = "232",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
day = "15",
month = sep,
year = "2010",
bibdate = "Wed Aug 24 12:36:41 MDT 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "UT-CS-10-660.",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn232.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{Agullo:2010:FMN,
  author          = {Agullo, E. and Augonnet, C. and Dongarra, J. and
                     Faverge, M. and Ltaief, H. and Thibault, S. and
                     Tomov, S.},
  title           = {{$ Q R $} Factorization on a Multicore Node Enhanced
                     with Multiple {GPU} Accelerators},
  type            = {LAPACK Working Note},
  number          = {233},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  month           = oct,
  year            = {2010},
  note            = {UT-CS-10-XXX, published in Proceedings of IPDPS
                     2011.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn233.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:2010:RTT,
  author          = {Dongarra, Jack and Luszczek, Piotr},
  title           = {Reducing the time to tune parallel dense linear
                     algebra routines with partial execution and performance
                     modelling},
  type            = {LAPACK Working Note},
  number          = {235},
  institution     = inst-UTK-CS,
  address         = inst-UTK-CS:adr,
  day             = {8},
  month           = oct,
  year            = {2010},
  note            = {UT-CS-10-661.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn235.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Baboulin:2010:CCT,
  author          = {Baboulin, Marc and Gratton, Serge},
  title           = {A contribution to the conditioning of the total least
                     squares problem},
  type            = {LAPACK Working Note},
  number          = {236},
  institution     = inst-INRIA,
  address         = inst-INRIA:adr,
  day             = {5},
  month           = nov,
  year            = {2010},
  note            = {INRIA report.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn236.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Ballard:2010:MCE,
  author          = {Ballard, Grey and Demmel, James and Dumitriu, Ioana},
  title           = {Minimizing Communication for Eigenproblems and the
                     Singular Value Decomposition},
  type            = {LAPACK Working Note},
  number          = {237},
  institution     = inst-UCB-EECS,
  address         = inst-UCB-EECS:adr,
  day             = {13},
  month           = nov,
  year            = {2010},
  note            = {UCB/EECS-2010-136.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn237.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Solomonik:2011:COPa,
  author          = {Solomonik, Edgar and Demmel, James},
  title           = {Communication-optimal parallel {$ 2.5 $D} matrix
                     multiplication and {$ L U $} factorization algorithms},
  type            = {LAPACK Working Note},
  number          = {238},
  institution     = inst-UCB-EECS,
  address         = inst-UCB-EECS:adr,
  day             = {9},
  month           = feb,
  year            = {2011},
  note            = {UCB/EECS-2011-10.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn238.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Ballard:2011:CBH,
  author          = {Ballard, Grey and Demmel, James and Gearhart, Andrew},
  title           = {Communication bounds for heterogeneous architectures},
  type            = {LAPACK Working Note},
  number          = {239},
  institution     = inst-UCB-EECS,
  address         = inst-UCB-EECS:adr,
  day             = {11},
  month           = feb,
  year            = {2011},
  note            = {UCB/EECS-2011-13.},
  url             = {http://www.netlib.org/lapack/lawnspdf/lawn239.pdf},
  bibdate         = {Wed Aug 24 12:36:41 MDT 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Anderson:2011:CAD,
  author =       "Michael Anderson and Grey Ballard and James Demmel and
                 Kurt Keutzer",
  title =        "Communication-Avoiding {$ Q R $} Decomposition for
                 {GPUs}",
  type =         "LAPACK Working Note",
  number =       "240",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          "18",
  month =        feb,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "Update of UCB/EECS-2010-131. To appear in IPDPS'11.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn240.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Song:2011:STC,
  author =       "Fengguang Song and Hatem Ltaief and Bilel Hadri and
                 Jack Dongarra",
  title =        "Scalable Tile Communication-Avoiding {$ Q R $}
                 Factorization on Multicore Cluster Systems",
  type =         "LAPACK Working Note",
  number =       "241",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "4",
  month =        mar,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-653. Published at SC'10",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn241.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Agullo:2011:FEA,
  author =       "Emmanuel Agullo and Jack Dongarra and Rajib Nath and
                 Stanimire Tomov",
  title =        "A Fully Empirical Autotuned Dense {$ Q R $}
                 Factorization For Multicore Architectures",
  type =         "LAPACK Working Note",
  number =       "242",
  institution =  inst-INRIA,
  address =      inst-INRIA:adr,
  day =          "9",
  month =        mar,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "INRIA-7526.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn242.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Haidar:2011:ADS,
  author =       "Azzam Haidar and Hatem Ltaief and Asim YarKhan and
                 Jack Dongarra",
  title =        "Analysis of Dynamically Scheduled Tile Algorithms for
                 Dense Linear Algebra on Multicore Architectures",
  type =         "LAPACK Working Note",
  number =       "243",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "10",
  month =        mar,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-666. Submitted at Concurrency and
                 Computations.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn243.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Luszczek:2011:TST,
  author =       "Piotr Luszczek and Hatem Ltaief and Jack Dongarra",
  title =        "Two-Stage Tridiagonal Reduction for Dense Symmetric
                 Matrices using Tile Algorithms on Multicore
                 Architectures",
  type =         "LAPACK Working Note",
  number =       "244",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "18",
  month =        apr,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-670.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn244.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Kurzak:2011:AGF,
  author =       "Jakub Kurzak and Stanimire Tomov and Jack Dongarra",
  title =        "Autotuning {GEMMs} for {Fermi}",
  type =         "LAPACK Working Note",
  number =       "245",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "18",
  month =        apr,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-671. Submitted at SC11 November 12--18, 2011,
                 Seattle, Washington, USA.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn245.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Baboulin:2011:ALS,
  author =       "Marc Baboulin and Jack Dongarra and Julien Herrmann
                 and Stanimire Tomov",
  title =        "Accelerating linear system solutions using
                 randomization techniques",
  type =         "LAPACK Working Note",
  number =       "246",
  institution =  inst-INRIA,
  address =      inst-INRIA:adr,
  day =          "15",
  month =        may,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "INRIA RR-7616.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn246.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Ltaief:2011:HPB,
  author =       "Hatem Ltaief and Piotr Luszczek and Jack Dongarra",
  title =        "High Performance Bidiagonal Reduction using Tile
                 Algorithms on Homogeneous Multicore Architectures",
  type =         "LAPACK Working Note",
  number =       "247",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "18",
  month =        may,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-673. Submitted at TOMS.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn247.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Solomonik:2011:COPb,
  author =       "Edgar Solomonik and James Demmel",
  title =        "Communication-optimal parallel {$ 2.5 $D} matrix
                 multiplication and {$ L U $} factorization algorithms",
  type =         "LAPACK Working Note",
  number =       "248",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          "7",
  month =        jun,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UCB/EECS-2011-72.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn248.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Gustavson:2011:LCF,
  author =       "Fred G. Gustavson and Jerzy Wa{\'s}niewski and Jack J.
                 Dongarra and Jos{\'e} R. Herrero and Julien Langou",
  title =        "Level-3 {Cholesky} Factorization Routines as Part of
                 Many {Cholesky} Algorithms",
  type =         "LAPACK Working Note",
  number =       "249",
  institution =  "????",
  address =      "????",
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "DTU/IMM-Technical-Report-2011-11, submitted at TOMS.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn249.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Song:2011:ESM,
  author =       "Fengguang Song and Stanimire Tomov and Jack Dongarra",
  title =        "Efficient Support for Matrix Computations on
                 Heterogeneous Multi-core and Multi-{GPU}
                 Architectures",
  type =         "LAPACK Working Note",
  number =       "250",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "16",
  month =        jun,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-668.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn250.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Ltaief:2011:PHP,
  author =       "Hatem Ltaief and Piotr Luszczek and Jack Dongarra",
  title =        "Profiling High Performance Dense Linear Algebra
                 Algorithms on Multicore Architectures for Power and
                 Energy Efficiency",
  type =         "LAPACK Working Note",
  number =       "251",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "21",
  month =        jun,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-674.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn251.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Du:2011:SER,
  author =       "Peng Du and Piotr Luszczek and Stanimire Tomov and
                 Jack Dongarra",
  title =        "Soft Error Resilient {$ Q R $} Factorization for
                 Hybrid System",
  type =         "LAPACK Working Note",
  number =       "252",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "1",
  month =        jul,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-675.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn252.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Du:2011:ABF,
  author =       "Peng Du and Aurelien Bouteiller and George Bosilca and
                 Thomas Herault and Jack Dongarra",
  title =        "Algorithm-based Fault Tolerance for Dense Matrix
                 Factorizations",
  type =         "LAPACK Working Note",
  number =       "253",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "5",
  month =        aug,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-676.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn253.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Haidar:2011:PRC,
  author =       "Azzam Haidar and Hatem Ltaief and Jack Dongarra",
  title =        "Parallel Reduction to Condensed Forms for Symmetric
                 Eigenvalue Problems using Aggregated Fine-Grained and
                 Memory-Aware Kernels",
  type =         "LAPACK Working Note",
  number =       "254",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "5",
  month =        aug,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-677 Aug 5 2011.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn254.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Solomonik:2011:ICP,
  author =       "Edgar Solomonik and Abhinav Bhatele and James Demmel",
  title =        "Improving communication performance in dense linear
                 algebra via topology aware collectives",
  type =         "LAPACK Working Note",
  number =       "255",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          "15",
  month =        aug,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UCB/EECS-2011-92.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn255.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Du:2011:HPL,
  author =       "Peng Du and Piotr Luszczek and Jack Dongarra",
  title =        "High Performance Linear System Solver with Resilience
                 to Multiple Soft Errors",
  type =         "LAPACK Working Note",
  number =       "256",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        oct,
  year =         "2011",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-683.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn256.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:2011:HFA,
  author =       "Jack Dongarra and Mathieu Faverge and Thomas Herault
                 and Julien Langou and Yves Robert",
  title =        "Hierarchical {$ Q R $} factorization algorithms for
                 multi-core cluster systems",
  type =         "LAPACK Working Note",
  number =       "257",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        oct,
  year =         "2011",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-684.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn257.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Anzt:2011:BAR,
  author =       "Hartwig Anzt and Stanimire Tomov and Jack Dongarra and
                 Vincent Heuveline",
  title =        "A Block-Asynchronous Relaxation Method for Graphics
                 Processing Units",
  type =         "LAPACK Working Note",
  number =       "258",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        dec,
  year =         "2011",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-687.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn258.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:2011:ANA,
  author =       "Jack Dongarra and Mathieu Faverge and Hatem Ltaief and
                 Piotr Luszczek",
  title =        "Achieving Numerical Accuracy and High Performance
                 using Recursive Tile {$ L U $} Factorization",
  type =         "LAPACK Working Note",
  number =       "259",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        dec,
  year =         "2011",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-688.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn259.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Anzt:2011:GAA,
  author =       "Hartwig Anzt and Piotr Luszczek and Jack Dongarra and
                 Vincent Heuveline",
  title =        "{GPU}-Accelerated Asynchronous Error Correction for
                 Mixed Precision Iterative Refinement",
  type =         "LAPACK Working Note",
  number =       "260",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        dec,
  year =         "2011",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-690.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn260.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Baboulin:2011:PTS,
  author =       "Marc Baboulin and Dulceneia Becker and Jack Dongarra",
  title =        "A parallel tiled solver for dense symmetric indefinite
                 systems on multicore architectures",
  type =         "LAPACK Working Note",
  number =       "261",
  institution =  inst-INRIA,
  address =      inst-INRIA:adr,
  month =        dec,
  year =         "2011",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "INRIA-7762.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn261.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Bougeret:2011:URR,
  author =       "Marin Bougeret and Henri Casanova and Yves Robert and
                 Fr{\'e}d{\'e}ric Vivien and Dounia Zaidouni",
  title =        "Using replication for resilience on exascale systems",
  type =         "LAPACK Working Note",
  number =       "262",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        dec,
  year =         "2011",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-11-691.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn262.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Khabou:2012:FPR,
  author =       "Amal Khabou and James W. Demmel and Laura Grigori and
                 Ming Gu",
  title =        "{$ L U $} factorization with panel rank revealing
                 pivoting and its communication avoiding version",
  type =         "LAPACK Working Note",
  number =       "263",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  month =        jan,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UCB/EECS-2012-XX.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn263.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Bosilca:2012:DLA,
  author =       "George Bosilca and Aurelien Bouteiller and Anthony
                 Danalis and Thomas Herault and Piotr Luszczek and Jack
                 J. Dongarra",
  title =        "Dense Linear Algebra on Distributed Heterogeneous
                 Hardware with a Symbolic {DAG} Approach",
  type =         "LAPACK Working Note",
  number =       "264",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jan,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn264.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Bougeret:2012:UGR,
  author =       "Marin Bougeret and Henri Casanova and Yves Robert and
                 Fr{\'e}d{\'e}ric Vivien and Dounia Zaidouni",
  title =        "Using group replication for resilience on exascale
                 systems",
  type =         "LAPACK Working Note",
  number =       "265",
  institution =  inst-INRIA,
  address =      inst-INRIA:adr,
  month =        mar,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn265.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Kurzak:2012:FPP,
  author =       "Jakub Kurzak and Piotr Luszczek and Mathieu Faverge
                 and Jack Dongarra",
  title =        "{$ L U $} Factorization with Partial Pivoting for a
                 Multi-{CPU}, Multi-{GPU} Shared Memory System",
  type =         "LAPACK Working Note",
  number =       "266",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        apr,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn266.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Kurzak:2012:PRA,
  author =       "Jakub Kurzak and Piotr Luszczek and Stanimire Tomov
                 and Jack Dongarra",
  title =        "Preliminary Results of Autotuning {GEMM} Kernels for
                 the {NVIDIA Kepler Architecture GeForce GTX 680}",
  type =         "LAPACK Working Note",
  number =       "267",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        apr,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn267.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Robert:2012:CPR,
  author =       "Yves Robert and Fr{\'e}d{\'e}ric Vivien and Dounia
                 Zaidouni",
  title =        "Combining Process Replication and Checkpointing for
                 Resilience on Exascale Systems",
  type =         "LAPACK Working Note",
  number =       "268",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jun,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-12-696.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn268.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Bosilca:2012:UMA,
  author =       "George Bosilca and Aurelien Bouteiller and Elisabeth
                 Brunet and Franck Cappello and Jack Dongarra and Amina
                 Guermouche and Thomas Herault and Yves Robert and
                 Fr{\'e}d{\'e}ric Vivien and Dounia Zaidouni",
  title =        "Unified Model for Assessing Checkpointing Protocols at
                 Extreme-Scale",
  type =         "LAPACK Working Note",
  number =       "269",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jun,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-12-697.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn269.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Langou:2012:HLL,
  author =       "Julie Langou and Bill Hoffman and Brad King",
  title =        "How {LAPACK} library enables {Microsoft Visual Studio}
                 support with {CMake} and {LAPACKE}",
  type =         "LAPACK Working Note",
  number =       "270",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jul,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-12-698.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn270.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Karlsson:2012:OPC,
  author =       "Lars Karlsson and Daniel Kressner",
  title =        "Optimally packed chains of bulges in multishift {$ Q R
                 $} algorithms",
  type =         "LAPACK Working Note",
  number =       "271",
  institution =  "Department of Computing Science, Ume{\aa} University
                 and EPF",
  address =      "Ume{\aa}, Sweden and Lausanne, Switzerland",
  month =        aug,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn271.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Du:2012:PGC,
  author =       "Peng Du and Stanimire Tomov and Jack Dongarra",
  title =        "Providing {GPU} Capability to {$ L U $} and {$ Q R $}
                 within the {ScaLAPACK} Framework",
  type =         "LAPACK Working Note",
  number =       "272",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        sep,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-12-699.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn272.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Baboulin:2012:ECC,
  author =       "Marc Baboulin and Serge Gratton and Remi Lacroix and
                 Alan Laub",
  title =        "Efficient computation of condition estimates for
                 linear least squares problems",
  type =         "LAPACK Working Note",
  number =       "273",
  institution =  inst-INRIA,
  address =      inst-INRIA:adr,
  month =        sep,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "INRIA-8065.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn273.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Dongarra:2012:RDC,
  author =       "Jack Dongarra and Thomas Herault and Yves Robert",
  title =        "Revisiting the double checkpointing algorithm",
  type =         "LAPACK Working Note",
  number =       "274",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        dec,
  year =         "2012",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-13-705.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn274.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Cao:2013:CHP,
  author =       "Chongxiao Cao and Jack Dongarra and Peng Du and Mark
                 Gates and Piotr Luszczek and Stanimire Tomov",
  title =        "{clMAGMA}: High Performance Dense Linear Algebra with
                 {OpenCL}",
  type =         "LAPACK Working Note",
  number =       "275",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        mar,
  year =         "2013",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-13-706.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn275.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Demmel:2013:CAR,
  author =       "James W. Demmel and Laura Grigori and Ming Gu and Hua
                 Xiang",
  title =        "Communication Avoiding Rank Revealing {$ Q R $}
                 Factorization With Column Pivoting",
  type =         "LAPACK Working Note",
  number =       "276",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  month =        may,
  year =         "2013",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UCB/EECS-2013-46.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn276.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Aupy:2013:ISA,
  author =       "Guillaume Aupy and Mathieu Faverge and Yves Robert and
                 Jakub Kurzak and Piotr Luszczek and Jack Dongarra",
  title =        "Implementing a systolic algorithm for {$ Q R $}
                 factorization on multicore clusters with {PaRSEC}",
  type =         "LAPACK Working Note",
  number =       "277",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        may,
  year =         "2013",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-13-709.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn277.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Aupy:2013:CSE,
  author =       "Guillaume Aupy and Anne Benoit and Thomas H{\'e}rault
                 and Yves Robert and Fr{\'e}d{\'e}ric Vivien and Dounia
                 Zaidouni",
  title =        "On the Combination of Silent Error Detection and
                 Checkpointing",
  type =         "LAPACK Working Note",
  number =       "278",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jun,
  year =         "2013",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-13-710.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn278.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Jia:2013:TER,
  author =       "Yulu Jia and Piotr Luszczek and Jack Dongarra",
  title =        "Transient Error Resilient {Hessenberg} Reduction on
                 {GPU}-based Hybrid Architectures",
  type =         "LAPACK Working Note",
  number =       "279",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jun,
  year =         "2013",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-13-712.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn279.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Donfack:2013:AVP,
  author =       "Simplice Donfack and Jack Dongarra and Mathieu Faverge
                 and Mark Gates and Jakub Kurzak and Piotr Luszczek and
                 Ichitaro Yamazaki",
  title =        "On Algorithmic Variants of Parallel {Gaussian}
                 Elimination: Comparison of Implementations in Terms of
                 Performance and Numerical Properties",
  type =         "LAPACK Working Note",
  number =       "280",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jul,
  year =         "2013",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-13-715",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn280.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Aupy:2013:OCP,
  author =       "Guillaume Aupy and Anne Benoit and Thomas Herault and
                 Yves Robert and Jack Dongarra",
  title =        "Optimal Checkpointing Period: Time vs. Energy",
  type =         "LAPACK Working Note",
  number =       "281",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        oct,
  year =         "2013",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-EECS-13-718.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn281.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Faverge:2013:DHS,
  author =       "Mathieu Faverge and Julien Herrmann and Julien Langou
                 and Bradley Lowery and Yves Robert and Jack Dongarra",
  title =        "Designing {$ L U $--$ Q R $} hybrid solvers for
                 performance and stability",
  type =         "LAPACK Working Note",
  number =       "282",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        oct,
  year =         "2013",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-EECS-13-719.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn282.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Haidar:2013:IPS,
  author =       "Azzam Haidar and Piotr Luszczek and Jakub Kurzak and
                 Jack Dongarra",
  title =        "An Improved Parallel Singular Value Algorithm and Its
                 Implementation for Multicore Hardware",
  type =         "LAPACK Working Note",
  number =       "283",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        oct,
  year =         "2013",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-EECS-13-720.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn283.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Kohler:2013:FFB,
  author =       "Martin K{\"o}hler and Jens Saak",
  title =        "{FlexiBLAS} --- A flexible {BLAS} library with runtime
                 exchangeable backends",
  type =         "LAPACK Working Note",
  number =       "284",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        "????",
  year =         "2013",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn284.pdf",
  acknowledgement = ack-nhfb,
}
@TechReport{Baboulin:2014:URB,
  author =       "Marc Baboulin and Xiaoye S. Li and
                 Fran{\c{c}}ois-Henry Rouet",
  title =        "Using Random Butterfly Transformations to Avoid
                 Pivoting in Sparse Direct Methods",
  type =         "LAPACK Working Note",
  number =       "285",
  institution =  inst-INRIA,
  address =      inst-INRIA:adr,
  month =        feb,
  year =         "2014",
  bibdate =      "Sat Mar 15 07:08:58 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "Inria Research Report RR-8481.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn285.pdf",
  acknowledgement = ack-nhfb,
}
@Article{Brewer:1988:TAAb,
  author =       "Orlie Brewer and Jack Dongarra and Danny Sorensen",
  title =        "Tools to aid in the analysis of memory access patterns
                 for {FORTRAN} programs",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "9",
  number =       "1",
  pages =        "25--35",
  month =        dec,
  year =         "1988",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191",
  bibdate =      "Sat Mar 22 15:39:54 MST 1997",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "See original LAPACK Working note in
                 \cite{Brewer:1988:TAAa}.",
  URL =          "http://www.netlib.org/utk/people/JackDongarra/PAPERS/Tools-to-Aid-Analysis-of-Memory-Access-Patterns-for-FORTRAN-Programs.pdf",
  abstract =     "In order to improve the performance of algorithms
                 implemented on high-performance computers, we must
                 consider not only the total number of memory
                 references, but also the pattern of memory references.
                 We would like our algorithms to observe the principle
                 of locality of reference, so that the data can be
                 effectively utilized. This paper describes a set of
                 tools that can be used as an aid in the analysis of
                 memory access patterns of FORTRAN programs.",
  acknowledgement = ack-nhfb,
  affiliation =  "Argonne",
  affiliationaddress = "Argonne, IL, USA",
  classcodes =   "C6115 (Programming support); C6110 (Systems analysis
                 and programming)",
  classification = "723",
  corpsource =   "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
                 USA",
  journalabr =   "Parallel Comput",
  keywords =     "Computer Programming Languages--FORTRAN; Computer
                 Software; Data Storage, Digital; FORTRAN Programs;
                 FORTRAN programs; Linear Algebra; Memory Access
                 Patterns; memory access patterns analysis; Parallel
                 Processing Computers; parallel programming; Software
                 Engineering; software tools; Visualization Tools",
  treatment =    "P Practical",
}
@Article{Bai:1989:BIHb,
  author =       "Z. Bai and J. Demmel",
  title =        "On a Block Implementation of {Hessenberg} Multishift
                 {$ Q R $} Iteration",
  journal =      j-INT-J-HIGH-SPEED-COMPUTING,
  volume =       "1",
  number =       "1",
  pages =        "97--112",
  year =         "1989",
  CODEN =        "IHSCEZ",
  ISSN =         "0129-0533",
  bibsource =    "ftp://ftp.ira.uka.de/bibliography/Parallel/par.lin.alg.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "See original LAPACK Working note in
                 \cite{Bai:1989:BIHa}.",
}
@Article{Dongarra:1989:BRM,
  author =       "J. J. Dongarra and D. C. Sorensen and S. J.
                 Hammarling",
  title =        "Block reduction of matrices to condensed forms for
                 eigenvalue computations",
  journal =      j-J-COMPUT-APPL-MATH,
  volume =       "27",
  number =       "1--2",
  pages =        "215--227",
  month =        sep,
  year =         "1989",
  CODEN =        "JCAMDI",
  ISSN =         "0377-0427 (print), 1879-1778 (electronic)",
  bibdate =      "Sat Mar 22 15:39:54 MST 1997",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "See original LAPACK Working note in
                 \cite{Dongarra:1987:BRM}.",
  acknowledgement = ack-nhfb,
  classcodes =   "C4140 (Linear algebra); C4240 (Programming and
                 algorithm theory)",
  corpsource =   "Math. and Comput. Sci. Div., Argonne Nat. Lab., IL,
                 USA",
  keywords =     "algorithms; bidiagonal; block algorithms; block
                 reduction of matrices; condensed; divide and conquer
                 technique; eigenvalue computations; eigenvalues and
                 eigenfunctions; forms; Hessenberg form; Householder
                 transformations; linear algebra; matrix-matrix
                 operations; parallel",
  treatment =    "T Theoretical or Mathematical",
}
@InProceedings{Anderson:1990:LPLb,
  author =       "E. Anderson and Z. Bai and C. Bischof and J. Demmel
                 and J. Dongarra and J. DuCroz and A. Greenbaum and S.
                 Hammarling and A. McKenney and D. Sorensen",
  title =        "{LAPACK}: {A} Portable Linear Algebra Library for
                 High-Performance Computers",
  crossref =     "IEEE:1990:PSN",
  pages =        "2--11",
  year =         "1990",
  bibdate =      "Mon Sep 9 14:47:18 1996",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "See original LAPACK Working note in
                 \cite{Anderson:1990:LPLa}.",
  abstract =     "The goal of the LAPACK project is to design and
                 implement a portable linear algebra library for
                 efficient use on a variety of high-performance
                 computers. The library is based on the widely used
                 LINPACK and EISPACK packages for solving linear
                 equations, eigenvalue problems, and linear
                 least-squares problems, but extends their functionality
                 in a number of ways. The major methodology for making
                 the algorithms run faster is to restructure them to
                 perform block matrix operations (e.g., matrix-matrix
                 multiplication) in their inner loops. These block
                 operations may be optimized to exploit the memory
                 hierarchy of a specific architecture. The LAPACK
                 project is also working on new algorithms that yield
                 higher relative accuracy for a variety of linear
                 algebra problems.",
  acknowledgement = ack-nhfb,
  affiliation =  "Tennessee Univ., Knoxville, TN, USA",
  classification = "C4140 (Linear algebra); C7310 (Mathematics)",
  keywords =     "Block matrix operations; Block operations; Eigenvalue
                 problems; Functionality; Inner loops; LAPACK; Linear
                 equations; Linear least-squares problems; Matrix-matrix
                 multiplication; Memory hierarchy; Portable linear
                 algebra library; Relative accuracy",
  page =         "1--10",
  thesaurus =    "Eigenvalues and eigenfunctions; Matrix algebra;
                 Software portability; Subroutines",
}
@Article{Barlow:1990:CAE,
  author = "Jesse Barlow and James Demmel",
  title = "Computing Accurate Eigensystems of Scaled Diagonally
    Dominant Matrices",
  journal = j-SIAM-J-NUMER-ANAL,
  volume = "27",
  number = "3",
  pages = "762--791",
  month = jun,
  year = "1990",
  CODEN = "SJNAAM",
  ISSN = "0036-1429 (print), 1095-7170 (electronic)",
  MRclass = "65F15",
  MRnumber = "91g:65071",
  MRreviewer = "Alan L. Andrew",
  bibdate = "Fri Oct 16 06:57:22 MDT 1998",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib; JSTOR
    database; Parallel/par.lin.alg.bib",
  note = "See original LAPACK Working note in
    \cite{Barlow:1988:CAE}.",
  acknowledgement = ack-nhfb,
}
@Article{Dongarra:1990:ASL,
  author = "Jack J. Dongarra and Jeremy {Du Croz} and Sven
    Hammarling and Iain Duff",
  title = "{Algorithm 679}: {A} Set of Level 3 {Basic Linear
    Algebra Subprograms}: Model Implementation and Test
    Programs",
  journal = j-TOMS,
  volume = "16",
  number = "1",
  pages = "18--28",
  month = mar,
  year = "1990",
  CODEN = "ACMSCU",
  DOI = "https://doi.org/10.1145/77626.77627",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  bibdate = "Sat Aug 27 17:29:49 1994",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See also
    \cite{Higham:1990:EFM,Demmel:1992:SBA,Dayde:1994:PBI}.",
  URL = "http://www.acm.org/pubs/citations/journals/toms/1990-16-1/p18-dongarra/",
  acknowledgement = ack-nhfb,
  keywords = "algorithms; measurement; performance; reliability;
    verification",
  subject = "{\bf D.3.2}: Software, PROGRAMMING LANGUAGES, Language
    Classifications, FORTRAN 8X. {\bf F.2.1}: Theory of
    Computation, ANALYSIS OF ALGORITHMS AND PROBLEM
    COMPLEXITY, Numerical Algorithms and Problems,
    Computations on matrices. {\bf G.1.3}: Mathematics of
    Computing, NUMERICAL ANALYSIS, Numerical Linear
    Algebra, Linear systems (direct and iterative methods).
    {\bf G.4}: Mathematics of Computing, MATHEMATICAL
    SOFTWARE.",
}
@Article{Higham:1990:EFM,
  author = "Nicholas J. Higham",
  title = "Exploiting Fast Matrix Multiplication Within the Level
    3 {BLAS}",
  journal = j-TOMS,
  volume = "16",
  number = "4",
  pages = "352--368",
  month = dec,
  year = "1990",
  CODEN = "ACMSCU",
  DOI = "https://doi.org/10.1145/98267.98290",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  MRclass = "65-04 (65F99)",
  MRnumber = "1 095 133",
  bibdate = "Sun Sep 04 23:21:57 1994",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "Describes algorithms based on Strassen's method which
    are asymptotically faster than the standard {$ {N}^3 $}
    algorithm, and in practice, faster for {$ {N} \approx
    100 $}, and examines their numerical stability. See
    \cite{Dongarra:1990:ASL,Demmel:1992:SBA,Dayde:1994:PBI}.",
  URL = "http://www.acm.org/pubs/citations/journals/toms/1990-16-4/p352-higham/",
  abstract = "The Level 3 BLAS (BLAS3) are a set of specifications
    of FORTRAN 77 subprograms for carrying out matrix
    multiplications and the solution of triangular systems
    with multiple right-hand sides. They are intended to
    provide efficient and portable building blocks for
    linear algebra algorithms on high-performance
    computers. We describe algorithms for the BLAS3
    operations that are asymptotically faster than the
    conventional ones. These algorithms are based on
    Strassen's method for fast matrix multiplication, which
    is now recognized to be a practically useful technique
    once matrix dimensions exceed about 100. We pay
    particular attention to the numerical stability of
    these ``fast BLAS3.'' Error bounds are given and their
    significance is explained and illustrated with the aid
    of numerical experiments. Our conclusion is that the
    fast BLAS3, although not as strongly stable as
    conventional implementations, are stable enough to
    merit careful consideration in many applications.",
  acknowledgement = ack-nhfb,
  keywords = "algorithms",
  subject = "{\bf G.1.3}: Mathematics of Computing, NUMERICAL
    ANALYSIS, Numerical Linear Algebra. {\bf D.3.2}:
    Software, PROGRAMMING LANGUAGES, Language
    Classifications, FORTRAN 77.",
}
@Article{Deift:1991:BSV,
  author = "Percy Deift and James Demmel and Luen Chau Li and
    Carlos Tomei",
  title = "The Bidiagonal Singular Value Decomposition and
    {Hamiltonian} Mechanics",
  journal = j-SIAM-J-NUMER-ANAL,
  volume = "28",
  number = "5",
  pages = "1463--1516",
  month = oct,
  year = "1991",
  CODEN = "SJNAAM",
  ISSN = "0036-1429 (print), 1095-7170 (electronic)",
  MRclass = "65F15 (58F05)",
  MRnumber = "92i:65071",
  MRreviewer = "T. Y. Li",
  bibdate = "Fri Oct 16 06:57:22 MDT 1998",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib; JSTOR
    database",
  note = "See original LAPACK Working note in
    \cite{Deift:1989:BSV}.",
  acknowledgement = ack-nhfb,
}
@Article{Dongarra:1991:IRS,
  author = "J. J. Dongarra and P. Mayes and G. {Radicati di
    Brozolo}",
  title = "The {IBM RISC System\slash 6000} and Linear Algebra
    Operations",
  journal = j-SUPERCOMPUTER,
  volume = "8",
  number = "4",
  pages = "15--30",
  month = jul,
  year = "1991",
  CODEN = "SPCOEL",
  ISSN = "0168-7875",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Dongarra:1990:IRS}.",
  URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/The-IBM-RISC-System-6000-and-Linear-Algebra-Operations.pdf",
  abstract = "The paper discusses the IBM RISC System/6000
    workstation and a set of experiments with blocked
    algorithms commonly used in solving problems in
    numerical linear algebra. The authors describe the
    performance of these algorithms and discuss the
    techniques used in achieving high performance on such
    an architecture.",
  acknowledgement = ack-nhfb,
  affiliation = "Math. Sci. Section, Oak Ridge Nat. Lab., TN, USA",
  classcodes = "C5420 (Mainframes and minicomputers); C5470
    (Performance evaluation and testing); C4140 (Linear
    algebra)",
  classification = "C4140 (Linear algebra); C5420 (Mainframes and
    minicomputers); C5470 (Performance evaluation and
    testing)",
  corpsource = "Math. Sci. Section, Oak Ridge Nat. Lab., TN, USA",
  keywords = "blocked algorithms; Blocked algorithms; Floating point
    performance; floating point performance; IBM computers;
    IBM RISC System/6000; IBM RISC System/6000 workstation;
    linear algebra; numerical linear algebra; Numerical
    linear algebra; performance evaluation; reduced
    instruction set computing; workstation",
  pubcountry = "Netherlands",
  thesaurus = "IBM computers; Linear algebra; Performance evaluation;
    Reduced instruction set computing",
  treatment = "P Practical",
}
@Article{Anderson:1992:GFA,
  author = "E. Anderson and Z. Bai and J. Dongarra",
  title = "Generalized {$ Q R $} factorization and its
    applications",
  journal = j-LINEAR-ALGEBRA-APPL,
  volume = "162/164",
  pages = "243--271",
  year = "1992",
  CODEN = "LAAPAW",
  ISSN = "0024-3795 (print), 1873-1856 (electronic)",
  MRclass = "65F15 15A23",
  MRnumber = "92j:65050",
  bibdate = "Thu Dec 19 14:07:22 1996",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "Directions in matrix theory (Auburn, AL, 1990). See
    original LAPACK Working note in
    \cite{Anderson:1991:GQF}.",
  URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/Generalized-QR-Factorization-and-Its-Applications.pdf",
  acknowledgement = ack-nhfb,
}
@Article{Bischof:1992:GIC,
  author = "Christian H. Bischof and Ping Tak Peter Tang",
  title = "Generalizing incremental condition estimation",
  journal = j-J-NUM-LIN-ALG-APPL,
  volume = "1",
  number = "2",
  pages = "149--163",
  year = "1992",
  CODEN = "NLAAEM",
  ISSN = "0129-3281",
  MRclass = "65F30",
  MRnumber = "93e:65068",
  bibdate = "Thu Jan 23 19:03:25 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Bischof:1991:GIC}.",
  acknowledgement = ack-nhfb,
}
@InProceedings{Choi:1992:SSLb,
  author = "J. Choi and J. J. Dongarra and R. Pozo and D. W.
    Walker",
  title = "{ScaLAPACK}: a scalable linear algebra library for
    distributed memory concurrent computers",
  crossref = "Siegel:1992:FSF",
  pages = "120--127",
  year = "1992",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "IEEE catalog number 92CH3185-6. See original LAPACK
    Working note in \cite{Choi:1992:SSLa}.",
  acknowledgement = ack-nhfb,
  classcodes = "C7310 (Mathematics); C4140 (Linear algebra); C6110J
    (Object-oriented programming); C5470 (Performance
    evaluation and testing); C5440 (Multiprocessor systems
    and techniques)",
  conflocation = "McLean, VA, USA; 19-21 Oct. 1992",
  corpsource = "Oak Ridge Nat. Lab., TN, USA",
  keywords = "algorithm; computations; computing; distributed;
    distributed memory systems; distributed memory version;
    evaluation; Intel Delta multicomputer; Level 3 BLAS;
    library routines; linear algebra; mathematics; matrix;
    memory concurrent computers; object-oriented interface;
    object-oriented programming; performance; right-looking
    LU factorization; scalable linear algebra library;
    ScaLAPACK; software package; software packages; square
    block scattered decomposition",
  sponsororg = "IEEE; NASA",
  treatment = "A Application; P Practical",
}
@Article{Croz:1992:SMM,
  author = "Jeremy J. {Du Croz} and Nicholas J. Higham",
  title = "Stability of Methods for Matrix Inversion",
  journal = j-IMA-J-NUMER-ANAL,
  volume = "12",
  pages = "1--19",
  year = "1992",
  CODEN = "IJNADH",
  ISSN = "0272-4979 (print), 1464-3642 (electronic)",
  bibdate = "Sat Dec 23 14:54:28 2000",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{DuCroz:1990:SMM}.",
  acknowledgement = ack-njh,
}
@InProceedings{Demmel:1992:DPH,
  author = "J. Demmel and J. Dongarra and W. Kahan",
  title = "On Designing Portable High Performance Numerical
    Libraries",
  crossref = "Griffiths:1992:NAP",
  pages = "??--??",
  month = jun,
  year = "1992",
  bibdate = "Tue Feb 26 10:10:44 2002",
  bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/par.lin.alg.bib;
    http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Demmel:1991:DPH}.",
  acknowledgement = ack-nhfb,
}
@Article{Demmel:1992:JMM,
  author = "James Demmel and Kre{\v{s}}imir Veseli{\'c}",
  title = "{Jacobi}'s Method is More Accurate than {$ Q R $}",
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = "13",
  number = "4",
  pages = "1204--1245",
  month = oct,
  year = "1992",
  CODEN = "SJMAEL",
  ISSN = "0895-4798 (print), 1095-7162 (electronic)",
  MRclass = "65F15 (65G05)",
  MRnumber = "93e:65057",
  bibdate = "Tue Jan 21 08:54:30 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Demmel:1989:JMM}.",
  acknowledgement = ack-nhfb,
}
@Article{Demmel:1992:SBA,
  author = "James W. Demmel and Nicholas J. Higham",
  title = "Stability of Block Algorithms with Fast Level-3
    {BLAS}",
  journal = j-TOMS,
  volume = "18",
  number = "3",
  pages = "274--291",
  month = sep,
  year = "1992",
  CODEN = "ACMSCU",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  bibdate = "Fri Sep 30 01:27:16 1994",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/duff-iain-s.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See
    \cite{Dongarra:1990:ASL,Higham:1990:EFM,Dayde:1994:PBI}.
    See original LAPACK Working note in
    \cite{Demmel:1990:SBA}.",
  URL = "http://www.acm.org/pubs/toc/Abstracts/0098-3500/131769.html",
  abstract = "Block algorithms are becoming increasingly popular in
    matrix computations. Since their basic unit of data is
    a submatrix rather than a scalar, they have a higher
    level of granularity than point algorithms, and this
    makes them well suited to high-performance computers.
    The numerical stability of the block algorithms in the
    new linear algebra program library LAPACK is
    investigated here. It is shown that these algorithms
    have backward error analyses in which the backward
    error bounds are commensurate with the error bounds for
    the underlying level-3 BLAS (BLAS3). One implication is
    that the block algorithms are as stable as the
    corresponding point algorithms when conventional BLAS3
    are used. A second implication is that the use of BLAS3
    based on fast matrix multiplication techniques affects
    the stability only insofar as it increases the constant
    terms in the normwise backward error bounds. For linear
    equation solvers employing {\em LU} factorization, it
    is shown that fixed precision iterative refinement
    helps to mitigate the effect of the larger error
    constants. Despite the positive results presented here,
    not all plausible block algorithms are stable; we
    illustrate this with the example of {\em LU}
    factorization with block triangular factors and
    describe how to check a block algorithm for stability
    without doing a full error analysis.",
  acknowledgement = ack-nhfb,
  keywords = "algorithms; performance",
  subject = "{\bf G.1.3}: Mathematics of Computing, NUMERICAL
    ANALYSIS, Numerical Linear Algebra. {\bf F.2.1}: Theory
    of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM
    COMPLEXITY, Numerical Algorithms and Problems,
    Computations on matrices.",
}
@InProceedings{Dongarra:1992:LASb,
  author = "J. Dongarra and R. {van de Geijn} and D. Walker",
  title = "A look at scalable dense linear algebra libraries",
  crossref = "IEEE:1992:SHP",
  pages = "??--??",
  year = "1992",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/par.lin.alg.bib;
    http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "IEEE catalog number 92TH0432-5. See original LAPACK
    Working note in \cite{Dongarra:1992:LASa}.",
  acknowledgement = ack-nhfb,
  classcodes = "C4140 (Linear algebra); C7310 (Mathematics); C6110P
    (Parallel programming)",
  conflocation = "Williamsburg, VA, USA; 26-29 April 1992",
  corpsource = "Dept. of Comput. Sci., Tennessee Univ., TN, USA",
  keywords = "14 GFLOPS; applications; concurrent computers; Delta
    system; dense matrix problems; distributed memory;
    double precision; Intel Touchstone; linear algebra; LU
    factorization; mathematics computing; object-oriented;
    object-oriented interface; parallel implementation;
    parallel programming; portable; programming; scalable
    dense linear algebra libraries; software portability;
    square block scattered decomposition; subroutines; user
    interfaces",
  sponsororg = "IEEE",
  treatment = "P Practical",
}
@Article{Dongarra:1992:NCC,
  author = "Jack J. Dongarra and Sven Hammarling and James H.
    Wilkinson",
  title = "Numerical Considerations in Computing Invariant
    Subspaces",
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = "13",
  number = "1",
  pages = "145--161",
  month = jan,
  year = "1992",
  CODEN = "SJMAEL",
  ISSN = "0895-4798 (print), 1095-7162 (electronic)",
  MRclass = "65F15",
  MRnumber = "93a:65049",
  MRreviewer = "Colette Lebaud",
  bibdate = "Tue Jan 21 08:54:30 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Dongarra:1990:NCC}.",
  acknowledgement = ack-nhfb,
}
@Article{Dongarra:1992:RCFb,
  author = "Jack J. Dongarra and Robert A. {van de Geijn}",
  title = "Reduction to condensed form for the eigenvalue problem
    on distributed memory architectures",
  journal = j-PARALLEL-COMPUTING,
  volume = "18",
  number = "9",
  pages = "973--982",
  month = sep,
  year = "1992",
  CODEN = "PACOEJ",
  ISSN = "0167-8191",
  MRclass = "65Y05 (65F15)",
  MRnumber = "1 190 458",
  bibdate = "Thu Sep 16 09:30:12 1999",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Dongarra:1991:RCF}.",
  URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/Reduction-to-Condensed-Form-for-the-Eigenvalue-Problem-on-Distributed-Memory.pdf",
  abstract = "The authors describe a parallel implementation for the
    reduction of general and symmetric matrices to
    Hessenberg and tridiagonal form, respectively. The
    methods are based on LAPACK sequential codes and use a
    panel-wrapped mapping of matrices to nodes. Results
    from experiments on the Intel Touchstone Delta are
    given.",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
    USA",
  classcodes = "C7310 (Mathematics); C5220P (Parallel architecture);
    C4140 (Linear algebra)",
  classification = "C4140 (Linear algebra); C5220P (Parallel
    architecture); C7310 (Mathematics)",
  corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
    USA",
  keywords = "architectures; distributed memory; Distributed memory
    architectures; distributed memory systems; Eigenvalue
    problem; eigenvalue problem; eigenvalues and
    eigenfunctions; Hessenberg form; Intel Touchstone
    Delta; LAPACK sequential codes; linear algebra;
    mapping; mathematics computing; panel-wrapped;
    Panel-wrapped mapping; parallel; Parallel
    implementation; parallel implementation; Symmetric
    matrices; symmetric matrices; Tridiagonal form;
    tridiagonal form",
  pubcountry = "Netherlands",
  thesaurus = "Distributed memory systems; Eigenvalues and
    eigenfunctions; Linear algebra; Mathematics computing;
    Parallel architectures",
  treatment = "P Practical",
}
@Article{Anderson:1993:PLP,
  author = "E. C. Anderson and J. Dongarra",
  title = "Performance of {LAPACK}: a portable library of
    numerical linear algebra routines",
  journal = j-PROC-IEEE,
  volume = "81",
  number = "8",
  pages = "1094--1102",
  month = aug,
  year = "1993",
  CODEN = "IEEPAD",
  ISSN = "0018-9219",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Anderson:1992:PLP}.",
  URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/Performance-of-LAPACK-A-Portable-Library.pdf",
  acknowledgement = ack-nhfb,
  classcodes = "C7310 (Mathematics); C4140 (Linear algebra); C5440
    (Multiprocessor systems and techniques); C6150G
    (Diagnostic, testing, debugging and evaluating
    systems)",
  corpsource = "Cray Res. Center, Eagan, MN, USA",
  keywords = "algebra routines; computers; evaluation; LAPACK
    project; library; linear algebra; mathematics
    computing; numerical linear; numerical linear algebra;
    parallel; parallel processors; performance; performance
    tuning; portability; portable library; program testing;
    shared memory systems; shared-memory vector; software",
  treatment = "P Practical",
}
@Article{Bai:1993:CGS,
  author = "Zhao Jun Bai and James W. Demmel",
  title = "Computing the generalized singular value
    decomposition",
  journal = j-SIAM-J-SCI-COMP,
  volume = "14",
  number = "6",
  pages = "1464--1486",
  month = nov,
  year = "1993",
  CODEN = "SJOCE3",
  ISSN = "1064-8275 (print), 1095-7197 (electronic)",
  ISSN-L = "1064-8275",
  MRclass = "65F30",
  MRnumber = "94h:65043",
  bibdate = "Tue Apr 29 18:15:07 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Bai:1992:CGS}.",
  acknowledgement = ack-nhfb,
}
@Article{Bai:1993:SDB,
  author = "Zhaojun Bai and James W. Demmel",
  title = "On Swapping Diagonal Blocks in Real {Schur} Form",
  journal = j-LINEAR-ALGEBRA-APPL,
  volume = "186",
  pages = "73--95",
  year = "1993",
  CODEN = "LAAPAW",
  ISSN = "0024-3795 (print), 1873-1856 (electronic)",
  MRclass = "15A18",
  MRnumber = "94d:15006",
  bibdate = "Wed Jan 22 17:57:24 MST 1997",
  bibsource = "/usr/local/src/bib/bibliography/Parallel/par.lin.alg.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Bai:1992:SDB}.",
  acknowledgement = ack-nhfb,
}
@Article{Demmel:1993:CAS,
  author = "James W. Demmel and William Gragg",
  title = "On Computing Accurate Singular Values and Eigenvalues
    of Matrices With Acyclic Graphs",
  journal = j-LINEAR-ALGEBRA-APPL,
  volume = "185",
  pages = "203--217",
  month = may,
  year = "1993",
  CODEN = "LAAPAW",
  ISSN = "0024-3795 (print), 1873-1856 (electronic)",
  MRclass = "65F30 (15A18)",
  MRnumber = "94h:65044",
  bibdate = "Wed Jan 22 17:57:24 MST 1997",
  bibsource = "/usr/local/src/bib/bibliography/Parallel/par.lin.alg.bib;
    /usr/local/src/bib/bibliography/Theory/Matrix.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Demmel:1992:CAS}.",
  acknowledgement = ack-nhfb,
  keywords = "nla, la, pert, svd, eig, arrowhead matrix, acyclic
    graph",
}
@Article{Demmel:1993:IEB,
  author = "James W. Demmel and Nicholas J. Higham",
  title = "Improved Error Bounds for Underdetermined System
    Solvers",
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = "14",
  number = "1",
  pages = "1--14",
  month = jan,
  year = "1993",
  CODEN = "SJMAEL",
  ISSN = "0895-4798 (print), 1095-7162 (electronic)",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Demmel:1990:IEB}.",
  acknowledgement = ack-njh,
  mynote = "Also LAPACK Working Note \#23.",
}
@InCollection{Demmel:1993:PNLb,
  author = "J. Demmel and M. Heath and H. {van der Vorst}",
  booktitle = "Acta Numerica 1993",
  title = "Parallel Numerical Linear Algebra",
  publisher = pub-CAMBRIDGE,
  address = pub-CAMBRIDGE:adr,
  pages = "111--198",
  year = "1993",
  bibdate = "Thu Jun 8 12:55:05 MDT 1995",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/v/vandervorst-henk-a.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Demmel:1993:PNLa}.",
}
@InProceedings{Demmel:1993:TPN,
  author = "James W. Demmel",
  editor = "Marc S. Moonen and Gene H. Golub and Bart L. De Moor",
  booktitle = "Linear Algebra for Large Scale and Real-Time
    Applications",
  title = "Trading Off Parallelism and Numerical Stability",
  volume = "232",
  publisher = pub-KLUWER,
  address = pub-KLUWER:adr,
  pages = "49--68",
  year = "1993",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Demmel:1992:TPN}.",
  series = "NATO ASI Series E",
}
@InProceedings{Dongarra:1993:TDB,
  author = "J. J. Dongarra and R. A. {van de Geijn} and R. Clint
    Whaley",
  title = "Two Dimensional Basic Linear Algebra Communication
    Subprograms",
  crossref = "Sincovec:1993:SCP",
  pages = "347--352",
  year = "1993",
  bibdate = "Fri Mar 1 10:04:10 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Dongarra:1991:TDB}.",
  acknowledgement = ack-nhfb,
}
@Article{Higham:1993:PTB,
  author = "Nicholas J. Higham",
  title = "Perturbation theory and backward error analysis for {$
    A X - X B = C $}",
  journal = j-BIT,
  volume = "33",
  number = "1",
  pages = "124--136",
  year = "1993",
  CODEN = "BITTEL, NBITAB",
  ISSN = "0006-3835 (print), 1572-9125 (electronic)",
  MRclass = "65F05 (65G05)",
  MRnumber = "96a:65036",
  bibdate = "Fri Nov 13 07:00:34 MST 1998",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Higham:1992:PTB}.",
  URL = "http://www.mai.liu.se/BIT/contents/bit33.html",
  acknowledgement = ack-njh # " and " # ack-nhfb,
}
@InProceedings{Choi:1994:DPD,
  author = "J. Choi and J. J. Dongarra and D. W. Walker",
  title = "The design of a parallel, dense linear algebra
    software library: reduction to {Hessenberg},
    tridiagonal, and bidiagonal form",
  crossref = "Dongarra:1994:PSW",
  pages = "98--111",
  year = "1994",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Choi:1995:DPDa} and \cite{Choi:1995:DPDb}.",
  acknowledgement = ack-nhfb,
  classcodes = "C7310 (Mathematics computing); C6110B (Software
    engineering techniques); C5440 (Multiprocessing
    systems); C4140 (Linear algebra); C6110P (Parallel
    programming)",
  conflocation = "Townsend, TN, USA; 25-27 May 1994",
  conftitle = "Proceedings of the Second Workshop on Environments and
    Tools for Parallel Scientific Computing",
  corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
    USA",
  keywords = "algebra; Basic Linear Algebra Communication
    Subprograms; bidiagonal form; distributed Level 3 BLAS
    routines; distributed memory concurrent computers;
    distributed memory systems; Hessenberg; higher level;
    library routines; mathematics computing; matrix; panel
    reduction phase; Parallel Block BLAS; parallel dense
    linear algebra software library; parallel programming;
    PB-BLAS; reduction algorithms; ScaLAPACK; sequential
    BLAS; software engineering considerations; software
    libraries; tridiagonal",
  treatment = "P Practical",
}
@InProceedings{Choi:1994:PMT,
  author = "Jaeyoung Choi and J. J. Dongarra and D. W. Walker",
  title = "Parallel matrix transpose algorithms on distributed
    memory concurrent computers",
  crossref = "IEEE:1994:PSP",
  pages = "245--252",
  year = "1994",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Choi:1993:PMT}.",
  acknowledgement = ack-nhfb,
  classcodes = "C7310 (Mathematics); C4240P (Parallel programming and
    algorithm theory); C4140 (Linear algebra); C5440
    (Multiprocessor systems and techniques)",
  conflocation = "Mississippi State, MS, USA; 6-8 Oct. 1993",
  conftitle = "Proceedings of Scalable Parallel Libraries
    Conference",
  corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
  keywords = "algebra; block scattered data distribution; computer;
    concurrent computers; distributed memory; distributed
    memory systems; Intel Touchstone Delta; mathematics
    computing; matrix; matrix multiplication routine;
    parallel algorithms; parallel matrix transpose
    algorithms; point-to-point communication; PUMMA
    package; synchronisation; transposed matrices",
  sponsororg = "Mississippi State Univ.; Nat. Sci. Found",
  treatment = "A Application; P Practical",
}
@Article{Choi:1994:PPU,
  author = "Jaeyoung Choi and Jack J. Dongarra and David W.
    Walker",
  title = "{PUMMA}: {Parallel Universal Matrix Multiplication
    Algorithms} on distributed memory concurrent
    computers",
  journal = j-CPE,
  volume = "6",
  number = "7",
  pages = "543--570",
  month = oct,
  year = "1994",
  CODEN = "CPEXEI",
  ISSN = "1040-3108",
  bibdate = "Tue Feb 26 09:30:21 2002",
  bibsource = "Compendex database;
    http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Choi:1993:PPU}.",
  URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/PUMMA-Parallel-Universal-Matrix-Multiplication-Algorithms.pdf",
  abstract = "The paper describes Parallel Universal Matrix
    Multiplication Algorithms (PUMMA) on distributed memory
    concurrent computers. The PUMMA package includes not
    only the non-transposed matrix multiplication routine
    {$ C = A \cdot B $}, but also transposed multiplication
    routines {$ C = A^T \cdot B $}, {$ C = A \cdot B^T $},
    and {$ C = A^T \cdot B^T $}, for a block cyclic data
    distribution. The routines perform efficiently for a
    wide range of processor configurations and block sizes.
    The PUMMA together provide the same functionality as
    the Level 3 BLAS routine xGEMM. Details of the parallel
    implementation of the routines are given, and results
    are presented for runs on the Intel Touchstone Delta
    computer.",
  acknowledgement = ack-nhfb,
  affiliation = "Oak Ridge Natl Lab",
  affiliationaddress = "Oak Ridge, TN, USA",
  classcodes = "C7310 (Mathematics); C5440 (Multiprocessor systems and
    techniques); C4240P (Parallel programming and algorithm
    theory); C4140 (Linear algebra)",
  classification = "722.4; 723.1; 921.1",
  corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
  journalabr = "Concurrency Pract Exper",
  keywords = "algebra; Algorithms; block cyclic data distribution;
    block sizes; Computer architecture; configurations;
    Distributed memory concurrent computers; distributed
    memory concurrent computers; distributed memory
    systems; Intel Touchstone Delta Computer; level 3 BLAS
    routine xGEMM; Mathematical operators; mathematics
    computing; matrix; Matrix algebra; matrix
    multiplication routine; Multiprogramming;
    nontransposed; parallel algorithms; Parallel processing
    systems; Parallel Universal Matrix Multiplication
    Algorithm (PUMMA); parallel universal matrix
    multiplication algorithms; processor; PUMMA; routines;
    transposed multiplication",
  treatment = "A Application; P Practical",
}
@Article{Dayde:1994:PBI,
  author = "Michel J. Dayd{\'e} and Iain S. Duff and Antoine
    Petitet",
  title = "A Parallel Block Implementation of Level-3 {BLAS} for
    {MIMD} Vector Processors",
  journal = j-TOMS,
  volume = "20",
  number = "2",
  pages = "178--193",
  month = jun,
  year = "1994",
  CODEN = "ACMSCU",
  DOI = "https://doi.org/10.1145/178365.174413",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  bibdate = "Fri Sep 09 13:52:29 1994",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See
    \cite{Dongarra:1990:ASL,Higham:1990:EFM,Demmel:1992:SBA}.",
  URL = "http://www.acm.org/pubs/citations/journals/toms/1994-20-2/p178-dayde/",
  acknowledgement = ack-nhfb,
  keywords = "algorithms; Level-3 BLAS; matrix-matrix kernels;
    measurement; parallelization; performance;
    vectorization",
  subject = "F.2.1 [Analysis of Algorithms and Problem Complexity]:
    Numerical Algorithms and Problems--computations on
    matrices; G.1.0 [Numerical Analysis]:
    General--numerical algorithms; G.1.3 [Numerical
    Analysis]: Numerical Linear Algebra--linear systems
    (direct and iterative methods); G.4 [Mathematics of
    Computing]: Mathematical Software--certification and
    testing; efficiency; portability; reliability and
    robustness; verification",
}
@Article{Demmel:1994:FNA,
  author = "James W. Demmel and Xiaoye Li",
  title = "Faster Numerical Algorithms via Exception Handling",
  journal = j-IEEE-TRANS-COMPUT,
  volume = "43",
  number = "8",
  pages = "983--992",
  month = aug,
  year = "1994",
  CODEN = "ITCOB4",
  ISSN = "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L = "0018-9340",
  bibdate = "Mon May 20 06:16:49 MDT 2002",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib; OCLC
    Proceedings database",
  note = "This is an expanded version of
    \cite{Demmel:1993:FNA}.",
  URL = "http://www.cs.berkeley.edu/~xiaoye/ieee.ps.gz",
  acknowledgement = ack-nhfb,
  remark = "Selected revised and extended papers from ARITH'11
    \cite{Swartzlander:1993:PSC}.",
}
@InProceedings{Dongarra:1994:SMLb,
  author = "J. Dongarra and A. Lumsdaine and X. Niu and R. Pozo
    and K. Remington",
  title = "A Sparse Matrix Library in {C++} For High Performance
    Architectures",
  crossref = "Anonymous:1994:OON",
  pages = "214--218",
  year = "1994",
  bibdate = "Thu Sep 16 09:48:36 MDT 1999",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{Dongarra:1994:SMLa}.",
  URL = "http://www.netlib.org/netlib/lapack/lawns/lawn74.ps;
    http://www.netlib.org/netlib/lapack/lawnspdf/lawn74.pdf",
  acknowledgement = ack-nhfb,
}
@Article{vandeGeijn:1994:GCO,
  author = "R. A. {van de Geijn}",
  title = "On Global Combine Operations",
  journal = j-J-PAR-DIST-COMP,
  volume = "22",
  number = "2",
  pages = "324--328",
  month = aug,
  year = "1994",
  CODEN = "JPDCER",
  DOI = "https://doi.org/10.1006/jpdc.1994.1091",
  ISSN = "0743-7315 (print), 1096-0848 (electronic)",
  bibdate = "Thu Mar 9 09:18:55 MST 2000",
  bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in
    \cite{vandeGeijn:1991:GCO}.",
  URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1994.1091/production;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1994.1091/production/pdf",
  acknowledgement = ack-nhfb,
  classification = "C4230M (Multiprocessor interconnection); C4240P
    (Parallel programming and algorithm theory); C5220P
    (Parallel architecture); C5440 (Multiprocessor systems
    and techniques)",
  corpsource = "Dept. of Comput. Sci., Texas Univ., Austin, TX, USA",
  keywords = "algorithms; distributed memory multiple instruction
    multiple data; distributed memory systems; global
    combine operations; hybrid strategy; hypercube
    networks; Intel iPSC/860; multicomputers; parallel",
  treatment = "P Practical",
}
@Article{Bai:1995:TLAb,
  author = {Z. Bai and D. Day and J. Demmel and J. Dongarra},
  title = {Templates for Linear Algebra Problems},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1000},
  pages = {115--??},
  year = {1995},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  bibdate = {Sat May 11 13:45:32 MDT 1996},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Bai:1995:TLAa}.},
  URL = {http://www.netlib.org/utk/papers/etemplates.ps;
    http://www.netlib.org/utk/papers/etemplates/paper.html},
  acknowledgement = ack-nhfb,
}
@Article{Choi:1995:DPDb,
  author = {Jaeyoung Choi and Jack J. Dongarra and David W.
    Walker},
  title = {The design of a parallel dense linear algebra software
    library: reduction to {Hessenberg}, tridiagonal, and
    bidiagonal form},
  journal = j-NUMER-ALGORITHMS,
  volume = {10},
  number = {3--4},
  pages = {379--399},
  month = oct,
  year = {1995},
  CODEN = {NUALEG},
  ISSN = {1017-1398 (print), 1572-9265 (electronic)},
  MRclass = {65-04 (65Y10)},
  MRnumber = {1 355 739},
  bibdate = {Sat Mar 22 15:39:54 MST 1997},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Choi:1995:DPDa,Choi:1994:DPD}.},
  acknowledgement = ack-nhfb,
  classcodes = {B0290H (Linear algebra); C7310 (Mathematics
    computing); C4140 (Linear algebra); C6110B (Software
    engineering techniques); C6115 (Programming support)},
  corpsource = {Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
    USA},
  keywords = {Basic Linear Algebra Communication; bidiagonal; BLACS;
    computers; computing; dense; distributed memory
    concurrent; eigenproblems; eigenvalues and
    eigenfunctions; form; Hessenberg form; LAPACK; linear
    algebra; linear algebra computations; mathematics;
    matrices; matrix reduction algorithms; parallel BLAS;
    parallel dense linear algebra software library;
    routine; ScaLAPACK; sequencing BLAS; software
    engineering; software libraries; Subprograms;
    tridiagonal form},
  treatment = {A Application; P Practical},
}
@Article{Demmel:1995:CSB,
  author = {James W. Demmel and Inderjit Dhillon and Huan Ren},
  title = {On the Correctness of Some Bisection-Like Parallel
    Eigenvalue Algorithms in Floating Point Arithmetic},
  journal = j-ETNA,
  volume = {3},
  pages = {116--149},
  year = {1995},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Demmel:1994:CPB}.},
}
@Article{Heath:1995:CPN,
  author = {Michael T. Heath and Padma Raghavan},
  title = {A {Cartesian} Parallel Nested Dissection Algorithm},
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = {16},
  number = {1},
  pages = {235--253},
  month = jan,
  year = {1995},
  CODEN = {SJMAEL},
  ISSN = {0895-4798 (print), 1095-7162 (electronic)},
  MRclass = {65F05 (65F50 65Y05)},
  MRnumber = {95m:65046},
  MRreviewer = {Ming Kui Chen},
  bibdate = {Fri Dec 4 12:14:09 MST 1998},
  bibsource = {http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/16/1;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Heath:1992:CPN}.},
  URL = {http://epubs.siam.org/sam-bin/dbq/article/23827},
  acknowledgement = ack-nhfb,
}
@InProceedings{Plank:1995:ADC,
author = "James S. Plank and Youngbae Kim and Jack J. Dongarra",
title = "Algorithm-Based Diskless Checkpointing for
Fault-Tolerant Matrix Operations",
crossref = "IEEE:1995:DPT",
pages = "351--360",
year = "1995",
bibdate = "Mon Aug 26 07:58:57 1996",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Plank:1994:ABD}.",
URL = "http://www.cs.utk.edu/~plank/plank/papers/FTCS25.1995.html;
http://www.netlib.org/utk/papers/fault.ps;
http://www.netlib.org/utk/people/JackDongarra/pdf/fault.pdf",
abstract = "This paper is an exploration of diskless
check-pointing for distributed scientific computations.
With the widespread use of the `Network Of Workstation'
(NOW) platform for distributed computing, long-running
scientific computations need to tolerate the changing
and often faulty nature of NOW environments. We present
high-performance implementations of several algorithms
for distributed scientific computing, including
Cholesky factorization, LU factorization, QR
factorization, and Preconditioned Conjugate Gradient.
These implementations are able to run on PVM networks
of at least N processors, and can complete with low
overhead as long as any N processors remain functional.
We discuss the details of how the algorithms are tuned
for fault-tolerance, and present the performance
results on a PVM network of SUN workstations, and on
the IBM SP2.",
acknowledgement = ack-nhfb,
affiliation = "Univ of Tennessee",
affiliationaddress = "TN, USA",
classcodes = "C6150N (Distributed systems software); C6110B
(Software engineering techniques); C4140 (Linear
algebra); C7300 (Natural sciences computing); C4130
(Interpolation and function approximation)",
classification = "722.2; 722.4; 723.1",
conference = "Proceedings of the 25th International Symposium on
Fault-Tolerant Computing",
conflocation = "Pasadena, CA, USA; 27-30 June 1995",
conftitle = "Twenty-Fifth International Symposium on Fault-Tolerant
Computing. Digest of Papers",
corpsource = "Dept. of Comput. Sci., Tennessee Univ., TN, USA",
journalabr = "Dig Pap Int Symp Fault Tolerant Comput",
keywords = "algebra; Algorithm based diskless checkpointing;
algorithm-based diskless checkpointing; Algorithms;
Cholesky; Cholesky factorization; computations;
Computer networks; Computer workstations; conjugate
gradient methods; Distributed computer systems;
Distributed scientific computations; distributed
scientific computations; factorization; fault tolerant;
Fault tolerant computer systems; Fault tolerant matrix
operations; fault-tolerance; high-performance
implementations; IBM SP2; local area networks;
long-running scientific; low overhead; LU
factorization; matrix; matrix operations; natural
sciences computing; Network of workstation (NOW)
platform; Parallel processing systems; performance;
preconditioned conjugate gradient; Preconditioned
conjugate gradient; processors; PVM networks; QR
factorization; software fault; subroutines; SUN;
tolerance; workstation network platform; workstations",
meetingaddress = "Pasadena, CA, USA",
meetingdate = "Jun 27--30 1995",
meetingdate2 = "06/27--30/95",
sponsor = "IEEE",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Fault-Tolerant
Comput.; LAAS-CNRS, France; Univ. Illinois at
Urbana-Champaign; Univ. California at Los Angeles; Jet
Propulsion Lab.; IFIP WG 10.4",
treatment = "T Theoretical or Mathematical",
}
@Article{Raghavan:1995:DSG,
  author = {Padma Raghavan},
  title = {Distributed sparse {Gaussian} elimination and
    orthogonal factorization},
  journal = j-SIAM-J-SCI-COMP,
  volume = {16},
  number = {6},
  pages = {1462--1477},
  month = nov,
  year = {1995},
  CODEN = {SJOCE3},
  ISSN = {1064-8275 (print), 1095-7197 (electronic)},
  ISSN-L = {1064-8275},
  MRclass = {65F50 (65F05 65F20)},
  MRnumber = {96g:65046},
  MRreviewer = {Zahari Zlatev},
  bibdate = {Tue Apr 29 18:25:50 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Raghavan:1993:DSG}.},
  acknowledgement = ack-nhfb,
}
@Article{Barrett:1996:ABI,
author = "R. Barrett and M. Berry and J. Dongarra and V.
Eijkhout and C. Romine",
title = "Algorithmic bombardment for the iterative solution of
linear systems: a poly-iterative approach",
journal = j-J-COMPUT-APPL-MATH,
volume = "74",
number = "1--2",
pages = "91--109",
day = "5",
month = "????",
year = "1996",
CODEN = "JCAMDI",
ISSN = "0377-0427 (print), 1879-1778 (electronic)",
MRclass = "65F10 (65N22 65Y05)",
MRnumber = "97j:65052",
bibdate = "Sat Mar 22 15:39:54 MST 1997",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Barrett:1994:ABI}.",
URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/bombard.ps;
http://www.netlib.org/utk/people/JackDongarra/pdf/bombard.pdf",
acknowledgement = ack-nhfb,
classcodes = "B0290H (Linear algebra); B0290F (Interpolation and
function approximation); C4140 (Linear algebra); C4130
(Interpolation and function approximation); C4240P
(Parallel programming and algorithm theory)",
conflocation = "Austin, TX, USA; April 1995",
conftitle = "TICAM Symposium. Texas Institute for Computational and
Applied Mathematics",
corpsource = "Distributed Comput. Group, Los Alamos Nat. Lab., NM,
USA",
keywords = "algorithmic bombardment; convergence; cost;
environment; global communications; indefinite;
iterative methods; iterative solution; linear systems;
matrix; matrix algebra; matrix properties; nonsymmetric
matrix; parallel algorithms; parallel environment;
poly-iterative approach; sequential computing",
treatment = "T Theoretical or Mathematical",
}
@InProceedings{Blackford:1996:PEDb,
  author = {L. S. Blackford and A. Cleary and J. Demmel and I.
    Dhillon and J. Dongarra and S. Hammarling and A.
    Petitet and H. Ren and K. Stanley and R. C. Whaley},
  title = {Practical experience in the dangers of heterogeneous
    computing},
  crossref = {Wasniewski:1996:APC},
  pages = {57--64},
  year = {1996},
  bibdate = {Tue Feb 26 08:49:09 2002},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Blackford:1996:PEDa}.},
  URL = {http://www.netlib.org/utk/papers/practical-hetro/paper.html;
    http://www.netlib.org/utk/papers/practical-hetro/paper.ps;
    http://www.netlib.org/utk/people/JackDongarra/pdf/prac-het.pdf},
  acknowledgement = ack-nhfb,
  classcodes = {C7310 (Mathematics computing); C6110B (Software
    engineering techniques); C6110P (Parallel programming);
    C6150N (Distributed systems software); C6115
    (Programming support)},
  conflocation = {Lyngby, Denmark; 18-21 Aug. 1996},
  conftitle = {Applied Parallel Computing. Industrial Computation and
    Optimization. Third International Workshop, PARA'96.
    Proceedings},
  corpsource = {Tennessee Univ., Knoxville, TN, USA},
  keywords = {distributed memory systems; floating point arithmetic;
    heterogeneous computing; libraries; mathematics
    computing; numerical library software; parallel
    algorithms; ScaLAPACK; software; software portability;
    software reliability; software robustness},
  treatment = {A Application; G General Review},
}
@InProceedings{Blackford:1996:SPL,
  author = {Laura Susan Blackford and J. Choi and A. Cleary and A.
    Petitet and R. C. Whaley and J. Demmel and I. Dhillon
    and K. Stanley and J. Dongarra and S. Hammarling and G.
    Henry and D. Walker},
  title = {{ScaLAPACK}: {A} Portable Linear Algebra Library for
    Distributed Memory Computers --- Design Issues and
    Performance},
  crossref = {ACM:1996:SCP},
  pages = {??--??},
  year = {1996},
  bibdate = {Mon Mar 23 12:31:18 1998},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Choi:1995:SPL}.},
  URL = {http://www.netlib.org/utk/papers/sc96-scalapack/paper.html;
    http://www.netlib.org/utk/papers/sc96-scalapack/paper.ps;
    http://www.netlib.org/utk/people/JackDongarra/pdf/scala96.pdf;
    http://www.supercomp.org/sc96/proceedings/SC96PROC/DONGARRA/INDEX.HTM},
  acknowledgement = ack-nhfb,
}
@Article{Choi:1996:DIS,
author = "Jaeyoung Choi and J. J. Dongarra and L. S. Ostrouchov
and A. P. Petitet and D. W. Walker and R. C.
Whaley",
title = "Design and implementation of the {ScaLAPACK LU}, {$ Q
R $}, and {Cholesky} factorization routines",
journal = j-SCI-PROG,
volume = "5",
number = "3",
pages = "173--184",
month = "Fall",
year = "1996",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
bibdate = "Sat Mar 22 15:39:54 MST 1997",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Choi:1994:DIS}.",
URL = "http://www.netlib.org/netlib/lapack/lawns/lawn80.ps;
http://www.netlib.org/netlib/lapack/lawnspdf/lawn80.pdf;
http://www.netlib.org/utk/papers/factor/ftcover.html",
acknowledgement = ack-nhfb,
classcodes = "C4140 (Linear algebra); C6110B (Software engineering
techniques); C6115 (Programming support); C5440
(Multiprocessing systems); C6150N (Distributed systems
software); C6110P (Parallel programming)",
corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
USA",
keywords = "BLACS; BLAS; block; communication; cyclic data
distribution; de facto standard kernels; dense linear
equation system; distributed memory systems;
engineering; factorization routine; Intel; Intel
Touchstone Delta; Intel Paragon System; iPSC/860;
linear algebra; matrix; message passing; operations;
parallel implementations; parallel machines; parallel
programming; parallelized sequential LAPACK; PBLAS;
performance; performance evaluation; scalability;
ScaLAPACK Cholesky factorization routine; ScaLAPACK
library; ScaLAPACK LU factorization routine; ScaLAPACK
QR; software; software libraries; software packages;
vector operations",
treatment = "P Practical",
}
@InProceedings{Choi:1996:PSP,
  author = {Jaeyoung Choi and J. Dongarra and S. Ostrouchov and A.
    Petitet and D. Walker and R. C. Whaley},
  title = {A proposal for a set of {Parallel Basic Linear Algebra
    Subprograms}},
  crossref = {Dongarra:1996:APC},
  pages = {107--114},
  year = {1996},
  bibdate = {Sat Mar 22 15:39:54 MST 1997},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Choi:1995:PSP}.},
  acknowledgement = ack-nhfb,
  classcodes = {C7310 (Mathematics computing); C6110P (Parallel
    programming); C4140 (Linear algebra)},
  conflocation = {Lyngby, Denmark; 21-24 Aug. 1995},
  conftitle = {Applied Parallel Computing. Computations in Physics,
    Chemistry and Engineering Science},
  corpsource = {Sch. of Comput., Soongsil Univ., Seoul, South Korea},
  keywords = {basic linear algebra; distributed memory; linear
    algebra; linear algebra subprograms; parallel; parallel
    programming; PBLAS; software libraries},
  treatment = {T Theoretical or Mathematical},
}
@InProceedings{Dongarra:1996:PFI,
  author = {J. J. Dongarra and J. {Du Croz} and S. Hammarling and
    J. Wa{\'s}niewski and A. Zemla},
  title = {A proposal for a {Fortran 90} interface for {LAPACK}},
  crossref = {Dongarra:1996:APC},
  pages = {158--165},
  year = {1996},
  bibdate = {Sat Mar 22 15:39:54 MST 1997},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Dongarra:1995:PFI}.},
  acknowledgement = ack-nhfb,
  classcodes = {C7310 (Mathematics computing); C4140 (Linear algebra);
    C6140D (High level languages); C6180 (User
    interfaces)},
  conflocation = {Lyngby, Denmark; 21-24 Aug. 1995},
  conftitle = {Applied Parallel Computing. Computations in Physics,
    Chemistry and Engineering Science},
  corpsource = {Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
    USA},
  keywords = {FORTRAN; Fortran 90 interface; LAPACK; LAPACK code;
    linear algebra; mathematics computing; packages;
    software; user interfaces; user-interface},
  treatment = {P Practical; T Theoretical or Mathematical},
}
@Article{Henry:1996:PAU,
  author = {Greg Henry and Robert van de Geijn},
  title = {Parallelizing the {$ Q R $} Algorithm for the
    Unsymmetric Algebraic Eigenvalue Problem: Myths and
    Reality},
  journal = j-SIAM-J-SCI-COMP,
  volume = {17},
  number = {4},
  pages = {870--883},
  month = jul,
  year = {1996},
  CODEN = {SJOCE3},
  ISSN = {1064-8275 (print), 1095-7197 (electronic)},
  ISSN-L = {1064-8275},
  MRclass = {65F15 (15A18)},
  MRnumber = {97b:65044},
  bibdate = {Tue Apr 29 18:25:50 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Henry:1994:PQA}.},
  acknowledgement = ack-nhfb,
}
@Article{Kaagstrom:1996:CES,
  author = {Bo K{\aa}gstr{\"o}m and Peter Poromaa},
  title = {Computing eigenspaces with specified eigenvalues of a
    regular matrix pair {$ ({A}, {B}) $} and condition
    estimation: theory, algorithms and software},
  journal = j-NUMER-ALGORITHMS,
  volume = {12},
  number = {3--4},
  pages = {369--407},
  month = jul,
  year = {1996},
  CODEN = {NUALEG},
  ISSN = {1017-1398 (print), 1572-9265 (electronic)},
  MRclass = {65Fxx},
  MRnumber = {1 402 856},
  bibdate = {Tue Apr 29 08:56:05 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Kaagstrom:1994:CES}.},
  acknowledgement = ack-nhfb,
  classification = {B0290H (Linear algebra); C4140 (Linear algebra)},
  corpsource = {Dept. of Comput. Sci., Umea Univ., Sweden},
  keywords = {condition estimation; deflating sub-spaces;
    eigenspaces; eigenvalues; eigenvalues and
    eigenfunctions; error bounds; matrix algebra; numerical
    stability; reciprocal values; regular matrix pair;
    specified eigenvalues},
  pubcountry = {Switzerland},
  treatment = {T Theoretical or Mathematical},
}
@Article{Kaagstrom:1996:LSA,
  author = {Bo K{\aa}gstr{\"o}m and Peter Poromaa},
  title = {{LAPACK-style} algorithms and software for solving the
    generalized {Sylvester} equation and estimating the
    separation between regular matrix pairs},
  journal = j-TOMS,
  volume = {22},
  number = {1},
  pages = {78--103},
  month = mar,
  year = {1996},
  CODEN = {ACMSCU},
  DOI = {https://doi.org/10.1145/225545.225552},
  ISSN = {0098-3500 (print), 1557-7295 (electronic)},
  MRclass = {65-04 (65F30)},
  MRnumber = {1 383 186},
  bibdate = {Sat Aug 31 16:07:02 MDT 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Kaagstrom:1994:LSA}.},
  URL = {http://www.acm.org/pubs/citations/journals/toms/1996-22-1/p78-kagstrom/},
  abstract = {Robust and fast software to solve the generalized
    Sylvester equation ({$ A R - L B = C, D R - L E = F $})
    for unknowns {$R$} and {$L$} is presented. This special
    linear system of equations, and its transpose, arises
    in computing error bounds for computed eigenvalues and
    eigenspaces of the generalized eigenvalue problem {$ S
    - \lambda T $}, in computing deflating subspaces of the
    same problem, and in computing certain decompositions
    of transfer matrices arising in control theory. Our
    contributions are twofold. First, we reorganize the
    standard algorithm for this problem to use Level 3 BLAS
    operations, like matrix multiplication, in its inner
    loop. This speeds up the algorithm by a factor of 9 on
    an IBM RS6000. Second, we develop and compare several
    condition estimation algorithms, which inexpensively
    but accurately estimate the sensitivity of the solution
    of this linear system.},
  acknowledgement = ack-nhfb,
  keywords = {algorithms},
  subject = {{\bf G.4}: Mathematics of Computing, MATHEMATICAL
    SOFTWARE, Algorithm analysis. {\bf F.2.1}: Theory of
    Computation, ANALYSIS OF ALGORITHMS AND PROBLEM
    COMPLEXITY, Numerical Algorithms and Problems,
    Computations on matrices. {\bf G.1.3}: Mathematics of
    Computing, NUMERICAL ANALYSIS, Numerical Linear
    Algebra, Linear systems (direct and iterative methods).
    {\bf G.4}: Mathematics of Computing, MATHEMATICAL
    SOFTWARE, Reliability and robustness. {\bf G.1.3}:
    Mathematics of Computing, NUMERICAL ANALYSIS, Numerical
    Linear Algebra, Conditioning. {\bf G.1.3}: Mathematics
    of Computing, NUMERICAL ANALYSIS, Numerical Linear
    Algebra, Eigenvalues. {\bf G.4}: Mathematics of
    Computing, MATHEMATICAL SOFTWARE, Efficiency. {\bf
    F.2.1}: Theory of Computation, ANALYSIS OF ALGORITHMS
    AND PROBLEM COMPLEXITY, Numerical Algorithms and
    Problems, Computations on matrices. {\bf G.1.3}:
    Mathematics of Computing, NUMERICAL ANALYSIS, Numerical
    Linear Algebra, Matrix inversion.},
}
@Article{Lehoucq:1996:CEU,
  author = {R. B. Lehoucq},
  title = {The Computation of Elementary Unitary Matrices},
  journal = j-TOMS,
  volume = {22},
  number = {4},
  pages = {393--400},
  month = dec,
  year = {1996},
  CODEN = {ACMSCU},
  ISSN = {0098-3500 (print), 1557-7295 (electronic)},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Lehoucq:1995:CEU}.},
  abstract = {The construction of elementary unitary matrices that
    transform a complex vector to a multiple of $ e_1 $,
    the first column of the identity matrix, is studied. We
    present four variants and their software
    implementation, including a discussion on the {LAPACK}
    subroutine {CLARFG}. Comparisons are also given.},
  accepted = {June 1996},
  acknowledgement = ack-rfb,
  keywords = {algorithms},
  subject = {{\bf F.2}: Theory of Computation, ANALYSIS OF
    ALGORITHMS AND PROBLEM COMPLEXITY, Numerical Algorithms
    and Problems, Computations on matrices. {\bf G.1.3}:
    Mathematics of Computing, NUMERICAL ANALYSIS, Numerical
    Linear Algebra. {\bf G.4}: Mathematics of Computing,
    MATHEMATICAL SOFTWARE, Algorithm analysis.},
}
@Article{Bai:1997:SDN,
  author = {Z. Bai and J. Demmel and J. Dongarra and A. Petitet
    and H. Robinson and K. Stanley},
  title = {The Spectral Decomposition of Nonsymmetric Matrices on
    Distributed Memory Parallel Computers},
  journal = j-SIAM-J-SCI-COMP,
  volume = {18},
  number = {5},
  pages = {1446--1461},
  month = sep,
  year = {1997},
  CODEN = {SJOCE3},
  ISSN = {1064-8275 (print), 1095-7197 (electronic)},
  ISSN-L = {1064-8275},
  MRclass = {65F05 (65F30 65Y05)},
  MRnumber = {98d:65027},
  bibdate = {Tue Feb 26 10:04:07 2002},
  bibsource = {http://epubs.siam.org/sam-bin/dbq/toc/SISC/18/5;
    http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib;
    http://www.math.utah.edu/pub/tex/bib/siamjscicomput.bib},
  note = {See original LAPACK Working note in
    \cite{Bai:1995:SDN}.},
  URL = {http://epubs.siam.org/sam-bin/dbq/article/28136;
    http://www.netlib.org/utk/papers/sign/sign.html;
    http://www.netlib.org/utk/papers/sign/sign.ps;
    http://www.netlib.org/utk/people/JackDongarra/pdf/sign.pdf},
  acknowledgement = ack-nhfb,
}
@Article{Blackford:1997:PEN,
author = "L. S. Blackford and A. Cleary and A. Petitet and R. C.
Whaley and J. Demmel and I. Dhillon and H. Ren and K.
Stanley and J. Dongarra and S. Hammarling",
title = "Practical Experience in the Numerical Dangers of
Heterogeneous Computing",
journal = j-TOMS,
volume = "23",
number = "2",
pages = "133--147",
month = jun,
year = "1997",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/264029.264030",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
bibdate = "Tue Feb 26 10:10:44 2002",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Blackford:1996:PEDa} and
\cite{Blackford:1996:PEDb}.",
URL = "http://www.acm.org/pubs/citations/journals/toms/1997-23-2/p133-blackford/",
abstract = "Special challenges exist in writing reliable numerical
library software for heterogeneous computing
environments. Although a lot of software for
distributed-memory parallel computers has been written,
porting this software to a network of workstations
requires careful consideration. The symptoms of
heterogeneous computing failures can range from
erroneous results without warning to deadlock. Some of
the problems are straightforward to solve, but for
others the solutions are not so obvious, or incur an
unacceptable overhead. Making software robust on
heterogeneous systems often requires additional
communication. We describe and illustrate the problems
encountered during the development of ScaLAPACK and the
NAG Numerical PVM Library. Where possible, we suggest
ways to avoid potential pitfalls, or if that is not
possible, we recommend that the software not be used on
heterogeneous networks.",
acknowledgement = ack-rfb # " and " # ack-kr,
keywords = "distributed-memory systems; floating-point arithmetic;
heterogeneous processor networks; message passing;
numerical software; reliability",
subject = "{\bf D.1.3} Software, PROGRAMMING TECHNIQUES,
Concurrent Programming, Distributed programming. {\bf
G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS,
General, Computer arithmetic. {\bf G.1.0} Mathematics
of Computing, NUMERICAL ANALYSIS, General, Parallel
algorithms.",
}
@Article{Dongarra:1997:KCPb,
  author = {Jack J. Dongarra and Sven Hammarling and David W.
    Walker},
  title = {Key concepts for parallel out-of-core {$ L U $}
    factorization},
  journal = j-PARALLEL-COMPUTING,
  volume = {23},
  number = {1--2},
  pages = {49--70},
  day = {16},
  month = apr,
  year = {1997},
  CODEN = {PACOEJ},
  ISSN = {0167-8191},
  bibdate = {Tue Oct 21 15:14:48 MDT 1997},
  bibsource = {Compendex database;
    http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Dongarra:1996:KCP}.},
  acknowledgement = ack-nhfb,
  affiliation = {Univ of Tennessee},
  affiliationaddress = {Knoxville, TN, USA},
  classification = {714.2; 722.1; 722.4; 723; 723.1; 921},
  conference = {Proceedings of the 1996 International Workshop on
    Environments and Tools for Parallel Scientific
    Computing},
  journalabr = {Parallel Comput},
  keywords = {Algorithms; Computer architecture; Input output
    programs; lu factorization; Microprocessor chips;
    Parallel processing systems; Percolation (computer
    storage); Storage allocation (computer)},
  meetingaddress = {Faverges de la Tour, Fr},
  meetingdate = {Aug 22--23 1996},
  meetingdate2 = {08/22--23/96},
}
@Article{Higham:1997:IRL,
  author = {Nicholas J. Higham},
  title = {Iterative refinement for linear systems and {LAPACK}},
  journal = j-IMA-J-NUMER-ANAL,
  volume = {17},
  number = {4},
  pages = {495--509},
  month = oct,
  year = {1997},
  CODEN = {IJNADH},
  ISSN = {0272-4979 (print), 1464-3642 (electronic)},
  MRclass = {65F30},
  MRnumber = {98e:65036},
  bibdate = {Sat Dec 23 17:06:35 MST 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib;
    http://www3.oup.co.uk/imanum/hdb/Volume_17/Issue_04/;
    MathSciNet database},
  note = {Preprint published as Numerical Analysis Report 277,
    Manchester Centre for Computational Mathematics,
    Manchester, England, and as LAPACK Working Note 104.
    See original LAPACK Working note in
    \cite{Higham:1995:IRL}.},
  URL = {http://www3.oup.co.uk/imanum/hdb/Volume_17/Issue_04/170495.sgm.abs.html},
  acknowledgement = ack-nhfb,
}
@Article{Higham:1997:SDP,
  author = {Nicholas J. Higham},
  title = {Stability of the Diagonal Pivoting Method with Partial
    Pivoting},
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = {18},
  number = {1},
  pages = {52--65},
  month = jan,
  year = {1997},
  CODEN = {SJMAEL},
  ISSN = {0895-4798 (print), 1095-7162 (electronic)},
  bibdate = {Sun Mar 2 11:16:54 GMT 1997},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Higham:1995:SDP}.},
  acknowledgement = ack-njh,
}
@Article{Li:1997:RPB,
  author = {Ren-Cang Li},
  title = {Relative perturbation bounds for the unitary polar
    factor},
  journal = j-BIT-NUM-MATH,
  volume = {37},
  number = {1},
  pages = {67--75},
  month = mar,
  year = {1997},
  CODEN = {BITTEL, NBITAB},
  ISSN = {0006-3835 (print), 1572-9125 (electronic)},
  MRclass = {15A18 (15A23 65F35)},
  MRnumber = {97k:15026},
  MRreviewer = {Roy Mathias},
  bibdate = {Fri Nov 13 07:00:34 MST 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Li:1994:RPB}.},
  URL = {http://www.mai.liu.se/BIT/contents/bit37.html},
  acknowledgement = ack-nhfb,
}
@Article{vandeGeijn:1997:SSU,
  author = {R. A. van de Geijn and J. Watts},
  title = {{SUMMA}: scalable universal matrix multiplication
    algorithm},
  journal = j-CPE,
  volume = {9},
  number = {4},
  pages = {255--274},
  month = apr,
  year = {1997},
  CODEN = {CPEXEI},
  ISSN = {1040-3108},
  bibdate = {Tue Sep 7 06:06:30 MDT 1999},
  bibsource = {http://www.interscience.wiley.com/jpages/1040-3108/;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  note = {See original LAPACK Working note in
    \cite{vandeGeijn:1995:SSU}.},
  URL = {http://www3.interscience.wiley.com/cgi-bin/abstract?ID=13861;
    http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=13861&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
}
@Article{Choi:1998:NPM,
  author = {Jaeyoung Choi},
  title = {A new parallel matrix multiplication algorithm on
    distributed-memory concurrent computers},
  journal = j-CPE,
  volume = {10},
  number = {8},
  pages = {655--670},
  month = jul,
  year = {1998},
  CODEN = {CPEXEI},
  ISSN = {1040-3108},
  bibdate = {Tue Sep 7 06:06:42 MDT 1999},
  bibsource = {http://www.interscience.wiley.com/jpages/1040-3108/;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  note = {See original LAPACK Working note in
    \cite{Choi:1997:NPM}.},
  URL = {http://www3.interscience.wiley.com/cgi-bin/abstract?ID=10008698;
    http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=10008698&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
}
@InProceedings{Desprez:1998:SBA,
  author = {F. Desprez and J. Dongarra and A. Petitet and C.
    Randriamaro},
  title = {Scheduling Block-Cyclic Array Redistribution},
  crossref = {DHollander:1998:PCF},
  pages = {227--234},
  year = {1998},
  bibdate = {Thu Sep 16 09:48:36 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Desprez:1997:SBC} and \cite{Desprez:1998:SBC}.},
  acknowledgement = ack-nhfb,
}
@Article{Desprez:1998:SBC,
author = "F. Desprez and J. Dongarra and A. Petitet and C.
Randriamaro and Y. Robert",
title = "Scheduling Block-Cyclic Array Redistribution",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "9",
number = "2",
pages = "192--205",
month = feb,
year = "1998",
CODEN = "ITDSEO",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
bibdate = "Fri Nov 6 12:31:15 MST 1998",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Desprez:1997:SBC}.",
URL = "http://dlib.computer.org/td/books/td1998/pdf/l0192.pdf;
http://www.computer.org/tpds/td1998/l0192abs.htm",
acknowledgement = ack-nhfb,
}
@Article{Dongarra:1998:HPL,
  author = {J. Dongarra and J. Wa{\'s}niewski},
  title = {High Performance Linear Algebra Package {LAPACK90}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1388},
  pages = {387--391},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  bibdate = {Sat Oct 10 14:40:24 MDT 1998},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib;
    http://www.math.utah.edu/pub/tex/bib/lncs1998a.bib},
  note = {See original LAPACK Working note in
    \cite{Wasniewski:1998:HPL}.},
  acknowledgement = ack-nhfb,
}
%%% ACM TOMS article; the note field cites the LAWN entry it derives from.
@Article{Kaagstrom:1998:GBL,
author = "Bo K{\aa}gstr{\"o}m and Per Ling and Charles {Van
Loan}",
title = "{GEMM-based} level 3 {BLAS}: high-performance model
implementations and performance evaluation benchmark",
journal = j-TOMS,
volume = "24",
number = "3",
pages = "268--302",
month = sep,
year = "1998",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/292395.292412",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
bibdate = "Mon Feb 8 17:51:43 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/toms/1998-24/;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Kaagstrom:1995:GBLa}.",
URL = "http://www.acm.org:80/pubs/citations/journals/toms/1998-24-3/p268-kagstrom/",
abstract = "The level 3 Basic Linear Algebra Subprograms (BLAS)
are designed to perform various matrix multiply and
triangular system solving computations. Due to the
complex hardware organization of advanced computer
architectures the development of optimal level 3 BLAS
code is costly and time consuming. However, it is
possible to develop a portable and high-performance
level 3 BLAS library mainly relying on a highly
optimized GEMM, the routine for the general matrix
multiply and add operation. With suitable partitioning,
all the other level 3 BLAS can be defined in terms of
GEMM and a small amount of level 1 and level 2
computations. Our contribution is twofold. First, the
model implementations in Fortran 77 of the GEMM-based
level 3 BLAS are structured to reduce effectively data
traffic in a memory hierarchy. Second, the GEMM-based
level 3 BLAS performance evaluation benchmark is a tool
for evaluating and comparing different implementations
of the level 3 BLAS with the GEMM-based model
implementations.",
acknowledgement = ack-nhfb,
keywords = "algorithms; measurement; performance",
subject = "{\bf G.1.3} Mathematics of Computing, NUMERICAL
ANALYSIS, Numerical Linear Algebra, Linear systems
(direct and iterative methods). {\bf D.3.2} Software,
PROGRAMMING LANGUAGES, Language Classifications,
FORTRAN 77. {\bf F.2.1} Theory of Computation, ANALYSIS
OF ALGORITHMS AND PROBLEM COMPLEXITY, Numerical
Algorithms and Problems, Computations on matrices. {\bf
G.4} Mathematics of Computing, MATHEMATICAL SOFTWARE,
Certification and testing. {\bf G.4} Mathematics of
Computing, MATHEMATICAL SOFTWARE, Efficiency. {\bf G.4}
Mathematics of Computing, MATHEMATICAL SOFTWARE,
Portability**. {\bf G.4} Mathematics of Computing,
MATHEMATICAL SOFTWARE, Reliability and robustness. {\bf
G.4} Mathematics of Computing, MATHEMATICAL SOFTWARE,
Verification**.",
}
%%% Part I of a two-part SIAM journal article; the note field cites the LAWN entry.
@article{Li:1998:RPT,
  author = {Ren-Cang Li},
  title = {Relative Perturbation Theory: {I}. Eigenvalue and
    Singular Value Variations},
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = {19},
  number = {4},
  pages = {956--982},
  month = oct,
  year = {1998},
  coden = {SJMAEL},
  issn = {0895-4798 (print), 1095-7162 (electronic)},
  bibdate = {Fri Dec 4 12:14:09 MST 1998},
  bibsource = {http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/19/4;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Li:1994:RPTa}.},
  url = {http://epubs.siam.org/sam-bin/dbq/article/29849},
  acknowledgement = ack-nhfb,
}
%%% Conference paper; the note field records an award and cites the LAWN entry.
@inproceedings{Whaley:1998:ATL,
  author = {R. Clint Whaley and Jack J. Dongarra},
  title = {{Automatically Tuned Linear Algebra Software}
    ({ATLAS})},
  crossref = {ACM:1998:SHP},
  year = {1998},
  bibdate = {Wed Mar 06 06:37:44 2002},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {Best Paper Award for Systems. See original LAPACK
    Working note in \cite{Whaley:1997:ATL}.},
  url = {http://www.netlib.org/utk/people/JackDongarra/PAPERS/atlas-sc98.ps;
    http://www.supercomp.org/sc98/TechPapers/sc98_FullAbstracts/Whaley814/INDEX.HTM},
  acknowledgement = ack-nhfb,
}
%%% Journal article; pages, month and CODEN are placeholders ("??"/"????").
%%% The note field cites the LAWN entry.
@article{Arbenz:1999:CPSc,
  author = {P. Arbenz and A. Cleary and J. Dongarra and M.
    Hegland},
  title = {A Comparison of Parallel Solvers for Diagonally
    Dominant and General Narrow-Banded Linear Systems},
  journal = j-PARALLEL-DIST-COMP-PRACT,
  volume = {2},
  number = {4},
  pages = {??--??},
  month = {????},
  year = {1999},
  coden = {????},
  issn = {1097-2803},
  bibdate = {Fri Dec 19 08:14:14 MST 2003},
  bibsource = {http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no4.html;
    http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Arbenz:1999:CPSa}.},
  url = {http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no4abs.html#arbenz},
  acknowledgement = ack-nhfb,
}
%%% SIAM journal article; the note field cites the LAWN entry it derives from.
@article{Demmel:1999:APS,
  author = {James W. Demmel and John R. Gilbert and Xiaoye S. Li},
  title = {An Asynchronous Parallel Supernodal Algorithm for
    Sparse {Gaussian} Elimination},
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = {20},
  number = {4},
  pages = {915--952},
  month = oct,
  year = {1999},
  coden = {SJMAEL},
  issn = {0895-4798 (print), 1095-7162 (electronic)},
  bibdate = {Sat Jan 22 14:39:14 MST 2000},
  bibsource = {http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/20/4;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Demmel:1997:APS}.},
  url = {http://epubs.siam.org/sam-bin/dbq/article/31768},
  acknowledgement = ack-nhfb,
}
%%% Journal article with a truncated author list ("and others").
%%% The note field cites the LAWN entry.
@article{Demmel:1999:CSV,
  author = {James Demmel and others},
  title = {Computing the singular value decomposition with high
    relative accuracy},
  journal = j-LINEAR-ALGEBRA-APPL,
  volume = {299},
  number = {1--3},
  pages = {21--80},
  day = {15},
  month = sep,
  year = {1999},
  coden = {LAAPAW},
  issn = {0024-3795 (print), 1873-1856 (electronic)},
  bibdate = {Wed Nov 01 08:18:32 2000},
  bibsource = {http://www.elsevier.com/locate/laa;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Demmel:1997:CSV}.},
  url = {http://www.elsevier.nl/gej-ng/10/30/19/112/21/22/abstract.html;
    http://www.elsevier.nl/gej-ng/10/30/19/112/21/22/article.pdf},
  acknowledgement = ack-nhfb,
}
%%% SIAM journal article; the note field cites the LAWN entry it derives from.
@article{Demmel:1999:SAS,
  author = {James W. Demmel and Stanley C. Eisenstat and John R.
    Gilbert and Xiaoye S. Li and Joseph W. H. Liu},
  title = {A Supernodal Approach to Sparse Partial Pivoting},
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = {20},
  number = {3},
  pages = {720--755},
  month = jul,
  year = {1999},
  coden = {SJMAEL},
  issn = {0895-4798 (print), 1095-7162 (electronic)},
  bibdate = {Sat Jan 22 14:39:12 MST 2000},
  bibsource = {http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/20/3;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Demmel:1995:SAS}.},
  url = {http://epubs.siam.org/sam-bin/dbq/article/29176},
  acknowledgement = ack-nhfb,
}
%%% Part II of a two-part SIAM journal article; the note field cites the LAWN entry.
@article{Li:1999:RPT,
  author = {Ren-Cang Li},
  title = {Relative Perturbation Theory: {II}. Eigenspace and
    Singular Subspace Variations},
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = {20},
  number = {2},
  pages = {471--492},
  month = apr,
  year = {1999},
  coden = {SJMAEL},
  issn = {0895-4798 (print), 1095-7162 (electronic)},
  bibdate = {Fri Dec 4 12:14:09 MST 1998},
  bibsource = {http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/20/2;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Li:1994:RPTb}.},
  url = {http://epubs.siam.org/sam-bin/dbq/article/29850},
  acknowledgement = ack-nhfb,
}
%%% Journal article; the note field cites the LAWN entry it derives from.
%%% Start page 1201 agrees with the "l1201" article identifier in the
%%% publisher URLs below; the end page is not recorded here.
@Article{Petitet:1999:ARM,
author = "A. P. Petitet and J. J. Dongarra",
title = "Algorithmic Redistribution Methods for Block-Cyclic
Decompositions",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "10",
number = "12",
pages = "1201--??",
month = dec,
year = "1999",
CODEN = "ITDSEO",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
bibdate = "Thu Oct 12 18:48:32 MDT 2000",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Petitet:1997:ARM}.",
URL = "http://dlib.computer.org/td/books/td1999/pdf/l1201.pdf;
http://www.computer.org/tpds/td1999/l1201abs.htm;
http://www.netlib.org/utk/people/JackDongarra/PAPERS/alg-dist.ps;
http://www.netlib.org/utk/people/JackDongarra/pdf/alg-dist.pdf",
acknowledgement = ack-nhfb,
}
%%% Conference paper with an inline booktitle (no crossref).
%%% The note field cites the LAWN entry and the handbook-chapter version.
@inproceedings{Petitet:1999:NLA,
  author = {A. Petitet and H. Casanova and R. Whaley and J.
    Dongarra and Y. Robert},
  booktitle = {SIAM Annual Meeting, Atlanta, GA, May 13, 1999},
  title = {A Numerical Linear Algebra Problem Solving Environment
    Designer's Perspective},
  publisher = pub-SIAM,
  address = pub-SIAM:adr,
  year = {1999},
  bibdate = {Tue Feb 26 10:10:44 2002},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Petitet:1998:NLA} and \cite{Petitet:2000:PDS}.},
  url = {http://www.netlib.org/utk/people/JackDongarra/PAPERS/la-handbook-chp10.ps},
  acknowledgement = ack-nhfb,
}
%%% Journal article; the note field cites the LAWN entry it derives from.
%%% day/month are taken from the issue date embedded in the DOI's SICI
%%% component "(20001225)".
@Article{DAzevedo:2000:DIP,
author = "Eduardo D'Azevedo and Jack Dongarra",
title = "The design and implementation of the parallel
out-of-core {ScaLAPACK} {$ L U $}, {$ Q R $}, and
{Cholesky} factorization routines",
journal = j-CPE,
volume = "12",
number = "15",
pages = "1481--1493",
day = "25",
month = dec,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(20001225)12:15<1481::AID-CPE540>3.0.CO;2-V",
ISSN = "1040-3108",
bibdate = "Sat Apr 7 06:56:11 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/lawn.bib;
http://www3.interscience.wiley.com/journalfinder.html",
note = "See original LAPACK Working note in
\cite{Dongarra:1997:DIP}.",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76505648/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76505648&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
}
%%% Journal article; the note field cites the LAWN entry.
%%% The math in the title is brace-protected so that bibliography styles
%%% which recase titles leave it untouched.
@Article{Parlett:2000:IAP,
author = "Beresford N. Parlett and Osni A. Marques",
title = "An implementation of the {$ d q d s $} algorithm
(positive case)",
journal = j-LINEAR-ALGEBRA-APPL,
volume = "309",
number = "1--3",
pages = "217--259",
day = "15",
month = apr,
year = "2000",
CODEN = "LAAPAW",
ISSN = "0024-3795 (print), 1873-1856 (electronic)",
bibdate = "Mon Oct 9 10:54:41 MDT 2000",
bibsource = "http://www.elsevier.com/locate/laa;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Parlett:2002:IDA}.",
URL = "http://www.elsevier.nl/gej-ng/10/30/19/126/25/37/abstract.html;
http://www.elsevier.nl/gej-ng/10/30/19/126/25/37/article.pdf",
acknowledgement = ack-nhfb,
}
%%% Handbook chapter; the chapter page range is a placeholder ("??--??").
%%% The note field cites the LAWN entry and the conference version.
@incollection{Petitet:2000:PDS,
  author = {A. Petitet and H. Casanova and J. Dongarra and Y.
    Robert and R. Whaley},
  editor = {Jacek Blazewicz and others},
  booktitle = {Handbook on Parallel and Distributed Processing},
  title = {Parallel and Distributed Scientific Computing: {A}
    Numerical Linear Algebra Problem Solving Environment
    Designer's Perspective},
  publisher = pub-SV,
  address = pub-SV:adr,
  bookpages = {635},
  pages = {??--??},
  year = {2000},
  isbn = {3-540-66441-6},
  isbn-13 = {978-3-540-66441-3},
  lccn = {QA76.58 .H36 2000},
  bibdate = {Tue Feb 26 10:10:44 2002},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Petitet:1998:NLA} and \cite{Petitet:1999:NLA}.},
  url = {http://www.netlib.org/utk/people/JackDongarra/PAPERS/la-handbook.ps},
  acknowledgement = ack-nhfb,
}
%%% ACM TOMS article; the note field cites the LAWN entry it derives from.
@Article{Andersen:2001:RFC,
author = "Bjarne S. Andersen and Jerzy Wa{\'s}niewski and Fred
G. Gustavson",
title = "A recursive formulation of {Cholesky} factorization of
a matrix in packed storage",
journal = j-TOMS,
volume = "27",
number = "2",
pages = "214--244",
month = jun,
year = "2001",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/383738.383741",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
bibdate = "Wed Feb 6 16:43:42 MST 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Andersen:2000:RFC}.",
abstract = "A new compact way to store a symmetric or triangular
matrix called RPF for Recursive Packed Format is fully
described. Novel ways to transform RPF to and from
standard packed format are included. A new algorithm,
called RPC for Recursive Packed Cholesky, that operates
on the RPF format is presented. Algorithm RPC is based
on level-3 BLAS and requires variants of algorithms
TRSM and SYRK that work on RPF. We call these RP\_TRSM
and RP\_SYRK and find that they do most of their work
by calling GEMM. It follows that most of the execution
time of RPC lies in GEMM. The advantage of this storage
scheme compared to traditional packed and full storage
is demonstrated. First, the RPC storage format uses the
minimal amount of storage for the symmetric or
triangular matrix. Second, RPC gives a level-3
implementation of Cholesky factorization whereas
standard packed implementations are only level 2.
Hence, the performance of our RPC implementation is
decidedly superior. Third, unlike fixed block size
algorithms, RPC, requires no block size tuning
parameter. We present performance measurements on
several current architectures that demonstrate
improvements over the traditional packed routines. Also
MSP parallel computations on the IBM SMP computer are
made. The graphs that are attached in Section 7 show
that the RPC algorithms are superior by a factor
between 1.6 and 7.4 for order around 1000, and between
1.9 and 10.3 for order around 3000 over the traditional
packed algorithms. For some architectures, the RPC
performance results are almost the same or even better
than the traditional full-storage algorithms results.",
accepted = "15 March 2001",
acknowledgement = ack-nhfb,
}
%%% Journal article; the note field cites the LAWN entry it derives from.
%%% bibsource lists the bibliography files this record was drawn from;
%%% URL lists only locations of the article itself.
@Article{Whaley:2001:AEO,
author = "R. Clint Whaley and Antoine Petitet and Jack J.
Dongarra",
title = "Automated empirical optimizations of software and the
{ATLAS} project",
journal = j-PARALLEL-COMPUTING,
volume = "27",
number = "1--2",
pages = "3--35",
month = jan,
year = "2001",
CODEN = "PACOEJ",
ISSN = "0167-8191",
bibdate = "Wed Jul 18 06:31:14 MDT 2001",
bibsource = "http://www.elsevier.com/locate/issn/01678191;
http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Whaley:2000:AEO}.",
URL = "http://www.elsevier.nl/gej-ng/10/35/21/47/25/23/abstract.html;
http://www.elsevier.nl/gej-ng/10/35/21/47/25/23/article.pdf;
http://www.netlib.org/utk/people/JackDongarra/PAPERS/atlas_pub.pdf",
acknowledgement = ack-nhfb,
}
%%% ACM TOMS article; the note field cites the LAWN entry it derives from.
%%% The abstract is kept as one field, including the clause after the
%%% semicolon in "more reliable; all previous codes ...".
@Article{Bindel:2002:CGR,
author = "David Bindel and James Demmel and William Kahan and
Osni Marques",
title = "On computing {Givens} rotations reliably and
efficiently",
journal = j-TOMS,
volume = "28",
number = "2",
pages = "206--238",
month = jun,
year = "2002",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/567806.567809",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
bibdate = "Sat Nov 9 11:16:50 MST 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Bindel:2000:CGR}.",
abstract = "We consider the efficient and accurate computation of
Givens rotations. When $f$ and $g$ are positive real
numbers, this simply amounts to computing the values of
$ c = f / \sqrt {f^2 + g^2} $, $ s = g / \sqrt {f^2 +
g^2} $, and $ r = \sqrt {f^2 + g^2} $. This apparently
trivial computation merits closer consideration for the
following three reasons. First, while the definitions
of $c$, $s$ and $r$ seem obvious in the case of two
nonnegative arguments $f$ and $g$, there is enough
freedom of choice when one or more of $f$ and $g$ are
negative, zero or complex that LAPACK auxiliary
routines SLARTG, CLARTG, SLARGV and CLARGV can compute
rather different values of $c$, $s$ and $r$ for
mathematically identical values of $f$ and $g$. To
eliminate this unnecessary ambiguity, the BLAS
Technical Forum chose a single consistent definition of
Givens rotations that we will justify here. Second,
computing accurate values of $c$, $s$ and $r$ as
efficiently as possible and reliably despite
over/underflow is surprisingly complicated. For complex
Givens rotations, the most efficient formulas require
only one real square root and one real divide (as well
as several much cheaper additions and multiplications),
but a reliable implementation using only working
precision has a number of cases. On a Sun Ultra-10, the
new implementation is slightly faster than the previous
LAPACK implementation in the most common case, and 2.7
to 4.6 times faster than the corresponding vendor,
reference or ATLAS routines. It is also more reliable;
all previous codes occasionally suffer from large
inaccuracies due to over/underflow. For real Givens
rotations there are also improvements in speed and
accuracy, though not as striking. Third, the design
process that led to this reliable implementation is
quite systematic, and could be applied to the design of
similarly reliable subroutines.",
acknowledgement = ack-nhfb,
}
%%% SIAM journal article; the note field cites the LAWN entry it derives from.
@article{Henry:2002:PIN,
  author = {Greg Henry and David Watkins and Jack Dongarra},
  title = {A Parallel Implementation of the Nonsymmetric {$ Q R
    $} Algorithm for Distributed Memory Architectures},
  journal = j-SIAM-J-SCI-COMP,
  volume = {24},
  number = {1},
  pages = {284--311},
  month = jan,
  year = {2002},
  coden = {SJOCE3},
  doi = {https://doi.org/10.1137/S1064827597325165},
  issn = {1064-8275 (print), 1095-7197 (electronic)},
  issn-l = {1064-8275},
  bibdate = {Tue Oct 22 18:24:38 MDT 2002},
  bibsource = {http://epubs.siam.org/sam-bin/dbq/toc/SISC/24/1;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Henry:1997:PIN}.},
  url = {http://epubs.siam.org/sam-bin/dbq/article/32516},
  acknowledgement = ack-nhfb,
  fjournal = {SIAM Journal on Scientific Computing},
}
%%% ACM TOMS article; the note field cites the LAWN entry it derives from.
@article{Li:2002:DIT,
  author = {Xiaoye S. Li and James W. Demmel and David H. Bailey
    and Greg Henry and Yozo Hida and Jimmy Iskandar and
    William Kahan and Suh Y. Kang and Anil Kapur and
    Michael C. Martin and Brandon J. Thompson and Teresa
    Tung and Daniel J. Yoo},
  title = {Design, implementation and testing of extended and
    mixed precision {BLAS}},
  journal = j-TOMS,
  volume = {28},
  number = {2},
  pages = {152--205},
  month = jun,
  year = {2002},
  coden = {ACMSCU},
  doi = {https://doi.org/10.1145/567806.567808},
  issn = {0098-3500 (print), 1557-7295 (electronic)},
  bibdate = {Sat Nov 9 11:16:50 MST 2002},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Li:2000:DIT}.},
  abstract = {This paper describes the design rationale, a C
    implementation, and conformance testing of a subset of
    the new Standard for the BLAS (Basic Linear Algebra
    Subroutines): Extended and Mixed Precision BLAS.
    Permitting higher internal precision and mixed
    input\slash output types and precisions allows us to
    implement some algorithms that are simpler, more
    accurate, and sometimes faster than possible without
    these features. The new BLAS are challenging to
    implement and test because there are many more
    subroutines than in the existing Standard, and because
    we must be able to assess whether a higher precision is
    used for internal computations than is used for either
    input or output variables. We have therefore developed
    an automated process of generating and systematically
    testing these routines. Our methodology is applicable
    to languages besides C. In particular, our algorithms
    used in the testing code will be valuable to all other
    BLAS implementors. Our extra precision routines achieve
    excellent performance---close to half of the machine
    peak Megaflop rate even for the Level 2 BLAS, when the
    data access is stride one.},
  acknowledgement = ack-nhfb,
}
%%% Journal article; the note field cites the LAWN entry it derives from.
%%% URL lists only locations of the paper; bibliography-file provenance
%%% is recorded in bibsource.
@Article{Dongarra:2003:SANb,
author = "Jack Dongarra and Victor Eijkhout",
title = "Self-Adapting Numerical Software for Next Generation
Applications",
journal = j-IJHPCA,
volume = "17",
number = "2",
pages = "125--131",
month = "Summer",
year = "2003",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Nov 28 06:52:13 2003",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/lawn.bib",
note = "See original LAPACK Working note in
\cite{Dongarra:2002:SAN}.",
URL = "http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/sans-position.pdf;
http://www.netlib.org/utk/people/JackDongarra/PAPERS/sans-ijhpca.pdf",
acknowledgement = ack-nhfb,
}
%%% SIAM journal article; the note field cites the LAWN entry it derives from.
@article{Dhillon:2004:OER,
  author = {Inderjit S. Dhillon and Beresford N. Parlett},
  title = {Orthogonal Eigenvectors and Relative Gaps},
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = {25},
  number = {3},
  pages = {858--899},
  month = jul,
  year = {2004},
  coden = {SJMAEL},
  issn = {0895-4798 (print), 1095-7162 (electronic)},
  bibdate = {Sat Apr 16 10:32:32 MDT 2005},
  bibsource = {http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/25/3;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {See original LAPACK Working note in
    \cite{Dhillon:2002:OER}.},
  url = {http://epubs.siam.org/sam-bin/dbq/article/37011},
  acknowledgement = ack-nhfb,
}
%%% Journal publication of LAWN 186 (see the remark field).
@Article{Demmel:2007:FLAb,
author = "James Demmel and Ioana Dumitriu and Olga Holtz",
title = "Fast linear algebra is stable",
journal = j-NUM-MATH,
volume = "108",
number = "1",
pages = "59--91",
month = nov,
year = "2007",
CODEN = "NUMMA7",
DOI = "https://doi.org/10.1007/s00211-007-0114-x",
ISSN = "0029-599X (print), 0945-3245 (electronic)",
bibdate = "Tue Jul 8 09:49:13 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
abstract = "In Demmel et al. (Numer. Math. 106(2), 199--224, 2007)
we showed that a large class of fast recursive matrix
multiplication algorithms is stable in a normwise
sense, and that in fact if multiplication of $n$-by-$n$
matrices can be done by any algorithm in {$ O(n^{\omega
+ \eta }) $} operations for any $ \eta > 0 $, then it
can be done stably in {$ O(n^{\omega + \eta }) $}
operations for any $ \eta > 0 $. Here we extend this
result to show that essentially all standard linear
algebra operations, including LU decomposition, QR
decomposition, linear equation solving, matrix
inversion, solving least squares problems,
(generalized) eigenvalue problems and the singular
value decomposition can also be done stably (in a
normwise sense) in {$ O(n^{\omega + \eta }) $}
operations.",
acknowledgement = ack-nhfb,
remark = "Journal publication of LAWN 186
\cite{Demmel:2007:FLAa}.",
}
%%% Journal publication of LAWN 190 (see the remark field).
@article{Buttari:2008:PTF,
  author = {Alfredo Buttari and Julien Langou and Jakub Kurzak and
    Jack Dongarra},
  title = {Parallel Tiled {$ Q R $} Factorization for Multicore
    Architectures},
  journal = j-CCPE,
  volume = {20},
  number = {13},
  pages = {1573--1590},
  month = sep,
  year = {2008},
  coden = {CCPEBO},
  doi = {https://doi.org/10.1002/cpe.1301},
  issn = {1532-0626 (print), 1532-0634 (electronic)},
  bibdate = {Fri Apr 24 12:25:43 2009},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
  remark = {Journal publication of LAWN 190
    \cite{Buttari:2007:PTQ}.},
}
%%% Journal publication of LAWN 188 (see the remark field).
%%% pages uses the article-number form "28:1--28:32", with articleno recorded separately.
@article{Demmel:2009:EPI,
  author = {James Demmel and Yozo Hida and E. Jason Riedy and
    Xiaoye S. Li},
  title = {Extra-Precise Iterative Refinement for Overdetermined
    Least Squares Problems},
  journal = j-TOMS,
  volume = {35},
  number = {4},
  pages = {28:1--28:32},
  month = feb,
  year = {2009},
  coden = {ACMSCU},
  doi = {https://doi.org/10.1145/1462173.1462177},
  issn = {0098-3500 (print), 1557-7295 (electronic)},
  issn-l = {0098-3500},
  bibdate = {Fri Feb 13 18:09:40 MST 2009},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib;
    http://www.math.utah.edu/pub/tex/bib/toms.bib},
  abstract = {We present the algorithm, error bounds, and numerical
    results for extra-precise iterative refinement applied
    to overdetermined linear least squares (LLS) problems.
    We apply our linear system refinement algorithm to
    Bj{\"o}rck's augmented linear system formulation of an
    LLS problem. Our algorithm reduces the forward normwise
    and componentwise errors to $ O(\epsilon_w) $, where $
    \epsilon_w $ is the working precision, unless the
    system is too ill conditioned. In contrast to linear
    systems, we provide two separate error bounds for the
    solution $x$ and the residual $r$. The refinement
    algorithm requires only limited use of extra precision
    and adds only $ O(m n)$ work to the $ O(m n^2)$ cost of
    QR factorization for problems of size $ m \times n$.
    The extra precision calculation is facilitated by the
    new extended-precision BLAS standard in a portable way,
    and the refinement algorithm will be included in a
    future release of LAPACK and can be extended to the
    other types of least squares problems.},
  acknowledgement = ack-nhfb,
  articleno = {28},
  fjournal = {ACM Transactions on Mathematical Software (TOMS)},
  journal-url = {http://dl.acm.org/pub.cfm?id=J782},
  keywords = {BLAS; floating-point arithmetic; LAPACK; Linear
    algebra},
  remark = {Journal publication of LAWN 188
    \cite{Demmel:2007:EPI}.},
}
%%% Proceedings volume; title is repeated in booktitle (presumably so that
%%% crossref child entries can inherit it -- confirm against the children).
%%% The series name is spelled as in the file's other uses of it.
@Proceedings{Burkhart:1990:CVI,
editor = "H. (Helmar) Burkhart",
booktitle = "{CONPAR 90-VAPP IV}: {Joint} International Conference
on Vector and Parallel Processing, {Zurich,
Switzerland, September 10--13, 1990}: proceedings",
title = "{CONPAR 90-VAPP IV}: {Joint} International Conference
on Vector and Parallel Processing, {Zurich,
Switzerland, September 10--13, 1990}: proceedings",
volume = "457",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xi + 900",
year = "1990",
ISBN = "3-540-53065-7 (Berlin), 0-387-53065-7 (New York)",
ISBN-13 = "978-3-540-53065-7 (Berlin), 978-0-387-53065-9 (New
York)",
LCCN = "QA76.58 .J65 1990",
bibdate = "Sat Apr 23 06:53:59 MDT 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib;
z3950.loc.gov:7090/Voyager",
series = "Lecture Notes in Computer Science",
acknowledgement = ack-nhfb,
subject = "Parallel processing (Electronic computers);
Congresses; Vector processing (Computer science);
Congresses",
}
%%% Proceedings volume with a corporate editor; title is repeated in booktitle
%%% (presumably so that crossref child entries can inherit it).
@proceedings{IEEE:1990:PSN,
  editor = {{IEEE}},
  booktitle = {Proceedings, Supercomputing '90: November 12--16,
    1990, New York Hilton at Rockefeller Center, New York,
    New York},
  title = {Proceedings, Supercomputing '90: November 12--16,
    1990, New York Hilton at Rockefeller Center, New York,
    New York},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xxv + 982},
  year = {1990},
  isbn = {0-8186-2056-0 (paperback) (IEEE Computer Society),
    0-89791-412-0 (paperback) (ACM)},
  isbn-13 = {978-0-8186-2056-0 (paperback) (IEEE Computer Society),
    978-0-89791-412-3 (paperback) (ACM)},
  lccn = {QA 76.88 S87 1990},
  bibdate = {Wed Aug 28 06:48:31 MDT 1996},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib;
    University of California MELVYL catalog},
  note = {ACM order number 415903. IEEE Computer Society Press
    order number 2056. IEEE catalog number 90CH2916-5.},
  acknowledgement = ack-nhfb,
  classification = {C5440 (Multiprocessor systems and techniques); C5470
    (Performance evaluation and testing); C6110 (Systems
    analysis and programming); C7000 (Computer
    applications)},
  keywords = {biological applications; computer applications;
    computer chess; innovative architectures; linear
    algebra algorithms; memory; networking computing;
    parallel languages; parallel processing; particle
    transport; partitioning; performance evaluation;
    performance visualizations; pipeline processing;
    program analysis; program restructuring; scheduling;
    supercomputers --- congresses; vector algorithms},
}
%%% Proceedings volume in a numbered series; title is repeated in booktitle
%%% (presumably so that crossref child entries can inherit it).
@proceedings{Griffiths:1992:NAP,
  editor = {D. F. Griffiths and G. A. Watson},
  booktitle = {Numerical analysis, 1991: proceedings of the 14th
    Dundee Conference, June 1991},
  title = {Numerical analysis, 1991: proceedings of the 14th
    Dundee Conference, June 1991},
  volume = {260},
  publisher = pub-LONGMAN,
  address = pub-LONGMAN:adr,
  pages = {292},
  year = {1992},
  isbn = {0-582-08908-5},
  isbn-13 = {978-0-582-08908-2},
  lccn = {QA297.D85 1991},
  bibdate = {Mon Jan 15 11:24:40 1996},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  series = {Pitman Res. Notes Math. Ser.},
  acknowledgement = ack-nhfb,
}
%%% Proceedings volume with a corporate editor and an explicit key field.
%%% title and booktitle carry the same text; only title brace-protects the acronym.
@proceedings{IEEE:1992:SHP,
  editor = {{IEEE}},
  key = {SHPCC-92},
  booktitle = {Scalable High Performance Computing Conference,
    SHPCC-92, April 26--29, 1992, Williamsburg, Virginia},
  title = {Scalable High Performance Computing Conference,
    {SHPCC}-92, April 26--29, 1992, Williamsburg,
    Virginia},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xiii + 448},
  year = {1992},
  isbn = {0-8186-2775-1},
  isbn-13 = {978-0-8186-2775-0},
  lccn = {QA76.76.A65 S33 1992},
  bibdate = {Fri Dec 30 11:18:38 1994},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
%%% Proceedings volume; title is repeated in booktitle (presumably so that
%%% crossref child entries can inherit it).
@proceedings{Siegel:1992:FSF,
  editor = {H. J. Siegel},
  booktitle = {The Fourth Symposium on the Frontiers of Massively
    Parallel Computation: Frontiers '92 / October 19--21,
    1992, McLean, Virginia},
  title = {The Fourth Symposium on the Frontiers of Massively
    Parallel Computation: Frontiers '92 / October 19--21,
    1992, McLean, Virginia},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xviii + 592},
  year = {1992},
  isbn = {0-8186-2772-7 (hardback), 0-8186-2771-9 (microfiche)},
  isbn-13 = {978-0-8186-2772-9 (hardback), 978-0-8186-2771-2
    (microfiche)},
  lccn = {QA76.58 .S95 1992},
  bibdate = {Mon Jan 15 11:06:11 1996},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
%%% Proceedings volume (keyword ARITH-11); the first editor's surname is
%%% braced together with its "Jr." suffix. title is repeated in booktitle.
@proceedings{Swartzlander:1993:PSC,
  editor = {Earl {Swartzlander, Jr.} and Mary Jane Irwin and
    Graham Jullien},
  booktitle = {Proceedings: 11th Symposium on Computer Arithmetic,
    June 29--July 2, 1993, Windsor, Ontario},
  title = {Proceedings: 11th Symposium on Computer Arithmetic,
    June 29--July 2, 1993, Windsor, Ontario},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xii + 284},
  year = {1993},
  isbn = {0-7803-1401-8 (softbound), 0-8186-3862-1 (casebound),
    0-8186-3861-3 (microfiche)},
  isbn-13 = {978-0-7803-1401-6 (softbound), 978-0-8186-3862-6
    (casebound), 978-0-8186-3861-9 (microfiche)},
  issn = {0018-9340 (print), 1557-9956 (electronic)},
  issn-l = {0018-9340},
  lccn = {QA 76.9 C62 S95 1993},
  bibdate = {Thu Sep 01 22:58:49 1994},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {IEEE Transactions on Computers {\bf 43(8)}, 1994},
  acknowledgement = ack-nhfb,
  keywords = {ARITH-11},
}
%%% Proceedings (the note field says two volumes). title and booktitle carry
%%% the same text; only title brace-protects the acronyms.
@proceedings{Sincovec:1993:SCP,
  editor = {Richard F. Sincovec},
  booktitle = {SIAM Conference on Parallel Processing for Scientific
    Computing (6th: 1993: Norfolk, VA, USA)},
  title = {{SIAM} Conference on Parallel Processing for
    Scientific Computing (6th: 1993: Norfolk, {VA},
    {USA})},
  publisher = pub-SIAM,
  address = pub-SIAM:adr,
  pages = {xix + 1041 + iv},
  year = {1993},
  isbn = {0-89871-315-3},
  isbn-13 = {978-0-89871-315-2},
  lccn = {QA 76.58 S55 1993},
  bibdate = {Wed Aug 14 10:36:11 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  note = {Two volumes.},
  acknowledgement = ack-nhfb,
  sponsor = {Society for Industrial and Applied Mathematics.},
}
%%% Proceedings volume with no named editor and a placeholder page count ("????").
%%% The whole title is brace-protected and repeated in booktitle.
@proceedings{Anonymous:1994:OON,
  editor = {Anonymous},
  booktitle = {{Object oriented numerics: Annual conference: 2nd ---
    April 1994, Sunriver, OR}},
  title = {{Object oriented numerics: Annual conference: 2nd ---
    April 1994, Sunriver, OR}},
  publisher = {RWS},
  address = {Corvallis, OR},
  pages = {????},
  year = {1994},
  bibdate = {Thu Sep 16 09:48:36 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/lawn.bib},
  acknowledgement = ack-nhfb,
}
%% Proceedings of the Second Workshop on Environments and Tools for Parallel
%% Scientific Computing (Townsend, TN, May 25--27, 1994), edited by Dongarra
%% and Tourancheau.
@Proceedings{Dongarra:1994:PSW,
  editor          = "J. J. Dongarra and B. Tourancheau",
  booktitle       = "{Proceedings of the Second Workshop on Environments
                     and Tools for Parallel Scientific Computing, Townsend,
                     TN, USA, May 25--27, 1994}",
  title           = "{Proceedings of the Second Workshop on Environments
                     and Tools for Parallel Scientific Computing, Townsend,
                     TN, USA, May 25--27, 1994}",
  publisher       = pub-SIAM,
  address         = pub-SIAM:adr,
  pages           = "x + 292",
  year            = "1994",
  ISBN            = "0-89871-343-9",
  ISBN-13         = "978-0-89871-343-5",
  LCCN            = "QA76.58.I568 1994",
  bibdate         = "Sat May 11 12:16:44 MDT 1996",
  bibsource       = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  series          = "Proceedings of the Workshop on Environments and Tools
                     for Parallel Scientific Computing",
  acknowledgement = ack-nhfb,
}
%% Proceedings of the Scalable Parallel Libraries Conference (Mississippi
%% State, Mississippi, October 6--8, 1993).  Note that the year field gives
%% 1994 although the conference dates in the title are in 1993.
@Proceedings{IEEE:1994:PSP,
  editor          = "{IEEE}",
  booktitle       = "Proceedings of the Scalable Parallel Libraries
                     Conference, October 6--8, 1993, Mississippi State,
                     Mississippi",
  title           = "Proceedings of the Scalable Parallel Libraries
                     Conference, October 6--8, 1993, Mississippi State,
                     Mississippi",
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = "vii + 261",
  year            = "1994",
  ISBN            = "0-8186-4980-1 (paper), 0-8186-4981-X (microfiche)",
  ISBN-13         = "978-0-8186-4980-6 (paper), 978-0-8186-4981-3
                     (microfiche)",
  LCCN            = "QA76.58 .S34 1993",
  bibdate         = "Sat Mar 22 18:40:38 1997",
  bibsource       = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  acknowledgement = ack-nhfb,
}
%% Digest of papers of the Twenty-fifth International Symposium on
%% Fault-Tolerant Computing (Pasadena, California, June 27--30, 1995).
%% The sponsor value keeps the hyphenated word Fault-Tolerant on one line:
%% BibTeX turns a newline inside a value into a space, so breaking after the
%% hyphen would yield a spurious blank inside the word.
@Proceedings{IEEE:1995:DPT,
  editor          = "{IEEE}",
  booktitle       = "Digest of papers / the Twenty-fifth International
                     Symposium on Fault-Tolerant Computing, June 27--30,
                     1995, Pasadena, California",
  title           = "Digest of papers / the Twenty-fifth International
                     Symposium on Fault-Tolerant Computing, June 27--30,
                     1995, Pasadena, California",
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = "xxiii + 547",
  year            = "1995",
  CODEN           = "DPFTDL",
  ISBN            = "0-8186-7079-7, 0-8186-7145-9",
  ISBN-13         = "978-0-8186-7079-4, 978-0-8186-7145-6",
  ISSN            = "0731-3071",
  LCCN            = "QA 76.9 F38 I57 1995",
  bibdate         = "Fri Mar 1 10:04:10 MST 1996",
  bibsource       = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note            = "IEEE catalog number 95CH35823.",
  series          = "FTCS 25th",
  acknowledgement = ack-nhfb,
  sponsor         = "IEEE. Computer Society. Technical Committee on
                     Fault-Tolerant Computing.",
}
%% Supercomputing '96 conference proceedings (Pittsburgh, PA, November
%% 17--22), with ACM and IEEE recorded jointly as publisher.  The page count
%% is an unresolved placeholder (????).
%% NOTE(review): the LCCN class prefix was restored to QA76.88; confirm
%% against the library catalogue record.
@Proceedings{ACM:1996:SCP,
  editor          = "{ACM}",
  booktitle       = "{Supercomputing '96 Conference Proceedings: November
                     17--22, Pittsburgh, PA}",
  title           = "{Supercomputing '96 Conference Proceedings: November
                     17--22, Pittsburgh, PA}",
  publisher       = pub-ACM # " and " # pub-IEEE,
  address         = pub-ACM:adr # " and " # pub-IEEE:adr,
  pages           = "????",
  year            = "1996",
  ISBN            = "0-89791-854-1",
  ISBN-13         = "978-0-89791-854-1",
  LCCN            = "QA76.88 .S8573 1996",
  bibdate         = "Mon Mar 23 12:30:13 1998",
  bibsource       = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note            = "ACM Order Number: 415962, IEEE Computer Society Press
                     Order Number: RS00126.",
  URL             = "http://www.supercomp.org/sc96/proceedings/",
  acknowledgement = ack-nhfb,
}
%% Proceedings of PARA '95, the second international workshop on applied
%% parallel computing (Lyngby, Denmark, August 21--24, 1995); volume 1041 of
%% the series named by the ser-LNCS macro.  Keyword phrases are separated by
%% semicolons and carry no terminal punctuation of their own.
@Proceedings{Dongarra:1996:APC,
  editor          = "J. J. Dongarra and Kaj Madsen and Jerzy
                     Wa{\'s}niewski",
  booktitle       = "{Applied parallel computing: computations in physics,
                     chemistry, and engineering science: second
                     international workshop, PARA '95, Lyngby, Denmark,
                     August 21--24, 1995: proceedings}",
  title           = "{Applied parallel computing: computations in physics,
                     chemistry, and engineering science: second
                     international workshop, PARA '95, Lyngby, Denmark,
                     August 21--24, 1995: proceedings}",
  volume          = "1041",
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = "562",
  year            = "1996",
  CODEN           = "LNCSD9",
  ISBN            = "3-540-60902-4",
  ISBN-13         = "978-3-540-60902-5",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN            = "QA76.58 .P35 1995",
  MRclass         = "65-06",
  MRnumber        = "1 320 056",
  bibdate         = "Thu Dec 19 14:25:58 1996",
  bibsource       = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  series          = ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords        = "Chemistry -- Data processing -- Congresses;
                     Engineering -- Data processing -- Congresses; Parallel
                     processing (Electronic computers) -- Congresses;
                     Physics -- Data processing -- Congresses",
}
%% Proceedings of PARA 96, the Third International Workshop on applied
%% parallel computing (Lyngby, Denmark, August 18--21, 1996); volume 1184 of
%% the series named by the ser-LNCS macro.
@Proceedings{Wasniewski:1996:APC,
  editor          = "Jerzy Wa{\'s}niewski and J. Dongarra and K. Madsen and
                     D. Olesen",
  booktitle       = "Applied parallel computing: industrial-strength
                     computation and optimization: Third International
                     Workshop, {PARA} 96, Lyngby, Denmark, August 18--21,
                     1996: proceedings",
  title           = "Applied parallel computing: industrial-strength
                     computation and optimization: Third International
                     Workshop, {PARA} 96, Lyngby, Denmark, August 18--21,
                     1996: proceedings",
  volume          = "1184",
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = "xiii + 722",
  year            = "1996",
  ISBN            = "3-540-62095-8 (softcover)",
  ISBN-13         = "978-3-540-62095-2 (softcover)",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN            = "QA76.58 .P35 1996",
  bibdate         = "Sat Dec 21 16:06:37 MST 1996",
  bibsource       = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  series          = ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords        = "Parallel processing (Electronic computers) --
                     Congresses.",
}
%% SC'98 conference proceedings (Orlando, Florida, November 7--13, 1998),
%% with ACM and IEEE recorded jointly as publisher.  The pages, ISBN, ISBN-13
%% and LCCN values are unresolved placeholders (????).
@Proceedings{ACM:1998:SHP,
  editor          = "{ACM}",
  booktitle       = "{SC'98: High Performance Networking and Computing:
                     Proceedings of the 1998 ACM\slash IEEE SC98 Conference:
                     Orange County Convention Center, Orlando, Florida, USA,
                     November 7--13, 1998}",
  title           = "{SC'98: High Performance Networking and Computing:
                     Proceedings of the 1998 ACM\slash IEEE SC98 Conference:
                     Orange County Convention Center, Orlando, Florida, USA,
                     November 7--13, 1998}",
  publisher       = pub-ACM # " and " # pub-IEEE,
  address         = pub-ACM:adr # " and " # pub-IEEE:adr,
  pages           = "????",
  year            = "1998",
  ISBN            = "????",
  ISBN-13         = "????",
  LCCN            = "????",
  bibdate         = "Wed Oct 07 08:51:34 1998",
  bibsource       = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL             = "http://www.supercomp.org/sc98/papers/",
  acknowledgement = ack-nhfb,
}
%% Papers from ParCo97 (Bonn, Germany, Sept. 19--22, 1997); volume 12 of the
%% series Advances in Parallel Computing.  Only the first editor is named;
%% the rest are covered by the and-others token.
@Proceedings{DHollander:1998:PCF,
  editor          = "E. D'Hollander and others",
  booktitle       = "{Parallel computing: fundamentals, applications, and
                     new directions: Papers from ParCo97, held in Bonn,
                     Germany, Sept. 19--22, 1997}",
  title           = "{Parallel computing: fundamentals, applications, and
                     new directions: Papers from ParCo97, held in Bonn,
                     Germany, Sept. 19--22, 1997}",
  volume          = "12",
  publisher       = pub-ELSEVIER,
  address         = pub-ELSEVIER:adr,
  pages           = "xx + 748",
  year            = "1998",
  ISBN            = "0-444-82882-6",
  ISBN-13         = "978-0-444-82882-8",
  LCCN            = "QA76.58.P3795 1997",
  bibdate         = "Thu Sep 16 09:48:36 MDT 1999",
  bibsource       = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  series          = "Advances in Parallel Computing",
  acknowledgement = ack-nhfb,
}