@TechReport{lawn01, URL = "http://www.netlib.org/lapack/lawnspdf/lawn01.pdf", number = 1, institution = "LAPACK Working Note", author = "James W. Demmel and Jack J. Dongarra and Jeremy Du Croz and Anne Greenbaum and Sven Hammarling and Danny C. Sorensen", title = "Prospectus for the Development of a Linear Algebra Library for High-Performance Computers", anlnumber = "ANL/MCS-TM-97", month = sep, year = 1987, } @TechReport{lawn02, URL = "http://www.netlib.org/lapack/lawnspdf/lawn02.pdf", number = 2, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Sven Hammarling and Danny C. Sorensen", title = "Block Reduction of Matrices to Condensed Forms for Eigenvalue Computations", anlnumber = "ANL/MCS-TM-99", month = sep, year = 1987, } @TechReport{lawn03, URL = "http://www.netlib.org/lapack/lawnspdf/lawn03.pdf", number = 3, institution = "LAPACK Working Note", author = "James W. Demmel and W. Kahan", title = "Computing Small Singular Values of Bidiagonal Matrices with Guaranteed High Relative Accuracy", anlnumber = "ANL/MCS-TM-110", month = feb, year = 1988, } @TechReport{lawn04, abstract = "This note summarizes the numerical and software issues which arise in designing the LAPACK subroutines for the symmetric eigenproblem, the singular value decomposition (SVD) and iterative refinement for linear systems. At the end of each chapter are a list of design questions for which we would like feedback from the user community.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn04.pdf", number = 4, institution = "LAPACK Working Note", author = "James W. Demmel and Jeremy Du Croz and Sven Hammarling and Danny C. Sorensen", title = "Guidelines for the Design of Symmetric Eigenroutines, {SVD}, and Iterative Refinement and Condition Estimation for Linear Systems", anlnumber = "ANL/MCS-TM-111", month = mar, year = 1988, } @TechReport{lawn05, abstract = "This note outlines the provisional contents of LAPACK. 
It describes a naming scheme for the routines, enumerates the individual routines, and includes notes on the choice of algorithms and aspects of software design.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn05.pdf", number = 5, institution = "LAPACK Working Note", author = "Christian Bischof and James W. Demmel and Jack J. Dongarra and Jeremy Du Croz and Anne Greenbaum and Sven Hammarling and Danny C. Sorensen", title = "Provisional Contents", anlnumber = "ANL/MCS-TM-38", month = sep, year = 1988, } @TechReport{lawn06, URL = "http://www.netlib.org/lapack/lawnspdf/lawn06.pdf", number = 6, institution = "LAPACK Working Note", author = "O. Brewer and Jack J. Dongarra and Danny C. Sorensen", title = "Tools to Aid in the Analysis of Memory Access Patterns for {FORTRAN} Programs", anlnumber = "ANL/MCS-TM-120", month = jun, year = 1988, } @TechReport{lawn07, URL = "http://www.netlib.org/lapack/lawnspdf/lawn07.pdf", number = 7, institution = "LAPACK Working Note", author = "James W. Demmel", title = "Computing Accurate Eigensystems of Scaled Diagonally Dominant Matrices", anlnumber = "ANL/MCS-TM-126", month = dec, year = 1988, } @TechReport{lawn08, abstract = "The usual QR algorithm for finding the eigenvalues of a Hessenberg matrix $H$ is based on vector-vector operations, e.g. adding a multiple of one row to another. The opportunities for parallelism in such an algorithm are limited. In this report, we describe a reorganization of the QR algorithm to permit either matrix-vector or matrix-matrix operations to be performed, both of which yield more efficient implementations on vector and parallel machines. The idea is to chase a $k$ by $k$ bulge rather than a 1 by 1 or 2 by 2 bulge as in the standard QR algorithm. We report our preliminary numerical experiments on the CONVEX C-1 and CYBER 205 vector machines.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn08.pdf", number = 8, institution = "LAPACK Working Note", author = "Zhaojun Bai and James W. 
Demmel", title = "On a Block Implementation of Hessenberg Multishift {QR} Iteration", anlnumber = "ANL/MCS-TM-127", month = jan, year = 1989, } @TechReport{lawn09, URL = "http://www.netlib.org/lapack/lawnspdf/lawn09.pdf", number = 9, institution = "LAPACK Working Note", author = "James W. Demmel and Alan McKenney", title = "A Test Matrix Generation Suite", anlnumber = "ANL/MCS-P69-0389", month = mar, year = 1989, } @TechReport{lawn10, abstract = "This working note describes how to install and test the initial release of LAPACK.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn10.pdf", number = 10, institution = "LAPACK Working Note", author = "Edward Anderson and Jack J. Dongarra", title = "Installing and Testing the Initial Release of {LAPACK} -- Unix and Non-Unix Versions", anlnumber = "ANL/MCS-TM-130", month = may, year = 1989, } @TechReport{lawn11, URL = "http://www.netlib.org/lapack/lawnspdf/lawn11.pdf", number = 11, institution = "LAPACK Working Note", author = "Percy Deift and James W. Demmel and Luen Chau Li and Carlos Tomei", title = "The Bidiagonal Singular Value Decomposition and Hamiltonian Mechanics", anlnumber = "ANL/MCS-TM-133", month = aug, year = 1989, } @TechReport{lawn12, URL = "http://www.netlib.org/lapack/lawnspdf/lawn12.pdf", number = 12, institution = "LAPACK Working Note", author = "Peter Mayes and Giuseppe Radicati", title = "Banded Cholosky Factorization Using Level 3 {BLAS}", anlnumber = "ANL/MCS-TM-134", month = aug, year = 1989, } @TechReport{lawn13, abstract = "This report reviews the theory and practical estimation of condition numbers for the nonsymmetric eigenvalue problem. 
The report provides a manual for using {\sl LAPACK} subroutines {\tt STRSNA} and {\tt STRSEN} to estimate condition numbers for individual eigenvalues and eigenvectors, multiple (or clustered) eigenvalues, and invariant subspaces.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn13.pdf", number = 13, institution = "LAPACK Working Note", author = "Zhaojun Bai and James W. Demmel and Alan McKenney", title = "On the Conditioning of the Nonsymmetric Eigenproblem: Theory and Software", utknumber = "UT-CS-89-86", month = oct, year = 1989, } @TechReport{lawn14, URL = "http://www.netlib.org/lapack/lawnspdf/lawn14.pdf", number = 14, institution = "LAPACK Working Note", author = "James W. Demmel", title = "On Floating Point Errors in Cholesky", utknumber = "UT-CS-89-87", month = oct, year = 1989, } @TechReport{lawn15, URL = "http://www.netlib.org/lapack/lawnspdf/lawn15.pdf", number = 15, institution = "LAPACK Working Note", author = "James W. Demmel and Kresimir Veselić", title = "Jacobi's Method is More Accurate than {QR}", utknumber = "UT-CS-89-88", month = oct, year = 1989, } @TechReport{lawn16, abstract = "This report details our results and experiences from the first round of testing of LAPACK. A list of the known bugs in LAPACK and the BLAS is provided, all of which will be corrected in the next release. Selected timing results from the test sites are also presented to give some indication of the performance expected from LAPACK.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn16.pdf", number = 16, institution = "LAPACK Working Note", author = "Edward Anderson and Jack J. Dongarra", title = "Results from the Initial Release of {LAPACK}", utknumber = "UT-CS-89-89", month = nov, year = 1989, } @TechReport{lawn17, abstract = "Numerical experiments used in determining which variants of the QR/QL algorithm to include in LAPACK are described. Timing and accuracy comparisons are presented for the different methods applied to the symmetric tridiagonal eigenproblem. 
Specifically, comparisons are made between root-free and standard versions, between QL and QR iterations and dynamic strategies for switching between the two, and between Wilkinson's shift and the perfect shift strategy for the eigenvector computation. LAPACK routines that incorporate the most promising of these strategies are then compared with the corresponding EISPACK routines.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn17.pdf", number = 17, institution = "LAPACK Working Note", author = "Anne Greenbaum and Jack J. Dongarra", title = "Experiments with {QR}/{QL} Methods for the Symmetric Tridiagonal Eigenproblem", utknumber = "UT-CS-89-92", month = nov, year = 1989, } @TechReport{lawn18, URL = "http://www.netlib.org/lapack/lawnspdf/lawn18.pdf", number = 18, institution = "LAPACK Working Note", author = "Edward Anderson and Jack J. Dongarra", title = "Implementation Guide for {LAPACK}", utknumber = "UT-CS-90-101", month = apr, year = 1990, } @TechReport{lawn19, abstract = "The LAPACK software project currently under development is intended to provide a portable linear algebra library for high performance computers. LAPACK will make use of the Level 1, 2, and 3 BLAS to carry out basic operations. A principal focus of this project is to implement blocked versions of a number of algorithms to take advantage of the greater parallelism and improved data locality of the Level 3 BLAS. In this paper, we describe our work with variants of some of these algorithms and the performance data we have collected.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn19.pdf", number = 19, institution = "LAPACK Working Note", author = "Edward Anderson and Jack J. 
Dongarra", title = "Evaluating Block Algorithm Variants in {LAPACK}", utknumber = "UT-CS-90-103", month = apr, year = 1990, } @TechReport{lawn20, URL = "http://www.netlib.org/lapack/lawnspdf/lawn20.pdf", number = 20, institution = "LAPACK Working Note", author = "Edward Anderson and Zhaojun Bai and Christian Bischof and James W. Demmel and Jack J. Dongarra and Jeremy Du Croz and Anne Greenbaum and Sven Hammarling and Alan McKenney and Danny C. Sorensen", title = "{LAPACK}: {A} Portable Linear Algebra Library for High-Performance Computers", utknumber = "UT-CS-90-105", month = may, year = 1990, } @TechReport{lawn21, URL = "http://www.netlib.org/lapack/lawnspdf/lawn21.pdf", number = 21, institution = "LAPACK Working Note", author = "Jeremy Du Croz and Peter Mayes and Guiseppe Radicati", title = "Factorizations of Band Matrices Using Level 3 {BLAS}", utknumber = "UT-CS-90-109", month = jul, year = 1990, } @TechReport{lawn22, URL = "http://www.netlib.org/lapack/lawnspdf/lawn22.pdf", number = 22, institution = "LAPACK Working Note", author = "James W. Demmel and Nicholas J. Higham", title = "Stability of Block Algorithms with Fast Level 3 {BLAS}", utknumber = "UT-CS-90-110", month = jul, year = 1990, } @TechReport{lawn23, URL = "http://www.netlib.org/lapack/lawnspdf/lawn23.pdf", number = 23, institution = "LAPACK Working Note", author = "James W. Demmel and Nicholas J. Higham", title = "Improved Error Bounds for Underdetermined System Solvers", utknumber = "UT-CS-90-113", month = aug, year = 1990, } @TechReport{lawn24, URL = "http://www.netlib.org/lapack/lawnspdf/lawn24.pdf", number = 24, institution = "LAPACK Working Note", author = "Jack J. Dongarra and L. Susan Ostrouchov", title = "{LAPACK} Block Factorization Algorithms on the Intel i{PSC}/860", utknumber = "UT-CS-90-115", month = oct, year = 1990, } @TechReport{lawn25, URL = "http://www.netlib.org/lapack/lawnspdf/lawn25.pdf", number = 25, institution = "LAPACK Working Note", author = "Jack J. 
Dongarra and Sven Hammarling and James H. Wilkinson", title = "Numerical Considerations in Computing Invariant Subspaces", utknumber = "UT-CS-90-117", month = oct, year = 1990, } @TechReport{lawn26, URL = "http://www.netlib.org/lapack/lawnspdf/lawn26.pdf", number = 26, institution = "LAPACK Working Note", author = "Edward Anderson and Christian Bischof and James W. Demmel and Jack J. Dongarra and Jeremy Du Croz and Sven Hammarling and W. Kahan", title = "Prospectus for an Extension to {LAPACK}: {A} Portable Linear Algebra Library for High-Performance Computers", utknumber = "UT-CS-90-118", month = nov, year = 1990, } @TechReport{lawn27, URL = "http://www.netlib.org/lapack/lawnspdf/lawn27.pdf", number = 27, institution = "LAPACK Working Note", author = "Jeremy Du Croz and Nicholas J. Higham", title = "Stability of Methods for Matrix Inversion", utknumber = "UT-CS-90-119", month = oct, year = 1990, } @TechReport{lawn28, abstract = "This paper discusses the IBM RISC System/6000 workstation and a set of experiments with blocked algorithms commonly used in solving problems in numerical linear algebra. We describe the performance of these algorithms and discuss the techniques used in achieving high performance on such an architecture.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn28.pdf", number = 28, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Peter Mayes and Guiseppe Radicati", title = "The {IBM} {RISC} {System/6000} and Linear Algebra Operations", utknumber = "UT-CS-90-122", month = dec, year = 1990, } @TechReport{lawn29, URL = "http://www.netlib.org/lapack/lawnspdf/lawn29.pdf", number = 29, institution = "LAPACK Working Note", author = "Robert A. van de Geijn", title = "On Global Combine Operations", utknumber = "UT-CS-91-129", month = apr, year = 1991, } @TechReport{lawn30, abstract = "In this paper, we describe a parallel implementation for the reduction of general and symmetric matrices to Hessenberg and tridiagonal form, respectively. 
The methods are based on LAPACK sequential codes and use a panel-wrapped mapping of matrices to nodes. Results from experiments on the Intel Touchstone Delta are given.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn30.pdf", number = 30, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Robert A. van de Geijn", title = "Reduction to Condensed Form for the Eigenvalue Problem on Distributed Memory Architectures", utknumber = "UT-CS-91-130", month = apr, year = 1991, } @TechReport{lawn31, abstract = "The purpose of this note is to re-introduce the generalized QR factorization with or without pivoting of two matrices $A$ and $B$ having the same number of rows, and whenever $B$ is square and nonsingular, the factorization implicitly gives the orthogonal factorization with or without pivoting of $B^{-1}A$. The GQR factorization was introduced early by Hammarling and Paige. But from the general-purpose software development point of view, we proposed the different factorization forms. In addition to the factorization forms and implementation details, we show the applications of GQR factorization in solving the linear equality constraint least square problem, generalized linear model. It is intended to show the possible usage of LAPACK codes for solving a class of generalized least square problems who arise from optimization and statistics on high-performance machines.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn31.pdf", number = 31, institution = "LAPACK Working Note", author = "Edward Anderson and Zhaojun Bai and Jack J. Dongarra", title = "Generalized {QR} Factorization and its Applications", utknumber = "UT-CS-91-131", month = apr, year = 1991, } @TechReport{lawn32, abstract = "This paper presents a generalization of {\em incremental condition estimation}, a technique for tracking the extremal singular values of a triangular matrix. 
While the original approach allowed for the estimation of the largest or smallest singular value, the generalized scheme allows for the estimation of {\em any number} of extremal singular values. For example, we can derive estimates for the three smallest singular values and the corresponding singular vectors at the same time. When estimating $k$ singular values at the same time, the cost of one step of our generalized scheme on an $n \times n$ matrix is $O(n k^2)$. Experimental results show that the resulting estimator does a good job of estimating the extremal singular values of triangular matrices and that, in particular, it leads to an inexpensive, yet very accurate and robust condition estimator.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn32.pdf", number = 32, institution = "LAPACK Working Note", author = "Christian Bischof and Ping Tak Peter Tang", title = "Generalized Incremental Condition Estimation", utknumber = "UT-CS-91-132", month = may, year = 1991, } @TechReport{lawn33, abstract = "This paper presents an improved version of {\em incremental condition estimation}, a technique for tracking the extremal singular values of a triangular matrix as it is being constructed one column at a time. We present a new motivation for this estimation technique using orthogonal projections. The paper focuses on an implementation of this estimation scheme in an accurate and consistent fashion. In particular, we address the subtle numerical issues arising in the computation of the eigensystem of a symmetric rank-one perturbed diagonal $2 \times 2$ matrix. 
Experimental results show that the resulting scheme does a good job in estimating the extremal singular values of triangular matrices, independent of matrix size and matrix condition number, and that it performs qualitatively in the same fashion as some of the commonly used nonincremental condition estimation schemes.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn33.pdf", number = 33, institution = "LAPACK Working Note", author = "Christian Bischof and Ping Tak Peter Tang", title = "Robust Incremental Condition Estimation", utknumber = "UT-CS-91-133", month = may, year = 1991, } @TechReport{lawn34, URL = "http://www.netlib.org/lapack/lawnspdf/lawn34.pdf", number = 34, institution = "LAPACK Working Note", author = "Jack J. Dongarra", title = "Workshop on the {BLACS}", utknumber = "UT-CS-91-134", month = may, year = 1991, } @TechReport{lawn35, abstract = "This working note describes how to install, test, and time the third and final test release of LAPACK, a linear algebra package for high-performance computers. Separate instructions are provided for the Unix and non-Unix versions of the test package. Further details are also given on the design of the test and timing programs.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn35.pdf", number = 35, institution = "LAPACK Working Note", author = "Edward Anderson and Jack J. Dongarra and L. Susan Ostrouchov", title = "Implementation guide for {LAPACK}", utknumber = "UT-CS-91-138", month = aug, year = 1991, } @TechReport{lawn36, abstract = "Fortran codes are presented for solving a triangular system when the triangular matrix is badly scaled or badly conditioned. These subroutines incorporate scaling to prevent overflow and thus are more robust than their counterparts STRSV, STPSV, and STBSV from the Level 2 BLAS. 
Solving badly conditioned triangular systems arises in condition estimation when the procedure developed by Hager and Higham is used to estimate the norm of $A^{-1}$ from the triangular factorization of $A$. We discuss situations in which scaling is necessary to prevent overflow and give an example of how our routines are used in the LAPACK condition estimators.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn36.pdf", number = 36, institution = "LAPACK Working Note", author = "Edward Anderson", title = "Robust Triangular solvers", utknumber = "UT-CS-91-142", month = aug, year = 1991, } @TechReport{lawn37, abstract = "In this paper, we describe extensions to a proposed set of linear algebra communication routines for communicating and manipulating data structures that are distributed among the memories of a distributed memory MIMD computer. In particular, recent experience shows that higher performance can be attained on such architectures when parallel dense matrix algorithms utilize a data distribution that views the computational nodes as a logical two dimensional mesh. The motivation for the BLACS continues to be to increase portability, efficiency and modularity at a high level. The audience of the BLACS are mathematical software experts and people with large scale scientific computation to perform. A systematic effort must be made to achieve a {\em de facto} standard for the BLACS.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn37.pdf", number = 37, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Robert A. van de Geijn", title = "Two Dimensional Basic Linear Algebra Communication Subprograms", utknumber = "UT-CS-91-138", month = oct, year = 1991, } @TechReport{lawn38, abstract = "We discuss a direct algorithm for reordering the eigenvalues on the diagonal of a matrix in real Schur form by performing an orthogonal similarity transformation. A new version of the algorithm is given. 
A detailed error analysis and software description are presented. Numerical examples show the superiority of our algorithm over previous algorithms.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn38.pdf", number = 38, institution = "LAPACK Working Note", author = "Zhaojun Bai and James W. Demmel", title = "On a Direct Algorithm for Computing Invariant Subspaces with Specified Eigenvalues", utknumber = "UT-CS-91-139", month = nov, year = 1991, } @TechReport{lawn39, abstract = "High quality portable numerical libraries have existed for many years. These libraries, such as LINPACK and EISPACK, were designed to be accurate, robust, efficient and portable in a Fortran environment of conventional uniprocessors, diverse floating point arithmetics, and limited input data structures. These libraries are no longer adequate on modern high performance computer architectures. We describe their inadequacies and how we are addressing them in the LAPACK project, a library of numerical linear algebra routines designed to supplant LINPACK and EISPACK. We shall show how the new architectures lead to important changes in the goals as well as the methods of library design.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn39.pdf", number = 39, institution = "LAPACK Working Note", author = "James W. Demmel and Jack J. Dongarra and W. Kahan", title = "On Designing Portable High Performance Numerical Libraries", utknumber = "UT-CS-91-141", month = jul, year = 1991, } @TechReport{lawn40, URL = "http://www.netlib.org/lapack/lawnspdf/lawn40.pdf", number = 40, institution = "LAPACK Working Note", author = "James W. Demmel and Nick Higham and Rob Schreiber", title = "Block {LU} Factorization", utknumber = "UT-CS-92-149", month = feb, year = 1992, } @TechReport{lawn41, abstract = "This working note describes how to install, test, and time version 2.0 of LAPACK, a linear algebra package for high-performance computers. 
Separate instructions are provided for the Unix and non-Unix versions of the test package. Further details are also given on the design of the test and timing programs.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn41.pdf", number = 41, institution = "LAPACK Working Note", author = "Susan Blackford and Jack J. Dongarra", title = "Installation Guide for {LAPACK}", utknumber = "UT-CS-92-151", note = "originally released March 1992", month = jun, year = 1999, } @TechReport{lawn42, URL = "http://www.netlib.org/lapack/lawnspdf/lawn42.pdf", number = 42, institution = "LAPACK Working Note", author = "Nick Higham", title = "Perturbation Theory and Backward Error for {AX}-{XB}= {C}", utknumber = "UT-CS-92-153", month = apr, year = 1992, } @TechReport{lawn43, abstract = "We discuss the essential design features of a library of scalable software for performing dense linear algebra computations on distributed memory concurrent computers. The square block scattered decomposition is proposed as a flexible and general-purpose way of decomposing most, if not all, dense matrix problems. An object-oriented interface to the library permits more portable applications to be written, and is easy to learn and use, since details of the parallel implementation are hidden from the user. Experiments on the Intel Touchstone Delta system with a prototype code that uses the square block scattered decomposition to perform LU factorization are presented and analyzed. It was found that the code was both scalable and efficient, performing at about 14 GFLOPS (double precision) for the largest problem considered.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn43.pdf", number = 43, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Robert A. van de Geijn and David W. 
Walker", title = "A Look at Scalable Dense Linear Algebra Libraries", utknumber = "UT-CS-92-155", month = apr, year = 1992, } @TechReport{lawn44, URL = "http://www.netlib.org/lapack/lawnspdf/lawn44.pdf", number = 44, institution = "LAPACK Working Note", author = "Edward Anderson and Jack J. Dongarra", title = "Performance of {LAPACK}: {A} Portable Library of Numerical Linear Algebra Routines", utknumber = "UT-CS-92-156", month = may, year = 1992, } @TechReport{lawn45, URL = "http://www.netlib.org/lapack/lawnspdf/lawn45.pdf", number = 45, institution = "LAPACK Working Note", author = "James W. Demmel", title = "The Inherent Inaccuracy of Implicit Tridiagonal {QR}", utknumber = "UT-CS-92-162", month = may, year = 1992, } @TechReport{lawn46, abstract = "We present a variation of Paige's algorithm for computing the generalized singular value decomposition (GSVD) of two matrices $A$ and $B$. There are two innovations. The first is a new preprocessing step which reduces $A$ and $B$ to upper triangular forms satisfying certain rank conditions. The second is a new 2 by 2 triangular GSVD algorithm, which constitutes the inner loop of Paige's algorithm. We present proofs of stability and high accuracy of the 2 by 2 GSVD algorithm, and demonstrate it using examples on which all previous algorithms fail.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn46.pdf", number = 46, institution = "LAPACK Working Note", author = "Zhaojun Bai and James W. Demmel", title = "Computing the Generalized Singular Value Decomposition", utknumber = "UT-CS-92-163", month = may, year = 1992, } @TechReport{lawn47, abstract = "The original goal of the LAPACK project was to design and implement a portable linear algebra library that would be very efficient on high-performance machines. During the project it became apparent we could also significantly improve the accuracy of many standard algorithms in linear algebra, with little or no sacrifice of speed. 
This work has led to new perturbation theory, new algorithms and new error analyses for many problems, as well as many still unsolved problems. In this paper we survey some of these new results, and discuss open problems in four related areas: high accuracy algorithms, parallel algorithms, the complexity of condition estimation, and exploiting IEEE standard floating point arithmetic.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn47.pdf", number = 47, institution = "LAPACK Working Note", author = "James W. Demmel", title = "Open Problems in Numerical Linear Algebra", utknumber = "UT-CS-92-164", month = may, year = 1992, } @TechReport{lawn48, abstract = "It is known that small relative perturbations in the entries of a bidiagonal matrix only cause small relative perturbations in its singular values, independent of the values of the matrix entries. In this paper we show that a matrix has this property if and only if its associated bipartite graph is acyclic. We also show how to compute the singular values of such a matrix to high relative accuracy. The same algorithm can compute eigenvalues of symmetric matrices with acyclic graphs with tiny componentwise relative backward error. This class includes tridiagonal matrices, arrow matrices, and exponentially many others.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn48.pdf", number = 48, institution = "LAPACK Working Note", author = "James W. Demmel and William B. Gragg", title = "On Computing Accurate Singular Values and Eigenvalues of Matrices with Acyclic Graphs", utknumber = "UT-CS-92-166", month = may, year = 1992, } @TechReport{lawn49, abstract = "Parallel prefix is a useful operation for various linear algebra operations, including solving bidiagonal systems of equations and finding the eigenvalues of a symmetric tridiagonal matrix. 
However, the simplest implementations of parallel prefix for the operations of scalar floating point add and scalar floating point multiply are inadequate to solve these important problems. This is because they are too susceptible to over/underflow, and because they apparently cannot solve the general two term recurrence needed to find eigenvalues. In this note we propose a specification for parallel prefix operations overcoming these drawbacks.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn49.pdf", number = 49, institution = "LAPACK Working Note", author = "James W. Demmel", title = "A Specification for Floating Point Parallel Prefix", utknumber = "UT-CS-92-167", month = may, year = 1992, } @TechReport{lawn50, URL = "http://www.netlib.org/lapack/lawnspdf/lawn50.pdf", number = 50, institution = "LAPACK Working Note", author = "Victor L. Eijkhout", title = "Distributed Sparse Data Structures for Linear Algebra Operations", utknumber = "UT-CS-92-169", month = may, year = 1992, } @TechReport{lawn51, URL = "http://www.netlib.org/lapack/lawnspdf/lawn51.pdf", number = 51, institution = "LAPACK Working Note", author = "Victor L. Eijkhout", title = "Qualitative Properties of the Conjugate Gradient and Lanczos Methods in a Matrix Framework", utknumber = "UT-CS-92-170", month = may, year = 1992, } @TechReport{lawn52, abstract = "This paper is concerned with the distributed parallel computation of an ordering for a symmetric positive definite sparse matrix. The purpose of the ordering is to limit fill and enhance concurrency in the subsequent computation of the Cholesky factorization of the matrix. We use a geometric approach to nested dissection based on a given Cartesian embedding of the graph of the matrix in Euclidean space. The resulting algorithm can be implemented efficiently on massively parallel, distributed memory computers. 
One unusual feature of the distributed algorithm is that its effectiveness does not depend strongly on data locality, which is critical in this context, since an appropriate partitioning of the problem is not known until after the ordering has been determined. The ordering algorithm is the first component in a suite of scalable parallel algorithms currently under development for solving large sparse linear systems on massively parallel computers.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn52.pdf", number = 52, institution = "LAPACK Working Note", author = "Michael T. Heath and Padma Raghavan", title = "A Cartesian Parallel Nested Dissection Algorithm", utknumber = "UT-CS-92-178", month = jun, year = 1992, } @TechReport{lawn53, abstract = "The fastest parallel algorithm for a problem may be significantly less stable numerically than the fastest serial algorithm. We illustrate this phenomenon by a series of examples drawn from numerical linear algebra. We also show how some of these instabilities may be mitigated by better floating point arithmetic.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn53.pdf", number = 53, institution = "LAPACK Working Note", author = "James W. Demmel", title = "Trading Off Parallelism and Numerical Stability", utknumber = "UT-CS-92-179", month = jun, year = 1992, } @TechReport{lawn54, abstract = "We discuss a new version of an existing algorithm for reordering the eigenvalues on the diagonal of a matrix in real Schur form by performing an orthogonal similarity transformation. A detailed error analysis and software description are presented. Numerical examples show the superiority of our algorithm over previous algorithms.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn54.pdf", number = 54, institution = "LAPACK Working Note", author = "Zhaojun Bai and James W. 
Demmel", title = "On Swapping Diagonal Blocks in Real Schur Form", utknumber = "UT-CS-92-182", month = oct, year = 1992, } @TechReport{lawn55, URL = "http://www.netlib.org/lapack/lawnspdf/lawn55.pdf", number = 55, institution = "LAPACK Working Note", author = "Jaeyoung Choi and Jack J. Dongarra and Roldan Pozo and David W. Walker", title = "{ScaLAPACK}: {A} Scalable Linear Algebra for Distributed Memory Concurrent Computers", utknumber = "UT-CS-92-181", month = nov, year = 1992, } @TechReport{lawn56, URL = "http://www.netlib.org/lapack/lawnspdf/lawn56.pdf", number = 56, institution = "LAPACK Working Note", author = "E. F. D'Azevedo and Victor L. Eijkhout and Charles H. Romine", title = "Reducing Communication Costs in the Conjugate Gradient Algorithm on Distributed Memory Multiprocessors", utknumber = "UT-CS-93-185", month = jan, year = 1993, } @TechReport{lawn57, URL = "http://www.netlib.org/lapack/lawnspdf/lawn57.pdf", number = 57, institution = "LAPACK Working Note", author = "Jaeyoung Choi and Jack J. Dongarra and David W. Walker", title = "{PUMMA}: Parallel Universal Matrix Multiplication Algorithms on Distributed Memory Concurrent Computers", utknumber = "UT-CS-93-187", month = may, year = 1993, } @TechReport{lawn58, abstract = "This paper discusses the design of linear algebra libraries for high performance computers. Particular emphasis is placed on the development of scalable algorithms for MIMD distributed memory concurrent computers. A brief description of the EISPACK, LINPACK, and LAPACK libraries is given, followed by an outline of ScaLAPACK, which is a distributed memory version of LAPACK currently under development. The importance of block-partitioned algorithms in reducing the frequency of data movement between different levels of hierarchical memory is stressed. The use of such algorithms helps reduce the message startup costs on distributed memory concurrent computers. 
Other key ideas in our approach are the use of distributed versions of the Level 3 Basic Linear Algebra Subprograms (BLAS) as computational building blocks, and the use of Basic Linear Algebra Communication Subprograms (BLACS) as communication building blocks. Together the distributed BLAS and the BLACS can be used to construct higher-level algorithms, and hide many details of the parallelism from the application developer. The block-cyclic data distribution is described, and adopted as a good way of distributing block-partitioned matrices. Block-partitioned versions of the Cholesky and LU factorizations are presented, and optimization issues associated with the implementation of the LU factorization algorithm on distributed memory concurrent computers are discussed, together with its performance on the Intel Delta system. Finally, approaches to the design of library interfaces are reviewed.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn58.pdf", number = 58, institution = "LAPACK Working Note", author = "Jack J. Dongarra and David W. Walker", title = "The Design of Linear Algebra Libraries for High Performance Computers", utknumber = "UT-CS-93-188", month = jun, year = 1993, } @TechReport{lawn59, abstract = "An attractive paradigm for building fast numerical algorithms is the following: (1) try a fast but occasionally unstable algorithm, (2) test the accuracy of the computed answer, and (3) recompute the answer slowly and accurately in the unlikely event it is necessary. This is especially attractive on parallel machines where the fastest algorithms may be less stable than the best serial algorithms. Since unstable algorithms can overflow or cause other exceptions, exception handling is needed to implement this paradigm safely. To implement it efficiently, exception handling cannot be too slow.
We illustrate this paradigm with numerical linear algebra algorithms from the LAPACK library.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn59.pdf", number = 59, institution = "LAPACK Working Note", author = "James W. Demmel and Xiaoye S. Li", title = "Faster Numerical Algorithms via Exception Handling", utknumber = "UT-CS-93-192", month = mar, year = 1993, } @TechReport{lawn60, abstract = "We survey general techniques and open problems in numerical linear algebra on parallel architectures. We first discuss basic principles of parallel processing, describing the costs of basic operations on parallel machines, including general principles for constructing efficient algorithms. We illustrate these principles using current architectures and software systems, and by showing how one would implement matrix multiplication. Then, we present direct and iterative algorithms for solving linear systems of equations, linear least squares problems, the symmetric eigenvalue problem, the nonsymmetric eigenvalue problem, and the singular value decomposition. We consider dense, band and sparse matrices.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn60.pdf", number = 60, institution = "LAPACK Working Note", author = "James W. Demmel and Michael T. Heath and Henk A. van der Vorst", title = "Parallel Numerical Linear Algebra", utknumber = "UT-CS-93-192", month = mar, year = 1993, } @TechReport{lawn61, URL = "http://www.netlib.org/lapack/lawnspdf/lawn61.pdf", number = 61, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Roldan Pozo and David W. Walker", title = "An Object Oriented Design for High Performance Linear Algebra on Distributed Memory Architectures", utknumber = "UT-CS-93-200", month = aug, year = 1993, } @TechReport{lawn62, URL = "http://www.netlib.org/lapack/lawnspdf/lawn62.pdf", number = 62, institution = "LAPACK Working Note", author = "Michael T. 
Heath and Padma Raghavan", title = "Distributed Solution of Sparse Linear Systems", utknumber = "UT-CS-93-201", month = aug, year = 1993, } @TechReport{lawn63, URL = "http://www.netlib.org/lapack/lawnspdf/lawn63.pdf", number = 63, institution = "LAPACK Working Note", author = "Michael T. Heath and Padma Raghavan", title = "Line and Plane Separators", utknumber = "UT-CS-93-202", month = aug, year = 1993, } @TechReport{lawn64, URL = "http://www.netlib.org/lapack/lawnspdf/lawn64.pdf", number = 64, institution = "LAPACK Working Note", author = "Padma Raghavan", title = "Distributed Sparse Gaussian Elimination and Orthogonal Factorization", utknumber = "UT-CS-93-203", month = aug, year = 1993, } @TechReport{lawn65, URL = "http://www.netlib.org/lapack/lawnspdf/lawn65.pdf", number = 65, institution = "LAPACK Working Note", author = "Jaeyoung Choi and Jack J. Dongarra and David W. Walker", title = "Parallel Matrix Transpose Algorithms on Distributed Memory Concurrent Computers", utknumber = "UT-CS-93-215", month = nov, year = 1993, } @TechReport{lawn66, URL = "http://www.netlib.org/lapack/lawnspdf/lawn66.pdf", number = 66, institution = "LAPACK Working Note", author = "Victor L. Eijkhout", title = "A Characterization of Polynomial Iterative Methods", utknumber = "UT-CS-93-216", month = nov, year = 1993, } @TechReport{lawn67, abstract = "In this paper, we make efficient use of pipelining on LU decomposition with pivoting and a column-scattered data decomposition to derive precise variations of the computational complexities. We then compare these results with experiments on the Intel iPSC/860 and Paragon machines.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn67.pdf", number = 67, institution = "LAPACK Working Note", author = "Frédéric Desprez and Jack J. 
Dongarra and Bernard Tourancheau", title = "Performance Complexity of {LU} Factorization with Efficient Pipelining and Overlap on a Multiprocessor", utknumber = "UT-CS-93-218", month = dec, year = 1993, } @TechReport{lawn68, URL = "http://www.netlib.org/lapack/lawnspdf/lawn68.pdf", number = 68, institution = "LAPACK Working Note", author = "Michael W. Berry and Jack J. Dongarra and Youngbae Kim", title = "A Highly Parallel Algorithm for the Reduction of a Nonsymmetric Matrix to Block Upper-Hessenberg Form", utknumber = "UT-CS-94-221", month = feb, year = 1994, } @TechReport{lawn69, URL = "http://www.netlib.org/lapack/lawnspdf/lawn69.pdf", number = 69, institution = "LAPACK Working Note", author = "Jeffery D. Rutter", title = "A Serial Implementation of Cuppen's Divide and Conquer Algorithm for the Symmetric Eigenvalue Problem", utknumber = "UT-CS-94-225", month = mar, year = 1994, } @TechReport{lawn70, URL = "http://www.netlib.org/lapack/lawnspdf/lawn70.pdf", number = 70, institution = "LAPACK Working Note", author = "James W. Demmel and Inderjit S. Dhillon and Huan Ren", title = "On the Correctness of Parallel Bisection in Floating Point", utknumber = "UT-CS-94-228", month = mar, year = 1994, } @TechReport{lawn71, URL = "http://www.netlib.org/lapack/lawnspdf/lawn71.pdf", number = 71, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Michael Kolatis", title = "{IBM} {RS}/6000-550 \& -590 Performance for Selected Routines in {ESSL}", utknumber = "UT-CS-94-231", month = apr, year = 1994, } @TechReport{lawn72, URL = "http://www.netlib.org/lapack/lawnspdf/lawn72.pdf", number = 72, institution = "LAPACK Working Note", author = "Rich Lehoucq", title = "The Computation of Elementary Unitary Matrices", utknumber = "UT-CS-94-233", month = oct, year = 1995, } @TechReport{lawn73, URL = "http://www.netlib.org/lapack/lawnspdf/lawn73.pdf", number = 73, institution = "LAPACK Working Note", author = "R.
Clint Whaley", title = "Basic Linear Algebra Communication Subprograms: Analysis and Implementation Across Multiple Parallel Architectures", utknumber = "UT-CS-94-234", month = may, year = 1994, } @TechReport{lawn74, URL = "http://www.netlib.org/lapack/lawnspdf/lawn74.pdf", number = 74, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Andrew Lumsdaine and Xinhui Niu and Roldan Pozo and Karin Remington", title = "A Sparse Matrix Library in {C}++ for High Performance Architectures", utknumber = "UT-CS-94-236", month = jul, year = 1994, } @TechReport{lawn75, URL = "http://www.netlib.org/lapack/lawnspdf/lawn75.pdf", number = 75, institution = "LAPACK Working Note", author = "Bo Kågström and Peter Poromaa", title = "{LAPACK}-Style Algorithms and Software for Solving the Generalized Sylvester Equation and Estimating the Separation Between Regular Matrix Pairs", utknumber = "UT-CS-94-237", month = jul, year = 1994, } @TechReport{lawn76, URL = "http://www.netlib.org/lapack/lawnspdf/lawn76.pdf", number = 76, institution = "LAPACK Working Note", author = "Richard Barrett and Michael Berry and Jack J. Dongarra and Victor L. Eijkhout and Charles Romine", title = "Algorithmic Bombardment for the Iterative Solution of Linear Systems: {A} Poly-Iterative Approach", utknumber = "UT-CS-94-239", month = aug, year = 1994, } @TechReport{lawn77, URL = "http://www.netlib.org/lapack/lawnspdf/lawn77.pdf", number = 77, institution = "LAPACK Working Note", author = "Victor L. Eijkhout and Roldan Pozo", title = "Basic Concepts for Distributed Sparse Linear Algebra Operations", utknumber = "UT-CS-94-240", month = aug, year = 1994, } @TechReport{lawn78, URL = "http://www.netlib.org/lapack/lawnspdf/lawn78.pdf", number = 78, institution = "LAPACK Working Note", author = "Victor L.
Eijkhout", title = "Computational variants of the {CGS} and {BiCGstab} methods", utknumber = "UT-CS-94-241", month = aug, year = 1994, } @TechReport{lawn79, URL = "http://www.netlib.org/lapack/lawnspdf/lawn79.pdf", number = 79, institution = "LAPACK Working Note", author = "Greg Henry and Robert A. van de Geijn", title = "Parallelizing the {QR} Algorithm for the Unsymmetric Algebraic Eigenvalue Problem: Myths and Reality", utknumber = "UT-CS-94-244", month = aug, year = 1994, } @TechReport{lawn80, URL = "http://www.netlib.org/lapack/lawnspdf/lawn80.pdf", number = 80, institution = "LAPACK Working Note", author = "Jaeyoung Choi and Jack J. Dongarra and L. Susan Ostrouchov and Antoine P. Petitet and David W. Walker and R. Clint Whaley", title = "The Design and Implementation of the {ScaLAPACK} {LU}, {QR}, and Cholesky Factorization Routines", utknumber = "UT-CS-94-246", month = sep, year = 1994, } @TechReport{lawn81, abstract = "This working note describes how to install, test, and time version 2.0 of LAPACK, a linear algebra package for high-performance computers, on a Unix System. Non-Unix installation instructions and further details of the testing and timing suites are only contained in LAPACK Working Note 41, and not in this abbreviated version.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn81.pdf", number = 81, institution = "LAPACK Working Note", author = "L. Susan Blackford and Jack J. Dongarra", title = "Quick Installation Guide for {LAPACK} on Unix Systems", utknumber = "UT-CS-94-249", note = "originally released September 1994", month = jun, year = 1999, } @TechReport{lawn82, abstract = "This document reviews the initial version of the Call Conversion Interface (CCI) from LAPACK to the Engineering and Scientific Subroutine Library (ESSL). The CCI substitutes a call to an ESSL subroutine in place of an LAPACK routine whenever ESSL subroutines provide either functional or near-functional equivalence. 
In either case, the ESSL subroutine will be used only if its calling sequence can be made to fit that of LAPACK in structure. Finally, the CCI consists of several parts: a list of possible subroutine matchings, interfacing requirements, the successes and failures of those matchings, timings (LAPACK vs. the CCI), and availability.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn82.pdf", number = 82, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Michael Kolatis", title = "Call Conversion Interface ({CCI}) for {LAPACK}/{ESSL}", utknumber = "UT-CS-94-250", month = aug, year = 1994, } @TechReport{lawn83, abstract = "Let $B$ be an $m\times n$ ($m\ge n$) complex matrix. It is known that there is a unique {\em polar decomposition} $B=QH$, where $Q^*Q=I$, the $n\times n$ identity matrix, and $H$ is positive definite, provided $B$ has full column rank. This paper addresses the following question: how much may $Q$ change if $B$ is perturbed to $\widetilde B=D_1^*BD_2$? Here $D_1$ and $D_2$ are two nonsingular matrices and close to the identities of suitable dimensions. Known perturbation bounds for complex matrices indicate that in the worst case, the change in $Q$ is proportional to the reciprocal of the smallest singular value of $B$. In this paper, we will prove that for the above mentioned perturbations to $B$, the change in $Q$ is bounded only by the distances from $D_1$ and $D_2$ to identities! 
As an application, we will consider perturbations for one-side scaling, i.e., the case when $G=D^*B$ is perturbed to $\widetilde G=D^*\widetilde B$, where $D$ is usually a nonsingular diagonal scaling matrix but for our purpose we do not have to assume this, and $B$ and $\widetilde B$ are nonsingular.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn83.pdf", number = 83, institution = "LAPACK Working Note", author = "Ren-Cang Li", title = "Relative Perturbation Bounds for the Unitary Polar Factor", utknumber = "UT-CS-94-251", month = sep, year = 1994, } @TechReport{lawn84, abstract = "In this paper, we consider how eigenvalues of a matrix $A$ change when it is perturbed to $\widetilde A=D_1^*AD_2$ and how singular values of a (nonsquare) matrix $B$ change when it is perturbed to $\widetilde B=D_1^*BD_2$, where $D_1$ and $D_2$ are assumed to be close to unitary matrices of suitable dimensions. We have been able to generalize many well-known perturbation theorems, including Hoffman-Wielandt theorem and Weyl-Lidskii theorem. As applications, we obtained bounds for perturbations of graded matrices in both singular value problems and nonnegative definite Hermitian eigenvalue problems.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn84.pdf", number = 84, institution = "LAPACK Working Note", author = "Ren-Cang Li", title = "Relative Perturbation Theory: ({I}) Eigenvalue Variations", utknumber = "UT-CS-94-252", month = sep, year = 1994, } @TechReport{lawn85, abstract = "In this paper, we consider how eigenspaces of a Hermitian matrix $A$ change when it is perturbed to $\widetilde A=D^*AD$ and how singular values of a (nonsquare) matrix $B$ change when it is perturbed to $\widetilde B=D_1^*BD_2$, where $D$, $D_1$ and $D_2$ are assumed to be close to identity matrices of suitable dimensions, or either $D_1$ or $D_2$ close to some unitary matrix. We have been able to generalize well-known Davis-Kahan $\sin\theta$ theorems. 
As applications, we obtained bounds for perturbations of graded matrices.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn85.pdf", number = 85, institution = "LAPACK Working Note", author = "Ren-Cang Li", title = "Relative Perturbation Theory: ({II}) Eigenspace Variations", utknumber = "UT-CS-94-253", month = sep, year = 1994, } @TechReport{lawn86, abstract = "We discuss timing and performance modeling of a routine to find all the eigenvalues and eigenvectors of a dense symmetric matrix on distributed memory computers. The routine, \pdsyevx, is part of the \scalapack\ library. It is based on bisection and inverse iteration, but is not designed to guarantee orthogonality of eigenvectors in the presence of clustered eigenvalues. We use our validated performance model to conclude that \pdsyevx\ is very efficient for large enough problem sizes, nearly independently of input and output data layouts. However, efficiency will be low if interprocessor communication is too slow, such as on conventional workstation networks, or if per processor memory is too small, such as on the Intel Gamma. Modeling also helps us choose the appropriate algorithm to deal with clusters.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn86.pdf", number = 86, institution = "LAPACK Working Note", author = "James W. 
Demmel and Ken Stanley", title = "The Performance of Finding Eigenvalues and Eigenvectors of Dense Symmetric Matrices on Distributed Memory Computers", utknumber = "UT-CS-94-254", month = sep, year = 1994, } @TechReport{lawn87, URL = "http://www.netlib.org/lapack/lawnspdf/lawn87.pdf", number = 87, institution = "LAPACK Working Note", author = "Bo Kågström and Peter Poromaa", title = "Computing Eigenspaces with Specified Eigenvalues of a Regular Matrix Pair ({A},{B}) and Condition Estimation: Theory, Algorithms and Software", utknumber = "UT-CS-94-255", month = sep, year = 1994, } @TechReport{lawn88, URL = "http://www.netlib.org/lapack/lawnspdf/lawn88.pdf", number = 88, institution = "LAPACK Working Note", author = "Ming Gu and James W. Demmel and Inderjit S. Dhillon", title = "Efficient Computation of the Singular Value Decomposition with Applications to Least Squares Problems", utknumber = "UT-CS-94-257", month = oct, year = 1994, } @TechReport{lawn89, URL = "http://www.netlib.org/lapack/lawnspdf/lawn89.pdf", number = 89, institution = "LAPACK Working Note", author = "Ren-Cang Li", title = "Solving Secular Equations Stably and Efficiently", utknumber = "UT-CS-94-260", month = nov, year = 1994, } @TechReport{lawn90, URL = "http://www.netlib.org/lapack/lawnspdf/lawn90.pdf", number = 90, institution = "LAPACK Working Note", author = "James S. Plank and Youngbae Kim and Jack J. Dongarra", title = "Algorithm-Based Diskless Checkpointing for Fault Tolerant Matrix Operations", utknumber = "UT-CS-94-268", month = dec, year = 1994, } @TechReport{lawn91, URL = "http://www.netlib.org/lapack/lawnspdf/lawn91.pdf", number = 91, institution = "LAPACK Working Note", author = "Zhaojun Bai and James W. Demmel and Jack J. Dongarra and Antoine P.
Petitet and Howard Robinson and Ken Stanley", title = "The Spectral Decomposition of Nonsymmetric Matrices on Distributed Memory Computers", utknumber = "UT-CS-95-273", month = jan, year = 1995, } @TechReport{lawn92, URL = "http://www.netlib.org/lapack/lawnspdf/lawn92.pdf", number = 92, institution = "LAPACK Working Note", author = "Jaeyoung Choi and Jack J. Dongarra and David W. Walker", title = "The Design of a Parallel Dense Linear Algebra Software Library: Reduction to Hessenberg, Tridiagonal, and Bidiagonal Form", utknumber = "UT-CS-95-275", month = feb, year = 1995, } @TechReport{lawn93, abstract = "This working note describes how to install and test version 1.0 of ScaLAPACK. These two-dimensional distributed memory versions of common LAPACK routines rely on calls to the BLAS for local computation, and calls to the PBLAS for global computations. For portability concerns, communication takes place inside the PBLAS through calls to the BLACS. The design of the testing/timing programs for the ScaLAPACK codes is also discussed.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn93.pdf", number = 93, institution = "LAPACK Working Note", author = "Jaeyoung Choi and James W. Demmel and Inderjit S. Dhillon and Jack J. Dongarra and L. Susan Ostrouchov and Antoine P. Petitet and Ken Stanley and David W. Walker and R. Clint Whaley", title = "Installation Guide for {ScaLAPACK} v1.7", note = "originally released March 1995", utknumber = "UT-CS-95-280", month = aug, year = 2001, } @TechReport{lawn94, URL = "http://www.netlib.org/lapack/lawnspdf/lawn94.pdf", number = 94, institution = "LAPACK Working Note", author = "Jack J. Dongarra and R. Clint Whaley", title = "A User's Guide to the {BLACS} v1.1", utknumber = "UT-CS-95-281", note = "originally released March 1995", month = may, year = 1997, } @TechReport{lawn95, URL = "http://www.netlib.org/lapack/lawnspdf/lawn95.pdf", number = 95, institution = "LAPACK Working Note", author = "Jaeyoung Choi and James W. 
Demmel and Inderjit S. Dhillon and Jack J. Dongarra and L. Susan Ostrouchov and Antoine P. Petitet and Ken Stanley and David W. Walker and R. Clint Whaley", title = "{ScaLAPACK}: {A} Portable Linear Algebra Library for Distributed Memory Computers - Design Issues and Performance", utknumber = "UT-CS-95-283", month = mar, year = 1995, } @TechReport{lawn96, URL = "http://www.netlib.org/lapack/lawnspdf/lawn96.pdf", number = 96, institution = "LAPACK Working Note", author = "Robert A. van de Geijn and Jerrell Watts", title = "{SUMMA}: Scalable Universal Matrix Multiplication Algorithm", utknumber = "UT-CS-95-286", month = apr, year = 1995, } @TechReport{lawn97, URL = "http://www.netlib.org/lapack/lawnspdf/lawn97.pdf", number = 97, institution = "LAPACK Working Note", author = "Soumen Chakrabarti and James Demmel and Katherine A. Yelick", title = "Modeling the Benefits of Mixed Data and Task Parallelism", utknumber = "UT-CS-95-289", month = may, year = 1995, } @TechReport{lawn98, URL = "http://www.netlib.org/lapack/lawnspdf/lawn98.pdf", number = 98, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Roldan Pozo and David W. Walker", title = "{LAPACK}++ v. 1.0: High Performance Linear Algebra Users' Guide", utknumber = "UT-CS-95-290", month = may, year = 1995, } @TechReport{lawn99, URL = "http://www.netlib.org/lapack/lawnspdf/lawn99.pdf", number = 99, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Victor L. Eijkhout and Ajay Kalhan", title = "Reverse Communication Interface for Linear Algebra Templates for Iterative Methods", utknumber = "UT-CS-95-291", month = may, year = 1995, } @TechReport{lawn100, URL = "http://www.netlib.org/lapack/lawnspdf/lawn100.pdf", number = 100, institution = "LAPACK Working Note", author = "Jaeyoung Choi and Jack J. Dongarra and L. Susan Ostrouchov and Antoine P. Petitet and David W. Walker and R. 
Clint Whaley", title = "A Proposal for a Set of Parallel Basic Linear Algebra Subprograms", utknumber = "UT-CS-95-292", month = may, year = 1995, } @TechReport{lawn101, URL = "http://www.netlib.org/lapack/lawnspdf/lawn101.pdf", number = 101, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Jeremy Du Croz and Sven Hammarling and Jerzy Wasniewski and Adam Zemla", title = "A Proposal for a Fortran 90 Interface for {LAPACK}", utknumber = "UT-CS-95-295", month = jul, year = 1995, } @TechReport{lawn102, URL = "http://www.netlib.org/lapack/lawnspdf/lawn102.pdf", number = 102, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Andrew Lumsdaine and Roldan Pozo and Karin Remington", title = "{IML}++ v. 1.2: Iterative Methods Library Reference Guide", utknumber = "UT-CS-95-303", month = aug, year = 1995, } @TechReport{lawn103, URL = "http://www.netlib.org/lapack/lawnspdf/lawn103.pdf", number = 103, institution = "LAPACK Working Note", author = "James W. Demmel and Stanley C. Eisenstat and John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu", title = "A Supernodal Approach to Sparse Partial Pivoting", utknumber = "UT-CS-95-304", month = sep, year = 1995, } @TechReport{lawn104, URL = "http://www.netlib.org/lapack/lawnspdf/lawn104.pdf", number = 104, institution = "LAPACK Working Note", author = "Nicholas J. Higham", title = "Iterative Refinement and {LAPACK}", utknumber = "UT-CS-95-308", month = oct, year = 1995, } @TechReport{lawn105, URL = "http://www.netlib.org/lapack/lawnspdf/lawn105.pdf", number = 105, institution = "LAPACK Working Note", author = "Nicholas J. Higham", title = "Stability of the Diagonal Pivoting Method with Partial Pivoting", utknumber = "UT-CS-95-309", month = oct, year = 1995, } @TechReport{lawn106, URL = "http://www.netlib.org/lapack/lawnspdf/lawn106.pdf", number = 106, institution = "LAPACK Working Note", author = "Zhaojun Bai and David Day and James W. Demmel and Jack J. 
Dongarra and Ming Gu and Axel Ruhe and Henk van der Vorst", title = "Templates for Linear Algebra Problems", utknumber = "UT-CS-95-311", month = oct, year = 1995, } @TechReport{lawn107, URL = "http://www.netlib.org/lapack/lawnspdf/lawn107.pdf", number = 107, institution = "LAPACK Working Note", author = "Bo Kågström and Per Ling and Charles Van Loan", title = "{GEMM}-Based Level 3 {BLAS}: High-Performance Model Implementations and Performance Evaluation Benchmark", utknumber = "UT-CS-95-315", month = nov, year = 1995, } @TechReport{lawn108, URL = "http://www.netlib.org/lapack/lawnspdf/lawn108.pdf", number = 108, institution = "LAPACK Working Note", author = "Bo Kågström and Per Ling and Charles Van Loan", title = "{GEMM}-Based Level 3 {BLAS}: Installation, Tuning and Use of the Model Implementations and the Performance Evaluation Benchmark", utknumber = "UT-CS-95-316", month = nov, year = 1995, } @TechReport{lawn109, URL = "http://www.netlib.org/lapack/lawnspdf/lawn109.pdf", number = 109, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Sven Hammarling and L. Susan Ostrouchov", title = "{BLAS} Technical Workshop", utknumber = "UT-CS-95-317", month = nov, year = 1995, } @TechReport{lawn110, URL = "http://www.netlib.org/lapack/lawnspdf/lawn110.pdf", number = 110, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Sven Hammarling and David W. Walker", title = "Key Concepts For Parallel Out-Of-Core {LU} Factorization", utknumber = "UT-CS-96-324", month = apr, year = 1996, } @TechReport{lawn111, URL = "http://www.netlib.org/lapack/lawnspdf/lawn111.pdf", number = 111, institution = "LAPACK Working Note", author = "Jeff Bilmes and Krste Asanovic and James W.
Demmel and Dominic Lam and Chee-Whye Chin", title = "Optimizing Matrix Multiply using {PHiPAC}: {A} Portable, High-Performance, {ANSI} {C} Coding Methodology", utknumber = "UT-CS-96-326", month = may, year = 1996, } @TechReport{lawn112, URL = "http://www.netlib.org/lapack/lawnspdf/lawn112.pdf", number = 112, institution = "LAPACK Working Note", author = "L. Susan Blackford and Andrew Cleary and James W. Demmel and Inderjit S. Dhillon and Jack J. Dongarra and Sven Hammarling and Antoine P. Petitet and Huan Ren and Ken Stanley and R. Clint Whaley", title = "Practical Experience in the Dangers of Heterogeneous Computing", utknumber = "UT-CS-96-330", month = jul, year = 1996, } @TechReport{lawn113, URL = "http://www.netlib.org/lapack/lawnspdf/lawn113.pdf", number = 113, institution = "LAPACK Working Note", author = "Gregorio Quintana-Ortí and Enrique S. Quintana-Ortí and Antoine P. Petitet", title = "Block-Partitioned Algorithms for Solving the Linear Least Squares Problem", utknumber = "UT-CS-96-333", month = jul, year = 1996, } @TechReport{lawn114, URL = "http://www.netlib.org/lapack/lawnspdf/lawn114.pdf", number = 114, institution = "LAPACK Working Note", author = "Gregorio Quintana-Ortí and Xiaobai Sun and Christian Bischof", title = "A {BLAS}-3 Version of the {QR} Factorization with Column Pivoting", utknumber = "UT-CS-96-334", month = aug, year = 1996, } @TechReport{lawn115, URL = "http://www.netlib.org/lapack/lawnspdf/lawn115.pdf", number = 115, institution = "LAPACK Working Note", author = "Huan Ren", title = "On the Error Analysis and Implementation of Some Eigenvalue Decomposition and Singular Value Decomposition Algorithms", utknumber = "UT-CS-96-336", month = sep, year = 1996, } @TechReport{lawn116, URL = "http://www.netlib.org/lapack/lawnspdf/lawn116.pdf", number = 116, institution = "LAPACK Working Note", author = "Majed Sidani and Bill Harrod", title = "Parallel Matrix Distributions: Have we been doing it all right?", utknumber = "UT-CS-96-340", month = 
nov, year = 1996, } @TechReport{lawn117, URL = "http://www.netlib.org/lapack/lawnspdf/lawn117.pdf", number = 117, institution = "LAPACK Working Note", author = "L. Susan Blackford and Jack J. Dongarra and Jeremy Du Croz and Sven Hammarling and Jerzy Wasniewski", title = "A Fortran 90 Interface for {LAPACK}", utknumber = "UT-CS-96-341", month = dec, year = 1996, } @TechReport{lawn118, URL = "http://www.netlib.org/lapack/lawnspdf/lawn118.pdf", number = 118, institution = "LAPACK Working Note", author = "Jack J. Dongarra and Eduardo F. D'Azevedo", title = "The Design and Implementation of the Parallel Out-of-core {ScaLAPACK} {LU}, {QR}, and Cholesky Factorization Routines", utknumber = "UT-CS-97-347", month = jan, year = 1997, } @TechReport{lawn119, URL = "http://www.netlib.org/lapack/lawnspdf/lawn119.pdf", number = 119, institution = "LAPACK Working Note", author = "James W. Demmel and Ming Gu and Stanley C. Eisenstat and Ivan Slapnicar and Kresimir Veselić and Zlatko Drmač", title = "Computing the Singular Value Decomposition with High Relative Accuracy", utknumber = "UT-CS-97-348", month = feb, year = 1997, } @TechReport{lawn120, URL = "http://www.netlib.org/lapack/lawnspdf/lawn120.pdf", number = 120, institution = "LAPACK Working Note", author = "Frédéric Desprez and Jack Dongarra and Antoine Petitet and Cyril Randriamaro and Yves Robert", title = "Scheduling Block-Cyclic Array Redistribution", utknumber = "UT-CS-97-349", month = feb, year = 1997, } @TechReport{lawn121, URL = "http://www.netlib.org/lapack/lawnspdf/lawn121.pdf", number = 121, institution = "LAPACK Working Note", author = "Greg Henry and David Watkins and Jack J. 
Dongarra", title = "A Parallel Implementation of the Nonsymmetric {QR} Algorithm for Distributed Memory Architectures", utknumber = "UT-CS-97-352", month = mar, year = 1997, } @TechReport{lawn122, URL = "http://www.netlib.org/lapack/lawnspdf/lawn122.pdf", number = 122, institution = "LAPACK Working Note", author = "Mario Ahues and Francoise Tisseur", title = "A New Deflation Criterion for the {QR} Algorithm", utknumber = "UT-CS-97-353", month = mar, year = 1997, } @TechReport{lawn123, URL = "http://www.netlib.org/lapack/lawnspdf/lawn123.pdf", number = 123, institution = "LAPACK Working Note", author = "Zhaojun Bai and David Day and James W. Demmel and Jack J. Dongarra", title = "A Test Matrix Collection for Non-Hermitian Eigenvalue Problems", utknumber = "UT-CS-97-355", month = mar, year = 1997, } @TechReport{lawn124, URL = "http://www.netlib.org/lapack/lawnspdf/lawn124.pdf", number = 124, institution = "LAPACK Working Note", author = "James W. Demmel and John R. Gilbert and Xiaoye S. Li", title = "An Asynchronous Parallel Supernodal Algorithm for Sparse Gaussian Elimination", utknumber = "UT-CS-97-357", month = apr, year = 1997, } @TechReport{lawn125, URL = "http://www.netlib.org/lapack/lawnspdf/lawn125.pdf", number = 125, institution = "LAPACK Working Note", author = "Andrew Cleary and Jack J. Dongarra", title = "Implementation in {ScaLAPACK} of Divide-and-Conquer Algorithms for Banded and Tridiagonal Linear Systems", utknumber = "UT-CS-97-358", month = apr, year = 1997, } @TechReport{lawn126, URL = "http://www.netlib.org/lapack/lawnspdf/lawn126.pdf", number = 126, institution = "LAPACK Working Note", author = "Edward Anderson and Mark R. Fahey", title = "Performance Improvements to {LAPACK} for the Cray Scientific Library", utknumber = "UT-CS-97-359", month = apr, year = 1997, } @TechReport{lawn127, URL = "http://www.netlib.org/lapack/lawnspdf/lawn127.pdf", number = 127, institution = "LAPACK Working Note", author = "Xiaoye S. 
Li", title = "Sparse Gaussian Elimination on High Performance Computers", utknumber = "UT-CS-97-368", month = jun, year = 1997, } @TechReport{lawn128, URL = "http://www.netlib.org/lapack/lawnspdf/lawn128.pdf", number = 128, institution = "LAPACK Working Note", author = "Antoine P. Petitet", title = "Algorithmic Redistribution Methods for Block Cyclic Decompositions", utknumber = "UT-CS-97-371", month = jul, year = 1997, } @TechReport{lawn129, URL = "http://www.netlib.org/lapack/lawnspdf/lawn129.pdf", number = 129, institution = "LAPACK Working Note", author = "Jaeyoung Choi", title = "A New Parallel Matrix Multiplication Algorithm on Distributed-Memory Concurrent Computers", utknumber = "UT-CS-97-369", month = sep, year = 1997, } @TechReport{lawn130, URL = "http://www.netlib.org/lapack/lawnspdf/lawn130.pdf", number = 130, institution = "LAPACK Working Note", author = "James W. Demmel", title = "Accurate {SVD}s of Structured Matrices", utknumber = "UT-CS-97-375", month = oct, year = 1997, } @TechReport{lawn131, URL = "http://www.netlib.org/lapack/lawnspdf/lawn131.pdf", number = 131, institution = "LAPACK Working Note", author = "R. Clint Whaley and Jack J. Dongarra", title = "Automatically Tuned Linear Algebra Software", utknumber = "UT-CS-97-366", month = dec, year = 1997, } @TechReport{lawn132, URL = "http://www.netlib.org/lapack/lawnspdf/lawn132.pdf", number = 132, institution = "LAPACK Working Note", author = "Francoise Tisseur and Jack J. Dongarra", title = "Parallelizing the Divide and Conquer Algorithm for the Symmetric Tridiagonal Eigenvalue Problem on Distributed Memory Architectures", utknumber = "UT-CS-98-382", month = mar, year = 1998, } @TechReport{lawn133, URL = "http://www.netlib.org/lapack/lawnspdf/lawn133.pdf", number = 133, institution = "LAPACK Working Note", author = "Antoine P. Petitet and Jack J. 
Dongarra",
  title = "Algorithmic Redistribution Methods for Block Cyclic Distributions",
  utknumber = "UT-CS-98-383",
  month = mar,
  year = 1998,
}

@TechReport{lawn134,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn134.pdf",
  number = 134,
  institution = "LAPACK Working Note",
  author = "Jerzy Wasniewski and Jack J. Dongarra",
  title = "High Performance Linear Algebra Package -- {LAPACK90}",
  utknumber = "UT-CS-98-384",
  month = apr,
  year = 1998,
}

@TechReport{lawn135,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn135.pdf",
  number = 135,
  institution = "LAPACK Working Note",
  author = "Eduardo F. D'Azevedo and Jack J. Dongarra",
  title = "Packed Storage Extensions for {ScaLAPACK}",
  utknumber = "UT-CS-98-385",
  month = apr,
  year = 1998,
}

@TechReport{lawn136,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn136.pdf",
  number = 136,
  institution = "LAPACK Working Note",
  author = "L. Susan Blackford and R. Clint Whaley",
  title = "{ScaLAPACK} Evaluation and Performance at the {DoD} {MSRC}s",
  utknumber = "UT-CS-98-388",
  month = apr,
  year = 1998,
}

@TechReport{lawn137,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn137.pdf",
  number = 137,
  institution = "LAPACK Working Note",
  author = "L. Susan Blackford and Jack J. Dongarra and C. A. Papadopoulos and R. Clint Whaley",
  title = "Installation Guide and Design of the {HPF} 1.1 interface to {ScaLAPACK}, {SLHPF}",
  utknumber = "UT-CS-98-396",
  month = aug,
  year = 1998,
}

@TechReport{lawn138,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn138.pdf",
  number = 138,
  institution = "LAPACK Working Note",
  author = "Jack J. Dongarra and Wojciech Owczarz and Jerzy Wasniewski and Plamen Yalamov",
  title = "Testing Software for {LAPACK90}",
  utknumber = "UT-CS-98-401",
  month = sep,
  year = 1998,
}

@TechReport{lawn139,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn139.pdf",
  number = 139,
  institution = "LAPACK Working Note",
  author = "Antoine P. Petitet and Henri Casanova and Jack J. Dongarra and Y. Robert and R.
Clint Whaley",
  title = "A Numerical Linear Algebra Problem Solving Environment Designer's Perspective",
  utknumber = "UT-CS-98-405",
  month = oct,
  year = 1998,
}

@TechReport{lawn140,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn140.pdf",
  number = 140,
  institution = "LAPACK Working Note",
  author = "Henri Casanova and Jack J. Dongarra",
  title = "{NetSolve} version 1.2: Design and Implementation",
  utknumber = "UT-CS-98-406",
  month = nov,
  year = 1998,
}

@TechReport{lawn141,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn141.pdf",
  number = 141,
  institution = "LAPACK Working Note",
  author = "Victor L. Eijkhout",
  title = "Overview of Iterative Linear System Solver Packages",
  utknumber = "UT-CS-98-411",
  month = dec,
  year = 1998,
}

@TechReport{lawn142,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn142.pdf",
  number = 142,
  institution = "LAPACK Working Note",
  author = "Peter Arbenz and Andrew Cleary and Jack J. Dongarra and Markus Hegland",
  title = "A Comparison of Parallel Solvers for Diagonally Dominant and General Narrow-Banded Linear Systems",
  utknumber = "UT-CS-99-414",
  month = feb,
  year = 1999,
}

% Author spelled "Peter Arbenz" (no stray period) so it matches lawn142.
@TechReport{lawn143,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn143.pdf",
  number = 143,
  institution = "LAPACK Working Note",
  author = "Peter Arbenz and Andrew Cleary and Jack J. Dongarra and Markus Hegland",
  title = "A Comparison of Parallel Solvers for Diagonally Dominant and General Narrow-Banded Linear Systems {II}",
  utknumber = "UT-CS-99-415",
  month = may,
  year = 1999,
}

@TechReport{lawn144,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn144.pdf",
  number = 144,
  institution = "LAPACK Working Note",
  author = "Victor L. Eijkhout",
  title = "On the Existence Problem of Incomplete Factorisation Methods",
  utknumber = "UT-CS-99-435",
  month = dec,
  year = 1999,
}

@TechReport{lawn145,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn145.pdf",
  number = 145,
  institution = "LAPACK Working Note",
  author = "Victor L.
Eijkhout",
  title = "The 'weighted modification' incomplete factorisation method",
  utknumber = "UT-CS-99-436",
  month = dec,
  year = 1999,
}

% Proper nouns in titles are brace-protected so sentence-casing styles keep them capitalised.
@TechReport{lawn146,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn146.pdf",
  number = 146,
  institution = "LAPACK Working Note",
  author = "Bjarne S. Andersen and Fred Gustavson and Jerzy Wasniewski",
  title = "A recursive formulation of {Cholesky} factorization of a matrix in packed storage",
  utknumber = "UT-CS-00-441",
  month = may,
  year = 2000,
}

@TechReport{lawn147,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn147.pdf",
  number = 147,
  institution = "LAPACK Working Note",
  author = "R. Clint Whaley and Antoine P. Petitet and Jack J. Dongarra",
  title = "Automated Empirical Optimization of Software and the {ATLAS} Project",
  utknumber = "UT-CS-00-448",
  month = sep,
  year = 2000,
}

@TechReport{lawn148,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn148.pdf",
  number = 148,
  institution = "LAPACK Working Note",
  author = "David S. Bindel and James W. Demmel and W. Kahan and Osni A. Marques",
  title = "On Computing {Givens} rotations reliably and efficiently",
  utknumber = "UT-CS-00-449",
  month = oct,
  year = 2000,
}

@TechReport{lawn149,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn149.pdf",
  number = 149,
  institution = "LAPACK Working Note",
  author = "Xiaoye S. Li and James W. Demmel and David H. Bailey and Greg Henry and Yozo Hida and Jimmy Iskandar and W. Kahan and Anil Kapur and Michael C. Martin and Brandon J. Thompson and Teresa Tung and Daniel J.
Yoo",
  title = "Design, Implementation and Testing of Extended and Mixed Precision {BLAS}",
  utknumber = "UT-CS-00-451",
  month = oct,
  year = 2000,
}

@TechReport{lawn150,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn150.pdf",
  number = 150,
  institution = "LAPACK Working Note",
  author = "Edward Anderson",
  title = "Discontinuous Plane Rotations and the Symmetric Eigenvalue Problem",
  utknumber = "UT-CS-00-454",
  month = dec,
  year = 2000,
}

@TechReport{lawn151,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn151.pdf",
  number = 151,
  institution = "LAPACK Working Note",
  author = "Victor L. Eijkhout",
  title = "Automatic Determination of Matrix-Blocks",
  utknumber = "UT-CS-01-458",
  month = apr,
  year = 2001,
}

@TechReport{lawn152,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn152.pdf",
  number = 152,
  institution = "LAPACK Working Note",
  author = "Sheung Hun Cheng and Nicholas J. Higham",
  title = "Implementation for {LAPACK} of a Block Algorithm for Matrix 1-Norm Estimation",
  utknumber = "UT-CS-01-470",
  month = aug,
  year = 2001,
}

@TechReport{lawn153,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn153.pdf",
  number = 153,
  institution = "LAPACK Working Note",
  author = "Mark R. Fahey",
  title = "New Complex Parallel Eigenvalue and Eigenvector Routines",
  utknumber = "UT-CS-01-471",
  month = aug,
  year = 2001,
}

@TechReport{lawn154,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn154.pdf",
  number = 154,
  institution = "LAPACK Working Note",
  author = "Inderjit S. Dhillon and Beresford N. Parlett",
  title = "Orthogonal Eigenvectors and Relative Gaps",
  utknumber = "UT-CS-02-474",
  month = aug,
  year = 2002,
}

@TechReport{lawn155,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn155.pdf",
  number = 155,
  institution = "LAPACK Working Note",
  author = "Beresford N. Parlett and Osni A.
Marques",
  title = "An implementation of the dqds algorithm (positive case)",
  utknumber = "UT-CS-02-475",
  month = aug,
  year = 2002,
}

% PUBLICATION CORRESPONDING TO LAWN 155
% NOTE(review): journal name, year and page range corrected from the auto-exported
% record (which had pages = 2000 and an unbalanced parenthesis); confirm against
% Linear Algebra and its Applications, volume 309.
@ARTICLE{Parlett99animplementation,
  author = {Beresford N. Parlett and Osni A. Marques},
  title = {An implementation of the dqds algorithm (positive case)},
  journal = {Linear Algebra and its Applications},
  year = {2000},
  volume = {309},
  pages = {217--259}
}

@TechReport{lawn156,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn156.pdf",
  number = 156,
  institution = "LAPACK Working Note",
  author = "Victor L. Eijkhout",
  title = "Polynomial acceleration of optimised multi-grid smoothers basic theory",
  utknumber = "UT-CS-02-477",
  month = aug,
  year = 2002,
}

@TechReport{lawn157,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn157.pdf",
  number = 157,
  institution = "LAPACK Working Note",
  author = "Jack J. Dongarra and Victor L. Eijkhout",
  title = "Self-adapting Numerical Software for Next Generation Applications",
  utknumber = "UT-CS-02-484",
  month = aug,
  year = 2002,
}

% PUBLICATION CORRESPONDING TO LAWN 157
% NOTE(review): journal name, year, issue and pages corrected from the auto-exported
% record (which had pages = 02--07); confirm against the journal's volume 17, issue 2.
@ARTICLE{Dongarra02self-adaptingnumerical,
  author = {Jack Dongarra and Victor Eijkhout},
  title = {Self-adapting numerical software for next generation applications},
  journal = {International Journal of High Performance Computing Applications},
  year = {2003},
  volume = {17},
  number = {2},
  pages = {125--131}
}

@TechReport{lawn158,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn158.pdf",
  number = 158,
  institution = "LAPACK Working Note",
  author = "Edward Anderson",
  title = "{LAPACK3E} -- {A} {Fortran} 90-enhanced version of {LAPACK}",
  utknumber = "UT-CS-02-497",
  month = dec,
  year = 2002,
}

@TechReport{lawn159,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn159.pdf",
  number = 159,
  institution = "LAPACK Working Note",
  author = "Jack J. Dongarra and Victor L.
Eijkhout",
  title = "Self-adapting Numerical Software and Automatic Tuning of Heuristics",
  utknumber = "UT-CS-03-502",
  month = jan,
  year = 2003,
}
% NOTE(review): lawn159 had no title field (required for a technical report);
% the title above was restored from memory of the LAWN index -- confirm against lawn159.pdf.

@TechReport{lawn160,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn160.pdf",
  number = 160,
  institution = "LAPACK Working Note",
  author = "Zizhong Chen and Jack J. Dongarra and Piotr Luszczek and Kenneth Roche",
  title = "Self Adapting Software for Numerical Linear Algebra and {LAPACK} for Clusters",
  utknumber = "UT-CS-03-499",
  month = jan,
  year = 2003,
}

% PUBLICATION CORRESPONDING TO LAWN 160
@ARTICLE{Chen03selfadapting,
  author = {Zizhong Chen and Jack Dongarra and Piotr Luszczek and Kenneth Roche},
  title = {Self adapting software for numerical linear algebra and {LAPACK} for clusters},
  journal = {Parallel Computing},
  year = {2003},
  volume = {29},
  pages = {1723--1743}
}

@TechReport{lawn161,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn161.pdf",
  number = 161,
  institution = "LAPACK Working Note",
  author = "Craig Lucas",
  title = "{LAPACK}-Style Codes for Level 2 and 3 Pivoted {Cholesky} Factorizations",
  utknumber = "UT-CS-04-522",
  month = feb,
  year = 2004,
}

% LAWN 162
@TechReport{lawn162,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn162.pdf",
  number = 162,
  institution = "LAPACK Working Note",
  author = "Inderjit S. Dhillon and Beresford N. Parlett and Christof V{\"o}mel",
  title = "The Design and Implementation of the {MRRR} Algorithm",
  utknumber = "UT-CS-04-541",
  month = dec,
  year = 2004,
}

% PUBLICATION CORRESPONDING TO LAWN 162
% The doi field holds the bare DOI; styles add the resolver prefix themselves.
@article{1186788,
  author = {Inderjit S. Dhillon and Beresford N. Parlett and Christof V{\"o}mel},
  title = {The design and implementation of the {MRRR} algorithm},
  journal = {ACM Trans. Math. Softw.},
  volume = {32},
  number = {4},
  year = {2006},
  issn = {0098-3500},
  pages = {533--560},
  doi = {10.1145/1186785.1186788},
  publisher = {ACM},
  address = {New York, NY, USA},
}

@TechReport{lawn163,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn163.pdf",
  number = 163,
  institution = "LAPACK Working Note",
  author = "Beresford N.
Parlett and Christof V{\"o}mel",
  title = "How the {MRRR} Algorithm Can Fail on Tight Eigenvalue Clusters",
  utknumber = "UT-CS-04-542",
  month = dec,
  year = 2004,
}

% Accented names use LaTeX escapes (as the file's published-article entries do)
% so classic 8-bit BibTeX sorts and labels them correctly.
@TechReport{lawn164,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn164.pdf",
  number = 164,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Jack J. Dongarra",
  title = "{LAPACK} 2005 Prospectus: Reliable and Scalable Software for Linear Algebra Computations on High End Computers",
  utknumber = "UT-CS-05-546",
  month = feb,
  year = 2005,
}

@TechReport{lawn165,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn165.pdf",
  number = 165,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Yozo Hida and W. Kahan and Xiaoye S. Li and Soni Mukherjee and E. Jason Riedy",
  title = "Error Bounds from Extra Precise Iterative Refinement",
  utknumber = "UT-CS-05-547",
  month = feb,
  year = 2005,
}

@TechReport{lawn166,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn166.pdf",
  number = 166,
  institution = "LAPACK Working Note",
  author = "Paul R. Willems and Bruno Lang and Christof V{\"o}mel",
  title = "Computing The Bidiagonal {SVD} Using Multiple Relatively Robust Representations",
  utknumber = "UT-CS-05-551",
  month = apr,
  year = 2005,
}

@TechReport{lawn167,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn167.pdf",
  number = 167,
  institution = "LAPACK Working Note",
  author = "Osni A. Marques and Beresford N. Parlett and Christof V{\"o}mel",
  title = "Subset Computations with the {MRRR} algorithm",
  ucbnumber = "UCB/CSD-05-1392",
  month = aug,
  year = 2005,
}

@TechReport{lawn168,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn168.pdf",
  number = 168,
  institution = "LAPACK Working Note",
  author = "Dominic Antonelli and Christof V{\"o}mel",
  title = "{PDSYEVR}.
{ScaLAPACK}'s parallel {MRRR} algorithm for the symmetric eigenvalue problem",
  ucbnumber = "UCB/CSD-05-1399",
  month = aug,
  year = 2005,
}

% Non-ASCII and mis-encoded characters in this group are written as LaTeX escapes.
@TechReport{lawn169,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn169.pdf",
  number = 169,
  institution = "LAPACK Working Note",
  author = "Zlatko Drma{\v{c}} and Kre{\v{s}}imir Veseli{\'c}",
  title = "New fast and accurate {Jacobi} {SVD} algorithm: {I}",
  month = aug,
  year = 2005,
}

@TechReport{lawn170,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn170.pdf",
  number = 170,
  institution = "LAPACK Working Note",
  author = "Zlatko Drma{\v{c}} and Kre{\v{s}}imir Veseli{\'c}",
  title = "New fast and accurate {Jacobi} {SVD} algorithm: {II}",
  month = aug,
  year = 2005,
}

@TechReport{lawn171,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn171.pdf",
  number = 171,
  institution = "LAPACK Working Note",
  author = "Daniel Kressner",
  title = "Block Algorithms for Reordering Standard and Generalized {Schur} Forms",
  month = feb,
  year = 2006,
}

@TechReport{lawn172,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn172.pdf",
  number = 172,
  institution = "LAPACK Working Note",
  author = "Osni A. Marques and E. Jason Riedy and Christof V{\"o}mel",
  title = "Benefits of {IEEE}-754 Features in Modern Symmetric Tridiagonal Eigensolvers",
  ucbnumber = "UCB/CSD-05-1414",
  month = sep,
  year = 2005,
}

% The author name was mojibake; the title misspelled "Aggressive".
@TechReport{lawn173,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn173.pdf",
  number = 173,
  institution = "LAPACK Working Note",
  author = "Bo K{\aa}gstr{\"o}m and Daniel Kressner",
  title = "Multishift Variants of the {QZ} Algorithm with Aggressive Early Deflation",
  month = feb,
  year = 2006,
}

@TechReport{lawn174,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn174.pdf",
  number = 174,
  institution = "LAPACK Working Note",
  author = "G. W. Howell and James W. Demmel and C. T. Fulton and Sven Hammarling and K.
Marmol",
  title = "Cache Efficient Bidiagonalization Using {BLAS} 2.5 Operators",
  month = feb,
  year = 2006,
}

@TechReport{lawn175,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn175.pdf",
  number = 175,
  institution = "LAPACK Working Note",
  author = "Julie Langou and Julien Langou and Piotr Luszczek and Jakub Kurzak and Alfredo Buttari and Jack J. Dongarra",
  title = "Exploiting the Performance of 32 bit Floating Point Arithmetic in Obtaining 64 bit Accuracy (Revisiting Iterative Refinement for Linear Systems)",
  month = jun,
  year = 2006,
}

% Accented author names are written as LaTeX escapes; the dash in the title is "--".
@TechReport{lawn176,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn176.pdf",
  number = 176,
  institution = "LAPACK Working Note",
  author = "Zlatko Drma{\v{c}} and Zvonimir Bujanovi{\'c}",
  title = "On the failure of rank revealing {QR} factorization software -- a case study",
  month = jun,
  year = 2006,
}

@TechReport{lawn177,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn177.pdf",
  number = 177,
  institution = "LAPACK Working Note",
  author = "Jakub Kurzak and Jack J. Dongarra",
  title = "Implementation of the Mixed-Precision High Performance {LINPACK} Benchmark on the {CELL} Processor",
  utknumber = "UT-CS-06-580",
  month = sep,
  year = 2006,
}

@TechReport{lawn178,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn178.pdf",
  number = 178,
  institution = "LAPACK Working Note",
  author = "Jakub Kurzak and Jack J. Dongarra",
  title = "Implementing Linear Algebra Routines on Multi-Core Processors with Pipelining and a Look Ahead",
  utknumber = "UT-CS-06-581",
  month = sep,
  year = 2006,
}

@TechReport{lawn179,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn179.pdf",
  number = 179,
  institution = "LAPACK Working Note",
  author = "Marc Baboulin and Luc Giraud and Serge Gratton and Julien Langou",
  title = "Parallel tools for solving incremental dense least squares problems.
Application to space geodesy",
  utknumber = "UT-CS-06-582",
  month = sep,
  year = 2006,
}

@TechReport{lawn180,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn180.pdf",
  number = 180,
  institution = "LAPACK Working Note",
  author = "Alfredo Buttari and Jack J. Dongarra and Jakub Kurzak and Piotr Luszczek and Stanimire Tomov",
  title = "Computations to Enhance the Performance while Achieving the 64-bit Accuracy",
  utknumber = "UT-CS-06-584",
  month = nov,
  year = 2006,
}

% "V{\"o}mel" is escaped in the entries below so classic 8-bit BibTeX handles it.
@TechReport{lawn181,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn181.pdf",
  number = 181,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Jack J. Dongarra and Beresford N. Parlett and William Kahan and Ming Gu and David S. Bindel and Yozo Hida and Xiaoye S. Li and Osni A. Marques and E. Jason Riedy and Christof V{\"o}mel and Julien Langou and Piotr Luszczek and Jakub Kurzak and Alfredo Buttari and Julie Langou and Stanimire Tomov",
  title = "Prospectus for the Next {LAPACK} and {ScaLAPACK} Libraries",
  utknumber = "UT-CS-07-592",
  month = feb,
  year = 2007,
}

@TechReport{lawn182,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn182.pdf",
  number = 182,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Osni A. Marques and Beresford N. Parlett and Christof V{\"o}mel",
  title = "A Testing Infrastructure for {LAPACK}'s Symmetric Eigensolvers",
  month = apr,
  year = 2007,
}

@TechReport{lawn183,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn183.pdf",
  number = 183,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Osni A. Marques and Beresford N. Parlett and Christof V{\"o}mel",
  title = "Performance and Accuracy of {LAPACK}'s Symmetric Tridiagonal Eigensolvers",
  month = apr,
  year = 2007,
}

@TechReport{lawn184,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn184.pdf",
  number = 184,
  institution = "LAPACK Working Note",
  author = "Jakub Kurzak and Alfredo Buttari and Jack J.
Dongarra",
  title = "Solving Systems of Linear Equations on the {CELL} Processor Using {Cholesky} Factorization",
  utknumber = "UT-CS-07-596",
  month = may,
  year = 2007,
}

@TechReport{lawn185,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn185.pdf",
  number = 185,
  institution = "LAPACK Working Note",
  author = "Alfredo Buttari and Jack J. Dongarra and Jakub Kurzak",
  title = "Limitations of the {PlayStation} 3 for High Performance Cluster Computing",
  utknumber = "UT-CS-07-597",
  month = may,
  year = 2007,
}

@TechReport{lawn186,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn186.pdf",
  number = 186,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Ioana Dumitriu and Olga Holtz",
  title = "Fast Linear Algebra is Stable",
  month = may,
  year = 2007,
}

% PUBLICATION CORRESPONDING TO LAWN 186
% The doi field holds the bare DOI; styles add the resolver prefix themselves.
@article{1298603,
  author = {James Demmel and Ioana Dumitriu and Olga Holtz},
  title = {Fast linear algebra is stable},
  journal = {Numer. Math.},
  volume = {108},
  number = {1},
  year = {2007},
  issn = {0029-599X},
  pages = {59--91},
  doi = {10.1007/s00211-007-0114-x},
  publisher = {Springer-Verlag New York, Inc.},
  address = {Secaucus, NJ, USA},
}

@TechReport{lawn187,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn187.pdf",
  number = 187,
  institution = "LAPACK Working Note",
  author = "Ralph Byers",
  title = "{LAPACK} 3.1 x{HSEQR}: Tuning and Implementation Notes on the Small Bulge Multi-shift {QR} Algorithm with Aggressive Early Deflation",
  month = may,
  year = 2007,
}

@TechReport{lawn188,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn188.pdf",
  number = 188,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Yozo Hida and Xiaoye S. Li and E. Jason Riedy",
  title = "Extra-precise Iterative Refinement for Overdetermined Least Squares Problems",
  month = may,
  year = 2007,
}

% PUBLICATION CORRESPONDING TO LAWN 188
@Article{Demmel:2008:EPI,
  author = "James Demmel and Yozo Hida and Xiaoye S. Li and E.
Jason Riedy",
  title = "Extra-precise Iterative Refinement for Overdetermined Least Squares Problems",
  journal = "{ACM} Transactions on Mathematical Software",
  volume = "35",
  number = "4",
  year = 2009,
  accepted = "25 June 2008",
  upcoming = "true",
}
% NOTE(review): the article entry above had no year (a required field). 2009 is the
% cover year of volume 35, issue 4 as best recalled -- confirm, and drop the
% accepted/upcoming bookkeeping fields once verified.

@TechReport{lawn189,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn189.pdf",
  number = 189,
  institution = "LAPACK Working Note",
  author = "Wesley Alvaro and Jakub Kurzak and Jack J. Dongarra",
  title = "Fast and Small Short Vector {SIMD} Matrix Multiplication Kernels for the {CELL} Processor",
  utknumber = "UT-CS-08-609",
  month = jan,
  year = 2008,
}

% LAWN 190
@TechReport{lawn190,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn190.pdf",
  number = 190,
  institution = "LAPACK Working Note",
  author = "Alfredo Buttari and Julien Langou and Jakub Kurzak and Jack J. Dongarra",
  title = "Parallel Tiled {QR} Factorization for Multicore Architectures",
  utknumber = "UT-CS-07-598",
  month = jul,
  year = 2007,
}

% PUBLICATION CORRESPONDING TO LAWN 190
% Journal abbreviation macro used by the entry below; it must stay ahead of its first use.
@string{CCPE = "Concurrency Computat.: Pract. Exper."}

@article{blkd:ccpe:08,
  author = {Alfredo Buttari and Julien Langou and Jakub Kurzak and Jack Dongarra},
  title = {Parallel Tiled {QR} Factorization for Multicore Architectures},
  journal = CCPE,
  pages = {1573--1590},
  number = 13,
  volume = 20,
  year = 2008,
  month = sep,
  doi = {10.1002/cpe.1301},
}

@TechReport{lawn191,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn191.pdf",
  number = 191,
  institution = "LAPACK Working Note",
  author = "Alfredo Buttari and Julien Langou and Jakub Kurzak and Jack J.
Dongarra",
  title = "A Class of Parallel Tiled Linear Algebra Algorithms for Multicore Architectures",
  utknumber = "UT-CS-07-600",
  month = sep,
  year = 2007,
}

% Mis-encoded and non-ASCII author names in this group are written as LaTeX escapes.
@TechReport{lawn192,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn192.pdf",
  number = 192,
  institution = "LAPACK Working Note",
  author = "Robert Granat and Bo K{\aa}gstr{\"o}m and Daniel Kressner",
  title = "Parallel eigenvalue reordering in real {Schur} forms",
  month = sep,
  year = 2007,
}

@TechReport{lawn193,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn193.pdf",
  number = 193,
  institution = "LAPACK Working Note",
  author = "Marc Baboulin and Jack J. Dongarra and Serge Gratton and Julien Langou",
  title = "Computing the Conditioning of the Components of a Linear Least Squares Solution",
  utknumber = "UT-CS-07-604",
  month = sep,
  year = 2007,
}

@TechReport{lawn194,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn194.pdf",
  number = 194,
  institution = "LAPACK Working Note",
  author = "Christof V{\"o}mel",
  title = "A Refined Representation Tree for {MRRR}",
  month = nov,
  year = 2007,
}

@TechReport{lawn195,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn195.pdf",
  number = 195,
  institution = "LAPACK Working Note",
  author = "Christof V{\"o}mel",
  title = "{ScaLAPACK}'s {MRRR} Algorithm",
  month = nov,
  year = 2007,
}

@TechReport{lawn196,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn196.pdf",
  number = 196,
  institution = "LAPACK Working Note",
  author = "Zlatko Drma{\v{c}}",
  title = "A global convergence proof of cyclic {Jacobi} methods with block rotations",
  month = dec,
  year = 2007,
}

@TechReport{lawn197,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn197.pdf",
  number = 197,
  institution = "LAPACK Working Note",
  author = "Vasily Volkov and James W.
Demmel",
  title = "Using {GPU}s to Accelerate the Bisection Algorithm for Finding Eigenvalues of Symmetric Tridiagonal Matrices",
  ucbnumber = "UCB/EECS-2007-179",
  month = jan,
  year = 2008,
}

% Mis-encoded and accented author names are written as LaTeX escapes.
@TechReport{lawn198,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn198.pdf",
  number = 198,
  institution = "LAPACK Working Note",
  author = "Bo K{\aa}gstr{\"o}m and Daniel Kressner and Enrique S. Quintana-Ort{\'\i} and Gregorio Quintana-Ort{\'\i}",
  title = "Blocked Algorithms for the Reduction to {Hessenberg}-Triangular Form Revisited",
  month = feb,
  year = 2008,
}

@TechReport{lawn199,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn199.pdf",
  number = 199,
  institution = "LAPACK Working Note",
  author = "Fred G. Gustavson and Jerzy Wasniewski and Julien Langou and Jack J. Dongarra",
  title = "Rectangular Full Packed Format for {Cholesky}'s Algorithm: Factorization, Solution and Inversion",
  month = apr,
  year = 2008,
  utknumber = "UT-CS-08-614",
}

@TechReport{lawn200,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn200.pdf",
  number = 200,
  institution = "LAPACK Working Note",
  author = "Marc Baboulin and Jack J. Dongarra and Stanimire Tomov",
  title = "Some Issues in Dense Linear Algebra for Multicore and Special Purpose Architectures",
  month = may,
  year = 2008,
  utknumber = "UT-CS-08-615",
}

@TechReport{lawn201,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn201.pdf",
  number = 201,
  institution = "LAPACK Working Note",
  author = "Jakub Kurzak and Jack J. Dongarra",
  title = "{QR} Factorization for the {CELL} Processor",
  month = may,
  year = 2008,
  utknumber = "UT-CS-08-616",
}

@TechReport{lawn202,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn202.pdf",
  number = 202,
  institution = "LAPACK Working Note",
  author = "Vasily Volkov and James W.
Demmel",
  title = "{LU}, {QR} and {Cholesky} Factorizations using Vector Capabilities of {GPUs}",
  month = may,
  year = 2008,
  ucbnumber = "UCB/EECS-2008-49",
}
% The report number above previously had the field-separating comma inside the quoted value.

@TechReport{lawn203,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn203.pdf",
  number = 203,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Yozo Hida and Mark F. Hoemmen and E. Jason Riedy",
  title = "Non-Negative Diagonals and High Performance on Low-Profile Matrices from Householder {QR}",
  month = may,
  year = 2008,
  ucbnumber = "UCB/EECS-2008-76",
}

% Title: trailing period dropped (styles add punctuation) and acronyms brace-protected.
@TechReport{lawn204,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn204.pdf",
  number = 204,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Laura Grigori and Mark F. Hoemmen and Julien Langou",
  title = "Communication-optimal parallel and sequential {QR} and {LU} factorizations",
  month = aug,
  year = 2008,
  ucbnumber = "UCB/EECS-2008-89",
}

@TechReport{lawn205,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn205.pdf",
  number = 205,
  institution = "LAPACK Working Note",
  author = "George Bosilca and Remi Delmas and Jack J. Dongarra and Julien Langou",
  title = "Algorithmic Based Fault Tolerance Applied to High Performance Computing",
  month = jun,
  year = 2008,
  utknumber = "UT-CS-08-620",
}

@TechReport{lawn206,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn206.pdf",
  number = 206,
  institution = "LAPACK Working Note",
  author = "Jack J.
Dongarra and Julien Langou",
  title = "The Problem with the {Linpack} Benchmark Matrix Generator",
  month = jun,
  year = 2008,
  ucdenvernumber = "UCD-CCM-271",
}

% In this group: trailing periods removed from titles, acronyms and proper nouns
% brace-protected, and "Jack J. Dongarra" spelled as in the rest of the file.
@TechReport{lawn207,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn207.pdf",
  number = 207,
  institution = "LAPACK Working Note",
  author = "Marc Baboulin and Serge Gratton",
  title = "Using dual techniques to derive componentwise and mixed condition numbers for a linear functional of a linear least squares solution",
  month = aug,
  year = 2008,
  utknumber = "UT-CS-08-622",
}

@TechReport{lawn208,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn208.pdf",
  number = 208,
  institution = "LAPACK Working Note",
  author = "Hatem Ltaief and Jakub Kurzak and Jack J. Dongarra",
  title = "Parallel Block {Hessenberg} Reduction using Algorithms-By-Tiles for Multicore Architectures Revisited",
  month = aug,
  year = 2008,
  utknumber = "UT-CS-08-624",
}

@TechReport{lawn209,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn209.pdf",
  number = 209,
  institution = "LAPACK Working Note",
  author = "Hatem Ltaief and Jakub Kurzak and Jack J. Dongarra",
  title = "Parallel Band Two-Sided Matrix Bidiagonalization for Multicore Architectures",
  month = oct,
  year = 2008,
  utknumber = "UT-CS-08-631",
}

@TechReport{lawn210,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn210.pdf",
  number = 210,
  institution = "LAPACK Working Note",
  author = "Stanimire Tomov and Jack J. Dongarra and Marc Baboulin",
  title = "Towards Dense Linear Algebra for Hybrid {GPU} Accelerated Manycore Systems",
  month = oct,
  year = 2008,
  utknumber = "UT-CS-08-632",
}

@TechReport{lawn211,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn211.pdf",
  number = 211,
  institution = "LAPACK Working Note",
  author = "Fred G.
Gustavson and Jerzy Wasniewski and Jack J. Dongarra",
  title = "Level-3 {Cholesky} kernel subroutine of a fully portable High Performance minimal storage hybrid format {Cholesky} algorithm",
  month = dec,
  year = 2008,
  utknumber = "UT-CS-08-634",
}

% In this group: trailing periods removed from titles, acronyms and proper nouns
% brace-protected, and author names spelled as in the rest of the file.
@TechReport{lawn212,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn212.pdf",
  number = 212,
  institution = "LAPACK Working Note",
  author = "Yinan Li and Jack J. Dongarra and Stanimire Tomov",
  title = "A Note on Auto-tuning {GEMM} for {GPU}s",
  month = jan,
  year = 2009,
  utknumber = "UT-CS-09-635",
}

@TechReport{lawn213,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn213.pdf",
  number = 213,
  institution = "LAPACK Working Note",
  author = "Jakub Kurzak and Hatem Ltaief and Jack J. Dongarra and Rosa M. Badia",
  title = "Scheduling Linear Algebra Operations on Multicore Processors",
  month = feb,
  year = 2009,
  utknumber = "UT-CS-09-636",
}

@TechReport{lawn214,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn214.pdf",
  number = 214,
  institution = "LAPACK Working Note",
  author = "Hatem Ltaief and Jakub Kurzak and Jack J. Dongarra",
  title = "Scheduling Two-sided Transformations using Algorithms-by-Tiles on Multicore Architectures",
  month = feb,
  year = 2009,
  utknumber = "UT-CS-09-637",
}

@TechReport{lawn215,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn215.pdf",
  number = 215,
  institution = "LAPACK Working Note",
  author = "Grey Ballard and James W. Demmel and Olga Holtz and Oded Schwartz",
  title = "Communication-optimal Parallel and Sequential {Cholesky} decomposition",
  month = feb,
  year = 2009,
  ucbnumber = "UCB/EECS-2009-29",
}

% NOTE(review): the report number is a UMINF number, previously stored under
% utknumber; it is moved to its own field here -- confirm no tooling expects the old name.
@TechReport{lawn216,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn216.pdf",
  number = 216,
  institution = "LAPACK Working Note",
  author = "Robert Granat and Bo K{\aa}gstr{\"o}m and Daniel Kressner",
  title = "A novel parallel {QR} algorithm for hybrid distributed memory {HPC} systems",
  month = apr,
  year = 2009,
  uminfnumber = "UMINF-09.06",
}

@TechReport{lawn217,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn217.pdf",
  number = 217,
institution = "LAPACK Working Note",
  author = "Emmanuel Agullo and Bilel Hadri and Hatem Ltaief and Jack J. Dongarra",
  title = "Comparative Study of One-Sided Factorizations with Multiple Software Packages on Multi-Core Hardware",
  month = apr,
  year = 2009,
  utknumber = "UT-CS-09-640",
}

% In this group: trailing periods removed from titles, acronyms and proper nouns
% brace-protected, and author names spelled as in the rest of the file.
@TechReport{lawn218,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn218.pdf",
  number = 218,
  institution = "LAPACK Working Note",
  author = "Grey Ballard and James W. Demmel and Olga Holtz and Oded Schwartz",
  title = "Minimizing Communication in Linear Algebra",
  month = may,
  year = 2009,
  ucbnumber = "UCB/EECS-2009-62",
}

@TechReport{lawn219,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn219.pdf",
  number = 219,
  institution = "LAPACK Working Note",
  author = "Stanimire Tomov and Jack J. Dongarra",
  title = "Accelerating the reduction to upper {Hessenberg} form through hybrid {GPU}-based computing",
  month = may,
  year = 2009,
  utknumber = "UT-CS-09-642",
}

@TechReport{lawn220,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn220.pdf",
  number = 220,
  institution = "LAPACK Working Note",
  author = "Jakub Kurzak and Jack J. Dongarra",
  title = "Fully Dynamic Scheduler for Numerical Computing on Multicore Processors",
  month = jun,
  year = 2009,
  utknumber = "UT-CS-09-643",
}

@TechReport{lawn221,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn221.pdf",
  number = 221,
  institution = "LAPACK Working Note",
  author = "Fengguang Song and Asim YarKhan and Jack J. Dongarra",
  title = "Dynamic Task Scheduling for Linear Algebra Algorithms on Distributed-Memory Multicore Systems",
  month = apr,
  year = 2009,
  utknumber = "UT-CS-09-638",
}

@TechReport{lawn222,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn222.pdf",
  number = 222,
  institution = "LAPACK Working Note",
  author = "Bilel Hadri and Hatem Ltaief and Emmanuel Agullo and Jack J. Dongarra",
  title = "Enhancing Parallelism of Tile {QR} Factorization for Multicore Architectures",
  month = sep,
  year = 2009,
  utknumber = "UT-CS-09-645",
}

% The author list previously contained a doubled "and and", which corrupts name parsing.
@TechReport{lawn223,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn223.pdf",
  number = 223,
  institution = "LAPACK Working Note",
  author = "Hatem Ltaief and Stanimire Tomov and Rajib Nath and Peng Du and Jack J. Dongarra",
  title = "A Scalable High Performant {Cholesky} Factorization for Multicore with {GPU} Accelerators",
  month = nov,
  year = 2009,
  utknumber = "UT-CS-09-646",
}

@TechReport{lawn224,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn224.pdf",
  number = 224,
  institution = "LAPACK Working Note",
  author = "Emmanuel Agullo and Camille Coti and Jack J. Dongarra and Thomas Herault and Julien Langou",
  title = "{QR} Factorization of Tall and Skinny Matrices in a Grid Computing Environment",
  month = jan,
  year = 2010,
  utknumber = "UT-CS-10-651",
}

@TechReport{lawn225,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn225.pdf",
  number = 225,
  institution = "LAPACK Working Note",
  author = "Stanimire Tomov and Rajib Nath and Hatem Ltaief and Jack J. Dongarra",
  title = "Dense Linear Algebra Solvers for Multicore with {GPU} Accelerators",
  month = feb,
  year = 2010,
  utknumber = "UT-CS-09-649",
}

@TechReport{lawn226,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn226.pdf",
  number = 226,
  institution = "LAPACK Working Note",
  author = "Laura Grigori and James W. Demmel and Hua Xiang",
  title = "{CALU}: a communication optimal {LU} factorization algorithm",
  ucbnumber = "UCB/EECS-2010-29",
  month = mar,
  year = 2010,
}

% month uses the predefined macro jul; the previous bare word was an undefined macro.
@TechReport{lawn227,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn227.pdf",
  number = 227,
  institution = "LAPACK Working Note",
  author = "Rajib Nath and Stanimire Tomov and Jack J. Dongarra",
  title = "An Improved {MAGMA} {GEMM} for {Fermi} {GPU}s",
  month = jul,
  year = 2010,
  utknumber = "UT-CS-10-655",
}

@TechReport{lawn228,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn228.pdf",
  number = 228,
  institution = "LAPACK Working Note",
  author = "Peng Du and Rick Weber and Piotr Luszczek and Stanimire Tomov and Gregory Peterson and Jack J. Dongarra",
  title = "From {CUDA} to {OpenCL}: Towards a Performance-portable
Solution for Multi-platform GPU Programming.", month = sep, year = 2010, utknumber = "UT-CS-10-656", } @TechReport{lawn229, URL = "http://www.netlib.org/lapack/lawnspdf/lawn229.pdf", number = 229, institution = "LAPACK Working Note", author = "Jakub Kurzak and Rajib Nath and Peng Du and Jack Dongarra", title = "An Implementation of the Tile QR Factorization for a GPU and Multiple CPUs.", month = sep, year = 2010, utknumber = "UT-CS-10-657", } @TechReport{lawn230, URL = "http://www.netlib.org/lapack/lawnspdf/lawn230.pdf", number = 230, institution = "LAPACK Working Note", author = "Emmanuel Agullo and Cedric Augonnet and Jack Dongarra and Hatem Ltaief and Raymond Namyst and Samuel Thibault and Stanimire Tomov", title = "Faster, Cheaper, Better – a Hybridization Methodology to Develop Linear Algebra Software for GPUs.", month = sep, year = 2010, utknumber = "UT-CS-10-658", } @TechReport{lawn231, URL = "http://www.netlib.org/lapack/lawnspdf/lawn231.pdf", number = 231, institution = "LAPACK Working Note", author = "Bosilca, G. and Bouteiller, A. and Danalis, A. and Herault, T. and Lemarinier, P. and Dongarra, J", title = "DAGuE: A generic distributed DAG engine for high performance computing.", month = sep, year = 2010, utknumber = "UT-CS-10-659", } @TechReport{lawn232, URL = "http://www.netlib.org/lapack/lawnspdf/lawn232.pdf", number = 232, institution = "LAPACK Working Note", author = "Bosilca, G. and Bouteiller, A. and Danalis, A and Faverge, M. and Haidar, H. and Herault, T. and Kurzak, J. and Langou, J. and Lemarinier, P. and Ltaief, H. and Luszczek, P. and YarKhan, A. and Dongarra, J", title = "Distributed-Memory Task Execution and Dependence Tracking within DAGuE and the DPLASMA Project.", month = sep, year = 2010, utknumber = "UT-CS-10-660", } @TechReport{lawn233, URL = "http://www.netlib.org/lapack/lawnspdf/lawn233.pdf", number = 233, institution = "LAPACK Working Note", author = "Agullo, E. and Augonnet, C. and Dongarra, J. and Faverge, M. and Ltaief, H. 
and Thibault, S. and Tomov, S.", title = "QR Factorization on a Multicore Node Enhanced with Multiple GPU Accelerators.", month = mai, year = 2011, utknumber = "UT-CS-10-XXX", } @TechReport{lawn235, URL = "http://www.netlib.org/lapack/lawnspdf/lawn235.pdf", number = 235, institution = "LAPACK Working Note", author = "Jack Dongarra and Piotr Luszczek", title = "Reducing the time to tune parallel dense linear algebra routines with partial execution and performance modelling.", month = oct, year = 2010, utknumber = "UT-CS-10-661", } @TechReport{lawn236, URL = "http://www.netlib.org/lapack/lawnspdf/lawn236.pdf", number = 236, institution = "LAPACK Working Note", author = "Marc Baboulin and Serge Gratton", title = "A contribution to the conditioning of the total least squares problem.", month = nov, year = 2010, } @TechReport{lawn237, URL = "http://www.netlib.org/lapack/lawnspdf/lawn237.pdf", number = 237, institution = "LAPACK Working Note", author = "Grey Ballard and James Demmel and Ioana Dumitriu", title = "Minimizing Communication for Eigenproblems and the Singular Value Decomposition.", ucbnumber = "UCB/EECS-2010-136", month = nov, year = 2010, } @TechReport{lawn238, URL = "http://www.netlib.org/lapack/lawnspdf/lawn238.pdf", number = 238, institution = "LAPACK Working Note", author = "Edgar Solomonik and James Demmel", title = "Communication-optimal parallel 2.5D matrix multiplication and LU factorization algorithms.", ucbnumber = "UCB/EECS-2011-10", month = feb, year = 2011, } @TechReport{lawn239, URL = "http://www.netlib.org/lapack/lawnspdf/lawn239.pdf", number = 239, institution = "LAPACK Working Note", author = "Grey Ballard and James Demmel and Andrew Gearhart", title = "Communication bounds for heterogeneous architectures.", ucbnumber = "UCB/EECS-2011-13", month = feb, year = 2011, } @TechReport{lawn240, URL = "http://www.netlib.org/lapack/lawnspdf/lawn240.pdf", number = 240, institution = "LAPACK Working Note", author = "Michael Anderson and Grey Ballard 
and James Demmel and Kurt Keutzer", title = "Communication-Avoiding QR Decomposition for GPUs.", ucbnumber = "UCB/EECS-2011-XX", month = feb, year = 2011, } @TechReport{lawn241, URL = "http://www.netlib.org/lapack/lawnspdf/lawn241.pdf", number = 241, institution = "LAPACK Working Note", author = "Fengguang Song and Hatem Ltaief and Bilel Hadri and Jack Dongarra", title = "Scalable Tile Communication-Avoiding QR Factorization on Multicore Cluster Systems.", utknumber = "UT-CS-10-653", month = mar, year = 2011, } @TechReport{lawn242, URL = "http://www.netlib.org/lapack/lawnspdf/lawn242.pdf", number = 242, institution = "LAPACK Working Note", author = "Emmanuel Agullo and Jack Dongarra and Rajib Nath and Stanimire Tomov", title = "A Fully Empirical Autotuned Dense QR Factorization For Multicore Architectures.", inrianumber = "INRIA-7526", month = mar, year = 2011, } @TechReport{lawn243, URL = "http://www.netlib.org/lapack/lawnspdf/lawn243.pdf", number = 243, institution = "LAPACK Working Note", author = "Azzam Haidar and Hatem Ltaief and Asim YarKhan and Jack Dongarra", title = "Analysis of Dynamically Scheduled Tile Algorithms for Dense Linear Algebra on Multicore Architectures.", utknumber = "UT-CS-11-666", month = mar, year = 2011, } @TechReport{lawn244, URL = "http://www.netlib.org/lapack/lawnspdf/lawn244.pdf", number = 244, institution = "LAPACK Working Note", author = "Piotr Luszczek and Hatem Ltaief and Jack Dongarra", title = "Two-Stage Tridiagonal Reduction for Dense Symmetric Matrices using Tile Algorithms on Multicore Architectures.", utknumber = "UT-CS-11-670", month = apr, year = 2011, } @TechReport{lawn245, URL = "http://www.netlib.org/lapack/lawnspdf/lawn245.pdf", number = 245, institution = "LAPACK Working Note", author = "Jakub Kurzak and Stanimire Tomov and Jack Dongarra", title = "Autotuning GEMMs for Fermi.", utknumber = "UT-CS-11-671", month = apr, year = 2011, } @TechReport{lawn246, URL = "http://www.netlib.org/lapack/lawnspdf/lawn246.pdf", 
number = 246, institution = "LAPACK Working Note", author = "Marc Baboulin and Jack Dongarra and Julien Herrmann and Stanimire Tomov", title = "Accelerating linear system solutions using randomization techniques.", inrianumber = "INRIA RR-7616", month = may, year = 2011, } @TechReport{lawn247, URL = "http://www.netlib.org/lapack/lawnspdf/lawn247.pdf", number = 247, institution = "LAPACK Working Note", author = "Hatem Ltaief and Piotr Luszczek and Jack Dongarra", title = "High Performance Bidiagonal Reduction using Tile Algorithms on Homogeneous Multicore Architectures.", utknumber = "UT-CS-11-673", month = apr, year = 2011, } @TechReport{lawn248, URL = "http://www.netlib.org/lapack/lawnspdf/lawn248.pdf", number = 248, institution = "LAPACK Working Note", author = "Edgar Solomonik and James Demmel", title = "Communication-optimal parallel 2.5D matrix multiplication and LU factorization algorithms.", ucbnumber = "UCB/EECS-2011-72", month = jun, year = 2011, } @TechReport{lawn249, URL = "http://www.netlib.org/lapack/lawnspdf/lawn249.pdf", number = 249, institution = "LAPACK Working Note", author = "Fred G. Gustavson and Jerzy Wásniewski and Jack J. Dongarra and José R. 
Herrero and Julien Langou", title = "Level-3 Cholesky Factorization Routines as Part of Many Cholesky Algorithms.", dtunumber = "DTU/IMM-Technical-Report-2011-11", month = jun, year = 2011, } @TechReport{lawn250, URL = "http://www.netlib.org/lapack/lawnspdf/lawn250.pdf", number = 250, institution = "LAPACK Working Note", author = "Fengguang Song and Stanimire Tomov and Jack Dongarra", title = "Efficient Support for Matrix Computations on Heterogeneous Multi-core and Multi-GPU Architectures.", utknumber = "UT-CS-11-669", month = jun, year = 2011, } @TechReport{lawn251, URL = "http://www.netlib.org/lapack/lawnspdf/lawn251.pdf", number = 251, institution = "LAPACK Working Note", author = "Hatem Ltaief and Piotr Luszczek and Jack Dongarra", title = "Profiling High Performance Dense Linear Algebra Algorithms on Multicore Architectures for Power and Energy Efficiency.", utknumber = "UT-CS-11-674", month = jun, year = 2011, } @TechReport{lawn252, URL = "http://www.netlib.org/lapack/lawnspdf/lawn252.pdf", number = 252, institution = "LAPACK Working Note", author = "Peng Du and Piotr Luszczek and Stanimire Tomov and Jack Dongarra", title = "Soft Error Resilient QR Factorization for Hybrid System.", utknumber = "UT-CS-11-675", month = jul, year = 2011, } @TechReport{lawn253, URL = "http://www.netlib.org/lapack/lawnspdf/lawn253.pdf", number = 253, institution = "LAPACK Working Note", author = "Peng Du and Aurelien Bouteiller and George Bosilca and Thomas Herault and Jack Dongarra", title = "Algorithm-based Fault Tolerance for Dense Matrix Factorizations.", utknumber = "UT-CS-11-676", month = aug, year = 2011, } @TechReport{lawn254, URL = "http://www.netlib.org/lapack/lawnspdf/lawn254.pdf", number = 254, institution = "LAPACK Working Note", author = "Azzam Haidar and Hatem Ltaief and Jack Dongarra", title = "Parallel Reduction to Condensed Forms for Symmetric Eigenvalue Problems using Aggregated Fine-Grained and Memory-Aware Kernels.", utknumber = "UT-CS-11-677", month = aug, 
year = 2011, } @TechReport{lawn255, URL = "http://www.netlib.org/lapack/lawnspdf/lawn255.pdf", number = 255, institution = "LAPACK Working Note", author = "Edgar Solomonik and Abhinav Bhatele and James Demmel", title = "Improving communication performance in dense linear algebra via topology aware collectives.", ucbnumber = "UCB/EECS-2011-92", month = aug, year = 2011, } @TechReport{lawn256, URL = "http://www.netlib.org/lapack/lawnspdf/lawn256.pdf", number = 256, institution = "LAPACK Working Note", author = "Peng Du and Piotr Luszczek and Jack Dongarra", title = "High Performance Linear System Solver with Resilience to Multiple Soft Errors.", utknumber = "UT-CS-11-683", month = oct, year = 2011, } @TechReport{lawn257, URL = "http://www.netlib.org/lapack/lawnspdf/lawn257.pdf", number = 257, institution = "LAPACK Working Note", author = "Jack Dongarra and Mathieu Faverge and Thomas Herault and Julien Langou and Yves Robert", title = "Hierarchical QR factorization algorithms for multi-core cluster systems.", utknumber = "UT-CS-11-684", month = oct, year = 2011, } @TechReport{lawn258, URL = "http://www.netlib.org/lapack/lawnspdf/lawn258.pdf", number = 258, institution = "LAPACK Working Note", author = "Hartwig Anzt and Stanimire Tomov and Jack Dongarra and Vincent Heuveline", title = "A Block-Asynchronous Relaxation Method for Graphics Processing Units.", utknumber = "UT-CS-11-687", month = dec, year = 2011, } @TechReport{lawn259, URL = "http://www.netlib.org/lapack/lawnspdf/lawn259.pdf", number = 259, institution = "LAPACK Working Note", author = "Jack Dongarra and Mathieu Faverge and Hatem Ltaief and Piotr Luszczek", title = "Achieving Numerical Accuracy and High Performance using Recursive Tile LU Factorization.", utknumber = "UT-CS-11-688", month = dec, year = 2011, } @TechReport{lawn260, URL = "http://www.netlib.org/lapack/lawnspdf/lawn260.pdf", number = 260, institution = "LAPACK Working Note", author = "Hartwig Anzt and Piotr Luszczek and Jack Dongarra, and 
Vincent Heuveline", title = "GPU-Accelerated Asynchronous Error Correction for Mixed Precision Iterative Refinement.", utknumber = "UT-CS-11-690", month = dec, year = 2011, } @TechReport{lawn261, URL = "http://www.netlib.org/lapack/lawnspdf/lawn261.pdf", number = 261, institution = "LAPACK Working Note", author = "Marc Baboulin and Dulceneia Becker and Jack Dongarra", title = "A parallel tiled solver for dense symmetric indefinite systems on multicore architectures.", inrianumber = "INRIA-7762", month = dec, year = 2011, } @TechReport{lawn262, URL = "http://www.netlib.org/lapack/lawnspdf/lawn262.pdf", number = 262, institution = "LAPACK Working Note", author = "Marin Bougeret and Henri Casanova and Yves Robert and Frédéric Vivien and Dounia Zaidouni", title = "Using replication for resilience on exascale systems.", utknumber = "UT-CS-11-691", month = dec, year = 2011, } @TechReport{lawn263, URL = "http://www.netlib.org/lapack/lawnspdf/lawn263.pdf", number = 263, institution = "LAPACK Working Note", author = "Amal Khabou and James W. Demmel and Laura Grigori and Ming Gu", title = "LU factorization with panel rank revealing pivoting and its communication avoiding version.", ucbnumber = "UCB/EECS-2012-XX", month = jan, year = 2012, } @TechReport{lawn264, URL = "http://www.netlib.org/lapack/lawnspdf/lawn264.pdf", number = 264, institution = "LAPACK Working Note", author = "George Bosilca and Aurelien Bouteiller and Anthony Danalis and Thomas Herault and Piotr Luszczek and Jack J. 
Dongarra", title = "Dense Linear Algebra on Distributed Heterogeneous Hardware with a Symbolic DAG Approach.", month = jan, year = 2012, } @TechReport{lawn265, URL = "http://www.netlib.org/lapack/lawnspdf/lawn265.pdf", number = 265, institution = "LAPACK Working Note", author = "Marin Bougeret and Henri Casanova and Yves Robert and Frédéric Vivien and Dounia Zaidouni", title = "Using group replication for resilience on exascale systems.", month = mar, year = 2012, } @TechReport{lawn266, URL = "http://www.netlib.org/lapack/lawnspdf/lawn266.pdf", number = 266, institution = "LAPACK Working Note", author = "Jakub Kurzak and Piotr Luszczek and Mathieu Faverge and Jack Dongarra", title = "LU Factorization with Partial Pivoting for a Multi-CPU, Multi-GPU Shared Memory System.", month = apr, year = 2012, } @TechReport{lawn267, URL = "http://www.netlib.org/lapack/lawnspdf/lawn267.pdf", number = 267, institution = "LAPACK Working Note", author = "Jakub Kurzak and Piotr Luszczek and Stanimire Tomov and Jack Dongarra", title = "Preliminary Results of Autotuning GEMM Kernels for the NVIDIA Kepler Architecture – GeForce GTX 680.", month = apr, year = 2012, } @TechReport{lawn268, URL = "http://www.netlib.org/lapack/lawnspdf/lawn268.pdf", number = 268, institution = "LAPACK Working Note", author = "Yves Robert and Frédéric Vivien, and Dounia Zaidouni", title = "Combining Process Replication and Checkpointing for Resilience on Exascale Systems.", utknumber = "UT-CS-12-696", month = jun, year = 2012, } @TechReport{lawn269, URL = "http://www.netlib.org/lapack/lawnspdf/lawn269.pdf", number = 269, institution = "LAPACK Working Note", author = "George Bosilca and Aurelien Bouteiller and Elisabeth Brunet and Franck Cappello and Jack Dongarra and Amina Guermouche and Thomas Herault and Yves Robert and Frederic Vivien and Dounia Zaidouni", title = "Unified Model for Assessing Checkpointing Protocols at Extreme-Scale", utknumber = "UT-CS-12-697", month = jun, year = 2012, } 
% LAPACK Working Notes 270-300.
% utknumber, ucbnumber and inrianumber are not standard BibTeX fields;
% standard styles ignore them.
% Non-ASCII letters are written as braced LaTeX accent commands so the
% entries work with both 8-bit BibTeX and Biber; acronyms and proper nouns
% in titles are brace-protected against sentence-casing styles.

@TechReport{lawn270,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn270.pdf",
  number = 270,
  institution = "LAPACK Working Note",
  author = "Julie Langou and Bill Hofman and Brad King",
  title = "How {LAPACK} library enables {Microsoft Visual Studio} support with {CMake} and {LAPACKE}",
  utknumber = "UT-CS-12-698",
  month = jul,
  year = 2012,
}

@TechReport{lawn271,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn271.pdf",
  number = 271,
  institution = "LAPACK Working Note",
  author = "Lars Karlsson and Daniel Kressner",
  title = "Optimally packed chains of bulges in multishift {QR} algorithms",
  month = aug,
  year = 2012,
}

@TechReport{lawn272,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn272.pdf",
  number = 272,
  institution = "LAPACK Working Note",
  author = "Peng Du and Stanimire Tomov and Jack Dongarra",
  title = "Providing {GPU} Capability to {LU} and {QR} within the {ScaLAPACK} Framework",
  utknumber = "UT-CS-12-699",
  month = sep,
  year = 2012,
}

@TechReport{lawn273,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn273.pdf",
  number = 273,
  institution = "LAPACK Working Note",
  author = "Marc Baboulin and Serge Gratton and Remi Lacroix and Alan Laub",
  title = "Efficient computation of condition estimates for linear least squares problems",
  inrianumber = "8065",
  month = sep,
  year = 2012,
}

@TechReport{lawn274,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn274.pdf",
  number = 274,
  institution = "LAPACK Working Note",
  author = "Jack Dongarra and Thomas Herault and Yves Robert",
  title = "Revisiting the double checkpointing algorithm",
  utknumber = "UT-CS-13-705",
  month = dec,
  year = 2012,
}

@TechReport{lawn275,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn275.pdf",
  number = 275,
  institution = "LAPACK Working Note",
  author = "Chongxiao Cao and Jack Dongarra and Peng Du and Mark Gates and Piotr Luszczek and Stanimire Tomov",
  title = "{clMAGMA}: High Performance Dense Linear Algebra with {OpenCL}",
  utknumber = "UT-CS-13-706",
  month = mar,
  year = 2013,
}

@TechReport{lawn276,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn276.pdf",
  number = 276,
  institution = "LAPACK Working Note",
  author = "James W. Demmel and Laura Grigori and Ming Gu and Hua Xiang",
  title = "Communication Avoiding Rank Revealing {QR} Factorization With Column Pivoting",
  ucbnumber = "UCB/EECS-2013-46",
  month = may,
  year = 2013,
}

@TechReport{lawn277,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn277.pdf",
  number = 277,
  institution = "LAPACK Working Note",
  author = "Guillaume Aupy and Mathieu Faverge and Yves Robert and Jakub Kurzak and Piotr Luszczek and Jack Dongarra",
  title = "Implementing a systolic algorithm for {QR} factorization on multicore clusters with {PaRSEC}",
  utknumber = "UT-CS-13-709",
  month = may,
  year = 2013,
}

@TechReport{lawn278,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn278.pdf",
  number = 278,
  institution = "LAPACK Working Note",
  author = "Guillaume Aupy and Anne Benoit and Thomas H{\'e}rault and Yves Robert and Fr{\'e}d{\'e}ric Vivien and Dounia Zaidouni",
  title = "On the Combination of Silent Error Detection and Checkpointing",
  utknumber = "UT-CS-13-710",
  month = jun,
  year = 2013,
}

@TechReport{lawn279,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn279.pdf",
  number = 279,
  institution = "LAPACK Working Note",
  author = "Yulu Jia and Piotr Luszczek and Jack Dongarra",
  title = "Transient Error Resilient {Hessenberg} Reduction on {GPU}-based Hybrid Architectures",
  utknumber = "UT-CS-13-712",
  month = jun,
  year = 2013,
}

@TechReport{lawn280,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn280.pdf",
  number = 280,
  institution = "LAPACK Working Note",
  author = "Simplice Donfack and Jack Dongarra and Mathieu Faverge and Mark Gates and Jakub Kurzak and Piotr Luszczek and Ichitaro Yamazaki",
  title = "On Algorithmic Variants of Parallel {Gaussian} Elimination: Comparison of Implementations in Terms of Performance and Numerical Properties",
  utknumber = "UT-CS-13-715",
  month = jul,
  year = 2013,
}

@TechReport{lawn281,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn281.pdf",
  number = 281,
  institution = "LAPACK Working Note",
  author = "Guillaume Aupy and Anne Benoit and Thomas Herault and Yves Robert and Jack Dongarra",
  title = "Optimal Checkpointing Period: Time vs. Energy",
  utknumber = "UT-EECS-13-718",
  month = oct,
  year = 2013,
}

@TechReport{lawn282,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn282.pdf",
  number = 282,
  institution = "LAPACK Working Note",
  author = "Mathieu Faverge and Julien Herrmann and Julien Langou and Bradley Lowery and Yves Robert and Jack Dongarra",
  title = "Designing {LU-QR} hybrid solvers for performance and stability",
  utknumber = "UT-EECS-13-719",
  month = oct,
  year = 2013,
}

@TechReport{lawn283,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn283.pdf",
  number = 283,
  institution = "LAPACK Working Note",
  author = "Azzam Haidar and Piotr Luszczek and Jakub Kurzak and Jack Dongarra",
  title = "An Improved Parallel Singular Value Algorithm and Its Implementation for Multicore Hardware",
  utknumber = "UT-EECS-13-720",
  month = oct,
  year = 2013,
}

@TechReport{lawn284,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn284.pdf",
  number = 284,
  institution = "LAPACK Working Note",
  author = "Martin K{\"o}hler and Jens Saak",
  title = "{FlexiBLAS} - A flexible {BLAS} library with runtime exchangeable backends",
  month = jan,
  year = 2014,
}

@TechReport{lawn285,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn285.pdf",
  number = 285,
  institution = "LAPACK Working Note",
  author = "Marc Baboulin and Xiaoye S. Li and Fran{\c{c}}ois-Henry Rouet",
  title = "Using Random Butterfly Transformations to Avoid Pivoting in Sparse Direct Methods",
  inrianumber = "INRIA-8481",
  month = feb,
  year = 2014,
}

@TechReport{lawn286,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn286.pdf",
  number = 286,
  institution = "LAPACK Working Note",
  author = "Mark Gates and Azzam Haidar and Jack Dongarra",
  title = "Accelerating computation of eigenvectors in the nonsymmetric eigenvalue problem",
  utknumber = "UT-EECS-14-720",
  month = mar,
  year = 2014,
}

@TechReport{lawn287,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn287.pdf",
  number = 287,
  institution = "LAPACK Working Note",
  author = "Anne Benoit and Saurabh K. Raina and Yves Robert",
  title = "Efficient checkpoint/verification patterns for silent error detection",
  utknumber = "UT-EECS-14-729",
  month = may,
  year = 2014,
}

@TechReport{lawn288,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn288.pdf",
  number = 288,
  institution = "LAPACK Working Note",
  author = "Jack Dongarra and Jakub Kurzak and Piotr Luszczek and Ichitaro Yamazaki",
  title = "{PULSAR} Users' Guide",
  utknumber = "UT-EECS-14-733",
  month = dec,
  year = 2014,
}

@TechReport{lawn289,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn289.pdf",
  number = 289,
  institution = "LAPACK Working Note",
  author = "Jack Dongarra and Thomas Herault and Yves Robert",
  title = "Fault tolerance techniques for high-performance computing",
  utknumber = "UT-EECS-15-734",
  month = may,
  year = 2015,
}

@TechReport{lawn290,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn290.pdf",
  number = 290,
  institution = "LAPACK Working Note",
  author = "Jack Dongarra and Jim Demmel and Julien Langou and Julie Langou",
  title = "2016 Dense Linear Algebra Software Packages Survey",
  utknumber = "UT-EECS-16-744",
  month = sep,
  year = 2016,
}

@TechReport{lawn291,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn291.pdf",
  number = 291,
  institution = "LAPACK Working Note",
  author = "Hartwig Anzt and Jack Dongarra and Edmond Chow",
  title = "On block-asynchronous execution on {GPUs}",
  utknumber = "UT-EECS-16-746",
  month = nov,
  year = 2016,
}

@TechReport{lawn292,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn292.pdf",
  number = 292,
  institution = "LAPACK Working Note",
  author = "Maksims Abalenkovs and Negin Bagherpour and Jack Dongarra and Mark Gates and Azzam Haidar and Jakub Kurzak and Piotr Luszczek and Samuel Relton and Jakub Sistek and David Stevens and Panruo Wu and Ichitaro Yamazaki and Asim YarKhan and Mawussi Zounon",
  title = "{PLASMA} 17 Performance Report",
  utknumber = "UT-EECS-17-750",
  month = jun,
  year = 2017,
}

@TechReport{lawn293,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn293.pdf",
  number = 293,
  institution = "LAPACK Working Note",
  author = "Maksims Abalenkovs and Negin Bagherpour and Jack Dongarra and Mark Gates and Azzam Haidar and Jakub Kurzak and Piotr Luszczek and Samuel Relton and Jakub Sistek and David Stevens and Panruo Wu and Ichitaro Yamazaki and Asim YarKhan and Mawussi Zounon",
  title = "{PLASMA} 17.1 Functionality Report",
  utknumber = "UT-EECS-17-751",
  month = jun,
  year = 2017,
}

@TechReport{lawn294,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn294.pdf",
  number = 294,
  institution = "LAPACK Working Note",
  author = "Ichitaro Yamazaki and Jack Dongarra",
  title = "{Aasen}'s Symmetric Indefinite Linear Solvers in {LAPACK}",
  utknumber = "ICL-UT-17-13",
  month = dec,
  year = 2017,
}

@TechReport{lawn295,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn295.pdf",
  number = 295,
  institution = "LAPACK Working Note",
  author = "Osni Marques and James Demmel and Paulo B. Vasconcelos",
  title = "Bidiagonal {SVD} Computation via an Associated Tridiagonal Eigenproblem",
  utknumber = "ICL-UT-18-02",
  month = apr,
  year = 2018,
}

@TechReport{lawn296,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn296.pdf",
  number = 296,
  institution = "LAPACK Working Note",
  author = "Zvonimir Bujanovi{\'c} and Zlatko Drma{\v{c}}",
  title = "New robust {ScaLAPACK} routine for computing the {QR} factorization with column pivoting",
  utknumber = "ICL-UT-19-14",
  month = oct,
  year = 2019,
}

@TechReport{lawn297,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn297.pdf",
  number = 297,
  institution = "LAPACK Working Note",
  author = "James Demmel and Jack Dongarra and Julien Langou and Piotr Luszczek and Michael Mahoney",
  title = "Prospectus for the Next {LAPACK} and {ScaLAPACK} Libraries: Basic {ALgebra} {LIbraries} for Sustainable Technology with Interdisciplinary Collaboration ({BALLISTIC})",
  utknumber = "ICL-UT-20-07",
  month = jul,
  year = 2020,
}

@TechReport{lawn298,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn298.pdf",
  number = 298,
  institution = "LAPACK Working Note",
  author = "Zlatko Drma{\v{c}}",
  title = "A {LAPACK} implementation of the Dynamic Mode Decomposition {I}",
  month = oct,
  year = 2022,
}

@TechReport{lawn299,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn299.pdf",
  number = 299,
  institution = "LAPACK Working Note",
  author = "Riley Murray and James Demmel and Michael W. Mahoney and N. Benjamin Erichson and Maksim Melnichenko and Osman Asif Malik and Laura Grigori and Piotr Luszczek and Michael Derezi{\'n}ski and Miles E. Lopes and Tianyu Liang and Hengrui Luo and Jack Dongarra",
  title = "Randomized Numerical Linear Algebra - A Perspective on the Field with an Eye to Software",
  ucbnumber = "UCB/EECS-2022-258",
  month = dec,
  year = 2022,
}

@TechReport{lawn300,
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn300.pdf",
  number = 300,
  institution = "LAPACK Working Note",
  author = "Zlatko Drma{\v{c}}",
  title = "A {LAPACK} implementation of the Dynamic Mode Decomposition {II}",
  month = dec,
  year = 2022,
}