1. Home page
  2. BibTeX

Publications, presentations, and other written artifacts

ejr.bib

@misc{an12-streaming-ms,
  author         = {David A. Bader and David Ediger and Jason Riedy},
  ejr-withauthor = {David A. Bader and David Ediger},
  title          = {Streaming Graph Analytics for Massive Graphs},
  howpublished   = {SIAM Annual Meeting},
  address        = {Minneapolis, MN},
  year           = {2012},
  month          = jul,
  dom            = {10},
  url            = {http://www.slideshare.net/jasonriedy/streaming-graph-analytics-for-massive-graphs},
  role           = {presentation},
  tags           = {siam; streaming data; parallel algorithms},
  abstract       = {Emerging real-world graph problems include detecting
    community structure in large social networks, improving the
    resilience of the electric power grid, and detecting and
    preventing disease in human populations. The volume and
    richness of data combined with its rate of change renders
    monitoring properties at scale by static recomputation
    infeasible. We approach these problems with massive,
    fine-grained parallelism across different shared memory
    architectures both to compute solutions and to explore the
    sensitivity of these solutions to natural bias and
    omissions within the data.}
}
% The name with a suffix is written in `Last, Jr., First' form: in
% `First Last' form BibTeX would take the trailing `Jr.' as the surname.
@inproceedings{arith-lang,
  author = {David Hough and Bill Hay and Jeff Kidder and E. Jason
                  Riedy and Steele, Jr., Guy L. and Jim Thomas},
  ejr-withauthor = {David Hough and Bill Hay and Jeff Kidder and Guy L. Steele
                  Jr. and Jim Thomas},
  title = {Arithmetic Interactions: From Hardware to Applications},
  booktitle = {17th {IEEE} Symposium on Computer Arithmetic
                  ({ARITH}'05)},
  year = {2005},
  dom = 28,
  month = jun,
  note = {See
                  \href{http://purl.oclc.org/NET/jason-riedy/resume/material/arith17-slides.pdf}{related
                  presentation}},
  isbn = {0-7695-2366-8},
  role = {proceedings; panel},
  tags = {ieee754; floating point},
  doi = {10.1109/ARITH.2005.10},
  abstract = {The entire process of creating and executing applications
                  that solve interesting problems with acceptable cost and
                  accuracy involves a complex interaction among hardware,
                  system software, programming environments, mathematical
                  software libraries, and applications software, all mediated
                  by standards for arithmetic, operating systems, and
                  programming environments. This panel will discuss various
                  issues arising among these various contending points of
                  view, sometimes from the point of view of issues raised
                  during the current IEEE 754R standards revision effort.}
}
% NOTE(review): other-url below is the same link the lawn188 entry carries
% (EECS-2007-77), while the note here names UCB//CSD-05-1414 -- confirm
% which Berkeley report page this entry should point to.
@techreport{axb-itref-lawn,
  author         = {James W. Demmel and Yozo Hida and W. Kahan and Xiaoye S.
    Li and Sonil Mukherjee and E. Jason Riedy},
  ejr-withauthor = {James W. Demmel and Yozo Hida and W. Kahan and Xiaoye S.
    Li and Sonil Mukherjee},
  title          = {Error bounds from extra-precise iterative refinement},
  type           = {LAPACK Working Note},
  number         = {165},
  lawn           = {165},
  institution    = {Netlib},
  year           = {2005},
  month          = feb,
  dom            = {3},
  url            = {http://www.netlib.org/lapack/lawnspdf/lawn165.pdf},
  other-url      = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2007/EECS-2007-77.html},
  note           = {Also issued as UCB//CSD-05-1414, UT-CS-05-547, and
    LBNL-56965; expanded from TOMS version},
  role           = {techreport},
  tags           = {lawn; lapack; linear algebra; floating point}
}
@article{axb-itref-toms,
  author         = {James W. Demmel and Yozo Hida and W. Kahan and Xiaoye S.
    Li and Sonil Mukherjee and E. Jason Riedy},
  ejr-withauthor = {James W. Demmel and Yozo Hida and W. Kahan and Xiaoye S.
    Li and Sonil Mukherjee},
  title          = {Error bounds from extra-precise iterative refinement},
  journal        = {{ACM} Transactions on Mathematical Software},
  volume         = {32},
  number         = {2},
  pages          = {325--351},
  year           = {2006},
  month          = jun,
  doi            = {10.1145/1141885.1141894},
  issn           = {0098-3500},
  mrclass        = {65F10},
  mrnumber       = {2272365},
  role           = {refereed},
  tags           = {acm; toms; lapack; floating point; linear algebra},
  abstract       = {We present the design and testing of an algorithm for
    iterative refinement of the solution of linear equations
    where the residual is computed with extra precision. This
    algorithm was originally proposed in 1948 and analyzed in
    the 1960s as a means to compute very accurate solutions to
    all but the most ill-conditioned linear systems. However,
    two obstacles have until now prevented its adoption in
    standard subroutine libraries like LAPACK: (1) There was no
    standard way to access the higher precision arithmetic
    needed to compute residuals, and (2) it was unclear how to
    compute a reliable error bound for the computed solution.
    The completion of the new BLAS Technical Forum Standard has
    essentially removed the first obstacle. To overcome the
    second obstacle, we show how the application of iterative
    refinement can be used to compute an error bound in any
    norm at small cost and use this to compute both an error
    bound in the usual infinity norm, and a componentwise
    relative error bound.}
}
@misc{bascd2002-poster,
  author       = {E. Jason Riedy},
  title        = {Parallel Bipartite Matching for Sparse Matrix
    Computation},
  howpublished = {Third Bay Area Scientific Computing Day},
  address      = {Livermore, CA},
  year         = {2002},
  month        = mar,
  role         = {poster},
  tags         = {bascd; sparse matrix; combinatorial optimization; parallel
    algorithms}
}
@misc{bascd2006-poster,
  author       = {E. Jason Riedy},
  title        = {Making Static Pivoting Dependable},
  howpublished = {Seventh Bay Area Scientific Computing Day},
  address      = {Livermore, CA},
  year         = {2006},
  month        = mar,
  url          = {http://purl.oclc.org/NET/jason-riedy/resume/material/bascd2006-poster.pdf},
  role         = {poster},
  tags         = {bascd; sparse matrix; linear algebra},
  abstract     = {For sparse LU factorization, dynamic pivoting tightly
    couples symbolic and numerical computation. Dynamic
    structural changes limit parallel scalability. Demmel and
    Li use static pivoting in distributed SuperLU for
    performance, but intentionally perturbing the input may
    lead silently to erroneous results. Are there
    experimentally stable static pivoting heuristics that lead
    to a dependable direct solver? The answer is currently a
    qualified yes. Current heuristics fail on a few systems,
    but all failures are detectable.}
}
@misc{bascd2007-poster,
  author         = {James W. Demmel and Yozo Hida and Xiaoye S. Li and E.
    Jason Riedy and Meghana Vishvanath and David Vu},
  ejr-withauthor = {James W. Demmel and Yozo Hida and Xiaoye S. Li and Meghana
    Vishvanath and David Vu},
  title          = {Precise Solutions for Overdetermined Least Squares
    Problems},
  howpublished   = {Stanford 50 -- Eighth Bay Area Scientific Computing Day},
  address        = {Stanford, CA},
  year           = {2007},
  month          = mar,
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/bascd2007-poster.pdf},
  role           = {poster},
  tags           = {bascd; least squares},
  abstract       = {Linear least squares (LLS) fitting is the most widely used
    data modeling technique and is included in almost every
    data analysis system (e.g. spreadsheets). These software
    systems often give no feedback on the conditioning of the
    LLS problem or the floating-point calculation errors
    present in the solution. With limited use of extra
    precision, we can eliminate these concerns for all but the
    most ill-conditioned LLS problems. Our algorithm provides
    either a solution and residual with relatively tiny error
    or a notice that the LLS problem is too ill-conditioned.}
}
@misc{cerfacs08,
  author       = {E. Jason Riedy},
  title        = {Auctions for Distributed (and Possibly Parallel)
    Matchings},
  howpublished = {Visit to \href{http://www.cerfacs.fr/}{CERFACS} courtesy
    of the Franco-Berkeley Fund},
  year         = {2008},
  month        = dec,
  dom          = {17},
  url          = {http://purl.oclc.org/NET/jason-riedy/resume/material/cerfacs08.pdf},
  role         = {presentation},
  tags         = {cerfacs; combinatorial optimization; sparse matrix}
}
@misc{comb-sparse-cse05,
  author       = {E. Jason Riedy},
  title        = {Parallel Combinatorial Computing and Sparse Matrices},
  howpublished = {SIAM Conference on Computational Science and Engineering},
  year         = {2005},
  month        = feb,
  dom          = {14},
  url          = {http://purl.oclc.org/NET/jason-riedy/resume/material/cse05.pdf},
  role         = {presentation},
  tags         = {combinatorial optimization; sparse matrix; parallel
    algorithms; siam}
}
% The booktitle uses `--' for the dash and braces the acronym so that
% recasing styles leave it intact.
@incollection{dimacs10-workshop,
  author = {E. Jason Riedy and Henning Meyerhenke and David Ediger and
                  David A. Bader},
  ejr-withauthor = {Henning Meyerhenke and David Ediger and David A. Bader},
  title = {Parallel Community Detection for Massive Graphs},
  booktitle = {10th {DIMACS} Implementation Challenge -- Graph Partitioning
                  and Graph Clustering},
  publisher = {(workshop paper)},
  year = 2012,
  month = feb,
  dom = 14,
  address = {Atlanta, Georgia},
  note = {Won first place in the Mix Challenge and Mix Pareto
                  Challenge},
  url = {http://www.cc.gatech.edu/dimacs10/papers/[15]-dimacs10-community-detection.pdf}
}
@unpublished{fp-type-project,
  author = {E. Jason Riedy},
  title = {Type System Support for Floating-Point Computation},
  month = may,
  dom = 25,
  url = {http://purl.oclc.org/NET/jason-riedy/resume/material/type-support-for-fp.pdf},
  abstract = {Floating-point arithmetic is often seen as untrustworthy.
                  We show how manipulating precisions according to the
                  following rules of thumb enhances the reliability of and
                  removes surprises from calculations: Store data narrowly,
                  compute intermediates widely, and derive properties widely.
                  Further, we describe a typing system for floating point
                  that both supports and is supported by these rules. A
                  single type is established for all intermediate
                  computations. The type describes a precision at least as
                  wide as all inputs to and results from the computation.
                  Picking a single type provides benefits to users,
                  compilers, and interpreters. The type system also extends
                  cleanly to encompass intervals and higher precisions.},
  year = {2001},
  role = {unpublished},
  tags = {programming language; floating point; ieee754}
}
% The title uses LaTeX quotes (``...''), because a straight double quote
% typesets as a closing quote on both sides; the benchmark name is braced
% so sentence-casing styles keep its capital.
@unpublished{graph500-1.1,
  author = {David A. Bader and Jonathan Berry and Simon Kahan and
                  Richard Murphy and E. Jason Riedy and Jeremiah Willcock},
  ejr-withauthor = {David A. Bader and Jonathan Berry and Simon Kahan and
                  Richard Murphy and Jeremiah Willcock},
  title = {{Graph 500} Benchmark 1 (``Search'')},
  note = {Version 1.1},
  url = {http://www.graph500.org/Specifications.html},
  month = oct,
  year = 2010
}
% {GraphCT} is braced so sentence-casing styles keep its mixed capitals.
% The opt* fields are empty placeholders left for the final publication data.
@article{graphct-tpds-2012,
  author = {David Ediger and Karl Jiang and Jason Riedy and David A.
                  Bader},
  ejr-withauthor = {David Ediger and Karl Jiang and David A. Bader},
  title = {{GraphCT}: Multithreaded Algorithms for Massive Graph
                  Analysis},
  journal = {{IEEE} Transactions on Parallel and Distributed Systems},
  year = 2012,
  optkey = {},
  optvolume = {},
  optnumber = {},
  optpages = {},
  optmonth = {},
  note = {(to appear)},
  optannote = {}
}
@incollection{graphct-wiley-chap,
  author         = {David Ediger and Jason Riedy and David A. Bader and
    Henning Meyerhenke},
  ejr-withauthor = {David Ediger and David A. Bader and Henning Meyerhenke},
  title          = {Computational Graph Analytics for Massive Streaming Data},
  editor         = {Hamid Sarbazi-azad and Albert Zomaya},
  booktitle      = {Large Scale Network-Centric Computing Systems},
  series         = {Parallel and Distributed Computing},
  chapter        = {25},
  publisher      = {Wiley},
  year           = {2013},
  month          = jul,
  dom            = {30},
  isbn           = {978-0470936887},
  note           = {(to appear)}
}
@misc{graphex11,
  author         = {Jason Riedy and David Ediger and David A. Bader and
    Henning Meyerhenke},
  ejr-withauthor = {David Ediger and David A. Bader and Henning Meyerhenke},
  title          = {Tracking Structure of Streaming Social Networks},
  howpublished   = {2011 Graph Exploitation Symposium hosted by MIT Lincoln
    Labs},
  year           = {2011},
  month          = aug,
  dom            = {9},
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/GraphEx-2011.pdf},
  role           = {presentation},
  tags           = {graph; streaming}
}
% The abstract uses LaTeX quotes (``...''), because a straight double quote
% typesets as a closing quote on both sides.
@misc{gt09,
  author = {E. Jason Riedy},
  title = {Dependable direct solutions for linear systems using a
                  little extra precision},
  howpublished = {\href{http://cse.gatech.edu/}{CSE} Seminar at Georgia
                  Institute of Technology},
  dom = {21},
  month = aug,
  year = 2009,
  url = {http://hdl.handle.net/1853/29795},
  tags = {linear algebra; floating point; lapack},
  role = {presentation},
  abstract = {Solving a square linear system $Ax=b$ often is considered
                  a black box. It's supposed to ``just work,'' and failures
                  often are blamed on the original data or subtleties of
                  floating-point. Now that we have an abundance of cheap
                  computations, however, we can do much better. A little
                  extra precision in just the right places produces accurate
                  solutions cheaply or demonstrates when problems are too
                  hard to solve without significant cost. This talk will
                  outline the method, iterative refinement with a new twist;
                  the benefits, small backward and forward errors; and the
                  trade-offs and unexpected benefits.}
}
@inproceedings{ia-cost,
  author         = {Joseph N. Wilson and E. Jason Riedy},
  ejr-withauthor = {Joseph N. Wilson},
  title          = {Efficient {SIMD} evaluation of image processing programs},
  editor         = {Hongchi Shi and Patrick C. Coffield},
  booktitle      = {Parallel and Distributed Methods for Image Processing},
  volume         = {3166},
  pages          = {199--210},
  organization   = {SPIE},
  address        = {San Diego, CA},
  year           = {1997},
  month          = jul,
  dom            = {28},
  doi            = {10.1117/12.279618},
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/ia-cost.pdf},
  role           = {proceedings},
  tags           = {spie; image algebra; parallel algorithms},
  abstract       = {SIMD parallel systems have been employed for image
    processing and computer vision applications since their
    inception. This paper describes a system in which parallel
    programs are implemented using a machine-independent,
    retargetable object library that provides SIMD execution on
    the Lockheed Martin PAL-I SIMD parallel processor.
    Programs' performance on this machine is improved through
    on-the-fly execution analysis and scheduling. We describe
    the relevant elements of the system structure, the general
    scheme for execution analysis, and the current cost model
    for scheduling.}
}
@incollection{ia-simd-chapter,
  author         = {Joseph N. Wilson and E. Jason Riedy and Gerhard X. Ritter
    and Hongchi Shi},
  ejr-withauthor = {Joseph N. Wilson and Gerhard X. Ritter and Hongchi Shi},
  title          = {An {Image} {Algebra} Based {SIMD} Image Processing
    Environment},
  editor         = {C. W. Chen and Y. Q. Zhang},
  booktitle      = {Visual Information Representation, Communication, and
    Image Processing},
  pages          = {523--542},
  publisher      = {Marcel Dekker},
  address        = {New York},
  year           = {1999},
  isbn           = {082471928X},
  citeseer       = {wilson97image.html},
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/ia-simd-chap.pdf},
  role           = {chapter},
  tags           = {image algebra; parallel algorithms},
  abstract       = {SIMD parallel computers have been employed for image
    related applications since their inception. They have been
    leading the way in improving processing speed for those
    applications. However, current parallel programming
    technologies have not kept pace with the performance growth
    and cost decline of parallel hardware. A highly usable
    parallel software development environment is needed. This
    chapter presents a computing environment that integrates a
    SIMD mesh architecture with image algebra for
    high-performance image processing applications. The
    environment describes parallel programs through a
    machine-independent, retargetable image algebra object
    library that supports SIMD execution on the Lockheed Martin
    PAL-I parallel computer. Program performance on this
    machine is improved through on-the-fly execution analysis
    and scheduling. We describe the relevant elements of the
    system structure, outline the scheme for execution
    analysis, and provide examples of the current cost model
    and scheduling system.},
  icon           = {ia-simd-chap.wordle.png}
}
% Conference paper: @inproceedings needs only author, title, booktitle and
% year, whereas @incollection would also require a publisher.
@inproceedings{icassp2012-stinger,
  author = {Jason Riedy and Henning Meyerhenke and David A. Bader and
                  David Ediger and Timothy G. Mattson},
  ejr-withauthor = {Henning Meyerhenke and David Ediger and David A. Bader and
                  Timothy G. Mattson},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and
                  Signal Processing ({ICASSP})},
  title = {Analysis of Streaming Social Networks and Graphs on
                  Multicore Architectures},
  year = {2012},
  month = mar,
  dom = 29,
  address = {Kyoto, Japan},
  url = {http://www.slideshare.net/jasonriedy/icassp-2012-analysis-of-streaming-social-networks-and-graphs-on-multicore-architectures},
  abstract = {Analyzing static snapshots of massive, graph-structured
                  data cannot keep pace with the growth of social networks,
                  financial transactions, and other valuable data sources. We
                  introduce a framework, STING (Spatio-Temporal Interaction
                  Networks and Graphs), and evaluate its performance on
                  multicore, multisocket Intel(R)-based platforms. STING
                  achieves rates of around 100\,000 edge updates per second
                  on large, dynamic graphs with a single, general data
                  structure. We achieve speed-ups of up to 1000$\times$ over
                  parallel static computation, improve monitoring a dynamic
                  graph's connected components, and show an exact algorithm
                  for maintaining local clustering coefficients performs
                  better on Intel-based platforms than our earlier
                  approximate algorithm.}
}
% {Twitter} is braced so sentence-casing styles keep the proper noun's capital.
@inproceedings{icpp10,
  author = {David Ediger and Karl Jiang and E. Jason Riedy and David
                  A. Bader and Courtney Corley and Rob Farber and William N.
                  Reynolds},
  ejr-withauthor = {David Ediger and Karl Jiang and David A. Bader and
                  Courtney Corley and Rob Farber and William N. Reynolds},
  title = {Massive Social Network Analysis: Mining {Twitter} for Social
                  Good},
  booktitle = {39th International Conference on Parallel Processing
                  ({ICPP})},
  role = {proceedings},
  tags = {parallel; graph},
  year = {2010},
  address = {San Diego, CA},
  month = sep,
  dom = 16,
  url = {http://www.cc.gatech.edu/~bader/papers/MassiveTwitter.html},
  note = {(70/225 papers accepted: 31.1\% acceptance rate)}
}
@misc{ieee754-exceptions,
  author         = {David Bindel and E. Jason Riedy},
  ejr-withauthor = {David Bindel},
  title          = {Exception Handling Interfaces, Implementations, and
    Evaluation},
  howpublished   = {IEEE-754r revision meeting},
  year           = {2002},
  month          = aug,
  url            = {http://grouper.ieee.org/groups/754/meeting-materials/2002-08-22-pres.pdf},
  role           = {presentation},
  tags           = {ieee754; floating point}
}
% {STING} and {Intel} are braced so sentence-casing styles keep their capitals.
@misc{intel.graph.2011,
  author = {Jason Riedy and David A. Bader and Henning Meyerhenke and
                  David Ediger and Timothy Mattson},
  ejr-withauthor = {David A. Bader and Henning Meyerhenke and David Ediger and
                  Timothy Mattson},
  title = {{STING}: Spatio-Temporal Interaction Networks and Graphs for
                  {Intel} Platforms},
  howpublished = {Presentation at Intel Corporation, Santa Clara, CA},
  dom = 9,
  month = aug,
  year = 2011,
  role = {presentation},
  url = {http://purl.oclc.org/NET/jason-riedy/resume/material/GT-STING-for-Intel-beamer.pdf}
}
@misc{lang-tools-ieee754,
  author       = {E. Jason Riedy},
  title        = {Modern Language Tools and {754R}},
  howpublished = {{ARITH}'05},
  year         = {2005},
  month        = jun,
  dom          = {28},
  url          = {http://purl.oclc.org/NET/jason-riedy/resume/material/arith17-slides.pdf},
  role         = {presentation},
  tags         = {ieee754; floating point}
}
@misc{lapack-future,
  author = {E. Jason Riedy and Yozo Hida and James W. Demmel},
  ejr-withauthor = {Yozo Hida and James W. Demmel},
  title = {The Future of {LAPACK} and {ScaLAPACK}},
  howpublished = {Robert C. Thompson Matrix Meeting},
  dom = {18},
  month = nov,
  year = {2005},
  url = {http://purl.oclc.org/NET/jason-riedy/resume/material/future-of-scalapack.pdf},
  role = {presentation},
  tags = {lapack; software engineering},
  abstract = {We are planning new releases of the widely used LAPACK and
                  ScaLAPACK numerical linear algebra libraries. Based on an
                  on-going user survey (http://www.netlib.org/lapack-dev) and
                  research by many people, we are proposing the following
                  improvements: Faster algorithms (including better numerical
                  methods, memory hierarchy optimizations, parallelism, and
                  automatic performance tuning to accommodate new
                  architectures), more accurate algorithms (including better
                  numerical methods, and use of extra precision), expanded
                  functionality (including updating and downdating, new
                  eigenproblems, etc. and putting more of LAPACK into
                  ScaLAPACK), and improved ease of use (friendlier interfaces
                  in multiple languages). To accomplish these goals we are
                  also relying on better software engineering techniques and
                  contributions from collaborators at many institutions. This
                  is joint work with Jack Dongarra.}
}
@inproceedings{lapack-prospectus,
  author         = {James W. Demmel and Jack Dongarra and Beresford Parlett
    and W. Kahan and Ming Gu and David Bindel and Yozo Hida and
    Xiaoye S. Li and Osni A. Marques and E. Jason Riedy and
    Christof V{\"o}mel and Julien Langou and Piotr Luszczek and
    Jakub Kurzak and Alfredo Buttari and Julie Langou and
    Stanimire Tomov},
  ejr-withauthor = {James W. Demmel and Jack Dongarra and Beresford Parlett
    and W. Kahan and Ming Gu and David Bindel and Yozo Hida and
    Xiaoye S. Li and Osni A. Marques and Christof V{\"o}mel and
    Julien Langou and Piotr Luszczek and Jakub Kurzak and
    Alfredo Buttari and Julie Langou and Stanimire Tomov},
  title          = {Prospectus for the Next {LAPACK} and {ScaLAPACK}
    Libraries},
  booktitle      = {{PARA'06}: State-of-the-Art in Scientific and Parallel
    Computing},
  organization   = {High Performance Computing Center North ({HPC2N}) and the
    Department of Computing Science, Ume{\aa} University},
  publisher      = {Springer},
  address        = {Ume{\aa}, Sweden},
  year           = {2006},
  month          = jun,
  url            = {http://www.netlib.org/utk/people/JackDongarra/PAPERS/para06-lapack.pdf},
  role           = {proceedings},
  tags           = {lapack},
  abstract       = {LAPACK and ScaLAPACK are widely used software libraries
    for numerical linear algebra. There have been over 68M web
    hits at www.netlib.org for the associated libraries LAPACK,
    ScaLAPACK, CLAPACK and LAPACK95. LAPACK and ScaLAPACK are
    used to solve leading edge science problems and they have
    been adopted by many vendors and software providers as the
    basis for their own libraries, including AMD, Apple (under
    Mac OS X), Cray, Fujitsu, HP, IBM, Intel, NEC, SGI, several
    Linux distributions (such as Debian), NAG, IMSL, the
    MathWorks (producers of MATLAB), Interactive
    Supercomputing, and PGI. Future improvements in these
    libraries will therefore have a large impact on users.}
}
% The accented name is written as V{\"o}mel so classic 8-bit BibTeX sorts
% and labels it correctly.
@techreport{lapack-prospectus-lawn,
  author = {James W. Demmel and Jack Dongarra and Beresford Parlett
                  and W. Kahan and Ming Gu and David Bindel and Yozo Hida and
                  Xiaoye S. Li and Osni A. Marques and E. Jason Riedy and
                  Christof V{\"o}mel and Julien Langou and Piotr Luszczek and
                  Jakub Kurzak and Alfredo Buttari and Julie Langou and
                  Stanimire Tomov},
  ejr-withauthor = {James W. Demmel and Jack Dongarra and Beresford Parlett
                  and W. Kahan and Ming Gu and David Bindel and Yozo Hida and
                  Xiaoye S. Li and Osni A. Marques and Christof V{\"o}mel and
                  Julien Langou and Piotr Luszczek and Jakub Kurzak and
                  Alfredo Buttari and Julie Langou and Stanimire Tomov},
  title = {Prospectus for the Next {LAPACK} and {ScaLAPACK}
                  Libraries},
  institution = {Netlib},
  year = {2007},
  type = {LAPACK Working Note},
  number = {181},
  lawn = {181},
  month = feb,
  note = {Also issued as UT-CS-07-592},
  role = {techreport},
  tags = {lawn; lapack},
  url = {http://www.netlib.org/lapack/lawnspdf/lawn181.pdf}
}
@unpublished{lapack-style,
  author         = {Jack Dongarra and Julien Langou and E. Jason Riedy},
  ejr-withauthor = {Jack Dongarra and Julien Langou},
  title          = {Sca/{LAPACK} Program Style},
  year           = {2006},
  month          = aug,
  url            = {http://www.netlib.org/lapack-dev/lapack-coding/program-style.html},
  role           = {unpublished},
  tags           = {lapack},
  abstract       = {The purpose of this document is to facilitate
    contributions to LAPACK and ScaLAPACK by documenting their
    design and implementation guidelines. The long-term goal is
    to provide guidelines for both LAPACK and ScaLAPACK.
    However, the parallel ScaLAPACK code has more open issues,
    so this document primarily concerns LAPACK.}
}
% Carries a `lawn' field like the other LAPACK Working Note entries in this
% file; the abstract is kept ASCII-only for classic 8-bit BibTeX.
@techreport{lawn188,
  author = {James W. Demmel and Yozo Hida and Xiaoye S. Li and E.
                  Jason Riedy},
  ejr-withauthor = {James W. Demmel and Yozo Hida and Xiaoye S. Li},
  title = {Extra-precise iterative refinement for overdetermined
                  least squares problems},
  type = {LAPACK Working Note},
  institution = {Netlib},
  year = 2007,
  number = 188,
  lawn = {188},
  month = may,
  dom = 31,
  url = {http://www.netlib.org/lapack/lawnspdf/lawn188.pdf},
  other-url = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2007/EECS-2007-77.html},
  note = {Also issued as UCB/EECS-2007-77; version accepted for
                  TOMS.},
  abstract = {We present the algorithm, error bounds, and numerical
                  results for extra-precise iterative refinement applied to
                  overdetermined linear least squares (LLS) problems. We
                  apply our linear system refinement algorithm to
                  Bj{\"o}rck's augmented linear system formulation of an
                  LLS problem. Our algorithm reduces the forward normwise and
                  componentwise errors to $O(\varepsilon)$ unless the system
                  is too ill conditioned. In contrast to linear systems, we
                  provide two separate error bounds for the solution $x$ and
                  the residual $r$. The refinement algorithm requires only
                  limited use of extra precision and adds only $O(mn)$ work
                  to the $O(mn^2)$ cost of QR factorization for problems of
                  size m-by-n. The extra precision calculation is facilitated
                  by the new extended-precision BLAS standard in a portable
                  way, and the refinement algorithm will be included in a
                  future release of LAPACK and can be extended to the other
                  types of least squares problems.},
  role = {techreport},
  tags = {lawn; lapack; least squares; floating point}
}
% The abstract is kept ASCII-only (plain apostrophe, escaped accents) for
% classic 8-bit BibTeX.
@article{lsq-itref-toms,
  author = {James W. Demmel and Yozo Hida and Xiaoye S. Li and E.
                  Jason Riedy},
  ejr-withauthor = {James W. Demmel and Yozo Hida and Xiaoye S. Li},
  title = {Extra-precise iterative refinement for overdetermined
                  least squares problems},
  journal = {{ACM} Transactions on Mathematical Software},
  volume = {35},
  number = {4},
  year = 2009,
  month = feb,
  issn = {0098-3500},
  pages = {1--32},
  doi = {10.1145/1462173.1462177},
  accepted = {25 June 2008},
  role = {refereed},
  tags = {acm; toms; lapack; floating point; linear algebra},
  abstract = {We present the algorithm, error bounds, and numerical
                  results for extra-precise iterative refinement applied to
                  overdetermined linear least squares (LLS) problems. We
                  apply our linear system refinement algorithm to
                  Bj{\"o}rck's augmented linear system formulation of an
                  LLS problem. Our algorithm reduces the forward normwise and
                  componentwise errors to $O(\varepsilon)$ unless the system
                  is too ill conditioned. In contrast to linear systems, we
                  provide two separate error bounds for the solution $x$ and
                  the residual $r$. The refinement algorithm requires only
                  limited use of extra precision and adds only $O(mn)$ work
                  to the $O(mn^2)$ cost of QR factorization for problems of
                  size m-by-n. The extra precision calculation is facilitated
                  by the new extended-precision BLAS standard in a portable
                  way, and the refinement algorithm will be included in a
                  future release of LAPACK and can be extended to the other
                  types of least squares problems.}
}
@inproceedings{mtaap10,
  author         = {David Ediger and Karl Jiang and E. Jason Riedy and David
    A. Bader},
  ejr-withauthor = {David Ediger and Karl Jiang and David A. Bader},
  title          = {Massive Streaming Data Analytics: A Case Study with
    Clustering Coefficients},
  booktitle      = {4th Workshop on Multithreaded Architectures and
    Applications (MTAAP)},
  address        = {Atlanta, GA},
  year           = {2010},
  month          = apr,
  dom            = {23},
  url            = {http://www.cc.gatech.edu/~bader/papers/StreamingCC.html},
  note           = {(11/22 papers accepted, 50\% acceptance rate)},
  role           = {proceedings},
  tags           = {parallel; graph; streaming}
}
@inproceedings{mtaap11,
  author         = {David Ediger and E. Jason Riedy and David A. Bader and
    Henning Meyerhenke},
  ejr-withauthor = {David Ediger and David A. Bader and Henning Meyerhenke},
  title          = {Tracking Structure of Streaming Social Networks},
  booktitle      = {5th Workshop on Multithreaded Architectures and
    Applications (MTAAP)},
  year           = {2011},
  month          = may,
  note           = {(10/17 papers accepted, 59\% acceptance rate)},
  role           = {proceedings},
  tags           = {parallel; graph; streaming},
  abstract       = {Current online social networks are massive and still
    growing. For example, Facebook has over 500 million active
    users sharing over 30 billion items per month. The scale
    within these data streams has outstripped traditional graph
    analysis methods. Monitoring requires dynamic analysis
    rather than repeated static analysis. The massive state
    behind multiple persistent queries requires shared data
    structures and not problem-specific representations. We
    present a framework based on the STINGER data structure
    that can monitor a global property, connected components,
    on a graph of 16 million vertices at rates of up to
    240\,000 updates per second on a 32 processor Cray XMT. For
    very large scale-free graphs, our implementation uses novel
    batching techniques that exploit the scale-free nature of
    the data and run over three times faster than prior
    methods. Our framework handles, for the first time,
    real-world data rates, opening the door to higher-level
    analytics such as community and anomaly detection.}
}
@inproceedings{mtaap12,
  author = {E. Jason Riedy and David A. Bader and Henning Meyerhenke},
  ejr-withauthor = {David A. Bader and Henning Meyerhenke},
  title = {Scalable Multi-threaded Community Detection in Social
                  Networks},
  booktitle = {6th Workshop on Multithreaded Architectures and
                  Applications (MTAAP)},
  role = {proceedings},
  tags = {parallel; graph; streaming},
  year = 2012,
  month = may,
  dom = 25,
  abstract = {The volume of existing graph-structured data requires
                  improved parallel tools and algorithms. Finding
                  communities, smaller subgraphs densely connected within the
                  subgraph than to the rest of the graph, plays a role both
                  in developing new parallel algorithms as well as opening
                  smaller portions of the data to current analysis tools. We
                  improve performance of our parallel community detection
                  algorithm by 20\% on the massively multithreaded Cray XMT,
                  evaluate its performance on the next-generation Cray XMT2,
                  and extend its reach to Intel-based platforms with OpenMP.
                  To our knowledge, not only is this the first massively
                  parallel community detection algorithm but also the only
                  such algorithm that achieves excellent performance and good
                  parallel scalability across all these platforms. Our
                  implementation analyzes a moderate sized graph with 105
                  million vertices and 3.3 billion edges in around 500
                  seconds on a four processor, 80-logical-core Intel-based
                  system and 1100 seconds on a 64-processor Cray XMT2.},
  note = {(9/15 papers accepted, 60\% acceptance rate)},
  url = {http://www.slideshare.net/jasonriedy/mtaap12-scalable-community-detection}
}
@techreport{nonneg-house-lawn,
  author         = {James W. Demmel and Mark Frederick Hoemmen and Yozo Hida
                    and E. Jason Riedy},
  ejr-withauthor = {James W. Demmel and Mark Frederick Hoemmen and Yozo Hida},
  title          = {Non-Negative Diagonals and High Performance on Low-Profile
                    Matrices from {Householder} {$QR$}},
  institution    = {Netlib},
  type           = {LAPACK Working Note},
  number         = {203},
  lawn           = {203},
  year           = {2008},
  month          = may,
  dom            = {30},
  note           = {Also issued as UCB/EECS-2008-76; modified from SISC
                    version.},
  url            = {http://www.netlib.org/lapack/lawnspdf/lawn203.pdf},
  other-url      = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2008/EECS-2008-76.html},
  role           = {techreport},
  tags           = {lawn; lapack; householder; qr}
}
@article{nonneg-house-lawn-sisc,
  author         = {James W. Demmel and Mark Frederick Hoemmen and Yozo Hida
                    and E. Jason Riedy},
  ejr-withauthor = {James W. Demmel and Mark Frederick Hoemmen and Yozo Hida},
  title          = {Non-Negative Diagonals and High Performance on Low-Profile
                    Matrices from {H}ouseholder {$QR$}},
  journal        = {SIAM Journal on Scientific Computing},
  publisher      = {SIAM},
  volume         = {31},
  number         = {4},
  pages          = {2832--2841},
  year           = {2009},
  month          = jul,
  dom            = {3},
  doi            = {10.1137/080725763},
  issn           = {1064-8275},
  mrclass        = {65F30},
  mrnumber       = {2520301},
  keywords       = {LAPACK; QR factorization; Householder reflection;
                    floating-point},
  role           = {refereed},
  tags           = {siam; sisc; lapack; householder; qr}
}
@unpublished{nsf-accel-workshop,
  editor         = {Viktor K. Prasanna and David A. Bader},
  ejr-withauthor = {workshop participants},
  key            = {Report on NSF Workshop on Center Scale Activities Related
                    to Accelerators for Data Intensive Applications},
  title          = {{Report on NSF Workshop on Center Scale Activities Related
                    to Accelerators for Data Intensive Applications}},
  year           = {2010},
  month          = oct,
  dom            = {31},
  note           = {This workshop is supported by NSF Grant Number 1051537, in
                    response to the Call for Exploratory Workshop Proposals for
                    Scientific Software Innovation Institutes (S2I2).}
}
@inproceedings{nsfaccelws10,
  author         = {Jason Riedy and David Bader and David Ediger},
  ejr-withauthor = {David Bader and David Ediger},
  title          = {Applications in Social Networks},
  booktitle      = {NSF Workshop on Accelerators for Data-Intensive
                    Applications},
  year           = {2010},
  month          = oct,
  dom            = {13},
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/nsf-workshop-socnet.pdf},
  role           = {presentation},
  tags           = {graph; NSF; streaming}
}
@misc{par-bipart-pp04,
  author         = {E. Jason Riedy},
  title          = {Parallel Weighted Bipartite Matching and Applications},
  howpublished   = {SIAM Parallel Processing for Scientific Computing},
  year           = {2004},
  month          = feb,
  dom            = {27},
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/pp04.pdf},
  role           = {presentation},
  tags           = {siam; combinatorial optimization; parallel algorithms;
                    sparse matrix}
}
@unpublished{power-control,
  author         = {E. Jason Riedy and Robert Szewczyk},
  ejr-withauthor = {Robert Szewczyk},
  title          = {Power and Control in Networked Sensors},
  year           = {2000},
  month          = may,
  dom            = {11},
  note           = {Cited},
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/power-and-control.pdf},
  citeseer       = {riedy00power.html},
  role           = {unpublished},
  tags           = {sensor network},
  abstract       = {The fundamental constraint on a networked sensor is its
                    energy consumption, since it may be either impossible or
                    not feasible to replace its energy source. We analyze the
                    power dissipation implications of implementing the network
                    sensor with either a central processor switching between
                    I/O devices or a family of processors, each dedicated to a
                    single device. We present the energy measurements of the
                    current generations of networked sensors, and develop an
                    abstract description of tradeoffs between both designs.}
}
@misc{pp12-community-ms,
  author         = {Henning Meyerhenke and E. Jason Riedy and David A. Bader},
  ejr-withauthor = {Henning Meyerhenke and David A. Bader},
  title          = {Parallel Community Detection in Streaming Graphs},
  howpublished   = {SIAM Parallel Processing for Scientific Computing},
  address        = {Savannah, GA},
  year           = {2012},
  month          = feb,
  dom            = {15},
  role           = {presentation},
  tags           = {siam; streaming data; parallel algorithms}
}
@misc{pp12-graphct,
  author         = {David Ediger and E. Jason Riedy and Henning Meyerhenke and
                    David A. Bader},
  ejr-withauthor = {David Ediger and Henning Meyerhenke and David A. Bader},
  title          = {Analyzing Massive Networks with GraphCT},
  howpublished   = {SIAM Parallel Processing for Scientific Computing},
  address        = {Savannah, GA},
  year           = {2012},
  month          = feb,
  dom            = {16},
  role           = {poster},
  tags           = {siam; parallel algorithms}
}
@misc{pp12-sting,
  author         = {E. Jason Riedy and David Ediger and Henning Meyerhenke and
                    David A. Bader},
  ejr-withauthor = {David Ediger and Henning Meyerhenke and David A. Bader},
  title          = {STING: Software for Analysis of Spatio-Temporal
                    Interaction Networks and Graphs},
  howpublished   = {SIAM Parallel Processing for Scientific Computing},
  address        = {Savannah, GA},
  year           = {2012},
  month          = feb,
  dom            = {16},
  role           = {poster},
  tags           = {siam; parallel algorithms}
}
@misc{pp12-streaming-ms,
  author         = {E. Jason Riedy and Henning Meyerhenke},
  ejr-withauthor = {Henning Meyerhenke},
  title          = {Scalable Algorithms for Analysis of Massive, Streaming
                    Graphs},
  howpublished   = {SIAM Parallel Processing for Scientific Computing},
  address        = {Savannah, GA},
  year           = {2012},
  month          = feb,
  dom            = {15},
  url            = {http://www.slideshare.net/jasonriedy/siam-pp-2012-scalable-algorithms-for-analysis-of-massive-streaming-graphs},
  role           = {presentation},
  tags           = {siam; streaming data; parallel algorithms}
}
@inproceedings{ppam11,
  author         = {E. Jason Riedy and Henning Meyerhenke and David Ediger and
                    David A. Bader},
  ejr-withauthor = {Henning Meyerhenke and David Ediger and David A. Bader},
  title          = {Parallel Community Detection for Massive Graphs},
  booktitle      = {9th International Conference on Parallel Processing and
                    Applied Mathematics (PPAM11)},
  publisher      = {Springer},
  year           = {2011},
  month          = sep,
  role           = {proceedings},
  tags           = {parallel; graph},
  note           = {(134/243 papers accepted, 55\% acceptance rate)},
  abstract       = {Tackling the current volume of graph-structured data
                    requires parallel tools. We extend our work on analyzing
                    such massive graph data with the first massively parallel
                    algorithm for community detection that scales to current
                    data sizes, scaling to graphs of over 122 million vertices
                    and nearly 2 billion edges in under 7300 seconds on a
                    massively multithreaded Cray XMT. Our algorithm achieves
                    moderate parallel scalability without sacrificing
                    sequential operational complexity. Community detection
                    partitions a graph into subgraphs more densely connected
                    within the subgraph than to the rest of the graph. We take
                    an agglomerative approach similar to Clauset, Newman, and
                    Moore's sequential algorithm, merging pairs of connected
                    intermediate subgraphs to optimize different graph
                    properties. Working in parallel opens new approaches to
                    high performance. On smaller data sets, we find the
                    output's modularity compares well with the standard
                    sequential algorithms.}
}
@inproceedings{ppopp11-graph,
  author         = {David A. Bader and David Ediger and E. Jason Riedy},
  ejr-withauthor = {David A. Bader and David Ediger},
  title          = {Parallel Programming for Graph Analysis},
  booktitle      = {16th ACM SIGPLAN Annual Symposium on Principles and
                    Practice of Parallel Programming (PPoPP)},
  address        = {San Antonio, TX},
  year           = {2011},
  month          = feb,
  dom            = {12},
  url            = {http://www.cc.gatech.edu/~bader/papers/GraphAnalysisTutorial-PPoPP2011.html},
  role           = {tutorial},
  tags           = {parallel; graph},
  abstract       = {An increasingly fast-paced, digital world has produced an
                    ever-growing volume of petabyte-sized datasets. At the same
                    time, terabytes of new, unstructured data arrive daily. As
                    the desire to ask more detailed questions about these
                    massive streams has grown, parallel software and hardware
                    have only recently begun to enable complex analytics in
                    this non-scientific space.

                    In this tutorial, we will discuss the open problems facing
                    us with analyzing this ``data deluge''. We will present
                    algorithms and data structures capable of analyzing
                    spatio-temporal data at massive scale on parallel systems.
                    We will try to understand the difficulties and bottlenecks
                    in parallel graph algorithm design on current systems and
                    will show how multithreaded and hybrid systems can overcome
                    these challenges. We will demonstrate how parallel graph
                    algorithms can be implemented on a variety of architectures
                    using different programming models.

                    The goal of this tutorial is to provide a comprehensive
                    introduction to the field of parallel graph analysis to an
                    audience with computing background, interested in
                    participating in research and/or commercial applications of
                    this field. Moreover, we will cover leading-edge technical
                    and algorithmic developments in the field and discuss open
                    problems and potential solutions.}
}
@inproceedings{ppopp12-graph,
  author         = {David Ediger and Jason Riedy and Rob McColl and David A.
                    Bader},
  ejr-withauthor = {David Ediger and Rob McColl and David A. Bader},
  title          = {Parallel Programming for Graph Analysis},
  booktitle      = {17th ACM SIGPLAN Annual Symposium on Principles and
                    Practice of Parallel Programming (PPoPP)},
  address        = {New Orleans, LA},
  year           = {2012},
  month          = feb,
  dom            = {26},
  url            = {http://www.cc.gatech.edu/~bader/papers/GraphAnalysisTutorial-PPoPP2012.html},
  role           = {tutorial},
  tags           = {parallel; graph},
  abstract       = {An increasingly fast-paced, digital world has produced an
                    ever-growing volume of petabyte-sized datasets. At the same
                    time, terabytes of new, unstructured data arrive daily. As
                    the desire to ask more detailed questions about these
                    massive streams has grown, parallel software and hardware
                    have only recently begun to enable complex analytics in
                    this non-scientific space.

                    In this tutorial, we will discuss the open problems facing
                    us with analyzing this ``data deluge''. We will present
                    algorithms and data structures capable of analyzing
                    spatio-temporal data at massive scale on parallel systems.
                    We will try to understand the difficulties and bottlenecks
                    in parallel graph algorithm design on current systems and
                    will show how multithreaded and hybrid systems can overcome
                    these challenges. We will demonstrate how parallel graph
                    algorithms can be implemented on a variety of architectures
                    using different programming models.

                    The goal of this tutorial is to provide a comprehensive
                    introduction to the field of parallel graph analysis to an
                    audience with computing background, interested in
                    participating in research and/or commercial applications of
                    this field. Moreover, we will cover leading-edge technical
                    and algorithmic developments in the field and discuss open
                    problems and potential solutions.}
}
@phdthesis{riedy:eecs-2010-172,
  author         = {Riedy, E. Jason},
  title          = {Making Static Pivoting Scalable and Dependable},
  school         = {EECS Department, University of California, Berkeley},
  number         = {UCB/EECS-2010-172},
  year           = {2010},
  month          = dec,
  dom            = {17},
  url            = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2010/EECS-2010-172.html},
  role           = {techreport},
  tags           = {lapack; linear algebra; floating point; thesis},
  abstract       = {Solving square linear systems of equations $Ax=b$ is one
                    of the primary workhorses in scientific computing. With
                    asymptotically and practically small amounts of extra
                    calculation and higher precision, we can render solution
                    techniques \emph{dependable}. We produce a solution with
                    tiny error for almost all systems where we should expect a
                    tiny error, and we correctly flag potential failures.

                    Our method uses a proven technique: iterative refinement.
                    We extend prior work by applying extra precision not only
                    in calculating the residual $b-A y_i$ of an intermediate
                    solution $y_i$ but also in carrying that intermediate
                    solution $y_i$. Analysis shows that extra precision in the
                    intermediate solutions lowers the limiting backward error
                    (measuring perturbations in the initial problem) to levels
                    that produce a forward error (measuring perturbations in
                    the solution) not much larger than the precision used to
                    store the result. We also demonstrate that condition
                    estimation is not necessary for determining success,
                    reducing the computation in refinement substantially.

                    This basic, dependable solver applies to typical dense $LU$
                    factorization methods using partial pivoting as well as
                    methods that risk greater failure by choosing pivots for
                    non-numerical reasons. Sparse factorization methods may
                    choose pivots to promote structural sparsity or even choose
                    pivots \emph{before} factorization to decouple the phases.
                    We show through experiments that solutions using these
                    restrictive pivoting methods still have small error so long
                    as an estimate of factorization quality, the growth factor,
                    does not grow too large. Our refinement algorithm
                    dependably flags such failures. Additionally, we find a
                    better choice of heuristic for sparse static pivoting than
                    the defaults in Li and Demmel's SuperLU package.

                    Static pivoting in a distributed-memory setting needs an
                    algorithm for choosing pivots that does not rely on fitting
                    the entire matrix into one memory space. We investigate a
                    set of algorithms, Bertsekas's auction algorithms, for
                    choosing a static pivoting via maximum weight perfect
                    bipartite matching. Auction algorithms have a natural
                    mapping to distributed memory computation through their
                    bidding mechanism. We provide an analysis of the auction
                    algorithm fitting it comfortably in linear optimization
                    theory and characterizing approximately maximum weight
                    perfect bipartite matches. These approximately maximum
                    weight perfect matches work well as static pivot choices
                    and can be computed much more quickly than the exact
                    maximum weight matching.

                    Finally, we consider the performance of auction algorithm
                    implementations on a suite of real-world sparse problems.
                    Sequential performance is roughly equivalent to existing
                    implementations like Duff and Koster's MC64, but varies
                    widely with different parameter and input settings. The
                    parallel performance is even more wildly unpredictable.
                    Computing approximately maximum weight matchings helps
                    performance somewhat, but we still conclude that the
                    performance is too variable for a black-box solution
                    method.}
}
@incollection{rwp10,
  author         = {E. Jason Riedy},
  title          = {``Here, on the farthest point of the peninsula''},
  booktitle      = {Read Write Poem NaPoWriMo Anthology},
  editor         = {Dana Martin Guthrie},
  publisher      = {\url{http://issuu.com}},
  pages          = {86},
  year           = {2010},
  month          = sep,
  dom            = {15},
  url            = {http://issuu.com/readwritepoem/docs/read_write_poem_napowrimo_anthology},
  myurl          = {http://lovesgoodfood.com/jason/posts/post-0099/},
  role           = {poetry},
  tags           = {beach; napowrimo; poetry; rwp}
}
@techreport{seed-set-tr,
  author         = {Riedy, Jason and Bader, David A. and Jiang, Karl and
                    Pande, Pushkar and Sharma, Richa},
  ejr-withauthor = {Bader, David A. and Jiang, Karl and Pande, Pushkar and
                    Sharma, Richa},
  title          = {Detecting Communities from Given Seeds in Social
                    Networks},
  institution    = {Georgia Institute of Technology},
  number         = {GT-CSE-11-01},
  year           = {2011},
  month          = feb,
  dom            = {22},
  url            = {http://hdl.handle.net/1853/36980},
  role           = {techreport},
  tags           = {graph; social network},
  abstract       = {Analyzing massive social networks challenges both
                    high-performance computers and human understanding. These
                    massive networks cannot be visualized easily, and their
                    scale makes applying complex analysis methods
                    computationally expensive. We present a region-growing
                    method for finding a smaller, more tractable subgraph, a
                    community, given a few example seed vertices. Unlike
                    existing work, we focus on a small number of seed vertices,
                    from two to a few dozen. We also present the first
                    comparison between five algorithms for expanding a small
                    seed set into a community. Our comparison applies these
                    algorithms to an R-MAT generated graph component with 240
                    thousand vertices and 32 million edges and evaluates the
                    community size, modularity, Kullback-Leibler divergence,
                    conductance, and clustering coefficient. We find that our
                    new algorithm with a local modularity maximizing heuristic
                    based on Clauset, Newman, and Moore performs very well when
                    the output is limited to 100 or 1000 vertices. When run
                    without a vertex size limit, a heuristic from McCloskey and
                    Bader generates communities containing around 60\% of the
                    graph's vertices and having a small conductance and
                    modularity appropriate to the result size. A personalized
                    PageRank algorithm based on Andersen, Lang, and Chung also
                    performs well with respect to our metrics.}
}
@misc{siam-am03,
  author         = {E. Jason Riedy},
  title          = {Practical Alternatives for Parallel Pivoting},
  howpublished   = {SIAM Annual Meeting},
  year           = {2003},
  month          = jun,
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/siam-am03.pdf},
  role           = {presentation},
  tags           = {siam; sparse matrix; linear algebra}
}
@misc{siam-cse03,
  author         = {E. Jason Riedy},
  title          = {Parallel Bipartite Matching for Sparse Matrix
                    Computations},
  howpublished   = {SIAM Conference on Computational Science and Engineering},
  year           = {2003},
  month          = feb,
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/siam-cse03-poster.pdf},
  role           = {poster},
  tags           = {siam; parallel algorithms; combinatorial optimization;
                    sparse matrix}
}
@incollection{smallstone10,
  author         = {Jason Riedy},
  title          = {``The storm's coming when the chickens spread out''},
  booktitle      = {pay attention: a river of stones},
  editor         = {Fiona Robyn and Kaspalita},
  publisher      = {\url{http://lulu.com}},
  pages          = {77},
  year           = {2011},
  month          = mar,
  dom            = {2},
  url            = {http://www.lulu.com/product/file-download/pay-attention-a-river-of-stones/15057101},
  myurl          = {http://lovesgoodfood.com/jason/posts/river-of-stones-7/},
  role           = {poetry},
  tags           = {poetry; aros; riverofstones}
}
@misc{sparse-ds-csc04,
  author         = {E. Jason Riedy},
  title          = {Sparse Data Structures for Weighted Bipartite Matching},
  howpublished   = {SIAM Workshop on Combinatorial Scientific Computing},
  year           = {2004},
  month          = feb,
  dom            = {28},
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/csc04.pdf},
  role           = {presentation},
  tags           = {siam; combinatorial optimization; sparse matrix}
}
@inproceedings{stinger-hpec12,
  author         = {David Ediger and Robert McColl and Jason Riedy and David
                    A. Bader},
  ejr-withauthor = {David Ediger and Robert McColl and David A. Bader},
  title          = {{STINGER}: High Performance Data Structure for Streaming
                    Graphs},
  booktitle      = {The IEEE High Performance Extreme Computing Conference
                    (HPEC)},
  address        = {Waltham, MA},
  year           = {2012},
  month          = sep,
  dom            = {12},
  note           = {Best paper award}
}
@unpublished{tera-ubench,
  author         = {E. Jason Riedy and Rich Vuduc},
  ejr-withauthor = {Rich Vuduc},
  title          = {Microbenchmarking the {Tera} {MTA}},
  year           = {1999},
  month          = may,
  dom            = {21},
  note           = {Cited,
                    \href{http://purl.oclc.org/NET/jason-riedy/resume/material/Tera-presentation.pdf}{presentation
                    version} available},
  url            = {http://purl.oclc.org/NET/jason-riedy/resume/material/Tera.pdf},
  other-url      = {http://purl.oclc.org/NET/jason-riedy/resume/material/Tera-presentation.pdf},
  role           = {unpublished},
  tags           = {parallel programming; parallel algorithms; multithreaded;
                    computer architecture; cray},
  abstract       = {The Tera Multithreaded Architecture, or MTA, addresses
                    scalable shared memory system design with a different
                    approach; it tolerates latency through providing fast
                    access to multiple threads of execution. The MTA employs a
                    number of radical design ideas: creation of hardware
                    threads (streams) with frequent context switching;
                    full-empty bits for each memory word; a flat memory
                    hierarchy; and deep pipelines. Recent evaluations of the
                    MTA have taken a top-down approach: port applications and
                    application benchmarks, and compare the absolute
                    performance with conventional systems. While useful, these
                    studies do not reveal the effect of the Tera MTA's unique
                    hardware features on an application. We present a bottom-up
                    approach to the evaluation of the MTA via a suite of
                    microbenchmarks to examine in detail the underlying
                    hardware mechanisms and the cost of runtime system support
                    for multithreading. In particular, we measure memory,
                    network, and instruction latencies; memory bandwidth; the
                    cost of low-level synchronization via full-empty bits;
                    overhead for stream management; and the effects of software
                    pipelining. These data should provide a foundation for
                    performance modeling on the MTA. We also present results
                    for list ranking on the MTA, an application which has
                    traditionally been difficult to scale on conventional
                    parallel systems.}
}
@techreport{tridiag-lawn,
  author         = {Osni A. Marques and E. Jason Riedy and Christof V{\"o}mel},
  ejr-withauthor = {Osni A. Marques and Christof V{\"o}mel},
  title          = {Benefits of {IEEE-754} Features in Modern Symmetric
                    Tridiagonal Eigensolvers},
  institution    = {Netlib},
  type           = {LAPACK Working Note},
  number         = {172},
  lawn           = {172},
  year           = {2005},
  month          = sep,
  dom            = {30},
  note           = {Also issued as UCB//CSD-05-1414; expanded from SISC
                    version},
  url            = {http://www.netlib.org/lapack/lawnspdf/lawn172.pdf},
  other-url      = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2005/6514.html},
  role           = {techreport},
  tags           = {lawn; lapack; floating point; ieee754; eigenvalue}
}
@article{tridiag-sisc,
  author         = {Osni A. Marques and E. Jason Riedy and Christof V{\"o}mel},
  ejr-withauthor = {Osni A. Marques and Christof V{\"o}mel},
  title          = {Benefits of {IEEE-754} Features in Modern Symmetric
                    Tridiagonal Eigensolvers},
  journal        = {SIAM Journal on Scientific Computing},
  volume         = {28},
  number         = {5},
  pages          = {1613--1633},
  year           = {2006},
  month          = sep,
  dom            = {28},
  doi            = {10.1137/050641624},
  issn           = {1064-8275},
  mrclass        = {65F15},
  mrnumber       = {2272181},
  role           = {refereed},
  tags           = {siam; sisc; floating point; eigenvalue; ieee754},
  abstract       = {Bisection is one of the most common methods used to
                    compute the eigenvalues of symmetric tridiagonal matrices.
                    Bisection relies on the Sturm count: For a given shift
                    sigma, the number of negative pivots in the factorization
                    $T - \sigma I = LDL^T$ equals the number of eigenvalues of
                    T that are smaller than sigma. In IEEE-754 arithmetic, the
                    value $\infty$ permits the computation to continue past a
                    zero pivot, producing a correct Sturm count when $T$ is
                    unreduced. Demmel and Li showed [IEEE Trans. Comput., 43
                    (1994), pp. 983--992] that using $\infty$ rather than
                    testing for zero pivots within the loop could significantly
                    improve performance on certain architectures. When
                    eigenvalues are to be computed to high relative accuracy,
                    it is often preferable to work with $LDL^T$ factorizations
                    instead of the original tridiagonal $T$. One important
                    example is the MRRR algorithm. When bisection is applied to
                    the factored matrix, the Sturm count is computed from
                    $LDL^T$ which makes differential stationary and progressive
                    qds algorithms the methods of choice. While it seems
                    trivial to replace $T$ by $LDL^T$, in reality these
                    algorithms are more complicated: In IEEE-754 arithmetic, a
                    zero pivot produces an overflow followed by an invalid
                    exception (NaN, or ``Not a Number'') that renders the Sturm
                    count incorrect. We present alternative, safe formulations
                    that are guaranteed to produce the correct result.
                    Benchmarking these algorithms on a variety of platforms
                    shows that the original formulation without tests is always
                    faster provided that no exception occurs. The transforms
                    see speed-ups of up to 2.6x over the careful formulations.
                    Tests on industrial matrices show that encountering
                    exceptions in practice is rare. This leads to the following
                    design: First, compute the Sturm count by the fast but
                    unsafe algorithm. Then, if an exception occurs, recompute
                    the count by a safe, slower alternative. The new Sturm
                    count algorithms improve the speed of bisection by up to 2x
                    on our test matrices. Furthermore, unlike the traditional
                    tiny-pivot substitution, proper use of IEEE-754 features
                    provides a careful formulation that imposes no input range
                    restrictions.}
}

This file was generated by bibtex2html 1.97.