% Bibliography database: unpublished reports, proposals and workshop papers.
%
% Layout: one entry per paragraph, one field per line.
% Fields outside the standard BibTeX set (dom, projtag, ejr-proj, ejr-grant,
% ejr-withauthor, officialproject, role, OPTtags, file, citeseer, other-url,
% abstract) are carried as annotations; standard styles silently ignore
% unknown fields.
% NOTE(review): "dom" presumably means day-of-month -- confirm against the
% tooling that consumes this file.

% Month macros, referenced unquoted by the "month" fields below.  These
% definitions spell the month names out in full.
@STRING{jan = "January"}
@STRING{feb = "February"}
@STRING{mar = "March"}
@STRING{apr = "April"}
@STRING{may = "May"}
@STRING{jun = "June"}
@STRING{jul = "July"}
@STRING{aug = "August"}
@STRING{sep = "September"}
@STRING{oct = "October"}
@STRING{nov = "November"}
@STRING{dec = "December"}

% Initials in the last author name are space-separated so that abbreviating
% styles keep both of them.
@Unpublished{ngblas-doc,
  author          = {James Demmel and Mark Gates and Greg Henry and
                     Xiaoye S. Li and Jason Riedy and P. T. Peter Tang},
  title           = {A Proposal for a Next-Generation {BLAS}},
  note            = {(living document, being updated)},
  month           = nov,
  year            = 2017,
  url             = {https://goo.gl/hvDu3d},
  projtag         = {lapack},
  keywords        = {lapack, blas, linear algebra},
  ejr-proj        = {linear-algebra, high-performance-data-analysis},
  ejr-grant       = {xscala, grateful},
}

% The acronym in the title is brace-protected, matching s2i2-acmbcb-2013, so
% sentence-casing styles keep it upper case.
@InProceedings{wssspe1,
  author          = {Shel Swenson and Yogesh Simmhan and Viktor Prasanna and
                     Manish Parashar and Jason Riedy and David Bader and
                     Richard Vuduc},
  ejr-withauthor  = {Shel Swenson and Yogesh Simmhan and Viktor Prasanna and
                     Manish Parashar and David Bader and Richard Vuduc},
  title           = {Sustainable Software Development for Next-Gen Sequencing
                     ({NGS}) Bioinformatics on Emerging Platforms},
  booktitle       = {First Workshop on Sustainable Software for Science:
                     Practice and Experiences (WSSSPE1)},
  year            = 2013,
  month           = nov,
  dom             = 17,
  address         = {Denver, CO},
  note            = {held in conjunction with SC13, published electronically
                     (\url{http://wssspe.researchcomputing.org.uk/})},
  url             = {http://arxiv.org/abs/1309.1828},
  file            = {material/wssspe13.pdf},
  abstract        = {DNA sequence analysis is fundamental to life science
                     research. The rapid development of next generation
                     sequencing (NGS) technologies, and the richness and
                     diversity of applications it makes feasible, have
                     created an enormous gulf between the potential of this
                     technology and the development of computational methods
                     to realize this potential. Bridging this gap holds
                     possibilities for broad impacts toward multiple grand
                     challenges and offers unprecedented opportunities for
                     software innovation and research.
                     We argue that NGS-enabled applications need a critical
                     mass of sustainable software to benefit from emerging
                     computing platforms' transformative potential.
                     Accumulating the necessary critical mass will require
                     leaders in computational biology, bioinformatics,
                     computer science, and computer engineering work together
                     to identify core opportunity areas, critical software
                     infrastructure, and software sustainability challenges.
                     Furthermore, due to the quickly changing nature of both
                     bioinformatics software and accelerator technology, we
                     conclude that creating sustainable accelerated
                     bioinformatics software means constructing a sustainable
                     bridge between the two fields. In particular, sustained
                     collaboration between domain developers and technology
                     experts is needed to develop the accelerated kernels,
                     libraries, frameworks and middleware that could provide
                     the needed flexible link from NGS bioinformatics
                     applications to emerging platforms.},
  officialproject = {nsf-s2i2-conc},
  projtag         = {xscala},
  keywords        = {high performance data analysis, accelerator, parallel
                     algorithm},
  ejr-proj        = {high-performance-data-analysis},
  ejr-grant       = {xscala},
}

@Unpublished{s2i2-ipdps-2013,
  author          = {Shel Swenson and Yogesh Simmhan and Viktor Prasanna and
                     Manish Parashar and David Bader and Jason Riedy and
                     Richard Vuduc},
  title           = {Report on ``Workshop on Accelerating Bioinformatics
                     Applications Enabled by NextGen-Sequencing''},
  address         = {Boston, MA},
  dom             = 19,
  month           = may,
  year            = 2013,
  note            = {Co-located with IPDPS 2013},
  url             = {http://future-compute.usc.edu/index.php/NGS_Workshop},
  officialproject = {nsf-s2i2-conc},
  projtag         = {xscala},
  keywords        = {high performance data analysis, accelerator, parallel
                     algorithm},
  ejr-proj        = {high-performance-data-analysis},
  ejr-grant       = {xscala},
}

@Unpublished{s2i2-acmbcb-2013,
  author          = {Shel Swenson and Yogesh Simmhan and Viktor Prasanna and
                     Manish Parashar and David Bader and Jason Riedy and
                     Richard Vuduc},
  title           = {Report on ``Workshop on Challenges in accelerating
                     Next-Gen Sequencing ({NGS}) bioinformatics''},
  address         = {Washington, DC},
  dom             = 25,
  month           = sep,
  year            = 2013,
  note            = {in conjunction with ACM-BCB 2013},
  url             = {http://future-compute.usc.edu/index.php/NGS_Bioinformatics_Workshop},
  officialproject = {nsf-s2i2-conc},
  projtag         = {xscala},
  keywords        = {high performance data analysis, accelerator, parallel
                     algorithm},
  ejr-proj        = {high-performance-data-analysis},
  ejr-grant       = {xscala},
}

@Unpublished{graph500-1.1,
  author          = {David A. Bader and Jonathan Berry and Simon Kahan and
                     Richard Murphy and E. Jason Riedy and Jeremiah Willcock},
  ejr-withauthor  = {David A. Bader and Jonathan Berry and Simon Kahan and
                     Richard Murphy and Jeremiah Willcock},
  title           = {Graph 500 Benchmark 1 (``Search'')},
  note            = {Version 1.1},
  url             = {http://www.graph500.org/Specifications.html},
  month           = oct,
  year            = 2010,
  projtag         = {cassmt, percs},
  keywords        = {graph analysis, parallel algorithm, mistake},
  ejr-proj        = {graph-analysis},
  ejr-grant       = {cassmt, percs},
}

% NOTE(review): the title is wrapped in a second pair of braces, which stops
% styles from re-casing any of it; left as-is in case that is deliberate.
@Unpublished{nsf-accel-workshop,
  author          = {Participants},
  editor          = {Viktor K. Prasanna and David A. Bader},
  key             = {Report on NSF Workshop on Center Scale Activities
                     Related to Accelerators for Data Intensive Applications},
  title           = {{Report on NSF Workshop on Center Scale Activities
                     Related to Accelerators for Data Intensive Applications}},
  note            = {This workshop is supported by NSF Grant Number 1051537,
                     in response to the Call for Exploratory Workshop
                     Proposals for Scientific Software Innovation Institutes
                     (S2I2).},
  dom             = 31,
  month           = oct,
  year            = 2010,
  keywords        = {high performance data analysis, accelerator, parallel
                     algorithm},
  ejr-proj        = {high-performance-data-analysis},
  ejr-grant       = {xscala},
}

% NOTE(review): no "note" field here, although classic styles require one for
% unpublished entries -- confirm the wording to use and add it.
@Unpublished{lapack-style,
  author          = {Jack Dongarra and Julien Langou and E. Jason Riedy},
  ejr-withauthor  = {Jack Dongarra and Julien Langou},
  title           = {Sca/{LAPACK} Program Style},
  month           = aug,
  year            = 2006,
  role            = {unpublished},
  OPTtags         = {lapack},
  url             = {http://www.netlib.org/lapack-dev/lapack-coding/program-style.html},
  abstract        = {The purpose of this document is to facilitate
                     contributions to LAPACK and ScaLAPACK by documenting
                     their design and implementation guidelines. The
                     long-term goal is to provide guidelines for both LAPACK
                     and ScaLAPACK. However, the parallel ScaLAPACK code has
                     more open issues, so this document primarily concerns
                     LAPACK.},
  projtag         = {lapack},
  keywords        = {linear algebra, lapack, blas},
  ejr-proj        = {linear-algebra},
}

% NOTE(review): no "note" field here, although classic styles require one for
% unpublished entries -- confirm the wording to use and add it.
@Unpublished{fp-type-project,
  author          = {E. Jason Riedy},
  title           = {Type System Support for Floating-Point Computation},
  month           = may,
  dom             = 25,
  file            = {material/type-support-for-fp.pdf},
  abstract        = {Floating-point arithmetic is often seen as
                     untrustworthy. We show how manipulating precisions
                     according to the following rules of thumb enhances the
                     reliability of and removes surprises from calculations:
                     Store data narrowly, compute intermediates widely, and
                     derive properties widely. Further, we describe a typing
                     system for floating point that both supports and is
                     supported by these rules. A single type is established
                     for all intermediate computations. The type describes a
                     precision at least as wide as all inputs to and results
                     from the computation. Picking a single type provides
                     benefits to users, compilers, and interpreters. The type
                     system also extends cleanly to encompass intervals and
                     higher precisions.},
  year            = 2001,
  role            = {unpublished},
  OPTtags         = {programming language; floating point; ieee754},
  projtag         = {ieee754},
  keywords        = {floating point, ieee754},
  ejr-proj        = {floating-point},
}

@Unpublished{power-control,
  author          = {E. Jason Riedy and Robert Szewczyk},
  ejr-withauthor  = {Robert Szewczyk},
  title           = {Power and Control in Networked Sensors},
  note            = {Cited},
  month           = may,
  dom             = 11,
  year            = 2000,
  file            = {material/power-and-control.pdf},
  role            = {unpublished},
  OPTtags         = {sensor network},
  abstract        = {The fundamental constraint on a networked sensor is its
                     energy consumption, since it may be either impossible or
                     not feasible to replace its energy source. We analyze
                     the power dissipation implications of implementing the
                     network sensor with either a central processor switching
                     between I/O devices or a family of processors, each
                     dedicated to a single device. We present the energy
                     measurements of the current generations of networked
                     sensors, and develop an abstract description of
                     tradeoffs between both designs.},
  citeseer        = {riedy00power.html},
  keywords        = {embedded, sensor, {IoT}, novel architecture},
}

@Unpublished{tera-ubench,
  author          = {E. Jason Riedy and Rich Vuduc},
  ejr-withauthor  = {Rich Vuduc},
  file            = {material/Tera.pdf},
  title           = {Microbenchmarking the {Tera} {MTA}},
  note            = {Cited},
  other-url       = {http://purl.oclc.org/NET/jason-riedy/resume/material/Tera-presentation.pdf},
  dom             = 21,
  month           = may,
  year            = 1999,
  abstract        = {The Tera Multithreaded Architecture, or MTA, addresses
                     scalable shared memory system design with a different
                     approach; it tolerates latency through providing fast
                     access to multiple threads of execution. The MTA employs
                     a number of radical design ideas: creation of hardware
                     threads (streams) with frequent context switching;
                     full-empty bits for each memory word; a flat memory
                     hierarchy; and deep pipelines. Recent evaluations of the
                     MTA have taken a top-down approach: port applications
                     and application benchmarks, and compare the absolute
                     performance with conventional systems. While useful,
                     these studies do not reveal the effect of the Tera MTA's
                     unique hardware features on an application.
                     We present a bottom-up approach to the evaluation of the
                     MTA via a suite of microbenchmarks to examine in detail
                     the underlying hardware mechanisms and the cost of
                     runtime system support for multithreading. In
                     particular, we measure memory, network, and instruction
                     latencies; memory bandwidth; the cost of low-level
                     synchronization via full-empty bits; overhead for stream
                     management; and the effects of software pipelining.
                     These data should provide a foundation for performance
                     modeling on the MTA. We also present results for list
                     ranking on the MTA, an application which has
                     traditionally been difficult to scale on conventional
                     parallel systems.},
  role            = {unpublished},
  OPTtags         = {parallel programming; parallel algorithms;
                     multithreaded; computer architecture; cray},
  projtag         = {cassmt},
  keywords        = {parallel algorithm, novel architecture, memory-centric},
  ejr-proj        = {novel-arch},
}