@STRING{jan = "January"}
@STRING{feb = "February"}
@STRING{mar = "March"}
@STRING{apr = "April"}
@STRING{may = "May"}
@STRING{jun = "June"}
@STRING{jul = "July"}
@STRING{aug = "August"}
@STRING{sep = "September"}
@STRING{oct = "October"}
@STRING{nov = "November"}
@STRING{dec = "December"}

@InProceedings{DBLP:conf_dimacs_RiedyMEB12,
  file =         {material/dimacs10-proceedings-community-detection.pdf},
  Title =        {Parallel community detection for massive graphs},
  Author =       {E. Jason Riedy and Henning Meyerhenke and David Ediger and David A. Bader},
  Booktitle =    {Graph Partitioning and Graph Clustering},
  Year =         {2012},
  Editor =       {David A. Bader and Henning Meyerhenke and Peter Sanders and Dorothea Wagner},
  Pages =        {207--222},
  Publisher =    {American Mathematical Society},
  Series =       {Contemporary Mathematics},
  Volume =       {588},
  Abstract =     {Tackling the current volume of graph-structured data requires parallel tools. We extend our work on analyzing such massive graph data with a massively parallel algorithm for community detection that scales to current data sizes, clustering a real-world graph of over 100 million vertices and over 3 billion edges in under 500 seconds on a four-processor Intel E7-8870-based server.  Our algorithm achieves moderate parallel scalability without sacrificing sequential operational complexity.  Community detection partitions a graph into subgraphs more densely connected within the subgraph than to the rest of the graph. We take an agglomerative approach similar to Clauset, Newman, and Moore’s sequential algorithm, merging pairs of connected intermediate subgraphs to optimize different graph properties. Working in parallel opens new approaches to high performance. We improve performance of our parallel community detection algorithm on both the Cray XMT2 and OpenMP platforms and adapt our algorithm to the DIMACS Implementation Challenge data set.},
  Bibsource =    {DBLP, http://dblp.uni-trier.de},
  Doi =          {10.1090/conm/588/11703},
  Ee =           {http://www.ams.org/books/conm/588/11703},
  OPTISBN =      {978-0-8218-9038-7, 978-0-8218-9869-7},
  ISBN =         {978-0-8218-9038-7},
  OPTtitle =     {Graph Partitioning and Graph Clustering - 10th DIMACS Implementation Challenge Workshop, Georgia Institute of Technology, Atlanta, GA, USA, February 13-14, 2012. Proceedings},
  OPTurl =          {http://www.ams.org/books/conm/588/11703/conm588-11703.pdf},
  projtag =      {cassmt, intel-sting},
  ejr-proj = {high-performance-data-analysis, graph-analysis},
  ejr-grant = {cassmt, intel-sting},
  keywords = {graph analysis, community detection, hpda, parallel algorithm}
}

@InCollection{GraphCT-Wiley-Chap,
  file =         {material/STINGER-with-apps.pdf},
  Title =        {Computational Graph Analytics for Massive Streaming Data},
  Author =       {David Ediger and Jason Riedy and David A. Bader and Henning Meyerhenke},
  Booktitle =    {Large Scale Network-Centric Computing Systems},
  Publisher =    {Wiley},
  Year =         {2013},
  Chapter =      {25},
  Editor =       {Hamid Sarbazi-azad and Albert Zomaya},
  Month =        jul,
  Series =       {Parallel and Distributed Computing},
  Abstract =     {Handling the constant stream of data from health care, security, business, and social network applications requires new algorithms and data structures.  We present a new approach for parallel massive analysis of streaming, temporal, graph-structured data.  For this purpose we examine data structure and algorithm trade-offs that extract the parallelism necessary for high-performance updating analysis of massive graphs. As a result of this study, we propose the extensible and flexible data structure for massive graphs called STINGER ({S}patio-{T}emporal {I}nteraction {N}etworks and {G}raphs {E}xtensible {R}epresentation).  Two case studies demonstrate our new approach's effectiveness.  The first one computes a dynamic graph's vertices' \emph{clustering coefficients}.  We show that incremental updates are far more efficient than global recomputation.  Within this kernel, we compare three methods for dynamically updating local clustering coefficients: a brute-force local recalculation, a sorting algorithm, and our new approximation method using a Bloom filter.  On 32 processors of a \xmt{} with a synthetic scale-free graph of $2^{24} \approx 16$ million vertices and $2^{29} \approx 537$ million edges, the brute-force method processes a mean of over 50\,000 updates per second, while our Bloom filter approaches 200\,000 updates per second.  The second case study monitors a global feature, a dynamic graph's connected components.  We use similar algorithmic ideas as before to exploit the parallelism in the problem and provided by the hardware architecture.  On a 16 million vertex graph, we obtain rates of up to 240\,000 updates per second on 32 processors of a \xmt{}.  For the large scale-free graphs typical in our applications, our implementation uses novel batching techniques that exploit the scale-free nature of the data and run over three times faster than prior methods.  Our new framework is the first to handle real-world data rates, opening the door to higher-level analytics such as community and anomaly detection.},
  Dom =          {30},
  ISBN =         {978-0470936887},
  Role =         {chapter},
  keywords =         {parallel algorithm, hpda, graph analysis, streaming data},
  doi =          {10.1002/9781118640708.ch25},
  projtag =      {cassmt, xscala, intel-sting},
  ejr-proj = {high-performance-data-analysis, graph-analysis},
  ejr-grant = {cassmt, xscala, intel-sting},
}

@InCollection{ia-simd-chapter,
  file =         {material/ia-simd-chap.pdf},
  Title =        {An {Image} {Algebra} Based {SIMD} Image Processing Environment},
  Author =       {Joseph N. Wilson and E. Jason Riedy and Gerhard X. Ritter and Hongchi Shi},
  Booktitle =    {Visual Information Representation, Communication, and Image Processing},
  Publisher =    {Marcel Dekker},
  Year =         {1999},
  Address =      {New York},
  Editor =       {C. W. Chen and Y. Q. Zhang},
  Pages =        {523--542},
  Abstract =     {SIMD parallel computers have been employed for image related applications since their inception. They have been leading the way in improving processing speed for those applications. However, current parallel programming technologies have not kept pace with the performance growth and cost decline of parallel hardware. A highly usable parallel software development environment is needed. This chapter presents a computing environment that integrates a SIMD mesh architecture with image algebra for high-performance image processing applications. The environment describes parallel programs through a machine-independent, retargetable image algebra object library that supports SIMD execution on the Lockheed Martin PAL-I parallel computer. Program performance on this machine is improved through on-the-fly execution analysis and scheduling. We describe the relevant elements of the system structure, outline the scheme for execution analysis, and provide examples of the current cost model and scheduling system.},
  OPTCiteseer =  {wilson97image.html},
  ISBN =         {082471928X},
  Role =         {chapter},
  keywords =         {image algebra, parallel algorithm},
  file =         {material/ia-simd-chap.pdf},
  projtag =      {image-algebra}
}