% LeGoues15tse: 2015 journal article presenting the ManyBugs and IntroClass
% benchmarks for automated repair of C programs.
% The note field prints a clickable DOI via \href, so the citing document
% must load hyperref. The month field uses the standard BibTeX macro so each
% bibliography style chooses its own rendering (full name or abbreviation).
@article{LeGoues15tse,
  author   = {{Le Goues}, Claire and Holtschulte, Neal and Smith, Edward K. and
              Brun, Yuriy and Devanbu, Premkumar and Forrest, Stephanie and
              Weimer, Westley},
  title    = {The {ManyBugs} and {IntroClass} Benchmarks for Automated Repair of
              {C} Programs},
  journal  = {IEEE Transactions on Software Engineering (TSE)},
  volume   = {41},
  number   = {12},
  pages    = {1236--1256},
  month    = dec,
  year     = {2015},
  issn     = {0098-5589},
  doi      = {10.1109/TSE.2015.2454513},
  note     = {\href{https://doi.org/10.1109/TSE.2015.2454513}{DOI: 10.1109/TSE.2015.2454513}},
  abstract = {The field of automated software repair lacks a set of common benchmark
  problems. Although benchmark sets are used widely throughout computer
  science, existing benchmarks are not easily adapted to the problem of
  automatic defect repair, which has several special requirements. Most
  important of these is the need for benchmark programs with reproducible,
  important defects and a deterministic method for assessing if those defects
  have been repaired. This article details the need for a new set of
  benchmarks, outlines requirements, and then presents two datasets, ManyBugs
  and IntroClass, consisting between them of 1,183 defects in 15 C programs.
  Each dataset is designed to support the comparative evaluation of automatic
  repair algorithms asking a variety of experimental questions. The datasets
  have empirically defined guarantees of reproducibility and benchmark
  quality, and each study object is categorized to facilitate qualitative
  evaluation and comparisons by category of bug or program. The article
  presents baseline experimental results on both datasets for three existing
  repair methods, GenProg, AE, and TrpAutoRepair, to reduce the burden on
  researchers who adopt these datasets for their own comparative evaluations.},
}