{
  "id": "anthropic-sitemap:research:swe-bench-sonnet",
  "type": "article",
  "title": "Raising the bar on SWE-bench Verified with Claude 3.5 Sonnet",
  "abstract": "_Our latest model, the upgraded Claude 3.5 Sonnet, achieved 49% on SWE-bench Verified, a software engineering evaluation, beating the previous state-of-the-art model's 45%. This post explains the \"agent\" we built around the model, and is intended to help developers get the best possible performance out of Claude 3.5 Sonnet._",
  "URL": "https://www.anthropic.com/research/swe-bench-sonnet",
  "publisher": "Anthropic",
  "source": "vendor/anthropic-sitemap/research/swe-bench-sonnet.md"
}