{
  "id": "anthropic-sitemap:research:statistical-approach-to-model-evals",
  "type": "article",
  "title": "A statistical approach to model evaluations",
  "abstract": "Suppose an AI model outperforms another model on a benchmark of interest—testing its general knowledge, for example, or its ability to solve computer-coding questions. Is the difference in capabilities real, or could one model simply have gotten lucky in the choice of questions on the benchmark?",
  "issued": {
    "date-parts": [
      [
        2024,
        11,
        19
      ]
    ]
  },
  "URL": "https://www.anthropic.com/research/statistical-approach-to-model-evals",
  "publisher": "Anthropic",
  "source": "vendor/anthropic-sitemap/research/statistical-approach-to-model-evals.md"
}
