{
  "id": "anthropic-sitemap:research:emergent-misalignment-reward-hacking",
  "type": "article",
  "title": "From shortcuts to sabotage: natural emergent misalignment from reward hacking",
  "abstract": "_In the latest research from Anthropic’s alignment team, we show for the first time that realistic AI training processes can accidentally produce misaligned models._",
  "issued": {
    "date-parts": [
      [
        2025,
        11,
        21
      ]
    ]
  },
  "URL": "https://www.anthropic.com/research/emergent-misalignment-reward-hacking",
  "publisher": "Anthropic",
  "source": "vendor/anthropic-sitemap/research/emergent-misalignment-reward-hacking.md"
}