Longterm Wiki

AI Accident Risk Cruxes

ID: accident-risks · Type: crux · Path: /knowledge-base/cruxes/accident-risks/
Entity ID (EID): E394
Backlinks: 4 · Quality: 67 · Updated: 2026-03-13
Page Record (database.json) — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "accident-risks",
  "numericId": null,
  "path": "/knowledge-base/cruxes/accident-risks/",
  "filePath": "knowledge-base/cruxes/accident-risks.mdx",
  "title": "AI Accident Risk Cruxes",
  "quality": 67,
  "readerImportance": 93.5,
  "researchImportance": 95,
  "tacticalValue": 55,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "Comprehensive survey of AI safety researcher disagreements on accident risks, quantifying probability ranges for mesa-optimization (15-55%), deceptive alignment (15-50%), and P(doom) (5-35% median across populations). Integrates 2024-2025 empirical breakthroughs including Anthropic's Sleeper Agents study (backdoors persist through safety training, >99% AUROC detection) and SAD benchmark showing rapid situational awareness advances (Claude Sonnet 4.5: 58% evaluation detection vs 22% for Opus 4.1).",
  "description": "Key uncertainties that determine views on AI accident risks and alignment difficulty, including fundamental questions about mesa-optimization, deceptive alignment, and alignment tractability. Based on extensive surveys of AI safety researchers 2019-2025, revealing probability ranges of 35-55% vs 15-25% for mesa-optimization likelihood and 30-50% vs 15-30% for deceptive alignment. 2024-2025 empirical breakthroughs include Anthropic's Sleeper Agents study showing backdoors persist through safety training, and detection probes achieving greater than 99% AUROC. Industry preparedness rated D on existential safety per 2025 AI Safety Index.",
  "ratings": {
    "novelty": 5.2,
    "rigor": 6.8,
    "actionability": 7.3,
    "completeness": 7.5
  },
  "category": "cruxes",
  "subcategory": null,
  "clusters": [
    "ai-safety",
    "governance"
  ],
  "metrics": {
    "wordCount": 4073,
    "tableCount": 26,
    "diagramCount": 1,
    "internalLinks": 127,
    "externalLinks": 50,
    "footnoteCount": 0,
    "bulletRatio": 0.1,
    "sectionCount": 44,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 45,
  "evergreen": true,
  "wordCount": 4073,
  "unconvertedLinks": [
    {
      "text": "2025 Expert Survey",
      "url": "https://arxiv.org/html/2502.14870v1",
      "resourceId": "4e7f0e37bace9678",
      "resourceTitle": "Roman Yampolskiy"
    },
    {
      "text": "AI Impacts 2023 survey",
      "url": "https://wiki.aiimpacts.org/ai_timelines/predictions_of_human-level_ai_timelines/ai_timeline_surveys/2023_expert_survey_on_progress_in_ai",
      "resourceId": "b4342da2ca0d2721",
      "resourceTitle": "AI Impacts 2023 survey"
    },
    {
      "text": "MIRI research",
      "url": "https://intelligence.org/learned-optimization/",
      "resourceId": "e573623625e9d5d2",
      "resourceTitle": "MIRI"
    },
    {
      "text": "Anthropic Sleeper Agents (2024)",
      "url": "https://arxiv.org/abs/2401.05566",
      "resourceId": "e5c0904211c7d0cc"
    },
    {
      "text": "OpenAI Superalignment",
      "url": "https://openai.com/index/superalignment-fast-grants/",
      "resourceId": "82eb0a4b47c95d2a",
      "resourceTitle": "OpenAI Superalignment Fast Grants"
    },
    {
      "text": "2025 AI Safety Index",
      "url": "https://futureoflife.org/ai-safety-index-summer-2025/",
      "resourceId": "df46edd6fa2078d1",
      "resourceTitle": "FLI AI Safety Index Summer 2025"
    },
    {
      "text": "2023 AI Impacts survey",
      "url": "https://wiki.aiimpacts.org/ai_timelines/predictions_of_human-level_ai_timelines/ai_timeline_surveys/2023_expert_survey_on_progress_in_ai",
      "resourceId": "b4342da2ca0d2721",
      "resourceTitle": "AI Impacts 2023 survey"
    },
    {
      "text": "AI Impacts Survey",
      "url": "https://wiki.aiimpacts.org/ai_timelines/predictions_of_human-level_ai_timelines/ai_timeline_surveys/2023_expert_survey_on_progress_in_ai",
      "resourceId": "b4342da2ca0d2721",
      "resourceTitle": "AI Impacts 2023 survey"
    },
    {
      "text": "EA Forum Survey",
      "url": "https://forum.effectivealtruism.org/posts/8CM9vZ2nnQsWJNsHx/existential-risk-from-ai-survey-results",
      "resourceId": "0dee84dcc4f4076f",
      "resourceTitle": "Existential Risk Survey Results (EA Forum)"
    },
    {
      "text": "EA Forum Survey",
      "url": "https://forum.effectivealtruism.org/posts/8CM9vZ2nnQsWJNsHx/existential-risk-from-ai-survey-results",
      "resourceId": "0dee84dcc4f4076f",
      "resourceTitle": "Existential Risk Survey Results (EA Forum)"
    },
    {
      "text": "EA Forum Survey",
      "url": "https://forum.effectivealtruism.org/posts/8CM9vZ2nnQsWJNsHx/existential-risk-from-ai-survey-results",
      "resourceId": "0dee84dcc4f4076f",
      "resourceTitle": "Existential Risk Survey Results (EA Forum)"
    },
    {
      "text": "arXiv Expert Survey",
      "url": "https://arxiv.org/html/2502.14870v1",
      "resourceId": "4e7f0e37bace9678",
      "resourceTitle": "Roman Yampolskiy"
    },
    {
      "text": "10-20%",
      "url": "https://en.wikipedia.org/wiki/P(doom",
      "resourceId": "ffb7dcedaa0a8711",
      "resourceTitle": "Survey of AI researchers"
    },
    {
      "text": "Greenblatt et al. (2024)",
      "url": "https://www.anthropic.com/research/alignment-faking",
      "resourceId": "c2cfd72baafd64a9",
      "resourceTitle": "Anthropic's 2024 alignment faking study"
    },
    {
      "text": "Anthropic's 2025 research recommendations",
      "url": "https://alignment.anthropic.com/2025/recommended-directions/",
      "resourceId": "7ae6b3be2d2043c1",
      "resourceTitle": "Anthropic: Recommended Directions for AI Safety Research"
    },
    {
      "text": "MATS program",
      "url": "https://www.matsprogram.org/",
      "resourceId": "ba3a8bd9c8404d7b",
      "resourceTitle": "MATS Research Program"
    },
    {
      "text": "AI Safety Index",
      "url": "https://futureoflife.org/ai-safety-index-summer-2025/",
      "resourceId": "df46edd6fa2078d1",
      "resourceTitle": "FLI AI Safety Index Summer 2025"
    },
    {
      "text": "Anthropic study",
      "url": "https://arxiv.org/abs/2401.05566",
      "resourceId": "e5c0904211c7d0cc"
    },
    {
      "text": "Simple probes",
      "url": "https://www.anthropic.com/research/probes-catch-sleeper-agents",
      "resourceId": "72c1254d07071bf7",
      "resourceTitle": "Anthropic's follow-up research on defection probes"
    },
    {
      "text": "Greenblatt et al. 2024",
      "url": "https://www.anthropic.com/research/alignment-faking",
      "resourceId": "c2cfd72baafd64a9",
      "resourceTitle": "Anthropic's 2024 alignment faking study"
    },
    {
      "text": "Process supervision",
      "url": "https://arxiv.org/abs/2305.20050",
      "resourceId": "eea50d24e41938ed",
      "resourceTitle": "OpenAI's influential \"Let's Verify Step by Step\" study"
    },
    {
      "text": "AI Safety Index (Summer 2025)",
      "url": "https://futureoflife.org/ai-safety-index-summer-2025/",
      "resourceId": "df46edd6fa2078d1",
      "resourceTitle": "FLI AI Safety Index Summer 2025"
    },
    {
      "text": "February 2025 arXiv study",
      "url": "https://arxiv.org/html/2502.14870v1",
      "resourceId": "4e7f0e37bace9678",
      "resourceTitle": "Roman Yampolskiy"
    },
    {
      "text": "Coefficient Giving",
      "url": "https://www.openphilanthropy.org/",
      "resourceId": "dd0cf0ff290cc68e",
      "resourceTitle": "Open Philanthropy grants database"
    },
    {
      "text": "MIRI",
      "url": "https://intelligence.org/",
      "resourceId": "86df45a5f8a9bf6d",
      "resourceTitle": "miri.org"
    },
    {
      "text": "US AISI",
      "url": "https://www.nist.gov/aisi",
      "resourceId": "84e0da6d5092e27d",
      "resourceTitle": "US AISI"
    },
    {
      "text": "Alignment Faking",
      "url": "https://www.anthropic.com/research/alignment-faking",
      "resourceId": "c2cfd72baafd64a9",
      "resourceTitle": "Anthropic's 2024 alignment faking study"
    },
    {
      "text": "Let's Verify Step by Step",
      "url": "https://arxiv.org/abs/2305.20050",
      "resourceId": "eea50d24e41938ed",
      "resourceTitle": "OpenAI's influential \"Let's Verify Step by Step\" study"
    }
  ],
  "unconvertedLinkCount": 28,
  "convertedLinkCount": 42,
  "backlinkCount": 4,
  "hallucinationRisk": {
    "level": "medium",
    "score": 45,
    "factors": [
      "no-citations",
      "conceptual-content"
    ]
  },
  "entityType": "crux",
  "redundancy": {
    "maxSimilarity": 21,
    "similarPages": [
      {
        "id": "mesa-optimization",
        "title": "Mesa-Optimization",
        "path": "/knowledge-base/risks/mesa-optimization/",
        "similarity": 21
      },
      {
        "id": "scheming",
        "title": "Scheming",
        "path": "/knowledge-base/risks/scheming/",
        "similarity": 21
      },
      {
        "id": "situational-awareness",
        "title": "Situational Awareness",
        "path": "/knowledge-base/capabilities/situational-awareness/",
        "similarity": 20
      },
      {
        "id": "case-for-xrisk",
        "title": "The Case FOR AI Existential Risk",
        "path": "/knowledge-base/debates/case-for-xrisk/",
        "similarity": 20
      },
      {
        "id": "interpretability",
        "title": "Mechanistic Interpretability",
        "path": "/knowledge-base/responses/interpretability/",
        "similarity": 20
      }
    ]
  },
  "changeHistory": [
    {
      "date": "2026-03-12",
      "branch": "auto-update/2026-03-12",
      "title": "Auto-improve (standard): AI Accident Risk Cruxes",
      "summary": "Improved \"AI Accident Risk Cruxes\" via standard pipeline (480.9s). Quality score: 74. Issues resolved: Frontmatter description field contains raw '<' character in ; Mermaid chart contains an EntityLink MDX component inside a ; Table in 'Sources and Resources > 2024-2025 Key Research' ha.",
      "duration": "480.9s",
      "cost": "$5-8"
    }
  ],
  "coverage": {
    "passing": 9,
    "total": 13,
    "targets": {
      "tables": 16,
      "diagrams": 2,
      "internalLinks": 33,
      "externalLinks": 20,
      "footnotes": 12,
      "references": 12
    },
    "actuals": {
      "tables": 26,
      "diagrams": 1,
      "internalLinks": 127,
      "externalLinks": 50,
      "footnotes": 0,
      "references": 46,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "green",
      "overview": "green",
      "tables": "green",
      "diagrams": "amber",
      "internalLinks": "green",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "editHistoryCount": 1,
    "ratingsString": "N:5.2 R:6.8 A:7.3 C:7.5"
  },
  "readerRank": 5,
  "researchRank": 1,
  "recommendedScore": 202.61
}
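The `coverage` block grades each structural target against the page's actuals, but the grading rule itself isn't stored in the record. A minimal sketch of one threshold rule consistent with the values shown here (the half-target amber cutoff is an assumption):

```typescript
// Hypothetical reconstruction of the coverage grading; the real
// build-time thresholds aren't shown in this record. Assumption:
// green when the actual meets the target, amber at >= half the
// target, red below that. This reproduces the statuses above.
type Status = "green" | "amber" | "red";

function gradeItem(actual: number, target: number): Status {
  if (actual >= target) return "green";
  if (actual >= target / 2) return "amber";
  return "red";
}

const targets = { tables: 16, diagrams: 2, internalLinks: 33, externalLinks: 20, footnotes: 12, references: 12 };
const actuals = { tables: 26, diagrams: 1, internalLinks: 127, externalLinks: 50, footnotes: 0, references: 46 };

const items: Record<string, Status> = {};
for (const key of Object.keys(targets) as (keyof typeof targets)[]) {
  items[key] = gradeItem(actuals[key], targets[key]);
}
// items => { tables: "green", diagrams: "amber", internalLinks: "green",
//            externalLinks: "green", footnotes: "red", references: "green" }
```

Under this assumed rule, `passing: 9` would simply be the count of green items across all 13 checks.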
External Links
{
  "lesswrong": "https://www.lesswrong.com/tag/ai-risk"
}
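Per the caption above the page record, each entry in database.json is merged from MDX frontmatter, Entity YAML, and computed metrics at build time. A minimal sketch of what that merge could look like — the library choices (gray-matter, js-yaml) and field shapes are assumptions, not the wiki's actual pipeline:

```typescript
// Hypothetical sketch of the build-time merge named in the caption:
// MDX frontmatter + Entity YAML + computed metrics -> database.json
// record. Library choices and field shapes are illustrative only.
import { readFileSync } from "fs";
import matter from "gray-matter"; // frontmatter parser (assumed choice)
import { load } from "js-yaml";   // entity YAML parser (assumed choice)

function buildPageRecord(mdxPath: string, entityYamlPath: string) {
  const { data: frontmatter, content } = matter(readFileSync(mdxPath, "utf8"));
  const entity = load(readFileSync(entityYamlPath, "utf8")) as Record<string, unknown>;

  // One computed metric shown; tableCount, internalLinks, etc. would
  // be counted from `content` the same way at build time.
  const wordCount = content.split(/\s+/).filter(Boolean).length;

  // Later sources win on key collisions, mirroring "merged from
  // MDX frontmatter + Entity YAML + computed metrics".
  return { ...entity, ...frontmatter, metrics: { wordCount }, wordCount };
}
```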
Backlinks (4)
| id | title | type/relationship |
|----|-------|--------------------|
| __index__/knowledge-base/cruxes | Key Cruxes | concept |
| __index__/knowledge-base | Knowledge Base | concept |
| risk-activation-timeline | Risk Activation Timeline Model | analysis |
| rlhf | RLHF / Constitutional AI | capability |
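Backlinks are the inverse of internal links: this page appears in four other pages' link sets. A sketch of how `backlinkCount` could be derived from the site graph, with an assumed page shape:

```typescript
// Hypothetical backlink derivation: invert every page's internal
// links. The Page shape is an assumption, not the wiki's schema.
interface Page {
  id: string;
  internalLinks: string[]; // ids of pages this page links to
}

function backlinksFor(targetId: string, pages: Page[]): string[] {
  return pages
    .filter((p) => p.internalLinks.includes(targetId))
    .map((p) => p.id);
}

// backlinksFor("accident-risks", allPages).length === 4  // backlinkCount
```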