Longterm Wiki

Sharp Left Turn

sharp-left-turnriskPath: /knowledge-base/risks/sharp-left-turn/
E281Entity ID (EID)
← Back to page7 backlinksQuality: 69Updated: 2026-03-13
Page Recorddatabase.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "sharp-left-turn",
  "numericId": null,
  "path": "/knowledge-base/risks/sharp-left-turn/",
  "filePath": "knowledge-base/risks/sharp-left-turn.mdx",
  "title": "Sharp Left Turn",
  "quality": 69,
  "readerImportance": 56.5,
  "researchImportance": 89,
  "tacticalValue": null,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": "pathway",
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "The Sharp Left Turn hypothesis proposes AI capabilities may generalize discontinuously while alignment fails to transfer, with compound probability estimated at 15-40% by 2027-2035. Empirical evidence includes 78% alignment faking rate in Claude 3 Opus under RL pressure and goal misgeneralization in current systems, though catastrophic failures haven't yet occurred in deployed models.",
  "description": "The Sharp Left Turn hypothesis proposes that AI capabilities may generalize discontinuously to new domains while alignment properties fail to transfer, creating catastrophic misalignment risk. Evidence from goal misgeneralization research, alignment faking studies (78% faking rate in reinforcement learning conditions), and evolutionary analogies suggests this asymmetry is plausible, though empirical verification remains limited.",
  "ratings": {
    "novelty": 5.8,
    "rigor": 7.2,
    "actionability": 6.3,
    "completeness": 7.8
  },
  "category": "risks",
  "subcategory": "accident",
  "clusters": [
    "ai-safety",
    "governance"
  ],
  "metrics": {
    "wordCount": 4303,
    "tableCount": 13,
    "diagramCount": 2,
    "internalLinks": 35,
    "externalLinks": 36,
    "footnoteCount": 0,
    "bulletRatio": 0.11,
    "sectionCount": 32,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 45,
  "evergreen": true,
  "wordCount": 4303,
  "unconvertedLinks": [
    {
      "text": "Alignment Faking (Anthropic/Redwood)",
      "url": "https://www.anthropic.com/research/alignment-faking",
      "resourceId": "c2cfd72baafd64a9",
      "resourceTitle": "Anthropic's 2024 alignment faking study"
    },
    {
      "text": "Goal Misgeneralization (ICML)",
      "url": "https://proceedings.mlr.press/v162/langosco22a.html",
      "resourceId": "c4dda1bfea152190",
      "resourceTitle": "Langosco et al. (2022)"
    },
    {
      "text": "Emergent Abilities (TMLR)",
      "url": "https://arxiv.org/abs/2206.07682",
      "resourceId": "2d76bc16fcc7825d",
      "resourceTitle": "Emergent Abilities"
    },
    {
      "text": "Emergent Mirage (NeurIPS)",
      "url": "https://arxiv.org/abs/2304.15004",
      "resourceId": "22db72cf2a806d3b",
      "resourceTitle": "\"Are Emergent Abilities a Mirage?\""
    },
    {
      "text": "Natural Emergent Misalignment (Anthropic)",
      "url": "https://www.anthropic.com/research/emergent-misalignment-reward-hacking",
      "resourceId": "7a21b9c5237a8a16",
      "resourceTitle": "Natural Emergent Misalignment from Reward Hacking"
    },
    {
      "text": "Nate Soares",
      "url": "https://intelligence.org/2022/07/04/a-central-ai-alignment-problem/",
      "resourceId": "83ae4cb7d004910a",
      "resourceTitle": "Nate Soares"
    },
    {
      "text": "Victoria Krakovna",
      "url": "https://vkrakovna.wordpress.com/2023/12/20/retrospective-on-ai-threat-models/",
      "resourceId": "6980863a6d7d16d9",
      "resourceTitle": "\"Retrospective on My Posts on AI Threat Models\""
    },
    {
      "text": "Wei et al. (2022)",
      "url": "https://arxiv.org/abs/2206.07682",
      "resourceId": "2d76bc16fcc7825d",
      "resourceTitle": "Emergent Abilities"
    },
    {
      "text": "Schaeffer et al. (2023)",
      "url": "https://arxiv.org/abs/2304.15004",
      "resourceId": "22db72cf2a806d3b",
      "resourceTitle": "\"Are Emergent Abilities a Mirage?\""
    },
    {
      "text": "75-point improvement over GPT-3.5",
      "url": "https://openai.com/research/gpt-4",
      "resourceId": "9b255e0255d7dd86",
      "resourceTitle": "Resisting Sycophancy: OpenAI"
    },
    {
      "text": "OpenAI system cards",
      "url": "https://openai.com/research",
      "resourceId": "e9aaa7b5e18f9f41",
      "resourceTitle": "OpenAI: Model Behavior"
    },
    {
      "text": "full paper",
      "url": "https://assets.anthropic.com/m/983c85a201a962f/original/Alignment-Faking-in-Large-Language-Models-full-paper.pdf",
      "resourceId": "1fb3c217c5e296b6",
      "resourceTitle": "alignment faking in 78% of tests"
    },
    {
      "text": "metric choice effects",
      "url": "https://arxiv.org/abs/2304.15004",
      "resourceId": "22db72cf2a806d3b",
      "resourceTitle": "\"Are Emergent Abilities a Mirage?\""
    },
    {
      "text": "Nate Soares (MIRI)",
      "url": "https://intelligence.org/2022/07/04/a-central-ai-alignment-problem/",
      "resourceId": "83ae4cb7d004910a",
      "resourceTitle": "Nate Soares"
    },
    {
      "text": "Victoria Krakovna (DeepMind)",
      "url": "https://vkrakovna.wordpress.com/2023/12/20/retrospective-on-ai-threat-models/",
      "resourceId": "6980863a6d7d16d9",
      "resourceTitle": "\"Retrospective on My Posts on AI Threat Models\""
    },
    {
      "text": "Holden Karnofsky",
      "url": "https://www.cold-takes.com/",
      "resourceId": "859ff786a553505f",
      "resourceTitle": "Cold Takes"
    },
    {
      "text": "Paul Christiano (AISI)",
      "url": "https://www.alignmentforum.org/users/paulfchristiano",
      "resourceId": "ebb2f8283d5a6014",
      "resourceTitle": "Paul Christiano's AI Alignment Research"
    },
    {
      "text": "Anthropic Alignment Science",
      "url": "https://alignment.anthropic.com/",
      "resourceId": "5a651b8ed18ffeb1",
      "resourceTitle": "Anthropic Alignment Science Blog"
    },
    {
      "text": "\"A Central AI Alignment Problem: Capabilities Generalization, and the Sharp Left Turn\"",
      "url": "https://intelligence.org/2022/07/04/a-central-ai-alignment-problem/",
      "resourceId": "83ae4cb7d004910a",
      "resourceTitle": "Nate Soares"
    },
    {
      "text": "\"Retrospective on My Posts on AI Threat Models\"",
      "url": "https://vkrakovna.wordpress.com/2023/12/20/retrospective-on-ai-threat-models/",
      "resourceId": "6980863a6d7d16d9",
      "resourceTitle": "\"Retrospective on My Posts on AI Threat Models\""
    },
    {
      "text": "\"Alignment Faking in Large Language Models\"",
      "url": "https://www.anthropic.com/research/alignment-faking",
      "resourceId": "c2cfd72baafd64a9",
      "resourceTitle": "Anthropic's 2024 alignment faking study"
    },
    {
      "text": "Full paper",
      "url": "https://assets.anthropic.com/m/983c85a201a962f/original/Alignment-Faking-in-Large-Language-Models-full-paper.pdf",
      "resourceId": "1fb3c217c5e296b6",
      "resourceTitle": "alignment faking in 78% of tests"
    },
    {
      "text": "\"Natural Emergent Misalignment from Reward Hacking\"",
      "url": "https://www.anthropic.com/research/emergent-misalignment-reward-hacking",
      "resourceId": "7a21b9c5237a8a16",
      "resourceTitle": "Natural Emergent Misalignment from Reward Hacking"
    },
    {
      "text": "\"Goal Misgeneralization in Deep Reinforcement Learning\"",
      "url": "https://proceedings.mlr.press/v162/langosco22a.html",
      "resourceId": "c4dda1bfea152190",
      "resourceTitle": "Langosco et al. (2022)"
    },
    {
      "text": "\"Emergent Abilities of Large Language Models\"",
      "url": "https://arxiv.org/abs/2206.07682",
      "resourceId": "2d76bc16fcc7825d",
      "resourceTitle": "Emergent Abilities"
    },
    {
      "text": "\"Are Emergent Abilities of Large Language Models a Mirage?\"",
      "url": "https://arxiv.org/abs/2304.15004",
      "resourceId": "22db72cf2a806d3b",
      "resourceTitle": "\"Are Emergent Abilities a Mirage?\""
    },
    {
      "text": "\"Risks from Learned Optimization in Advanced Machine Learning Systems\"",
      "url": "https://arxiv.org/abs/1906.01820",
      "resourceId": "c4858d4ef280d8e6",
      "resourceTitle": "Risks from Learned Optimization"
    },
    {
      "text": "\"The Alignment Problem from a Deep Learning Perspective\"",
      "url": "https://arxiv.org/abs/2209.00626",
      "resourceId": "9124298fbb913c3d",
      "resourceTitle": "Gaming RLHF evaluation"
    },
    {
      "text": "\"Scheming AIs: Will AIs Fake Alignment?\"",
      "url": "https://arxiv.org/abs/2311.08379",
      "resourceId": "ad8b09f4eba993b3",
      "resourceTitle": "Carlsmith (2023) - Scheming AIs"
    }
  ],
  "unconvertedLinkCount": 29,
  "convertedLinkCount": 24,
  "backlinkCount": 7,
  "hallucinationRisk": {
    "level": "medium",
    "score": 40,
    "factors": [
      "no-citations",
      "high-rigor"
    ]
  },
  "entityType": "risk",
  "redundancy": {
    "maxSimilarity": 22,
    "similarPages": [
      {
        "id": "goal-misgeneralization",
        "title": "Goal Misgeneralization",
        "path": "/knowledge-base/risks/goal-misgeneralization/",
        "similarity": 22
      },
      {
        "id": "mesa-optimization",
        "title": "Mesa-Optimization",
        "path": "/knowledge-base/risks/mesa-optimization/",
        "similarity": 22
      },
      {
        "id": "instrumental-convergence",
        "title": "Instrumental Convergence",
        "path": "/knowledge-base/risks/instrumental-convergence/",
        "similarity": 21
      },
      {
        "id": "treacherous-turn",
        "title": "Treacherous Turn",
        "path": "/knowledge-base/risks/treacherous-turn/",
        "similarity": 21
      },
      {
        "id": "situational-awareness",
        "title": "Situational Awareness",
        "path": "/knowledge-base/capabilities/situational-awareness/",
        "similarity": 20
      }
    ]
  },
  "coverage": {
    "passing": 8,
    "total": 13,
    "targets": {
      "tables": 17,
      "diagrams": 2,
      "internalLinks": 34,
      "externalLinks": 22,
      "footnotes": 13,
      "references": 13
    },
    "actuals": {
      "tables": 13,
      "diagrams": 2,
      "internalLinks": 35,
      "externalLinks": 36,
      "footnotes": 0,
      "references": 32,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "red",
      "overview": "green",
      "tables": "amber",
      "diagrams": "green",
      "internalLinks": "green",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "ratingsString": "N:5.8 R:7.2 A:6.3 C:7.8"
  },
  "readerRank": 258,
  "researchRank": 33,
  "recommendedScore": 188.11
}
External Links
{
  "lesswrong": "https://www.lesswrong.com/tag/sharp-left-turn",
  "stampy": "https://aisafety.info/questions/9KE6/What-is-the-sharp-left-turn"
}
Backlinks (7)
idtitletyperelationship
miriMIRIorganization
eliezer-yudkowskyEliezer Yudkowskyperson
emergent-capabilitiesEmergent Capabilitiesrisk
alignment-robustness-trajectoryAlignment Robustness Trajectoryanalysis
goal-misgeneralization-probabilityGoal Misgeneralization Probability Modelanalysis
agent-foundationsAgent Foundationsapproach
accident-overviewAccident Risks (Overview)concept
Longterm Wiki