Longterm Wiki

Cooperative IRL (CIRL)

ID: cirl · Type: approach · Path: /knowledge-base/responses/cirl/
Entity ID (EID): E586
3 backlinks · Quality: 65 · Updated: 2026-03-13
Page Record (database.json) — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "cirl",
  "numericId": null,
  "path": "/knowledge-base/responses/cirl/",
  "filePath": "knowledge-base/responses/cirl.mdx",
  "title": "Cooperative IRL (CIRL)",
  "quality": 65,
  "readerImportance": 25,
  "researchImportance": 8,
  "tacticalValue": null,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "CIRL is a theoretical framework where AI systems maintain uncertainty about human preferences, which naturally incentivizes corrigibility and deference. Despite elegant theory with formal proofs, the approach faces a substantial theory-practice gap with no production deployments and only \\$1-5M/year in academic investment, making it more influential for conceptual foundations than immediate intervention design.",
  "description": "Cooperative Inverse Reinforcement Learning (CIRL) is a theoretical framework where AI systems maintain uncertainty about human preferences and cooperatively learn them through interaction. While providing elegant theoretical foundations for corrigibility, CIRL remains largely academic with limited practical implementation.",
  "ratings": {
    "novelty": 3.5,
    "rigor": 5,
    "actionability": 3,
    "completeness": 6
  },
  "category": "responses",
  "subcategory": "alignment-theoretical",
  "clusters": [
    "ai-safety"
  ],
  "metrics": {
    "wordCount": 1946,
    "tableCount": 21,
    "diagramCount": 1,
    "internalLinks": 12,
    "externalLinks": 11,
    "footnoteCount": 0,
    "bulletRatio": 0.05,
    "sectionCount": 32,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 90,
  "evergreen": true,
  "wordCount": 1946,
  "unconvertedLinks": [
    {
      "text": "Hadfield-Menell et al., 2017",
      "url": "https://arxiv.org/abs/1611.08219",
      "resourceId": "026569778403629b",
      "resourceTitle": "Hadfield-Menell et al. (2017)"
    },
    {
      "text": "Cooperative Inverse Reinforcement Learning",
      "url": "https://arxiv.org/abs/1606.03137",
      "resourceId": "821f65afa4c681ca",
      "resourceTitle": "Hadfield-Menell et al. (2016)"
    },
    {
      "text": "The Off-Switch Game",
      "url": "https://arxiv.org/abs/1611.08219",
      "resourceId": "026569778403629b",
      "resourceTitle": "Hadfield-Menell et al. (2017)"
    },
    {
      "text": "Incorrigibility in the CIRL Framework",
      "url": "https://intelligence.org/2017/08/31/incorrigibility-in-cirl/",
      "resourceId": "3e250a28699df556",
      "resourceTitle": "CIRL corrigibility proved fragile"
    }
  ],
  "unconvertedLinkCount": 4,
  "convertedLinkCount": 0,
  "backlinkCount": 3,
  "hallucinationRisk": {
    "level": "medium",
    "score": 45,
    "factors": [
      "no-citations",
      "conceptual-content"
    ]
  },
  "entityType": "approach",
  "redundancy": {
    "maxSimilarity": 15,
    "similarPages": [
      {
        "id": "chai",
        "title": "CHAI (Center for Human-Compatible AI)",
        "path": "/knowledge-base/organizations/chai/",
        "similarity": 15
      },
      {
        "id": "cooperative-ai",
        "title": "Cooperative AI",
        "path": "/knowledge-base/responses/cooperative-ai/",
        "similarity": 14
      },
      {
        "id": "debate",
        "title": "AI Safety via Debate",
        "path": "/knowledge-base/responses/debate/",
        "similarity": 14
      },
      {
        "id": "instrumental-convergence-framework",
        "title": "Instrumental Convergence Framework",
        "path": "/knowledge-base/models/instrumental-convergence-framework/",
        "similarity": 13
      },
      {
        "id": "deceptive-alignment-decomposition",
        "title": "Deceptive Alignment Decomposition Model",
        "path": "/knowledge-base/models/deceptive-alignment-decomposition/",
        "similarity": 12
      }
    ]
  },
  "coverage": {
    "passing": 7,
    "total": 13,
    "targets": {
      "tables": 8,
      "diagrams": 1,
      "internalLinks": 16,
      "externalLinks": 10,
      "footnotes": 6,
      "references": 6
    },
    "actuals": {
      "tables": 21,
      "diagrams": 1,
      "internalLinks": 12,
      "externalLinks": 11,
      "footnotes": 0,
      "references": 3,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "red",
      "overview": "green",
      "tables": "green",
      "diagrams": "green",
      "internalLinks": "amber",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "amber",
      "quotes": "red",
      "accuracy": "red"
    },
    "ratingsString": "N:3.5 R:5 A:3 C:6"
  },
  "readerRank": 489,
  "researchRank": 568,
  "recommendedScore": 164.14
}
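
The caption above describes database.json as a build-time merge of MDX frontmatter, an entity YAML file, and computed metrics. The following is a minimal sketch of that merge in Python, assuming a conventional ---‑delimited frontmatter block and PyYAML; the entity file path (entities/cirl.yaml), the precedence rule, and the metric heuristics are illustrative assumptions, not the wiki's actual build code.

```python
import json
import re
import yaml  # PyYAML

def load_frontmatter(mdx_path):
    """Split an MDX file into its leading YAML frontmatter block and body."""
    text = open(mdx_path, encoding="utf-8").read()
    match = re.match(r"^---\n(.*?)\n---\n(.*)$", text, re.DOTALL)
    meta = yaml.safe_load(match.group(1)) if match else {}
    body = match.group(2) if match else text
    return meta, body

def build_record(mdx_path, entity_yaml_path):
    meta, body = load_frontmatter(mdx_path)
    entity = yaml.safe_load(open(entity_yaml_path, encoding="utf-8")) or {}
    # Crude illustrative metrics; the real build presumably computes many more
    # (tables, diagrams, coverage, redundancy, ...).
    metrics = {
        "wordCount": len(body.split()),
        "internalLinks": len(re.findall(r"\]\(/knowledge-base/", body)),
        "externalLinks": len(re.findall(r"\]\(https?://", body)),
    }
    # Assumed precedence: frontmatter overrides entity YAML; metrics attached last.
    return {**entity, **meta, "metrics": metrics, "filePath": mdx_path}

if __name__ == "__main__":
    record = build_record("knowledge-base/responses/cirl.mdx", "entities/cirl.yaml")
    print(json.dumps(record, indent=2))
```

Under this assumed precedence the hand-written frontmatter wins on conflicts, while derived fields such as coverage, redundancy, and rank scores would be filled in by later build steps.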
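The summary's claim that preference uncertainty "naturally incentivizes corrigibility and deference" comes from the off-switch game (Hadfield-Menell et al., 2017). A toy numerical check, using a made-up belief distribution over the human's utility for the robot's proposed action:

```python
# Toy check of the off-switch game incentive (Hadfield-Menell et al., 2017).
# The robot is uncertain about the human's utility U for its proposed action.
# Hypothetical belief: the action is great (+10) w.p. 0.5, mildly bad (-2)
# w.p. 0.3, or disastrous (-40) w.p. 0.2; the numbers are purely illustrative.
outcomes = [(10.0, 0.5), (-2.0, 0.3), (-40.0, 0.2)]

# Acting unilaterally: the robot just executes the action and gets E[U].
act = sum(u * p for u, p in outcomes)

# Deferring: the robot proposes the action and accepts a human veto.
# A rational human who knows U allows it only when U >= 0, so the robot
# receives max(U, 0) in each outcome, i.e. E[max(U, 0)].
defer = sum(max(u, 0.0) * p for u, p in outcomes)

print(f"act unilaterally: E[U]         = {act:+.1f}")   # -3.6
print(f"defer to human:   E[max(U, 0)] = {defer:+.1f}") # +5.0
```

Because E[max(U, 0)] ≥ max(E[U], 0) for any belief, deference weakly dominates both acting unilaterally and self-shutdown; the advantage disappears once the robot's belief collapses to a point estimate, which is the fragility highlighted in the "Incorrigibility in the CIRL Framework" critique linked above.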
External Links

No external links

Backlinks (3)
id | title | type | relationship
autonomous-cooperative-agents | Autonomous Cooperative Agents | concept |
alignment-theoretical-overview | Theoretical Foundations (Overview) | concept |
cooperative-ai | Cooperative AI | approach |