Longterm Wiki

Cooperative IRL (CIRL)

ID: cirl · Type: approach · Path: /knowledge-base/responses/cirl/
Entity ID (EID): E586
3 backlinks · Quality: 65 · Updated: 2026-03-13
Page Record (database.json) — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "cirl",
  "numericId": null,
  "path": "/knowledge-base/responses/cirl/",
  "filePath": "knowledge-base/responses/cirl.mdx",
  "title": "Cooperative IRL (CIRL)",
  "quality": 65,
  "readerImportance": 25,
  "researchImportance": 8,
  "tacticalValue": null,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "CIRL is a theoretical framework where AI systems maintain uncertainty about human preferences, which naturally incentivizes corrigibility and deference. Despite elegant theory with formal proofs, the approach faces a substantial theory-practice gap with no production deployments and only \\$1-5M/year in academic investment, making it more influential for conceptual foundations than immediate intervention design.",
  "description": "Cooperative Inverse Reinforcement Learning (CIRL) is a theoretical framework where AI systems maintain uncertainty about human preferences and cooperatively learn them through interaction. While providing elegant theoretical foundations for corrigibility, CIRL remains largely academic with limited practical implementation.",
  "ratings": {
    "novelty": 3.5,
    "rigor": 5,
    "actionability": 3,
    "completeness": 6
  },
  "category": "responses",
  "subcategory": "alignment-theoretical",
  "clusters": [
    "ai-safety"
  ],
  "metrics": {
    "wordCount": 1946,
    "tableCount": 21,
    "diagramCount": 1,
    "internalLinks": 12,
    "externalLinks": 11,
    "footnoteCount": 0,
    "bulletRatio": 0.05,
    "sectionCount": 32,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 90,
  "evergreen": true,
  "wordCount": 1946,
  "unconvertedLinks": [
    {
      "text": "Hadfield-Menell et al., 2017",
      "url": "https://arxiv.org/abs/1611.08219",
      "resourceId": "026569778403629b",
      "resourceTitle": "Hadfield-Menell et al. (2017)"
    },
    {
      "text": "Cooperative Inverse Reinforcement Learning",
      "url": "https://arxiv.org/abs/1606.03137",
      "resourceId": "821f65afa4c681ca",
      "resourceTitle": "Hadfield-Menell et al. (2016)"
    },
    {
      "text": "The Off-Switch Game",
      "url": "https://arxiv.org/abs/1611.08219",
      "resourceId": "026569778403629b",
      "resourceTitle": "Hadfield-Menell et al. (2017)"
    },
    {
      "text": "Incorrigibility in the CIRL Framework",
      "url": "https://intelligence.org/2017/08/31/incorrigibility-in-cirl/",
      "resourceId": "3e250a28699df556",
      "resourceTitle": "CIRL corrigibility proved fragile"
    }
  ],
  "unconvertedLinkCount": 4,
  "convertedLinkCount": 0,
  "backlinkCount": 3,
  "hallucinationRisk": {
    "level": "medium",
    "score": 45,
    "factors": [
      "no-citations",
      "conceptual-content"
    ]
  },
  "entityType": "approach",
  "redundancy": {
    "maxSimilarity": 15,
    "similarPages": [
      {
        "id": "chai",
        "title": "CHAI (Center for Human-Compatible AI)",
        "path": "/knowledge-base/organizations/chai/",
        "similarity": 15
      },
      {
        "id": "cooperative-ai",
        "title": "Cooperative AI",
        "path": "/knowledge-base/responses/cooperative-ai/",
        "similarity": 14
      },
      {
        "id": "debate",
        "title": "AI Safety via Debate",
        "path": "/knowledge-base/responses/debate/",
        "similarity": 14
      },
      {
        "id": "instrumental-convergence-framework",
        "title": "Instrumental Convergence Framework",
        "path": "/knowledge-base/models/instrumental-convergence-framework/",
        "similarity": 13
      },
      {
        "id": "deceptive-alignment-decomposition",
        "title": "Deceptive Alignment Decomposition Model",
        "path": "/knowledge-base/models/deceptive-alignment-decomposition/",
        "similarity": 12
      }
    ]
  },
  "coverage": {
    "passing": 7,
    "total": 13,
    "targets": {
      "tables": 8,
      "diagrams": 1,
      "internalLinks": 16,
      "externalLinks": 10,
      "footnotes": 6,
      "references": 6
    },
    "actuals": {
      "tables": 21,
      "diagrams": 1,
      "internalLinks": 12,
      "externalLinks": 11,
      "footnotes": 0,
      "references": 3,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "red",
      "overview": "green",
      "tables": "green",
      "diagrams": "green",
      "internalLinks": "amber",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "amber",
      "quotes": "red",
      "accuracy": "red"
    },
    "ratingsString": "N:3.5 R:5 A:3 C:6"
  },
  "readerRank": 489,
  "researchRank": 568,
  "recommendedScore": 164.14
}
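
The caption above describes database.json as a build-time merge of MDX frontmatter, an entity YAML file, and computed metrics. The following is a minimal sketch of that merge in Python, assuming a conventional ---‑delimited frontmatter block and PyYAML; the entity file path (entities/cirl.yaml), the precedence rule, and the metric heuristics are illustrative assumptions, not the wiki's actual build code.

```python
import json
import re
import yaml  # PyYAML

def load_frontmatter(mdx_path):
    """Split an MDX file into its leading YAML frontmatter block and body."""
    text = open(mdx_path, encoding="utf-8").read()
    match = re.match(r"^---\n(.*?)\n---\n(.*)$", text, re.DOTALL)
    meta = yaml.safe_load(match.group(1)) if match else {}
    body = match.group(2) if match else text
    return meta, body

def build_record(mdx_path, entity_yaml_path):
    meta, body = load_frontmatter(mdx_path)
    entity = yaml.safe_load(open(entity_yaml_path, encoding="utf-8")) or {}
    # Crude illustrative metrics; the real build presumably computes many more
    # (tables, diagrams, coverage, redundancy, ...).
    metrics = {
        "wordCount": len(body.split()),
        "internalLinks": len(re.findall(r"\]\(/knowledge-base/", body)),
        "externalLinks": len(re.findall(r"\]\(https?://", body)),
    }
    # Assumed precedence: frontmatter overrides entity YAML; metrics attached last.
    return {**entity, **meta, "metrics": metrics, "filePath": mdx_path}

if __name__ == "__main__":
    record = build_record("knowledge-base/responses/cirl.mdx", "entities/cirl.yaml")
    print(json.dumps(record, indent=2))
```

Under this assumed precedence the hand-written frontmatter wins on conflicts, while derived fields such as coverage, redundancy, and rank scores would be filled in by later build steps.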
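The summary's claim that preference uncertainty "naturally incentivizes corrigibility and deference" comes from the off-switch game (Hadfield-Menell et al., 2017). A toy numerical check, using a made-up belief distribution over the human's utility for the robot's proposed action:

```python
# Toy check of the off-switch game incentive (Hadfield-Menell et al., 2017).
# The robot is uncertain about the human's utility U for its proposed action.
# Hypothetical belief: the action is great (+10) w.p. 0.5, mildly bad (-2)
# w.p. 0.3, or disastrous (-40) w.p. 0.2; the numbers are purely illustrative.
outcomes = [(10.0, 0.5), (-2.0, 0.3), (-40.0, 0.2)]

# Acting unilaterally: the robot just executes the action and gets E[U].
act = sum(u * p for u, p in outcomes)

# Deferring: the robot proposes the action and accepts a human veto.
# A rational human who knows U allows it only when U >= 0, so the robot
# receives max(U, 0) in each outcome, i.e. E[max(U, 0)].
defer = sum(max(u, 0.0) * p for u, p in outcomes)

print(f"act unilaterally: E[U]         = {act:+.1f}")   # -3.6
print(f"defer to human:   E[max(U, 0)] = {defer:+.1f}") # +5.0
```

Because E[max(U, 0)] ≥ max(E[U], 0) for any belief, deference weakly dominates both acting unilaterally and self-shutdown; the advantage disappears once the robot's belief collapses to a point estimate, which is the fragility highlighted in the "Incorrigibility in the CIRL Framework" critique linked above.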
External Links

No external links

Backlinks (3)
id | title | type | relationship
autonomous-cooperative-agents | Autonomous Cooperative Agents | concept |
alignment-theoretical-overview | Theoretical Foundations (Overview) | concept |
cooperative-ai | Cooperative AI | approach |