Cooperative IRL (CIRL)
cirl · approach · Path: /knowledge-base/responses/cirl/
E586 — Entity ID (EID)
Page Record — database.json, merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
"id": "cirl",
"numericId": null,
"path": "/knowledge-base/responses/cirl/",
"filePath": "knowledge-base/responses/cirl.mdx",
"title": "Cooperative IRL (CIRL)",
"quality": 65,
"readerImportance": 25,
"researchImportance": 8,
"tacticalValue": null,
"contentFormat": "article",
"tractability": null,
"neglectedness": null,
"uncertainty": null,
"causalLevel": null,
"lastUpdated": "2026-03-13",
"dateCreated": "2026-02-15",
"llmSummary": "CIRL is a theoretical framework where AI systems maintain uncertainty about human preferences, which naturally incentivizes corrigibility and deference. Despite elegant theory with formal proofs, the approach faces a substantial theory-practice gap with no production deployments and only \\$1-5M/year in academic investment, making it more influential for conceptual foundations than immediate intervention design.",
"description": "Cooperative Inverse Reinforcement Learning (CIRL) is a theoretical framework where AI systems maintain uncertainty about human preferences and cooperatively learn them through interaction. While providing elegant theoretical foundations for corrigibility, CIRL remains largely academic with limited practical implementation.",
"ratings": {
"novelty": 3.5,
"rigor": 5,
"actionability": 3,
"completeness": 6
},
"category": "responses",
"subcategory": "alignment-theoretical",
"clusters": [
"ai-safety"
],
"metrics": {
"wordCount": 1946,
"tableCount": 21,
"diagramCount": 1,
"internalLinks": 12,
"externalLinks": 11,
"footnoteCount": 0,
"bulletRatio": 0.05,
"sectionCount": 32,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 90,
"evergreen": true,
"wordCount": 1946,
"unconvertedLinks": [
{
"text": "Hadfield-Menell et al., 2017",
"url": "https://arxiv.org/abs/1611.08219",
"resourceId": "026569778403629b",
"resourceTitle": "Hadfield-Menell et al. (2017)"
},
{
"text": "Cooperative Inverse Reinforcement Learning",
"url": "https://arxiv.org/abs/1606.03137",
"resourceId": "821f65afa4c681ca",
"resourceTitle": "Hadfield-Menell et al. (2016)"
},
{
"text": "The Off-Switch Game",
"url": "https://arxiv.org/abs/1611.08219",
"resourceId": "026569778403629b",
"resourceTitle": "Hadfield-Menell et al. (2017)"
},
{
"text": "Incorrigibility in the CIRL Framework",
"url": "https://intelligence.org/2017/08/31/incorrigibility-in-cirl/",
"resourceId": "3e250a28699df556",
"resourceTitle": "CIRL corrigibility proved fragile"
}
],
"unconvertedLinkCount": 4,
"convertedLinkCount": 0,
"backlinkCount": 3,
"hallucinationRisk": {
"level": "medium",
"score": 45,
"factors": [
"no-citations",
"conceptual-content"
]
},
"entityType": "approach",
"redundancy": {
"maxSimilarity": 15,
"similarPages": [
{
"id": "chai",
"title": "CHAI (Center for Human-Compatible AI)",
"path": "/knowledge-base/organizations/chai/",
"similarity": 15
},
{
"id": "cooperative-ai",
"title": "Cooperative AI",
"path": "/knowledge-base/responses/cooperative-ai/",
"similarity": 14
},
{
"id": "debate",
"title": "AI Safety via Debate",
"path": "/knowledge-base/responses/debate/",
"similarity": 14
},
{
"id": "instrumental-convergence-framework",
"title": "Instrumental Convergence Framework",
"path": "/knowledge-base/models/instrumental-convergence-framework/",
"similarity": 13
},
{
"id": "deceptive-alignment-decomposition",
"title": "Deceptive Alignment Decomposition Model",
"path": "/knowledge-base/models/deceptive-alignment-decomposition/",
"similarity": 12
}
]
},
"coverage": {
"passing": 7,
"total": 13,
"targets": {
"tables": 8,
"diagrams": 1,
"internalLinks": 16,
"externalLinks": 10,
"footnotes": 6,
"references": 6
},
"actuals": {
"tables": 21,
"diagrams": 1,
"internalLinks": 12,
"externalLinks": 11,
"footnotes": 0,
"references": 3,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"llmSummary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "red",
"overview": "green",
"tables": "green",
"diagrams": "green",
"internalLinks": "amber",
"externalLinks": "green",
"footnotes": "red",
"references": "amber",
"quotes": "red",
"accuracy": "red"
},
"ratingsString": "N:3.5 R:5 A:3 C:6"
},
"readerRank": 489,
"researchRank": 568,
"recommendedScore": 164.14
}

External Links
No external links
Backlinks (3)
| id | title | type | relationship |
|---|---|---|---|
| autonomous-cooperative-agents | Autonomous Cooperative Agents | concept | — |
| alignment-theoretical-overview | Theoretical Foundations (Overview) | concept | — |
| cooperative-ai | Cooperative AI | approach | — |