Deceptive Alignment Decomposition Model
deceptive-alignment-decomposition · analysis · Path: /knowledge-base/models/deceptive-alignment-decomposition/
E94 · Entity ID (EID)
Page Record: database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
"id": "deceptive-alignment-decomposition",
"numericId": null,
"path": "/knowledge-base/models/deceptive-alignment-decomposition/",
"filePath": "knowledge-base/models/deceptive-alignment-decomposition.mdx",
"title": "Deceptive Alignment Decomposition Model",
"quality": 62,
"readerImportance": 85,
"researchImportance": 76,
"tacticalValue": 60,
"contentFormat": "article",
"tractability": null,
"neglectedness": null,
"uncertainty": null,
"causalLevel": null,
"lastUpdated": "2026-03-13",
"dateCreated": "2026-02-15",
"llmSummary": "Decomposes deceptive alignment probability into five multiplicative conditions (mesa-optimization, misalignment, awareness, deception, survival) yielding 0.5-24% overall risk with 5% central estimate. Identifies that reducing any single factor by 50% cuts total risk by 50%, recommending focus on detection/survival parameter P(V) as most tractable intervention point with 2-4 year research timeline.",
"description": "A quantitative framework decomposing deceptive alignment probability into five multiplicative conditions with 0.5-24% overall risk estimates. The model identifies specific intervention points where reducing any single factor by 50% cuts total risk by 50%.",
"ratings": {
"focus": 8.5,
"novelty": 5,
"rigor": 6.5,
"completeness": 7.5,
"concreteness": 7,
"actionability": 6.5
},
"category": "models",
"subcategory": "risk-models",
"clusters": [
"ai-safety"
],
"metrics": {
"wordCount": 2124,
"tableCount": 18,
"diagramCount": 1,
"internalLinks": 41,
"externalLinks": 0,
"footnoteCount": 0,
"bulletRatio": 0.18,
"sectionCount": 32,
"hasOverview": true,
"structuralScore": 12
},
"suggestedQuality": 80,
"updateFrequency": 90,
"evergreen": true,
"wordCount": 2124,
"unconvertedLinks": [],
"unconvertedLinkCount": 0,
"convertedLinkCount": 19,
"backlinkCount": 6,
"hallucinationRisk": {
"level": "medium",
"score": 60,
"factors": [
"no-citations",
"few-external-sources"
]
},
"entityType": "analysis",
"redundancy": {
"maxSimilarity": 19,
"similarPages": [
{
"id": "mesa-optimization-analysis",
"title": "Mesa-Optimization Risk Analysis",
"path": "/knowledge-base/models/mesa-optimization-analysis/",
"similarity": 19
},
{
"id": "corrigibility-failure-pathways",
"title": "Corrigibility Failure Pathways",
"path": "/knowledge-base/models/corrigibility-failure-pathways/",
"similarity": 18
},
{
"id": "scheming-likelihood-model",
"title": "Scheming Likelihood Assessment",
"path": "/knowledge-base/models/scheming-likelihood-model/",
"similarity": 18
},
{
"id": "sleeper-agent-detection",
"title": "Sleeper Agent Detection",
"path": "/knowledge-base/responses/sleeper-agent-detection/",
"similarity": 17
},
{
"id": "deceptive-alignment",
"title": "Deceptive Alignment",
"path": "/knowledge-base/risks/deceptive-alignment/",
"similarity": 17
}
]
},
"changeHistory": [
{
"date": "2026-03-08",
"branch": "auto-update/2026-03-08",
"title": "Auto-improve (standard): Deceptive Alignment Decomposition Model",
"summary": "Improved \"Deceptive Alignment Decomposition Model\" via standard pipeline (1282.5s). Quality score: 81. Issues resolved: Unverified opaque citation IDs flagged in frontmatter todos ; Raw dollar signs used in multiple places without escaping: '; Comparison pattern '\\<2%' in the Expert Disagreement table (.",
"duration": "1282.5s",
"cost": "$5-8"
}
],
"coverage": {
"passing": 9,
"total": 13,
"targets": {
"tables": 8,
"diagrams": 1,
"internalLinks": 17,
"externalLinks": 11,
"footnotes": 6,
"references": 6
},
"actuals": {
"tables": 18,
"diagrams": 1,
"internalLinks": 41,
"externalLinks": 0,
"footnotes": 0,
"references": 11,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"llmSummary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "green",
"overview": "green",
"tables": "green",
"diagrams": "green",
"internalLinks": "green",
"externalLinks": "red",
"footnotes": "red",
"references": "green",
"quotes": "red",
"accuracy": "red"
},
"editHistoryCount": 1,
"ratingsString": "N:5 R:6.5 A:6.5 C:7.5"
},
"readerRank": 49,
"researchRank": 112,
"recommendedScore": 188.18
}
External Links
{
"lesswrong": "https://www.lesswrong.com/tag/deceptive-alignment"
}
Backlinks (6)
| id | title | type | relationship |
|---|---|---|---|
| carlsmith-six-premises | Carlsmith's Six-Premise Argument | analysis | related |
| alignment-robustness-trajectory | Alignment Robustness Trajectory Model | analysis | related |
| defense-in-depth-model | Defense in Depth Model | analysis | — |
| __index__/knowledge-base/models | Analytical Models | concept | — |
| intervention-effectiveness-matrix | Intervention Effectiveness Matrix | analysis | — |
| technical-pathways | Technical Pathway Decomposition | analysis | — |