Longterm Wiki

Deceptive Alignment Decomposition Model

deceptive-alignment-decomposition · analysis · Path: /knowledge-base/models/deceptive-alignment-decomposition/
E94 · Entity ID (EID)
← Back to page · 6 backlinks · Quality: 62 · Updated: 2026-03-13
Page Record (database.json) — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "deceptive-alignment-decomposition",
  "numericId": null,
  "path": "/knowledge-base/models/deceptive-alignment-decomposition/",
  "filePath": "knowledge-base/models/deceptive-alignment-decomposition.mdx",
  "title": "Deceptive Alignment Decomposition Model",
  "quality": 62,
  "readerImportance": 85,
  "researchImportance": 76,
  "tacticalValue": 60,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "Decomposes deceptive alignment probability into five multiplicative conditions (mesa-optimization, misalignment, awareness, deception, survival) yielding 0.5-24% overall risk with 5% central estimate. Identifies that reducing any single factor by 50% cuts total risk by 50%, recommending focus on detection/survival parameter P(V) as most tractable intervention point with 2-4 year research timeline.",
  "description": "A quantitative framework decomposing deceptive alignment probability into five multiplicative conditions with 0.5-24% overall risk estimates. The model identifies specific intervention points where reducing any single factor by 50% cuts total risk by 50%.",
  "ratings": {
    "focus": 8.5,
    "novelty": 5,
    "rigor": 6.5,
    "completeness": 7.5,
    "concreteness": 7,
    "actionability": 6.5
  },
  "category": "models",
  "subcategory": "risk-models",
  "clusters": [
    "ai-safety"
  ],
  "metrics": {
    "wordCount": 2124,
    "tableCount": 18,
    "diagramCount": 1,
    "internalLinks": 41,
    "externalLinks": 0,
    "footnoteCount": 0,
    "bulletRatio": 0.18,
    "sectionCount": 32,
    "hasOverview": true,
    "structuralScore": 12
  },
  "suggestedQuality": 80,
  "updateFrequency": 90,
  "evergreen": true,
  "wordCount": 2124,
  "unconvertedLinks": [],
  "unconvertedLinkCount": 0,
  "convertedLinkCount": 19,
  "backlinkCount": 6,
  "hallucinationRisk": {
    "level": "medium",
    "score": 60,
    "factors": [
      "no-citations",
      "few-external-sources"
    ]
  },
  "entityType": "analysis",
  "redundancy": {
    "maxSimilarity": 19,
    "similarPages": [
      {
        "id": "mesa-optimization-analysis",
        "title": "Mesa-Optimization Risk Analysis",
        "path": "/knowledge-base/models/mesa-optimization-analysis/",
        "similarity": 19
      },
      {
        "id": "corrigibility-failure-pathways",
        "title": "Corrigibility Failure Pathways",
        "path": "/knowledge-base/models/corrigibility-failure-pathways/",
        "similarity": 18
      },
      {
        "id": "scheming-likelihood-model",
        "title": "Scheming Likelihood Assessment",
        "path": "/knowledge-base/models/scheming-likelihood-model/",
        "similarity": 18
      },
      {
        "id": "sleeper-agent-detection",
        "title": "Sleeper Agent Detection",
        "path": "/knowledge-base/responses/sleeper-agent-detection/",
        "similarity": 17
      },
      {
        "id": "deceptive-alignment",
        "title": "Deceptive Alignment",
        "path": "/knowledge-base/risks/deceptive-alignment/",
        "similarity": 17
      }
    ]
  },
  "changeHistory": [
    {
      "date": "2026-03-08",
      "branch": "auto-update/2026-03-08",
      "title": "Auto-improve (standard): Deceptive Alignment Decomposition Model",
      "summary": "Improved \"Deceptive Alignment Decomposition Model\" via standard pipeline (1282.5s). Quality score: 81. Issues resolved: Unverified opaque citation IDs flagged in frontmatter todos ; Raw dollar signs used in multiple places without escaping: '; Comparison pattern '\\<2%' in the Expert Disagreement table (.",
      "duration": "1282.5s",
      "cost": "$5-8"
    }
  ],
  "coverage": {
    "passing": 9,
    "total": 13,
    "targets": {
      "tables": 8,
      "diagrams": 1,
      "internalLinks": 17,
      "externalLinks": 11,
      "footnotes": 6,
      "references": 6
    },
    "actuals": {
      "tables": 18,
      "diagrams": 1,
      "internalLinks": 41,
      "externalLinks": 0,
      "footnotes": 0,
      "references": 11,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "green",
      "overview": "green",
      "tables": "green",
      "diagrams": "green",
      "internalLinks": "green",
      "externalLinks": "red",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "editHistoryCount": 1,
    "ratingsString": "N:5 R:6.5 A:6.5 C:7.5"
  },
  "readerRank": 49,
  "researchRank": 112,
  "recommendedScore": 188.18
}
External Links
{
  "lesswrong": "https://www.lesswrong.com/tag/deceptive-alignment"
}
Backlinks (6)
| id | title | type | relationship |
| --- | --- | --- | --- |
| carlsmith-six-premises | Carlsmith's Six-Premise Argument | analysis | related |
| alignment-robustness-trajectory | Alignment Robustness Trajectory Model | analysis | related |
| defense-in-depth-model | Defense in Depth Model | analysis | |
| __index__/knowledge-base/models | Analytical Models | concept | |
| intervention-effectiveness-matrix | Intervention Effectiveness Matrix | analysis | |
| technical-pathways | Technical Pathway Decomposition | analysis | |
Longterm Wiki