Longterm Wiki

Safety-Capability Tradeoff Model

safety-capability-tradeoff (analysis)
Path: /knowledge-base/models/safety-capability-tradeoff/
Entity ID (EID): E262
2 backlinks · Quality: 64 · Updated: 2026-03-13
Page Record (database.json) — merged from MDX frontmatter + Entity YAML + computed metrics at build time; a sketch of this merge appears below the record.
{
  "id": "safety-capability-tradeoff",
  "numericId": null,
  "path": "/knowledge-base/models/safety-capability-tradeoff/",
  "filePath": "knowledge-base/models/safety-capability-tradeoff.mdx",
  "title": "Safety-Capability Tradeoff Model",
  "quality": 64,
  "readerImportance": 85.5,
  "researchImportance": 87.5,
  "tacticalValue": 68,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "Analyzes when AI safety measures conflict with capabilities, finding most interventions impose 5-15% capability cost but RLHF actually improves usability +10-30%. Under strong racing dynamics (60-75% probability), safety investment creates competitive disadvantage; coordination or regulation required to prevent race-to-bottom equilibrium.",
  "description": "This model analyzes when safety measures conflict with capabilities. It finds most safety interventions impose 5-15% capability cost, with some achieving safety gains at lower cost.",
  "ratings": {
    "focus": 8.5,
    "novelty": 5,
    "rigor": 6.5,
    "completeness": 7.5,
    "concreteness": 7,
    "actionability": 6.5
  },
  "category": "models",
  "subcategory": "safety-models",
  "clusters": [
    "ai-safety",
    "governance"
  ],
  "metrics": {
    "wordCount": 5817,
    "tableCount": 17,
    "diagramCount": 2,
    "internalLinks": 5,
    "externalLinks": 21,
    "footnoteCount": 0,
    "bulletRatio": 0.07,
    "sectionCount": 53,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 90,
  "evergreen": true,
  "wordCount": 5817,
  "unconvertedLinks": [
    {
      "text": "Concrete Problems in AI Safety",
      "url": "https://arxiv.org/abs/1606.06565",
      "resourceId": "cd3035dbef6c7b5b",
      "resourceTitle": "Concrete Problems in AI Safety"
    },
    {
      "text": "OpenAI Safety Approach",
      "url": "https://openai.com/safety/how-we-think-about-safety-alignment/",
      "resourceId": "155d4f497d76c742",
      "resourceTitle": "OpenAI - How We Think About Safety Alignment"
    },
    {
      "text": "AI Safety Textbook: AI Race Dynamics",
      "url": "https://www.aisafetybook.com/textbook/ai-race",
      "resourceId": "28cf9e30851a7bc2",
      "resourceTitle": "Frontier AI Safety Commitments"
    }
  ],
  "unconvertedLinkCount": 3,
  "convertedLinkCount": 0,
  "backlinkCount": 2,
  "hallucinationRisk": {
    "level": "medium",
    "score": 55,
    "factors": [
      "no-citations"
    ]
  },
  "entityType": "analysis",
  "redundancy": {
    "maxSimilarity": 21,
    "similarPages": [
      {
        "id": "scalable-oversight",
        "title": "Scalable Oversight",
        "path": "/knowledge-base/responses/scalable-oversight/",
        "similarity": 21
      },
      {
        "id": "structural-risks",
        "title": "AI Structural Risk Cruxes",
        "path": "/knowledge-base/cruxes/structural-risks/",
        "similarity": 19
      },
      {
        "id": "authoritarian-tools-diffusion",
        "title": "Authoritarian Tools Diffusion Model",
        "path": "/knowledge-base/models/authoritarian-tools-diffusion/",
        "similarity": 19
      },
      {
        "id": "reward-hacking-taxonomy",
        "title": "Reward Hacking Taxonomy and Severity Model",
        "path": "/knowledge-base/models/reward-hacking-taxonomy/",
        "similarity": 19
      },
      {
        "id": "whistleblower-dynamics",
        "title": "Whistleblower Dynamics Model",
        "path": "/knowledge-base/models/whistleblower-dynamics/",
        "similarity": 19
      }
    ]
  },
  "changeHistory": [
    {
      "date": "2026-03-12",
      "branch": "auto-update/2026-03-12",
      "title": "Auto-improve (standard): Safety-Capability Tradeoff Model",
      "summary": "Improved \"Safety-Capability Tradeoff Model\" via standard pipeline (1435.3s). Quality score: 71. Issues resolved: Frontmatter: 'lastEdited' date '2026-03-12' is a future date; Bare URL in footnote [^rc-1c25]: the footnote text is trunca; EntityLink in Mermaid chart: '<EntityLink id=\"E239\" name=\"ra.",
      "duration": "1435.3s",
      "cost": "$5-8"
    }
  ],
  "coverage": {
    "passing": 6,
    "total": 13,
    "targets": {
      "tables": 23,
      "diagrams": 2,
      "internalLinks": 47,
      "externalLinks": 29,
      "footnotes": 17,
      "references": 17
    },
    "actuals": {
      "tables": 17,
      "diagrams": 2,
      "internalLinks": 5,
      "externalLinks": 21,
      "footnotes": 0,
      "references": 3,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "green",
      "overview": "green",
      "tables": "amber",
      "diagrams": "green",
      "internalLinks": "amber",
      "externalLinks": "amber",
      "footnotes": "red",
      "references": "amber",
      "quotes": "red",
      "accuracy": "red"
    },
    "editHistoryCount": 1,
    "ratingsString": "N:5 R:6.5 A:6.5 C:7.5"
  },
  "readerRank": 45,
  "researchRank": 38,
  "recommendedScore": 192.58
}
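
The record above is described as a build-time merge of MDX frontmatter, Entity YAML, and computed metrics. The following is a minimal TypeScript sketch of that merge, assuming the frontmatter and entity YAML have already been parsed into plain objects; the interface, the function name buildPageRecord, and the word-count and link-count heuristics are illustrative assumptions, not the wiki's actual build code.

```typescript
// Minimal sketch of the build-time merge described above.
// Field names mirror the record; the source objects and the
// metric heuristics are assumptions, not the real pipeline.

interface PageRecord {
  id: string;
  path: string;
  title: string;
  quality: number;
  lastUpdated: string;
  metrics: { wordCount: number; internalLinks: number };
  [key: string]: unknown;
}

/** Merge MDX frontmatter, entity YAML, and computed metrics into one record.
 *  Assumption: later sources win on key conflicts (frontmatter < entity < computed). */
function buildPageRecord(
  frontmatter: Record<string, unknown>,
  entityYaml: Record<string, unknown>,
  body: string,
): PageRecord {
  const computed = {
    metrics: {
      // Naive word count: whitespace-separated tokens in the MDX body.
      wordCount: body.split(/\s+/).filter(Boolean).length,
      // Naive internal-link count: markdown links pointing into /knowledge-base/.
      internalLinks: (body.match(/\]\(\/knowledge-base\//g) ?? []).length,
    },
  };
  return { ...frontmatter, ...entityYaml, ...computed } as PageRecord;
}

// Hypothetical usage with inline stand-ins for the parsed files:
const record = buildPageRecord(
  { id: "safety-capability-tradeoff", title: "Safety-Capability Tradeoff Model", quality: 64 },
  { path: "/knowledge-base/models/safety-capability-tradeoff/", lastUpdated: "2026-03-13" },
  "Example body with an [internal link](/knowledge-base/responses/scalable-oversight/).",
);
console.log(JSON.stringify(record, null, 2));
```

Later-source precedence is only one plausible conflict rule; the real pipeline may resolve conflicts differently and clearly computes many more metrics (tables, diagrams, coverage, redundancy) than this sketch shows.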
External Links

No external links

Backlinks (2)
id | title | type | relationship
ai-acceleration-tradeoff | AI Acceleration Tradeoff Model | analysis | related
alignment-robustness-trajectory | Alignment Robustness Trajectory Model | analysis | related