Longterm Wiki

Sandboxing / Containment

ID: sandboxing · Type: approach
Path: /knowledge-base/responses/sandboxing/
Entity ID (EID): E485
Backlinks: 4 · Quality: 91 · Updated: 2026-03-13
Page Record
database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "sandboxing",
  "numericId": null,
  "path": "/knowledge-base/responses/sandboxing/",
  "filePath": "knowledge-base/responses/sandboxing.mdx",
  "title": "Sandboxing / Containment",
  "quality": 91,
  "readerImportance": 57.5,
  "researchImportance": 28.5,
  "tacticalValue": null,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "Comprehensive analysis of AI sandboxing as defense-in-depth, synthesizing METR's 2025 evaluations (GPT-5 time horizon ~2h, capabilities doubling every 7 months), AI boxing experiments (60-70% escape rates), and 2024-2025 container vulnerabilities (CVE-2024-0132, NVIDIAScape, IDEsaster). Quantifies isolation technology tradeoffs (gVisor ~10% I/O, Firecracker ~85%) with market context (\\$7.6B AI agent market, 85% enterprise adoption, \\$3.8B 2024 investment). Includes Anthropic's Sabotage Risk Report findings and CVE-Bench benchmark (13% AI exploit success rate).",
  "description": "Sandboxing limits AI system access to resources, networks, and capabilities as a defense-in-depth measure. METR's August 2025 evaluation found GPT-5's time horizon at ~2 hours—insufficient for autonomous replication. AI boxing experiments show 60-70% social engineering escape rates. Critical CVEs (CVE-2024-0132, CVE-2025-23266) demonstrate container escapes, while the IDEsaster disclosure revealed 30+ vulnerabilities in AI coding tools. Firecracker microVMs provide 85% native performance with hardware isolation; gVisor offers ~10% I/O performance but better compatibility.",
  "ratings": {
    "novelty": 5,
    "rigor": 8,
    "actionability": 8,
    "completeness": 8
  },
  "category": "responses",
  "subcategory": "alignment-deployment",
  "clusters": [
    "ai-safety",
    "cyber"
  ],
  "metrics": {
    "wordCount": 4264,
    "tableCount": 30,
    "diagramCount": 5,
    "internalLinks": 4,
    "externalLinks": 121,
    "footnoteCount": 0,
    "bulletRatio": 0.09,
    "sectionCount": 51,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 4264,
  "unconvertedLinks": [
    {
      "text": "GPT-5 time horizon ≈2 hours",
      "url": "https://evaluations.metr.org/gpt-5-report/",
      "resourceId": "7457262d461e2206",
      "resourceTitle": "evaluations.metr.org"
    },
    {
      "text": "METR",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "metr.org"
    },
    {
      "text": "UK AISI",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "AI Safety Institute"
    },
    {
      "text": "METR's 2024 evaluations",
      "url": "https://metr.org/blog/2024-11-12-rogue-replication-threat-model/",
      "resourceId": "5b45342b68bf627e",
      "resourceTitle": "The Rogue Replication Threat Model"
    },
    {
      "text": "METR (Model Evaluation & Threat Research)",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "metr.org"
    },
    {
      "text": "METR Rogue Replication Threat Model (November 2024)",
      "url": "https://metr.org/blog/2024-11-12-rogue-replication-threat-model/",
      "resourceId": "5b45342b68bf627e",
      "resourceTitle": "The Rogue Replication Threat Model"
    },
    {
      "text": "GPT-5",
      "url": "https://evaluations.metr.org/gpt-5-report/",
      "resourceId": "7457262d461e2206",
      "resourceTitle": "evaluations.metr.org"
    },
    {
      "text": "Claude 3.5 Sonnet",
      "url": "https://metr.org/blog/2025-01-31-update-sonnet-o1-evals/",
      "resourceId": "89b92e6423256fc4",
      "resourceTitle": "METR's research"
    },
    {
      "text": "o1",
      "url": "https://metr.org/blog/2025-01-31-update-sonnet-o1-evals/",
      "resourceId": "89b92e6423256fc4",
      "resourceTitle": "METR's research"
    },
    {
      "text": "METR GPT-5 Report",
      "url": "https://evaluations.metr.org/gpt-5-report/",
      "resourceId": "7457262d461e2206",
      "resourceTitle": "evaluations.metr.org"
    },
    {
      "text": "METR Claude/o1 Update",
      "url": "https://metr.org/blog/2025-01-31-update-sonnet-o1-evals/",
      "resourceId": "89b92e6423256fc4",
      "resourceTitle": "METR's research"
    },
    {
      "text": "OpenAI GPT-4 System Card",
      "url": "https://cdn.openai.com/papers/gpt-4-system-card.pdf",
      "resourceId": "ebab6e05661645c5",
      "resourceTitle": "OpenAI"
    },
    {
      "text": "Anthropic's 2025 research recommendations",
      "url": "https://alignment.anthropic.com/2025/recommended-directions/",
      "resourceId": "7ae6b3be2d2043c1",
      "resourceTitle": "Anthropic: Recommended Directions for AI Safety Research"
    },
    {
      "text": "Activation-Based Monitoring",
      "url": "https://alignment.anthropic.com/2025/recommended-directions/",
      "resourceId": "7ae6b3be2d2043c1",
      "resourceTitle": "Anthropic: Recommended Directions for AI Safety Research"
    },
    {
      "text": "Anthropic AI Control Research",
      "url": "https://alignment.anthropic.com/2025/recommended-directions/",
      "resourceId": "7ae6b3be2d2043c1",
      "resourceTitle": "Anthropic: Recommended Directions for AI Safety Research"
    },
    {
      "text": "monitor failures might be systematically concentrated in episodes where the actor behaves maliciously",
      "url": "https://alignment.anthropic.com/2025/recommended-directions/",
      "resourceId": "7ae6b3be2d2043c1",
      "resourceTitle": "Anthropic: Recommended Directions for AI Safety Research"
    },
    {
      "text": "AI Safety Level 3 (ASL-3) protections",
      "url": "https://www.anthropic.com/news/activating-asl3-protections",
      "resourceId": "7512ddb574f82249"
    },
    {
      "text": "METR GPT-5 Report (Aug 2025)",
      "url": "https://evaluations.metr.org/gpt-5-report/",
      "resourceId": "7457262d461e2206",
      "resourceTitle": "evaluations.metr.org"
    },
    {
      "text": "METR Claude/o1 Update (Jan 2025)",
      "url": "https://metr.org/blog/2025-01-31-update-sonnet-o1-evals/",
      "resourceId": "89b92e6423256fc4",
      "resourceTitle": "METR's research"
    },
    {
      "text": "METR Rogue Replication Threat Model",
      "url": "https://metr.org/blog/2024-11-12-rogue-replication-threat-model/",
      "resourceId": "5b45342b68bf627e",
      "resourceTitle": "The Rogue Replication Threat Model"
    },
    {
      "text": "Anthropic Research Directions 2025",
      "url": "https://alignment.anthropic.com/2025/recommended-directions/",
      "resourceId": "7ae6b3be2d2043c1",
      "resourceTitle": "Anthropic: Recommended Directions for AI Safety Research"
    },
    {
      "text": "Anthropic ASL-3 Announcement",
      "url": "https://www.anthropic.com/news/activating-asl3-protections",
      "resourceId": "7512ddb574f82249"
    },
    {
      "text": "Palo Alto Unit 42",
      "url": "https://unit42.paloaltonetworks.com/agentic-ai-threats/",
      "resourceId": "d6f4face14780e85",
      "resourceTitle": "EchoLeak exploit (CVE-2025-32711)"
    },
    {
      "text": "Grand View Research",
      "url": "https://www.grandviewresearch.com/industry-analysis/ai-agents-market-report",
      "resourceId": "9a0353b668d6ab37",
      "resourceTitle": "\"AI Agents Market Report.\" Grand View Research."
    },
    {
      "text": "METR",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "metr.org"
    },
    {
      "text": "UK AI Safety Institute",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "AI Safety Institute"
    },
    {
      "text": "Anthropic",
      "url": "https://www.anthropic.com/",
      "resourceId": "afe2508ac4caf5ee",
      "resourceTitle": "Anthropic"
    },
    {
      "text": "OpenAI",
      "url": "https://openai.com/safety/",
      "resourceId": "838d7a59a02e11a7",
      "resourceTitle": "OpenAI Safety Updates"
    },
    {
      "text": "Palo Alto Unit 42",
      "url": "https://unit42.paloaltonetworks.com/agentic-ai-threats/",
      "resourceId": "d6f4face14780e85",
      "resourceTitle": "EchoLeak exploit (CVE-2025-32711)"
    },
    {
      "text": "Anthropic",
      "url": "https://alignment.anthropic.com/2025/recommended-directions/",
      "resourceId": "7ae6b3be2d2043c1",
      "resourceTitle": "Anthropic: Recommended Directions for AI Safety Research"
    }
  ],
  "unconvertedLinkCount": 30,
  "convertedLinkCount": 0,
  "backlinkCount": 4,
  "hallucinationRisk": {
    "level": "low",
    "score": 25,
    "factors": [
      "no-citations",
      "high-rigor",
      "conceptual-content",
      "high-quality"
    ]
  },
  "entityType": "approach",
  "redundancy": {
    "maxSimilarity": 19,
    "similarPages": [
      {
        "id": "tool-restrictions",
        "title": "Tool-Use Restrictions",
        "path": "/knowledge-base/responses/tool-restrictions/",
        "similarity": 19
      },
      {
        "id": "self-improvement",
        "title": "Self-Improvement and Recursive Enhancement",
        "path": "/knowledge-base/capabilities/self-improvement/",
        "similarity": 16
      },
      {
        "id": "dangerous-cap-evals",
        "title": "Dangerous Capability Evaluations",
        "path": "/knowledge-base/responses/dangerous-cap-evals/",
        "similarity": 16
      },
      {
        "id": "metr",
        "title": "METR",
        "path": "/knowledge-base/organizations/metr/",
        "similarity": 15
      },
      {
        "id": "sleeper-agent-detection",
        "title": "Sleeper Agent Detection",
        "path": "/knowledge-base/responses/sleeper-agent-detection/",
        "similarity": 15
      }
    ]
  },
  "coverage": {
    "passing": 7,
    "total": 13,
    "targets": {
      "tables": 17,
      "diagrams": 2,
      "internalLinks": 34,
      "externalLinks": 21,
      "footnotes": 13,
      "references": 13
    },
    "actuals": {
      "tables": 30,
      "diagrams": 5,
      "internalLinks": 4,
      "externalLinks": 121,
      "footnotes": 0,
      "references": 12,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "red",
      "overview": "green",
      "tables": "green",
      "diagrams": "green",
      "internalLinks": "amber",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "amber",
      "quotes": "red",
      "accuracy": "red"
    },
    "ratingsString": "N:5 R:8 A:8 C:8"
  },
  "readerRank": 249,
  "researchRank": 430,
  "recommendedScore": 232.61
}
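The record above is described as a build-time merge of MDX frontmatter, Entity YAML, and computed metrics. Below is a minimal sketch of what such a pipeline could look like; the loader names (loadFrontmatter, loadEntityYaml) and the metric heuristics are assumptions for illustration, not the wiki's actual build code. Only the field names visible in the record are taken from the source.

```typescript
// Sketch: assemble a database.json record from three sources.
// Field names mirror the record above; everything else is assumed.

interface PageMetrics {
  wordCount: number;
  tableCount: number;
  internalLinks: number;
  externalLinks: number;
  footnoteCount: number;
}

// Hypothetical stand-ins for the real parsers.
function loadFrontmatter(filePath: string): Record<string, unknown> {
  // e.g. parse the YAML block at the top of the .mdx file
  return { title: "Sandboxing / Containment", quality: 91 };
}

function loadEntityYaml(id: string): Record<string, unknown> {
  // e.g. read an entities/<id>.yaml file
  return { entityType: "approach", clusters: ["ai-safety", "cyber"] };
}

// Rough metric heuristics over the MDX body (illustrative only).
function computeMetrics(body: string): PageMetrics {
  return {
    wordCount: body.split(/\s+/).filter(Boolean).length,
    // one header-separator row per markdown table
    tableCount: (body.match(/^\|[\s:-]+\|/gm) ?? []).length,
    internalLinks: (body.match(/\]\(\/knowledge-base\//g) ?? []).length,
    externalLinks: (body.match(/\]\(https?:\/\//g) ?? []).length,
    footnoteCount: (body.match(/\[\^[^\]]+\]:/g) ?? []).length,
  };
}

function buildRecord(id: string, filePath: string, body: string) {
  return {
    id,
    filePath,
    ...loadFrontmatter(filePath),
    ...loadEntityYaml(id),
    metrics: computeMetrics(body),
  };
}
```

The coverage block's green/amber/red item statuses are consistent with a simple traffic-light rule over targets vs. actuals; this is an inference from this one record, not documented behavior:

```typescript
type Status = "green" | "amber" | "red";

// Matches every item in this record: externalLinks (121 >= 21) is green,
// internalLinks (4 of 34) is amber, footnotes (0 of 13) is red.
function coverageStatus(actual: number, target: number): Status {
  if (actual >= target) return "green";
  if (actual > 0) return "amber";
  return "red";
}
```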
External Links
{
  "lesswrong": "https://www.lesswrong.com/tag/ai-boxing-containment",
  "wikipedia": "https://en.wikipedia.org/wiki/AI_capability_control",
  "grokipedia": "https://grokipedia.com/page/AI_capability_control"
}
Backlinks (4)
id | title | type | relationship
structured-access | Structured Access / API-Only | approach |
tool-restrictions | Tool-Use Restrictions | approach |
rogue-ai-scenarios | Rogue AI Scenarios | risk |
alignment-deployment-overview | Deployment & Control (Overview) | concept |
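The backlinks table and the record's backlinkCount field are presumably derived by inverting each page's internal links in a later build pass. A minimal sketch of that pass, assuming a per-page internalLinkTargets field (an assumed name; the real build may extract targets directly from the MDX body):

```typescript
// Hypothetical backlink pass: invert internal links across all pages.
interface PageStub {
  id: string;
  title: string;
  entityType: string;
  internalLinkTargets: string[]; // ids of pages this page links to
}

function collectBacklinks(pages: PageStub[], targetId: string): PageStub[] {
  return pages.filter((p) => p.internalLinkTargets.includes(targetId));
}

// backlinkCount in the record would then be:
// collectBacklinks(allPages, "sandboxing").length  // 4 for this page
```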