Longterm Wiki

Technical AI Safety Research

technical-research · crux · Path: /knowledge-base/responses/technical-research/
E297 · Entity ID (EID)
← Back to page · 14 backlinks · Quality: 66 · Updated: 2026-03-13
Page Record · database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "technical-research",
  "numericId": null,
  "path": "/knowledge-base/responses/technical-research/",
  "filePath": "knowledge-base/responses/technical-research.mdx",
  "title": "Technical AI Safety Research",
  "quality": 66,
  "readerImportance": 85.5,
  "researchImportance": 24,
  "tacticalValue": 65,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "Technical AI safety research encompasses six major agendas (mechanistic interpretability, scalable oversight, AI control, evaluations, agent foundations, and robustness) with 500+ researchers and $110-130M annual funding. Key 2024-2025 findings include tens of millions of interpretable features identified in Claude 3, 5 of 6 frontier models showing scheming capabilities, and deliberative alignment reducing scheming by up to 30x, though experts estimate only 2-50% x-risk reduction depending on timeline assumptions and technical tractability.",
  "description": "Technical AI safety research aims to make AI systems reliably safe through scientific and engineering work. Current approaches include mechanistic interpretability (identifying millions of features in production models), scalable oversight (weak-to-strong generalization showing promise), AI control (protocols robust even against scheming models), and dangerous capability evaluations (five of six frontier models showed scheming capabilities in 2024 tests). Annual funding is estimated at $80-130M, with over 500 researchers across frontier labs and independent organizations.",
  "ratings": {
    "novelty": 4.2,
    "rigor": 6.8,
    "actionability": 7.1,
    "completeness": 7.5
  },
  "category": "responses",
  "subcategory": "alignment",
  "clusters": [
    "ai-safety",
    "governance"
  ],
  "metrics": {
    "wordCount": 3768,
    "tableCount": 11,
    "diagramCount": 1,
    "internalLinks": 67,
    "externalLinks": 29,
    "footnoteCount": 0,
    "bulletRatio": 0.39,
    "sectionCount": 45,
    "hasOverview": true,
    "structuralScore": 14
  },
  "suggestedQuality": 93,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 3768,
  "unconvertedLinks": [
    {
      "text": "UK AISI",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "AI Safety Institute"
    },
    {
      "text": "METR",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "metr.org"
    },
    {
      "text": "frontier AI safety policies",
      "url": "https://metr.org/blog/2025-03-26-common-elements-of-frontier-ai-safety-policies/",
      "resourceId": "a37628e3a1e97778",
      "resourceTitle": "footnote 17 problem"
    },
    {
      "text": "Anthropic's May 2024 \"Scaling Monosemanticity\"",
      "url": "https://transformer-circuits.pub/2024/scaling-monosemanticity/",
      "resourceId": "e724db341d6e0065"
    },
    {
      "text": "UK AI Security Institute",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "Anthropic Transformer Circuits",
      "url": "https://transformer-circuits.pub/2024/scaling-monosemanticity/",
      "resourceId": "e724db341d6e0065"
    },
    {
      "text": "OpenAI-Apollo Collaboration",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    },
    {
      "text": "UK AISI Frontier Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "UK AISI Evaluations",
      "url": "https://www.aisi.gov.uk/blog/early-lessons-from-evaluating-frontier-ai-systems",
      "resourceId": "0fd3b1f5c81a37d8",
      "resourceTitle": "UK AI Security Institute's evaluations"
    },
    {
      "text": "OpenAI o1 System Card",
      "url": "https://openai.com/",
      "resourceId": "04d39e8bd5d50dd5",
      "resourceTitle": "OpenAI"
    },
    {
      "text": "UK AI Security Institute",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "AI Safety Institute"
    },
    {
      "text": "US AI Safety Institute",
      "url": "https://www.nist.gov/aisi",
      "resourceId": "84e0da6d5092e27d",
      "resourceTitle": "US AISI"
    },
    {
      "text": "METR",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "metr.org"
    },
    {
      "text": "Apollo Research",
      "url": "https://www.apolloresearch.ai/",
      "resourceId": "329d8c2e2532be3d",
      "resourceTitle": "Apollo Research"
    },
    {
      "text": "Redwood Research",
      "url": "https://www.redwoodresearch.org/",
      "resourceId": "42e7247cbc33fc4c",
      "resourceTitle": "Redwood Research: AI Control"
    },
    {
      "text": "UK Government (AISI)",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "AI Safety Institute"
    },
    {
      "text": "representing under 2% of estimated capabilities spending",
      "url": "https://www.lesswrong.com/posts/WGpFFJo2uFe5ssgEb/an-overview-of-the-ai-safety-funding-situation",
      "resourceId": "b1ab921f9cbae109",
      "resourceTitle": "An Overview of the AI Safety Funding Situation (LessWrong)"
    },
    {
      "text": "cost-prohibitive for full coverage",
      "url": "https://transformer-circuits.pub/2024/scaling-monosemanticity/",
      "resourceId": "e724db341d6e0065"
    },
    {
      "text": "Redwood's protocols",
      "url": "https://www.redwoodresearch.org/research/ai-control",
      "resourceId": "eb2318c5e3fc0f88",
      "resourceTitle": "Redwood Research, 2024"
    },
    {
      "text": "UK AISI tested 30+ models",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "AI Safety Institute"
    },
    {
      "text": "o1 process supervision deployed",
      "url": "https://openai.com/",
      "resourceId": "04d39e8bd5d50dd5",
      "resourceTitle": "OpenAI"
    },
    {
      "text": "doubling time ≈7 months for autonomy",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "metr.org"
    }
  ],
  "unconvertedLinkCount": 22,
  "convertedLinkCount": 52,
  "backlinkCount": 14,
  "hallucinationRisk": {
    "level": "medium",
    "score": 45,
    "factors": [
      "no-citations",
      "conceptual-content"
    ]
  },
  "entityType": "crux",
  "redundancy": {
    "maxSimilarity": 21,
    "similarPages": [
      {
        "id": "research-agendas",
        "title": "AI Alignment Research Agenda Comparison",
        "path": "/knowledge-base/responses/research-agendas/",
        "similarity": 21
      },
      {
        "id": "accident-risks",
        "title": "AI Accident Risk Cruxes",
        "path": "/knowledge-base/cruxes/accident-risks/",
        "similarity": 19
      },
      {
        "id": "intervention-effectiveness-matrix",
        "title": "Intervention Effectiveness Matrix",
        "path": "/knowledge-base/models/intervention-effectiveness-matrix/",
        "similarity": 19
      },
      {
        "id": "anthropic-core-views",
        "title": "Anthropic Core Views",
        "path": "/knowledge-base/responses/anthropic-core-views/",
        "similarity": 19
      },
      {
        "id": "interpretability",
        "title": "Mechanistic Interpretability",
        "path": "/knowledge-base/responses/interpretability/",
        "similarity": 19
      }
    ]
  },
  "coverage": {
    "passing": 7,
    "total": 13,
    "targets": {
      "tables": 15,
      "diagrams": 2,
      "internalLinks": 30,
      "externalLinks": 19,
      "footnotes": 11,
      "references": 11
    },
    "actuals": {
      "tables": 11,
      "diagrams": 1,
      "internalLinks": 67,
      "externalLinks": 29,
      "footnotes": 0,
      "references": 40,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "red",
      "overview": "green",
      "tables": "amber",
      "diagrams": "amber",
      "internalLinks": "green",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "ratingsString": "N:4.2 R:6.8 A:7.1 C:7.5"
  },
  "readerRank": 47,
  "researchRank": 460,
  "recommendedScore": 196.61
}
External Links
{
  "eightyK": "https://80000hours.org/career-reviews/ai-safety-researcher/"
}
Backlinks (14)
id · title · type (relationship)
agentic-ai · Agentic AI · capability
80000-hours · 80,000 Hours · organization
coefficient-giving · Coefficient Giving · organization
fli · Future of Life Institute (FLI) · organization
leading-the-future · Leading the Future super PAC · organization
ltff · Long-Term Future Fund (LTFF) · organization
palisade-research · Palisade Research · organization
dan-hendrycks · Dan Hendrycks · person
vidur-kapur · Vidur Kapur · person
vipul-naik · Vipul Naik · person
governance-policy · AI Governance and Policy · crux
intervention-portfolio · AI Safety Intervention Portfolio · approach
public-education · AI Risk Public Education · approach
training-programs · AI Safety Training Programs · approach
Longterm Wiki