Longterm Wiki

METR

ID: metr · Type: organization · Path: /knowledge-base/organizations/metr/
Entity ID (EID): E201
65 backlinks · Quality: 66 · Updated: 2026-03-13
Page Record
database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time (a hedged sketch of this merge follows the record)
{
  "id": "metr",
  "numericId": null,
  "path": "/knowledge-base/organizations/metr/",
  "filePath": "knowledge-base/organizations/metr.mdx",
  "title": "METR",
  "quality": 66,
  "readerImportance": 83.5,
  "researchImportance": 50.5,
  "tacticalValue": 80,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "METR conducts pre-deployment dangerous capability evaluations for frontier AI labs (OpenAI, Anthropic, Google DeepMind), testing autonomous replication, cybersecurity, CBRN, and manipulation capabilities using a 77-task suite. Their research shows task completion time horizons doubling every 7 months (accelerating to 4 months in 2024-2025), with GPT-5 achieving 2h17m 50%-time horizon; no models yet capable of autonomous replication but gap narrowing rapidly.",
  "description": "Model Evaluation and Threat Research conducts dangerous capability evaluations for frontier AI models, testing for autonomous replication, cybersecurity, CBRN, and manipulation capabilities. Funded by 17M USD from The Audacious Project, their 77-task evaluation suite and time horizon research (showing 7-month doubling, accelerating to 4 months) directly informs deployment decisions at OpenAI, Anthropic, and Google DeepMind.",
  "ratings": {
    "novelty": 4.5,
    "rigor": 6.5,
    "actionability": 7,
    "completeness": 7.5
  },
  "category": "organizations",
  "subcategory": "safety-orgs",
  "clusters": [
    "ai-safety",
    "community",
    "governance"
  ],
  "metrics": {
    "wordCount": 4387,
    "tableCount": 8,
    "diagramCount": 1,
    "internalLinks": 44,
    "externalLinks": 10,
    "footnoteCount": 0,
    "bulletRatio": 0.06,
    "sectionCount": 29,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 4387,
  "unconvertedLinks": [
    {
      "text": "time horizons paper",
      "url": "https://arxiv.org/abs/2503.14499",
      "resourceId": "ddd93038c44fbd36",
      "resourceTitle": "arXiv:2503.14499"
    },
    {
      "text": "March 2025 research",
      "url": "https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/",
      "resourceId": "271fc5f73a8304b2",
      "resourceTitle": "Measuring AI Ability to Complete Long Tasks - METR"
    },
    {
      "text": "December 2025 analysis",
      "url": "https://metr.org/blog/2025-12-09-common-elements-of-frontier-ai-safety-policies/",
      "resourceId": "c8782940b880d00f",
      "resourceTitle": "METR's analysis of 12 companies"
    },
    {
      "text": "UK AI Safety Institute Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    }
  ],
  "unconvertedLinkCount": 4,
  "convertedLinkCount": 23,
  "backlinkCount": 65,
  "hallucinationRisk": {
    "level": "high",
    "score": 75,
    "factors": [
      "biographical-claims",
      "no-citations"
    ]
  },
  "entityType": "organization",
  "redundancy": {
    "maxSimilarity": 24,
    "similarPages": [
      {
        "id": "responsible-scaling-policies",
        "title": "Responsible Scaling Policies",
        "path": "/knowledge-base/responses/responsible-scaling-policies/",
        "similarity": 24
      },
      {
        "id": "ai-safety-institutes",
        "title": "AI Safety Institutes",
        "path": "/knowledge-base/responses/ai-safety-institutes/",
        "similarity": 23
      },
      {
        "id": "us-aisi",
        "title": "US AI Safety Institute",
        "path": "/knowledge-base/organizations/us-aisi/",
        "similarity": 22
      },
      {
        "id": "scalable-oversight",
        "title": "Scalable Oversight",
        "path": "/knowledge-base/responses/scalable-oversight/",
        "similarity": 21
      },
      {
        "id": "voluntary-commitments",
        "title": "Voluntary Industry Commitments",
        "path": "/knowledge-base/responses/voluntary-commitments/",
        "similarity": 21
      }
    ]
  },
  "changeHistory": [
    {
      "date": "2026-02-18",
      "branch": "claude/fix-issue-240-N5irU",
      "title": "Surface tacticalValue in /wiki table and score 53 pages",
      "summary": "Added `tacticalValue` to `ExploreItem` interface, `getExploreItems()` mappings, the `/wiki` explore table (new sortable \"Tact.\" column), and the card view sort dropdown. Scored 49 new pages with tactical values (4 were already scored), bringing total to 53.",
      "model": "sonnet-4",
      "duration": "~30min"
    }
  ],
  "coverage": {
    "passing": 7,
    "total": 13,
    "targets": {
      "tables": 18,
      "diagrams": 2,
      "internalLinks": 35,
      "externalLinks": 22,
      "footnotes": 13,
      "references": 13
    },
    "actuals": {
      "tables": 8,
      "diagrams": 1,
      "internalLinks": 44,
      "externalLinks": 10,
      "footnotes": 0,
      "references": 18,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "green",
      "overview": "green",
      "tables": "amber",
      "diagrams": "amber",
      "internalLinks": "green",
      "externalLinks": "amber",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "editHistoryCount": 1,
    "ratingsString": "N:4.5 R:6.5 A:7 C:7.5"
  },
  "readerRank": 64,
  "researchRank": 277,
  "recommendedScore": 195.61
}
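The caption above says this record is assembled at build time from MDX frontmatter, Entity YAML, and computed metrics. Below is a minimal TypeScript sketch of what that assembly could look like; the helper names (computeMetrics, buildPageRecord) and the precedence order are assumptions, since the wiki's real build code is not shown on this page.

// Hypothetical sketch only: these helpers and the merge precedence are
// assumptions, not the wiki's actual build pipeline.

interface PageMetrics {
  wordCount: number;
  tableCount: number;
  internalLinks: number;
  externalLinks: number;
  bulletRatio: number;
}

// Derive the record's "metrics" block from the raw MDX body.
function computeMetrics(mdx: string): PageMetrics {
  const lines = mdx.split("\n");
  const bullets = lines.filter((l) => /^\s*[-*]\s/.test(l)).length;
  return {
    wordCount: mdx.split(/\s+/).filter(Boolean).length,
    // Count one markdown table per header-separator row, e.g. |---|---|
    tableCount: (mdx.match(/^\|[\s:|-]+\|$/gm) ?? []).length,
    internalLinks: (mdx.match(/\]\(\/knowledge-base\//g) ?? []).length,
    externalLinks: (mdx.match(/\]\(https?:\/\//g) ?? []).length,
    bulletRatio: lines.length > 0 ? bullets / lines.length : 0,
  };
}

// Merge the three sources into one page record. Later spreads win on key
// conflicts; metrics are recomputed rather than trusted from frontmatter,
// and wordCount is mirrored to the top level as in the record above.
function buildPageRecord(
  entityYaml: Record<string, unknown>,
  frontmatter: Record<string, unknown>,
  mdxBody: string
): Record<string, unknown> {
  const metrics = computeMetrics(mdxBody);
  return { ...entityYaml, ...frontmatter, metrics, wordCount: metrics.wordCount };
}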
External Links
{
  "eaForum": "https://forum.effectivealtruism.org/topics/metr"
}
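One derived piece of the record worth unpacking is the coverage block: every green/amber/red item pairs a target with an actual. The values shown are consistent with a simple rollup rule, sketched below; the actual thresholds used by the wiki are an assumption.

// Hedged sketch of the coverage rollup. This rule matches every
// targets/actuals pair shown in the record (internalLinks 44/35 -> green,
// tables 8/18 -> amber, footnotes 0/13 -> red), but the wiki's real
// thresholds are not documented on this page.
type Status = "green" | "amber" | "red";

function coverageStatus(actual: number, target: number): Status {
  if (actual >= target) return "green"; // target met or exceeded
  if (actual > 0) return "amber";       // partial progress toward target
  return "red";                         // nothing counted yet
}

console.log(coverageStatus(44, 35)); // "green"
console.log(coverageStatus(8, 18));  // "amber"
console.log(coverageStatus(0, 13));  // "red"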
Backlinks (65)
id | title | type | relationship
capability-evaluations | Capability Evaluations | concept |
apollo-research | Apollo Research | organization |
far-ai | FAR AI | organization |
uk-aisi | UK AI Safety Institute | organization |
us-aisi | US AI Safety Institute | organization |
arc-evals | ARC Evaluations | organization |
astralis-foundation | Astralis Foundation | organization | leads-to
ajeya-cotra | Ajeya Cotra | person |
beth-barnes | Beth Barnes | person |
scalable-eval-approaches | Scalable Eval Approaches | approach |
dangerous-cap-evals | Dangerous Capability Evaluations | approach |
capability-elicitation | Capability Elicitation | approach |
evaluation | AI Evaluation | approach |
red-teaming | Red Teaming | approach |
model-auditing | Third-Party Model Auditing | approach |
evals-governance | Evals-Based Deployment Gates | policy |
rsp | Responsible Scaling Policies | policy |
training-programs | AI Safety Training Programs | approach |
sandboxing | Sandboxing / Containment | approach |
tool-restrictions | Tool-Use Restrictions | approach |
coding | Autonomous Coding | capability |
large-language-models | Large Language Models | concept |
long-horizon | Long-Horizon Autonomous Tasks | capability |
self-improvement | Self-Improvement and Recursive Enhancement | capability |
situational-awareness | Situational Awareness | capability |
accident-risks | AI Accident Risk Cruxes | crux |
solutions | AI Safety Solution Cruxes | crux |
ai-compute-scaling-metrics | AI Compute Scaling Metrics | analysis |
ai-timelines | AI Timelines | concept |
capability-alignment-race | Capability-Alignment Race Model | analysis |
intervention-effectiveness-matrix | Intervention Effectiveness Matrix | analysis |
model-organisms-of-misalignment | Model Organisms of Misalignment | analysis |
risk-activation-timeline | Risk Activation Timeline Model | analysis |
risk-interaction-network | Risk Interaction Network | analysis |
safety-spending-at-scale | Safety Spending at Scale | analysis |
ai-futures-project | AI Futures Project | organization |
arc | ARC (Alignment Research Center) | organization |
cais | CAIS (Center for AI Safety) | organization |
ea-funding-absorption-capacity | EA Funding Absorption Capacity | concept |
ea-global | EA Global | organization |
ftx-collapse-ea-funding-lessons | FTX Collapse: Lessons for EA Funding Resilience | concept |
funders-overview | Longtermist Funders (Overview) | concept |
government-orgs-overview | Government AI Safety Organizations (Overview) | concept |
__index__/knowledge-base/organizations | Organizations | concept |
safety-orgs-overview | AI Safety Organizations (Overview) | concept |
sff | Survival and Flourishing Fund (SFF) | organization |
the-foundation-layer | The Foundation Layer | organization |
dario-amodei | Dario Amodei | person |
dustin-moskovitz | Dustin Moskovitz (AI Safety Funder) | person |
jaan-tallinn | Jaan Tallinn | person |
alignment | AI Alignment | approach |
constitutional-ai | Constitutional AI | approach |
coordination-tech | AI Governance Coordination Technologies | approach |
corporate | Corporate AI Safety Responses | approach |
eval-saturation | Eval Saturation & The Evals Gap | approach |
evals | Evals & Red-teaming | safety-agenda |
scheming-detection | Scheming & Deception Detection | approach |
technical-research | Technical AI Safety Research | crux |
deceptive-alignment | Deceptive Alignment | risk |
emergent-capabilities | Emergent Capabilities | risk |
enfeeblement | AI-Induced Enfeeblement | risk |
existential-risk | Existential Risk from AI | concept |
reward-hacking | Reward Hacking | risk |
scheming | Scheming | risk |
trust-cascade | AI Trust Cascade Failure | risk |
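A table like the one above is plausibly built by inverting each page's converted internal links into a target-to-sources map. A small illustrative sketch follows; the PageStub shape and the sample data are fabricated for demonstration, not taken from the wiki's code.

// Illustrative only: deriving backlinks by inverting outbound links.
interface PageStub {
  id: string;
  title: string;
  entityType: string; // e.g. "organization", "concept", "person"
  links: string[];    // ids of pages this page links to
}

// Every page whose links include `target` is a backlink of `target`.
function backlinksFor(target: string, pages: PageStub[]): PageStub[] {
  return pages.filter((p) => p.links.includes(target));
}

// Tiny fabricated corpus; the live page reports 65 backlinks for "metr".
const pages: PageStub[] = [
  { id: "capability-evaluations", title: "Capability Evaluations", entityType: "concept", links: ["metr"] },
  { id: "apollo-research", title: "Apollo Research", entityType: "organization", links: ["metr"] },
  { id: "metr", title: "METR", entityType: "organization", links: ["apollo-research"] },
];

console.log(backlinksFor("metr", pages).map((p) => p.id));
// -> ["capability-evaluations", "apollo-research"]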