METR
metr · organization · Path: /knowledge-base/organizations/metr/
Entity ID (EID): E201
Page Record

database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
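A minimal sketch of what that build-time merge might look like (the function, parameter names, and precedence order are hypothetical assumptions, not this site's actual build code; only the field names in the record below come from the data):

```typescript
// Hypothetical sketch of the build-time merge described above: MDX frontmatter,
// Entity YAML, and computed metrics are combined into a single page record.
// Helper and parameter names are assumptions, not the site's actual pipeline.
interface PageRecord {
  id: string;
  path: string;
  title: string;
  [key: string]: unknown; // remaining frontmatter / entity / metric fields
}

function buildPageRecord(
  frontmatter: Record<string, unknown>,    // parsed from the page's MDX frontmatter
  entity: Record<string, unknown>,         // parsed from the Entity YAML file
  computedMetrics: Record<string, unknown> // word count, link counts, coverage, etc.
): PageRecord {
  // Assumed precedence: later sources win on key collisions, so computed
  // metrics override entity data, which overrides raw frontmatter.
  return { ...frontmatter, ...entity, ...computedMetrics } as PageRecord;
}
```

The merged record for this page is shown below.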
{
"id": "metr",
"numericId": null,
"path": "/knowledge-base/organizations/metr/",
"filePath": "knowledge-base/organizations/metr.mdx",
"title": "METR",
"quality": 66,
"readerImportance": 83.5,
"researchImportance": 50.5,
"tacticalValue": 80,
"contentFormat": "article",
"tractability": null,
"neglectedness": null,
"uncertainty": null,
"causalLevel": null,
"lastUpdated": "2026-03-13",
"dateCreated": "2026-02-15",
"llmSummary": "METR conducts pre-deployment dangerous capability evaluations for frontier AI labs (OpenAI, Anthropic, Google DeepMind), testing autonomous replication, cybersecurity, CBRN, and manipulation capabilities using a 77-task suite. Their research shows task completion time horizons doubling every 7 months (accelerating to 4 months in 2024-2025), with GPT-5 achieving 2h17m 50%-time horizon; no models yet capable of autonomous replication but gap narrowing rapidly.",
"description": "Model Evaluation and Threat Research conducts dangerous capability evaluations for frontier AI models, testing for autonomous replication, cybersecurity, CBRN, and manipulation capabilities. Funded by 17M USD from The Audacious Project, their 77-task evaluation suite and time horizon research (showing 7-month doubling, accelerating to 4 months) directly informs deployment decisions at OpenAI, Anthropic, and Google DeepMind.",
"ratings": {
"novelty": 4.5,
"rigor": 6.5,
"actionability": 7,
"completeness": 7.5
},
"category": "organizations",
"subcategory": "safety-orgs",
"clusters": [
"ai-safety",
"community",
"governance"
],
"metrics": {
"wordCount": 4387,
"tableCount": 8,
"diagramCount": 1,
"internalLinks": 44,
"externalLinks": 10,
"footnoteCount": 0,
"bulletRatio": 0.06,
"sectionCount": 29,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 21,
"evergreen": true,
"wordCount": 4387,
"unconvertedLinks": [
{
"text": "time horizons paper",
"url": "https://arxiv.org/abs/2503.14499",
"resourceId": "ddd93038c44fbd36",
"resourceTitle": "arXiv:2503.14499"
},
{
"text": "March 2025 research",
"url": "https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/",
"resourceId": "271fc5f73a8304b2",
"resourceTitle": "Measuring AI Ability to Complete Long Tasks - METR"
},
{
"text": "December 2025 analysis",
"url": "https://metr.org/blog/2025-12-09-common-elements-of-frontier-ai-safety-policies/",
"resourceId": "c8782940b880d00f",
"resourceTitle": "METR's analysis of 12 companies"
},
{
"text": "UK AI Safety Institute Frontier AI Trends Report",
"url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
"resourceId": "7042c7f8de04ccb1",
"resourceTitle": "AISI Frontier AI Trends"
}
],
"unconvertedLinkCount": 4,
"convertedLinkCount": 23,
"backlinkCount": 65,
"hallucinationRisk": {
"level": "high",
"score": 75,
"factors": [
"biographical-claims",
"no-citations"
]
},
"entityType": "organization",
"redundancy": {
"maxSimilarity": 24,
"similarPages": [
{
"id": "responsible-scaling-policies",
"title": "Responsible Scaling Policies",
"path": "/knowledge-base/responses/responsible-scaling-policies/",
"similarity": 24
},
{
"id": "ai-safety-institutes",
"title": "AI Safety Institutes",
"path": "/knowledge-base/responses/ai-safety-institutes/",
"similarity": 23
},
{
"id": "us-aisi",
"title": "US AI Safety Institute",
"path": "/knowledge-base/organizations/us-aisi/",
"similarity": 22
},
{
"id": "scalable-oversight",
"title": "Scalable Oversight",
"path": "/knowledge-base/responses/scalable-oversight/",
"similarity": 21
},
{
"id": "voluntary-commitments",
"title": "Voluntary Industry Commitments",
"path": "/knowledge-base/responses/voluntary-commitments/",
"similarity": 21
}
]
},
"changeHistory": [
{
"date": "2026-02-18",
"branch": "claude/fix-issue-240-N5irU",
"title": "Surface tacticalValue in /wiki table and score 53 pages",
"summary": "Added `tacticalValue` to `ExploreItem` interface, `getExploreItems()` mappings, the `/wiki` explore table (new sortable \"Tact.\" column), and the card view sort dropdown. Scored 49 new pages with tactical values (4 were already scored), bringing total to 53.",
"model": "sonnet-4",
"duration": "~30min"
}
],
"coverage": {
"passing": 7,
"total": 13,
"targets": {
"tables": 18,
"diagrams": 2,
"internalLinks": 35,
"externalLinks": 22,
"footnotes": 13,
"references": 13
},
"actuals": {
"tables": 8,
"diagrams": 1,
"internalLinks": 44,
"externalLinks": 10,
"footnotes": 0,
"references": 18,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"llmSummary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "green",
"overview": "green",
"tables": "amber",
"diagrams": "amber",
"internalLinks": "green",
"externalLinks": "amber",
"footnotes": "red",
"references": "green",
"quotes": "red",
"accuracy": "red"
},
"editHistoryCount": 1,
"ratingsString": "N:4.5 R:6.5 A:7 C:7.5"
},
"readerRank": 64,
"researchRank": 277,
"recommendedScore": 195.61
}

External Links
{
"eaForum": "https://forum.effectivealtruism.org/topics/metr"
}

Backlinks (65)
| id | title | type | relationship |
|---|---|---|---|
| capability-evaluations | Capability Evaluations | concept | — |
| apollo-research | Apollo Research | organization | — |
| far-ai | FAR AI | organization | — |
| uk-aisi | UK AI Safety Institute | organization | — |
| us-aisi | US AI Safety Institute | organization | — |
| arc-evals | ARC Evaluations | organization | — |
| astralis-foundation | Astralis Foundation | organization | leads-to |
| ajeya-cotra | Ajeya Cotra | person | — |
| beth-barnes | Beth Barnes | person | — |
| scalable-eval-approaches | Scalable Eval Approaches | approach | — |
| dangerous-cap-evals | Dangerous Capability Evaluations | approach | — |
| capability-elicitation | Capability Elicitation | approach | — |
| evaluation | AI Evaluation | approach | — |
| red-teaming | Red Teaming | approach | — |
| model-auditing | Third-Party Model Auditing | approach | — |
| evals-governance | Evals-Based Deployment Gates | policy | — |
| rsp | Responsible Scaling Policies | policy | — |
| training-programs | AI Safety Training Programs | approach | — |
| sandboxing | Sandboxing / Containment | approach | — |
| tool-restrictions | Tool-Use Restrictions | approach | — |
| coding | Autonomous Coding | capability | — |
| large-language-models | Large Language Models | concept | — |
| long-horizon | Long-Horizon Autonomous Tasks | capability | — |
| self-improvement | Self-Improvement and Recursive Enhancement | capability | — |
| situational-awareness | Situational Awareness | capability | — |
| accident-risks | AI Accident Risk Cruxes | crux | — |
| solutions | AI Safety Solution Cruxes | crux | — |
| ai-compute-scaling-metrics | AI Compute Scaling Metrics | analysis | — |
| ai-timelines | AI Timelines | concept | — |
| capability-alignment-race | Capability-Alignment Race Model | analysis | — |
| intervention-effectiveness-matrix | Intervention Effectiveness Matrix | analysis | — |
| model-organisms-of-misalignment | Model Organisms of Misalignment | analysis | — |
| risk-activation-timeline | Risk Activation Timeline Model | analysis | — |
| risk-interaction-network | Risk Interaction Network | analysis | — |
| safety-spending-at-scale | Safety Spending at Scale | analysis | — |
| ai-futures-project | AI Futures Project | organization | — |
| arc | ARC (Alignment Research Center) | organization | — |
| cais | CAIS (Center for AI Safety) | organization | — |
| ea-funding-absorption-capacity | EA Funding Absorption Capacity | concept | — |
| ea-global | EA Global | organization | — |
| ftx-collapse-ea-funding-lessons | FTX Collapse: Lessons for EA Funding Resilience | concept | — |
| funders-overview | Longtermist Funders (Overview) | concept | — |
| government-orgs-overview | Government AI Safety Organizations (Overview) | concept | — |
| __index__/knowledge-base/organizations | Organizations | concept | — |
| safety-orgs-overview | AI Safety Organizations (Overview) | concept | — |
| sff | Survival and Flourishing Fund (SFF) | organization | — |
| the-foundation-layer | The Foundation Layer | organization | — |
| dario-amodei | Dario Amodei | person | — |
| dustin-moskovitz | Dustin Moskovitz (AI Safety Funder) | person | — |
| jaan-tallinn | Jaan Tallinn | person | — |
| alignment | AI Alignment | approach | — |
| constitutional-ai | Constitutional AI | approach | — |
| coordination-tech | AI Governance Coordination Technologies | approach | — |
| corporate | Corporate AI Safety Responses | approach | — |
| eval-saturation | Eval Saturation & The Evals Gap | approach | — |
| evals | Evals & Red-teaming | safety-agenda | — |
| scheming-detection | Scheming & Deception Detection | approach | — |
| technical-research | Technical AI Safety Research | crux | — |
| deceptive-alignment | Deceptive Alignment | risk | — |
| emergent-capabilities | Emergent Capabilities | risk | — |
| enfeeblement | AI-Induced Enfeeblement | risk | — |
| existential-risk | Existential Risk from AI | concept | — |
| reward-hacking | Reward Hacking | risk | — |
| scheming | Scheming | risk | — |
| trust-cascade | AI Trust Cascade Failure | risk | — |
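
For the coverage block in the page record above, the item statuses appear to be derived by comparing actuals against targets. A minimal sketch under assumed thresholds (the 40% amber cutoff and the helper name are guesses chosen to be consistent with the values shown, not documented rules):

```typescript
// Hypothetical reconstruction of how the coverage item statuses
// (green / amber / red) might be derived from each target/actual pair.
type CoverageStatus = "green" | "amber" | "red";

function coverageStatus(actual: number, target: number): CoverageStatus {
  if (target <= 0 || actual >= target) return "green"; // target met (or no target set)
  if (actual >= 0.4 * target) return "amber";          // partially met (assumed cutoff)
  return "red";                                        // far short of the target
}

// Consistent with the record above:
//   tables 8/18 -> amber, diagrams 1/2 -> amber, internalLinks 44/35 -> green,
//   externalLinks 10/22 -> amber, footnotes 0/13 -> red, references 18/13 -> green.
const statuses: Record<string, CoverageStatus> = {
  tables: coverageStatus(8, 18),
  diagrams: coverageStatus(1, 2),
  internalLinks: coverageStatus(44, 35),
  externalLinks: coverageStatus(10, 22),
  footnotes: coverageStatus(0, 13),
  references: coverageStatus(18, 13),
};
```

The record's "passing": 7 would then be the count of green items across all 13 checks, including the non-numeric ones (llmSummary, schedule, entity, editHistory, overview).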