Redwood Research
redwood-research · organization · Path: /knowledge-base/organizations/redwood-research/
Entity ID (EID): E557
Page Record

database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
"id": "redwood-research",
"numericId": null,
"path": "/knowledge-base/organizations/redwood-research/",
"filePath": "knowledge-base/organizations/redwood-research.mdx",
"title": "Redwood Research",
"quality": 78,
"readerImportance": 31.5,
"researchImportance": 45,
"tacticalValue": null,
"contentFormat": "article",
"tractability": null,
"neglectedness": null,
"uncertainty": null,
"causalLevel": null,
"lastUpdated": "2026-03-13",
"dateCreated": "2026-02-15",
"llmSummary": null,
"description": "A nonprofit AI safety and security research organization founded in 2021, known for pioneering AI Control research, developing causal scrubbing interpretability methods, and conducting landmark alignment faking studies with Anthropic.",
"ratings": {
"novelty": 7,
"rigor": 7,
"actionability": 5,
"completeness": 7
},
"category": "organizations",
"subcategory": "safety-orgs",
"clusters": [
"ai-safety",
"community"
],
"metrics": {
"wordCount": 1497,
"tableCount": 8,
"diagramCount": 0,
"internalLinks": 22,
"externalLinks": 22,
"footnoteCount": 0,
"bulletRatio": 0,
"sectionCount": 24,
"hasOverview": true,
"structuralScore": 14
},
"suggestedQuality": 93,
"updateFrequency": 21,
"evergreen": true,
"wordCount": 1497,
"unconvertedLinks": [
{
"text": "Anthropic",
"url": "https://www.anthropic.com/research/alignment-faking",
"resourceId": "c2cfd72baafd64a9",
"resourceTitle": "Anthropic's 2024 alignment faking study"
},
{
"text": "arXiv",
"url": "https://arxiv.org/pdf/2312.06942",
"resourceId": "cc80ab28579c5794",
"resourceTitle": "Redwood Research's AI Control paper (December 2023)"
},
{
"text": "Anthropic",
"url": "https://www.anthropic.com/research/alignment-faking",
"resourceId": "c2cfd72baafd64a9",
"resourceTitle": "Anthropic's 2024 alignment faking study"
},
{
"text": "arXiv",
"url": "https://arxiv.org/abs/2405.19550",
"resourceId": "2bb15dbd965ba994",
"resourceTitle": "Stress-Testing Capability Elicitation With Password-Locked Models - arXiv"
},
{
"text": "arXiv",
"url": "https://arxiv.org/abs/2501.17315",
"resourceId": "56bae0415b228464",
"resourceTitle": "A Sketch of an AI Control Safety Case - arXiv"
},
{
"text": "arXiv",
"url": "https://arxiv.org/abs/2504.10374",
"resourceId": "1cbfd6cb09ce2b50",
"resourceTitle": "Ctrl-Z: Controlling AI Agents via Resampling - arXiv"
},
{
"text": "arXiv",
"url": "https://arxiv.org/abs/2512.15688",
"resourceId": "a4efd028ec6d9614",
"resourceTitle": "BashArena: A Control Setting for Highly Privileged AI Agents - arXiv"
},
{
"text": "OP Grants",
"url": "https://www.openphilanthropy.org/grants/redwood-research-general-support/",
"resourceId": "8c79e00bab007a63",
"resourceTitle": "over $9.4 million from Open Philanthropy"
},
{
"text": "OP Grants",
"url": "https://www.openphilanthropy.org/grants/",
"resourceId": "2fcdf851ed57384c",
"resourceTitle": "Open Philanthropy Grants Database"
}
],
"unconvertedLinkCount": 9,
"convertedLinkCount": 0,
"backlinkCount": 49,
"citationHealth": {
"total": 45,
"withQuotes": 30,
"verified": 30,
"accuracyChecked": 30,
"accurate": 18,
"inaccurate": 0,
"avgScore": 0.9502938191095988
},
"hallucinationRisk": {
"level": "medium",
"score": 60,
"factors": [
"biographical-claims",
"no-citations",
"high-rigor"
]
},
"entityType": "organization",
"redundancy": {
"maxSimilarity": 13,
"similarPages": [
{
"id": "ai-control",
"title": "AI Control",
"path": "/knowledge-base/responses/ai-control/",
"similarity": 13
},
{
"id": "apollo-research",
"title": "Apollo Research",
"path": "/knowledge-base/organizations/apollo-research/",
"similarity": 12
},
{
"id": "secure-ai-project",
"title": "Secure AI Project",
"path": "/knowledge-base/organizations/secure-ai-project/",
"similarity": 12
},
{
"id": "ssi",
"title": "Safe Superintelligence Inc (SSI)",
"path": "/knowledge-base/organizations/ssi/",
"similarity": 12
},
{
"id": "ilya-sutskever",
"title": "Ilya Sutskever",
"path": "/knowledge-base/people/ilya-sutskever/",
"similarity": 12
}
]
},
"coverage": {
"passing": 7,
"total": 13,
"targets": {
"tables": 6,
"diagrams": 1,
"internalLinks": 12,
"externalLinks": 7,
"footnotes": 4,
"references": 4
},
"actuals": {
"tables": 8,
"diagrams": 0,
"internalLinks": 22,
"externalLinks": 22,
"footnotes": 0,
"references": 9,
"quotesWithQuotes": 30,
"quotesTotal": 45,
"accuracyChecked": 30,
"accuracyTotal": 45
},
"items": {
"llmSummary": "red",
"schedule": "green",
"entity": "green",
"editHistory": "red",
"overview": "green",
"tables": "green",
"diagrams": "red",
"internalLinks": "green",
"externalLinks": "green",
"footnotes": "red",
"references": "green",
"quotes": "amber",
"accuracy": "amber"
},
"ratingsString": "N:7 R:7 A:5 C:7"
},
"readerRank": 438,
"researchRank": 306,
"recommendedScore": 193.28
}
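The record above is described as a merge of MDX frontmatter, entity YAML, and computed metrics performed at build time. A minimal sketch of how such a merge might look, assuming a simple spread-based merge; the names below (buildPageRecord, ratingsString, the parameter names) are illustrative assumptions, not the site's actual build code.

```ts
type Ratings = {
  novelty: number;
  rigor: number;
  actionability: number;
  completeness: number;
};

interface PageRecord {
  id: string;
  title: string;
  ratings: Ratings;
  [key: string]: unknown;
}

// Derives the compact "N:7 R:7 A:5 C:7" form seen in the coverage block above.
function ratingsString(r: Ratings): string {
  return `N:${r.novelty} R:${r.rigor} A:${r.actionability} C:${r.completeness}`;
}

// Merge order is an assumption: frontmatter first, then entity YAML, then
// computed metrics, so later sources overwrite earlier ones.
function buildPageRecord(
  frontmatter: Record<string, unknown>,
  entityYaml: Record<string, unknown>,
  computedMetrics: Record<string, unknown>
): PageRecord {
  return { ...frontmatter, ...entityYaml, ...computedMetrics } as PageRecord;
}
```

Applied to the ratings object in this record, ratingsString reproduces the "N:7 R:7 A:5 C:7" string shown under coverage.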
External Links

{
"lesswrong": "https://www.lesswrong.com/tag/redwood-research",
"eaForum": "https://forum.effectivealtruism.org/topics/redwood-research",
"grokipedia": "https://grokipedia.com/page/Redwood_Research"
}

Backlinks (49)
| id | title | type | relationship |
|---|---|---|---|
| field-building | AI Safety Field Building and Community | crux | — |
| research-agendas | AI Alignment Research Agendas | crux | — |
| technical-research | Technical AI Safety Research | crux | — |
| conjecture | Conjecture | organization | — |
| ai-control | AI Control | safety-agenda | — |
| interpretability | Interpretability | safety-agenda | — |
| language-models | Large Language Models | capability | — |
| large-language-models | Large Language Models | concept | — |
| why-alignment-easy | Why Alignment Might Be Easy | argument | — |
| ea-epistemic-failures-in-the-ftx-era | EA Epistemic Failures in the FTX Era | concept | — |
| ea-longtermist-wins-losses | EA and Longtermist Wins and Losses | concept | — |
| ai-talent-market-dynamics | AI Talent Market Dynamics | analysis | — |
| corrigibility-failure-pathways | Corrigibility Failure Pathways | analysis | — |
| deceptive-alignment-decomposition | Deceptive Alignment Decomposition Model | analysis | — |
| instrumental-convergence-framework | Instrumental Convergence Framework | analysis | — |
| intervention-effectiveness-matrix | Intervention Effectiveness Matrix | analysis | — |
| model-organisms-of-misalignment | Model Organisms of Misalignment | analysis | — |
| planning-for-frontier-lab-scaling | Planning for Frontier Lab Scaling | analysis | — |
| power-seeking-conditions | Power-Seeking Emergence Conditions Model | analysis | — |
| safety-spending-at-scale | Safety Spending at Scale | analysis | — |
| scheming-likelihood-model | Scheming Likelihood Assessment | analysis | — |
| worldview-intervention-mapping | Worldview-Intervention Mapping | analysis | — |
| arc | ARC (Alignment Research Center) | organization | — |
| bridgewater-aia-labs | Bridgewater AIA Labs | organization | — |
| cais | CAIS (Center for AI Safety) | organization | — |
| controlai | ControlAI | organization | — |
| ea-funding-absorption-capacity | EA Funding Absorption Capacity | concept | — |
| ea-global | EA Global | organization | — |
| fli | Future of Life Institute (FLI) | organization | — |
| founders-fund | Founders Fund | organization | — |
| frontier-model-forum | Frontier Model Forum | organization | — |
| ftx-future-fund | FTX Future Fund | organization | — |
| funders-overview | Longtermist Funders (Overview) | concept | — |
| giving-pledge | Giving Pledge | organization | — |
| __index__/knowledge-base/organizations | Organizations | concept | — |
| mats | MATS ML Alignment Theory Scholars program | organization | — |
| palisade-research | Palisade Research | organization | — |
| safety-orgs-overview | AI Safety Organizations (Overview) | concept | — |
| seldon-lab | Seldon Lab | organization | — |
| sentinel | Sentinel (Catastrophic Risk Foresight) | organization | — |
| swift-centre | Swift Centre | organization | — |
| the-foundation-layer | The Foundation Layer | organization | — |
| ajeya-cotra | Ajeya Cotra | person | — |
| connor-leahy | Connor Leahy | person | — |
| holden-karnofsky | Holden Karnofsky | person | — |
| jan-leike | Jan Leike | person | — |
| nick-bostrom | Nick Bostrom | person | — |
| alignment | AI Alignment | approach | — |
| deceptive-alignment | Deceptive Alignment | risk | — |
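For reference, the coverage block in the page record compares per-item targets against actuals and rolls the results up into passing/total (7 of the 13 items are green). A minimal sketch of one rule that would reproduce the statuses shown above, assuming "meets target is green, partially met is amber, nothing is red"; this matches the data in this record but is not necessarily the site's actual logic.

```ts
type Status = "green" | "amber" | "red";

// Hypothetical status rule: meeting the target is green, a non-zero
// shortfall is amber, zero actuals is red. Consistent with the tables,
// diagrams, and footnotes statuses above, but an assumption.
function coverageStatus(actual: number, target: number): Status {
  if (actual >= target) return "green";
  if (actual > 0) return "amber";
  return "red";
}

// "passing" then simply counts green items out of all tracked items.
function passing(items: Record<string, Status>): { passing: number; total: number } {
  const statuses = Object.values(items);
  return {
    passing: statuses.filter((s) => s === "green").length,
    total: statuses.length,
  };
}
```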