Anthropic
ID: anthropic · Type: organization · Path: /knowledge-base/organizations/anthropic/
Entity ID (EID): E22
Page Record (database.json) — merged from MDX frontmatter + Entity YAML + computed metrics at build time
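A minimal sketch of how this merged record could be assembled at build time, assuming hypothetical loader helpers and merge precedence; the field names are taken from the record below, but the project's actual types and merge order are not documented here.

```ts
// Sketch only: real types and merge precedence are assumptions.
// Field names come from the database.json record shown below.
interface PageRecord {
  id: string;
  path: string;
  filePath: string;
  title: string;
  quality: number;
  ratings: Record<string, number>;            // focus, novelty, rigor, ...
  metrics: Record<string, number | boolean>;  // wordCount, tableCount, ...
  backlinkCount: number;
  [key: string]: unknown;                     // remaining merged fields
}

function buildPageRecord(
  frontmatter: Record<string, unknown>, // parsed MDX frontmatter
  entity: Record<string, unknown>,      // Entity YAML fields
  computed: Record<string, unknown>,    // metrics computed at build time
): PageRecord {
  // Assumed precedence: computed metrics override entity YAML,
  // which overrides frontmatter, on key collisions.
  return { ...frontmatter, ...entity, ...computed } as PageRecord;
}
```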
{
"id": "anthropic",
"numericId": null,
"path": "/knowledge-base/organizations/anthropic/",
"filePath": "knowledge-base/organizations/anthropic.mdx",
"title": "Anthropic",
"quality": 74,
"readerImportance": 52.3,
"researchImportance": 57.5,
"tacticalValue": 92,
"contentFormat": "article",
"tractability": null,
"neglectedness": null,
"uncertainty": null,
"causalLevel": null,
"lastUpdated": "2026-03-12",
"dateCreated": "2026-02-15",
"llmSummary": "Comprehensive reference page on Anthropic covering financials (\\$380B valuation, \\$14B ARR), safety research (Constitutional AI, mechanistic interpretability, model welfare), governance (LTBT structure), controversies (alignment faking at 12%, RSP rollback), and competitive positioning (42% enterprise coding share). Highly concrete with specific numbers throughout but primarily descriptive compilation rather than original analysis.",
"description": "An AI safety company founded by former OpenAI researchers that develops frontier AI models while pursuing safety research, including the Claude model family, Constitutional AI, and mechanistic interpretability.",
"ratings": {
"focus": 7.5,
"novelty": 4.2,
"rigor": 6.8,
"completeness": 7.8,
"concreteness": 8.1,
"actionability": 4.5,
"objectivity": 6.2
},
"category": "organizations",
"subcategory": "labs",
"clusters": [
"ai-safety",
"community",
"governance"
],
"metrics": {
"wordCount": 5082,
"tableCount": 4,
"diagramCount": 2,
"internalLinks": 79,
"externalLinks": 15,
"footnoteCount": 0,
"bulletRatio": 0,
"sectionCount": 41,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 3,
"evergreen": true,
"wordCount": 5082,
"unconvertedLinks": [
{
"text": "Harvard Law",
"url": "https://corpgov.law.harvard.edu/2023/10/28/anthropic-long-term-benefit-trust/",
"resourceId": "357cf00ad44eea37"
},
{
"text": "Anthropic",
"url": "https://www.anthropic.com/news/claude-opus-4-5",
"resourceId": "57f01cae307e1cb1"
},
{
"text": "TechCrunch",
"url": "https://techcrunch.com/2025/07/31/enterprises-prefer-anthropics-ai-models-over-anyone-elses-including-openais/",
"resourceId": "3a07423e8bf204c2"
},
{
"text": "Anthropic",
"url": "https://transformer-circuits.pub/2024/scaling-monosemanticity/index.html",
"resourceId": "426fcdeae8e2b749",
"resourceTitle": "Anthropic's dictionary learning work"
},
{
"text": "MIT TR",
"url": "https://www.technologyreview.com/2026/01/12/1130003/mechanistic-interpretability-ai-research-models-2026-breakthrough-technologies/",
"resourceId": "3a4cf664bf7b27a8",
"resourceTitle": "Mechanistic interpretability: 10 Breakthrough Technologies 2026 | MIT Technology Review"
},
{
"text": "Bank Info Security",
"url": "https://www.bankinfosecurity.com/models-strategically-lie-finds-anthropic-study-a-27136",
"resourceId": "de18440757f72c95",
"resourceTitle": "Models Can Strategically Lie, Finds Anthropic Study"
},
{
"text": "Axios",
"url": "https://www.axios.com/2025/05/23/anthropic-ai-deception-risk",
"resourceId": "e76f688da38ef0fd",
"resourceTitle": "Axios: Anthropic AI Deception Risk (May 2025)"
},
{
"text": "Glassdoor",
"url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
"resourceId": "150ce5e988bc1e00",
"resourceTitle": "Glassdoor: Working at Anthropic"
},
{
"text": "Glassdoor",
"url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
"resourceId": "150ce5e988bc1e00",
"resourceTitle": "Glassdoor: Working at Anthropic"
},
{
"text": "Glassdoor",
"url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
"resourceId": "150ce5e988bc1e00",
"resourceTitle": "Glassdoor: Working at Anthropic"
},
{
"text": "Glassdoor",
"url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
"resourceId": "150ce5e988bc1e00",
"resourceTitle": "Glassdoor: Working at Anthropic"
},
{
"text": "Glassdoor",
"url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
"resourceId": "150ce5e988bc1e00",
"resourceTitle": "Glassdoor: Working at Anthropic"
}
],
"unconvertedLinkCount": 12,
"convertedLinkCount": 0,
"backlinkCount": 256,
"citationHealth": {
"total": 42,
"withQuotes": 30,
"verified": 29,
"accuracyChecked": 29,
"accurate": 11,
"inaccurate": 6,
"avgScore": 0.9623388965924581
},
"hallucinationRisk": {
"level": "high",
"score": 75,
"factors": [
"biographical-claims",
"no-citations"
]
},
"entityType": "organization",
"redundancy": {
"maxSimilarity": 20,
"similarPages": [
{
"id": "anthropic-investors",
"title": "Anthropic (Funder)",
"path": "/knowledge-base/organizations/anthropic-investors/",
"similarity": 20
},
{
"id": "deep-learning-era",
"title": "Deep Learning Revolution (2012-2020)",
"path": "/knowledge-base/history/deep-learning-era/",
"similarity": 19
},
{
"id": "agentic-ai",
"title": "Agentic AI",
"path": "/knowledge-base/capabilities/agentic-ai/",
"similarity": 18
},
{
"id": "solutions",
"title": "AI Safety Solution Cruxes",
"path": "/knowledge-base/cruxes/solutions/",
"similarity": 18
},
{
"id": "openai-foundation",
"title": "OpenAI Foundation",
"path": "/knowledge-base/organizations/openai-foundation/",
"similarity": 18
}
]
},
"changeHistory": [
{
"date": "2026-02-26",
"branch": "claude/claims-driven-improvements",
"title": "Auto-improve (standard): Anthropic",
"summary": "Improved \"Anthropic\" via standard pipeline (570.8s). Quality score: 74. Issues resolved: Section duplication: 'Competitive Positioning' subsection un; Section duplication: 'Safety Levels' subsection repeats cont; Section duplication: The 'Quick Financial Context' subsectio.",
"duration": "570.8s",
"cost": "$5-8"
},
{
"date": "2026-02-24",
"branch": "feat/stale-fact-detection-581-582",
"title": "Batch content fixes + stale-facts validator + 2 new validation rules",
"summary": "(fill in)",
"pr": 924,
"model": "claude-sonnet-4-6"
},
{
"date": "2026-02-19",
"branch": "claude/resolve-issue-203-d8IBd",
"title": "Calc pipeline iteration: fix range facts, index mismatch, prompt quality",
"summary": "Ran `crux facts calc` on anthropic-valuation and anthropic pages post-implementation, discovered and fixed three bugs: (1) range-valued facts ({min: N}) invisible to LLM and evaluator, (2) proposal-to-pattern index mismatch causing wrong validation expected values, (3) over-wide originalText proposals including JSX tags or prose. Applied validated Calc replacements to two pages (openai.39d6868e/$500B valuation now computes correctly).",
"model": "sonnet-4",
"duration": "~40min"
},
{
"date": "2026-02-18",
"branch": "claude/source-unsourced-facts-RecGw",
"title": "Source unsourced facts",
"summary": "Sourced 25 of 30 previously unsourced facts across all 4 fact files (anthropic, sam-altman, openai, jaan-tallinn). Created 21 new resource entries in news-media.yaml and ai-labs.yaml with proper SHA256-based IDs. Added 8 new publications (Bloomberg, The Information, Quartz, Benzinga, Britannica, World, Sherwood News). Fixed date accuracy issues (Worldcoin stats from 2024 to 2025-05, OpenAI revenue from Oct to Jun 2024) and improved notes. Source coverage improved from 29% to 88%.",
"model": "opus-4-6",
"duration": "~45min"
},
{
"date": "2026-02-18",
"branch": "claude/review-pr-216-P4Fcu",
"title": "Fix audit report findings from PR #216",
"summary": "Reviewed PR #216 (comprehensive wiki audit report) and implemented fixes for the major issues it identified: fixed 181 path-style EntityLink IDs across 33 files, converted 164 broken EntityLinks (referencing non-existent entities) to plain text across 38 files, fixed a temporal inconsistency in anthropic.mdx, and added missing description fields to 53 ai-transition-model pages."
},
{
"date": "2026-02-18",
"branch": "claude/highlight-stakeholder-table-VtY0t",
"title": "Create dedicated Anthropic stakeholder page",
"summary": "Created a new dedicated `anthropic-stakeholders` page with the most shareable ownership tables (all stakeholders with stakes, values, EA alignment), added a condensed stakeholder summary to the top of the main Anthropic page, and wrote 4 proposed GitHub issues for broader system changes (datasets infrastructure, importance metrics rethink, concrete data expansion, continuous maintenance).",
"model": "opus-4-6",
"duration": "~30min"
},
{
"date": "2026-02-18",
"branch": "claude/fact-hash-ids-UETLf",
"title": "Migrate fact IDs from human-readable to hash-based",
"summary": "Migrated all canonical fact IDs from human-readable slugs (e.g., `revenue-arr-2025`) to 8-char random hex hashes (e.g., `55d88868`), matching the pattern used by resources. Updated all YAML files, MDX references, build scripts, tests, LLM prompts, and documentation.",
"model": "opus-4-6",
"duration": "~45min"
},
{
"date": "2026-02-18",
"branch": "claude/audit-webpage-errors-11sSF",
"title": "Fix factual errors found in wiki audit",
"summary": "Systematically audited ~35+ high-risk wiki pages for factual errors and hallucinations using parallel background agents plus direct reading. Fixed 13 confirmed errors across 11 files."
}
],
"coverage": {
"passing": 8,
"total": 13,
"targets": {
"tables": 20,
"diagrams": 2,
"internalLinks": 41,
"externalLinks": 25,
"footnotes": 15,
"references": 15
},
"actuals": {
"tables": 4,
"diagrams": 2,
"internalLinks": 79,
"externalLinks": 15,
"footnotes": 0,
"references": 102,
"quotesWithQuotes": 30,
"quotesTotal": 42,
"accuracyChecked": 29,
"accuracyTotal": 42
},
"items": {
"llmSummary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "green",
"overview": "green",
"tables": "amber",
"diagrams": "green",
"internalLinks": "green",
"externalLinks": "amber",
"footnotes": "red",
"references": "green",
"quotes": "amber",
"accuracy": "amber"
},
"editHistoryCount": 8,
"ratingsString": "N:4.2 R:6.8 A:4.5 C:7.8"
},
"readerRank": 282,
"researchRank": 239,
"recommendedScore": 195.88
}
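The change history above describes two ID schemes: canonical fact IDs migrated to 8-char random hex hashes (e.g. `55d88868`), and resource entries created "with proper SHA256-based IDs". The resourceIds in this record are 16 hex characters (e.g. `357cf00ad44eea37`), consistent with a truncated SHA-256 digest. A hedged sketch of both, assuming Node's crypto module and a URL-hashing derivation that the record itself does not confirm:

```ts
import { createHash, randomBytes } from "node:crypto";

// Fact IDs: 8-char random hex hashes per the 2026-02-18 migration entry
// (slugs like `revenue-arr-2025` -> hashes like `55d88868`).
// Using randomBytes here is an assumed mechanism.
function newFactId(): string {
  return randomBytes(4).toString("hex"); // 4 bytes -> 8 hex chars
}

// Resource IDs: assumed to be the first 16 hex chars of a SHA-256
// digest of the resource URL; the hash input is a guess.
function resourceId(url: string): string {
  return createHash("sha256").update(url).digest("hex").slice(0, 16);
}
```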
External Links
{
"wikipedia": "https://en.wikipedia.org/wiki/Anthropic",
"lesswrong": "https://www.lesswrong.com/tag/anthropic-org",
"wikidata": "https://www.wikidata.org/wiki/Q116758847",
"grokipedia": "https://grokipedia.com/page/Anthropic"
}
Backlinks (256)
| id | title | type | relationship |
|---|---|---|---|
| claude | Claude | ai-model | created-by |
| claude-3-opus | Claude 3 Opus | ai-model | created-by |
| claude-3-sonnet | Claude 3 Sonnet | ai-model | created-by |
| claude-3-haiku | Claude 3 Haiku | ai-model | created-by |
| claude-3-5-sonnet | Claude 3.5 Sonnet | ai-model | created-by |
| claude-3-5-haiku | Claude 3.5 Haiku | ai-model | created-by |
| claude-3-7-sonnet | Claude 3.7 Sonnet | ai-model | created-by |
| claude-sonnet-4 | Claude Sonnet 4 | ai-model | created-by |
| claude-opus-4 | Claude Opus 4 | ai-model | created-by |
| claude-opus-4-1 | Claude Opus 4.1 | ai-model | created-by |
| claude-sonnet-4-5 | Claude Sonnet 4.5 | ai-model | created-by |
| claude-haiku-4-5 | Claude Haiku 4.5 | ai-model | created-by |
| claude-opus-4-5 | Claude Opus 4.5 | ai-model | created-by |
| claude-opus-4-6 | Claude Opus 4.6 | ai-model | created-by |
| claude-sonnet-4-6 | Claude Sonnet 4.6 | ai-model | created-by |
| agentic-ai | Agentic AI | capability | — |
| situational-awareness | Situational Awareness | capability | — |
| tool-use | Tool Use and Computer Use | capability | — |
| ea-shareholder-diversification-anthropic | EA Shareholder Diversification from Anthropic | concept | — |
| corporate-influence | Corporate Influence on AI Policy | crux | — |
| field-building | AI Safety Field Building and Community | crux | — |
| research-agendas | AI Alignment Research Agendas | crux | — |
| technical-research | Technical AI Safety Research | crux | — |
| ai-welfare | AI Welfare and Digital Minds | concept | — |
| accident-risks | AI Accident Risk Cruxes | crux | — |
| large-language-models | Large Language Models | concept | — |
| heavy-scaffolding | Heavy Scaffolding / Agentic Systems | concept | — |
| dense-transformers | Dense Transformers | concept | — |
| mainstream-era | Mainstream Era | historical | — |
| anthropic-government-standoff | Anthropic-Pentagon Standoff (2026) | event | — |
| openai-foundation-governance | OpenAI Foundation Governance Paradox | analysis | — |
| anthropic-valuation | Anthropic Valuation Analysis | analysis | — |
| anthropic-stakeholders | Anthropic Stakeholders | table | — |
| anthropic-investors | Anthropic (Funder) | analysis | — |
| long-term-benefit-trust | Long-Term Benefit Trust (Anthropic) | analysis | — |
| anthropic-ipo | Anthropic IPO | analysis | — |
| anthropic-impact | Anthropic Impact Assessment Model | analysis | — |
| capability-alignment-race | Capability-Alignment Race Model | analysis | — |
| short-timeline-policy-implications | Short AI Timeline Policy Implications | analysis | — |
| technical-pathways | AI Safety Technical Pathway Decomposition | analysis | — |
| feedback-loops | AI Risk Feedback Loop & Cascade Model | analysis | — |
| multi-actor-landscape | AI Safety Multi-Actor Strategic Landscape | analysis | — |
| model-organisms-of-misalignment | Model Organisms of Misalignment | analysis | — |
| ea-biosecurity-scope | Is EA Biosecurity Work Limited to Restricting LLM Biological Use? | analysis | — |
| deceptive-alignment-decomposition | Deceptive Alignment Decomposition Model | analysis | research |
| deepmind | Google DeepMind | organization | — |
| openai | OpenAI | organization | — |
| xai | xAI | organization | — |
| apollo-research | Apollo Research | organization | — |
| cais | CAIS | organization | — |
| conjecture | Conjecture | organization | — |
| metr | METR | organization | — |
| arc | ARC | organization | — |
| uk-aisi | UK AI Safety Institute | organization | — |
| us-aisi | US AI Safety Institute | organization | — |
| openai-foundation | OpenAI Foundation | organization | — |
| johns-hopkins-center-for-health-security | Johns Hopkins Center for Health Security | organization | — |
| ssi | Safe Superintelligence Inc (SSI) | organization | — |
| frontier-model-forum | Frontier Model Forum | organization | — |
| palisade-research | Palisade Research | organization | — |
| goodfire | Goodfire | organization | — |
| redwood-research | Redwood Research | organization | — |
| chris-olah | Chris Olah | person | — |
| dario-amodei | Dario Amodei | person | — |
| holden-karnofsky | Holden Karnofsky | person | — |
| jan-leike | Jan Leike | person | — |
| david-sacks | David Sacks (White House AI Czar) | person | — |
| voluntary-commitments | Voluntary AI Safety Commitments | policy | — |
| anthropic-core-views | Anthropic Core Views | safety-agenda | — |
| interpretability | Interpretability | safety-agenda | — |
| eval-saturation | Eval Saturation & The Evals Gap | approach | — |
| evaluation-awareness | Evaluation Awareness | approach | — |
| alignment | AI Alignment | approach | — |
| scalable-eval-approaches | Scalable Eval Approaches | approach | — |
| scheming-detection | Scheming & Deception Detection | approach | — |
| dangerous-cap-evals | Dangerous Capability Evaluations | approach | — |
| capability-elicitation | Capability Elicitation | approach | — |
| safety-cases | AI Safety Cases | approach | — |
| sleeper-agent-detection | Sleeper Agent Detection | approach | — |
| ai-assisted | AI-Assisted Alignment | approach | — |
| evaluation | AI Evaluation | approach | — |
| alignment-evals | Alignment Evaluations | approach | — |
| red-teaming | Red Teaming | approach | — |
| constitutional-ai | Constitutional AI | approach | — |
| weak-to-strong | Weak-to-Strong Generalization | approach | — |
| preference-optimization | Preference Optimization Methods | approach | — |
| refusal-training | Refusal Training | approach | — |
| california-sb53 | California SB 53 | policy | — |
| evals-governance | Evals-Based Deployment Gates | policy | — |
| rsp | Responsible Scaling Policies | policy | — |
| corporate | Corporate AI Safety Responses | approach | — |
| lab-culture | AI Lab Safety Culture | approach | — |
| training-programs | AI Safety Training Programs | approach | — |
| mech-interp | Mechanistic Interpretability | approach | — |
| circuit-breakers | Circuit Breakers / Inference Interventions | approach | — |
| sparse-autoencoders | Sparse Autoencoders (SAEs) | approach | — |
| debate | AI Safety via Debate | approach | — |
| sandboxing | Sandboxing / Containment | approach | — |
| structured-access | Structured Access / API-Only | approach | — |
| tool-restrictions | Tool-Use Restrictions | approach | — |
| bioweapons | Bioweapons Risk | risk | — |
| deceptive-alignment | Deceptive Alignment | risk | — |
| racing-dynamics | AI Development Racing Dynamics | risk | — |
| sycophancy | Sycophancy | risk | — |
| sleeper-agents | Sleeper Agents: Training Deceptive LLMs | risk | — |
| coding | Autonomous Coding | capability | — |
| language-models | Large Language Models | capability | — |
| long-horizon | Long-Horizon Autonomous Tasks | capability | — |
| persuasion | Persuasion and Social Manipulation | capability | — |
| self-improvement | Self-Improvement and Recursive Enhancement | capability | — |
| solutions | AI Safety Solution Cruxes | crux | — |
| case-for-xrisk | The Case FOR AI Existential Risk | argument | — |
| interpretability-sufficient | Is Interpretability Sufficient for Safety? | crux | — |
| pause-debate | Should We Pause AI Development? | crux | — |
| why-alignment-easy | Why Alignment Might Be Easy | argument | — |
| why-alignment-hard | Why Alignment Might Be Hard | argument | — |
| agi-development | AGI Development | concept | — |
| agi-timeline | AGI Timeline | concept | — |
| deep-learning-era | Deep Learning Revolution (2012-2020) | historical | — |
| ea-longtermist-wins-losses | EA and Longtermist Wins and Losses | concept | — |
| __index__/knowledge-base/history | History | concept | — |
| longtermism-credibility-after-ftx | Longtermism's Philosophical Credibility After FTX | concept | — |
| claude-code-espionage-2025 | Claude Code Espionage Incident (2025) | concept | — |
| __index__/knowledge-base | Knowledge Base | concept | — |
| ai-compute-scaling-metrics | AI Compute Scaling Metrics | analysis | — |
| ai-risk-portfolio-analysis | AI Risk Portfolio Analysis | analysis | — |
| ai-talent-market-dynamics | AI Talent Market Dynamics | analysis | — |
| ai-timelines | AI Timelines | concept | — |
| alignment-robustness-trajectory | Alignment Robustness Trajectory | analysis | — |
| bioweapons-ai-uplift | AI Uplift Assessment Model | analysis | — |
| corrigibility-failure-pathways | Corrigibility Failure Pathways | analysis | — |
| cyberweapons-attack-automation | Autonomous Cyber Attack Timeline | analysis | — |
| frontier-lab-cost-structure | Frontier Lab Cost Structure | analysis | — |
| goal-misgeneralization-probability | Goal Misgeneralization Probability Model | analysis | — |
| instrumental-convergence-framework | Instrumental Convergence Framework | analysis | — |
| international-coordination-game | International AI Coordination Game | analysis | — |
| intervention-effectiveness-matrix | Intervention Effectiveness Matrix | analysis | — |
| intervention-timing-windows | Intervention Timing Windows | analysis | — |
| multipolar-trap-dynamics | Multipolar Trap Dynamics Model | analysis | — |
| planning-for-frontier-lab-scaling | Planning for Frontier Lab Scaling | analysis | — |
| power-seeking-conditions | Power-Seeking Emergence Conditions Model | analysis | — |
| pre-tai-capital-deployment | Pre-TAI Capital Deployment: $100B-$300B+ Spending Analysis | analysis | — |
| racing-dynamics-impact | Racing Dynamics Impact Model | analysis | — |
| reward-hacking-taxonomy | Reward Hacking Taxonomy and Severity Model | analysis | — |
| risk-activation-timeline | Risk Activation Timeline Model | analysis | — |
| risk-interaction-matrix | Risk Interaction Matrix Model | analysis | — |
| safety-culture-equilibrium | Safety Culture Equilibrium | analysis | — |
| safety-research-allocation | Safety Research Allocation Model | analysis | — |
| safety-researcher-gap | AI Safety Talent Supply/Demand Gap Model | analysis | — |
| safety-spending-at-scale | Safety Spending at Scale | analysis | — |
| scaling-laws | AI Scaling Laws | concept | — |
| scheming-likelihood-model | Scheming Likelihood Assessment | analysis | — |
| worldview-intervention-mapping | Worldview-Intervention Mapping | analysis | — |
| ai-futures-project | AI Futures Project | organization | — |
| ai-impacts | AI Impacts | organization | — |
| ai-revenue-sources | AI Revenue Sources | organization | — |
| biosecurity-orgs-overview | Biosecurity Organizations (Overview) | concept | — |
| bridgewater-aia-labs | Bridgewater AIA Labs | organization | — |
| chai | CHAI (Center for Human-Compatible AI) | organization | — |
| controlai | ControlAI | organization | — |
| cser | CSER (Centre for the Study of Existential Risk) | organization | — |
| ea-global | EA Global | organization | — |
| elicit | Elicit (AI Research Tool) | organization | — |
| elon-musk-philanthropy | Elon Musk (Funder) | analysis | — |
| far-ai | FAR AI | organization | — |
| fhi | Future of Humanity Institute (FHI) | organization | — |
| founders-fund | Founders Fund | organization | — |
| frontier-ai-comparison | Frontier AI Company Comparison (2026) | concept | — |
| ftx-collapse-ea-funding-lessons | FTX Collapse: Lessons for EA Funding Resilience | concept | — |
| ftx-future-fund | FTX Future Fund | organization | — |
| ftx | FTX (cryptocurrency exchange) | organization | — |
| futuresearch | FutureSearch | organization | — |
| giving-pledge | Giving Pledge | organization | — |
| govai | GovAI | organization | — |
| __index__/knowledge-base/organizations | Organizations | concept | — |
| kalshi | Kalshi (Prediction Market) | organization | — |
| labs-overview | Frontier AI Labs (Overview) | concept | — |
| leading-the-future | Leading the Future super PAC | organization | — |
| lesswrong | LessWrong | organization | — |
| lionheart-ventures | Lionheart Ventures | organization | — |
| mats | MATS ML Alignment Theory Scholars program | organization | — |
| meta-ai | Meta AI (FAIR) | organization | — |
| microsoft | Microsoft AI | organization | — |
| pause-ai | Pause AI | organization | — |
| safety-orgs-overview | AI Safety Organizations (Overview) | concept | — |
| securebio | SecureBio | organization | — |
| seldon-lab | Seldon Lab | organization | — |
| situational-awareness-lp | Situational Awareness LP | organization | — |
| swift-centre | Swift Centre | organization | — |
| connor-leahy | Connor Leahy | person | — |
| dan-hendrycks | Dan Hendrycks | person | — |
| daniela-amodei | Daniela Amodei | person | — |
| demis-hassabis | Demis Hassabis | person | — |
| dustin-moskovitz | Dustin Moskovitz (AI Safety Funder) | person | — |
| eliezer-yudkowsky | Eliezer Yudkowsky | person | — |
| elon-musk | Elon Musk (AI Industry) | person | — |
| evan-hubinger | Evan Hubinger | person | — |
| geoffrey-hinton | Geoffrey Hinton | person | — |
| ilya-sutskever | Ilya Sutskever | person | — |
| __index__/knowledge-base/people | People | concept | — |
| jaan-tallinn | Jaan Tallinn | person | — |
| leopold-aschenbrenner | Leopold Aschenbrenner | person | — |
| max-tegmark | Max Tegmark | person | — |
| neel-nanda | Neel Nanda | person | — |
| nick-bostrom | Nick Bostrom | person | — |
| paul-christiano | Paul Christiano | person | — |
| sam-altman | Sam Altman | person | — |
| vipul-naik | Vipul Naik | person | — |
| yann-lecun-predictions | Yann LeCun: Track Record | concept | — |
| yoshua-bengio | Yoshua Bengio | person | — |
| ai-control | AI Control | safety-agenda | — |
| ai-forecasting-benchmark | AI Forecasting Benchmark Tournament | project | — |
| california-sb1047 | California SB 1047 | policy | — |
| coordination-mechanisms | International Coordination Mechanisms | policy | — |
| coordination-tech | AI Governance Coordination Technologies | approach | — |
| corrigibility | Corrigibility Research | safety-agenda | — |
| deliberation | AI-Assisted Deliberation Platforms | approach | — |
| effectiveness-assessment | Policy Effectiveness Assessment | analysis | — |
| eu-ai-act | EU AI Act | policy | — |
| evals | Evals & Red-teaming | safety-agenda | — |
| governance-policy | AI Governance and Policy | crux | — |
| international-summits | International AI Safety Summits | policy | — |
| model-auditing | Third-Party Model Auditing | approach | — |
| model-spec | AI Model Specifications | policy | — |
| multi-agent | Multi-Agent Safety | approach | — |
| output-filtering | AI Output Filtering | approach | — |
| process-supervision | Process Supervision | approach | — |
| reducing-hallucinations | Reducing Hallucinations in AI-Generated Wiki Content | approach | — |
| responsible-scaling-policies | Responsible Scaling Policies | policy | — |
| reward-modeling | Reward Modeling | approach | — |
| scalable-oversight | Scalable Oversight | safety-agenda | — |
| seoul-declaration | Seoul AI Safety Summit Declaration | policy | — |
| state-capacity-ai-governance | State Capacity and AI Governance | concept | — |
| us-executive-order | US Executive Order on Safe, Secure, and Trustworthy AI | policy | — |
| us-state-legislation | US State AI Legislation | policy | — |
| whistleblower-protections | AI Whistleblower Protections | policy | — |
| concentrated-compute-cybersecurity-risk | Concentrated Compute as a Cybersecurity Risk | risk | — |
| corrigibility-failure | Corrigibility Failure | risk | — |
| cyber-psychosis | AI-Induced Cyber Psychosis | risk | — |
| disinformation | Disinformation | risk | — |
| emergent-capabilities | Emergent Capabilities | risk | — |
| epistemic-sycophancy | Epistemic Sycophancy | risk | — |
| existential-risk | Existential Risk from AI | concept | — |
| instrumental-convergence | Instrumental Convergence | risk | — |
| knowledge-monopoly | AI Knowledge Monopoly | risk | — |
| mesa-optimization | Mesa-Optimization | risk | — |
| power-seeking | Power-Seeking AI | risk | — |
| reward-hacking | Reward Hacking | risk | — |
| sandbagging | AI Capability Sandbagging | risk | — |
| scheming | Scheming | risk | — |
| superintelligence | Superintelligence | concept | — |
| winner-take-all | AI Winner-Take-All Dynamics | risk | — |
| long-timelines | Long-Timelines Technical Worldview | concept | — |
| optimistic | Optimistic Alignment Worldview | concept | — |
| longtermwiki-value-proposition | LongtermWiki Value Proposition | concept | — |
| table-candidates | Table Candidates | concept | — |
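The relationship column is sparse ("—" for most rows, "created-by" for the model pages). A small sketch of tallying the table by type, under an assumed `Backlink` shape that mirrors the columns; how this project actually stores or loads backlink data is not shown here.

```ts
// Backlink shape mirrors the table columns above; storage details are assumed.
interface Backlink {
  id: string;
  title: string;
  type: string;           // ai-model, organization, person, policy, ...
  relationship?: string;  // e.g. "created-by"; omitted where the table shows "—"
}

function countByType(backlinks: Backlink[]): Map<string, number> {
  const counts = new Map<string, number>();
  for (const b of backlinks) {
    counts.set(b.type, (counts.get(b.type) ?? 0) + 1);
  }
  return counts;
}
```

For example, the fifteen Claude model rows at the top of the table would tally as `ai-model: 15`.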