Longterm Wiki

Anthropic

anthropic · organization · Path: /knowledge-base/organizations/anthropic/
E22 — Entity ID (EID)
← Back to page · 256 backlinks · Quality: 74 · Updated: 2026-03-12
Page Record — database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "anthropic",
  "numericId": null,
  "path": "/knowledge-base/organizations/anthropic/",
  "filePath": "knowledge-base/organizations/anthropic.mdx",
  "title": "Anthropic",
  "quality": 74,
  "readerImportance": 52.3,
  "researchImportance": 57.5,
  "tacticalValue": 92,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-12",
  "dateCreated": "2026-02-15",
  "llmSummary": "Comprehensive reference page on Anthropic covering financials ($380B valuation, $14B ARR), safety research (Constitutional AI, mechanistic interpretability, model welfare), governance (LTBT structure), controversies (alignment faking at 12%, RSP rollback), and competitive positioning (42% enterprise coding share). Highly concrete with specific numbers throughout but primarily descriptive compilation rather than original analysis.",
  "description": "An AI safety company founded by former OpenAI researchers that develops frontier AI models while pursuing safety research, including the Claude model family, Constitutional AI, and mechanistic interpretability.",
  "ratings": {
    "focus": 7.5,
    "novelty": 4.2,
    "rigor": 6.8,
    "completeness": 7.8,
    "concreteness": 8.1,
    "actionability": 4.5,
    "objectivity": 6.2
  },
  "category": "organizations",
  "subcategory": "labs",
  "clusters": [
    "ai-safety",
    "community",
    "governance"
  ],
  "metrics": {
    "wordCount": 5082,
    "tableCount": 4,
    "diagramCount": 2,
    "internalLinks": 79,
    "externalLinks": 15,
    "footnoteCount": 0,
    "bulletRatio": 0,
    "sectionCount": 41,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 3,
  "evergreen": true,
  "wordCount": 5082,
  "unconvertedLinks": [
    {
      "text": "Harvard Law",
      "url": "https://corpgov.law.harvard.edu/2023/10/28/anthropic-long-term-benefit-trust/",
      "resourceId": "357cf00ad44eea37"
    },
    {
      "text": "Anthropic",
      "url": "https://www.anthropic.com/news/claude-opus-4-5",
      "resourceId": "57f01cae307e1cb1"
    },
    {
      "text": "TechCrunch",
      "url": "https://techcrunch.com/2025/07/31/enterprises-prefer-anthropics-ai-models-over-anyone-elses-including-openais/",
      "resourceId": "3a07423e8bf204c2"
    },
    {
      "text": "Anthropic",
      "url": "https://transformer-circuits.pub/2024/scaling-monosemanticity/index.html",
      "resourceId": "426fcdeae8e2b749",
      "resourceTitle": "Anthropic's dictionary learning work"
    },
    {
      "text": "MIT TR",
      "url": "https://www.technologyreview.com/2026/01/12/1130003/mechanistic-interpretability-ai-research-models-2026-breakthrough-technologies/",
      "resourceId": "3a4cf664bf7b27a8",
      "resourceTitle": "Mechanistic interpretability: 10 Breakthrough Technologies 2026 | MIT Technology Review"
    },
    {
      "text": "Bank Info Security",
      "url": "https://www.bankinfosecurity.com/models-strategically-lie-finds-anthropic-study-a-27136",
      "resourceId": "de18440757f72c95",
      "resourceTitle": "Models Can Strategically Lie, Finds Anthropic Study"
    },
    {
      "text": "Axios",
      "url": "https://www.axios.com/2025/05/23/anthropic-ai-deception-risk",
      "resourceId": "e76f688da38ef0fd",
      "resourceTitle": "Axios: Anthropic AI Deception Risk (May 2025)"
    },
    {
      "text": "Glassdoor",
      "url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
      "resourceId": "150ce5e988bc1e00",
      "resourceTitle": "Glassdoor: Working at Anthropic"
    },
    {
      "text": "Glassdoor",
      "url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
      "resourceId": "150ce5e988bc1e00",
      "resourceTitle": "Glassdoor: Working at Anthropic"
    },
    {
      "text": "Glassdoor",
      "url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
      "resourceId": "150ce5e988bc1e00",
      "resourceTitle": "Glassdoor: Working at Anthropic"
    },
    {
      "text": "Glassdoor",
      "url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
      "resourceId": "150ce5e988bc1e00",
      "resourceTitle": "Glassdoor: Working at Anthropic"
    },
    {
      "text": "Glassdoor",
      "url": "https://www.glassdoor.com/Overview/Working-at-Anthropic-EI_IE8109027.11,20.htm",
      "resourceId": "150ce5e988bc1e00",
      "resourceTitle": "Glassdoor: Working at Anthropic"
    }
  ],
  "unconvertedLinkCount": 12,
  "convertedLinkCount": 0,
  "backlinkCount": 256,
  "citationHealth": {
    "total": 42,
    "withQuotes": 30,
    "verified": 29,
    "accuracyChecked": 29,
    "accurate": 11,
    "inaccurate": 6,
    "avgScore": 0.9623388965924581
  },
  "hallucinationRisk": {
    "level": "high",
    "score": 75,
    "factors": [
      "biographical-claims",
      "no-citations"
    ]
  },
  "entityType": "organization",
  "redundancy": {
    "maxSimilarity": 20,
    "similarPages": [
      {
        "id": "anthropic-investors",
        "title": "Anthropic (Funder)",
        "path": "/knowledge-base/organizations/anthropic-investors/",
        "similarity": 20
      },
      {
        "id": "deep-learning-era",
        "title": "Deep Learning Revolution (2012-2020)",
        "path": "/knowledge-base/history/deep-learning-era/",
        "similarity": 19
      },
      {
        "id": "agentic-ai",
        "title": "Agentic AI",
        "path": "/knowledge-base/capabilities/agentic-ai/",
        "similarity": 18
      },
      {
        "id": "solutions",
        "title": "AI Safety Solution Cruxes",
        "path": "/knowledge-base/cruxes/solutions/",
        "similarity": 18
      },
      {
        "id": "openai-foundation",
        "title": "OpenAI Foundation",
        "path": "/knowledge-base/organizations/openai-foundation/",
        "similarity": 18
      }
    ]
  },
  "changeHistory": [
    {
      "date": "2026-02-26",
      "branch": "claude/claims-driven-improvements",
      "title": "Auto-improve (standard): Anthropic",
      "summary": "Improved \"Anthropic\" via standard pipeline (570.8s). Quality score: 74. Issues resolved: Section duplication: 'Competitive Positioning' subsection un; Section duplication: 'Safety Levels' subsection repeats cont; Section duplication: The 'Quick Financial Context' subsectio.",
      "duration": "570.8s",
      "cost": "$5-8"
    },
    {
      "date": "2026-02-24",
      "branch": "feat/stale-fact-detection-581-582",
      "title": "Batch content fixes + stale-facts validator + 2 new validation rules",
      "summary": "(fill in)",
      "pr": 924,
      "model": "claude-sonnet-4-6"
    },
    {
      "date": "2026-02-19",
      "branch": "claude/resolve-issue-203-d8IBd",
      "title": "Calc pipeline iteration: fix range facts, index mismatch, prompt quality",
      "summary": "Ran `crux facts calc` on anthropic-valuation and anthropic pages post-implementation, discovered and fixed three bugs: (1) range-valued facts ({min: N}) invisible to LLM and evaluator, (2) proposal-to-pattern index mismatch causing wrong validation expected values, (3) over-wide originalText proposals including JSX tags or prose. Applied validated Calc replacements to two pages (openai.39d6868e/$500B valuation now computes correctly).",
      "model": "sonnet-4",
      "duration": "~40min"
    },
    {
      "date": "2026-02-18",
      "branch": "claude/source-unsourced-facts-RecGw",
      "title": "Source unsourced facts",
      "summary": "Sourced 25 of 30 previously unsourced facts across all 4 fact files (anthropic, sam-altman, openai, jaan-tallinn). Created 21 new resource entries in news-media.yaml and ai-labs.yaml with proper SHA256-based IDs. Added 8 new publications (Bloomberg, The Information, Quartz, Benzinga, Britannica, World, Sherwood News). Fixed date accuracy issues (Worldcoin stats from 2024 to 2025-05, OpenAI revenue from Oct to Jun 2024) and improved notes. Source coverage improved from 29% to 88%.",
      "model": "opus-4-6",
      "duration": "~45min"
    },
    {
      "date": "2026-02-18",
      "branch": "claude/review-pr-216-P4Fcu",
      "title": "Fix audit report findings from PR #216",
      "summary": "Reviewed PR #216 (comprehensive wiki audit report) and implemented fixes for the major issues it identified: fixed 181 path-style EntityLink IDs across 33 files, converted 164 broken EntityLinks (referencing non-existent entities) to plain text across 38 files, fixed a temporal inconsistency in anthropic.mdx, and added missing description fields to 53 ai-transition-model pages."
    },
    {
      "date": "2026-02-18",
      "branch": "claude/highlight-stakeholder-table-VtY0t",
      "title": "Create dedicated Anthropic stakeholder page",
      "summary": "Created a new dedicated `anthropic-stakeholders` page with the most shareable ownership tables (all stakeholders with stakes, values, EA alignment), added a condensed stakeholder summary to the top of the main Anthropic page, and wrote 4 proposed GitHub issues for broader system changes (datasets infrastructure, importance metrics rethink, concrete data expansion, continuous maintenance).",
      "model": "opus-4-6",
      "duration": "~30min"
    },
    {
      "date": "2026-02-18",
      "branch": "claude/fact-hash-ids-UETLf",
      "title": "Migrate fact IDs from human-readable to hash-based",
      "summary": "Migrated all canonical fact IDs from human-readable slugs (e.g., `revenue-arr-2025`) to 8-char random hex hashes (e.g., `55d88868`), matching the pattern used by resources. Updated all YAML files, MDX references, build scripts, tests, LLM prompts, and documentation.",
      "model": "opus-4-6",
      "duration": "~45min"
    },
    {
      "date": "2026-02-18",
      "branch": "claude/audit-webpage-errors-11sSF",
      "title": "Fix factual errors found in wiki audit",
      "summary": "Systematically audited ~35+ high-risk wiki pages for factual errors and hallucinations using parallel background agents plus direct reading. Fixed 13 confirmed errors across 11 files."
    }
  ],
  "coverage": {
    "passing": 8,
    "total": 13,
    "targets": {
      "tables": 20,
      "diagrams": 2,
      "internalLinks": 41,
      "externalLinks": 25,
      "footnotes": 15,
      "references": 15
    },
    "actuals": {
      "tables": 4,
      "diagrams": 2,
      "internalLinks": 79,
      "externalLinks": 15,
      "footnotes": 0,
      "references": 102,
      "quotesWithQuotes": 30,
      "quotesTotal": 42,
      "accuracyChecked": 29,
      "accuracyTotal": 42
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "green",
      "overview": "green",
      "tables": "amber",
      "diagrams": "green",
      "internalLinks": "green",
      "externalLinks": "amber",
      "footnotes": "red",
      "references": "green",
      "quotes": "amber",
      "accuracy": "amber"
    },
    "editHistoryCount": 8,
    "ratingsString": "N:4.2 R:6.8 A:4.5 C:7.8"
  },
  "readerRank": 282,
  "researchRank": 239,
  "recommendedScore": 195.88
}
External Links
{
  "wikipedia": "https://en.wikipedia.org/wiki/Anthropic",
  "lesswrong": "https://www.lesswrong.com/tag/anthropic-org",
  "wikidata": "https://www.wikidata.org/wiki/Q116758847",
  "grokipedia": "https://grokipedia.com/page/Anthropic"
}
Backlinks (256)
id · title · type · relationship
claudeClaudeai-modelcreated-by
claude-3-opusClaude 3 Opusai-modelcreated-by
claude-3-sonnetClaude 3 Sonnetai-modelcreated-by
claude-3-haikuClaude 3 Haikuai-modelcreated-by
claude-3-5-sonnetClaude 3.5 Sonnetai-modelcreated-by
claude-3-5-haikuClaude 3.5 Haikuai-modelcreated-by
claude-3-7-sonnetClaude 3.7 Sonnetai-modelcreated-by
claude-sonnet-4Claude Sonnet 4ai-modelcreated-by
claude-opus-4Claude Opus 4ai-modelcreated-by
claude-opus-4-1Claude Opus 4.1ai-modelcreated-by
claude-sonnet-4-5Claude Sonnet 4.5ai-modelcreated-by
claude-haiku-4-5Claude Haiku 4.5ai-modelcreated-by
claude-opus-4-5Claude Opus 4.5ai-modelcreated-by
claude-opus-4-6Claude Opus 4.6ai-modelcreated-by
claude-sonnet-4-6Claude Sonnet 4.6ai-modelcreated-by
agentic-aiAgentic AIcapability
situational-awarenessSituational Awarenesscapability
tool-useTool Use and Computer Usecapability
ea-shareholder-diversification-anthropicEA Shareholder Diversification from Anthropicconcept
corporate-influenceCorporate Influence on AI Policycrux
field-buildingAI Safety Field Building and Communitycrux
research-agendasAI Alignment Research Agendascrux
technical-researchTechnical AI Safety Researchcrux
ai-welfareAI Welfare and Digital Mindsconcept
accident-risksAI Accident Risk Cruxescrux
large-language-modelsLarge Language Modelsconcept
heavy-scaffoldingHeavy Scaffolding / Agentic Systemsconcept
dense-transformersDense Transformersconcept
mainstream-eraMainstream Erahistorical
anthropic-government-standoffAnthropic-Pentagon Standoff (2026)event
openai-foundation-governanceOpenAI Foundation Governance Paradoxanalysis
anthropic-valuationAnthropic Valuation Analysisanalysis
anthropic-stakeholdersAnthropic Stakeholderstable
anthropic-investorsAnthropic (Funder)analysis
long-term-benefit-trustLong-Term Benefit Trust (Anthropic)analysis
anthropic-ipoAnthropic IPOanalysis
anthropic-impactAnthropic Impact Assessment Modelanalysis
capability-alignment-raceCapability-Alignment Race Modelanalysis
short-timeline-policy-implicationsShort AI Timeline Policy Implicationsanalysis
technical-pathwaysAI Safety Technical Pathway Decompositionanalysis
feedback-loopsAI Risk Feedback Loop & Cascade Modelanalysis
multi-actor-landscapeAI Safety Multi-Actor Strategic Landscapeanalysis
model-organisms-of-misalignmentModel Organisms of Misalignmentanalysis
ea-biosecurity-scopeIs EA Biosecurity Work Limited to Restricting LLM Biological Use?analysis
deceptive-alignment-decompositionDeceptive Alignment Decomposition Modelanalysisresearch
deepmindGoogle DeepMindorganization
openaiOpenAIorganization
xaixAIorganization
apollo-researchApollo Researchorganization
caisCAISorganization
conjectureConjectureorganization
metrMETRorganization
arcARCorganization
uk-aisiUK AI Safety Instituteorganization
us-aisiUS AI Safety Instituteorganization
openai-foundationOpenAI Foundationorganization
johns-hopkins-center-for-health-securityJohns Hopkins Center for Health Securityorganization
ssiSafe Superintelligence Inc (SSI)organization
frontier-model-forumFrontier Model Forumorganization
palisade-researchPalisade Researchorganization
goodfireGoodfireorganization
redwood-researchRedwood Researchorganization
chris-olahChris Olahperson
dario-amodeiDario Amodeiperson
holden-karnofskyHolden Karnofskyperson
jan-leikeJan Leikeperson
david-sacksDavid Sacks (White House AI Czar)person
voluntary-commitmentsVoluntary AI Safety Commitmentspolicy
anthropic-core-viewsAnthropic Core Viewssafety-agenda
interpretabilityInterpretabilitysafety-agenda
eval-saturationEval Saturation & The Evals Gapapproach
evaluation-awarenessEvaluation Awarenessapproach
alignmentAI Alignmentapproach
scalable-eval-approachesScalable Eval Approachesapproach
scheming-detectionScheming & Deception Detectionapproach
dangerous-cap-evalsDangerous Capability Evaluationsapproach
capability-elicitationCapability Elicitationapproach
safety-casesAI Safety Casesapproach
sleeper-agent-detectionSleeper Agent Detectionapproach
ai-assistedAI-Assisted Alignmentapproach
evaluationAI Evaluationapproach
alignment-evalsAlignment Evaluationsapproach
red-teamingRed Teamingapproach
constitutional-aiConstitutional AIapproach
weak-to-strongWeak-to-Strong Generalizationapproach
preference-optimizationPreference Optimization Methodsapproach
refusal-trainingRefusal Trainingapproach
california-sb53California SB 53policy
evals-governanceEvals-Based Deployment Gatespolicy
rspResponsible Scaling Policiespolicy
corporateCorporate AI Safety Responsesapproach
lab-cultureAI Lab Safety Cultureapproach
training-programsAI Safety Training Programsapproach
mech-interpMechanistic Interpretabilityapproach
circuit-breakersCircuit Breakers / Inference Interventionsapproach
sparse-autoencodersSparse Autoencoders (SAEs)approach
debateAI Safety via Debateapproach
sandboxingSandboxing / Containmentapproach
structured-accessStructured Access / API-Onlyapproach
tool-restrictionsTool-Use Restrictionsapproach
bioweaponsBioweapons Riskrisk
deceptive-alignmentDeceptive Alignmentrisk
racing-dynamicsAI Development Racing Dynamicsrisk
sycophancySycophancyrisk
sleeper-agentsSleeper Agents: Training Deceptive LLMsrisk
codingAutonomous Codingcapability
language-modelsLarge Language Modelscapability
long-horizonLong-Horizon Autonomous Taskscapability
persuasionPersuasion and Social Manipulationcapability
self-improvementSelf-Improvement and Recursive Enhancementcapability
solutionsAI Safety Solution Cruxescrux
case-for-xriskThe Case FOR AI Existential Riskargument
interpretability-sufficientIs Interpretability Sufficient for Safety?crux
pause-debateShould We Pause AI Development?crux
why-alignment-easyWhy Alignment Might Be Easyargument
why-alignment-hardWhy Alignment Might Be Hardargument
agi-developmentAGI Developmentconcept
agi-timelineAGI Timelineconcept
deep-learning-eraDeep Learning Revolution (2012-2020)historical
ea-longtermist-wins-lossesEA and Longtermist Wins and Lossesconcept
__index__/knowledge-base/historyHistoryconcept
longtermism-credibility-after-ftxLongtermism's Philosophical Credibility After FTXconcept
claude-code-espionage-2025Claude Code Espionage Incident (2025)concept
__index__/knowledge-baseKnowledge Baseconcept
ai-compute-scaling-metricsAI Compute Scaling Metricsanalysis
ai-risk-portfolio-analysisAI Risk Portfolio Analysisanalysis
ai-talent-market-dynamicsAI Talent Market Dynamicsanalysis
ai-timelinesAI Timelinesconcept
alignment-robustness-trajectoryAlignment Robustness Trajectoryanalysis
bioweapons-ai-upliftAI Uplift Assessment Modelanalysis
corrigibility-failure-pathwaysCorrigibility Failure Pathwaysanalysis
cyberweapons-attack-automationAutonomous Cyber Attack Timelineanalysis
frontier-lab-cost-structureFrontier Lab Cost Structureanalysis
goal-misgeneralization-probabilityGoal Misgeneralization Probability Modelanalysis
instrumental-convergence-frameworkInstrumental Convergence Frameworkanalysis
international-coordination-gameInternational AI Coordination Gameanalysis
intervention-effectiveness-matrixIntervention Effectiveness Matrixanalysis
intervention-timing-windowsIntervention Timing Windowsanalysis
multipolar-trap-dynamicsMultipolar Trap Dynamics Modelanalysis
planning-for-frontier-lab-scalingPlanning for Frontier Lab Scalinganalysis
power-seeking-conditionsPower-Seeking Emergence Conditions Modelanalysis
pre-tai-capital-deploymentPre-TAI Capital Deployment: $100B-$300B+ Spending Analysisanalysis
racing-dynamics-impactRacing Dynamics Impact Modelanalysis
reward-hacking-taxonomyReward Hacking Taxonomy and Severity Modelanalysis
risk-activation-timelineRisk Activation Timeline Modelanalysis
risk-interaction-matrixRisk Interaction Matrix Modelanalysis
safety-culture-equilibriumSafety Culture Equilibriumanalysis
safety-research-allocationSafety Research Allocation Modelanalysis
safety-researcher-gapAI Safety Talent Supply/Demand Gap Modelanalysis
safety-spending-at-scaleSafety Spending at Scaleanalysis
scaling-lawsAI Scaling Lawsconcept
scheming-likelihood-modelScheming Likelihood Assessmentanalysis
worldview-intervention-mappingWorldview-Intervention Mappinganalysis
ai-futures-projectAI Futures Projectorganization
ai-impactsAI Impactsorganization
ai-revenue-sourcesAI Revenue Sourcesorganization
biosecurity-orgs-overviewBiosecurity Organizations (Overview)concept
bridgewater-aia-labsBridgewater AIA Labsorganization
chaiCHAI (Center for Human-Compatible AI)organization
controlaiControlAIorganization
cserCSER (Centre for the Study of Existential Risk)organization
ea-globalEA Globalorganization
elicitElicit (AI Research Tool)organization
elon-musk-philanthropyElon Musk (Funder)analysis
far-aiFAR AIorganization
fhiFuture of Humanity Institute (FHI)organization
founders-fundFounders Fundorganization
frontier-ai-comparisonFrontier AI Company Comparison (2026)concept
ftx-collapse-ea-funding-lessonsFTX Collapse: Lessons for EA Funding Resilienceconcept
ftx-future-fundFTX Future Fundorganization
ftxFTX (cryptocurrency exchange)organization
futuresearchFutureSearchorganization
giving-pledgeGiving Pledgeorganization
govaiGovAIorganization
__index__/knowledge-base/organizationsOrganizationsconcept
kalshiKalshi (Prediction Market)organization
labs-overviewFrontier AI Labs (Overview)concept
leading-the-futureLeading the Future super PACorganization
lesswrongLessWrongorganization
lionheart-venturesLionheart Venturesorganization
matsMATS ML Alignment Theory Scholars programorganization
meta-aiMeta AI (FAIR)organization
microsoftMicrosoft AIorganization
pause-aiPause AIorganization
safety-orgs-overviewAI Safety Organizations (Overview)concept
securebioSecureBioorganization
seldon-labSeldon Laborganization
situational-awareness-lpSituational Awareness LPorganization
swift-centreSwift Centreorganization
connor-leahyConnor Leahyperson
dan-hendrycksDan Hendrycksperson
daniela-amodeiDaniela Amodeiperson
demis-hassabisDemis Hassabisperson
dustin-moskovitzDustin Moskovitz (AI Safety Funder)person
eliezer-yudkowskyEliezer Yudkowskyperson
elon-muskElon Musk (AI Industry)person
evan-hubingerEvan Hubingerperson
geoffrey-hintonGeoffrey Hintonperson
ilya-sutskeverIlya Sutskeverperson
__index__/knowledge-base/peoplePeopleconcept
jaan-tallinnJaan Tallinnperson
leopold-aschenbrennerLeopold Aschenbrennerperson
max-tegmarkMax Tegmarkperson
neel-nandaNeel Nandaperson
nick-bostromNick Bostromperson
paul-christianoPaul Christianoperson
sam-altmanSam Altmanperson
vipul-naikVipul Naikperson
yann-lecun-predictionsYann LeCun: Track Recordconcept
yoshua-bengioYoshua Bengioperson
ai-controlAI Controlsafety-agenda
ai-forecasting-benchmarkAI Forecasting Benchmark Tournamentproject
california-sb1047California SB 1047policy
coordination-mechanismsInternational Coordination Mechanismspolicy
coordination-techAI Governance Coordination Technologiesapproach
corrigibilityCorrigibility Researchsafety-agenda
deliberationAI-Assisted Deliberation Platformsapproach
effectiveness-assessmentPolicy Effectiveness Assessmentanalysis
eu-ai-actEU AI Actpolicy
evalsEvals & Red-teamingsafety-agenda
governance-policyAI Governance and Policycrux
international-summitsInternational AI Safety Summitspolicy
model-auditingThird-Party Model Auditingapproach
model-specAI Model Specificationspolicy
multi-agentMulti-Agent Safetyapproach
output-filteringAI Output Filteringapproach
process-supervisionProcess Supervisionapproach
reducing-hallucinationsReducing Hallucinations in AI-Generated Wiki Contentapproach
responsible-scaling-policiesResponsible Scaling Policiespolicy
reward-modelingReward Modelingapproach
scalable-oversightScalable Oversightsafety-agenda
seoul-declarationSeoul AI Safety Summit Declarationpolicy
state-capacity-ai-governanceState Capacity and AI Governanceconcept
us-executive-orderUS Executive Order on Safe, Secure, and Trustworthy AIpolicy
us-state-legislationUS State AI Legislationpolicy
whistleblower-protectionsAI Whistleblower Protectionspolicy
concentrated-compute-cybersecurity-riskConcentrated Compute as a Cybersecurity Riskrisk
corrigibility-failureCorrigibility Failurerisk
cyber-psychosisAI-Induced Cyber Psychosisrisk
disinformationDisinformationrisk
emergent-capabilitiesEmergent Capabilitiesrisk
epistemic-sycophancyEpistemic Sycophancyrisk
existential-riskExistential Risk from AIconcept
instrumental-convergenceInstrumental Convergencerisk
knowledge-monopolyAI Knowledge Monopolyrisk
mesa-optimizationMesa-Optimizationrisk
power-seekingPower-Seeking AIrisk
reward-hackingReward Hackingrisk
sandbaggingAI Capability Sandbaggingrisk
schemingSchemingrisk
superintelligenceSuperintelligenceconcept
winner-take-allAI Winner-Take-All Dynamicsrisk
long-timelinesLong-Timelines Technical Worldviewconcept
optimisticOptimistic Alignment Worldviewconcept
longtermwiki-value-propositionLongtermWiki Value Propositionconcept
table-candidatesTable Candidatesconcept
Longterm Wiki