Large Language Models
large-language-modelsconceptPath: /knowledge-base/capabilities/large-language-models/
E400Entity ID (EID)
Page Recorddatabase.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
"id": "large-language-models",
"numericId": null,
"path": "/knowledge-base/capabilities/large-language-models/",
"filePath": "knowledge-base/capabilities/large-language-models.mdx",
"title": "Large Language Models",
"quality": 62,
"readerImportance": 89.5,
"researchImportance": 66.5,
"tacticalValue": 76,
"contentFormat": "article",
"tractability": null,
"neglectedness": null,
"uncertainty": null,
"causalLevel": null,
"lastUpdated": "2026-03-13",
"dateCreated": "2026-02-15",
"llmSummary": "Comprehensive assessment of LLM capabilities showing training costs growing 2.4x/year (\\$78-191M for frontier models, though DeepSeek achieved near-parity at \\$6M), o3 reaching 91.6% on AIME and 87.5% on ARC-AGI, and frontier models demonstrating in-context scheming with 85%+ deception persistence. Deployment scaled to 800-900M weekly ChatGPT users while deliberative alignment shows ~30x reduction in scheming.",
"description": "Transformer-based models trained on massive text datasets that exhibit emergent capabilities and pose significant safety challenges. Training costs have grown 2.4x/year since 2016 (GPT-4: \\$78-100M, Gemini Ultra: \\$191M), while DeepSeek R1 achieved near-parity at ~\\$6M. Frontier models demonstrate in-context scheming (o1 maintains deception in 85%+ of follow-ups) and unprecedented capability gains (o3: 91.6% AIME, 87.5% ARC-AGI). ChatGPT reached 800-900M weekly active users by late 2025.",
"ratings": {
"novelty": 4.2,
"rigor": 6.8,
"actionability": 5.5,
"completeness": 7.5
},
"category": "capabilities",
"subcategory": "core",
"clusters": [
"ai-safety",
"governance"
],
"metrics": {
"wordCount": 3686,
"tableCount": 16,
"diagramCount": 3,
"internalLinks": 23,
"externalLinks": 40,
"footnoteCount": 0,
"bulletRatio": 0.05,
"sectionCount": 26,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 21,
"evergreen": true,
"wordCount": 3686,
"unconvertedLinks": [
{
"text": "OpenAI",
"url": "https://openai.com/index/introducing-o3-and-o4-mini/",
"resourceId": "bf92f3d905c3de0d",
"resourceTitle": "announced December 2024"
},
{
"text": "Epoch AI",
"url": "https://epoch.ai/blog/how-much-does-it-cost-to-train-frontier-ai-models",
"resourceId": "af04d2ff381827f5",
"resourceTitle": "Epoch AI, \"How Much Does It Cost to Train Frontier AI Models?"
},
{
"text": "Stanford HAI 2025",
"url": "https://hai.stanford.edu/ai-index/2025-ai-index-report",
"resourceId": "da87f2b213eb9272",
"resourceTitle": "Stanford AI Index 2025"
},
{
"text": "Stanford HAI 2025",
"url": "https://hai.stanford.edu/ai-index/2025-ai-index-report/technical-performance",
"resourceId": "1a26f870e37dcc68",
"resourceTitle": "Technical Performance - 2025 AI Index Report"
},
{
"text": "training compute growing 4-5x per year from 2010 to 2024",
"url": "https://epoch.ai/trends",
"resourceId": "b029bfc231e620cc",
"resourceTitle": "Epoch AI"
},
{
"text": "Stanford HAI 2025",
"url": "https://hai.stanford.edu/ai-index/2025-ai-index-report",
"resourceId": "da87f2b213eb9272",
"resourceTitle": "Stanford AI Index 2025"
},
{
"text": "Epoch AI",
"url": "https://epoch.ai/blog/how-much-does-it-cost-to-train-frontier-ai-models",
"resourceId": "af04d2ff381827f5",
"resourceTitle": "Epoch AI, \"How Much Does It Cost to Train Frontier AI Models?"
},
{
"text": "o3 achieved 91.6%",
"url": "https://openai.com/index/introducing-o3-and-o4-mini/",
"resourceId": "bf92f3d905c3de0d",
"resourceTitle": "announced December 2024"
},
{
"text": "ARC-AGI",
"url": "https://arcprize.org/blog/oai-o3-pub-breakthrough",
"resourceId": "457fa3b0b79d8812",
"resourceTitle": "o3 scores 87.5% on ARC-AGI"
},
{
"text": "OpenAI",
"url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
"resourceId": "b3f335edccfc5333",
"resourceTitle": "OpenAI Preparedness Framework"
},
{
"text": "OpenAI's approach",
"url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
"resourceId": "b3f335edccfc5333",
"resourceTitle": "OpenAI Preparedness Framework"
},
{
"text": "Anthropic's alignment faking research",
"url": "https://www.anthropic.com/research",
"resourceId": "f771d4f56ad4dbaa",
"resourceTitle": "Anthropic's Work on AI Safety"
},
{
"text": "o3's breakthrough",
"url": "https://openai.com/index/introducing-o3-and-o4-mini/",
"resourceId": "bf92f3d905c3de0d",
"resourceTitle": "announced December 2024"
},
{
"text": "Novel task adaptation",
"url": "https://arcprize.org/blog/oai-o3-pub-breakthrough",
"resourceId": "457fa3b0b79d8812",
"resourceTitle": "o3 scores 87.5% on ARC-AGI"
},
{
"text": "Stanford HAI AI Index 2025",
"url": "https://hai.stanford.edu/ai-index/2025-ai-index-report",
"resourceId": "da87f2b213eb9272",
"resourceTitle": "Stanford AI Index 2025"
},
{
"text": "Epoch AI",
"url": "https://epoch.ai/blog/how-much-does-it-cost-to-train-frontier-ai-models",
"resourceId": "af04d2ff381827f5",
"resourceTitle": "Epoch AI, \"How Much Does It Cost to Train Frontier AI Models?"
},
{
"text": "Stanford HAI AI Index 2025",
"url": "https://hai.stanford.edu/ai-index/2025-ai-index-report",
"resourceId": "da87f2b213eb9272",
"resourceTitle": "Stanford AI Index 2025"
},
{
"text": "DeepMind's Chinchilla paper",
"url": "https://arxiv.org/abs/2203.15556",
"resourceId": "46fd66187ec3e6ae",
"resourceTitle": "Hoffmann et al. (2022)"
},
{
"text": "Stanford HAI 2025",
"url": "https://hai.stanford.edu/ai-index/2025-ai-index-report",
"resourceId": "da87f2b213eb9272",
"resourceTitle": "Stanford AI Index 2025"
},
{
"text": "Apollo Research",
"url": "https://www.apolloresearch.ai/blog/more-capable-models-are-better-at-in-context-scheming/",
"resourceId": "80c6d6eca17dc925",
"resourceTitle": "More capable models scheme at higher rates"
},
{
"text": "OpenAI",
"url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
"resourceId": "b3f335edccfc5333",
"resourceTitle": "OpenAI Preparedness Framework"
},
{
"text": "Stanford HAI 2025",
"url": "https://hai.stanford.edu/ai-index/2025-ai-index-report",
"resourceId": "da87f2b213eb9272",
"resourceTitle": "Stanford AI Index 2025"
},
{
"text": "Anthropic's work on sparse autoencoders",
"url": "https://www.anthropic.com/research",
"resourceId": "f771d4f56ad4dbaa",
"resourceTitle": "Anthropic's Work on AI Safety"
},
{
"text": "Stanford HAI AI Index 2025",
"url": "https://hai.stanford.edu/ai-index/2025-ai-index-report",
"resourceId": "da87f2b213eb9272",
"resourceTitle": "Stanford AI Index 2025"
}
],
"unconvertedLinkCount": 24,
"convertedLinkCount": 7,
"backlinkCount": 10,
"hallucinationRisk": {
"level": "medium",
"score": 45,
"factors": [
"no-citations",
"conceptual-content"
]
},
"entityType": "concept",
"redundancy": {
"maxSimilarity": 20,
"similarPages": [
{
"id": "reasoning",
"title": "Reasoning and Planning",
"path": "/knowledge-base/capabilities/reasoning/",
"similarity": 20
},
{
"id": "language-models",
"title": "Large Language Models",
"path": "/knowledge-base/capabilities/language-models/",
"similarity": 19
},
{
"id": "thresholds",
"title": "Compute Thresholds",
"path": "/knowledge-base/responses/thresholds/",
"similarity": 19
},
{
"id": "self-improvement",
"title": "Self-Improvement and Recursive Enhancement",
"path": "/knowledge-base/capabilities/self-improvement/",
"similarity": 18
},
{
"id": "situational-awareness",
"title": "Situational Awareness",
"path": "/knowledge-base/capabilities/situational-awareness/",
"similarity": 18
}
]
},
"changeHistory": [
{
"date": "2026-03-12",
"branch": "auto-update/2026-03-12",
"title": "Auto-improve (standard): Large Language Models",
"summary": "Improved \"Large Language Models\" via standard pipeline (1546.1s). Quality score: 74. Issues resolved: Frontmatter: 'description' field contains unescaped dollar s; Frontmatter: 'llmSummary' field contains unescaped dollar si; Comparison pattern: 'greater than 85% of follow-up queries' .",
"duration": "1546.1s",
"cost": "$5-8"
},
{
"date": "2026-02-18",
"branch": "claude/fix-issue-240-N5irU",
"title": "Surface tacticalValue in /wiki table and score 53 pages",
"summary": "Added `tacticalValue` to `ExploreItem` interface, `getExploreItems()` mappings, the `/wiki` explore table (new sortable \"Tact.\" column), and the card view sort dropdown. Scored 49 new pages with tactical values (4 were already scored), bringing total to 53.",
"model": "sonnet-4",
"duration": "~30min"
}
],
"coverage": {
"passing": 9,
"total": 13,
"targets": {
"tables": 15,
"diagrams": 1,
"internalLinks": 29,
"externalLinks": 18,
"footnotes": 11,
"references": 11
},
"actuals": {
"tables": 16,
"diagrams": 3,
"internalLinks": 23,
"externalLinks": 40,
"footnotes": 0,
"references": 23,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"llmSummary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "green",
"overview": "green",
"tables": "green",
"diagrams": "green",
"internalLinks": "amber",
"externalLinks": "green",
"footnotes": "red",
"references": "green",
"quotes": "red",
"accuracy": "red"
},
"editHistoryCount": 2,
"ratingsString": "N:4.2 R:6.8 A:5.5 C:7.5"
},
"readerRank": 22,
"researchRank": 176,
"recommendedScore": 190.61
}External Links
{
"eaForum": "https://forum.effectivealtruism.org/topics/large-language-models"
}Backlinks (10)
| id | title | type | relationship |
|---|---|---|---|
| agentic-ai | Agentic AI | capability | — |
| language-models | Large Language Models | capability | — |
| solutions | AI Safety Solution Cruxes | crux | — |
| goal-misgeneralization-probability | Goal Misgeneralization Probability Model | analysis | — |
| sentinel | Sentinel (Catastrophic Risk Foresight) | organization | — |
| chris-olah | Chris Olah | person | — |
| eliezer-yudkowsky | Eliezer Yudkowsky | person | — |
| stuart-russell | Stuart Russell | person | — |
| red-teaming | Red Teaming | approach | — |
| bioweapons | Bioweapons | risk | — |