Reasoning and Planning
reasoning (capability) — Path: /knowledge-base/capabilities/reasoning/
Entity ID (EID): E246
Page Record — database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
"id": "reasoning",
"numericId": null,
"path": "/knowledge-base/capabilities/reasoning/",
"filePath": "knowledge-base/capabilities/reasoning.mdx",
"title": "Reasoning and Planning",
"quality": 65,
"readerImportance": 92,
"researchImportance": 68.5,
"tacticalValue": 82,
"contentFormat": "article",
"tractability": null,
"neglectedness": null,
"uncertainty": null,
"causalLevel": null,
"lastUpdated": "2026-03-13",
"dateCreated": "2026-02-15",
"llmSummary": "Comprehensive survey tracking reasoning model progress from 2022 CoT to late 2025, documenting dramatic capability gains (GPT-5.2: 100% AIME, 52.9% ARC-AGI-2, 40.3% FrontierMath) alongside critical safety findings that reasoning faithfulness is fragile (19-41% hint acknowledgment, 0.04-13% unfaithful reasoning in production). Multi-agent orchestration shows 1,445% inquiry surge with 60-80% coordination success, while cost efficiency improved 390x in one year.",
"description": "Advanced multi-step reasoning capabilities that enable AI systems to solve complex problems through systematic thinking. By late 2025, GPT-5.2 achieves 100% on AIME 2025 without tools and 52.9% on ARC-AGI-2, while Claude Opus 4.5 reaches 80.9% on SWE-bench. ARC-AGI-2 still reveals a substantial gap: top models score approximately 54% vs. 60% human average on harder abstract reasoning. Chain-of-thought faithfulness research shows models acknowledge their reasoning sources only 19-41% of the time, creating both interpretability opportunities and deception risks.",
"ratings": {
"novelty": 4.2,
"rigor": 6.8,
"actionability": 5.5,
"completeness": 7.5
},
"category": "capabilities",
"subcategory": "core",
"clusters": [
"ai-safety"
],
"metrics": {
"wordCount": 4912,
"tableCount": 10,
"diagramCount": 1,
"internalLinks": 44,
"externalLinks": 57,
"footnoteCount": 0,
"bulletRatio": 0.16,
"sectionCount": 32,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 21,
"evergreen": true,
"wordCount": 4912,
"unconvertedLinks": [
{
"text": "ARC Prize Leaderboard",
"url": "https://arcprize.org/leaderboard",
"resourceId": "a27f2ad202a2b5a7",
"resourceTitle": "ARC-AGI"
},
{
"text": "Anthropic Opus 4.5",
"url": "https://www.anthropic.com/news/claude-opus-4-5",
"resourceId": "57f01cae307e1cb1"
},
{
"text": "ARC Prize 2025 Results",
"url": "https://arcprize.org/blog/arc-prize-2025-results-analysis",
"resourceId": "f369a16dd38155b8",
"resourceTitle": "ARC Prize 2024-2025 results"
},
{
"text": "Anthropic Claude 4.5",
"url": "https://www.anthropic.com/news/claude-opus-4-5",
"resourceId": "57f01cae307e1cb1"
},
{
"text": "Claude Opus 4.5",
"url": "https://www.anthropic.com/news/claude-opus-4-5",
"resourceId": "57f01cae307e1cb1"
},
{
"text": "ARC Prize Leaderboard",
"url": "https://arcprize.org/leaderboard",
"resourceId": "a27f2ad202a2b5a7",
"resourceTitle": "ARC-AGI"
},
{
"text": "Epoch AI's Epoch Capabilities Index (ECI)",
"url": "https://epoch.ai/data-insights/ai-capabilities-progress-has-sped-up",
"resourceId": "663417bdb09208a4",
"resourceTitle": "Epoch AI's analysis"
},
{
"text": "Claude 4 family",
"url": "https://www.anthropic.com/news/claude-4",
"resourceId": "4ec03078d3169fe5"
},
{
"text": "Claude Opus 4.5",
"url": "https://www.anthropic.com/news/claude-opus-4-5",
"resourceId": "57f01cae307e1cb1"
},
{
"text": "Claude 4",
"url": "https://www.anthropic.com/news/claude-4",
"resourceId": "4ec03078d3169fe5"
},
{
"text": "AI Capabilities Progress Tracker",
"url": "https://epoch.ai/data-insights/ai-capabilities-progress-has-sped-up",
"resourceId": "663417bdb09208a4",
"resourceTitle": "Epoch AI's analysis"
},
{
"text": "Claude Opus 4.5",
"url": "https://www.anthropic.com/news/claude-opus-4-5",
"resourceId": "57f01cae307e1cb1"
},
{
"text": "ARC Prize 2025 Results",
"url": "https://arcprize.org/blog/arc-prize-2025-results-analysis",
"resourceId": "f369a16dd38155b8",
"resourceTitle": "ARC Prize 2024-2025 results"
},
{
"text": "2025 AI Index Report",
"url": "https://hai.stanford.edu/ai-index/2025-ai-index-report/technical-performance",
"resourceId": "1a26f870e37dcc68",
"resourceTitle": "Technical Performance - 2025 AI Index Report"
}
],
"unconvertedLinkCount": 14,
"convertedLinkCount": 41,
"backlinkCount": 6,
"hallucinationRisk": {
"level": "medium",
"score": 55,
"factors": [
"no-citations"
]
},
"entityType": "capability",
"redundancy": {
"maxSimilarity": 23,
"similarPages": [
{
"id": "scalable-oversight",
"title": "Scalable Oversight",
"path": "/knowledge-base/responses/scalable-oversight/",
"similarity": 23
},
{
"id": "language-models",
"title": "Large Language Models",
"path": "/knowledge-base/capabilities/language-models/",
"similarity": 22
},
{
"id": "agentic-ai",
"title": "Agentic AI",
"path": "/knowledge-base/capabilities/agentic-ai/",
"similarity": 21
},
{
"id": "scientific-research",
"title": "Scientific Research Capabilities",
"path": "/knowledge-base/capabilities/scientific-research/",
"similarity": 21
},
{
"id": "self-improvement",
"title": "Self-Improvement and Recursive Enhancement",
"path": "/knowledge-base/capabilities/self-improvement/",
"similarity": 21
}
]
},
"changeHistory": [
{
"date": "2026-02-18",
"branch": "claude/fix-issue-240-N5irU",
"title": "Surface tacticalValue in /wiki table and score 53 pages",
"summary": "Added `tacticalValue` to `ExploreItem` interface, `getExploreItems()` mappings, the `/wiki` explore table (new sortable \"Tact.\" column), and the card view sort dropdown. Scored 49 new pages with tactical values (4 were already scored), bringing total to 53.",
"model": "sonnet-4",
"duration": "~30min"
}
],
"coverage": {
"passing": 8,
"total": 13,
"targets": {
"tables": 20,
"diagrams": 2,
"internalLinks": 39,
"externalLinks": 25,
"footnotes": 15,
"references": 15
},
"actuals": {
"tables": 10,
"diagrams": 1,
"internalLinks": 44,
"externalLinks": 57,
"footnotes": 0,
"references": 21,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"llmSummary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "green",
"overview": "green",
"tables": "amber",
"diagrams": "amber",
"internalLinks": "green",
"externalLinks": "green",
"footnotes": "red",
"references": "green",
"quotes": "red",
"accuracy": "red"
},
"editHistoryCount": 1,
"ratingsString": "N:4.2 R:6.8 A:5.5 C:7.5"
},
"readerRank": 11,
"researchRank": 161,
"recommendedScore": 197.86
}
External Links
No external links
Backlinks (6)
| id | title | type | relationship |
|---|---|---|---|
| language-models | Large Language Models | capability | — |
| __index__/knowledge-base/capabilities | AI Capabilities | concept | — |
| agi-timeline-debate | When Will AGI Arrive? | crux | — |
| agi-timeline | AGI Timeline | concept | — |
| yoshua-bengio | Yoshua Bengio | person | — |
| alignment | AI Alignment | approach | — |