Longterm Wiki

Reasoning and Planning

reasoning · capability · Path: /knowledge-base/capabilities/reasoning/
E246 · Entity ID (EID)
← Back to page · 6 backlinks · Quality: 65 · Updated: 2026-03-13
Page Record: database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "reasoning",
  "numericId": null,
  "path": "/knowledge-base/capabilities/reasoning/",
  "filePath": "knowledge-base/capabilities/reasoning.mdx",
  "title": "Reasoning and Planning",
  "quality": 65,
  "readerImportance": 92,
  "researchImportance": 68.5,
  "tacticalValue": 82,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "Comprehensive survey tracking reasoning model progress from 2022 CoT to late 2025, documenting dramatic capability gains (GPT-5.2: 100% AIME, 52.9% ARC-AGI-2, 40.3% FrontierMath) alongside critical safety findings that reasoning faithfulness is fragile (19-41% hint acknowledgment, 0.04-13% unfaithful reasoning in production). Multi-agent orchestration shows 1,445% inquiry surge with 60-80% coordination success, while cost efficiency improved 390x in one year.",
  "description": "Advanced multi-step reasoning capabilities that enable AI systems to solve complex problems through systematic thinking. By late 2025, GPT-5.2 achieves 100% on AIME 2025 without tools and 52.9% on ARC-AGI-2, while Claude Opus 4.5 reaches 80.9% on SWE-bench. ARC-AGI-2 still reveals a substantial gap: top models score approximately 54% vs. 60% human average on harder abstract reasoning. Chain-of-thought faithfulness research shows models acknowledge their reasoning sources only 19-41% of the time, creating both interpretability opportunities and deception risks.",
  "ratings": {
    "novelty": 4.2,
    "rigor": 6.8,
    "actionability": 5.5,
    "completeness": 7.5
  },
  "category": "capabilities",
  "subcategory": "core",
  "clusters": [
    "ai-safety"
  ],
  "metrics": {
    "wordCount": 4912,
    "tableCount": 10,
    "diagramCount": 1,
    "internalLinks": 44,
    "externalLinks": 57,
    "footnoteCount": 0,
    "bulletRatio": 0.16,
    "sectionCount": 32,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 4912,
  "unconvertedLinks": [
    {
      "text": "ARC Prize Leaderboard",
      "url": "https://arcprize.org/leaderboard",
      "resourceId": "a27f2ad202a2b5a7",
      "resourceTitle": "ARC-AGI"
    },
    {
      "text": "Anthropic Opus 4.5",
      "url": "https://www.anthropic.com/news/claude-opus-4-5",
      "resourceId": "57f01cae307e1cb1"
    },
    {
      "text": "ARC Prize 2025 Results",
      "url": "https://arcprize.org/blog/arc-prize-2025-results-analysis",
      "resourceId": "f369a16dd38155b8",
      "resourceTitle": "ARC Prize 2024-2025 results"
    },
    {
      "text": "Anthropic Claude 4.5",
      "url": "https://www.anthropic.com/news/claude-opus-4-5",
      "resourceId": "57f01cae307e1cb1"
    },
    {
      "text": "Claude Opus 4.5",
      "url": "https://www.anthropic.com/news/claude-opus-4-5",
      "resourceId": "57f01cae307e1cb1"
    },
    {
      "text": "ARC Prize Leaderboard",
      "url": "https://arcprize.org/leaderboard",
      "resourceId": "a27f2ad202a2b5a7",
      "resourceTitle": "ARC-AGI"
    },
    {
      "text": "Epoch AI's Epoch Capabilities Index (ECI)",
      "url": "https://epoch.ai/data-insights/ai-capabilities-progress-has-sped-up",
      "resourceId": "663417bdb09208a4",
      "resourceTitle": "Epoch AI's analysis"
    },
    {
      "text": "Claude 4 family",
      "url": "https://www.anthropic.com/news/claude-4",
      "resourceId": "4ec03078d3169fe5"
    },
    {
      "text": "Claude Opus 4.5",
      "url": "https://www.anthropic.com/news/claude-opus-4-5",
      "resourceId": "57f01cae307e1cb1"
    },
    {
      "text": "Claude 4",
      "url": "https://www.anthropic.com/news/claude-4",
      "resourceId": "4ec03078d3169fe5"
    },
    {
      "text": "AI Capabilities Progress Tracker",
      "url": "https://epoch.ai/data-insights/ai-capabilities-progress-has-sped-up",
      "resourceId": "663417bdb09208a4",
      "resourceTitle": "Epoch AI's analysis"
    },
    {
      "text": "Claude Opus 4.5",
      "url": "https://www.anthropic.com/news/claude-opus-4-5",
      "resourceId": "57f01cae307e1cb1"
    },
    {
      "text": "ARC Prize 2025 Results",
      "url": "https://arcprize.org/blog/arc-prize-2025-results-analysis",
      "resourceId": "f369a16dd38155b8",
      "resourceTitle": "ARC Prize 2024-2025 results"
    },
    {
      "text": "2025 AI Index Report",
      "url": "https://hai.stanford.edu/ai-index/2025-ai-index-report/technical-performance",
      "resourceId": "1a26f870e37dcc68",
      "resourceTitle": "Technical Performance - 2025 AI Index Report"
    }
  ],
  "unconvertedLinkCount": 14,
  "convertedLinkCount": 41,
  "backlinkCount": 6,
  "hallucinationRisk": {
    "level": "medium",
    "score": 55,
    "factors": [
      "no-citations"
    ]
  },
  "entityType": "capability",
  "redundancy": {
    "maxSimilarity": 23,
    "similarPages": [
      {
        "id": "scalable-oversight",
        "title": "Scalable Oversight",
        "path": "/knowledge-base/responses/scalable-oversight/",
        "similarity": 23
      },
      {
        "id": "language-models",
        "title": "Large Language Models",
        "path": "/knowledge-base/capabilities/language-models/",
        "similarity": 22
      },
      {
        "id": "agentic-ai",
        "title": "Agentic AI",
        "path": "/knowledge-base/capabilities/agentic-ai/",
        "similarity": 21
      },
      {
        "id": "scientific-research",
        "title": "Scientific Research Capabilities",
        "path": "/knowledge-base/capabilities/scientific-research/",
        "similarity": 21
      },
      {
        "id": "self-improvement",
        "title": "Self-Improvement and Recursive Enhancement",
        "path": "/knowledge-base/capabilities/self-improvement/",
        "similarity": 21
      }
    ]
  },
  "changeHistory": [
    {
      "date": "2026-02-18",
      "branch": "claude/fix-issue-240-N5irU",
      "title": "Surface tacticalValue in /wiki table and score 53 pages",
      "summary": "Added `tacticalValue` to `ExploreItem` interface, `getExploreItems()` mappings, the `/wiki` explore table (new sortable \"Tact.\" column), and the card view sort dropdown. Scored 49 new pages with tactical values (4 were already scored), bringing total to 53.",
      "model": "sonnet-4",
      "duration": "~30min"
    }
  ],
  "coverage": {
    "passing": 8,
    "total": 13,
    "targets": {
      "tables": 20,
      "diagrams": 2,
      "internalLinks": 39,
      "externalLinks": 25,
      "footnotes": 15,
      "references": 15
    },
    "actuals": {
      "tables": 10,
      "diagrams": 1,
      "internalLinks": 44,
      "externalLinks": 57,
      "footnotes": 0,
      "references": 21,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "green",
      "overview": "green",
      "tables": "amber",
      "diagrams": "amber",
      "internalLinks": "green",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "editHistoryCount": 1,
    "ratingsString": "N:4.2 R:6.8 A:5.5 C:7.5"
  },
  "readerRank": 11,
  "researchRank": 161,
  "recommendedScore": 197.86
}
External Links

No external links

Backlinks (6)
id | title | type | relationship
language-models | Large Language Models | capability |
__index__/knowledge-base/capabilities | AI Capabilities | concept |
agi-timeline-debate | When Will AGI Arrive? | crux |
agi-timeline | AGI Timeline | concept |
yoshua-bengio | Yoshua Bengio | person |
alignment | AI Alignment | approach |
Longterm Wiki