Longterm Wiki

Tool Use and Computer Use

tool-use · capability · Path: /knowledge-base/capabilities/tool-use/
E356 · Entity ID (EID)
← Back to page · 6 backlinks · Quality: 67 · Updated: 2026-03-13
Page Record: database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "tool-use",
  "numericId": null,
  "path": "/knowledge-base/capabilities/tool-use/",
  "filePath": "knowledge-base/capabilities/tool-use.mdx",
  "title": "Tool Use and Computer Use",
  "quality": 67,
  "readerImportance": 91.5,
  "researchImportance": 75.5,
  "tacticalValue": 72,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "Tool use capabilities achieved superhuman computer control in late 2025 (OSAgent: 76.26% vs 72% human baseline) and near-human coding (Claude Opus 4.5: 80.9% SWE-bench Verified), but prompt injection remains the #1 AI vulnerability affecting 73% of deployments with OpenAI admitting it 'may never be fully solved.' Only 34.7% of organizations have deployed defenses while 97M+ monthly MCP SDK downloads indicate rapid proliferation.",
  "description": "AI systems' ability to interact with external tools and control computers represents a critical capability transition. As of late 2025, OSAgent achieved 76.26% on OSWorld (superhuman vs 72% human baseline), while SWE-bench performance reached 80.9% with Claude Opus 4.5. OpenAI acknowledges prompt injection 'may never be fully solved,' with OWASP ranking it #1 vulnerability in 73% of deployments.",
  "ratings": {
    "novelty": 4.5,
    "rigor": 7.2,
    "actionability": 6.8,
    "completeness": 7.5
  },
  "category": "capabilities",
  "subcategory": "agentic",
  "clusters": [
    "ai-safety",
    "cyber"
  ],
  "metrics": {
    "wordCount": 3802,
    "tableCount": 10,
    "diagramCount": 1,
    "internalLinks": 30,
    "externalLinks": 36,
    "footnoteCount": 0,
    "bulletRatio": 0.12,
    "sectionCount": 18,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 3802,
  "unconvertedLinks": [
    {
      "text": "SWE-bench Pro",
      "url": "https://scale.com/leaderboard/swe_bench_pro_public",
      "resourceId": "9dbe484d48b6787a",
      "resourceTitle": "SWE-bench Pro Leaderboard - Scale AI"
    },
    {
      "text": "SWE-bench Pro",
      "url": "https://scale.com/leaderboard/swe_bench_pro_public",
      "resourceId": "9dbe484d48b6787a",
      "resourceTitle": "SWE-bench Pro Leaderboard - Scale AI"
    },
    {
      "text": "Claude Opus 4.5 Announcement",
      "url": "https://www.anthropic.com/news/claude-opus-4-5",
      "resourceId": "57f01cae307e1cb1"
    }
  ],
  "unconvertedLinkCount": 3,
  "convertedLinkCount": 28,
  "backlinkCount": 6,
  "hallucinationRisk": {
    "level": "medium",
    "score": 40,
    "factors": [
      "no-citations",
      "high-rigor"
    ]
  },
  "entityType": "capability",
  "redundancy": {
    "maxSimilarity": 20,
    "similarPages": [
      {
        "id": "agentic-ai",
        "title": "Agentic AI",
        "path": "/knowledge-base/capabilities/agentic-ai/",
        "similarity": 20
      },
      {
        "id": "reasoning",
        "title": "Reasoning and Planning",
        "path": "/knowledge-base/capabilities/reasoning/",
        "similarity": 19
      },
      {
        "id": "self-improvement",
        "title": "Self-Improvement and Recursive Enhancement",
        "path": "/knowledge-base/capabilities/self-improvement/",
        "similarity": 19
      },
      {
        "id": "large-language-models",
        "title": "Large Language Models",
        "path": "/knowledge-base/capabilities/large-language-models/",
        "similarity": 18
      },
      {
        "id": "metr",
        "title": "METR",
        "path": "/knowledge-base/organizations/metr/",
        "similarity": 18
      }
    ]
  },
  "changeHistory": [
    {
      "date": "2026-02-18",
      "branch": "claude/fix-issue-240-N5irU",
      "title": "Surface tacticalValue in /wiki table and score 53 pages",
      "summary": "Added `tacticalValue` to `ExploreItem` interface, `getExploreItems()` mappings, the `/wiki` explore table (new sortable \"Tact.\" column), and the card view sort dropdown. Scored 49 new pages with tactical values (4 were already scored), bringing total to 53.",
      "model": "sonnet-4",
      "duration": "~30min"
    }
  ],
  "coverage": {
    "passing": 8,
    "total": 13,
    "targets": {
      "tables": 15,
      "diagrams": 2,
      "internalLinks": 30,
      "externalLinks": 19,
      "footnotes": 11,
      "references": 11
    },
    "actuals": {
      "tables": 10,
      "diagrams": 1,
      "internalLinks": 30,
      "externalLinks": 36,
      "footnotes": 0,
      "references": 18,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "green",
      "overview": "green",
      "tables": "amber",
      "diagrams": "amber",
      "internalLinks": "green",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "editHistoryCount": 1,
    "ratingsString": "N:4.5 R:7.2 A:6.8 C:7.5"
  },
  "readerRank": 13,
  "researchRank": 113,
  "recommendedScore": 201.61
}
External Links

No external links

Backlinks (6)
id | title | type | relationship
coding | Autonomous Coding | capability |
__index__/knowledge-base/capabilities | AI Capabilities | concept |
language-models | Large Language Models | capability |
large-language-models | Large Language Models | concept |
agi-timeline | AGI Timeline | concept |
corrigibility-failure-pathways | Corrigibility Failure Pathways | analysis |
Longterm Wiki