Tool Use and Computer Use
tool-use · capability · Path: /knowledge-base/capabilities/tool-use/
E356 — Entity ID (EID)
Page Record: database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
"id": "tool-use",
"numericId": null,
"path": "/knowledge-base/capabilities/tool-use/",
"filePath": "knowledge-base/capabilities/tool-use.mdx",
"title": "Tool Use and Computer Use",
"quality": 67,
"readerImportance": 91.5,
"researchImportance": 75.5,
"tacticalValue": 72,
"contentFormat": "article",
"tractability": null,
"neglectedness": null,
"uncertainty": null,
"causalLevel": null,
"lastUpdated": "2026-03-13",
"dateCreated": "2026-02-15",
"llmSummary": "Tool use capabilities achieved superhuman computer control in late 2025 (OSAgent: 76.26% vs 72% human baseline) and near-human coding (Claude Opus 4.5: 80.9% SWE-bench Verified), but prompt injection remains the #1 AI vulnerability affecting 73% of deployments with OpenAI admitting it 'may never be fully solved.' Only 34.7% of organizations have deployed defenses while 97M+ monthly MCP SDK downloads indicate rapid proliferation.",
"description": "AI systems' ability to interact with external tools and control computers represents a critical capability transition. As of late 2025, OSAgent achieved 76.26% on OSWorld (superhuman vs 72% human baseline), while SWE-bench performance reached 80.9% with Claude Opus 4.5. OpenAI acknowledges prompt injection 'may never be fully solved,' with OWASP ranking it #1 vulnerability in 73% of deployments.",
"ratings": {
"novelty": 4.5,
"rigor": 7.2,
"actionability": 6.8,
"completeness": 7.5
},
"category": "capabilities",
"subcategory": "agentic",
"clusters": [
"ai-safety",
"cyber"
],
"metrics": {
"wordCount": 3802,
"tableCount": 10,
"diagramCount": 1,
"internalLinks": 30,
"externalLinks": 36,
"footnoteCount": 0,
"bulletRatio": 0.12,
"sectionCount": 18,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 21,
"evergreen": true,
"wordCount": 3802,
"unconvertedLinks": [
{
"text": "SWE-bench Pro",
"url": "https://scale.com/leaderboard/swe_bench_pro_public",
"resourceId": "9dbe484d48b6787a",
"resourceTitle": "SWE-bench Pro Leaderboard - Scale AI"
},
{
"text": "SWE-bench Pro",
"url": "https://scale.com/leaderboard/swe_bench_pro_public",
"resourceId": "9dbe484d48b6787a",
"resourceTitle": "SWE-bench Pro Leaderboard - Scale AI"
},
{
"text": "Claude Opus 4.5 Announcement",
"url": "https://www.anthropic.com/news/claude-opus-4-5",
"resourceId": "57f01cae307e1cb1"
}
],
"unconvertedLinkCount": 3,
"convertedLinkCount": 28,
"backlinkCount": 6,
"hallucinationRisk": {
"level": "medium",
"score": 40,
"factors": [
"no-citations",
"high-rigor"
]
},
"entityType": "capability",
"redundancy": {
"maxSimilarity": 20,
"similarPages": [
{
"id": "agentic-ai",
"title": "Agentic AI",
"path": "/knowledge-base/capabilities/agentic-ai/",
"similarity": 20
},
{
"id": "reasoning",
"title": "Reasoning and Planning",
"path": "/knowledge-base/capabilities/reasoning/",
"similarity": 19
},
{
"id": "self-improvement",
"title": "Self-Improvement and Recursive Enhancement",
"path": "/knowledge-base/capabilities/self-improvement/",
"similarity": 19
},
{
"id": "large-language-models",
"title": "Large Language Models",
"path": "/knowledge-base/capabilities/large-language-models/",
"similarity": 18
},
{
"id": "metr",
"title": "METR",
"path": "/knowledge-base/organizations/metr/",
"similarity": 18
}
]
},
"changeHistory": [
{
"date": "2026-02-18",
"branch": "claude/fix-issue-240-N5irU",
"title": "Surface tacticalValue in /wiki table and score 53 pages",
"summary": "Added `tacticalValue` to `ExploreItem` interface, `getExploreItems()` mappings, the `/wiki` explore table (new sortable \"Tact.\" column), and the card view sort dropdown. Scored 49 new pages with tactical values (4 were already scored), bringing total to 53.",
"model": "sonnet-4",
"duration": "~30min"
}
],
"coverage": {
"passing": 8,
"total": 13,
"targets": {
"tables": 15,
"diagrams": 2,
"internalLinks": 30,
"externalLinks": 19,
"footnotes": 11,
"references": 11
},
"actuals": {
"tables": 10,
"diagrams": 1,
"internalLinks": 30,
"externalLinks": 36,
"footnotes": 0,
"references": 18,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"llmSummary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "green",
"overview": "green",
"tables": "amber",
"diagrams": "amber",
"internalLinks": "green",
"externalLinks": "green",
"footnotes": "red",
"references": "green",
"quotes": "red",
"accuracy": "red"
},
"editHistoryCount": 1,
"ratingsString": "N:4.5 R:7.2 A:6.8 C:7.5"
},
"readerRank": 13,
"researchRank": 113,
"recommendedScore": 201.61
}
External Links
No external links
Backlinks (6)
| id | title | type | relationship |
|---|---|---|---|
| coding | Autonomous Coding | capability | — |
| __index__/knowledge-base/capabilities | AI Capabilities | concept | — |
| language-models | Large Language Models | capability | — |
| large-language-models | Large Language Models | concept | — |
| agi-timeline | AGI Timeline | concept | — |
| corrigibility-failure-pathways | Corrigibility Failure Pathways | analysis | — |