ForecastBench
forecastbench · project · Path: /knowledge-base/responses/forecastbench/
E144 · Entity ID (EID)
Page Record: database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
"id": "forecastbench",
"numericId": null,
"path": "/knowledge-base/responses/forecastbench/",
"filePath": "knowledge-base/responses/forecastbench.mdx",
"title": "ForecastBench",
"quality": 53,
"readerImportance": 20,
"researchImportance": 32.5,
"tacticalValue": null,
"contentFormat": "article",
"tractability": null,
"neglectedness": null,
"uncertainty": null,
"causalLevel": null,
"lastUpdated": "2026-03-13",
"dateCreated": "2026-02-15",
"llmSummary": "ForecastBench is a dynamic, contamination-free benchmark with 1,000 continuously-updated questions comparing LLM forecasting to superforecasters. GPT-4.5 achieves 0.101 Brier score vs 0.081 for superforecasters; linear extrapolation projects LLMs will match human experts by November 2026 (95% CI: Dec 2025 – Jan 2028).",
"description": "A dynamic, contamination-free benchmark for evaluating large language model forecasting capabilities, published at ICLR 2025. With 1,000 continuously-updated questions about future events, ForecastBench compares LLMs to superforecasters and finds GPT-4.5 (Feb 2025) achieves 0.101 difficulty-adjusted Brier score vs 0.081 for superforecasters—linear extrapolation suggests LLMs will match human superforecasters by November 2026 (95% CI: December 2025 – January 2028).",
"ratings": {
"novelty": 5,
"rigor": 6.5,
"actionability": 4.5,
"completeness": 7
},
"category": "responses",
"subcategory": "epistemic-platforms",
"clusters": [
"epistemics",
"ai-safety"
],
"metrics": {
"wordCount": 1899,
"tableCount": 21,
"diagramCount": 1,
"internalLinks": 12,
"externalLinks": 11,
"footnoteCount": 0,
"bulletRatio": 0.05,
"sectionCount": 31,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 45,
"evergreen": true,
"wordCount": 1899,
"unconvertedLinks": [
{
"text": "FRI Project Page",
"url": "https://forecastingresearch.org/",
"resourceId": "46c32aeaf3c3caac",
"resourceTitle": "Forecasting Research Institute"
}
],
"unconvertedLinkCount": 1,
"convertedLinkCount": 0,
"backlinkCount": 6,
"hallucinationRisk": {
"level": "medium",
"score": 55,
"factors": [
"no-citations"
]
},
"entityType": "project",
"redundancy": {
"maxSimilarity": 16,
"similarPages": [
{
"id": "ai-forecasting-benchmark",
"title": "AI Forecasting Benchmark Tournament",
"path": "/knowledge-base/responses/ai-forecasting-benchmark/",
"similarity": 16
},
{
"id": "fri",
"title": "Forecasting Research Institute",
"path": "/knowledge-base/organizations/fri/",
"similarity": 12
},
{
"id": "metaforecast",
"title": "Metaforecast",
"path": "/knowledge-base/responses/metaforecast/",
"similarity": 11
},
{
"id": "squiggleai",
"title": "SquiggleAI",
"path": "/knowledge-base/responses/squiggleai/",
"similarity": 11
},
{
"id": "xpt",
"title": "XPT (Existential Risk Persuasion Tournament)",
"path": "/knowledge-base/responses/xpt/",
"similarity": 11
}
]
},
"coverage": {
"passing": 7,
"total": 13,
"targets": {
"tables": 8,
"diagrams": 1,
"internalLinks": 15,
"externalLinks": 9,
"footnotes": 6,
"references": 6
},
"actuals": {
"tables": 21,
"diagrams": 1,
"internalLinks": 12,
"externalLinks": 11,
"footnotes": 0,
"references": 1,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"llmSummary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "red",
"overview": "green",
"tables": "green",
"diagrams": "green",
"internalLinks": "amber",
"externalLinks": "green",
"footnotes": "red",
"references": "amber",
"quotes": "red",
"accuracy": "red"
},
"ratingsString": "N:5 R:6.5 A:4.5 C:7"
},
"readerRank": 521,
"researchRank": 398,
"recommendedScore": 137.61
}
External Links
No external links
Backlinks (6)
| id | title | type | relationship |
|---|---|---|---|
| bridgewater-aia-labs | Bridgewater AIA Labs | organization | — |
| epistemic-orgs-overview | Epistemic & Forecasting Organizations (Overview) | concept | — |
| fri | Forecasting Research Institute | organization | — |
| ai-forecasting-benchmark | AI Forecasting Benchmark Tournament | project | — |
| epistemic-tools-tools-overview | Tools & Platforms (Overview) | concept | — |
| xpt | XPT (Existential Risk Persuasion Tournament) | project | — |