Longterm Wiki

AI Evaluation

ID: evaluation · Type: approach · Path: /knowledge-base/responses/evaluation/
Entity ID (EID): E447
14 backlinks · Quality: 72 · Updated: 2026-03-13
Page Record
database.json — merged from MDX frontmatter, Entity YAML, and computed metrics at build time.
{
  "id": "evaluation",
  "numericId": null,
  "path": "/knowledge-base/responses/evaluation/",
  "filePath": "knowledge-base/responses/evaluation.mdx",
  "title": "AI Evaluation",
  "quality": 72,
  "readerImportance": 78.5,
  "researchImportance": 84,
  "tacticalValue": 75,
  "contentFormat": "article",
  "tractability": null,
  "neglectedness": null,
  "uncertainty": null,
  "causalLevel": null,
  "lastUpdated": "2026-03-13",
  "dateCreated": "2026-02-15",
  "llmSummary": "Comprehensive overview of AI evaluation methods spanning dangerous capability assessment, safety properties, and deception detection, with categorized frameworks from industry (Anthropic Constitutional AI, OpenAI Model Spec) and government institutes (UK/US AISI). Identifies critical gaps in evaluation gaming, novel capability coverage, and scalability constraints while noting maturity varies from prototype (bioweapons) to production (Constitutional AI).",
  "description": "Methods and frameworks for evaluating AI system safety, capabilities, and alignment properties before deployment, including dangerous capability detection, robustness testing, and deceptive behavior assessment.",
  "ratings": {
    "novelty": 5,
    "rigor": 6.5,
    "actionability": 7,
    "completeness": 7
  },
  "category": "responses",
  "subcategory": "alignment-evaluation",
  "clusters": [
    "ai-safety",
    "governance"
  ],
  "metrics": {
    "wordCount": 1671,
    "tableCount": 11,
    "diagramCount": 0,
    "internalLinks": 66,
    "externalLinks": 24,
    "footnoteCount": 0,
    "bulletRatio": 0.33,
    "sectionCount": 28,
    "hasOverview": true,
    "structuralScore": 13
  },
  "suggestedQuality": 87,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 1671,
  "unconvertedLinks": [
    {
      "text": "METR Evals",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "metr.org"
    },
    {
      "text": "RSP Evaluations",
      "url": "https://www.anthropic.com/rsp-updates",
      "resourceId": "c6766d463560b923",
      "resourceTitle": "Anthropic pioneered the Responsible Scaling Policy"
    },
    {
      "text": "Scheming Evals",
      "url": "https://www.apolloresearch.ai/research/",
      "resourceId": "560dff85b3305858",
      "resourceTitle": "Apollo Research"
    },
    {
      "text": "NIST AI RMF",
      "url": "https://www.nist.gov/itl/ai-risk-management-framework",
      "resourceId": "54dbc15413425997",
      "resourceTitle": "NIST AI Risk Management Framework"
    },
    {
      "text": "Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "5x more likely",
      "url": "https://www.aisi.gov.uk/blog/5-key-findings-from-our-first-frontier-ai-trends-report",
      "resourceId": "8a9de448c7130623",
      "resourceTitle": "nearly 5x more likely"
    },
    {
      "text": "Apollo Research",
      "url": "https://www.apolloresearch.ai/",
      "resourceId": "329d8c2e2532be3d",
      "resourceTitle": "Apollo Research"
    },
    {
      "text": "anti-scheming training method",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    },
    {
      "text": "universal jailbreaks",
      "url": "https://www.aisi.gov.uk/blog/5-key-findings-from-our-first-frontier-ai-trends-report",
      "resourceId": "8a9de448c7130623",
      "resourceTitle": "nearly 5x more likely"
    },
    {
      "text": "NIST Cybersecurity Framework Profile for AI",
      "url": "https://www.nist.gov/news-events/news/2025/12/draft-nist-guidelines-rethink-cybersecurity-ai-era",
      "resourceId": "579ec2c3e039a7a6",
      "resourceTitle": "NIST: Draft Cybersecurity Framework for AI"
    },
    {
      "text": "GPAI",
      "url": "https://gpai.ai/",
      "resourceId": "4c8c69d2914fc04d",
      "resourceTitle": "GPAI"
    },
    {
      "text": "UK AI Security Institute Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "Anthropic RSP 2.2",
      "url": "https://www.anthropic.com/responsible-scaling-policy",
      "resourceId": "afe1e125f3ba3f14"
    },
    {
      "text": "OpenAI-Apollo anti-scheming partnership",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    }
  ],
  "unconvertedLinkCount": 14,
  "convertedLinkCount": 33,
  "backlinkCount": 14,
  "hallucinationRisk": {
    "level": "medium",
    "score": 45,
    "factors": [
      "no-citations",
      "conceptual-content"
    ]
  },
  "entityType": "approach",
  "redundancy": {
    "maxSimilarity": 17,
    "similarPages": [
      {
        "id": "dangerous-cap-evals",
        "title": "Dangerous Capability Evaluations",
        "path": "/knowledge-base/responses/dangerous-cap-evals/",
        "similarity": 17
      },
      {
        "id": "red-teaming",
        "title": "Red Teaming",
        "path": "/knowledge-base/responses/red-teaming/",
        "similarity": 17
      },
      {
        "id": "power-seeking-conditions",
        "title": "Power-Seeking Emergence Conditions Model",
        "path": "/knowledge-base/models/power-seeking-conditions/",
        "similarity": 16
      },
      {
        "id": "model-auditing",
        "title": "Third-Party Model Auditing",
        "path": "/knowledge-base/responses/model-auditing/",
        "similarity": 16
      },
      {
        "id": "accident-risks",
        "title": "AI Accident Risk Cruxes",
        "path": "/knowledge-base/cruxes/accident-risks/",
        "similarity": 15
      }
    ]
  },
  "coverage": {
    "passing": 8,
    "total": 13,
    "targets": {
      "tables": 7,
      "diagrams": 1,
      "internalLinks": 13,
      "externalLinks": 8,
      "footnotes": 5,
      "references": 5
    },
    "actuals": {
      "tables": 11,
      "diagrams": 0,
      "internalLinks": 66,
      "externalLinks": 24,
      "footnotes": 0,
      "references": 31,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "llmSummary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "red",
      "overview": "green",
      "tables": "green",
      "diagrams": "red",
      "internalLinks": "green",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "ratingsString": "N:5 R:6.5 A:7 C:7"
  },
  "readerRank": 97,
  "researchRank": 65,
  "recommendedScore": 204.83
}
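Merge sketch (TypeScript)
The record above is described as database.json merged at build time from MDX frontmatter, Entity YAML, and computed metrics. Below is a minimal sketch of that assembly, assuming a Node build step using the gray-matter and yaml packages; the file layout, field mapping, and merge precedence are assumptions for illustration, not the wiki's actual code.

// Hypothetical build step: assemble a page record like the one above.
// Loader layout and merge precedence are assumptions.
import { readFileSync } from "node:fs";
import matter from "gray-matter";
import { parse as parseYaml } from "yaml";

interface PageRecord {
  id: string;
  path: string;
  title: string;
  metrics: Record<string, number>;
  [key: string]: unknown;
}

function buildPageRecord(mdxPath: string, entityYamlPath: string): PageRecord {
  // 1. MDX frontmatter: title, quality, llmSummary, ratings, ...
  const { data: frontmatter, content } = matter(readFileSync(mdxPath, "utf8"));
  // 2. Entity YAML: entityType, clusters, external links, ...
  const entity = parseYaml(readFileSync(entityYamlPath, "utf8"));
  // 3. Metrics computed from the page body (wordCount, link counts, ...)
  const metrics: Record<string, number> = {
    wordCount: content.split(/\s+/).filter(Boolean).length,
    internalLinks: (content.match(/\]\(\/knowledge-base\//g) ?? []).length,
    externalLinks: (content.match(/\]\(https?:\/\//g) ?? []).length,
  };
  // Later sources win on key collisions: frontmatter < entity < computed.
  return { ...frontmatter, ...entity, metrics } as PageRecord;
}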
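Coverage rule sketch (TypeScript)
The coverage block pairs per-item targets with actuals and reports each item as green or red. The counts in the record are consistent with a simple threshold rule (tables 11 ≥ 7 → green; diagrams 0 < 1 → red; footnotes 0 < 5 → red). This is an inference from the record, not documented behavior; the non-count items (llmSummary, editHistory, quotes, accuracy) would need separate checks not shown here.

type Status = "green" | "red";

// Inferred rule: an item passes when its actual count meets its target.
function coverageItems(
  targets: Record<string, number>,
  actuals: Record<string, number>,
): Record<string, Status> {
  const items: Record<string, Status> = {};
  for (const [key, target] of Object.entries(targets)) {
    items[key] = (actuals[key] ?? 0) >= target ? "green" : "red";
  }
  return items;
}

// Reproduces the record's count-based items:
// { tables: "green", diagrams: "red", internalLinks: "green",
//   externalLinks: "green", footnotes: "red", references: "green" }
coverageItems(
  { tables: 7, diagrams: 1, internalLinks: 13, externalLinks: 8, footnotes: 5, references: 5 },
  { tables: 11, diagrams: 0, internalLinks: 66, externalLinks: 24, footnotes: 0, references: 31 },
);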
External Links
{
  "lesswrong": "https://www.lesswrong.com/tag/ai-evaluations",
  "eaForum": "https://forum.effectivealtruism.org/topics/ai-evaluations-and-standards"
}
Backlinks (14)
id | title | type | relationship
agi-development | AGI Development | concept |
intervention-effectiveness-matrix | Intervention Effectiveness Matrix | analysis |
planning-for-frontier-lab-scaling | Planning for Frontier Lab Scaling | analysis |
racing-dynamics-impact | Racing Dynamics Impact Model | analysis |
risk-activation-timeline | Risk Activation Timeline Model | analysis |
nist-ai | NIST and AI Safety | organization |
uk-aisi | UK AI Safety Institute | organization |
holden-karnofsky | Holden Karnofsky | person |
paul-christiano | Paul Christiano | person |
constitutional-ai | Constitutional AI | approach |
corporate | Corporate AI Safety Responses | approach |
evals-governance | Evals-Based Deployment Gates | policy |
international-summits | International AI Safety Summits | policy |
red-teaming | Red Teaming | approach |