AI Evaluation

evaluationapproachPath: /knowledge-base/responses/evaluation/

E447Entity ID (EID)

← Back to page14 backlinksQuality: 72Updated: 2026-01-28

Page Recorddatabase.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time

{
  "id": "evaluation",
  "wikiId": "E447",
  "path": "/knowledge-base/responses/evaluation/",
  "filePath": "knowledge-base/responses/evaluation.mdx",
  "title": "AI Evaluation",
  "quality": 72,
  "readerImportance": 78.5,
  "researchImportance": 84,
  "tacticalValue": 75,
  "contentFormat": "article",
  "causalLevel": null,
  "lastUpdated": "2026-01-28",
  "dateCreated": "2026-02-15",
  "summary": "Comprehensive overview of AI evaluation methods spanning dangerous capability assessment, safety properties, and deception detection, with categorized frameworks from industry (Anthropic Constitutional AI, OpenAI Model Spec) and government institutes (UK/US AISI). Identifies critical gaps in evaluation gaming, novel capability coverage, and scalability constraints while noting maturity varies from prototype (bioweapons) to production (Constitutional AI).",
  "description": "Methods and frameworks for evaluating AI system safety, capabilities, and alignment properties before deployment, including dangerous capability detection, robustness testing, and deceptive behavior assessment.",
  "ratings": {
    "novelty": 5,
    "rigor": 6.5,
    "completeness": 7,
    "actionability": 7
  },
  "category": "responses",
  "subcategory": "alignment-evaluation",
  "clusters": [
    "ai-safety",
    "governance"
  ],
  "metrics": {
    "wordCount": 1671,
    "tableCount": 11,
    "diagramCount": 0,
    "internalLinks": 66,
    "externalLinks": 24,
    "footnoteCount": 0,
    "bulletRatio": 0.33,
    "sectionCount": 28,
    "hasOverview": true,
    "structuralScore": 13
  },
  "suggestedQuality": 87,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 1671,
  "unconvertedLinks": [
    {
      "text": "en.wikipedia.org",
      "url": "https://en.wikipedia.org/wiki/Artificial_intelligence",
      "resourceId": "aef977fc2141d0a3",
      "resourceTitle": "Artificial Intelligence - Wikipedia"
    },
    {
      "text": "METR Evals",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "METR: Model Evaluation and Threat Research"
    },
    {
      "text": "RSP Evaluations",
      "url": "https://www.anthropic.com/rsp-updates",
      "resourceId": "c6766d463560b923",
      "resourceTitle": "Anthropic pioneered the Responsible Scaling Policy"
    },
    {
      "text": "Scheming Evals",
      "url": "https://www.apolloresearch.ai/research/",
      "resourceId": "560dff85b3305858",
      "resourceTitle": "Apollo Research — Research Overview"
    },
    {
      "text": "NIST AI RMF",
      "url": "https://www.nist.gov/itl/ai-risk-management-framework",
      "resourceId": "54dbc15413425997",
      "resourceTitle": "NIST AI Risk Management Framework"
    },
    {
      "text": "Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "5x more likely",
      "url": "https://www.aisi.gov.uk/blog/5-key-findings-from-our-first-frontier-ai-trends-report",
      "resourceId": "8a9de448c7130623",
      "resourceTitle": "nearly 5x more likely"
    },
    {
      "text": "Apollo Research",
      "url": "https://www.apolloresearch.ai/",
      "resourceId": "329d8c2e2532be3d",
      "resourceTitle": "Apollo Research - AI Safety Evaluation Organization"
    },
    {
      "text": "anti-scheming training method",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    },
    {
      "text": "universal jailbreaks",
      "url": "https://www.aisi.gov.uk/blog/5-key-findings-from-our-first-frontier-ai-trends-report",
      "resourceId": "8a9de448c7130623",
      "resourceTitle": "nearly 5x more likely"
    },
    {
      "text": "NIST Cybersecurity Framework Profile for AI",
      "url": "https://www.nist.gov/news-events/news/2025/12/draft-nist-guidelines-rethink-cybersecurity-ai-era",
      "resourceId": "579ec2c3e039a7a6",
      "resourceTitle": "NIST: Draft Cybersecurity Framework for AI"
    },
    {
      "text": "UK AI Security Institute Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "Anthropic RSP 2.2",
      "url": "https://www.anthropic.com/responsible-scaling-policy",
      "resourceId": "afe1e125f3ba3f14",
      "resourceTitle": "Responsible Scaling Policy"
    },
    {
      "text": "OpenAI-Apollo anti-scheming partnership",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    }
  ],
  "unconvertedLinkCount": 14,
  "convertedLinkCount": 33,
  "backlinkCount": 14,
  "hallucinationRisk": {
    "level": "medium",
    "score": 45,
    "factors": [
      "no-citations",
      "conceptual-content"
    ]
  },
  "entityType": "approach",
  "redundancy": {
    "maxSimilarity": 17,
    "similarPages": [
      {
        "id": "dangerous-cap-evals",
        "title": "Dangerous Capability Evaluations",
        "path": "/knowledge-base/responses/dangerous-cap-evals/",
        "similarity": 17
      },
      {
        "id": "red-teaming",
        "title": "Red Teaming",
        "path": "/knowledge-base/responses/red-teaming/",
        "similarity": 17
      },
      {
        "id": "power-seeking-conditions",
        "title": "Power-Seeking Emergence Conditions Model",
        "path": "/knowledge-base/models/power-seeking-conditions/",
        "similarity": 16
      },
      {
        "id": "model-auditing",
        "title": "Third-Party Model Auditing",
        "path": "/knowledge-base/responses/model-auditing/",
        "similarity": 16
      },
      {
        "id": "capability-threshold-model",
        "title": "Capability Threshold Model",
        "path": "/knowledge-base/models/capability-threshold-model/",
        "similarity": 15
      }
    ]
  },
  "coverage": {
    "passing": 8,
    "total": 13,
    "targets": {
      "tables": 7,
      "diagrams": 1,
      "internalLinks": 13,
      "externalLinks": 8,
      "footnotes": 5,
      "references": 5
    },
    "actuals": {
      "tables": 11,
      "diagrams": 0,
      "internalLinks": 66,
      "externalLinks": 24,
      "footnotes": 0,
      "references": 31,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "summary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "red",
      "overview": "green",
      "tables": "green",
      "diagrams": "red",
      "internalLinks": "green",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "ratingsString": "N:5 R:6.5 A:7 C:7"
  },
  "readerRank": 96,
  "researchRank": 64,
  "recommendedScore": 194.53
}

External Links

{
  "lesswrong": "https://www.lesswrong.com/tag/ai-evaluations",
  "eaForum": "https://forum.effectivealtruism.org/topics/ai-evaluations-and-standards"
}

Backlinks (14)

id	title	type	relationship
agi-development	AGI Development	concept	—
intervention-effectiveness-matrix	Intervention Effectiveness Matrix	analysis	—
planning-for-frontier-lab-scaling	Planning for Frontier Lab Scaling	analysis	—
racing-dynamics-impact	Racing Dynamics Impact Model	analysis	—
risk-activation-timeline	Risk Activation Timeline Model	analysis	—
nist-ai	NIST and AI Safety	organization	—
uk-aisi	UK AI Safety Institute	organization	—
holden-karnofsky	Holden Karnofsky	person	—
paul-christiano	Paul Christiano	person	—
constitutional-ai	Constitutional AI	approach	—
corporate	Corporate AI Safety Responses	approach	—
evals-governance	Evals-Based Deployment Gates	approach	—
international-summits	International AI Safety Summits	event	—
red-teaming	Red Teaming	research-area	—