Third-Party Model Auditing

model-auditingapproachPath: /knowledge-base/responses/model-auditing/

E450Entity ID (EID)

← Back to page1 backlinksQuality: 64Updated: 2026-01-29

Page Recorddatabase.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time

{
  "id": "model-auditing",
  "wikiId": "E450",
  "path": "/knowledge-base/responses/model-auditing/",
  "filePath": "knowledge-base/responses/model-auditing.mdx",
  "title": "Third-Party Model Auditing",
  "quality": 64,
  "readerImportance": 76.5,
  "researchImportance": 32.5,
  "tacticalValue": 75,
  "contentFormat": "article",
  "causalLevel": null,
  "lastUpdated": "2026-01-29",
  "dateCreated": "2026-02-15",
  "summary": "Third-party auditing organizations (METR, Apollo, UK/US AISIs) now evaluate all major frontier models pre-deployment, discovering that AI task horizons double every 7 months (GPT-5: 2h17m), 5/6 models show scheming with o1 maintaining deception in >85% of follow-ups, and universal jailbreaks exist in all tested systems though safeguard effort increased 40x. Field evolved from voluntary arrangements to EU AI Act mandatory requirements (Aug 2026) and formal US government MOUs (Aug 2024), with ~\\$30-50M annual investment across ecosystem but faces fundamental limits as auditors cannot detect sophisticated deception.",
  "description": "External organizations independently assess AI models for safety and dangerous capabilities.",
  "ratings": {
    "novelty": 4.5,
    "rigor": 7,
    "completeness": 7.5,
    "actionability": 6.5
  },
  "category": "responses",
  "subcategory": "alignment-evaluation",
  "clusters": [
    "ai-safety",
    "governance"
  ],
  "metrics": {
    "wordCount": 3766,
    "tableCount": 21,
    "diagramCount": 2,
    "internalLinks": 9,
    "externalLinks": 85,
    "footnoteCount": 0,
    "bulletRatio": 0.12,
    "sectionCount": 40,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 3766,
  "unconvertedLinks": [
    {
      "text": "METR",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "METR: Model Evaluation and Threat Research"
    },
    {
      "text": "Apollo Research",
      "url": "https://www.apolloresearch.ai/",
      "resourceId": "329d8c2e2532be3d",
      "resourceTitle": "Apollo Research - AI Safety Evaluation Organization"
    },
    {
      "text": "US AI Safety Institute signed formal agreements",
      "url": "https://www.nist.gov/news-events/news/2024/08/us-ai-safety-institute-signs-agreements-regarding-ai-safety-research",
      "resourceId": "627bb42e8f74be04",
      "resourceTitle": "MOU with US AI Safety Institute"
    },
    {
      "text": "AI Security Institute",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "UK AI Safety Institute (AISI)"
    },
    {
      "text": "December 2024 assessment of OpenAI's o1 model",
      "url": "https://www.aisi.gov.uk/blog/pre-deployment-evaluation-of-openais-o1-model",
      "resourceId": "e23f70e673a090c1",
      "resourceTitle": "Pre-Deployment evaluation of OpenAI's o1 model"
    },
    {
      "text": "METR's research",
      "url": "https://arxiv.org/html/2503.14499v1",
      "resourceId": "324cd2230cbea396",
      "resourceTitle": "Measuring AI Long Tasks - arXiv"
    },
    {
      "text": "GPT-5 evaluation",
      "url": "https://evaluations.metr.org/gpt-5-report/",
      "resourceId": "7457262d461e2206",
      "resourceTitle": "Details about METR’s evaluation of OpenAI GPT-5"
    },
    {
      "text": "Apollo's follow-up research",
      "url": "https://www.apolloresearch.ai/blog/more-capable-models-are-better-at-in-context-scheming/",
      "resourceId": "80c6d6eca17dc925",
      "resourceTitle": "More capable models scheme at higher rates"
    },
    {
      "text": "partnership with OpenAI",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    },
    {
      "text": "Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "over 7 hours of expert effort",
      "url": "https://www.aisi.gov.uk/blog/5-key-findings-from-our-first-frontier-ai-trends-report",
      "resourceId": "8a9de448c7130623",
      "resourceTitle": "nearly 5x more likely"
    },
    {
      "text": "METR",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "METR: Model Evaluation and Threat Research"
    },
    {
      "text": "task horizon research",
      "url": "https://arxiv.org/html/2503.14499v1",
      "resourceId": "324cd2230cbea396",
      "resourceTitle": "Measuring AI Long Tasks - arXiv"
    },
    {
      "text": "Apollo Research",
      "url": "https://www.apolloresearch.ai/",
      "resourceId": "329d8c2e2532be3d",
      "resourceTitle": "Apollo Research - AI Safety Evaluation Organization"
    },
    {
      "text": "OpenAI",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    },
    {
      "text": "UK AI Security Institute",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "UK AI Safety Institute (AISI)"
    },
    {
      "text": "All major labs",
      "url": "https://www.aisi.gov.uk/blog/our-2025-year-in-review",
      "resourceId": "3dec5f974c5da5ec",
      "resourceTitle": "Our 2025 Year in Review"
    },
    {
      "text": "30+ models evaluated",
      "url": "https://www.aisi.gov.uk/blog/our-2025-year-in-review",
      "resourceId": "3dec5f974c5da5ec",
      "resourceTitle": "Our 2025 Year in Review"
    },
    {
      "text": "US AI Safety Institute (NIST)",
      "url": "https://www.nist.gov/caisi",
      "resourceId": "94173523d006b3b4",
      "resourceTitle": "NIST Center for AI Standards and Innovation (CAISI)"
    },
    {
      "text": "Anthropic, OpenAI MOUs",
      "url": "https://www.nist.gov/news-events/news/2024/08/us-ai-safety-institute-signs-agreements-regarding-ai-safety-research",
      "resourceId": "627bb42e8f74be04",
      "resourceTitle": "MOU with US AI Safety Institute"
    },
    {
      "text": "300+ consortium members",
      "url": "https://www.nist.gov/news-events/news/us-ai-safety-institute-consortium-holds-first-plenary-meeting-reflect-progress-2024",
      "resourceId": "2ef355efe9937701",
      "resourceTitle": "First AISIC plenary meeting"
    },
    {
      "text": "UK AISI Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "UK AISI",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "UK AISI",
      "url": "https://www.aisi.gov.uk/blog/5-key-findings-from-our-first-frontier-ai-trends-report",
      "resourceId": "8a9de448c7130623",
      "resourceTitle": "nearly 5x more likely"
    },
    {
      "text": "UK AISI",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "METR GPT-5 Evaluation",
      "url": "https://evaluations.metr.org/gpt-5-report/",
      "resourceId": "7457262d461e2206",
      "resourceTitle": "Details about METR’s evaluation of OpenAI GPT-5"
    },
    {
      "text": "METR",
      "url": "https://arxiv.org/html/2503.14499v1",
      "resourceId": "324cd2230cbea396",
      "resourceTitle": "Measuring AI Long Tasks - arXiv"
    },
    {
      "text": "METR",
      "url": "https://metr.org/research/",
      "resourceId": "a4652ab64ea54b52",
      "resourceTitle": "Evaluation Methodology"
    },
    {
      "text": "OpenAI",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    },
    {
      "text": "UK AISI",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "UK AISI",
      "url": "https://www.aisi.gov.uk/blog/5-key-findings-from-our-first-frontier-ai-trends-report",
      "resourceId": "8a9de448c7130623",
      "resourceTitle": "nearly 5x more likely"
    },
    {
      "text": "UK AISI",
      "url": "https://www.aisi.gov.uk/blog/our-2025-year-in-review",
      "resourceId": "3dec5f974c5da5ec",
      "resourceTitle": "Our 2025 Year in Review"
    },
    {
      "text": "NIST",
      "url": "https://www.nist.gov/news-events/news/us-ai-safety-institute-consortium-holds-first-plenary-meeting-reflect-progress-2024",
      "resourceId": "2ef355efe9937701",
      "resourceTitle": "First AISIC plenary meeting"
    },
    {
      "text": "signed MOUs with Anthropic and OpenAI",
      "url": "https://www.nist.gov/news-events/news/2024/08/us-ai-safety-institute-signs-agreements-regarding-ai-safety-research",
      "resourceId": "627bb42e8f74be04",
      "resourceTitle": "MOU with US AI Safety Institute"
    },
    {
      "text": "NIST",
      "url": "https://www.nist.gov/caisi",
      "resourceId": "94173523d006b3b4",
      "resourceTitle": "NIST Center for AI Standards and Innovation (CAISI)"
    },
    {
      "text": "AI Security Institute",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "UK AI Safety Institute (AISI)"
    },
    {
      "text": "International Network of AISIs",
      "url": "https://www.nist.gov/news-events/news/2024/11/fact-sheet-us-department-commerce-us-department-state-launch-international",
      "resourceId": "a65ad4f1a30f1737",
      "resourceTitle": "International Network of AI Safety Institutes"
    },
    {
      "text": "NIST",
      "url": "https://www.nist.gov/news-events/news/2024/11/fact-sheet-us-department-commerce-us-department-state-launch-international",
      "resourceId": "a65ad4f1a30f1737",
      "resourceTitle": "International Network of AI Safety Institutes"
    },
    {
      "text": "EU AI Act",
      "url": "https://artificialintelligenceact.eu/",
      "resourceId": "1ad6dc89cded8b0c",
      "resourceTitle": "EU AI Act – Official Resource Hub"
    },
    {
      "text": "International Network of AI Safety Institutes",
      "url": "https://www.nist.gov/news-events/news/2024/11/fact-sheet-us-department-commerce-us-department-state-launch-international",
      "resourceId": "a65ad4f1a30f1737",
      "resourceTitle": "International Network of AI Safety Institutes"
    },
    {
      "text": "METR's analysis",
      "url": "https://metr.org/common-elements",
      "resourceId": "30b9f5e826260d9d",
      "resourceTitle": "METR: Common Elements of Frontier AI Safety Policies"
    },
    {
      "text": "Anthropic RSP framework",
      "url": "https://www.anthropic.com/responsible-scaling-policy",
      "resourceId": "afe1e125f3ba3f14",
      "resourceTitle": "Responsible Scaling Policy"
    },
    {
      "text": "activated ASL-3 protections",
      "url": "https://www.anthropic.com/news/activating-asl3-protections",
      "resourceId": "7512ddb574f82249",
      "resourceTitle": "Activating AI Safety Level 3 protections"
    },
    {
      "text": "aisi.gov.uk",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "metr.org",
      "url": "https://metr.org/common-elements",
      "resourceId": "30b9f5e826260d9d",
      "resourceTitle": "METR: Common Elements of Frontier AI Safety Policies"
    },
    {
      "text": "NIST",
      "url": "https://www.nist.gov/news-events/news/2024/08/us-ai-safety-institute-signs-agreements-regarding-ai-safety-research",
      "resourceId": "627bb42e8f74be04",
      "resourceTitle": "MOU with US AI Safety Institute"
    },
    {
      "text": "openai.com",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    },
    {
      "text": "anthropic.com",
      "url": "https://www.anthropic.com/responsible-scaling-policy",
      "resourceId": "afe1e125f3ba3f14",
      "resourceTitle": "Responsible Scaling Policy"
    },
    {
      "text": "METR",
      "url": "https://metr.org/",
      "resourceId": "45370a5153534152",
      "resourceTitle": "METR: Model Evaluation and Threat Research"
    },
    {
      "text": "task horizon research",
      "url": "https://arxiv.org/html/2503.14499v1",
      "resourceId": "324cd2230cbea396",
      "resourceTitle": "Measuring AI Long Tasks - arXiv"
    },
    {
      "text": "evaluated GPT-4.5",
      "url": "https://metr.org/blog/2025-02-27-gpt-4-5-evals/",
      "resourceId": "a86b4f04559de6da",
      "resourceTitle": "METR’s GPT-4.5 pre-deployment evaluations"
    },
    {
      "text": "GPT-5",
      "url": "https://evaluations.metr.org/gpt-5-report/",
      "resourceId": "7457262d461e2206",
      "resourceTitle": "Details about METR’s evaluation of OpenAI GPT-5"
    },
    {
      "text": "Apollo Research",
      "url": "https://www.apolloresearch.ai/",
      "resourceId": "329d8c2e2532be3d",
      "resourceTitle": "Apollo Research - AI Safety Evaluation Organization"
    },
    {
      "text": "partners with OpenAI",
      "url": "https://openai.com/index/detecting-and-reducing-scheming-in-ai-models/",
      "resourceId": "b3f335edccfc5333",
      "resourceTitle": "OpenAI Preparedness Framework"
    },
    {
      "text": "UK AI Security Institute",
      "url": "https://www.aisi.gov.uk/",
      "resourceId": "fdf68a8f30f57dee",
      "resourceTitle": "UK AI Safety Institute (AISI)"
    },
    {
      "text": "rebranded Feb 2025",
      "url": "https://www.aisi.gov.uk/blog/our-2025-year-in-review",
      "resourceId": "3dec5f974c5da5ec",
      "resourceTitle": "Our 2025 Year in Review"
    },
    {
      "text": "evaluated 30+ models",
      "url": "https://www.aisi.gov.uk/blog/our-2025-year-in-review",
      "resourceId": "3dec5f974c5da5ec",
      "resourceTitle": "Our 2025 Year in Review"
    },
    {
      "text": "Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    },
    {
      "text": "US AI Safety Institute (NIST/CAISI)",
      "url": "https://www.nist.gov/caisi",
      "resourceId": "94173523d006b3b4",
      "resourceTitle": "NIST Center for AI Standards and Innovation (CAISI)"
    },
    {
      "text": "International Network of AI Safety Institutes",
      "url": "https://www.nist.gov/news-events/news/2024/11/fact-sheet-us-department-commerce-us-department-state-launch-international",
      "resourceId": "a65ad4f1a30f1737",
      "resourceTitle": "International Network of AI Safety Institutes"
    },
    {
      "text": "300+ consortium members",
      "url": "https://www.nist.gov/news-events/news/us-ai-safety-institute-consortium-holds-first-plenary-meeting-reflect-progress-2024",
      "resourceId": "2ef355efe9937701",
      "resourceTitle": "First AISIC plenary meeting"
    },
    {
      "text": "signed MOUs with Anthropic and OpenAI",
      "url": "https://www.nist.gov/news-events/news/2024/08/us-ai-safety-institute-signs-agreements-regarding-ai-safety-research",
      "resourceId": "627bb42e8f74be04",
      "resourceTitle": "MOU with US AI Safety Institute"
    },
    {
      "text": "EU AI Act",
      "url": "https://artificialintelligenceact.eu/",
      "resourceId": "1ad6dc89cded8b0c",
      "resourceTitle": "EU AI Act – Official Resource Hub"
    },
    {
      "text": "NIST AI Risk Management Framework",
      "url": "https://www.nist.gov/itl/ai-risk-management-framework",
      "resourceId": "54dbc15413425997",
      "resourceTitle": "NIST AI Risk Management Framework"
    },
    {
      "text": "Anthropic RSP",
      "url": "https://www.anthropic.com/responsible-scaling-policy",
      "resourceId": "afe1e125f3ba3f14",
      "resourceTitle": "Responsible Scaling Policy"
    },
    {
      "text": "OpenAI Preparedness Framework",
      "url": "https://openai.com/preparedness",
      "resourceId": "90a03954db3c77d5",
      "resourceTitle": "OpenAI Preparedness Framework"
    },
    {
      "text": "CISA: AI Red Teaming",
      "url": "https://www.cisa.gov/news-events/news/ai-red-teaming-applying-software-tevv-ai-evaluations",
      "resourceId": "6f1d4fd3b52c7cb7",
      "resourceTitle": "AI Red Teaming: Applying Software TEVV for AI Evaluations"
    }
  ],
  "unconvertedLinkCount": 67,
  "convertedLinkCount": 0,
  "backlinkCount": 1,
  "hallucinationRisk": {
    "level": "low",
    "score": 30,
    "factors": [
      "no-citations",
      "high-rigor",
      "conceptual-content"
    ]
  },
  "entityType": "approach",
  "redundancy": {
    "maxSimilarity": 23,
    "similarPages": [
      {
        "id": "dangerous-cap-evals",
        "title": "Dangerous Capability Evaluations",
        "path": "/knowledge-base/responses/dangerous-cap-evals/",
        "similarity": 23
      },
      {
        "id": "alignment-evals",
        "title": "Alignment Evaluations",
        "path": "/knowledge-base/responses/alignment-evals/",
        "similarity": 19
      },
      {
        "id": "capability-elicitation",
        "title": "Capability Elicitation",
        "path": "/knowledge-base/responses/capability-elicitation/",
        "similarity": 19
      },
      {
        "id": "evals",
        "title": "Evals & Red-teaming",
        "path": "/knowledge-base/responses/evals/",
        "similarity": 19
      },
      {
        "id": "evals-governance",
        "title": "Evals-Based Deployment Gates",
        "path": "/knowledge-base/responses/evals-governance/",
        "similarity": 18
      }
    ]
  },
  "coverage": {
    "passing": 8,
    "total": 13,
    "targets": {
      "tables": 15,
      "diagrams": 2,
      "internalLinks": 30,
      "externalLinks": 19,
      "footnotes": 11,
      "references": 11
    },
    "actuals": {
      "tables": 21,
      "diagrams": 2,
      "internalLinks": 9,
      "externalLinks": 85,
      "footnotes": 0,
      "references": 24,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "summary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "red",
      "overview": "green",
      "tables": "green",
      "diagrams": "green",
      "internalLinks": "amber",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "ratingsString": "N:4.5 R:7 A:6.5 C:7.5"
  },
  "readerRank": 112,
  "researchRank": 395,
  "recommendedScore": 177.88
}

External Links

{
  "lesswrong": "https://www.lesswrong.com/tag/ai-evaluations"
}

Backlinks (1)

id	title	type	relationship
alignment-evaluation-overview	Evaluation & Detection (Overview)	concept	—