AI Model Steganography

steganography · risk · Path: /knowledge-base/risks/steganography/
E603 — Entity ID (EID)
← Back to page · 3 backlinks · Quality: 91 · Updated: 2026-01-30
Page Record: database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
  "id": "steganography",
  "wikiId": "E603",
  "path": "/knowledge-base/risks/steganography/",
  "filePath": "knowledge-base/risks/steganography.mdx",
  "title": "AI Model Steganography",
  "quality": 91,
  "readerImportance": 70,
  "researchImportance": 84,
  "tacticalValue": null,
  "contentFormat": "article",
  "causalLevel": "amplifier",
  "lastUpdated": "2026-01-30",
  "dateCreated": "2026-02-15",
  "summary": "Comprehensive analysis of AI steganography risks - systems hiding information in outputs to enable covert coordination or evade oversight. GPT-4 class models encode 3-5 bits/KB with under 30% human detection rates. NeurIPS 2024 research achieved information-theoretically undetectable channels; LASR Labs showed steganography emerges unprompted under optimization pressure. Paraphrasing reduces capacity to under 3 bits/KB; CoT Monitor+ achieves 43.8% reduction in deceptive behaviors.",
  "description": "AI systems can hide information in outputs undetectable to humans, enabling covert coordination and oversight evasion.",
  "ratings": {
    "novelty": 5,
    "rigor": 6,
    "completeness": 7,
    "actionability": 5.5
  },
  "category": "risks",
  "subcategory": "accident",
  "clusters": [
    "ai-safety"
  ],
  "metrics": {
    "wordCount": 2404,
    "tableCount": 13,
    "diagramCount": 1,
    "internalLinks": 20,
    "externalLinks": 39,
    "footnoteCount": 0,
    "bulletRatio": 0.25,
    "sectionCount": 37,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 45,
  "evergreen": true,
  "wordCount": 2404,
  "unconvertedLinks": [
    {
      "text": "Mitigating Deceptive Alignment via Self-Monitoring",
      "url": "https://arxiv.org/abs/2505.18807",
      "resourceId": "628f3eebcff82886",
      "resourceTitle": "Mitigating Deceptive Alignment via Self-Monitoring"
    },
    {
      "text": "Redwood Research",
      "url": "https://www.redwoodresearch.org/research",
      "resourceId": "d42c3c74354e7b66",
      "resourceTitle": "Causal Scrubbing - Redwood Research"
    },
    {
      "text": "CoT Monitor+ (2025)",
      "url": "https://arxiv.org/abs/2505.18807",
      "resourceId": "628f3eebcff82886",
      "resourceTitle": "Mitigating Deceptive Alignment via Self-Monitoring"
    },
    {
      "text": "Mitigating Deceptive Alignment",
      "url": "https://arxiv.org/abs/2505.18807",
      "resourceId": "628f3eebcff82886",
      "resourceTitle": "Mitigating Deceptive Alignment via Self-Monitoring"
    },
    {
      "text": "Nature 2024",
      "url": "https://www.nature.com/articles/s41586-024-08025-4",
      "resourceId": "a01e51407f492f11",
      "resourceTitle": "Scalable watermarking for identifying large language model outputs"
    },
    {
      "text": "Redwood Research",
      "url": "https://www.redwoodresearch.org/research",
      "resourceId": "d42c3c74354e7b66",
      "resourceTitle": "Causal Scrubbing - Redwood Research"
    }
  ],
  "unconvertedLinkCount": 6,
  "convertedLinkCount": 14,
  "backlinkCount": 3,
  "hallucinationRisk": {
    "level": "medium",
    "score": 50,
    "factors": [
      "no-citations",
      "high-quality"
    ]
  },
  "entityType": "risk",
  "redundancy": {
    "maxSimilarity": 17,
    "similarPages": [
      {
        "id": "situational-awareness",
        "title": "Situational Awareness",
        "path": "/knowledge-base/capabilities/situational-awareness/",
        "similarity": 17
      },
      {
        "id": "power-seeking-conditions",
        "title": "Power-Seeking Emergence Conditions Model",
        "path": "/knowledge-base/models/power-seeking-conditions/",
        "similarity": 17
      },
      {
        "id": "sandbagging",
        "title": "AI Capability Sandbagging",
        "path": "/knowledge-base/risks/sandbagging/",
        "similarity": 17
      },
      {
        "id": "corrigibility-failure-pathways",
        "title": "Corrigibility Failure Pathways",
        "path": "/knowledge-base/models/corrigibility-failure-pathways/",
        "similarity": 16
      },
      {
        "id": "scheming-likelihood-model",
        "title": "Scheming Likelihood Assessment",
        "path": "/knowledge-base/models/scheming-likelihood-model/",
        "similarity": 16
      }
    ]
  },
  "coverage": {
    "passing": 9,
    "total": 13,
    "targets": {
      "tables": 10,
      "diagrams": 1,
      "internalLinks": 19,
      "externalLinks": 12,
      "footnotes": 7,
      "references": 7
    },
    "actuals": {
      "tables": 13,
      "diagrams": 1,
      "internalLinks": 20,
      "externalLinks": 39,
      "footnotes": 0,
      "references": 18,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "summary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "red",
      "overview": "green",
      "tables": "green",
      "diagrams": "green",
      "internalLinks": "green",
      "externalLinks": "green",
      "footnotes": "red",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "ratingsString": "N:5 R:6 A:5.5 C:7"
  },
  "readerRank": 167,
  "researchRank": 66,
  "recommendedScore": 228.59
}
External Links
No external links
Backlinks (3)
id	title	type	relationship
alignment	AI Alignment	approach	—
evaluation	AI Evaluation	approach	—
deceptive-alignment	Deceptive Alignment	risk	—