#!/usr/bin/env python
"""
Generate a Datasheet for a ClimateVision training dataset.

Usage:
    python scripts/generate_datasheet.py \\
        --manifest data/manifests/sentinel2-deforestation.yaml \\
        --output-dir outputs/datasheets/

Runs inside the release CI pipeline so every dataset version published
ships with a Gebru-style datasheet alongside its model cards.
"""

from __future__ import annotations

import argparse
import logging
import sys
from pathlib import Path

from climatevision.governance.datasheet import generate

logger = logging.getLogger("generate_datasheet")


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line options; *argv* defaults to ``sys.argv[1:]``."""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--manifest",
        type=Path,
        required=True,
        help="Dataset manifest (yaml/json)",
    )
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=None,
        help="Where to write the datasheet",
    )
    cli.add_argument("--name", default=None, help="Override dataset name")
    cli.add_argument("--version", default=None, help="Override dataset version")
    cli.add_argument("-v", "--verbose", action="store_true")
    return cli.parse_args(argv)


def main(argv: list[str] | None = None) -> int:
    """Entry point: build the datasheet and print each artifact path."""
    args = parse_args(argv)

    # -v/--verbose switches root logging from INFO to DEBUG.
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )

    written = generate(
        manifest=args.manifest,
        output_dir=args.output_dir,
        name=args.name,
        version=args.version,
    )
    for label, path in written.items():
        print(f"{label}: {path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
a/src/climatevision/governance/__init__.py +++ b/src/climatevision/governance/__init__.py @@ -6,6 +6,7 @@ - Regional bias and fairness auditing - Anomaly detection for inference inputs/outputs - Model audit trails and version tracking +- Datasheets for training datasets (Gebru et al., 2018) """ from .explainability import ( @@ -42,6 +43,13 @@ check_fairness_gate, SUPPORTED_REGIONS, ) +from .datasheet import ( + Datasheet, + build_datasheet, + generate as generate_datasheet, + render_markdown as render_datasheet_markdown, + write_datasheet, +) __all__ = [ # Explainability @@ -73,4 +81,10 @@ "RegionMetrics", "check_fairness_gate", "SUPPORTED_REGIONS", + # Datasheet + "Datasheet", + "build_datasheet", + "generate_datasheet", + "render_datasheet_markdown", + "write_datasheet", ] diff --git a/src/climatevision/governance/datasheet.py b/src/climatevision/governance/datasheet.py new file mode 100644 index 0000000..b4cc05d --- /dev/null +++ b/src/climatevision/governance/datasheet.py @@ -0,0 +1,215 @@ +""" +Datasheets for the datasets that train ClimateVision models. + +Companion to the Mitchell-style model cards in ``governance.model_card``: +where a model card describes the *model*, a datasheet describes the +*dataset* the model was trained on (Gebru et al., 2018, "Datasheets for +Datasets"). The two artifacts answer different questions and both need +to ship with a release. + +The module mirrors the model_card public surface (``build``, ``render``, +``write``, ``generate``) so contributors only have to learn one pattern, +and the release CI pipeline can call them in sequence. + +Sections covered: + +- Motivation +- Composition +- Collection process +- Preprocessing, cleaning, labeling +- Uses (intended and inappropriate) +- Distribution +- Maintenance + +Every section is a free-form ``dict`` of question -> answer so the schema +can grow without code changes; ``REQUIRED_QUESTIONS`` enforces the bare +minimum a release datasheet must answer. 
"""
Datasheets for the datasets that train ClimateVision models.

Companion to the Mitchell-style model cards in ``governance.model_card``:
where a model card describes the *model*, a datasheet describes the
*dataset* the model was trained on (Gebru et al., 2018, "Datasheets for
Datasets"). The two artifacts answer different questions and both need
to ship with a release.

The module mirrors the model_card public surface (``build``, ``render``,
``write``, ``generate``) so contributors only have to learn one pattern,
and the release CI pipeline can call them in sequence.

Sections covered:

- Motivation
- Composition
- Collection process
- Preprocessing, cleaning, labeling
- Uses (intended and inappropriate)
- Distribution
- Maintenance

Every section is a free-form ``dict`` of question -> answer so the schema
can grow without code changes; ``REQUIRED_QUESTIONS`` enforces the bare
minimum a release datasheet must answer.
"""

from __future__ import annotations

import json
import logging
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional, Union

logger = logging.getLogger(__name__)

# Repository root is three levels above src/climatevision/governance/.
# Guard the lookup: when the module is installed at a shallower path
# (e.g. site-packages), parents[3] raises IndexError at import time;
# fall back to the current working directory in that case.
try:
    _PROJECT_ROOT = Path(__file__).resolve().parents[3]
except IndexError:  # pragma: no cover - depends on install layout
    _PROJECT_ROOT = Path.cwd()
_DEFAULT_OUTPUT_DIR = _PROJECT_ROOT / "outputs" / "datasheets"

# Bare minimum a release datasheet must answer: section name -> keys that
# must be present and non-empty in that section.
REQUIRED_QUESTIONS = {
    "motivation": ("purpose", "creators"),
    "composition": ("instances", "labels", "splits"),
    "collection_process": ("source", "timeframe"),
    "uses": ("intended_uses", "inappropriate_uses"),
}


@dataclass
class Datasheet:
    """Structured datasheet for a single training dataset.

    Each section attribute is a free-form question -> answer mapping;
    only the keys listed in ``REQUIRED_QUESTIONS`` are enforced.
    """

    name: str
    version: str
    motivation: dict
    composition: dict
    collection_process: dict
    preprocessing: dict
    uses: dict
    distribution: dict
    maintenance: dict
    # UTC ISO-8601 timestamp captured when the object is created.
    generated_at: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict of every field."""
        return {
            "name": self.name,
            "version": self.version,
            "motivation": self.motivation,
            "composition": self.composition,
            "collection_process": self.collection_process,
            "preprocessing": self.preprocessing,
            "uses": self.uses,
            "distribution": self.distribution,
            "maintenance": self.maintenance,
            "generated_at": self.generated_at,
        }


# Conservative fallback answers applied when a manifest omits
# uses.inappropriate_uses; a manifest-supplied list overrides these.
_DEFAULT_INAPPROPRIATE_USES = [
    "Training models for real-time legal enforcement against individual landowners.",
    "Land-rights or sovereignty disputes without on-the-ground verification.",
    "Generative model training where label provenance is required to be human-verified.",
]

# Default maintenance section applied when the manifest has none.
_DEFAULT_MAINTENANCE = {
    "owner": "ClimateVision Governance ",
    "update_cadence": "Reviewed each minor release; refreshed when source providers change.",
    "deprecation_policy": (
        "Versions are retained for two minor releases after supersession; "
        "models trained on deprecated versions are flagged in their model cards."
    ),
}


def _coerce_config(config: Union[dict, str, Path]) -> dict:
    """Return *config* as a dict, loading YAML/JSON from disk when given a path.

    File type is chosen by suffix: ``.yml``/``.yaml`` parse as YAML,
    anything else as JSON.

    Raises:
        RuntimeError: a YAML path was given but PyYAML is not installed.
        TypeError: the file parsed to something other than a mapping
            (e.g. an empty YAML file parses to ``None``).
    """
    if isinstance(config, dict):
        return config
    path = Path(config)
    text = path.read_text(encoding="utf-8")
    if path.suffix in {".yml", ".yaml"}:
        try:
            import yaml
        except ImportError as exc:  # pragma: no cover - import guard
            raise RuntimeError("PyYAML is required to load YAML configs") from exc
        loaded = yaml.safe_load(text)
    else:
        loaded = json.loads(text)
    if not isinstance(loaded, dict):
        raise TypeError(
            f"manifest {path} must contain a mapping, got {type(loaded).__name__}"
        )
    return loaded


def _validate(datasheet: "Datasheet") -> None:
    """Raise ValueError listing every REQUIRED_QUESTIONS answer that is absent or empty."""
    missing: list[str] = []
    for section_name, required_keys in REQUIRED_QUESTIONS.items():
        section = getattr(datasheet, section_name)
        for key in required_keys:
            # None, empty string, empty list and empty dict all count as unanswered.
            if key not in section or section[key] in (None, "", [], {}):
                missing.append(f"{section_name}.{key}")
    if missing:
        raise ValueError(f"datasheet missing required answers: {missing}")


def build_datasheet(
    manifest: Union[dict, str, Path],
    *,
    name: Optional[str] = None,
    version: Optional[str] = None,
) -> Datasheet:
    """Build a Datasheet from a structured dataset manifest.

    Args:
        manifest: Manifest dict, or path to a YAML/JSON manifest file.
        name: Optional override for the manifest's dataset name.
        version: Optional override for the manifest's version string.

    Returns:
        A validated ``Datasheet``.

    Raises:
        ValueError: the manifest does not answer every required question.
    """
    m = _coerce_config(manifest)

    resolved_name = name or m.get("name") or "climatevision-dataset"
    resolved_version = version or m.get("version") or "0.0.0"

    # Copy before mutating so a caller-supplied dict is never modified.
    uses = dict(m.get("uses", {}))
    uses.setdefault("inappropriate_uses", list(_DEFAULT_INAPPROPRIATE_USES))

    sheet = Datasheet(
        name=resolved_name,
        version=resolved_version,
        motivation=dict(m.get("motivation", {})),
        composition=dict(m.get("composition", {})),
        collection_process=dict(m.get("collection_process", {})),
        preprocessing=dict(m.get("preprocessing", {})),
        uses=uses,
        distribution=dict(m.get("distribution", {})),
        maintenance=dict(m.get("maintenance", _DEFAULT_MAINTENANCE)),
    )
    _validate(sheet)
    return sheet


def _render_section(title: str, body: dict) -> list[str]:
    """Render one section as Markdown lines.

    Lists become bullet lists under an ``###`` heading, nested dicts become
    fenced JSON blocks, and scalars become ``- **Key**: value`` bullets.
    """
    if not body:
        return [f"## {title}", "_Not documented._", ""]
    lines = [f"## {title}"]
    for key, value in body.items():
        pretty_key = key.replace("_", " ").title()
        if isinstance(value, list):
            lines.append(f"### {pretty_key}")
            lines.extend(f"- {item}" for item in value)
        elif isinstance(value, dict):
            lines.append(f"### {pretty_key}")
            lines.append(f"```json\n{json.dumps(value, indent=2)}\n```")
        else:
            lines.append(f"- **{pretty_key}**: {value}")
    lines.append("")
    return lines


def render_markdown(sheet: Datasheet) -> str:
    """Render the datasheet as a Gebru-style Markdown document."""
    sections = [
        f"# Datasheet: {sheet.name} ({sheet.version})",
        f"_Generated {sheet.generated_at}_",
        "",
        "_Format: Gebru et al., 2018, \"Datasheets for Datasets\"._",
        "",
    ]
    sections += _render_section("Motivation", sheet.motivation)
    sections += _render_section("Composition", sheet.composition)
    sections += _render_section("Collection Process", sheet.collection_process)
    sections += _render_section("Preprocessing, Cleaning, Labeling", sheet.preprocessing)
    sections += _render_section("Uses", sheet.uses)
    sections += _render_section("Distribution", sheet.distribution)
    sections += _render_section("Maintenance", sheet.maintenance)
    return "\n".join(sections) + "\n"


def write_datasheet(
    sheet: Datasheet,
    output_dir: Optional[Union[str, Path]] = None,
) -> dict[str, Path]:
    """Write Markdown and JSON renderings of *sheet* to *output_dir*.

    Defaults to ``<repo>/outputs/datasheets``; the directory is created if
    needed. Returns ``{"markdown": md_path, "json": json_path}``.
    """
    output_dir = Path(output_dir) if output_dir else _DEFAULT_OUTPUT_DIR
    output_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): name/version flow straight into the filename — a name
    # containing a path separator would escape output_dir. Manifest names
    # are slug-like today; sanitize here if that ever changes.
    base = f"{sheet.name}_{sheet.version}"
    md_path = output_dir / f"{base}.md"
    json_path = output_dir / f"{base}.json"

    md_path.write_text(render_markdown(sheet), encoding="utf-8")
    json_path.write_text(
        json.dumps(sheet.to_dict(), indent=2, ensure_ascii=False),
        encoding="utf-8",
    )

    logger.info("Wrote datasheet to %s and %s", md_path, json_path)
    return {"markdown": md_path, "json": json_path}


def generate(
    manifest: Union[dict, str, Path],
    output_dir: Optional[Union[str, Path]] = None,
    **kwargs: Any,
) -> dict[str, Path]:
    """End-to-end: load manifest, build the datasheet, render to disk."""
    sheet = build_datasheet(manifest, **kwargs)
    return write_datasheet(sheet, output_dir=output_dir)
"""Tests for governance.datasheet."""

from __future__ import annotations

import json

import pytest

from climatevision.governance.datasheet import (
    Datasheet,
    build_datasheet,
    generate,
    render_markdown,
    write_datasheet,
)


def _valid_manifest() -> dict:
    """Return a fresh, fully-populated manifest (safe for tests to mutate)."""
    return {
        "name": "sentinel2-deforestation",
        "version": "1.0.0",
        "motivation": {
            "purpose": "Detect Amazon basin deforestation events from Sentinel-2.",
            "creators": "ClimateVision Data Pipeline team",
            "funding": "Self-funded open-source initiative.",
        },
        "composition": {
            "instances": "12,480 256x256 tiles",
            "labels": "Binary deforestation mask per tile",
            "splits": "70/15/15 train/val/test by spatial cluster",
            "label_source": "Hansen Global Forest Change v1.10",
        },
        "collection_process": {
            "source": "Sentinel-2 L2A via Google Earth Engine",
            "timeframe": "2020-01-01 to 2023-12-31",
            "consent": "Public open-data licence; no human subjects.",
        },
        "preprocessing": {
            "cloud_masking": "QA60 + s2cloudless threshold 0.4",
            "normalisation": "Per-band z-score against training set means",
            "augmentation": "Random flip / 90deg rotate at train time only",
        },
        "uses": {
            "intended_uses": [
                "Training U-Net segmentation models for deforestation detection.",
                "Evaluating fairness of detection across forest biomes.",
            ]
        },
        "distribution": {
            "license": "CC-BY-4.0 (derived data)",
            "redistribution": "Allowed with attribution; do not redistribute raw Sentinel-2 tiles.",
        },
    }


def test_build_datasheet_returns_typed_object():
    manifest = _valid_manifest()
    sheet = build_datasheet(manifest)
    assert isinstance(sheet, Datasheet)
    assert (sheet.name, sheet.version) == ("sentinel2-deforestation", "1.0.0")
    assert sheet.motivation["purpose"].startswith("Detect")


def test_inappropriate_uses_default_when_omitted():
    # Manifest omits uses.inappropriate_uses, so the library default applies.
    defaults = build_datasheet(_valid_manifest()).uses["inappropriate_uses"]
    assert defaults, "default inappropriate_uses should be populated"


def test_inappropriate_uses_respect_override():
    manifest = _valid_manifest()
    override = ["custom override"]
    manifest["uses"]["inappropriate_uses"] = list(override)
    sheet = build_datasheet(manifest)
    assert sheet.uses["inappropriate_uses"] == override


def test_maintenance_has_default():
    maintenance = build_datasheet(_valid_manifest()).maintenance
    for expected_key in ("owner", "update_cadence"):
        assert expected_key in maintenance


def test_validate_rejects_missing_required_section():
    manifest = _valid_manifest()
    manifest["motivation"].pop("purpose")
    with pytest.raises(ValueError, match="motivation.purpose"):
        build_datasheet(manifest)


def test_validate_rejects_empty_required_field():
    manifest = _valid_manifest()
    manifest["composition"]["labels"] = ""
    with pytest.raises(ValueError, match="composition.labels"):
        build_datasheet(manifest)


def test_validate_rejects_missing_collection_timeframe():
    manifest = _valid_manifest()
    manifest["collection_process"].pop("timeframe")
    with pytest.raises(ValueError, match="collection_process.timeframe"):
        build_datasheet(manifest)


def test_render_markdown_includes_section_headings():
    md = render_markdown(build_datasheet(_valid_manifest()))
    expected_headings = [
        "# Datasheet:",
        "## Motivation",
        "## Composition",
        "## Collection Process",
        "## Uses",
        "## Distribution",
        "## Maintenance",
    ]
    for heading in expected_headings:
        assert heading in md, f"missing heading: {heading}"


def test_render_markdown_renders_lists_as_bullets():
    md = render_markdown(build_datasheet(_valid_manifest()))
    assert "- Training U-Net segmentation models" in md


def test_write_datasheet_round_trips_json(tmp_path):
    sheet = build_datasheet(_valid_manifest())
    written = write_datasheet(sheet, output_dir=tmp_path)
    loaded = json.loads(written["json"].read_text())
    assert loaded["name"] == sheet.name
    assert loaded["composition"]["splits"] == "70/15/15 train/val/test by spatial cluster"


def test_generate_end_to_end(tmp_path):
    manifest_path = tmp_path / "manifest.json"
    manifest_path.write_text(json.dumps(_valid_manifest()))
    written = generate(manifest_path, output_dir=tmp_path / "out")
    assert written["markdown"].exists()
    assert written["json"].exists()
    assert "Datasheet:" in written["markdown"].read_text()


def test_generate_loads_yaml(tmp_path):
    # importorskip both guards the dependency and hands back the module.
    yaml = pytest.importorskip("yaml")
    manifest_path = tmp_path / "manifest.yaml"
    manifest_path.write_text(yaml.safe_dump(_valid_manifest()))
    written = generate(manifest_path, output_dir=tmp_path / "out")
    assert written["markdown"].exists()