4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -32,6 +32,10 @@ ENV/
env.bak/
venv.bak/

# Benchmarks (new pytest-benchmark suite)
.benchmarks/

# Benchmarks (old Snakemake suite in benchmark/)
benchmark/*.pdf
benchmark/benchmarks
benchmark/.snakemake
94 changes: 94 additions & 0 deletions benchmarks/README.md
@@ -0,0 +1,94 @@
# Internal Performance Benchmarks

This suite measures linopy's own performance (model build time, LP write speed, peak memory usage) across a range of problem sizes using [pytest-benchmark](https://pytest-benchmark.readthedocs.io/) and [pytest-memray](https://pytest-memray.readthedocs.io/). Use it to check whether a code change introduces a regression or an improvement.

> **Note:** The `benchmark/` directory (singular) contains *external* benchmarks comparing linopy against other modeling frameworks. This directory (`benchmarks/`) is for *internal* performance tracking only.

## Setup

```bash
pip install -e ".[benchmarks]"
```

## Running benchmarks

```bash
# Quick smoke test (small sizes only)
pytest benchmarks/ --quick

# Full timing benchmarks
pytest benchmarks/test_build.py benchmarks/test_lp_write.py benchmarks/test_matrices.py

# Run a specific model
pytest benchmarks/test_build.py -k basic
```

## Comparing timing between branches

```bash
# Save baseline results on master
git checkout master
pytest benchmarks/test_build.py --benchmark-save=master

# Switch to feature branch and compare
git checkout my-feature
pytest benchmarks/test_build.py --benchmark-save=my-feature --benchmark-compare=0001_master

# Compare saved results without re-running
pytest-benchmark compare 0001_master 0002_my-feature --columns=median,iqr
```

Results are stored in `.benchmarks/` (gitignored).

## Memory benchmarks

`memory.py` runs each test in a separate process with pytest-memray to get accurate per-test peak memory (including C/numpy allocations). Results are saved as JSON and can be compared across branches.

By default, only the build phase (`test_build.py`) is measured. Unlike timing benchmarks where `benchmark()` isolates the measured function, memray tracks all allocations within a test — including model construction in setup. This means LP write and matrix tests would report build + phase memory combined, making the phase-specific contribution impossible to isolate. Since model construction dominates memory usage, measuring build alone gives the most actionable numbers.

```bash
# Save baseline on master
git checkout master
python benchmarks/memory.py save master

# Save feature branch
git checkout my-feature
python benchmarks/memory.py save my-feature

# Compare
python benchmarks/memory.py compare master my-feature

# Quick mode (smaller sizes, faster)
python benchmarks/memory.py save master --quick

# Measure a specific phase (includes build overhead)
python benchmarks/memory.py save master --test-path benchmarks/test_lp_write.py
```

Results are stored in `.benchmarks/memory/` (gitignored). Requires Linux or macOS (memray is not available on Windows).

> **Note:** Small tests (~5 MiB) are near the import-overhead floor and may show noise of ~1 MiB between runs. Focus on larger tests for meaningful memory comparisons. Do not combine `--memray` with timing benchmarks — memray adds ~2x overhead that invalidates timing results.
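Each saved run is a flat mapping of test id to peak MiB, and `compare` applies a simple percent-change formula. A minimal sketch of that arithmetic (the labels and numbers here are invented for illustration):

```python
# Shape of a run saved by memory.py (values are made up).
baseline = {"label": "master", "peak_mib": {"test_build[basic-100]": 12.4}}
candidate = {"label": "my-feature", "peak_mib": {"test_build[basic-100]": 13.0}}

test = "test_build[basic-100]"
a = baseline["peak_mib"][test]
b = candidate["peak_mib"][test]

# Same percent-change formula the compare command prints per test.
pct = (b - a) / a * 100
print(f"{pct:+.1f}%")  # +4.8%
```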

## Models

| Model | Description | Sizes |
|-------|-------------|-------|
| `basic` | Dense N*N model, 2*N^2 vars/cons | 10 — 1600 |
| `knapsack` | N binary variables, 1 constraint | 100 — 1M |
| `expression_arithmetic` | Broadcasting, scaling, summation across dims | 10 — 1000 |
| `sparse_network` | Ring network with mismatched bus/line coords | 10 — 1000 |
| `pypsa_scigrid` | Real power system (requires `pypsa`) | 10 — 200 snapshots |

## Phases

| Phase | File | What it measures |
|-------|------|------------------|
| Build | `test_build.py` | Model construction (add_variables, add_constraints, add_objective) |
| LP write | `test_lp_write.py` | Writing the model to an LP file |
| Matrices | `test_matrices.py` | Generating sparse matrices (A, b, c, bounds) from the model |

## Adding a new model

1. Create `benchmarks/models/my_model.py` with a `build_my_model(n)` function and a `SIZES` list
2. Add parametrized tests in the relevant `test_*.py` files
3. Add a quick threshold in `conftest.py`
1 change: 1 addition & 0 deletions benchmarks/__init__.py
@@ -0,0 +1 @@
"""Linopy benchmark suite — run with ``pytest benchmarks/`` (use ``--quick`` for smaller sizes)."""
30 changes: 30 additions & 0 deletions benchmarks/conftest.py
@@ -0,0 +1,30 @@
"""Benchmark configuration and shared fixtures."""

from __future__ import annotations

import pytest

QUICK_THRESHOLD = {
    "basic": 100,
    "knapsack": 10_000,
    "pypsa_scigrid": 50,
    "expression_arithmetic": 100,
    "sparse_network": 100,
}


def pytest_addoption(parser):
    parser.addoption(
        "--quick",
        action="store_true",
        default=False,
        help="Use smaller problem sizes for quick benchmarking",
    )


def skip_if_quick(request, model: str, size: int):
    """Skip large sizes when --quick is passed."""
    if request.config.getoption("--quick"):
        threshold = QUICK_THRESHOLD.get(model, float("inf"))
        if size > threshold:
            pytest.skip(f"--quick: skipping {model} size {size}")
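The skip logic in `skip_if_quick` reduces to a pure-Python predicate. A standalone sketch of how the thresholds act (no pytest required; `should_skip` is an illustrative name, not part of the PR):

```python
# Subset of conftest.py's QUICK_THRESHOLD, for illustration.
QUICK_THRESHOLD = {"basic": 100, "knapsack": 10_000}


def should_skip(model: str, size: int, quick: bool) -> bool:
    """Return True when --quick is active and the size exceeds the threshold."""
    if not quick:
        return False
    # Models without a threshold entry are never skipped.
    return size > QUICK_THRESHOLD.get(model, float("inf"))


print(should_skip("basic", 500, quick=True))        # True: 500 > 100
print(should_skip("basic", 500, quick=False))       # False: full run
print(should_skip("new_model", 10**9, quick=True))  # False: no threshold yet
```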
199 changes: 199 additions & 0 deletions benchmarks/memory.py
@@ -0,0 +1,199 @@
#!/usr/bin/env python
"""
Measure and compare peak memory using pytest-memray.

Usage:
    # Save a baseline (on master)
    python benchmarks/memory.py save master

    # Save current branch
    python benchmarks/memory.py save my-feature

    # Compare two saved runs
    python benchmarks/memory.py compare master my-feature

    # Quick mode (smaller sizes)
    python benchmarks/memory.py save master --quick

Results are stored in .benchmarks/memory/.
"""

from __future__ import annotations

import argparse
import json
import platform
import re
import subprocess
import sys
from pathlib import Path

if platform.system() == "Windows":
    raise RuntimeError(
        "memory.py requires pytest-memray which is not available on Windows. "
        "Run memory benchmarks on Linux or macOS."
    )

RESULTS_DIR = Path(".benchmarks/memory")
MEMORY_RE = re.compile(
    r"Allocation results for (.+?) at the high watermark\s+"
    r"📦 Total memory allocated: ([\d.]+)(MiB|KiB|GiB|B)",
)
# Only the build phase is measured by default. Unlike timing benchmarks (where
# pytest-benchmark isolates the measured function), memray tracks all allocations
# within a test — including model construction in setup. This means LP write and
# matrix tests would report build + phase memory combined, making the phase-specific
# contribution hard to isolate. Since model construction dominates memory usage,
# measuring build alone gives the most accurate and actionable numbers.
DEFAULT_TEST_PATHS = [
    "benchmarks/test_build.py",
]


def _to_mib(value: float, unit: str) -> float:
    factors = {"B": 1 / 1048576, "KiB": 1 / 1024, "MiB": 1, "GiB": 1024}
    return value * factors[unit]


def _collect_test_ids(test_paths: list[str], quick: bool) -> list[str]:
    """Collect test IDs without running them."""
    cmd = [
        sys.executable,
        "-m",
        "pytest",
        *test_paths,
        "--collect-only",
        "-q",
    ]
    if quick:
        cmd.append("--quick")
    result = subprocess.run(cmd, capture_output=True, text=True)
    return [
        line.strip()
        for line in result.stdout.splitlines()
        if "::" in line and not line.startswith(("=", "-", " "))
    ]


def save(label: str, quick: bool = False, test_paths: list[str] | None = None) -> Path:
    """Run each benchmark in a separate process for accurate memory measurement."""
    if test_paths is None:
        test_paths = DEFAULT_TEST_PATHS
    test_ids = _collect_test_ids(test_paths, quick)
    if not test_ids:
        print("No tests collected.", file=sys.stderr)
        sys.exit(1)

    print(f"Running {len(test_ids)} tests (each in a separate process)...")
    entries = {}
    for i, test_id in enumerate(test_ids, 1):
        short = test_id.split("::")[-1]
        print(f"  [{i}/{len(test_ids)}] {short}...", end=" ", flush=True)

        cmd = [
            sys.executable,
            "-m",
            "pytest",
            test_id,
            "--memray",
            "--benchmark-disable",
            "-v",
            "--tb=short",
            "-q",
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        output = result.stdout + result.stderr

        match = MEMORY_RE.search(output)
        if match:
            value = float(match.group(2))
            unit = match.group(3)
            mib = round(_to_mib(value, unit), 3)
            entries[test_id] = mib
            print(f"{mib:.1f} MiB")
        elif "SKIPPED" in output or "skipped" in output:
            print("skipped")
        else:
            print(
                "WARNING: no memray data (pytest-memray output format may have changed)",
                file=sys.stderr,
            )

    if not entries:
        print("No memray results found. Is pytest-memray installed?", file=sys.stderr)
        sys.exit(1)

    RESULTS_DIR.mkdir(parents=True, exist_ok=True)
    out_path = RESULTS_DIR / f"{label}.json"
    out_path.write_text(json.dumps({"label": label, "peak_mib": entries}, indent=2))
    print(f"\nSaved {len(entries)} results to {out_path}")
    return out_path


def compare(label_a: str, label_b: str) -> None:
    """Compare two saved memory results."""
    path_a = RESULTS_DIR / f"{label_a}.json"
    path_b = RESULTS_DIR / f"{label_b}.json"
    for p in (path_a, path_b):
        if not p.exists():
            print(f"Not found: {p}. Run 'save {p.stem}' first.", file=sys.stderr)
            sys.exit(1)

    data_a = json.loads(path_a.read_text())["peak_mib"]
    data_b = json.loads(path_b.read_text())["peak_mib"]

    all_tests = sorted(set(data_a) | set(data_b))

    print(f"\n{'Test':<60} {label_a:>10} {label_b:>10} {'Change':>10}")
    print("-" * 94)

    for test in all_tests:
        a = data_a.get(test)
        b = data_b.get(test)
        a_str = f"{a:.1f}" if a is not None else "—"
        b_str = f"{b:.1f}" if b is not None else "—"
        if a is not None and b is not None and a > 0:
            pct = (b - a) / a * 100
            change = f"{pct:+.1f}%"
        else:
            change = "—"
        # Shorten test name for readability
        short = test.split("::")[-1] if "::" in test else test
        print(f"{short:<60} {a_str:>10} {b_str:>10} {change:>10}")

    print()


def main():
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    sub = parser.add_subparsers(dest="cmd", required=True)

    p_save = sub.add_parser("save", help="Run benchmarks and save memory results")
    p_save.add_argument(
        "label", help="Label for this run (e.g. 'master', 'my-feature')"
    )
    p_save.add_argument(
        "--quick", action="store_true", help="Use smaller problem sizes"
    )
    p_save.add_argument(
        "--test-path",
        nargs="+",
        default=None,
        help="Test file(s) to run (default: build phase only)",
    )

    p_cmp = sub.add_parser("compare", help="Compare two saved runs")
    p_cmp.add_argument("label_a", help="First run label (baseline)")
    p_cmp.add_argument("label_b", help="Second run label")

    args = parser.parse_args()
    if args.cmd == "save":
        save(args.label, quick=args.quick, test_paths=args.test_path)
    elif args.cmd == "compare":
        compare(args.label_a, args.label_b)


if __name__ == "__main__":
    main()
21 changes: 21 additions & 0 deletions benchmarks/models/__init__.py
@@ -0,0 +1,21 @@
"""Model builders for benchmarks."""

from benchmarks.models.basic import SIZES as BASIC_SIZES
from benchmarks.models.basic import build_basic
from benchmarks.models.expression_arithmetic import SIZES as EXPR_SIZES
from benchmarks.models.expression_arithmetic import build_expression_arithmetic
from benchmarks.models.knapsack import SIZES as KNAPSACK_SIZES
from benchmarks.models.knapsack import build_knapsack
from benchmarks.models.sparse_network import SIZES as SPARSE_SIZES
from benchmarks.models.sparse_network import build_sparse_network

__all__ = [
    "BASIC_SIZES",
    "EXPR_SIZES",
    "KNAPSACK_SIZES",
    "SPARSE_SIZES",
    "build_basic",
    "build_expression_arithmetic",
    "build_knapsack",
    "build_sparse_network",
]
18 changes: 18 additions & 0 deletions benchmarks/models/basic.py
@@ -0,0 +1,18 @@
"""Basic benchmark model: 2*N^2 variables and constraints."""

from __future__ import annotations

import linopy

SIZES = [10, 50, 100, 250, 500, 1000, 1600]


def build_basic(n: int) -> linopy.Model:
    """Build a basic N*N model with 2*N^2 vars and 2*N^2 constraints."""
    m = linopy.Model()
    x = m.add_variables(coords=[range(n), range(n)], dims=["i", "j"], name="x")
    y = m.add_variables(coords=[range(n), range(n)], dims=["i", "j"], name="y")
    m.add_constraints(x + y <= 10, name="upper")
    m.add_constraints(x - y >= -5, name="lower")
    m.add_objective(x.sum() + 2 * y.sum())
    return m