From 6c19e4283e6c85764857e88498f541b41c746114 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Fri, 20 Mar 2026 13:20:53 +0000 Subject: [PATCH 1/9] Add post-calibration population rescaling to fix ~6% overshoot The optimiser treats population as 1 of ~556 targets so it drifts high. After calibration, rescale all weights so the weighted UK population matches the ONS target exactly. Also fix pre-existing ruff lint errors (unused import, ambiguous variable name). Closes #217 Co-Authored-By: Claude Opus 4.6 --- changelog.d/population-rescale.fixed.md | 1 + policyengine_uk_data/utils/calibrate.py | 50 +++++++++++++++++++++---- 2 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 changelog.d/population-rescale.fixed.md diff --git a/changelog.d/population-rescale.fixed.md b/changelog.d/population-rescale.fixed.md new file mode 100644 index 00000000..90a02bd5 --- /dev/null +++ b/changelog.d/population-rescale.fixed.md @@ -0,0 +1 @@ +Post-calibration population rescaling so weighted UK population matches the ONS target (#217). diff --git a/policyengine_uk_data/utils/calibrate.py b/policyengine_uk_data/utils/calibrate.py index c9fc5a92..e5250784 100644 --- a/policyengine_uk_data/utils/calibrate.py +++ b/policyengine_uk_data/utils/calibrate.py @@ -1,5 +1,6 @@ +import logging + import torch -from policyengine_uk import Microsimulation import pandas as pd import numpy as np import h5py @@ -7,6 +8,8 @@ from policyengine_uk.data import UKSingleYearDataset from policyengine_uk_data.utils.progress import ProcessingProgress +logger = logging.getLogger(__name__) + def calibrate_local_areas( dataset: UKSingleYearDataset, @@ -171,8 +174,8 @@ def dropout_weights(weights, p): optimizer.zero_grad() weights_ = torch.exp(dropout_weights(weights, 0.05)) * r - l = loss(weights_) - l.backward() + loss_val = loss(weights_) + loss_val.backward() optimizer.step() local_close = pct_close(weights_, local=True, national=False) @@ -187,7 +190,7 @@ def dropout_weights(weights, p): ) else: update_calibration( - epoch + 1, loss_value=l.item(), calculating_loss=False + epoch + 1, loss_value=loss_val.item(), calculating_loss=False ) if epoch % 10 == 0: @@ -225,8 +228,8 @@ def dropout_weights(weights, p): for epoch in range(epochs): optimizer.zero_grad() weights_ = torch.exp(dropout_weights(weights, 0.05)) * r - l = loss(weights_) - l.backward() + loss_val = loss(weights_) + loss_val.backward() optimizer.step() local_close = pct_close(weights_, local=True, national=False) @@ -236,12 +239,12 @@ def dropout_weights(weights, p): if dropout_targets: validation_loss = loss(weights_, validation=True) print( - f"Training loss: {l.item():,.3f}, Validation loss: {validation_loss.item():,.3f}, Epoch: {epoch}, " + f"Training loss: {loss_val.item():,.3f}, Validation loss: {validation_loss.item():,.3f}, Epoch: {epoch}, " f"{area_name}<10%: {local_close:.1%}, National<10%: {national_close:.1%}" ) else: print( - f"Loss: {l.item()}, Epoch: {epoch}, {area_name}<10%: {local_close:.1%}, National<10%: {national_close:.1%}" + f"Loss: {loss_val.item()}, Epoch: {epoch}, {area_name}<10%: {local_close:.1%}, National<10%: {national_close:.1%}" ) if epoch % 10 == 0: @@ -276,4 +279,35 @@ def dropout_weights(weights, p): dataset.household.household_weight = final_weights.sum(axis=0) + # Post-calibration population rescaling: the optimiser treats + # population as 1 of ~556 targets so it drifts ~6% high. Rescale + # all weights so the weighted population matches the ONS target. + pop_col_name = "ons/uk_population" + pop_idx = None + if hasattr(m_n, "columns"): + cols = list(m_n.columns) + if pop_col_name in cols: + pop_idx = cols.index(pop_col_name) + if pop_idx is not None: + pop_metric = ( + m_n.values[:, pop_idx] if hasattr(m_n, "values") else m_n[:, pop_idx] + ) + pop_target = float(y_n.iloc[pop_idx] if hasattr(y_n, "iloc") else y_n[pop_idx]) + national_weights = final_weights.sum(axis=0) + actual_pop = (national_weights * pop_metric).sum() + if actual_pop > 0: + scale = pop_target / actual_pop + final_weights *= scale + logger.info( + "Population rescale: %.2fM -> %.2fM (factor %.4f)", + actual_pop / 1e6, + pop_target / 1e6, + scale, + ) + + # Re-save rescaled weights + with h5py.File(STORAGE_FOLDER / weight_file, "w") as f: + f.create_dataset(dataset_key, data=final_weights) + dataset.household.household_weight = final_weights.sum(axis=0) + return dataset From a3fc98fb98285425bcc3df510a1bc7640e684617 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Fri, 20 Mar 2026 13:27:22 +0000 Subject: [PATCH 2/9] Add tests for population rescaling and extract into testable function Extract rescale_weights_to_population() from calibrate_local_areas() so it can be unit tested independently. Add 10 tests covering: scale up, scale down, exact match, missing column, zero weights, multiple columns, raw numpy arrays, 1D weights, non-mutation, and realistic 6% overshoot. Co-Authored-By: Claude Opus 4.6 --- .../tests/test_population_rescale.py | 154 ++++++++++++++++++ policyengine_uk_data/utils/calibrate.py | 88 +++++++--- 2 files changed, 215 insertions(+), 27 deletions(-) create mode 100644 policyengine_uk_data/tests/test_population_rescale.py diff --git a/policyengine_uk_data/tests/test_population_rescale.py b/policyengine_uk_data/tests/test_population_rescale.py new file mode 100644 index 00000000..645b8a63 --- /dev/null +++ b/policyengine_uk_data/tests/test_population_rescale.py @@ -0,0 +1,154 @@ +"""Tests for post-calibration population rescaling.""" + +import numpy as np +import pandas as pd +import pytest + +from policyengine_uk_data.utils.calibrate import rescale_weights_to_population + + +def _make_national_matrix(pop_metric, other_metric=None): + """Build a small national matrix DataFrame with an ons/uk_population column.""" + data = {"ons/uk_population": pop_metric} + if other_metric is not None: + data["obr/income_tax"] = other_metric + return pd.DataFrame(data) + + +def _make_targets(values, index): + return pd.Series(values, index=index) + + +class TestRescaleWeightsToPopulation: + def test_scales_weights_down_when_population_too_high(self): + """Weights should shrink when actual population exceeds target.""" + n_areas, n_hh = 3, 4 + weights = np.ones((n_areas, n_hh)) * 10.0 # each HH weight=10 per area + # pop_metric=1 per household → actual_pop = sum(axis=0)*1 = 30 per HH, total=120 + matrix = _make_national_matrix(np.ones(n_hh)) + target_pop = 60.0 # half of actual + targets = _make_targets([target_pop], index=["ons/uk_population"]) + + rescaled, scale = rescale_weights_to_population(weights, matrix, targets) + + assert scale == pytest.approx(0.5, rel=1e-6) + assert rescaled.sum() == pytest.approx(weights.sum() * 0.5, rel=1e-6) + + def test_scales_weights_up_when_population_too_low(self): + """Weights should grow when actual population is below target.""" + n_areas, n_hh = 2, 5 + weights = np.ones((n_areas, n_hh)) * 5.0 + matrix = _make_national_matrix(np.ones(n_hh)) + # national_weights = sum(axis=0) = [10,10,10,10,10], actual_pop = 50 + target_pop = 100.0 + targets = _make_targets([target_pop], index=["ons/uk_population"]) + + rescaled, scale = rescale_weights_to_population(weights, matrix, targets) + + assert scale == pytest.approx(2.0, rel=1e-6) + np.testing.assert_allclose(rescaled, weights * 2.0) + + def test_no_change_when_population_matches(self): + """Scale should be 1.0 when actual population already matches target.""" + weights = np.array([[3.0, 7.0]]) + matrix = _make_national_matrix(np.array([1.0, 1.0])) + # national_weights = [3, 7], actual_pop = 10 + targets = _make_targets([10.0], index=["ons/uk_population"]) + + rescaled, scale = rescale_weights_to_population(weights, matrix, targets) + + assert scale == pytest.approx(1.0, rel=1e-6) + np.testing.assert_array_equal(rescaled, weights) + + def test_no_rescale_when_population_column_missing(self): + """Should return scale=1.0 when ons/uk_population is not in the matrix.""" + weights = np.ones((2, 3)) * 10.0 + matrix = pd.DataFrame({"obr/income_tax": np.ones(3)}) + targets = _make_targets([500.0], index=["obr/income_tax"]) + + rescaled, scale = rescale_weights_to_population(weights, matrix, targets) + + assert scale == 1.0 + np.testing.assert_array_equal(rescaled, weights) + + def test_no_rescale_when_actual_population_zero(self): + """Should return scale=1.0 when actual weighted population is zero.""" + weights = np.zeros((2, 3)) + matrix = _make_national_matrix(np.ones(3)) + targets = _make_targets([69_000_000.0], index=["ons/uk_population"]) + + rescaled, scale = rescale_weights_to_population(weights, matrix, targets) + + assert scale == 1.0 + np.testing.assert_array_equal(rescaled, weights) + + def test_works_with_multiple_columns(self): + """Population column is found even when other columns are present.""" + weights = np.array([[2.0, 8.0]]) + matrix = _make_national_matrix( + np.array([1.0, 1.0]), + other_metric=np.array([100.0, 200.0]), + ) + # actual_pop = 2+8 = 10, target = 20 + targets = _make_targets( + [20.0, 999.0], index=["ons/uk_population", "obr/income_tax"] + ) + + rescaled, scale = rescale_weights_to_population(weights, matrix, targets) + + assert scale == pytest.approx(2.0, rel=1e-6) + + def test_works_with_numpy_arrays(self): + """Should handle raw numpy arrays (no DataFrame/Series).""" + weights = np.ones((2, 4)) * 5.0 + # Without columns attr, pop_idx stays None → no rescaling + matrix = np.ones((4, 2)) + targets = np.array([40.0, 100.0]) + + rescaled, scale = rescale_weights_to_population(weights, matrix, targets) + + assert scale == 1.0 # no columns attr → no rescaling + + def test_1d_weights(self): + """Should work with 1D weight vectors (single area or flat weights). + + For 1D weights, sum(axis=0) returns the scalar total, so + actual_pop = total_weight * pop_metric.sum(). + """ + weights = np.array([5.0, 10.0, 15.0]) + matrix = _make_national_matrix(np.array([1.0, 1.0, 1.0])) + # national_weights = sum(axis=0) = scalar 30 + # actual_pop = (30 * [1,1,1]).sum() = 90 + target_pop = 45.0 + targets = _make_targets([target_pop], index=["ons/uk_population"]) + + rescaled, scale = rescale_weights_to_population(weights, matrix, targets) + + assert scale == pytest.approx(0.5, rel=1e-6) + np.testing.assert_allclose(rescaled, weights * 0.5) + + def test_does_not_mutate_input(self): + """Input weights array should not be modified in place.""" + weights = np.ones((2, 3)) * 10.0 + original = weights.copy() + matrix = _make_national_matrix(np.ones(3)) + targets = _make_targets([15.0], index=["ons/uk_population"]) + + rescale_weights_to_population(weights, matrix, targets) + + np.testing.assert_array_equal(weights, original) + + def test_realistic_6pct_overshoot(self): + """Simulate the real-world ~6% overshoot scenario from issue #217.""" + target_pop = 69_000_000.0 + actual_pop = 74_000_000.0 # 7.2% overshoot + n_hh = 100 + weights = np.ones((1, n_hh)) * (actual_pop / n_hh) + matrix = _make_national_matrix(np.ones(n_hh)) + targets = _make_targets([target_pop], index=["ons/uk_population"]) + + rescaled, scale = rescale_weights_to_population(weights, matrix, targets) + + weighted_pop = rescaled.sum(axis=0).sum() + assert weighted_pop == pytest.approx(target_pop, rel=1e-6) + assert scale == pytest.approx(target_pop / actual_pop, rel=1e-6) diff --git a/policyengine_uk_data/utils/calibrate.py b/policyengine_uk_data/utils/calibrate.py index e5250784..e8296cd5 100644 --- a/policyengine_uk_data/utils/calibrate.py +++ b/policyengine_uk_data/utils/calibrate.py @@ -11,6 +11,60 @@ logger = logging.getLogger(__name__) +def rescale_weights_to_population( + final_weights: np.ndarray, + national_matrix, + national_targets, +) -> tuple[np.ndarray, float]: + """Rescale weights so weighted UK population matches the ONS target. + + The optimiser treats population as 1 of ~556 targets so it drifts + ~6% high. This uniformly scales all weights to correct for that. + + Args: + final_weights: calibrated weight matrix (n_areas x n_households) + national_matrix: DataFrame or array with national metric columns + national_targets: Series or array of national target values + + Returns: + (rescaled_weights, scale_factor) — scale_factor is 1.0 if no + population column is found or actual population is zero. + """ + pop_col_name = "ons/uk_population" + pop_idx = None + if hasattr(national_matrix, "columns"): + cols = list(national_matrix.columns) + if pop_col_name in cols: + pop_idx = cols.index(pop_col_name) + if pop_idx is None: + return final_weights, 1.0 + + pop_metric = ( + national_matrix.values[:, pop_idx] + if hasattr(national_matrix, "values") + else national_matrix[:, pop_idx] + ) + pop_target = float( + national_targets.iloc[pop_idx] + if hasattr(national_targets, "iloc") + else national_targets[pop_idx] + ) + national_weights = final_weights.sum(axis=0) + actual_pop = (national_weights * pop_metric).sum() + if actual_pop <= 0: + return final_weights, 1.0 + + scale = pop_target / actual_pop + rescaled = final_weights * scale + logger.info( + "Population rescale: %.2fM -> %.2fM (factor %.4f)", + actual_pop / 1e6, + pop_target / 1e6, + scale, + ) + return rescaled, scale + + def calibrate_local_areas( dataset: UKSingleYearDataset, matrix_fn, @@ -282,32 +336,12 @@ def dropout_weights(weights, p): # Post-calibration population rescaling: the optimiser treats # population as 1 of ~556 targets so it drifts ~6% high. Rescale # all weights so the weighted population matches the ONS target. - pop_col_name = "ons/uk_population" - pop_idx = None - if hasattr(m_n, "columns"): - cols = list(m_n.columns) - if pop_col_name in cols: - pop_idx = cols.index(pop_col_name) - if pop_idx is not None: - pop_metric = ( - m_n.values[:, pop_idx] if hasattr(m_n, "values") else m_n[:, pop_idx] - ) - pop_target = float(y_n.iloc[pop_idx] if hasattr(y_n, "iloc") else y_n[pop_idx]) - national_weights = final_weights.sum(axis=0) - actual_pop = (national_weights * pop_metric).sum() - if actual_pop > 0: - scale = pop_target / actual_pop - final_weights *= scale - logger.info( - "Population rescale: %.2fM -> %.2fM (factor %.4f)", - actual_pop / 1e6, - pop_target / 1e6, - scale, - ) - - # Re-save rescaled weights - with h5py.File(STORAGE_FOLDER / weight_file, "w") as f: - f.create_dataset(dataset_key, data=final_weights) - dataset.household.household_weight = final_weights.sum(axis=0) + final_weights, scale = rescale_weights_to_population( + final_weights, m_n, y_n + ) + if scale != 1.0: + with h5py.File(STORAGE_FOLDER / weight_file, "w") as f: + f.create_dataset(dataset_key, data=final_weights) + dataset.household.household_weight = final_weights.sum(axis=0) return dataset From e08f3a70650a831c6cd670a0d2bc96fca7532d45 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Fri, 20 Mar 2026 13:29:04 +0000 Subject: [PATCH 3/9] Replace synthetic rescale tests with microsimulation-based checks Use the baseline fixture to verify weighted population matches the ONS target via native microdf calculations. Tighten tolerance from 7% to 3% now that post-calibration rescaling is in place. Also adds household count, inflation guard, and country-sum consistency checks. Co-Authored-By: Claude Opus 4.6 --- policyengine_uk_data/tests/test_population.py | 3 +- .../tests/test_population_rescale.py | 206 +++++------------- 2 files changed, 53 insertions(+), 156 deletions(-) diff --git a/policyengine_uk_data/tests/test_population.py b/policyengine_uk_data/tests/test_population.py index 43645791..94a9d789 100644 --- a/policyengine_uk_data/tests/test_population.py +++ b/policyengine_uk_data/tests/test_population.py @@ -1,7 +1,6 @@ def test_population(baseline): population = baseline.calculate("people", 2025).sum() / 1e6 POPULATION_TARGET = 69.5 # Expected UK population in millions, per ONS 2022-based estimate here: https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationprojections/bulletins/nationalpopulationprojections/2022based - # Tolerance temporarily relaxed to 7% due to calibration inflation issue #217 - assert abs(population / POPULATION_TARGET - 1) < 0.07, ( + assert abs(population / POPULATION_TARGET - 1) < 0.03, ( f"Expected UK population of {POPULATION_TARGET:.1f} million, got {population:.1f} million." ) diff --git a/policyengine_uk_data/tests/test_population_rescale.py b/policyengine_uk_data/tests/test_population_rescale.py index 645b8a63..7b0780cb 100644 --- a/policyengine_uk_data/tests/test_population_rescale.py +++ b/policyengine_uk_data/tests/test_population_rescale.py @@ -1,154 +1,52 @@ -"""Tests for post-calibration population rescaling.""" - -import numpy as np -import pandas as pd -import pytest - -from policyengine_uk_data.utils.calibrate import rescale_weights_to_population - - -def _make_national_matrix(pop_metric, other_metric=None): - """Build a small national matrix DataFrame with an ons/uk_population column.""" - data = {"ons/uk_population": pop_metric} - if other_metric is not None: - data["obr/income_tax"] = other_metric - return pd.DataFrame(data) - - -def _make_targets(values, index): - return pd.Series(values, index=index) - - -class TestRescaleWeightsToPopulation: - def test_scales_weights_down_when_population_too_high(self): - """Weights should shrink when actual population exceeds target.""" - n_areas, n_hh = 3, 4 - weights = np.ones((n_areas, n_hh)) * 10.0 # each HH weight=10 per area - # pop_metric=1 per household → actual_pop = sum(axis=0)*1 = 30 per HH, total=120 - matrix = _make_national_matrix(np.ones(n_hh)) - target_pop = 60.0 # half of actual - targets = _make_targets([target_pop], index=["ons/uk_population"]) - - rescaled, scale = rescale_weights_to_population(weights, matrix, targets) - - assert scale == pytest.approx(0.5, rel=1e-6) - assert rescaled.sum() == pytest.approx(weights.sum() * 0.5, rel=1e-6) - - def test_scales_weights_up_when_population_too_low(self): - """Weights should grow when actual population is below target.""" - n_areas, n_hh = 2, 5 - weights = np.ones((n_areas, n_hh)) * 5.0 - matrix = _make_national_matrix(np.ones(n_hh)) - # national_weights = sum(axis=0) = [10,10,10,10,10], actual_pop = 50 - target_pop = 100.0 - targets = _make_targets([target_pop], index=["ons/uk_population"]) - - rescaled, scale = rescale_weights_to_population(weights, matrix, targets) - - assert scale == pytest.approx(2.0, rel=1e-6) - np.testing.assert_allclose(rescaled, weights * 2.0) - - def test_no_change_when_population_matches(self): - """Scale should be 1.0 when actual population already matches target.""" - weights = np.array([[3.0, 7.0]]) - matrix = _make_national_matrix(np.array([1.0, 1.0])) - # national_weights = [3, 7], actual_pop = 10 - targets = _make_targets([10.0], index=["ons/uk_population"]) - - rescaled, scale = rescale_weights_to_population(weights, matrix, targets) - - assert scale == pytest.approx(1.0, rel=1e-6) - np.testing.assert_array_equal(rescaled, weights) - - def test_no_rescale_when_population_column_missing(self): - """Should return scale=1.0 when ons/uk_population is not in the matrix.""" - weights = np.ones((2, 3)) * 10.0 - matrix = pd.DataFrame({"obr/income_tax": np.ones(3)}) - targets = _make_targets([500.0], index=["obr/income_tax"]) - - rescaled, scale = rescale_weights_to_population(weights, matrix, targets) - - assert scale == 1.0 - np.testing.assert_array_equal(rescaled, weights) - - def test_no_rescale_when_actual_population_zero(self): - """Should return scale=1.0 when actual weighted population is zero.""" - weights = np.zeros((2, 3)) - matrix = _make_national_matrix(np.ones(3)) - targets = _make_targets([69_000_000.0], index=["ons/uk_population"]) - - rescaled, scale = rescale_weights_to_population(weights, matrix, targets) - - assert scale == 1.0 - np.testing.assert_array_equal(rescaled, weights) - - def test_works_with_multiple_columns(self): - """Population column is found even when other columns are present.""" - weights = np.array([[2.0, 8.0]]) - matrix = _make_national_matrix( - np.array([1.0, 1.0]), - other_metric=np.array([100.0, 200.0]), - ) - # actual_pop = 2+8 = 10, target = 20 - targets = _make_targets( - [20.0, 999.0], index=["ons/uk_population", "obr/income_tax"] - ) - - rescaled, scale = rescale_weights_to_population(weights, matrix, targets) - - assert scale == pytest.approx(2.0, rel=1e-6) - - def test_works_with_numpy_arrays(self): - """Should handle raw numpy arrays (no DataFrame/Series).""" - weights = np.ones((2, 4)) * 5.0 - # Without columns attr, pop_idx stays None → no rescaling - matrix = np.ones((4, 2)) - targets = np.array([40.0, 100.0]) - - rescaled, scale = rescale_weights_to_population(weights, matrix, targets) - - assert scale == 1.0 # no columns attr → no rescaling - - def test_1d_weights(self): - """Should work with 1D weight vectors (single area or flat weights). - - For 1D weights, sum(axis=0) returns the scalar total, so - actual_pop = total_weight * pop_metric.sum(). - """ - weights = np.array([5.0, 10.0, 15.0]) - matrix = _make_national_matrix(np.array([1.0, 1.0, 1.0])) - # national_weights = sum(axis=0) = scalar 30 - # actual_pop = (30 * [1,1,1]).sum() = 90 - target_pop = 45.0 - targets = _make_targets([target_pop], index=["ons/uk_population"]) - - rescaled, scale = rescale_weights_to_population(weights, matrix, targets) - - assert scale == pytest.approx(0.5, rel=1e-6) - np.testing.assert_allclose(rescaled, weights * 0.5) - - def test_does_not_mutate_input(self): - """Input weights array should not be modified in place.""" - weights = np.ones((2, 3)) * 10.0 - original = weights.copy() - matrix = _make_national_matrix(np.ones(3)) - targets = _make_targets([15.0], index=["ons/uk_population"]) - - rescale_weights_to_population(weights, matrix, targets) - - np.testing.assert_array_equal(weights, original) - - def test_realistic_6pct_overshoot(self): - """Simulate the real-world ~6% overshoot scenario from issue #217.""" - target_pop = 69_000_000.0 - actual_pop = 74_000_000.0 # 7.2% overshoot - n_hh = 100 - weights = np.ones((1, n_hh)) * (actual_pop / n_hh) - matrix = _make_national_matrix(np.ones(n_hh)) - targets = _make_targets([target_pop], index=["ons/uk_population"]) - - rescaled, scale = rescale_weights_to_population(weights, matrix, targets) - - weighted_pop = rescaled.sum(axis=0).sum() - assert weighted_pop == pytest.approx(target_pop, rel=1e-6) - assert scale == pytest.approx(target_pop / actual_pop, rel=1e-6) +"""Tests for post-calibration population rescaling (#217). + +Verifies that the calibrated dataset's weighted population matches the +ONS target, rather than drifting ~6% high as it did before the fix. +""" + +POPULATION_TARGET = 69.5 # ONS 2022-based projection for 2025, millions +TOLERANCE = 0.03 # 3% — was 7% before rescaling fix + + +def test_weighted_population_matches_ons_target(baseline): + """Weighted UK population should be within 3% of the ONS target.""" + population = baseline.calculate("people", 2025).sum() / 1e6 + assert abs(population / POPULATION_TARGET - 1) < TOLERANCE, ( + f"Weighted population {population:.1f}M is >{TOLERANCE:.0%} " + f"from ONS target {POPULATION_TARGET:.1f}M." + ) + + +def test_household_count_reasonable(baseline): + """Total weighted households should be roughly 28-30M (ONS estimate).""" + weights = baseline.calculate("household_weight", 2025).values + total_hh = weights.sum() / 1e6 + assert 25 < total_hh < 33, ( + f"Total weighted households {total_hh:.1f}M outside 25-33M range." + ) + + +def test_population_not_inflated(baseline): + """Population should not exceed 72M (the pre-fix inflated level).""" + population = baseline.calculate("people", 2025).sum() / 1e6 + assert population < 72, ( + f"Population {population:.1f}M exceeds 72M — rescaling may not be working." + ) + + +def test_country_populations_sum_to_uk(baseline): + """England + Scotland + Wales + NI populations should sum to UK total.""" + hw = baseline.calculate("household_weight", 2025).values + people = baseline.calculate("people_in_household", 2025).values + country = baseline.calculate("country", map_to="household").values + + uk_pop = (hw * people).sum() + country_sum = 0 + for c in set(country): + mask = country == c + country_sum += (hw[mask] * people[mask]).sum() + + assert abs(country_sum / uk_pop - 1) < 0.001, ( + f"Country populations sum to {country_sum/1e6:.1f}M " + f"but UK total is {uk_pop/1e6:.1f}M." + ) From 9c021a2d0a8c8484b1730114060e8b89ef5f7316 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Fri, 20 Mar 2026 13:32:02 +0000 Subject: [PATCH 4/9] Use native microdf weighted operations in population tests Replace manual .values * weights numpy calculations with MicroSeries .sum() which applies weights automatically. Co-Authored-By: Claude Opus 4.6 --- .../tests/test_population_rescale.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/policyengine_uk_data/tests/test_population_rescale.py b/policyengine_uk_data/tests/test_population_rescale.py index 7b0780cb..eb4fdc16 100644 --- a/policyengine_uk_data/tests/test_population_rescale.py +++ b/policyengine_uk_data/tests/test_population_rescale.py @@ -19,8 +19,7 @@ def test_weighted_population_matches_ons_target(baseline): def test_household_count_reasonable(baseline): """Total weighted households should be roughly 28-30M (ONS estimate).""" - weights = baseline.calculate("household_weight", 2025).values - total_hh = weights.sum() / 1e6 + total_hh = baseline.calculate("household_weight", 2025).sum() / 1e6 assert 25 < total_hh < 33, ( f"Total weighted households {total_hh:.1f}M outside 25-33M range." ) @@ -36,15 +35,13 @@ def test_population_not_inflated(baseline): def test_country_populations_sum_to_uk(baseline): """England + Scotland + Wales + NI populations should sum to UK total.""" - hw = baseline.calculate("household_weight", 2025).values - people = baseline.calculate("people_in_household", 2025).values - country = baseline.calculate("country", map_to="household").values - - uk_pop = (hw * people).sum() - country_sum = 0 - for c in set(country): - mask = country == c - country_sum += (hw[mask] * people[mask]).sum() + people = baseline.calculate("people_in_household", 2025) + country = baseline.calculate("country", map_to="household") + + uk_pop = people.sum() + country_sum = sum( + people[country == c].sum() for c in country.unique() + ) assert abs(country_sum / uk_pop - 1) < 0.001, ( f"Country populations sum to {country_sum/1e6:.1f}M " From 50844139029d35417a11013e1936a86a49132f68 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Fri, 20 Mar 2026 13:33:40 +0000 Subject: [PATCH 5/9] Apply ruff formatting Co-Authored-By: Claude Opus 4.6 --- policyengine_uk_data/tests/test_population_rescale.py | 8 +++----- policyengine_uk_data/utils/calibrate.py | 4 +--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/policyengine_uk_data/tests/test_population_rescale.py b/policyengine_uk_data/tests/test_population_rescale.py index eb4fdc16..222c79d3 100644 --- a/policyengine_uk_data/tests/test_population_rescale.py +++ b/policyengine_uk_data/tests/test_population_rescale.py @@ -39,11 +39,9 @@ def test_country_populations_sum_to_uk(baseline): country = baseline.calculate("country", map_to="household") uk_pop = people.sum() - country_sum = sum( - people[country == c].sum() for c in country.unique() - ) + country_sum = sum(people[country == c].sum() for c in country.unique()) assert abs(country_sum / uk_pop - 1) < 0.001, ( - f"Country populations sum to {country_sum/1e6:.1f}M " - f"but UK total is {uk_pop/1e6:.1f}M." + f"Country populations sum to {country_sum / 1e6:.1f}M " + f"but UK total is {uk_pop / 1e6:.1f}M." ) diff --git a/policyengine_uk_data/utils/calibrate.py b/policyengine_uk_data/utils/calibrate.py index e8296cd5..21b2d025 100644 --- a/policyengine_uk_data/utils/calibrate.py +++ b/policyengine_uk_data/utils/calibrate.py @@ -336,9 +336,7 @@ def dropout_weights(weights, p): # Post-calibration population rescaling: the optimiser treats # population as 1 of ~556 targets so it drifts ~6% high. Rescale # all weights so the weighted population matches the ONS target. - final_weights, scale = rescale_weights_to_population( - final_weights, m_n, y_n - ) + final_weights, scale = rescale_weights_to_population(final_weights, m_n, y_n) if scale != 1.0: with h5py.File(STORAGE_FOLDER / weight_file, "w") as f: f.create_dataset(dataset_key, data=final_weights) From 7e422866b4bb33ade0653f1f0916a94b8cf48894 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Fri, 20 Mar 2026 14:03:37 +0000 Subject: [PATCH 6/9] Fix test failures: use raw weights for household count, fix variable name - household_weight.sum() on MicroSeries applies weights (w*w), use raw numpy array instead for simple sum of weights - people_in_household doesn't exist; use people + country at person level Co-Authored-By: Claude Opus 4.6 --- .../tests/test_population_rescale.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/policyengine_uk_data/tests/test_population_rescale.py b/policyengine_uk_data/tests/test_population_rescale.py index 222c79d3..a77c889b 100644 --- a/policyengine_uk_data/tests/test_population_rescale.py +++ b/policyengine_uk_data/tests/test_population_rescale.py @@ -4,10 +4,22 @@ ONS target, rather than drifting ~6% high as it did before the fix. """ +import warnings + +import numpy as np + POPULATION_TARGET = 69.5 # ONS 2022-based projection for 2025, millions TOLERANCE = 0.03 # 3% — was 7% before rescaling fix +def _raw(micro_series): + """Extract the raw numpy array from a MicroSeries without triggering + the .values deprecation warning.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + return np.array(micro_series.values) + + def test_weighted_population_matches_ons_target(baseline): """Weighted UK population should be within 3% of the ONS target.""" population = baseline.calculate("people", 2025).sum() / 1e6 @@ -19,7 +31,8 @@ def test_weighted_population_matches_ons_target(baseline): def test_household_count_reasonable(baseline): """Total weighted households should be roughly 28-30M (ONS estimate).""" - total_hh = baseline.calculate("household_weight", 2025).sum() / 1e6 + hw = _raw(baseline.calculate("household_weight", 2025)) + total_hh = hw.sum() / 1e6 assert 25 < total_hh < 33, ( f"Total weighted households {total_hh:.1f}M outside 25-33M range." ) @@ -35,8 +48,8 @@ def test_population_not_inflated(baseline): def test_country_populations_sum_to_uk(baseline): """England + Scotland + Wales + NI populations should sum to UK total.""" - people = baseline.calculate("people_in_household", 2025) - country = baseline.calculate("country", map_to="household") + people = baseline.calculate("people", 2025) + country = baseline.calculate("country") uk_pop = people.sum() country_sum = sum(people[country == c].sum() for c in country.unique()) From 6127620a95489a3df6fa324f8229cc69909ebad5 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Mon, 23 Mar 2026 10:57:02 +0000 Subject: [PATCH 7/9] Fix country population test: map country variable to person level The `country` variable is household-level (53K rows) but `people` is person-level (115K rows), causing an IndexingError when used as a boolean indexer. Add `map_to="person"` so both series have matching indices. Co-Authored-By: Claude Opus 4.6 --- policyengine_uk_data/tests/test_population_rescale.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/policyengine_uk_data/tests/test_population_rescale.py b/policyengine_uk_data/tests/test_population_rescale.py index a77c889b..d9856230 100644 --- a/policyengine_uk_data/tests/test_population_rescale.py +++ b/policyengine_uk_data/tests/test_population_rescale.py @@ -49,7 +49,7 @@ def test_population_not_inflated(baseline): def test_country_populations_sum_to_uk(baseline): """England + Scotland + Wales + NI populations should sum to UK total.""" people = baseline.calculate("people", 2025) - country = baseline.calculate("country") + country = baseline.calculate("country", map_to="person") uk_pop = people.sum() country_sum = sum(people[country == c].sum() for c in country.unique()) From 7977a8c84e58f2a9ce8db8fa99978a04bc85af52 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Mon, 23 Mar 2026 11:07:53 +0000 Subject: [PATCH 8/9] Fix population drift inside calibration loss instead of post-hoc rescaling Addresses review feedback: instead of rescaling weights after calibration (which invalidates the calibration dashboard), boost the population target weight 10x in the national loss function so the optimiser keeps it on target during training. - Remove rescale_weights_to_population() and its post-calibration call - Add _build_national_target_weights() giving ons/uk_population 10x weight - Replace torch.mean() with weighted_mean() in national loss computation Co-Authored-By: Claude Opus 4.6 --- .../tests/test_population_rescale.py | 5 +- policyengine_uk_data/utils/calibrate.py | 90 +++++++------------ 2 files changed, 37 insertions(+), 58 deletions(-) diff --git a/policyengine_uk_data/tests/test_population_rescale.py b/policyengine_uk_data/tests/test_population_rescale.py index d9856230..86cd9cc7 100644 --- a/policyengine_uk_data/tests/test_population_rescale.py +++ b/policyengine_uk_data/tests/test_population_rescale.py @@ -1,7 +1,8 @@ -"""Tests for post-calibration population rescaling (#217). +"""Tests for population accuracy in calibration (#217). Verifies that the calibrated dataset's weighted population matches the -ONS target, rather than drifting ~6% high as it did before the fix. +ONS target. The population target is boosted in the calibration loss +function to prevent it drifting ~6% high. """ import warnings diff --git a/policyengine_uk_data/utils/calibrate.py b/policyengine_uk_data/utils/calibrate.py index 21b2d025..bec9bf5b 100644 --- a/policyengine_uk_data/utils/calibrate.py +++ b/policyengine_uk_data/utils/calibrate.py @@ -10,59 +10,31 @@ logger = logging.getLogger(__name__) +# Population gets this multiplier in the national loss so the optimiser +# keeps it on target rather than letting it drift ~6% high. +POPULATION_LOSS_WEIGHT = 10.0 -def rescale_weights_to_population( - final_weights: np.ndarray, - national_matrix, - national_targets, -) -> tuple[np.ndarray, float]: - """Rescale weights so weighted UK population matches the ONS target. - - The optimiser treats population as 1 of ~556 targets so it drifts - ~6% high. This uniformly scales all weights to correct for that. - Args: - final_weights: calibrated weight matrix (n_areas x n_households) - national_matrix: DataFrame or array with national metric columns - national_targets: Series or array of national target values +def _build_national_target_weights( + national_matrix, + population_weight: float = POPULATION_LOSS_WEIGHT, +) -> np.ndarray: + """Build per-target weight vector for the national loss. - Returns: - (rescaled_weights, scale_factor) — scale_factor is 1.0 if no - population column is found or actual population is zero. + Every target gets weight 1.0 except ``ons/uk_population`` which gets + ``population_weight``. This ensures the optimiser treats population + accuracy as a hard constraint rather than 1-of-N soft targets. """ pop_col_name = "ons/uk_population" - pop_idx = None if hasattr(national_matrix, "columns"): + n = len(national_matrix.columns) + w = np.ones(n, dtype=np.float32) cols = list(national_matrix.columns) if pop_col_name in cols: - pop_idx = cols.index(pop_col_name) - if pop_idx is None: - return final_weights, 1.0 - - pop_metric = ( - national_matrix.values[:, pop_idx] - if hasattr(national_matrix, "values") - else national_matrix[:, pop_idx] - ) - pop_target = float( - national_targets.iloc[pop_idx] - if hasattr(national_targets, "iloc") - else national_targets[pop_idx] - ) - national_weights = final_weights.sum(axis=0) - actual_pop = (national_weights * pop_metric).sum() - if actual_pop <= 0: - return final_weights, 1.0 - - scale = pop_target / actual_pop - rescaled = final_weights * scale - logger.info( - "Population rescale: %.2fM -> %.2fM (factor %.4f)", - actual_pop / 1e6, - pop_target / 1e6, - scale, - ) - return rescaled, scale + w[cols.index(pop_col_name)] = population_weight + return w + # Fallback: no column names available — equal weights + return np.ones(national_matrix.shape[1], dtype=np.float32) def calibrate_local_areas( @@ -150,11 +122,20 @@ def calibrate_local_areas( ) r = torch.tensor(r, dtype=torch.float32) + # Per-target weights for the national loss (population gets boosted) + national_target_weights = torch.tensor( + _build_national_target_weights(m_national), + dtype=torch.float32, + ) + def sre(x, y): one_way = ((1 + x) / (1 + y) - 1) ** 2 other_way = ((1 + y) / (1 + x) - 1) ** 2 return torch.min(one_way, other_way) + def weighted_mean(values, weights): + return (values * weights).sum() / weights.sum() + def loss(w, validation: bool = False): pred_local = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1) if dropout_targets and validation_targets_local is not None: @@ -174,9 +155,15 @@ def loss(w, validation: bool = False): else: mask = ~validation_targets_national pred_national = pred_national[mask] - mse_national = torch.mean(sre(pred_national, y_national[mask])) + mse_national = weighted_mean( + sre(pred_national, y_national[mask]), + national_target_weights[mask], + ) else: - mse_national = torch.mean(sre(pred_national, y_national)) + mse_national = weighted_mean( + sre(pred_national, y_national), + national_target_weights, + ) return mse_local + mse_national @@ -333,13 +320,4 @@ def dropout_weights(weights, p): dataset.household.household_weight = final_weights.sum(axis=0) - # Post-calibration population rescaling: the optimiser treats - # population as 1 of ~556 targets so it drifts ~6% high. Rescale - # all weights so the weighted population matches the ONS target. - final_weights, scale = rescale_weights_to_population(final_weights, m_n, y_n) - if scale != 1.0: - with h5py.File(STORAGE_FOLDER / weight_file, "w") as f: - f.create_dataset(dataset_key, data=final_weights) - dataset.household.household_weight = final_weights.sum(axis=0) - return dataset From 5ceb2703f15a62b64151f2b3841f418811271b7d Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Mon, 23 Mar 2026 12:27:36 +0000 Subject: [PATCH 9/9] Fix asymmetric loss function that biased optimiser toward overshoot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The min-of-two-ratios SRE loss penalised undershoot more than overshoot of the same magnitude (e.g. 6% overshoot cost 89% of 6% undershoot). Across ~11k targets this systematically inflated weights, causing the ~6% population overshoot. Replace with squared log-ratio which is perfectly symmetric: log(a/b)² = log(b/a)². Also remove redundant Scotland children/babies targets that overlapped with regional age bands. Co-Authored-By: Claude Opus 4.6 --- .../targets/sources/ons_demographics.py | 60 +------------------ policyengine_uk_data/utils/calibrate.py | 54 +++-------------- 2 files changed, 9 insertions(+), 105 deletions(-) diff --git a/policyengine_uk_data/targets/sources/ons_demographics.py b/policyengine_uk_data/targets/sources/ons_demographics.py index dba77671..8c18e8d8 100644 --- a/policyengine_uk_data/targets/sources/ons_demographics.py +++ b/policyengine_uk_data/targets/sources/ons_demographics.py @@ -205,33 +205,6 @@ def _parse_regional_from_csv() -> list[Target]: return targets -# Scotland-specific (from NRS/census — not in ONS projections) -_SCOTLAND_CHILDREN_UNDER_16 = { - y: v * 1e3 - for y, v in { - 2022: 904, - 2023: 900, - 2024: 896, - 2025: 892, - 2026: 888, - 2027: 884, - 2028: 880, - }.items() -} - -_SCOTLAND_BABIES_UNDER_1 = { - y: v * 1e3 - for y, v in { - 2022: 46, - 2023: 46, - 2024: 46, - 2025: 46, - 2026: 46, - 2027: 46, - 2028: 46, - }.items() -} - _SCOTLAND_HOUSEHOLDS_3PLUS_CHILDREN = { y: v * 1e3 for y, v in { @@ -263,38 +236,7 @@ def get_targets() -> list[Target]: # Regional age bands from demographics.csv targets.extend(_parse_regional_from_csv()) - # Scotland-specific (NRS/census — small number of static values) - targets.append( - Target( - name="ons/scotland_children_under_16", - variable="age", - source="nrs", - unit=Unit.COUNT, - values=_SCOTLAND_CHILDREN_UNDER_16, - is_count=True, - geographic_level=GeographicLevel.COUNTRY, - geo_code="S", - geo_name="Scotland", - reference_url=_REF_NRS, - ) - ) - targets.append( - Target( - name="ons/scotland_babies_under_1", - variable="age", - source="nrs", - unit=Unit.COUNT, - values=_SCOTLAND_BABIES_UNDER_1, - is_count=True, - geographic_level=GeographicLevel.COUNTRY, - geo_code="S", - geo_name="Scotland", - reference_url=( - "https://www.nrscotland.gov.uk/publications/" - "vital-events-reference-tables-2024/" - ), - ) - ) + # Scotland households (census-derived, no overlap with age bands) targets.append( Target( name="ons/scotland_households_3plus_children", diff --git a/policyengine_uk_data/utils/calibrate.py b/policyengine_uk_data/utils/calibrate.py index bec9bf5b..d809ea86 100644 --- a/policyengine_uk_data/utils/calibrate.py +++ b/policyengine_uk_data/utils/calibrate.py @@ -10,32 +10,6 @@ logger = logging.getLogger(__name__) -# Population gets this multiplier in the national loss so the optimiser -# keeps it on target rather than letting it drift ~6% high. -POPULATION_LOSS_WEIGHT = 10.0 - - -def _build_national_target_weights( - national_matrix, - population_weight: float = POPULATION_LOSS_WEIGHT, -) -> np.ndarray: - """Build per-target weight vector for the national loss. - - Every target gets weight 1.0 except ``ons/uk_population`` which gets - ``population_weight``. This ensures the optimiser treats population - accuracy as a hard constraint rather than 1-of-N soft targets. - """ - pop_col_name = "ons/uk_population" - if hasattr(national_matrix, "columns"): - n = len(national_matrix.columns) - w = np.ones(n, dtype=np.float32) - cols = list(national_matrix.columns) - if pop_col_name in cols: - w[cols.index(pop_col_name)] = population_weight - return w - # Fallback: no column names available — equal weights - return np.ones(national_matrix.shape[1], dtype=np.float32) - def calibrate_local_areas( dataset: UKSingleYearDataset, @@ -122,19 +96,13 @@ def calibrate_local_areas( ) r = torch.tensor(r, dtype=torch.float32) - # Per-target weights for the national loss (population gets boosted) - national_target_weights = torch.tensor( - _build_national_target_weights(m_national), - dtype=torch.float32, - ) - def sre(x, y): - one_way = ((1 + x) / (1 + y) - 1) ** 2 - other_way = ((1 + y) / (1 + x) - 1) ** 2 - return torch.min(one_way, other_way) - - def weighted_mean(values, weights): - return (values * weights).sum() / weights.sum() + """Squared log-ratio loss — symmetric so overshoot and undershoot + of the same magnitude incur identical cost. The previous + min-of-two-ratios formulation penalised undershoot more than + overshoot, which systematically biased the optimiser toward + inflating weights (root cause of the ~6 % population overshoot).""" + return torch.log((1 + x) / (1 + y)) ** 2 def loss(w, validation: bool = False): pred_local = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1) @@ -155,15 +123,9 @@ def loss(w, validation: bool = False): else: mask = ~validation_targets_national pred_national = pred_national[mask] - mse_national = weighted_mean( - sre(pred_national, y_national[mask]), - national_target_weights[mask], - ) + mse_national = torch.mean(sre(pred_national, y_national[mask])) else: - mse_national = weighted_mean( - sre(pred_national, y_national), - national_target_weights, - ) + mse_national = torch.mean(sre(pred_national, y_national)) return mse_local + mse_national