From f9162bd0846e2e8be15c55d4e309efb48426206e Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Wed, 8 Apr 2026 22:15:41 -0400
Subject: [PATCH 1/8] Use Census childcare capping formula

---
 policyengine_us_data/datasets/cps/cps.py      |   1 +
 .../datasets/cps/extended_cps.py              | 151 +++++++++++-------
 tests/unit/test_extended_cps.py               |  43 ++---
 3 files changed, 116 insertions(+), 79 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 5d5774eea..08301f167 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -494,6 +494,7 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
 
     cps["weekly_hours_worked"] = person.HRSWK
     cps["hours_worked_last_week"] = person.A_HRS1
+    cps["weeks_worked"] = np.clip(person.WKSWORK, 0, 52)
 
     cps["taxable_interest_income"] = person.INT_VAL * (p["taxable_interest_fraction"])
     cps["tax_exempt_interest_income"] = person.INT_VAL * (
diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py
index c840f29af..760ac5df9 100644
--- a/policyengine_us_data/datasets/cps/extended_cps.py
+++ b/policyengine_us_data/datasets/cps/extended_cps.py
@@ -26,6 +26,12 @@
 
 logger = logging.getLogger(__name__)
 
+# Census SPM technical documentation, "SPM Work Expense Values".
+# These are weekly work expense amounts applied to each adult earner.
+SPM_WEEKLY_WORK_EXPENSE_BY_YEAR = {
+    2024: 41.17,
+}
+
 
 def _supports_structural_mortgage_inputs() -> bool:
     return has_policyengine_us_variables(*STRUCTURAL_MORTGAGE_VARIABLES)
@@ -325,69 +331,101 @@ def reconcile_ss_subcomponents(predictions, total_ss):
 }
 
 
+def _get_spm_weekly_work_expense(year: int) -> float:
+    try:
+        return SPM_WEEKLY_WORK_EXPENSE_BY_YEAR[year]
+    except KeyError as exc:
+        raise ValueError(
+            f"No Census SPM weekly work expense value configured for {year}"
+        ) from exc
+
+
+def _calculate_clone_work_expenses(
+    clone_person_data: pd.DataFrame,
+    clone_spm_unit_ids: np.ndarray,
+) -> np.ndarray:
+    clone_spm_unit_ids = np.asarray(clone_spm_unit_ids)
+    if clone_person_data.empty:
+        return np.zeros(len(clone_spm_unit_ids), dtype=float)
+
+    adult_earners = clone_person_data.loc[
+        (clone_person_data["age"] >= 18) & (clone_person_data["earnings"] > 0),
+        ["spm_unit_id", "weeks_worked"],
+    ].copy()
+    if adult_earners.empty:
+        return np.zeros(len(clone_spm_unit_ids), dtype=float)
+
+    adult_earners["weeks_worked"] = adult_earners["weeks_worked"].clip(
+        lower=0, upper=52
+    )
+    return (
+        adult_earners.groupby("spm_unit_id")["weeks_worked"]
+        .sum()
+        .reindex(
+            clone_spm_unit_ids,
+            fill_value=0.0,
+        )
+        .to_numpy(dtype=float)
+    )
+
+
+def _calculate_clone_lower_earner_caps(
+    clone_person_data: pd.DataFrame,
+    clone_spm_unit_ids: np.ndarray,
+) -> np.ndarray:
+    clone_spm_unit_ids = np.asarray(clone_spm_unit_ids)
+    if clone_person_data.empty:
+        return np.zeros(len(clone_spm_unit_ids), dtype=float)
+
+    head_or_spouse = clone_person_data.loc[
+        clone_person_data["is_parent_proxy"].astype(bool),
+        ["spm_unit_id", "earnings"],
+    ].copy()
+    if head_or_spouse.empty:
+        return np.zeros(len(clone_spm_unit_ids), dtype=float)
+
+    head_or_spouse["earnings"] = head_or_spouse["earnings"].clip(lower=0.0)
+    lower_earner_caps = head_or_spouse.groupby("spm_unit_id")["earnings"].agg(
+        lambda values: float(values.min()) if len(values) > 1 else float(values.iloc[0])
+    )
+    return lower_earner_caps.reindex(
+        clone_spm_unit_ids,
+        fill_value=0.0,
+    ).to_numpy(dtype=float)
+
+
 def derive_clone_capped_childcare_expenses(
-    donor_pre_subsidy: np.ndarray,
-    donor_capped: np.ndarray,
     clone_pre_subsidy: np.ndarray,
     clone_person_data: pd.DataFrame,
     clone_spm_unit_ids: np.ndarray,
+    time_period: int,
 ) -> np.ndarray:
-    """Derive clone-half capped childcare from clone inputs.
+    """Derive clone-half capped work and childcare expenses from clone inputs.
 
     The CPS provides both pre-subsidy childcare and the SPM-specific
-    capped childcare deduction. For the clone half, we impute only the
-    pre-subsidy amount, then deterministically rebuild the capped amount
-    instead of letting a second QRF predict it independently.
-
-    We preserve the donor's observed capping share while also respecting
-    the clone's own earnings cap. This keeps the clone-half value
-    consistent with pre-subsidy childcare and avoids impossible outputs
-    such as capped childcare exceeding pre-subsidy childcare.
+    capped work-and-childcare deduction. For the clone half, we impute
+    only the pre-subsidy childcare amount, then deterministically rebuild
+    the capped value using the Census SPM rule:
+    work expenses plus childcare, capped at the lower earner's earnings
+    for the reference person and spouse/partner.
     """
 
-    donor_pre_subsidy = np.asarray(donor_pre_subsidy, dtype=float)
-    donor_capped = np.asarray(donor_capped, dtype=float)
     clone_pre_subsidy = np.asarray(clone_pre_subsidy, dtype=float)
-    clone_spm_unit_ids = np.asarray(clone_spm_unit_ids)
-
-    donor_cap_share = np.divide(
-        donor_capped,
-        donor_pre_subsidy,
-        out=np.zeros_like(donor_capped, dtype=float),
-        where=donor_pre_subsidy > 0,
+    weekly_work_expense = _get_spm_weekly_work_expense(time_period)
+    annual_work_expenses = (
+        _calculate_clone_work_expenses(
+            clone_person_data=clone_person_data,
+            clone_spm_unit_ids=clone_spm_unit_ids,
+        )
+        * weekly_work_expense
+    )
+    lower_earner_cap = _calculate_clone_lower_earner_caps(
+        clone_person_data=clone_person_data,
+        clone_spm_unit_ids=clone_spm_unit_ids,
     )
-    donor_cap_share = np.clip(donor_cap_share, 0.0, 1.0)
-    capped_from_share = np.maximum(clone_pre_subsidy, 0.0) * donor_cap_share
-
-    if clone_person_data.empty:
-        earnings_cap = np.zeros(len(clone_spm_unit_ids), dtype=float)
-    else:
-        eligible = clone_person_data["is_parent_proxy"].astype(bool)
-        parent_rows = clone_person_data.loc[
-            eligible, ["spm_unit_id", "age", "earnings"]
-        ].copy()
-        if parent_rows.empty:
-            earnings_cap = np.zeros(len(clone_spm_unit_ids), dtype=float)
-        else:
-            parent_rows["earnings"] = parent_rows["earnings"].clip(lower=0.0)
-            parent_rows["age_rank"] = parent_rows.groupby("spm_unit_id")["age"].rank(
-                method="first", ascending=False
-            )
-            top_two = parent_rows[parent_rows["age_rank"] <= 2].sort_values(
-                ["spm_unit_id", "age_rank"]
-            )
-            earnings_cap_by_unit = top_two.groupby("spm_unit_id")["earnings"].agg(
-                lambda values: (
-                    float(values.iloc[0])
-                    if len(values) == 1
-                    else float(np.minimum(values.iloc[0], values.iloc[1]))
-                )
-            )
-            earnings_cap = earnings_cap_by_unit.reindex(
-                clone_spm_unit_ids, fill_value=0.0
-            ).to_numpy(dtype=float)
 
-    return np.minimum(capped_from_share, earnings_cap)
+    combined_expenses = np.maximum(clone_pre_subsidy, 0.0) + annual_work_expenses
+    return np.minimum(combined_expenses, lower_earner_cap)
 
 
 def _rebuild_clone_capped_childcare_expenses(
@@ -421,26 +459,19 @@ def _rebuild_clone_capped_childcare_expenses(
                 data["employment_income"][time_period][n_persons_half:]
                 + data["self_employment_income"][time_period][n_persons_half:]
             ),
+            "weeks_worked": data["weeks_worked"][time_period][n_persons_half:],
         }
     )
-
-    donor_pre_subsidy = data["spm_unit_pre_subsidy_childcare_expenses"][time_period][
-        :n_spm_units_half
-    ]
-    donor_capped = data["spm_unit_capped_work_childcare_expenses"][time_period][
-        :n_spm_units_half
-    ]
     clone_pre_subsidy = data["spm_unit_pre_subsidy_childcare_expenses"][time_period][
         n_spm_units_half:
     ]
     clone_spm_unit_ids = data["spm_unit_id"][time_period][n_spm_units_half:]
 
     return derive_clone_capped_childcare_expenses(
-        donor_pre_subsidy=donor_pre_subsidy,
-        donor_capped=donor_capped,
         clone_pre_subsidy=clone_pre_subsidy,
         clone_person_data=clone_person_data,
         clone_spm_unit_ids=clone_spm_unit_ids,
+        time_period=time_period,
     )
 
 
diff --git a/tests/unit/test_extended_cps.py b/tests/unit/test_extended_cps.py
index e32172db2..a6ea8b654 100644
--- a/tests/unit/test_extended_cps.py
+++ b/tests/unit/test_extended_cps.py
@@ -126,57 +126,62 @@ def test_capped_childcare_not_in_cps_only(self):
 
 
 class TestCloneChildcareDerivation:
-    """Clone-half capped childcare should be derived deterministically."""
+    """Clone-half capped work-and-childcare expenses should be deterministic."""
 
-    def test_caps_at_pre_subsidy_and_clone_earnings(self):
-        donor_pre_subsidy = np.array([10000.0, 4000.0, 6000.0])
-        donor_capped = np.array([4000.0, 4000.0, 0.0])
-        clone_pre_subsidy = np.array([12000.0, 5000.0, 3000.0])
+    def test_caps_combined_work_and_childcare_at_lower_earner(self):
+        clone_pre_subsidy = np.array([1200.0, 5000.0, 3000.0])
         person_data = pd.DataFrame(
             {
                 "spm_unit_id": [1, 1, 2, 2, 3],
                 "age": [40, 38, 35, 33, 29],
                 "is_parent_proxy": [True, True, True, True, True],
                 "earnings": [9000.0, 3000.0, 1500.0, 0.0, 2000.0],
+                "weeks_worked": [10.0, 20.0, 30.0, 5.0, 15.0],
             }
         )
 
         result = derive_clone_capped_childcare_expenses(
-            donor_pre_subsidy=donor_pre_subsidy,
-            donor_capped=donor_capped,
             clone_pre_subsidy=clone_pre_subsidy,
             clone_person_data=person_data,
             clone_spm_unit_ids=np.array([1, 2, 3]),
+            time_period=2024,
         )
 
-        np.testing.assert_allclose(result, np.array([3000.0, 0.0, 0.0]))
+        np.testing.assert_allclose(
+            result,
+            np.array(
+                [
+                    2435.1,  # 1200 childcare + (10 + 20) * 41.17 work expenses
+                    0.0,  # Two-parent unit capped by the lower earner's zero earnings
+                    2000.0,  # Single proxy unit capped at the proxy's earnings
+                ]
+            ),
+            rtol=0,
+            atol=1e-6,
+        )
 
-    def test_uses_single_parent_earnings_cap_for_single_proxy_units(self):
-        donor_pre_subsidy = np.array([4000.0])
-        donor_capped = np.array([4000.0])
-        clone_pre_subsidy = np.array([6000.0])
+    def test_includes_work_expenses_even_without_childcare(self):
+        clone_pre_subsidy = np.array([0.0])
         person_data = pd.DataFrame(
             {
                 "spm_unit_id": [10],
                 "age": [31],
                 "is_parent_proxy": [True],
                 "earnings": [2500.0],
+                "weeks_worked": [12.0],
             }
         )
 
         result = derive_clone_capped_childcare_expenses(
-            donor_pre_subsidy=donor_pre_subsidy,
-            donor_capped=donor_capped,
             clone_pre_subsidy=clone_pre_subsidy,
             clone_person_data=person_data,
             clone_spm_unit_ids=np.array([10]),
+            time_period=2024,
         )
 
-        np.testing.assert_allclose(result, np.array([2500.0]))
+        np.testing.assert_allclose(result, np.array([494.04]), rtol=0, atol=1e-6)
 
     def test_falls_back_to_zero_without_parent_proxies(self):
-        donor_pre_subsidy = np.array([3000.0])
-        donor_capped = np.array([2000.0])
         clone_pre_subsidy = np.array([3000.0])
         person_data = pd.DataFrame(
             {
@@ -184,15 +189,15 @@ def test_falls_back_to_zero_without_parent_proxies(self):
                 "age": [12, 9],
                 "is_parent_proxy": [False, False],
                 "earnings": [0.0, 0.0],
+                "weeks_worked": [0.0, 0.0],
             }
         )
 
         result = derive_clone_capped_childcare_expenses(
-            donor_pre_subsidy=donor_pre_subsidy,
-            donor_capped=donor_capped,
             clone_pre_subsidy=clone_pre_subsidy,
             clone_person_data=person_data,
             clone_spm_unit_ids=np.array([20]),
+            time_period=2024,
         )
 
         np.testing.assert_allclose(result, np.array([0.0]))

From fe77557976f369b505db2ce3d98ae40bed7f8255 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Wed, 8 Apr 2026 22:56:15 -0400
Subject: [PATCH 2/8] Add childcare formula changelog fragment

---
 changelog.d/705.fixed | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/705.fixed

diff --git a/changelog.d/705.fixed b/changelog.d/705.fixed
new file mode 100644
index 000000000..4c60c9bff
--- /dev/null
+++ b/changelog.d/705.fixed
@@ -0,0 +1 @@
+Use Census work-and-childcare capping inputs for clone-half SPM childcare expenses instead of donor capping shares.

From dd75f1d97eecb1f193e95a6feb7d366ba99a34cb Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Thu, 9 Apr 2026 06:59:25 -0400
Subject: [PATCH 3/8] Populate childcare formula inputs from CPS

---
 .../datasets/cps/extended_cps.py              | 168 ------------------
 tests/unit/test_extended_cps.py               |  82 +--------
 tests/unit/test_weeks_worked.py               |  41 +++++
 3 files changed, 42 insertions(+), 249 deletions(-)
 create mode 100644 tests/unit/test_weeks_worked.py

diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py
index 760ac5df9..310d0a072 100644
--- a/policyengine_us_data/datasets/cps/extended_cps.py
+++ b/policyengine_us_data/datasets/cps/extended_cps.py
@@ -26,12 +26,6 @@
 
 logger = logging.getLogger(__name__)
 
-# Census SPM technical documentation, "SPM Work Expense Values".
-# These are weekly work expense amounts applied to each adult earner.
-SPM_WEEKLY_WORK_EXPENSE_BY_YEAR = {
-    2024: 41.17,
-}
-
 
 def _supports_structural_mortgage_inputs() -> bool:
     return has_policyengine_us_variables(*STRUCTURAL_MORTGAGE_VARIABLES)
@@ -331,150 +325,6 @@ def reconcile_ss_subcomponents(predictions, total_ss):
 }
 
 
-def _get_spm_weekly_work_expense(year: int) -> float:
-    try:
-        return SPM_WEEKLY_WORK_EXPENSE_BY_YEAR[year]
-    except KeyError as exc:
-        raise ValueError(
-            f"No Census SPM weekly work expense value configured for {year}"
-        ) from exc
-
-
-def _calculate_clone_work_expenses(
-    clone_person_data: pd.DataFrame,
-    clone_spm_unit_ids: np.ndarray,
-) -> np.ndarray:
-    clone_spm_unit_ids = np.asarray(clone_spm_unit_ids)
-    if clone_person_data.empty:
-        return np.zeros(len(clone_spm_unit_ids), dtype=float)
-
-    adult_earners = clone_person_data.loc[
-        (clone_person_data["age"] >= 18) & (clone_person_data["earnings"] > 0),
-        ["spm_unit_id", "weeks_worked"],
-    ].copy()
-    if adult_earners.empty:
-        return np.zeros(len(clone_spm_unit_ids), dtype=float)
-
-    adult_earners["weeks_worked"] = adult_earners["weeks_worked"].clip(
-        lower=0, upper=52
-    )
-    return (
-        adult_earners.groupby("spm_unit_id")["weeks_worked"]
-        .sum()
-        .reindex(
-            clone_spm_unit_ids,
-            fill_value=0.0,
-        )
-        .to_numpy(dtype=float)
-    )
-
-
-def _calculate_clone_lower_earner_caps(
-    clone_person_data: pd.DataFrame,
-    clone_spm_unit_ids: np.ndarray,
-) -> np.ndarray:
-    clone_spm_unit_ids = np.asarray(clone_spm_unit_ids)
-    if clone_person_data.empty:
-        return np.zeros(len(clone_spm_unit_ids), dtype=float)
-
-    head_or_spouse = clone_person_data.loc[
-        clone_person_data["is_parent_proxy"].astype(bool),
-        ["spm_unit_id", "earnings"],
-    ].copy()
-    if head_or_spouse.empty:
-        return np.zeros(len(clone_spm_unit_ids), dtype=float)
-
-    head_or_spouse["earnings"] = head_or_spouse["earnings"].clip(lower=0.0)
-    lower_earner_caps = head_or_spouse.groupby("spm_unit_id")["earnings"].agg(
-        lambda values: float(values.min()) if len(values) > 1 else float(values.iloc[0])
-    )
-    return lower_earner_caps.reindex(
-        clone_spm_unit_ids,
-        fill_value=0.0,
-    ).to_numpy(dtype=float)
-
-
-def derive_clone_capped_childcare_expenses(
-    clone_pre_subsidy: np.ndarray,
-    clone_person_data: pd.DataFrame,
-    clone_spm_unit_ids: np.ndarray,
-    time_period: int,
-) -> np.ndarray:
-    """Derive clone-half capped work and childcare expenses from clone inputs.
-
-    The CPS provides both pre-subsidy childcare and the SPM-specific
-    capped work-and-childcare deduction. For the clone half, we impute
-    only the pre-subsidy childcare amount, then deterministically rebuild
-    the capped value using the Census SPM rule:
-    work expenses plus childcare, capped at the lower earner's earnings
-    for the reference person and spouse/partner.
-    """
-
-    clone_pre_subsidy = np.asarray(clone_pre_subsidy, dtype=float)
-    weekly_work_expense = _get_spm_weekly_work_expense(time_period)
-    annual_work_expenses = (
-        _calculate_clone_work_expenses(
-            clone_person_data=clone_person_data,
-            clone_spm_unit_ids=clone_spm_unit_ids,
-        )
-        * weekly_work_expense
-    )
-    lower_earner_cap = _calculate_clone_lower_earner_caps(
-        clone_person_data=clone_person_data,
-        clone_spm_unit_ids=clone_spm_unit_ids,
-    )
-
-    combined_expenses = np.maximum(clone_pre_subsidy, 0.0) + annual_work_expenses
-    return np.minimum(combined_expenses, lower_earner_cap)
-
-
-def _rebuild_clone_capped_childcare_expenses(
-    data: dict,
-    time_period: int,
-    cps_sim,
-) -> np.ndarray:
-    """Rebuild clone-half capped childcare expenses after stage-2 imputation."""
-
-    n_persons_half = len(data["person_id"][time_period]) // 2
-    n_spm_units_half = len(data["spm_unit_id"][time_period]) // 2
-
-    person_roles = cps_sim.calculate_dataframe(
-        ["age", "is_tax_unit_head", "is_tax_unit_spouse"]
-    )
-    if len(person_roles) != n_persons_half:
-        raise ValueError(
-            "Unexpected person role frame length while rebuilding clone childcare "
-            f"expenses: got {len(person_roles)}, expected {n_persons_half}"
-        )
-
-    clone_person_data = pd.DataFrame(
-        {
-            "spm_unit_id": data["person_spm_unit_id"][time_period][n_persons_half:],
-            "age": person_roles["age"].values,
-            "is_parent_proxy": (
-                person_roles["is_tax_unit_head"].values
-                | person_roles["is_tax_unit_spouse"].values
-            ),
-            "earnings": (
-                data["employment_income"][time_period][n_persons_half:]
-                + data["self_employment_income"][time_period][n_persons_half:]
-            ),
-            "weeks_worked": data["weeks_worked"][time_period][n_persons_half:],
-        }
-    )
-    clone_pre_subsidy = data["spm_unit_pre_subsidy_childcare_expenses"][time_period][
-        n_spm_units_half:
-    ]
-    clone_spm_unit_ids = data["spm_unit_id"][time_period][n_spm_units_half:]
-
-    return derive_clone_capped_childcare_expenses(
-        clone_pre_subsidy=clone_pre_subsidy,
-        clone_person_data=clone_person_data,
-        clone_spm_unit_ids=clone_spm_unit_ids,
-        time_period=time_period,
-    )
-
-
 def _apply_post_processing(predictions, X_test, time_period, data):
     """Apply retirement constraints and SS reconciliation."""
     ret_cols = [c for c in predictions.columns if c in _RETIREMENT_VARS]
@@ -579,24 +429,6 @@ def _splice_cps_only_predictions(
         new_values = np.concatenate([cps_half, pred_values])
         data[var] = {time_period: new_values}
 
-    if (
-        "spm_unit_capped_work_childcare_expenses" in data
-        and "spm_unit_pre_subsidy_childcare_expenses" in data
-    ):
-        n_half = entity_half_lengths.get(
-            "spm_unit",
-            len(data["spm_unit_capped_work_childcare_expenses"][time_period]) // 2,
-        )
-        cps_half = data["spm_unit_capped_work_childcare_expenses"][time_period][:n_half]
-        clone_half = _rebuild_clone_capped_childcare_expenses(
-            data=data,
-            time_period=time_period,
-            cps_sim=cps_sim,
-        )
-        data["spm_unit_capped_work_childcare_expenses"] = {
-            time_period: np.concatenate([cps_half, clone_half])
-        }
-
     del cps_sim
     return data
 
diff --git a/tests/unit/test_extended_cps.py b/tests/unit/test_extended_cps.py
index a6ea8b654..3bbd98a2d 100644
--- a/tests/unit/test_extended_cps.py
+++ b/tests/unit/test_extended_cps.py
@@ -19,7 +19,6 @@
     CPS_ONLY_IMPUTED_VARIABLES,
     CPS_STAGE2_INCOME_PREDICTORS,
     apply_retirement_constraints,
-    derive_clone_capped_childcare_expenses,
     reconcile_ss_subcomponents,
 )
 from policyengine_us_data.datasets.org import ORG_IMPUTED_VARIABLES
@@ -118,91 +117,12 @@ def test_pension_income_not_in_cps_only(self):
         )
 
     def test_capped_childcare_not_in_cps_only(self):
-        """Capped childcare should be derived from clone-half inputs, not
-        independently QRF-imputed."""
+        """Capped childcare should not be independently QRF-imputed."""
         assert "spm_unit_capped_work_childcare_expenses" not in set(
             CPS_ONLY_IMPUTED_VARIABLES
         )
 
 
-class TestCloneChildcareDerivation:
-    """Clone-half capped work-and-childcare expenses should be deterministic."""
-
-    def test_caps_combined_work_and_childcare_at_lower_earner(self):
-        clone_pre_subsidy = np.array([1200.0, 5000.0, 3000.0])
-        person_data = pd.DataFrame(
-            {
-                "spm_unit_id": [1, 1, 2, 2, 3],
-                "age": [40, 38, 35, 33, 29],
-                "is_parent_proxy": [True, True, True, True, True],
-                "earnings": [9000.0, 3000.0, 1500.0, 0.0, 2000.0],
-                "weeks_worked": [10.0, 20.0, 30.0, 5.0, 15.0],
-            }
-        )
-
-        result = derive_clone_capped_childcare_expenses(
-            clone_pre_subsidy=clone_pre_subsidy,
-            clone_person_data=person_data,
-            clone_spm_unit_ids=np.array([1, 2, 3]),
-            time_period=2024,
-        )
-
-        np.testing.assert_allclose(
-            result,
-            np.array(
-                [
-                    2435.1,  # 1200 childcare + (10 + 20) * 41.17 work expenses
-                    0.0,  # Two-parent unit capped by the lower earner's zero earnings
-                    2000.0,  # Single proxy unit capped at the proxy's earnings
-                ]
-            ),
-            rtol=0,
-            atol=1e-6,
-        )
-
-    def test_includes_work_expenses_even_without_childcare(self):
-        clone_pre_subsidy = np.array([0.0])
-        person_data = pd.DataFrame(
-            {
-                "spm_unit_id": [10],
-                "age": [31],
-                "is_parent_proxy": [True],
-                "earnings": [2500.0],
-                "weeks_worked": [12.0],
-            }
-        )
-
-        result = derive_clone_capped_childcare_expenses(
-            clone_pre_subsidy=clone_pre_subsidy,
-            clone_person_data=person_data,
-            clone_spm_unit_ids=np.array([10]),
-            time_period=2024,
-        )
-
-        np.testing.assert_allclose(result, np.array([494.04]), rtol=0, atol=1e-6)
-
-    def test_falls_back_to_zero_without_parent_proxies(self):
-        clone_pre_subsidy = np.array([3000.0])
-        person_data = pd.DataFrame(
-            {
-                "spm_unit_id": [20, 20],
-                "age": [12, 9],
-                "is_parent_proxy": [False, False],
-                "earnings": [0.0, 0.0],
-                "weeks_worked": [0.0, 0.0],
-            }
-        )
-
-        result = derive_clone_capped_childcare_expenses(
-            clone_pre_subsidy=clone_pre_subsidy,
-            clone_person_data=person_data,
-            clone_spm_unit_ids=np.array([20]),
-            time_period=2024,
-        )
-
-        np.testing.assert_allclose(result, np.array([0.0]))
-
-
 class TestRetirementConstraints:
     """Post-processing retirement constraints enforce IRS caps."""
 
diff --git a/tests/unit/test_weeks_worked.py b/tests/unit/test_weeks_worked.py
new file mode 100644
index 000000000..ad5f801af
--- /dev/null
+++ b/tests/unit/test_weeks_worked.py
@@ -0,0 +1,41 @@
+"""
+Tests for weeks_worked extraction from CPS ASEC.
+
+The Census CPS ASEC exposes WKSWORK directly, which we now carry through as
+the model input for future-year SPM work-expense calculations.
+"""
+
+import numpy as np
+from pathlib import Path
+
+
+class TestWeeksWorked:
+    """Test suite for weeks_worked variable extraction."""
+
+    def test_census_cps_includes_wkswork(self):
+        census_cps_path = Path(__file__).parent.parent.parent / (
+            "policyengine_us_data/datasets/cps/census_cps.py"
+        )
+        content = census_cps_path.read_text()
+
+        assert '"WKSWORK"' in content, "WKSWORK should be in PERSON_COLUMNS"
+
+    def test_cps_maps_weeks_worked_from_wkswork(self):
+        cps_path = Path(__file__).parent.parent.parent / (
+            "policyengine_us_data/datasets/cps/cps.py"
+        )
+        content = cps_path.read_text()
+
+        assert 'cps["weeks_worked"]' in content
+        assert "person.WKSWORK" in content
+        assert "np.clip(person.WKSWORK, 0, 52)" in content
+
+    def test_weeks_worked_value_range(self):
+        raw_values = np.array([-4, 0, 1, 26, 52, 60])
+        processed = np.clip(raw_values, 0, 52)
+
+        assert processed.min() >= 0, "Minimum should be >= 0"
+        assert processed.max() <= 52, "Maximum should be <= 52"
+        assert processed[0] == 0, "Negative values should clip to 0"
+        assert processed[3] == 26, "Valid weeks should be preserved"
+        assert processed[5] == 52, "Values above 52 should clip to 52"

From 18f8ebd4a44b174180df43fdb13daefe2890cdf2 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Thu, 9 Apr 2026 07:40:25 -0400
Subject: [PATCH 4/8] Carry CPS partner input for SPM childcare

---
 .../datasets/cps/census_cps.py                |  1 +
 policyengine_us_data/datasets/cps/cps.py      |  3 ++
 tests/unit/test_reference_partner.py          | 32 +++++++++++++++++++
 3 files changed, 36 insertions(+)
 create mode 100644 tests/unit/test_reference_partner.py

diff --git a/policyengine_us_data/datasets/cps/census_cps.py b/policyengine_us_data/datasets/cps/census_cps.py
index 042fefe56..6faed88fe 100644
--- a/policyengine_us_data/datasets/cps/census_cps.py
+++ b/policyengine_us_data/datasets/cps/census_cps.py
@@ -233,6 +233,7 @@ class CensusCPS_2018(CensusCPS):
     "A_FNLWGT",
     "A_LINENO",
     "A_SPOUSE",
+    "PERRP",
     "A_AGE",
     "A_SEX",
     "PEDISEYE",
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 08301f167..6fe4d2f35 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -462,6 +462,9 @@ def children_per_parent(col: str) -> pd.DataFrame:
 
     cps["is_surviving_spouse"] = person.A_MARITL == 4
     cps["is_separated"] = person.A_MARITL == 6
+    cps["is_unmarried_partner_of_household_head"] = person.PERRP.isin(
+        [43, 44, 46, 47]
+    )
     # High school or college/university enrollment status.
     cps["is_full_time_college_student"] = person.A_HSCOL == 2
 
diff --git a/tests/unit/test_reference_partner.py b/tests/unit/test_reference_partner.py
new file mode 100644
index 000000000..83ee6bd28
--- /dev/null
+++ b/tests/unit/test_reference_partner.py
@@ -0,0 +1,32 @@
+"""
+Tests for reference-person partner extraction from CPS ASEC.
+
+The public CPS ASEC relationship-to-reference-person variable PERRP identifies
+unmarried partners of the household head/reference person. We carry that
+through so the SPM childcare cap can distinguish the reference person's partner
+from unrelated adults in the same SPM unit.
+"""
+
+from pathlib import Path
+
+
+class TestReferencePartner:
+    """Test suite for CPS relationship-to-reference-person extraction."""
+
+    def test_census_cps_includes_perrp(self):
+        census_cps_path = Path(__file__).parent.parent.parent / (
+            "policyengine_us_data/datasets/cps/census_cps.py"
+        )
+        content = census_cps_path.read_text()
+
+        assert '"PERRP"' in content, "PERRP should be in PERSON_COLUMNS"
+
+    def test_cps_maps_unmarried_partner_from_perrp(self):
+        cps_path = Path(__file__).parent.parent.parent / (
+            "policyengine_us_data/datasets/cps/cps.py"
+        )
+        content = cps_path.read_text()
+
+        assert 'cps["is_unmarried_partner_of_household_head"]' in content
+        for code in ("43", "44", "46", "47"):
+            assert code in content

From e5a488e0bc35207b9c19f94b853949d4733e2750 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Thu, 9 Apr 2026 10:16:50 -0400
Subject: [PATCH 5/8] Format CPS PERRP mapping

---
 policyengine_us_data/datasets/cps/cps.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 6fe4d2f35..f0e2c756a 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -462,9 +462,7 @@ def children_per_parent(col: str) -> pd.DataFrame:
 
     cps["is_surviving_spouse"] = person.A_MARITL == 4
     cps["is_separated"] = person.A_MARITL == 6
-    cps["is_unmarried_partner_of_household_head"] = person.PERRP.isin(
-        [43, 44, 46, 47]
-    )
+    cps["is_unmarried_partner_of_household_head"] = person.PERRP.isin([43, 44, 46, 47])
     # High school or college/university enrollment status.
     cps["is_full_time_college_student"] = person.A_HSCOL == 2
 

From 27169f8f4f51cda5b2454178f84ef53b22e5e3fa Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Fri, 10 Apr 2026 10:31:35 -0400
Subject: [PATCH 6/8] Address childcare CPS review comments

---
 policyengine_us_data/datasets/cps/cps.py | 13 ++++++-
 tests/unit/test_reference_partner.py     | 49 +++++++++++++++++-------
 tests/unit/test_weeks_worked.py          | 45 +++++++++++-----------
 3 files changed, 70 insertions(+), 37 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index dcdab86e3..8c31d10ce 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -90,6 +90,15 @@
     ),
 }
 
+# Census CPS ASEC 2024 technical documentation, PERRP:
+# https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar24.pdf
+PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES = {
+    43: "Opposite Sex Unmarried Partner with Relatives",
+    44: "Opposite Sex Unmarried Partner without Relatives",
+    46: "Same Sex Unmarried Partner with Relatives",
+    47: "Same Sex Unmarried Partner without Relatives",
+}
+
 
 class CPS(Dataset):
     name = "cps"
@@ -572,7 +581,9 @@ def children_per_parent(col: str) -> pd.DataFrame:
 
     cps["is_surviving_spouse"] = person.A_MARITL == 4
     cps["is_separated"] = person.A_MARITL == 6
-    cps["is_unmarried_partner_of_household_head"] = person.PERRP.isin([43, 44, 46, 47])
+    cps["is_unmarried_partner_of_household_head"] = person.PERRP.isin(
+        PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES.keys()
+    )
     # High school or college/university enrollment status.
     cps["is_full_time_college_student"] = person.A_HSCOL == 2
 
diff --git a/tests/unit/test_reference_partner.py b/tests/unit/test_reference_partner.py
index 83ee6bd28..d579b080a 100644
--- a/tests/unit/test_reference_partner.py
+++ b/tests/unit/test_reference_partner.py
@@ -7,26 +7,49 @@
 from unrelated adults in the same SPM unit.
 """
 
-from pathlib import Path
+import numpy as np
+import pandas as pd
+
+from policyengine_us_data.datasets.cps.census_cps import PERSON_COLUMNS
+from policyengine_us_data.datasets.cps.cps import (
+    PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES,
+    add_personal_variables,
+)
+
+
+def _person_frame(**columns):
+    n_persons = len(next(iter(columns.values())))
+    data = {column: np.zeros(n_persons, dtype=int) for column in PERSON_COLUMNS}
+    data.update(columns)
+    return pd.DataFrame(data)
 
 
 class TestReferencePartner:
     """Test suite for CPS relationship-to-reference-person extraction."""
 
-    def test_census_cps_includes_perrp(self):
-        census_cps_path = Path(__file__).parent.parent.parent / (
-            "policyengine_us_data/datasets/cps/census_cps.py"
-        )
-        content = census_cps_path.read_text()
+    def test_census_cps_loads_perrp(self):
+        assert "PERRP" in PERSON_COLUMNS
 
-        assert '"PERRP"' in content, "PERRP should be in PERSON_COLUMNS"
+    def test_unmarried_partner_perrp_code_table_matches_census_labels(self):
+        assert PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES == {
+            43: "Opposite Sex Unmarried Partner with Relatives",
+            44: "Opposite Sex Unmarried Partner without Relatives",
+            46: "Same Sex Unmarried Partner with Relatives",
+            47: "Same Sex Unmarried Partner without Relatives",
+        }
 
     def test_cps_maps_unmarried_partner_from_perrp(self):
-        cps_path = Path(__file__).parent.parent.parent / (
-            "policyengine_us_data/datasets/cps/cps.py"
+        person = _person_frame(
+            PH_SEQ=np.arange(7) + 1,
+            A_LINENO=np.ones(7),
+            A_AGE=np.full(7, 35),
+            PERRP=np.array([40, 43, 44, 45, 46, 47, 48]),
         )
-        content = cps_path.read_text()
 
-        assert 'cps["is_unmarried_partner_of_household_head"]' in content
-        for code in ("43", "44", "46", "47"):
-            assert code in content
+        cps = {}
+        add_personal_variables(cps, person)
+
+        np.testing.assert_array_equal(
+            cps["is_unmarried_partner_of_household_head"],
+            np.array([False, True, True, False, True, True, False]),
+        )
diff --git a/tests/unit/test_weeks_worked.py b/tests/unit/test_weeks_worked.py
index ad5f801af..0c3db2f76 100644
--- a/tests/unit/test_weeks_worked.py
+++ b/tests/unit/test_weeks_worked.py
@@ -6,36 +6,35 @@
 """
 
 import numpy as np
-from pathlib import Path
+import pandas as pd
+
+from policyengine_us_data.datasets.cps.census_cps import PERSON_COLUMNS
+from policyengine_us_data.datasets.cps.cps import add_personal_income_variables
+
+
+def _person_frame(**columns):
+    n_persons = len(next(iter(columns.values())))
+    data = {column: np.zeros(n_persons, dtype=int) for column in PERSON_COLUMNS}
+    data.update(columns)
+    return pd.DataFrame(data)
 
 
 class TestWeeksWorked:
     """Test suite for weeks_worked variable extraction."""
 
-    def test_census_cps_includes_wkswork(self):
-        census_cps_path = Path(__file__).parent.parent.parent / (
-            "policyengine_us_data/datasets/cps/census_cps.py"
-        )
-        content = census_cps_path.read_text()
-
-        assert '"WKSWORK"' in content, "WKSWORK should be in PERSON_COLUMNS"
+    def test_census_cps_loads_wkswork(self):
+        assert "WKSWORK" in PERSON_COLUMNS
 
     def test_cps_maps_weeks_worked_from_wkswork(self):
-        cps_path = Path(__file__).parent.parent.parent / (
-            "policyengine_us_data/datasets/cps/cps.py"
+        person = _person_frame(
+            A_AGE=np.full(6, 35),
+            WKSWORK=np.array([-4, 0, 1, 26, 52, 60]),
         )
-        content = cps_path.read_text()
-
-        assert 'cps["weeks_worked"]' in content
-        assert "person.WKSWORK" in content
-        assert "np.clip(person.WKSWORK, 0, 52)" in content
 
-    def test_weeks_worked_value_range(self):
-        raw_values = np.array([-4, 0, 1, 26, 52, 60])
-        processed = np.clip(raw_values, 0, 52)
+        cps = {}
+        add_personal_income_variables(cps, person, 2024)
 
-        assert processed.min() >= 0, "Minimum should be >= 0"
-        assert processed.max() <= 52, "Maximum should be <= 52"
-        assert processed[0] == 0, "Negative values should clip to 0"
-        assert processed[3] == 26, "Valid weeks should be preserved"
-        assert processed[5] == 52, "Values above 52 should clip to 52"
+        np.testing.assert_array_equal(
+            cps["weeks_worked"],
+            np.array([0, 0, 1, 26, 52, 52]),
+        )

From 89b58ac6b99b47c6d5581562f4228c3882d37c46 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Fri, 10 Apr 2026 10:41:00 -0400
Subject: [PATCH 7/8] Fix weeks worked unit test isolation

---
 policyengine_us_data/datasets/cps/cps.py |  6 +++++-
 tests/unit/test_weeks_worked.py          | 22 +++-------------------
 2 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 8c31d10ce..2b328456b 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -594,6 +594,10 @@ def children_per_parent(col: str) -> pd.DataFrame:
     add_overtime_occupation(cps, person)
 
 
+def derive_weeks_worked(weeks_worked: Series | np.ndarray) -> Series | np.ndarray:
+    return np.clip(weeks_worked, 0, 52)
+
+
 def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
     """Add income variables.
 
@@ -619,7 +623,7 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
 
     cps["weekly_hours_worked"] = person.HRSWK
     cps["hours_worked_last_week"] = person.A_HRS1
-    cps["weeks_worked"] = np.clip(person.WKSWORK, 0, 52)
+    cps["weeks_worked"] = derive_weeks_worked(person.WKSWORK)
 
     cps["taxable_interest_income"] = person.INT_VAL * (p["taxable_interest_fraction"])
     cps["tax_exempt_interest_income"] = person.INT_VAL * (
diff --git a/tests/unit/test_weeks_worked.py b/tests/unit/test_weeks_worked.py
index 0c3db2f76..7f1bc6959 100644
--- a/tests/unit/test_weeks_worked.py
+++ b/tests/unit/test_weeks_worked.py
@@ -6,17 +6,9 @@
 """
 
 import numpy as np
-import pandas as pd
 
 from policyengine_us_data.datasets.cps.census_cps import PERSON_COLUMNS
-from policyengine_us_data.datasets.cps.cps import add_personal_income_variables
-
-
-def _person_frame(**columns):
-    n_persons = len(next(iter(columns.values())))
-    data = {column: np.zeros(n_persons, dtype=int) for column in PERSON_COLUMNS}
-    data.update(columns)
-    return pd.DataFrame(data)
+from policyengine_us_data.datasets.cps.cps import derive_weeks_worked
 
 
 class TestWeeksWorked:
@@ -25,16 +17,8 @@ class TestWeeksWorked:
     def test_census_cps_loads_wkswork(self):
         assert "WKSWORK" in PERSON_COLUMNS
 
-    def test_cps_maps_weeks_worked_from_wkswork(self):
-        person = _person_frame(
-            A_AGE=np.full(6, 35),
-            WKSWORK=np.array([-4, 0, 1, 26, 52, 60]),
-        )
-
-        cps = {}
-        add_personal_income_variables(cps, person, 2024)
-
+    def test_cps_derives_weeks_worked_from_wkswork(self):
         np.testing.assert_array_equal(
-            cps["weeks_worked"],
+            derive_weeks_worked(np.array([-4, 0, 1, 26, 52, 60])),
             np.array([0, 0, 1, 26, 52, 52]),
         )

From 7f64138e118ec81afc4494bc9381fdd0f12cc938 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Fri, 10 Apr 2026 20:57:01 -0400
Subject: [PATCH 8/8] Handle missing PERRP in synthetic CPS frames

---
 policyengine_us_data/datasets/cps/cps.py |  7 ++++++-
 tests/unit/test_reference_partner.py     | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 2b328456b..1244be4e7 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -581,7 +581,12 @@ def children_per_parent(col: str) -> pd.DataFrame:
 
     cps["is_surviving_spouse"] = person.A_MARITL == 4
     cps["is_separated"] = person.A_MARITL == 6
-    cps["is_unmarried_partner_of_household_head"] = person.PERRP.isin(
+    perrp = (
+        person.PERRP
+        if "PERRP" in person
+        else pd.Series(0, index=person.index, dtype=np.int16)
+    )
+    cps["is_unmarried_partner_of_household_head"] = perrp.isin(
         PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES.keys()
     )
     # High school or college/university enrollment status.
diff --git a/tests/unit/test_reference_partner.py b/tests/unit/test_reference_partner.py
index d579b080a..7d18ce25b 100644
--- a/tests/unit/test_reference_partner.py
+++ b/tests/unit/test_reference_partner.py
@@ -53,3 +53,18 @@ def test_cps_maps_unmarried_partner_from_perrp(self):
             cps["is_unmarried_partner_of_household_head"],
             np.array([False, True, True, False, True, True, False]),
         )
+
+    def test_missing_perrp_defaults_to_false(self):
+        person = _person_frame(
+            PH_SEQ=np.arange(3) + 1,
+            A_LINENO=np.ones(3),
+            A_AGE=np.full(3, 35),
+        ).drop(columns="PERRP")
+
+        cps = {}
+        add_personal_variables(cps, person)
+
+        np.testing.assert_array_equal(
+            cps["is_unmarried_partner_of_household_head"],
+            np.array([False, False, False]),
+        )