PolicyEngine · MaxGhenis · May 26, 2026 · May 26, 2026
diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py
@@ -45,21 +45,37 @@ def initialize_weight_priors(
     original_weights: np.ndarray,
     seed: int = 1456,
     epsilon: float = 1e-6,
+    zero_weight_total_share: float = 0.5,
 ) -> np.ndarray:
-    """Build deterministic positive priors for sparse reweighting."""
+    """Build deterministic positive priors for sparse reweighting.
+
+    PUF clone households enter the extended CPS with zero household weight.
+    Giving those records near-zero priors leaves them effectively unusable in
+    log-space optimization. When zero-weight rows are present, preserve the
+    relative distribution of positive survey weights but reserve a fixed share
+    of the original total household mass for uniform zero-weight-row priors.
+    """
 
     weights = np.asarray(original_weights, dtype=np.float64)
     if np.any(weights < 0):
         raise ValueError("original_weights must be non-negative")
+    if weights.size == 0:
+        return weights.copy()
+    if not 0 < zero_weight_total_share < 1:
+        raise ValueError("zero_weight_total_share must be between 0 and 1")
 
     priors = np.empty_like(weights, dtype=np.float64)
     positive_mask = weights > 0
-    priors[positive_mask] = weights[positive_mask]
-
     zero_mask = ~positive_mask
-    if zero_mask.any():
-        rng = np.random.default_rng(seed)
-        priors[zero_mask] = epsilon * rng.uniform(1.0, 2.0, size=zero_mask.sum())
+    if not zero_mask.any():
+        return weights.copy()
+
+    positive_total = float(weights[positive_mask].sum())
+    if positive_total <= 0:
+        return np.full_like(weights, 1.0, dtype=np.float64)
+
+    priors[positive_mask] = weights[positive_mask] * (1 - zero_weight_total_share)
+    priors[zero_mask] = positive_total * zero_weight_total_share / zero_mask.sum()
 
     return priors
 

diff --git a/policyengine_us_data/storage/calibration_targets/soi_targets.csv b/policyengine_us_data/storage/calibration_targets/soi_targets.csv
@@ -11929,3 +11929,41 @@ Year,SOI table,XLSX column,XLSX row,Variable,Filing status,AGI lower bound,AGI u
 2022,Table 3.3,AP,10,refundable_american_opportunity_credit,All,-inf,inf,False,False,True,5184485000
 2023,Table 3.3,AO,10,refundable_american_opportunity_credit,All,-inf,inf,True,False,True,5821688
 2023,Table 3.3,AP,10,refundable_american_opportunity_credit,All,-inf,inf,False,False,True,5090364000
+2023,Table 1.4A,BK,11,long_term_capital_gains,All,-inf,1.0,False,False,False,11981913000
+2023,Table 1.4A,BJ,11,long_term_capital_gains,All,-inf,1.0,True,False,False,137016
+2023,Table 1.4A,BK,12,long_term_capital_gains,All,1.0,5000.0,False,False,False,390046000
+2023,Table 1.4A,BJ,12,long_term_capital_gains,All,1.0,5000.0,True,False,False,171586
+2023,Table 1.4A,BK,13,long_term_capital_gains,All,5000.0,10000.0,False,False,False,740521000
+2023,Table 1.4A,BJ,13,long_term_capital_gains,All,5000.0,10000.0,True,False,False,181415
+2023,Table 1.4A,BK,14,long_term_capital_gains,All,10000.0,15000.0,False,False,False,1139960000
+2023,Table 1.4A,BJ,14,long_term_capital_gains,All,10000.0,15000.0,True,False,False,208487
+2023,Table 1.4A,BK,15,long_term_capital_gains,All,15000.0,20000.0,False,False,False,1222242000
+2023,Table 1.4A,BJ,15,long_term_capital_gains,All,15000.0,20000.0,True,False,False,231243
+2023,Table 1.4A,BK,16,long_term_capital_gains,All,20000.0,25000.0,False,False,False,1618072000
+2023,Table 1.4A,BJ,16,long_term_capital_gains,All,20000.0,25000.0,True,False,False,184713
+2023,Table 1.4A,BK,17,long_term_capital_gains,All,25000.0,30000.0,False,False,False,1627983000
+2023,Table 1.4A,BJ,17,long_term_capital_gains,All,25000.0,30000.0,True,False,False,184226
+2023,Table 1.4A,BK,18,long_term_capital_gains,All,30000.0,40000.0,False,False,False,2752465000
+2023,Table 1.4A,BJ,18,long_term_capital_gains,All,30000.0,40000.0,True,False,False,374807
+2023,Table 1.4A,BK,19,long_term_capital_gains,All,40000.0,50000.0,False,False,False,3402047000
+2023,Table 1.4A,BJ,19,long_term_capital_gains,All,40000.0,50000.0,True,False,False,401340
+2023,Table 1.4A,BK,20,long_term_capital_gains,All,50000.0,75000.0,False,False,False,9470818000
+2023,Table 1.4A,BJ,20,long_term_capital_gains,All,50000.0,75000.0,True,False,False,1138440
+2023,Table 1.4A,BK,21,long_term_capital_gains,All,75000.0,100000.0,False,False,False,12715937000
+2023,Table 1.4A,BJ,21,long_term_capital_gains,All,75000.0,100000.0,True,False,False,1185823
+2023,Table 1.4A,BK,22,long_term_capital_gains,All,100000.0,200000.0,False,False,False,63046717000
+2023,Table 1.4A,BJ,22,long_term_capital_gains,All,100000.0,200000.0,True,False,False,3470815
+2023,Table 1.4A,BK,23,long_term_capital_gains,All,200000.0,500000.0,False,False,False,127187338000
+2023,Table 1.4A,BJ,23,long_term_capital_gains,All,200000.0,500000.0,True,False,False,2793458
+2023,Table 1.4A,BK,24,long_term_capital_gains,All,500000.0,1000000.0,False,False,False,100228422000
+2023,Table 1.4A,BJ,24,long_term_capital_gains,All,500000.0,1000000.0,True,False,False,767767
+2023,Table 1.4A,BK,25,long_term_capital_gains,All,1000000.0,1500000.0,False,False,False,56098627000
+2023,Table 1.4A,BJ,25,long_term_capital_gains,All,1000000.0,1500000.0,True,False,False,196019
+2023,Table 1.4A,BK,26,long_term_capital_gains,All,1500000.0,2000000.0,False,False,False,37572096000
+2023,Table 1.4A,BJ,26,long_term_capital_gains,All,1500000.0,2000000.0,True,False,False,83388
+2023,Table 1.4A,BK,27,long_term_capital_gains,All,2000000.0,5000000.0,False,False,False,111769225000
+2023,Table 1.4A,BJ,27,long_term_capital_gains,All,2000000.0,5000000.0,True,False,False,123009
+2023,Table 1.4A,BK,28,long_term_capital_gains,All,5000000.0,10000000.0,False,False,False,82043062000
+2023,Table 1.4A,BJ,28,long_term_capital_gains,All,5000000.0,10000000.0,True,False,False,32657
+2023,Table 1.4A,BK,29,long_term_capital_gains,All,10000000.0,inf,False,False,False,346272458000
+2023,Table 1.4A,BJ,29,long_term_capital_gains,All,10000000.0,inf,True,False,False,22309
diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py
@@ -281,6 +281,7 @@ def _cbo_program_target_value(sim, variable_name: str, time_period):
 
 LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES = {
     "capital_gains_gross",
+    "long_term_capital_gains",
     "ordinary_dividends",
     "qualified_dividends",
     "taxable_interest_income",
@@ -292,6 +293,7 @@ def _cbo_program_target_value(sim, variable_name: str, time_period):
     "employment_income",
     "business_net_profits",
     "capital_gains_gross",
+    "long_term_capital_gains",
     "ordinary_dividends",
     "partnership_and_s_corp_income",
     "qualified_dividends",

diff --git a/policyengine_us_data/utils/soi.py b/policyengine_us_data/utils/soi.py
@@ -8,7 +8,8 @@
     "count": "population",
     "employment_income": "employment_income_before_lsr",
     "business_net_profits": "total_self_employment_income",
-    "capital_gains_gross": "long_term_capital_gains",
+    "capital_gains_gross": "long_term_capital_gains_basis",
+    "long_term_capital_gains": "long_term_capital_gains_basis",
     "ordinary_dividends": "non_qualified_dividend_income",
     "partnership_and_s_corp_income": "partnership_s_corp_income",
     "qualified_dividends": "qualified_dividend_income",
@@ -21,8 +22,8 @@
     "total_pension_income": "pension_income",
     "total_social_security": "social_security",
     "business_net_losses": "total_self_employment_income",
-    "capital_gains_distributions": "long_term_capital_gains",
-    "capital_gains_losses": "long_term_capital_gains",
+    "capital_gains_distributions": "long_term_capital_gains_basis",
+    "capital_gains_losses": "long_term_capital_gains_basis",
     "estate_income": "estate_income",
     "estate_losses": "estate_income",
     "exempt_interest": "tax_exempt_interest_income",
@@ -89,6 +90,8 @@ def pe(variable):
     df["capital_gains_losses"] = -pe("loss_limited_net_capital_gains") * (
         pe("loss_limited_net_capital_gains") < 0
     )
+    ltcg = pe("long_term_capital_gains")
+    df["long_term_capital_gains"] = ltcg * (ltcg > 0)
     df["estate_income"] = pe("estate_income") * (pe("estate_income") > 0)
     df["estate_losses"] = -pe("estate_income") * (pe("estate_income") < 0)
     df["exempt_interest"] = pe("tax_exempt_interest_income")
@@ -146,6 +149,12 @@ def puf_to_soi(puf, year):
     df["capital_gains_distributions"] = puf.E01100
     df["capital_gains_gross"] = puf["E01000"] * (puf["E01000"] > 0)
     df["capital_gains_losses"] = -puf["E01000"] * (puf["E01000"] < 0)
+    ltcg = (
+        puf["long_term_capital_gains"]
+        if "long_term_capital_gains" in puf
+        else puf.P23250
+    )
+    df["long_term_capital_gains"] = ltcg * (ltcg > 0)
     df["estate_income"] = puf.E26390
     df["estate_losses"] = puf.E26400
     df["exempt_interest"] = puf.E00400

diff --git a/tests/unit/calibration/test_loss_targets.py b/tests/unit/calibration/test_loss_targets.py
@@ -15,6 +15,7 @@
     BEA_WAGES_AND_SALARIES_LOSS_WEIGHT,
     BLS_CE_TOTALS,
     HARD_CODED_TOTALS,
+    LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES,
     TRANSFER_BALANCE_TARGETS,
     _add_bea_state_wage_targets,
     _add_agi_metric_columns,
@@ -39,6 +40,7 @@
     get_target_error_normalisation,
     get_target_loss_weights,
 )
+from policyengine_us_data.storage import CALIBRATION_FOLDER
 from policyengine_us_data.db import etl_national_targets
 from policyengine_us_data.utils.ssi_targets import (
     SSI_RECIPIENT_TARGETS_2024,
@@ -53,6 +55,29 @@ def test_legacy_loss_targets_include_aggregate_qbi_deduction():
     assert "qualified_business_income_deduction" not in AGI_LEVEL_TARGETED_VARIABLES
 
 
+def test_legacy_loss_targets_include_ltcg_agi_grid():
+    """Pin the 2023 SOI Table 1.4A long-term capital gains AGI grid."""
+    variable = "long_term_capital_gains"
+    assert variable in AGI_LEVEL_TARGETED_VARIABLES
+    assert variable in LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES
+
+    soi = pd.read_csv(CALIBRATION_FOLDER / "soi_targets.csv")
+    # Filter on Year so rows added for other years cannot break the counts.
+    ltcg = soi[
+        (soi["Year"] == 2023)
+        & (soi["Variable"] == variable)
+        & (soi["SOI table"] == "Table 1.4A")
+        & (soi["Filing status"] == "All")
+        & ~(soi["Taxable only"] | soi["Full population"])
+    ]
+    # 19 AGI brackets, each with one Count=False row and one Count=True row.
+    assert ltcg.groupby("Count").size().to_dict() == {False: 19, True: 19}
+    assert ltcg["Value"].gt(0).all()
+    top = ltcg[~ltcg["Count"] & (ltcg["AGI lower bound"] == 10_000_000.0)]
+    assert len(top) == 1 and np.isinf(top["AGI upper bound"]).all()
+    assert top["Value"].iat[0] == 346_272_458_000
+
+
 def test_bea_nipa_direct_sum_targets_match_targets_db():
     loss_targets_by_variable = {
         variable: target for _, variable, target in BEA_NIPA_DIRECT_SUM_TARGETS
@@ -790,12 +815,16 @@ def test_low_agi_soi_skip_keeps_investment_income_targets():
     capital_income_low_agi_row = pd.Series(
         {"Variable": "capital_gains_gross", "AGI upper bound": 10_000.0}
     )
+    ltcg_low_agi_row = pd.Series(
+        {"Variable": "long_term_capital_gains", "AGI upper bound": 10_000.0}
+    )
     ordinary_higher_agi_row = pd.Series(
         {"Variable": "employment_income", "AGI upper bound": 25_000.0}
     )
 
     assert _should_skip_soi_agi_row(ordinary_low_agi_row)
     assert not _should_skip_soi_agi_row(capital_income_low_agi_row)
+    assert not _should_skip_soi_agi_row(ltcg_low_agi_row)
     assert not _should_skip_soi_agi_row(ordinary_higher_agi_row)
 
 
@@ -806,6 +835,9 @@ def test_all_return_soi_skip_keeps_investment_income_targets():
     capital_income_all_return_row = pd.Series(
         {"Variable": "capital_gains_gross", "Taxable only": False}
     )
+    ltcg_all_return_row = pd.Series(
+        {"Variable": "long_term_capital_gains", "Taxable only": False}
+    )
     ordinary_taxable_row = pd.Series(
         {"Variable": "employment_income", "Taxable only": True}
     )
@@ -818,12 +850,17 @@ def test_all_return_soi_skip_keeps_investment_income_targets():
     capital_income_taxable_row = pd.Series(
         {"Variable": "capital_gains_gross", "Taxable only": True}
     )
+    ltcg_taxable_row = pd.Series(
+        {"Variable": "long_term_capital_gains", "Taxable only": True}
+    )
 
     assert _should_skip_soi_taxability_row(ordinary_all_return_row)
     assert not _should_skip_soi_taxability_row(capital_income_all_return_row)
+    assert not _should_skip_soi_taxability_row(ltcg_all_return_row)
     assert not _should_skip_soi_taxability_row(ordinary_taxable_row)
     assert not _should_skip_soi_taxability_row(qbi_taxable_row)
     assert _should_skip_soi_taxability_row(capital_income_taxable_row)
+    assert _should_skip_soi_taxability_row(ltcg_taxable_row)
 
 
 def test_tanf_hardcoded_target_uses_fy2024_basic_assistance_total():

diff --git a/tests/unit/datasets/test_enhanced_cps_seeding.py b/tests/unit/datasets/test_enhanced_cps_seeding.py
@@ -3,7 +3,8 @@
 Earlier versions used global ``np.random.normal(1, 0.1, ...)`` jitter before
 ``reweight()`` reseeded the optimizer. Current code routes both dense CPS
 weighting paths through ``initialize_weight_priors()``, which preserves positive
-survey weights and gives zero-weight clone records deterministic tiny priors.
+survey weights' relative distribution and gives zero-weight clone records a
+deterministic, uniform share of the prior mass.
 """
 
 import numpy as np

diff --git a/tests/unit/test_enhanced_cps_clone_diagnostics.py b/tests/unit/test_enhanced_cps_clone_diagnostics.py
@@ -14,16 +14,17 @@
 )
 
 
-def test_initialize_weight_priors_keeps_zero_weight_records_near_zero():
+def test_initialize_weight_priors_gives_zero_weight_records_balanced_mass():
     weights = np.array([1_500.0, 0.0, 625.0, 0.0], dtype=np.float64)
 
     priors = initialize_weight_priors(weights, seed=123)
 
     assert np.all(priors > 0)
-    assert priors[1] < 1e-4
-    assert priors[3] < 1e-4
-    assert priors[0] == pytest.approx(1_500.0)
-    assert priors[2] == pytest.approx(625.0)
+    assert priors.sum() == pytest.approx(weights.sum())
+    assert priors[[0, 2]].sum() == pytest.approx(weights.sum() / 2)
+    assert priors[[1, 3]].sum() == pytest.approx(weights.sum() / 2)
+    assert priors[1] == pytest.approx(priors[3])
+    assert priors[0] / priors[2] == pytest.approx(weights[0] / weights[2])
 
 
 def test_initialize_weight_priors_preserves_positive_weights_exactly():

diff --git a/tests/unit/test_soi_utils.py b/tests/unit/test_soi_utils.py
@@ -105,6 +105,7 @@ def calculate(self, variable, map_to=None):
             values = {
                 "self_employment_income": np.array([100.0, -10.0]),
                 "sstb_self_employment_income": np.array([50.0, -25.0]),
+                "long_term_capital_gains": np.array([25.0, -5.0]),
                 "miscellaneous_income": np.array([12.0, -5.0]),
                 "filing_status": np.array(["SINGLE", "SINGLE"]),
                 "tax_unit_weight": np.ones(n),
@@ -124,6 +125,9 @@ def calculate(self, variable, map_to=None):
     np.testing.assert_array_equal(
         soi["business_net_losses"].to_numpy(), np.array([0.0, 35.0])
     )
+    np.testing.assert_array_equal(
+        soi["long_term_capital_gains"].to_numpy(), np.array([25.0, 0.0])
+    )
     np.testing.assert_array_equal(soi["other_income"].to_numpy(), np.array([12.0, 0.0]))
 
 
@@ -199,6 +203,56 @@ def test_get_soi_uses_best_available_year_per_variable(monkeypatch):
     assert np.isclose(taxable_interest_value, 266.6666666667)
 
 
+def test_get_soi_uses_ltcg_basis_uprating_for_capital_gains(monkeypatch):
+    """Check that capital-gains SOI targets uprate with the LTCG basis factor.
+
+    Two synthetic 2023 rows are requested for 2024 against a stubbed table in
+    which ``long_term_capital_gains_basis`` doubles between the two years.
+    """
+    soi_module = load_soi_module()
+
+    # Metadata columns shared by both synthetic target rows.
+    shared_columns = {
+        "SOI table": "Table 1.4A",
+        "XLSX column": "BK",
+        "XLSX row": 10,
+        "Filing status": "All",
+        "AGI lower bound": float("-inf"),
+        "AGI upper bound": float("inf"),
+        "Count": False,
+        "Taxable only": False,
+        "Full population": True,
+    }
+    base_values = {"capital_gains_gross": 100.0, "long_term_capital_gains": 200.0}
+    fake_soi = pd.DataFrame(
+        [
+            {"Year": 2023, "Variable": name, "Value": value, **shared_columns}
+            for name, value in base_values.items()
+        ]
+    )
+
+    # Factor table: the basis row doubles; the other row grows tenfold.
+    factor_rows = ["long_term_capital_gains_basis", "employment_income_before_lsr"]
+    uprating = pd.DataFrame(
+        {2023: [1.0, 1.0], 2024: [2.0, 10.0]},
+        index=factor_rows,
+    )
+
+    # Swap the module-level functions for stubs returning the frames above.
+    stubs = {
+        "load_tracked_soi_targets": lambda: fake_soi.copy(),
+        "create_policyengine_uprating_factors_table": lambda: uprating,
+    }
+    for attribute, stub in stubs.items():
+        monkeypatch.setattr(soi_module, attribute, stub)
+
+    by_variable = soi_module.get_soi(2024).set_index("Variable")
+
+    # Each 2023 value should be scaled by the basis factor (2.0 / 1.0).
+    assert by_variable.loc["capital_gains_gross", "Value"] == 200.0
+    assert by_variable.loc["long_term_capital_gains", "Value"] == 400.0
+
+
 def test_get_soi_uses_current_employment_income_uprating_without_legacy_row(
     monkeypatch,
 ):