Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions policyengine_us_data/datasets/cps/enhanced_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,37 @@ def initialize_weight_priors(
original_weights: np.ndarray,
seed: int = 1456,
epsilon: float = 1e-6,
zero_weight_total_share: float = 0.5,
) -> np.ndarray:
"""Build deterministic positive priors for sparse reweighting."""
"""Build deterministic positive priors for sparse reweighting.

PUF clone households enter the extended CPS with zero household weight.
Giving those records near-zero priors leaves them effectively unusable in
log-space optimization. When zero-weight rows are present, preserve the
relative distribution of positive survey weights but reserve a fixed share
of the original total household mass for uniform zero-weight-row priors.
"""

weights = np.asarray(original_weights, dtype=np.float64)
if np.any(weights < 0):
raise ValueError("original_weights must be non-negative")
if weights.size == 0:
return weights.copy()
if not 0 < zero_weight_total_share < 1:
raise ValueError("zero_weight_total_share must be between 0 and 1")

priors = np.empty_like(weights, dtype=np.float64)
positive_mask = weights > 0
priors[positive_mask] = weights[positive_mask]

zero_mask = ~positive_mask
if zero_mask.any():
rng = np.random.default_rng(seed)
priors[zero_mask] = epsilon * rng.uniform(1.0, 2.0, size=zero_mask.sum())
if not zero_mask.any():
return weights.copy()

positive_total = float(weights[positive_mask].sum())
if positive_total <= 0:
return np.full_like(weights, 1.0, dtype=np.float64)

priors[positive_mask] = weights[positive_mask] * (1 - zero_weight_total_share)
priors[zero_mask] = positive_total * zero_weight_total_share / zero_mask.sum()

return priors

Expand Down
38 changes: 38 additions & 0 deletions policyengine_us_data/storage/calibration_targets/soi_targets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -11929,3 +11929,41 @@ Year,SOI table,XLSX column,XLSX row,Variable,Filing status,AGI lower bound,AGI u
2022,Table 3.3,AP,10,refundable_american_opportunity_credit,All,-inf,inf,False,False,True,5184485000
2023,Table 3.3,AO,10,refundable_american_opportunity_credit,All,-inf,inf,True,False,True,5821688
2023,Table 3.3,AP,10,refundable_american_opportunity_credit,All,-inf,inf,False,False,True,5090364000
2023,Table 1.4A,BK,11,long_term_capital_gains,All,-inf,1.0,False,False,False,11981913000
2023,Table 1.4A,BJ,11,long_term_capital_gains,All,-inf,1.0,True,False,False,137016
2023,Table 1.4A,BK,12,long_term_capital_gains,All,1.0,5000.0,False,False,False,390046000
2023,Table 1.4A,BJ,12,long_term_capital_gains,All,1.0,5000.0,True,False,False,171586
2023,Table 1.4A,BK,13,long_term_capital_gains,All,5000.0,10000.0,False,False,False,740521000
2023,Table 1.4A,BJ,13,long_term_capital_gains,All,5000.0,10000.0,True,False,False,181415
2023,Table 1.4A,BK,14,long_term_capital_gains,All,10000.0,15000.0,False,False,False,1139960000
2023,Table 1.4A,BJ,14,long_term_capital_gains,All,10000.0,15000.0,True,False,False,208487
2023,Table 1.4A,BK,15,long_term_capital_gains,All,15000.0,20000.0,False,False,False,1222242000
2023,Table 1.4A,BJ,15,long_term_capital_gains,All,15000.0,20000.0,True,False,False,231243
2023,Table 1.4A,BK,16,long_term_capital_gains,All,20000.0,25000.0,False,False,False,1618072000
2023,Table 1.4A,BJ,16,long_term_capital_gains,All,20000.0,25000.0,True,False,False,184713
2023,Table 1.4A,BK,17,long_term_capital_gains,All,25000.0,30000.0,False,False,False,1627983000
2023,Table 1.4A,BJ,17,long_term_capital_gains,All,25000.0,30000.0,True,False,False,184226
2023,Table 1.4A,BK,18,long_term_capital_gains,All,30000.0,40000.0,False,False,False,2752465000
2023,Table 1.4A,BJ,18,long_term_capital_gains,All,30000.0,40000.0,True,False,False,374807
2023,Table 1.4A,BK,19,long_term_capital_gains,All,40000.0,50000.0,False,False,False,3402047000
2023,Table 1.4A,BJ,19,long_term_capital_gains,All,40000.0,50000.0,True,False,False,401340
2023,Table 1.4A,BK,20,long_term_capital_gains,All,50000.0,75000.0,False,False,False,9470818000
2023,Table 1.4A,BJ,20,long_term_capital_gains,All,50000.0,75000.0,True,False,False,1138440
2023,Table 1.4A,BK,21,long_term_capital_gains,All,75000.0,100000.0,False,False,False,12715937000
2023,Table 1.4A,BJ,21,long_term_capital_gains,All,75000.0,100000.0,True,False,False,1185823
2023,Table 1.4A,BK,22,long_term_capital_gains,All,100000.0,200000.0,False,False,False,63046717000
2023,Table 1.4A,BJ,22,long_term_capital_gains,All,100000.0,200000.0,True,False,False,3470815
2023,Table 1.4A,BK,23,long_term_capital_gains,All,200000.0,500000.0,False,False,False,127187338000
2023,Table 1.4A,BJ,23,long_term_capital_gains,All,200000.0,500000.0,True,False,False,2793458
2023,Table 1.4A,BK,24,long_term_capital_gains,All,500000.0,1000000.0,False,False,False,100228422000
2023,Table 1.4A,BJ,24,long_term_capital_gains,All,500000.0,1000000.0,True,False,False,767767
2023,Table 1.4A,BK,25,long_term_capital_gains,All,1000000.0,1500000.0,False,False,False,56098627000
2023,Table 1.4A,BJ,25,long_term_capital_gains,All,1000000.0,1500000.0,True,False,False,196019
2023,Table 1.4A,BK,26,long_term_capital_gains,All,1500000.0,2000000.0,False,False,False,37572096000
2023,Table 1.4A,BJ,26,long_term_capital_gains,All,1500000.0,2000000.0,True,False,False,83388
2023,Table 1.4A,BK,27,long_term_capital_gains,All,2000000.0,5000000.0,False,False,False,111769225000
2023,Table 1.4A,BJ,27,long_term_capital_gains,All,2000000.0,5000000.0,True,False,False,123009
2023,Table 1.4A,BK,28,long_term_capital_gains,All,5000000.0,10000000.0,False,False,False,82043062000
2023,Table 1.4A,BJ,28,long_term_capital_gains,All,5000000.0,10000000.0,True,False,False,32657
2023,Table 1.4A,BK,29,long_term_capital_gains,All,10000000.0,inf,False,False,False,346272458000
2023,Table 1.4A,BJ,29,long_term_capital_gains,All,10000000.0,inf,True,False,False,22309
2 changes: 2 additions & 0 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ def _cbo_program_target_value(sim, variable_name: str, time_period):

LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES = {
"capital_gains_gross",
"long_term_capital_gains",
"ordinary_dividends",
"qualified_dividends",
"taxable_interest_income",
Expand All @@ -292,6 +293,7 @@ def _cbo_program_target_value(sim, variable_name: str, time_period):
"employment_income",
"business_net_profits",
"capital_gains_gross",
"long_term_capital_gains",
"ordinary_dividends",
"partnership_and_s_corp_income",
"qualified_dividends",
Expand Down
15 changes: 12 additions & 3 deletions policyengine_us_data/utils/soi.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"count": "population",
"employment_income": "employment_income_before_lsr",
"business_net_profits": "total_self_employment_income",
"capital_gains_gross": "long_term_capital_gains",
"capital_gains_gross": "long_term_capital_gains_basis",
"long_term_capital_gains": "long_term_capital_gains_basis",
"ordinary_dividends": "non_qualified_dividend_income",
"partnership_and_s_corp_income": "partnership_s_corp_income",
"qualified_dividends": "qualified_dividend_income",
Expand All @@ -21,8 +22,8 @@
"total_pension_income": "pension_income",
"total_social_security": "social_security",
"business_net_losses": "total_self_employment_income",
"capital_gains_distributions": "long_term_capital_gains",
"capital_gains_losses": "long_term_capital_gains",
"capital_gains_distributions": "long_term_capital_gains_basis",
"capital_gains_losses": "long_term_capital_gains_basis",
"estate_income": "estate_income",
"estate_losses": "estate_income",
"exempt_interest": "tax_exempt_interest_income",
Expand Down Expand Up @@ -89,6 +90,8 @@ def pe(variable):
df["capital_gains_losses"] = -pe("loss_limited_net_capital_gains") * (
pe("loss_limited_net_capital_gains") < 0
)
ltcg = pe("long_term_capital_gains")
df["long_term_capital_gains"] = ltcg * (ltcg > 0)
df["estate_income"] = pe("estate_income") * (pe("estate_income") > 0)
df["estate_losses"] = -pe("estate_income") * (pe("estate_income") < 0)
df["exempt_interest"] = pe("tax_exempt_interest_income")
Expand Down Expand Up @@ -146,6 +149,12 @@ def puf_to_soi(puf, year):
df["capital_gains_distributions"] = puf.E01100
df["capital_gains_gross"] = puf["E01000"] * (puf["E01000"] > 0)
df["capital_gains_losses"] = -puf["E01000"] * (puf["E01000"] < 0)
ltcg = (
puf["long_term_capital_gains"]
if "long_term_capital_gains" in puf
else puf.P23250
)
df["long_term_capital_gains"] = ltcg * (ltcg > 0)
df["estate_income"] = puf.E26390
df["estate_losses"] = puf.E26400
df["exempt_interest"] = puf.E00400
Expand Down
37 changes: 37 additions & 0 deletions tests/unit/calibration/test_loss_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
BEA_WAGES_AND_SALARIES_LOSS_WEIGHT,
BLS_CE_TOTALS,
HARD_CODED_TOTALS,
LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES,
TRANSFER_BALANCE_TARGETS,
_add_bea_state_wage_targets,
_add_agi_metric_columns,
Expand All @@ -39,6 +40,7 @@
get_target_error_normalisation,
get_target_loss_weights,
)
from policyengine_us_data.storage import CALIBRATION_FOLDER
from policyengine_us_data.db import etl_national_targets
from policyengine_us_data.utils.ssi_targets import (
SSI_RECIPIENT_TARGETS_2024,
Expand All @@ -53,6 +55,29 @@ def test_legacy_loss_targets_include_aggregate_qbi_deduction():
assert "qualified_business_income_deduction" not in AGI_LEVEL_TARGETED_VARIABLES


def test_legacy_loss_targets_include_ltcg_agi_grid():
    """Check that long-term capital gains are part of the legacy AGI grid.

    The variable must be listed in both targeting sets, and the tracked SOI
    targets CSV must contain 19 rows with ``Count`` false and 19 with
    ``Count`` true for it, all with positive values, plus the expected
    non-count value in the bracket starting at 10,000,000 with an infinite
    upper bound.
    """
    variable = "long_term_capital_gains"
    assert variable in AGI_LEVEL_TARGETED_VARIABLES
    assert variable in LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES

    targets = pd.read_csv(CALIBRATION_FOLDER / "soi_targets.csv")
    # Keep only all-filing-status Table 1.4A rows that are neither
    # taxable-only nor full-population rows.
    in_grid = (
        (targets["Variable"] == variable)
        & (targets["SOI table"] == "Table 1.4A")
        & (targets["Filing status"] == "All")
        & (~targets["Taxable only"])
        & (~targets["Full population"])
    )
    grid = targets[in_grid]

    rows_per_kind = grid.groupby("Count").size().to_dict()
    assert rows_per_kind == {False: 19, True: 19}
    assert grid["Value"].gt(0).all()

    is_top_amount = (
        (~grid["Count"])
        & (grid["AGI lower bound"] == 10_000_000.0)
        & np.isinf(grid["AGI upper bound"])
    )
    top_amount = grid[is_top_amount]
    assert top_amount["Value"].iat[0] == 346_272_458_000


def test_bea_nipa_direct_sum_targets_match_targets_db():
loss_targets_by_variable = {
variable: target for _, variable, target in BEA_NIPA_DIRECT_SUM_TARGETS
Expand Down Expand Up @@ -790,12 +815,16 @@ def test_low_agi_soi_skip_keeps_investment_income_targets():
capital_income_low_agi_row = pd.Series(
{"Variable": "capital_gains_gross", "AGI upper bound": 10_000.0}
)
ltcg_low_agi_row = pd.Series(
{"Variable": "long_term_capital_gains", "AGI upper bound": 10_000.0}
)
ordinary_higher_agi_row = pd.Series(
{"Variable": "employment_income", "AGI upper bound": 25_000.0}
)

assert _should_skip_soi_agi_row(ordinary_low_agi_row)
assert not _should_skip_soi_agi_row(capital_income_low_agi_row)
assert not _should_skip_soi_agi_row(ltcg_low_agi_row)
assert not _should_skip_soi_agi_row(ordinary_higher_agi_row)


Expand All @@ -806,6 +835,9 @@ def test_all_return_soi_skip_keeps_investment_income_targets():
capital_income_all_return_row = pd.Series(
{"Variable": "capital_gains_gross", "Taxable only": False}
)
ltcg_all_return_row = pd.Series(
{"Variable": "long_term_capital_gains", "Taxable only": False}
)
ordinary_taxable_row = pd.Series(
{"Variable": "employment_income", "Taxable only": True}
)
Expand All @@ -818,12 +850,17 @@ def test_all_return_soi_skip_keeps_investment_income_targets():
capital_income_taxable_row = pd.Series(
{"Variable": "capital_gains_gross", "Taxable only": True}
)
ltcg_taxable_row = pd.Series(
{"Variable": "long_term_capital_gains", "Taxable only": True}
)

assert _should_skip_soi_taxability_row(ordinary_all_return_row)
assert not _should_skip_soi_taxability_row(capital_income_all_return_row)
assert not _should_skip_soi_taxability_row(ltcg_all_return_row)
assert not _should_skip_soi_taxability_row(ordinary_taxable_row)
assert not _should_skip_soi_taxability_row(qbi_taxable_row)
assert _should_skip_soi_taxability_row(capital_income_taxable_row)
assert _should_skip_soi_taxability_row(ltcg_taxable_row)


def test_tanf_hardcoded_target_uses_fy2024_basic_assistance_total():
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/datasets/test_enhanced_cps_seeding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
Earlier versions used global ``np.random.normal(1, 0.1, ...)`` jitter before
``reweight()`` reseeded the optimizer. Current code routes both dense CPS
weighting paths through ``initialize_weight_priors()``, which preserves positive
survey weights and gives zero-weight clone records deterministic tiny priors.
survey weight shape and gives zero-weight clone records deterministic uniform
prior mass.
"""

import numpy as np
Expand Down
11 changes: 6 additions & 5 deletions tests/unit/test_enhanced_cps_clone_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,17 @@
)


def test_initialize_weight_priors_keeps_zero_weight_records_near_zero():
def test_initialize_weight_priors_gives_zero_weight_records_balanced_mass():
weights = np.array([1_500.0, 0.0, 625.0, 0.0], dtype=np.float64)

priors = initialize_weight_priors(weights, seed=123)

assert np.all(priors > 0)
assert priors[1] < 1e-4
assert priors[3] < 1e-4
assert priors[0] == pytest.approx(1_500.0)
assert priors[2] == pytest.approx(625.0)
assert priors.sum() == pytest.approx(weights.sum())
assert priors[[0, 2]].sum() == pytest.approx(weights.sum() / 2)
assert priors[[1, 3]].sum() == pytest.approx(weights.sum() / 2)
assert priors[1] == pytest.approx(priors[3])
assert priors[0] / priors[2] == pytest.approx(weights[0] / weights[2])


def test_initialize_weight_priors_preserves_positive_weights_exactly():
Expand Down
54 changes: 54 additions & 0 deletions tests/unit/test_soi_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def calculate(self, variable, map_to=None):
values = {
"self_employment_income": np.array([100.0, -10.0]),
"sstb_self_employment_income": np.array([50.0, -25.0]),
"long_term_capital_gains": np.array([25.0, -5.0]),
"miscellaneous_income": np.array([12.0, -5.0]),
"filing_status": np.array(["SINGLE", "SINGLE"]),
"tax_unit_weight": np.ones(n),
Expand All @@ -124,6 +125,9 @@ def calculate(self, variable, map_to=None):
np.testing.assert_array_equal(
soi["business_net_losses"].to_numpy(), np.array([0.0, 35.0])
)
np.testing.assert_array_equal(
soi["long_term_capital_gains"].to_numpy(), np.array([25.0, 0.0])
)
np.testing.assert_array_equal(soi["other_income"].to_numpy(), np.array([12.0, 0.0]))


Expand Down Expand Up @@ -199,6 +203,56 @@ def test_get_soi_uses_best_available_year_per_variable(monkeypatch):
assert np.isclose(taxable_interest_value, 266.6666666667)


def test_get_soi_uses_ltcg_basis_uprating_for_capital_gains(monkeypatch):
    """Check that both capital-gains SOI variables are uprated by the basis row.

    The tracked targets and the uprating table are replaced with small
    fixtures. The fixture's ``long_term_capital_gains_basis`` row goes from
    1.0 in 2023 to 2.0 in 2024, so ``get_soi(2024)`` is expected to return
    double the 2023 value for each of the two variables.
    """
    soi_module = load_soi_module()

    tracked_targets = pd.DataFrame(
        [
            {
                "Year": 2023,
                "Variable": "capital_gains_gross",
                "Value": 100.0,
            },
            {
                "Year": 2023,
                "Variable": "long_term_capital_gains",
                "Value": 200.0,
            },
        ]
    )
    # Remaining target columns hold the same value on every fixture row.
    shared_columns = {
        "SOI table": "Table 1.4A",
        "XLSX column": "BK",
        "XLSX row": 10,
        "Filing status": "All",
        "AGI lower bound": float("-inf"),
        "AGI upper bound": float("inf"),
        "Count": False,
        "Taxable only": False,
        "Full population": True,
    }
    for name, value in shared_columns.items():
        tracked_targets[name] = value

    # The second row carries a different 2024 factor (10.0), so the expected
    # values below only hold if the basis row is the one applied.
    factors = pd.DataFrame(
        {
            2023: [1.0, 1.0],
            2024: [2.0, 10.0],
        },
        index=["long_term_capital_gains_basis", "employment_income_before_lsr"],
    )

    def fake_tracked_targets():
        return tracked_targets.copy()

    def fake_uprating_table():
        return factors

    monkeypatch.setattr(soi_module, "load_tracked_soi_targets", fake_tracked_targets)
    monkeypatch.setattr(
        soi_module,
        "create_policyengine_uprating_factors_table",
        fake_uprating_table,
    )

    by_variable = soi_module.get_soi(2024).set_index("Variable")

    assert by_variable.loc["capital_gains_gross", "Value"] == 200.0
    assert by_variable.loc["long_term_capital_gains", "Value"] == 400.0


def test_get_soi_uses_current_employment_income_uprating_without_legacy_row(
monkeypatch,
):
Expand Down