Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/719.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added richer national CTC calibration and validation coverage by loading AGI-split refundable and nonrefundable CTC targets from IRS geography data, expanding CTC diagnostics to AGI-by-filing-status and child-composition tables, and reporting a canonical ARPA-style CTC reform in national H5 validation.
187 changes: 160 additions & 27 deletions policyengine_us_data/calibration/ctc_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,28 @@
"non_refundable_ctc",
]

# Weighted head-count columns, in the order they appear in the child-age table.
CHILD_AGE_GROUP_COLUMNS = [
    "tax_unit_count",
    "ctc_qualifying_children",
    "ctc_recipient_count",
    "refundable_ctc_recipient_count",
    "non_refundable_ctc_recipient_count",
]

# Columns rendered with the count formatter. These are exactly the head-count
# columns listed above, so the set is derived from that list.
COUNT_FORMAT_COLUMNS = set(CHILD_AGE_GROUP_COLUMNS)

# Columns rendered with the amount formatter.
AMOUNT_FORMAT_COLUMNS = {"ctc", "refundable_ctc", "non_refundable_ctc"}


def _assign_agi_bands(adjusted_gross_income: np.ndarray) -> pd.Categorical:
labels = [label for _, _, label in IRS_AGI_BANDS]
Expand All @@ -58,15 +80,19 @@ def _normalize_filing_status(filing_status: pd.Series) -> pd.Categorical:
return pd.Categorical(labels, categories=FILING_STATUS_ORDER, ordered=True)


def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
"""Aggregate weighted CTC diagnostics by AGI band and filing status."""
work = frame.copy()
weights = work["tax_unit_weight"].astype(float).to_numpy()
def _assign_ctc_child_count_buckets(
    ctc_qualifying_children: np.ndarray,
) -> pd.Categorical:
    """Bucket per-unit qualifying-child counts into "0", "1", "2" or "3+".

    Values at or below zero map to "0"; values exactly equal to 1 or 2 map
    to "1" and "2". Every other value keeps the "3+" label, which includes
    anything the three comparisons do not match (for example NaN or a
    non-integer count).

    Returns:
        An ordered categorical with categories "0" < "1" < "2" < "3+".
    """
    bucket_order = ["0", "1", "2", "3+"]

    # Start everything in the open-ended top bucket, then overwrite matches.
    assigned = np.full(len(ctc_qualifying_children), bucket_order[-1], dtype=object)
    for matches, bucket_label in (
        (ctc_qualifying_children <= 0, "0"),
        (ctc_qualifying_children == 1, "1"),
        (ctc_qualifying_children == 2, "2"),
    ):
        assigned[matches] = bucket_label

    return pd.Categorical(assigned, categories=bucket_order, ordered=True)

work["agi_band"] = _assign_agi_bands(
work["adjusted_gross_income"].astype(float).to_numpy()
)
work["filing_status_group"] = _normalize_filing_status(work["filing_status"])

def _add_weighted_ctc_columns(work: pd.DataFrame) -> pd.DataFrame:
weights = work["tax_unit_weight"].astype(float).to_numpy()

work["tax_unit_count"] = weights
work["ctc_qualifying_children"] = (
Expand All @@ -87,6 +113,71 @@ def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
work["non_refundable_ctc"].astype(float).to_numpy() * weights
)

return work


def _build_child_age_table(work: pd.DataFrame) -> pd.DataFrame | None:
if (
"ctc_qualifying_children_under_6" not in work
or "ctc_qualifying_children_6_to_17" not in work
):
return None

weights = work["tax_unit_weight"].astype(float).to_numpy()
ctc_positive = work["ctc"].astype(float).to_numpy() > 0
refundable_positive = work["refundable_ctc"].astype(float).to_numpy() > 0
non_refundable_positive = (
work["non_refundable_ctc"].astype(float).to_numpy() > 0
)

rows = []
for label, child_counts in (
(
"Under 6",
work["ctc_qualifying_children_under_6"].astype(float).to_numpy(),
),
(
"Age 6-17",
work["ctc_qualifying_children_6_to_17"].astype(float).to_numpy(),
),
):
has_children = child_counts > 0
rows.append(
{
"group": label,
"tax_unit_count": float((has_children.astype(float) * weights).sum()),
"ctc_qualifying_children": float((child_counts * weights).sum()),
"ctc_recipient_count": float(
((ctc_positive & has_children).astype(float) * weights).sum()
),
"refundable_ctc_recipient_count": float(
(
(refundable_positive & has_children).astype(float) * weights
).sum()
),
"non_refundable_ctc_recipient_count": float(
(
(non_refundable_positive & has_children).astype(float) * weights
).sum()
),
}
)

return pd.DataFrame(rows, columns=["group"] + CHILD_AGE_GROUP_COLUMNS)


def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
"""Aggregate weighted CTC diagnostics by AGI band and filing status."""
work = frame.copy()
child_counts = work["ctc_qualifying_children"].astype(float).to_numpy()

work["agi_band"] = _assign_agi_bands(
work["adjusted_gross_income"].astype(float).to_numpy()
)
work["filing_status_group"] = _normalize_filing_status(work["filing_status"])
work["child_count_group"] = _assign_ctc_child_count_buckets(child_counts)
work = _add_weighted_ctc_columns(work)

by_agi = (
work.groupby("agi_band", observed=False)[CTC_GROUP_COLUMNS]
.sum()
Expand All @@ -99,26 +190,73 @@ def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
.reset_index()
.rename(columns={"filing_status_group": "group"})
)
by_agi_band_and_filing_status = (
work.groupby(["agi_band", "filing_status_group"], observed=False)[
CTC_GROUP_COLUMNS
]
.sum()
.reset_index()
.rename(columns={"filing_status_group": "filing_status"})
)
by_child_count = (
work.groupby("child_count_group", observed=False)[CTC_GROUP_COLUMNS]
.sum()
.reset_index()
.rename(columns={"child_count_group": "group"})
)
by_child_age = _build_child_age_table(frame)

return {
tables = {
"by_agi_band": by_agi,
"by_filing_status": by_filing_status,
"by_agi_band_and_filing_status": by_agi_band_and_filing_status,
"by_child_count": by_child_count,
}
if by_child_age is not None:
tables["by_child_age"] = by_child_age
return tables


def create_ctc_diagnostic_tables(sim, period=None) -> dict[str, pd.DataFrame]:
    """Calculate weighted CTC diagnostic tables from a microsimulation.

    Args:
        sim: Simulation object. It must provide
            ``calculate(name, period=..., map_to=...)`` returning an object
            with a ``.values`` array, and
            ``map_result(values, source_entity, target_entity)``.
        period: Forwarded unchanged to every ``sim.calculate`` call. ``None``
            leaves the period choice to the simulation.

    Returns:
        The tables built by ``build_ctc_diagnostic_tables`` from the
        calculated tax-unit frame. The age-split child-count columns are
        added on a best-effort basis; when they cannot be computed the
        reason is logged and the frame is passed on without them.
    """
    import logging

    tax_unit_variables = (
        "adjusted_gross_income",
        "filing_status",
        "tax_unit_weight",
        "ctc_qualifying_children",
        "ctc",
        "refundable_ctc",
        "non_refundable_ctc",
    )
    frame = pd.DataFrame(
        {
            variable: sim.calculate(variable, period=period).values
            for variable in tax_unit_variables
        }
    )

    try:
        ctc_qualifying_child = sim.calculate(
            "ctc_qualifying_child",
            map_to="person",
            period=period,
        ).values.astype(bool)
        age = sim.calculate("age", map_to="person", period=period).values.astype(float)
        # ``assign`` attaches both columns or neither, so a failure part-way
        # through never leaves the frame with only one age group.
        frame = frame.assign(
            ctc_qualifying_children_under_6=sim.map_result(
                (ctc_qualifying_child & (age < 6)).astype(float),
                "person",
                "tax_unit",
            ),
            ctc_qualifying_children_6_to_17=sim.map_result(
                (ctc_qualifying_child & (age >= 6) & (age < 18)).astype(float),
                "person",
                "tax_unit",
            ),
        )
    except Exception as exc:
        # Deliberately broad: the exception types the simulation raises are
        # not visible here, and the age split is optional. Record why it was
        # skipped instead of discarding the error silently.
        logging.getLogger(__name__).warning(
            "Skipping CTC child-age diagnostics: %s: %s",
            type(exc).__name__,
            exc,
        )

    return build_ctc_diagnostic_tables(frame)


Expand All @@ -132,14 +270,9 @@ def _format_amount(value: float) -> str:

def format_ctc_diagnostic_table(table: pd.DataFrame) -> str:
    """Render a CTC diagnostic table as plain text without the index.

    Columns named in ``COUNT_FORMAT_COLUMNS`` are passed through
    ``_format_count`` and columns named in ``AMOUNT_FORMAT_COLUMNS`` through
    ``_format_amount``. Any other column is printed as-is. Only columns that
    are present are touched, so tables carrying a subset of the metric
    columns format without error. The input frame is not modified.
    """
    display = table.copy()
    for column in display.columns:
        if column in COUNT_FORMAT_COLUMNS:
            display[column] = display[column].map(_format_count)
        elif column in AMOUNT_FORMAT_COLUMNS:
            display[column] = display[column].map(_format_amount)
    return display.to_string(index=False)
12 changes: 12 additions & 0 deletions policyengine_us_data/calibration/target_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,15 @@ include:
- variable: refundable_ctc
geo_level: national
domain_variable: refundable_ctc
- variable: refundable_ctc
geo_level: national
domain_variable: adjusted_gross_income,refundable_ctc
- variable: non_refundable_ctc
geo_level: national
domain_variable: non_refundable_ctc
- variable: non_refundable_ctc
geo_level: national
domain_variable: adjusted_gross_income,non_refundable_ctc
- variable: self_employment_income
geo_level: national
domain_variable: self_employment_income
Expand All @@ -173,9 +179,15 @@ include:
- variable: tax_unit_count
geo_level: national
domain_variable: refundable_ctc
- variable: tax_unit_count
geo_level: national
domain_variable: adjusted_gross_income,refundable_ctc
- variable: tax_unit_count
geo_level: national
domain_variable: non_refundable_ctc
- variable: tax_unit_count
geo_level: national
domain_variable: adjusted_gross_income,non_refundable_ctc

# === NATIONAL — SOI deduction totals (non-reform) ===
- variable: medical_expense_deduction
Expand Down
Loading
Loading