Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/1086.changed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Stage 2 geography assignment summary artifacts for calibration packages.
42 changes: 42 additions & 0 deletions docs/pipeline_map.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,8 @@ stages:
- stage2_target_catalog_reader
- stage2_target_selection_policy
- stage2_target_selection_result
- stage2_geography_assignment_spec
- stage2_geography_assignment_result
- target_resolve
- stage2_target_config_apply
- target_uprate
Expand All @@ -832,6 +834,7 @@ stages:
- out_metadata
- out_targets
- out_target_facets
- out_geography_summary
- stage2_calibration_package_contract_writer
- out_contract
- stage2_calibration_package_contract_validator
Expand Down Expand Up @@ -896,6 +899,10 @@ stages:
label: calibration_target_facets.json
node_type: artifact
description: Compact target counts by variable, geography level, target name, period, and constraint key
- id: out_geography_summary
label: geography_assignment_summary.json
node_type: artifact
description: Compact clone-major geography identity summary for block, county, state, and congressional district arrays
- id: out_contract
label: calibration_package_contract.json
node_type: artifact
Expand Down Expand Up @@ -981,6 +988,30 @@ stages:
target: build_matrix_chunked
edge_type: data_flow
label: matrix target order
- source: in_cps_s5
target: stage2_geography_assignment_spec
edge_type: data_flow
label: household AGI and fixed state overrides
- source: in_db_s5
target: stage2_geography_assignment_spec
edge_type: external_source
label: district AGI targets
- source: stage2_geography_assignment_spec
target: stage2_geography_assignment_result
edge_type: data_flow
label: deterministic assignment inputs
- source: in_blocks_s5
target: stage2_geography_assignment_result
edge_type: data_flow
label: block populations
- source: stage2_geography_assignment_result
target: build_matrix
edge_type: data_flow
label: clone-major geography
- source: stage2_geography_assignment_result
target: build_matrix_chunked
edge_type: data_flow
label: clone-major geography
- source: stage2_target_catalog_load
target: stage2_target_config_apply
edge_type: data_flow
Expand Down Expand Up @@ -1060,6 +1091,10 @@ stages:
target: out_target_facets
edge_type: produces_artifact
label: derived facets
- source: stage2_geography_assignment_result
target: out_geography_summary
edge_type: produces_artifact
label: geography identity
- source: out_pkg
target: stage2_payload_reader
edge_type: data_flow
Expand All @@ -1078,6 +1113,10 @@ stages:
target: stage2_calibration_package_contract_writer
edge_type: data_flow
label: target facet artifact
- source: out_geography_summary
target: stage2_calibration_package_contract_writer
edge_type: data_flow
label: geography summary artifact
- source: stage2_artifact_specs
target: stage2_calibration_package_contract_writer
edge_type: uses_utility
Expand All @@ -1091,6 +1130,9 @@ stages:
- source: stage2_calibration_package_contract_writer
target: out_target_facets
edge_type: validates
- source: stage2_calibration_package_contract_writer
target: out_geography_summary
edge_type: validates
- source: out_pkg
target: stage2_calibration_package_contract_validator
edge_type: validates
Expand Down
37 changes: 30 additions & 7 deletions policyengine_us_data/calibration/unified_calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@
CalibrationPackageReader,
CalibrationPackageWriter,
)
from policyengine_us_data.calibration_package.geography import GeographyAssignmentSpec
from policyengine_us_data.calibration_package.specs import (
DEFAULT_TARGET_CONFIG_PATH as DEFAULT_TARGET_CONFIG_RELATIVE_PATH,
CALIBRATION_TARGET_FACETS_FILENAME,
CALIBRATION_TARGETS_FILENAME,
GEOGRAPHY_ASSIGNMENT_SUMMARY_FILENAME,
TargetConfigIdentity,
resolve_target_config_identity,
)
Expand Down Expand Up @@ -1505,9 +1507,6 @@ def run_calibration(

from policyengine_us import Microsimulation

from policyengine_us_data.calibration.clone_and_assign import (
assign_random_geography,
)
from policyengine_us_data.calibration.unified_matrix_builder import (
UnifiedMatrixBuilder,
)
Expand Down Expand Up @@ -1555,6 +1554,14 @@ def run_calibration(
time_period=time_period,
n_records=n_records,
)
geography_spec = GeographyAssignmentSpec.from_runtime_inputs(
n_records=n_records,
n_clones=n_clones,
seed=seed,
household_agi=base_agi,
cd_agi_targets=cd_agi_targets,
fixed_state_fips=fixed_state_fips,
)

# Step 2: Clone and assign geography
logger.info(
Expand All @@ -1563,14 +1570,21 @@ def run_calibration(
n_clones,
n_records * n_clones,
)
geography = assign_random_geography(
n_records=n_records,
n_clones=n_clones,
seed=seed,
geography_result = geography_spec.assign(
household_agi=base_agi,
cd_agi_targets=cd_agi_targets,
fixed_state_fips=fixed_state_fips,
)
from policyengine_us_data.calibration.clone_and_assign import GeographyAssignment

geography = GeographyAssignment(
block_geoid=geography_result.block_geoid,
cd_geoid=geography_result.cd_geoid,
county_fips=geography_result.county_fips,
state_fips=geography_result.state_fips,
n_records=n_records,
n_clones=n_clones,
)

# Step 3: Source imputation (if requested)
dataset_for_matrix = dataset_path
Expand Down Expand Up @@ -1722,6 +1736,9 @@ def run_calibration(
"matrix_builder": "chunked" if chunked_matrix else "precompute",
"chunk_size": chunk_size if chunked_matrix else None,
"chunk_dir": chunk_dir if chunked_matrix else None,
"geography_assignment_spec": geography_spec.to_dict(),
"geography_assignment_sha256": geography_result.canonical_geography_sha256,
"geography_assignment_status": geography_result.status,
"target_selection_sha256": target_selection.checksum,
"target_selection_n_targets": target_selection.n_selected_targets,
}
Expand All @@ -1736,7 +1753,11 @@ def run_calibration(
package_path = Path(package_output_path)
targets_path = package_path.with_name(CALIBRATION_TARGETS_FILENAME)
target_facets_path = package_path.with_name(CALIBRATION_TARGET_FACETS_FILENAME)
geography_summary_path = package_path.with_name(
GEOGRAPHY_ASSIGNMENT_SUMMARY_FILENAME
)
target_selection.write_artifacts(targets_path, target_facets_path)
geography_result.write_summary(geography_summary_path)
package_payload = CalibrationPackagePayload(
X_sparse=X_sparse,
targets_df=targets_df,
Expand Down Expand Up @@ -1790,6 +1811,8 @@ def run_calibration(
target_metadata_path=targets_path,
target_facets_path=target_facets_path,
target_selection_summary=target_selection.summary(),
geography_summary_path=geography_summary_path,
geography_assignment_summary=geography_result.summary(),
)
validate_calibration_package_contract(
package_path=package_path,
Expand Down
16 changes: 16 additions & 0 deletions policyengine_us_data/calibration_package/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
CALIBRATION_REPORTS_DIRNAME,
DATASET_BUILD_OUTPUT_CONTRACT_FILENAME,
DEFAULT_TARGET_CONFIG_PATH,
GEOGRAPHY_ASSIGNMENT_SUMMARY_FILENAME,
MATRIX_BUILD_DIRNAME,
SOURCE_DATASET_FILENAME,
TARGET_CONFIG_IDENTITY_MODES,
Expand All @@ -28,6 +29,14 @@
stage2_input_bundle_from_stage1_contract,
stage2_input_bundle_from_stage1_contract_path,
)
from .geography import (
GEOGRAPHY_ASSIGNMENT_ORDERING,
GEOGRAPHY_ASSIGNMENT_SCHEMA_VERSION,
GeographyAssignmentResult,
GeographyAssignmentSpec,
geography_spec_from_metadata,
geography_summary_from_package,
)
from .payload import (
LEGACY_MISSING_GEOGRAPHY_WARNING,
REQUIRED_PACKAGE_KEYS,
Expand All @@ -53,6 +62,9 @@
"CALIBRATION_REPORTS_DIRNAME",
"DATASET_BUILD_OUTPUT_CONTRACT_FILENAME",
"DEFAULT_TARGET_CONFIG_PATH",
"GEOGRAPHY_ASSIGNMENT_ORDERING",
"GEOGRAPHY_ASSIGNMENT_SCHEMA_VERSION",
"GEOGRAPHY_ASSIGNMENT_SUMMARY_FILENAME",
"MATRIX_BUILD_DIRNAME",
"SOURCE_DATASET_FILENAME",
"TARGET_CONFIG_IDENTITY_MODES",
Expand All @@ -62,6 +74,8 @@
"CalibrationPackagePayload",
"CalibrationPackageReader",
"CalibrationPackageWriter",
"GeographyAssignmentResult",
"GeographyAssignmentSpec",
"LEGACY_MISSING_GEOGRAPHY_WARNING",
"Stage2BuildContext",
"Stage2InputBundle",
Expand All @@ -74,6 +88,8 @@
"TargetSelectionPolicy",
"TargetSelectionResult",
"calibration_package_artifact_paths",
"geography_spec_from_metadata",
"geography_summary_from_package",
"resolve_target_config_identity",
"stage2_build_context_for_run",
"stage2_input_bundle_from_artifacts_dir",
Expand Down
Loading