Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/codex-bump-us-data-bundle.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update the bundled US model and enhanced CPS data release while preserving explicitly versioned dataset entries.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ uk = [
]
us = [
"policyengine_core==3.26.1",
"policyengine-us==1.691.12",
"policyengine-us==1.700.0",
]
dev = [
"pytest",
Expand All @@ -62,7 +62,7 @@ dev = [
"ruff>=0.9.0",
"policyengine_core==3.26.1",
"policyengine-uk==2.88.14",
"policyengine-us==1.691.12",
"policyengine-us==1.700.0",
"towncrier>=24.8.0",
"mypy>=1.11.0",
"pytest-cov>=5.0.0",
Expand Down
260 changes: 168 additions & 92 deletions src/policyengine/data/release_manifests/us.json

Large diffs are not rendered by default.

52 changes: 33 additions & 19 deletions src/policyengine/data/release_manifests/us.trace.tro.jsonld
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"schema:name": "PolicyEngine",
"schema:url": "https://policyengine.org"
},
"schema:dateCreated": "2026-05-20T19:37:01.417728Z",
"schema:description": "TRACE TRO for certified runtime bundle us-4.9.0 covering the bundle manifest, the certified dataset artifact, the country model wheel, and the country data release manifest when it is available.",
"schema:name": "policyengine us certified bundle TRO",
"trov:createdWith": {
Expand All @@ -38,21 +39,29 @@
},
"trov:hasLocation": "data/release_manifests/us.json"
},
{
"@id": "arrangement/1/location/data_release_manifest",
"@type": "trov:ArtifactLocation",
"trov:hasArtifact": {
"@id": "composition/1/artifact/data_release_manifest"
},
"trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/688f972425f5e858fc52bda2b696e0af74fea920/release_manifest.json"
},
{
"@id": "arrangement/1/location/dataset",
"@type": "trov:ArtifactLocation",
"trov:hasArtifact": {
"@id": "composition/1/artifact/dataset"
},
"trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/69fc39a7fece4c49ba87291e598e76b40568cc5d/enhanced_cps_2024.h5"
"trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/688f972425f5e858fc52bda2b696e0af74fea920/enhanced_cps_2024.h5"
},
{
"@id": "arrangement/1/location/model_wheel",
"@type": "trov:ArtifactLocation",
"trov:hasArtifact": {
"@id": "composition/1/artifact/model_wheel"
},
"trov:hasLocation": "https://files.pythonhosted.org/packages/83/6f/b605fc1d8e06e377ae50870dc44a28cfe6562f0032e36dd53a5ef49472db/policyengine_us-1.691.12-py3-none-any.whl"
"trov:hasLocation": "https://files.pythonhosted.org/packages/49/e9/2837a0d98e99efaf4d82aade276eee6eeff419df614863f08e3512961d2d/policyengine_us-1.700.0-py3-none-any.whl"
}
]
}
Expand All @@ -66,46 +75,51 @@
"@type": "trov:ResearchArtifact",
"schema:name": "policyengine.py bundle manifest for us",
"trov:mimeType": "application/json",
"trov:sha256": "0f07212e1bf3d4d482ee0807597ac86aba42e2dd011bdb0a19992bbf3d229a4b"
"trov:sha256": "60b57104001c8431ba9f5bf32883291434be679d1d710d0a41a894daf15c8e02"
},
{
"@id": "composition/1/artifact/data_release_manifest",
"@type": "trov:ResearchArtifact",
"schema:name": "policyengine-us-data release manifest 1.115.5",
"trov:mimeType": "application/json",
"trov:sha256": "f5387c6b5acc0507cc965087da5059f59a4c6cb43b3778f13f065355f05d900e"
},
{
"@id": "composition/1/artifact/dataset",
"@type": "trov:ResearchArtifact",
"schema:name": "enhanced_cps_2024",
"trov:sha256": "96965b0d9931d36beb29486b25fffee8b630e3b62c9376e7265976e02f7ab3ca"
"trov:mimeType": "application/x-hdf5",
"trov:sha256": "0a6b961ad363a421bde99f2c8e5d8f20370bcba45fd303050537a25bdd805b14"
},
{
"@id": "composition/1/artifact/model_wheel",
"@type": "trov:ResearchArtifact",
"schema:name": "policyengine-us==1.691.12 wheel",
"schema:name": "policyengine-us==1.700.0 wheel",
"trov:mimeType": "application/zip",
"trov:sha256": "ef43482bd8c6cc16f8f1d4050423f5dc1d045af15931f5d1b089715a31c839d2"
"trov:sha256": "7633d8aefcaf02d7628f841bc56750606f1d7fe409ff3ae7b0ef7e364a88e945"
}
],
"trov:hasFingerprint": {
"@id": "composition/1/fingerprint",
"@type": "trov:CompositionFingerprint",
"trov:sha256": "5acefac7fcc2e41aae94d5533a5553d1b7580a5d3923103fc58bb73fd7071917"
"trov:sha256": "55d4f9d726e0c8506dd36227a98b9297128fac7c83fa484792910e375a3eda78"
}
},
"trov:hasPerformance": {
"@id": "trp/1",
"@type": "trov:TransparentResearchPerformance",
"pe:builtWithModelVersion": "1.691.12",
"pe:certifiedBy": "policyengine.py candidate long-term manifest",
"pe:certifiedForModelVersion": "1.691.12",
"pe:ciGitRef": "refs/heads/main",
"pe:ciGitSha": "443561cb9d2e066fd8d0d79c4021b5081dcbfee8",
"pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/26120061926",
"pe:compatibilityBasis": "candidate_long_term_bundle",
"pe:dataBuildFingerprint": "sha256:ae675b873404ec1fdc461e056a02958816fd0f8ab08ba5883fb05c4cf40f5b49",
"pe:dataBuildId": "policyengine-us-data-crfb-longrun-20260517",
"pe:dataReleaseManifestStatus": "unavailable",
"pe:emittedIn": "github-actions",
"rdfs:comment": "Certification of build policyengine-us-data-crfb-longrun-20260517 for policyengine-us 1.691.12.",
"pe:builtWithModelVersion": "1.700.0",
"pe:certifiedBy": "policyengine-us-data release manifest",
"pe:certifiedForModelVersion": "1.700.0",
"pe:compatibilityBasis": "exact_build_model_version",
"pe:dataBuildFingerprint": "sha256:b0862de383ffcbe45f4ba0aa9c6aaec286cd4c6688c6ccb33f939bc176f9a8a0",
"pe:dataBuildId": "policyengine-us-data-1.115.5",
"pe:emittedIn": "local",
"rdfs:comment": "Certification of build policyengine-us-data-1.115.5 for policyengine-us 1.700.0.",
"trov:accessedArrangement": {
"@id": "arrangement/1"
},
"trov:startedAtTime": "2026-05-20T19:37:01.417728Z",
"trov:wasConductedBy": {
"@id": "trs"
}
Expand Down
5 changes: 5 additions & 0 deletions src/policyengine/provenance/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ def _refresh_dataset_path_references_from_data_release(
path = path_reference.get("path")
if not path:
continue
if path_reference.get("revision"):
continue
artifact = _release_artifact_by_path(release_manifest_json, path)
if artifact is None:
if "sha256" in path_reference or "metadata_sha256" in path_reference:
Expand Down Expand Up @@ -549,6 +551,9 @@ def refresh_release_bundle(
certification_json = manifest_json["certification"]
certification_json["data_build_id"] = data_build_id
certification_json["certified_for_model_version"] = new_model
certification_json["certified_by"] = (
f"{current.data_package.name} release manifest"
)
built_with_model_version = built_with_model.get("version")
if built_with_model_version is not None:
certification_json["built_with_model_version"] = built_with_model_version
Expand Down
4 changes: 3 additions & 1 deletion src/policyengine/provenance/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class DataBuildInfo(BaseModel):

class ArtifactPathReference(BaseModel):
path: str
revision: Optional[str] = None
sha256: Optional[str] = None
metadata_sha256: Optional[str] = None

Expand Down Expand Up @@ -426,7 +427,8 @@ def resolve_dataset_reference(country_id: str, dataset: str) -> str:
return build_hf_uri(
repo_id=manifest.data_package.repo_id,
path_in_repo=path_reference.path,
revision=_artifact_revision(manifest.data_package),
revision=path_reference.revision
or _artifact_revision(manifest.data_package),
)

data_release_manifest = get_data_release_manifest(country_id)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"person[0].is_male": 1.0,
"person[0].marital_unit_id": 0.0,
"person[0].medicaid": 0.0,
"person[0].medicare_cost": 14500.0,
"person[0].medicare_cost": 0.0,
"person[0].person_id": 0.0,
"person[0].person_weight": 1.0,
"person[0].race": 3.0,
Expand All @@ -39,7 +39,7 @@
"person[1].is_male": 1.0,
"person[1].marital_unit_id": 0.0,
"person[1].medicaid": 0.0,
"person[1].medicare_cost": 14500.0,
"person[1].medicare_cost": 0.0,
"person[1].person_id": 1.0,
"person[1].person_weight": 1.0,
"person[1].race": 3.0,
Expand All @@ -57,7 +57,7 @@
"person[2].is_male": 1.0,
"person[2].marital_unit_id": 0.0,
"person[2].medicaid": 0.0,
"person[2].medicare_cost": 14500.0,
"person[2].medicare_cost": 0.0,
"person[2].person_id": 2.0,
"person[2].person_weight": 1.0,
"person[2].race": 3.0,
Expand All @@ -75,7 +75,7 @@
"person[3].is_male": 1.0,
"person[3].marital_unit_id": 0.0,
"person[3].medicaid": 0.0,
"person[3].medicare_cost": 14500.0,
"person[3].medicare_cost": 0.0,
"person[3].person_id": 3.0,
"person[3].person_weight": 1.0,
"person[3].race": 3.0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"has_income_tax": true,
"has_region_registry": true,
"model_package_name": "policyengine-us",
"num_parameters_bucketed_100s": 852,
"num_variables_bucketed_100s": 48,
"num_parameters_bucketed_100s": 873,
"num_variables_bucketed_100s": 49,
"region_registry_country": "us"
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"person[0].is_male": 1.0,
"person[0].marital_unit_id": 0.0,
"person[0].medicaid": 0.0,
"person[0].medicare_cost": 14500.0,
"person[0].medicare_cost": 0.0,
"person[0].person_id": 0.0,
"person[0].person_weight": 1.0,
"person[0].race": 3.0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"person[0].is_male": 1.0,
"person[0].marital_unit_id": 0.0,
"person[0].medicaid": 6439.11,
"person[0].medicare_cost": 14500.0,
"person[0].medicare_cost": 0.0,
"person[0].person_id": 0.0,
"person[0].person_weight": 1.0,
"person[0].race": 3.0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"person[0].is_male": 1.0,
"person[0].marital_unit_id": 0.0,
"person[0].medicaid": 0.0,
"person[0].medicare_cost": 14500.0,
"person[0].medicare_cost": 0.0,
"person[0].person_id": 0.0,
"person[0].person_weight": 1.0,
"person[0].race": 3.0,
Expand All @@ -39,7 +39,7 @@
"person[1].is_male": 1.0,
"person[1].marital_unit_id": 0.0,
"person[1].medicaid": 3258.31,
"person[1].medicare_cost": 14500.0,
"person[1].medicare_cost": 0.0,
"person[1].person_id": 1.0,
"person[1].person_weight": 1.0,
"person[1].race": 3.0,
Expand Down
40 changes: 40 additions & 0 deletions tests/test_bundle_refresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,46 @@ def fake_urlopen(request, *args, **kwargs):
)


def test__refresh_preserves_dataset_entries_with_explicit_revisions(
    sandbox,
) -> None:
    """Check that a refresh leaves a revision-pinned dataset entry unchanged.

    A ``long_term_cps_2100`` entry carrying its own ``revision`` is seeded
    into the sandboxed ``us.json`` manifest, the bundle is refreshed against
    data version 1.83.4 with network access stubbed out, and the rewritten
    manifest must still contain that entry exactly as seeded. The certified
    data artifact URI is also checked against the expected enhanced CPS URI.
    """
    pinned_entry = {
        "path": "long_term/2100.h5",
        "revision": "crfb-longrun-20260517",
        "sha256": "1" * 64,
        "metadata_sha256": "2" * 64,
    }

    # Seed the sandbox manifest with the explicitly versioned dataset entry.
    us_manifest_file = sandbox["manifest_dir"] / "us.json"
    seeded = json.loads(us_manifest_file.read_text())
    seeded["datasets"]["long_term_cps_2100"] = dict(pinned_entry)
    us_manifest_file.write_text(json.dumps(seeded, indent=2))

    def fake_urlopen(request, *args, **kwargs):
        # Only the 1.83.4 release manifest may be requested; any other URL
        # fails the test immediately.
        url = request.full_url
        if not url.endswith("releases/1.83.4/release_manifest.json"):
            raise AssertionError(f"Unexpected URL fetched: {url}")
        return _data_release_manifest_response(data_version="1.83.4")

    with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen):
        refresh_release_bundle(
            country="us",
            data_version="1.83.4",
            manifest_dir=sandbox["manifest_dir"],
            pyproject_path=sandbox["pyproject_path"],
        )

    refreshed = json.loads(us_manifest_file.read_text())

    # The pinned entry must come through the refresh unchanged.
    assert refreshed["datasets"]["long_term_cps_2100"] == pinned_entry

    certified_uri = refreshed["certified_data_artifact"]["uri"]
    assert (
        certified_uri
        == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@release-manifest-commit-sha"
    )


def test__custom_release_manifest_requires_existing_long_term_dataset_sha(
sandbox,
) -> None:
Expand Down
6 changes: 3 additions & 3 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ def test_has_release_manifest_metadata(self):
assert us_latest.release_manifest is not None
assert us_latest.release_manifest.country_id == "us"
assert us_latest.model_package.name == "policyengine-us"
assert us_latest.model_package.version == "1.691.12"
assert us_latest.model_package.version == "1.700.0"
assert us_latest.data_package.name == "policyengine-us-data"
assert us_latest.data_package.version == "1.115.3"
assert us_latest.data_package.version == "1.115.5"
assert (
us_latest.default_dataset_uri
== "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@69fc39a7fece4c49ba87291e598e76b40568cc5d"
== "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@688f972425f5e858fc52bda2b696e0af74fea920"
)

def test_has_hundreds_of_parameters(self):
Expand Down
Loading
Loading