class PurlConfig(pydantic.BaseModel):
    """Per-package purl configuration for SBOM generation.

    Allows overriding individual purl components or specifying an
    upstream purl for packages sourced from GitHub/GitLab.

    ::

        purl:
          type: generic
          name: custom-name
          repository_url: "https://example.com/simple"
          upstream: "pkg:github/org/repo@v1.0.0"

    Every field is optional; ``None`` means "no per-package override".
    ``upstream`` is checked when the settings are loaded, so a malformed
    purl is reported against the settings file that contains it.
    """

    model_config = MODEL_CONFIG

    type: str | None = None
    """Override the purl type (e.g. ``generic`` instead of ``pypi``)"""

    namespace: str | None = None
    """Override the purl namespace component"""

    name: str | None = None
    """Override the purl name component (defaults to the package name)"""

    version: str | None = None
    """Override the purl version component (defaults to the resolved version)"""

    repository_url: str | None = None
    """Per-package override for the purl ``repository_url`` qualifier.

    Overrides the global ``sbom.repository_url`` setting for this package.
    """

    upstream: str | None = None
    """Full purl string identifying the upstream source package.

    When set, this is used as the upstream identity in the SBOM's
    GENERATED_FROM relationship. Used for packages sourced from
    GitHub/GitLab rather than PyPI.

    When absent, the upstream purl is auto-derived from the downstream
    purl without the ``repository_url`` qualifier.
    """

    @pydantic.field_validator("upstream")
    @classmethod
    def validate_upstream(cls, value: str | None) -> str | None:
        """Reject ``upstream`` values that cannot be parsed as a purl.

        The value itself is returned unchanged; parsing is only used as
        a syntax check.

        :raises ValueError: if ``value`` is not a valid purl string.
            Pydantic reports it as a validation error on ``upstream``.
        """
        if value is None:
            return None

        # Imported here so the check does not depend on a module-level
        # import of packageurl in this file.
        from packageurl import PackageURL

        try:
            PackageURL.from_string(value)
        except ValueError as err:
            raise ValueError(f"invalid upstream purl {value!r}: {err}") from err
        return value
def _compose_purl(
    *,
    name: str,
    version: Version,
    pbi: PackageBuildInfo,
    sbom_settings: SbomSettings,
    qualifiers: dict[str, str] | None = None,
) -> PackageURL:
    """Assemble a purl from per-package overrides and global defaults.

    Shared by the downstream and upstream builders so both resolve
    type, namespace, name and version by exactly the same rules:

    * type: ``purl.type`` override, else ``sbom_settings.purl_type``
    * namespace: ``purl.namespace`` override, else none
    * name: ``purl.name`` override, else ``name``
    * version: ``purl.version`` override, else ``str(version)``

    ``qualifiers`` is passed through to ``PackageURL`` unchanged.
    """
    overrides = pbi.purl_config
    if overrides is None:
        purl_type = sbom_settings.purl_type
        namespace = None
        purl_name = name
        purl_version = str(version)
    else:
        # ``or`` rather than ``is None``: an empty-string override also
        # falls back to the default value.
        purl_type = overrides.type or sbom_settings.purl_type
        namespace = overrides.namespace
        purl_name = overrides.name or name
        purl_version = overrides.version or str(version)

    return PackageURL(
        type=purl_type,
        namespace=namespace,
        name=purl_name,
        version=purl_version,
        qualifiers=qualifiers,
    )


def _build_downstream_purl(
    *,
    name: str,
    version: Version,
    pbi: PackageBuildInfo,
    sbom_settings: SbomSettings,
) -> PackageURL:
    """Build the downstream package URL for the wheel.

    A purl is constructed from ``PurlConfig`` field overrides
    (per-package) falling back to global defaults. A
    ``repository_url`` qualifier is added when either the per-package
    config or the global SBOM settings provide one; the per-package
    value wins.
    """
    overrides = pbi.purl_config
    repo_url = (
        overrides.repository_url if overrides is not None else None
    ) or sbom_settings.repository_url
    qualifiers = {"repository_url": repo_url} if repo_url else None

    return _compose_purl(
        name=name,
        version=version,
        pbi=pbi,
        sbom_settings=sbom_settings,
        qualifiers=qualifiers,
    )


def _build_upstream_purl(
    *,
    name: str,
    version: Version,
    pbi: PackageBuildInfo,
    sbom_settings: SbomSettings,
) -> PackageURL:
    """Build the upstream source package URL.

    If ``upstream`` is set in the per-package ``PurlConfig``, it is
    parsed and used as-is. Otherwise, the upstream purl is derived from
    the same base as the downstream purl but without the
    ``repository_url`` qualifier.

    :raises ValueError: if the configured ``upstream`` string is not a
        valid purl. The message names the package and the bad value.
    """
    overrides = pbi.purl_config
    if overrides is not None and overrides.upstream:
        try:
            return PackageURL.from_string(overrides.upstream)
        except ValueError as err:
            # Re-raise with context so the failing package setting can
            # be located; the exception type is unchanged.
            raise ValueError(
                f"invalid upstream purl {overrides.upstream!r} "
                f"for package {name!r}: {err}"
            ) from err

    return _compose_purl(
        name=name,
        version=version,
        pbi=pbi,
        sbom_settings=sbom_settings,
    )
""" sbom_settings = ctx.settings.sbom_settings if sbom_settings is None: @@ -73,26 +108,48 @@ def generate_sbom( namespace = f"{sbom_settings.namespace}/{name}-{version}.spdx.json" - package_entry: dict[str, typing.Any] = { + downstream = _build_downstream_purl( + name=name, + version=version, + pbi=pbi, + sbom_settings=sbom_settings, + ) + upstream = _build_upstream_purl( + name=name, + version=version, + pbi=pbi, + sbom_settings=sbom_settings, + ) + + wheel_entry: dict[str, typing.Any] = { "SPDXID": "SPDXRef-wheel", - "name": name, - "versionInfo": str(version), + "name": downstream.name, + "versionInfo": downstream.version or str(version), "downloadLocation": "NOASSERTION", "supplier": sbom_settings.supplier, + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": downstream.to_string(), + } + ], } - purl = _build_purl( - package_name=name, - package_version=version, - purl_override=pbi.purl, - ) - package_entry["externalRefs"] = [ - { - "referenceCategory": "PACKAGE-MANAGER", - "referenceType": "purl", - "referenceLocator": purl, - } - ] + upstream_entry: dict[str, typing.Any] = { + "SPDXID": "SPDXRef-upstream", + "name": upstream.name, + "versionInfo": upstream.version or str(version), + "downloadLocation": "NOASSERTION", + "supplier": "NOASSERTION", + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": upstream.to_string(), + } + ], + } doc: dict[str, typing.Any] = { "spdxVersion": "SPDX-2.3", @@ -104,13 +161,18 @@ def generate_sbom( "created": timestamp, "creators": creators, }, - "packages": [package_entry], + "packages": [wheel_entry, upstream_entry], "relationships": [ { "spdxElementId": "SPDXRef-DOCUMENT", "relationshipType": "DESCRIBES", "relatedSpdxElement": "SPDXRef-wheel", }, + { + "spdxElementId": "SPDXRef-wheel", + "relationshipType": "GENERATED_FROM", + "relatedSpdxElement": "SPDXRef-upstream", + }, ], } return doc diff --git 
a/tests/conftest.py b/tests/conftest.py index d3de5ebb..b9927261 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -86,7 +86,7 @@ def testdata_context( def make_sbom_ctx( tmp_path: pathlib.Path, sbom_settings: SbomSettings | None = None, - purl: str | None = None, + package_overrides: dict[str, typing.Any] | None = None, ) -> context.WorkContext: """Create a minimal WorkContext with SBOM settings.""" settings_file = packagesettings.SettingsFile(sbom=sbom_settings) @@ -97,10 +97,10 @@ def make_sbom_ctx( variant="cpu", max_jobs=None, ) - if purl is not None: + if package_overrides is not None: ps = packagesettings.PackageSettings.from_mapping( "test-pkg", - {"purl": purl}, + package_overrides, source="test", has_config=True, ) diff --git a/tests/test_sbom.py b/tests/test_sbom.py index da331552..3db57362 100644 --- a/tests/test_sbom.py +++ b/tests/test_sbom.py @@ -41,8 +41,8 @@ def test_generate_sbom_structure(tmp_path: pathlib.Path) -> None: _validate_spdx(doc) -def test_generate_sbom_default_settings(tmp_path: pathlib.Path) -> None: - """Verify defaults when no custom settings are provided.""" +def test_generate_sbom_default_purls(tmp_path: pathlib.Path) -> None: + """Verify default purls use pkg:pypi without qualifiers.""" ctx = make_sbom_ctx(tmp_path, sbom_settings=SbomSettings()) doc = sbom.generate_sbom( ctx=ctx, @@ -50,15 +50,34 @@ def test_generate_sbom_default_settings(tmp_path: pathlib.Path) -> None: version=Version("2.0.0"), ) - pkg = doc["packages"][0] - assert pkg["supplier"] == "NOASSERTION" - assert pkg["externalRefs"][0]["referenceLocator"] == "pkg:pypi/my-package@2.0.0" - assert doc["documentNamespace"] == ( - "https://spdx.org/spdxdocs/my-package-2.0.0.spdx.json" + wheel = doc["packages"][0] + upstream = doc["packages"][1] + assert wheel["externalRefs"][0]["referenceLocator"] == "pkg:pypi/my-package@2.0.0" + assert ( + upstream["externalRefs"][0]["referenceLocator"] == "pkg:pypi/my-package@2.0.0" ) _validate_spdx(doc) +def 
test_generate_sbom_repository_url_qualifier(tmp_path: pathlib.Path) -> None: + """Verify global repository_url adds qualifier to downstream but not upstream.""" + settings = SbomSettings(repository_url="https://packages.redhat.com") + ctx = make_sbom_ctx(tmp_path, sbom_settings=settings) + doc = sbom.generate_sbom( + ctx=ctx, + req=Requirement("numpy==1.26.0"), + version=Version("1.26.0"), + ) + + wheel = doc["packages"][0] + upstream = doc["packages"][1] + assert wheel["externalRefs"][0]["referenceLocator"] == ( + "pkg:pypi/numpy@1.26.0?repository_url=https://packages.redhat.com" + ) + assert upstream["externalRefs"][0]["referenceLocator"] == "pkg:pypi/numpy@1.26.0" + _validate_spdx(doc) + + def test_generate_sbom_custom_settings(tmp_path: pathlib.Path) -> None: """Verify custom supplier, namespace, and creators are used.""" settings = SbomSettings( @@ -73,8 +92,8 @@ def test_generate_sbom_custom_settings(tmp_path: pathlib.Path) -> None: version=Version("2.0.0"), ) - pkg = doc["packages"][0] - assert pkg["supplier"] == "Organization: ExampleCo" + wheel = doc["packages"][0] + assert wheel["supplier"] == "Organization: ExampleCo" assert doc["documentNamespace"] == ( "https://www.example.com/my-package-2.0.0.spdx.json" ) @@ -84,12 +103,12 @@ def test_generate_sbom_custom_settings(tmp_path: pathlib.Path) -> None: _validate_spdx(doc) -def test_generate_sbom_purl_override(tmp_path: pathlib.Path) -> None: - """Verify per-package purl override is used with template substitution.""" +def test_generate_sbom_purl_field_overrides(tmp_path: pathlib.Path) -> None: + """Verify individual purl field overrides work.""" ctx = make_sbom_ctx( tmp_path, sbom_settings=SbomSettings(), - purl="pkg:generic/{name}@{version}", + package_overrides={"purl": {"type": "generic", "name": "custom-name"}}, ) doc = sbom.generate_sbom( ctx=ctx, @@ -97,24 +116,71 @@ def test_generate_sbom_purl_override(tmp_path: pathlib.Path) -> None: version=Version("1.0.0"), ) - pkg = doc["packages"][0] - ext_refs 
= pkg["externalRefs"] - assert len(ext_refs) == 1 - assert ext_refs[0]["referenceLocator"] == "pkg:generic/test-pkg@1.0.0" + wheel = doc["packages"][0] + upstream = doc["packages"][1] + assert wheel["externalRefs"][0]["referenceLocator"] == ( + "pkg:generic/custom-name@1.0.0" + ) + # Field overrides carry through to upstream (without qualifiers) + assert upstream["externalRefs"][0]["referenceLocator"] == ( + "pkg:generic/custom-name@1.0.0" + ) _validate_spdx(doc) -def test_generate_sbom_default_purl(tmp_path: pathlib.Path) -> None: - """Verify default pkg:pypi purl is used when no override is set.""" - ctx = make_sbom_ctx(tmp_path, sbom_settings=SbomSettings()) +def test_generate_sbom_package_repository_url_override(tmp_path: pathlib.Path) -> None: + """Verify per-package repository_url overrides the global value.""" + ctx = make_sbom_ctx( + tmp_path, + sbom_settings=SbomSettings(repository_url="https://packages.redhat.com"), + package_overrides={ + "purl": {"repository_url": "https://mirror.example.com/simple"}, + }, + ) doc = sbom.generate_sbom( ctx=ctx, - req=Requirement("test==0.1.0"), - version=Version("0.1.0"), + req=Requirement("test-pkg==1.0.0"), + version=Version("1.0.0"), ) - pkg = doc["packages"][0] - assert pkg["externalRefs"][0]["referenceLocator"] == "pkg:pypi/test@0.1.0" + wheel = doc["packages"][0] + upstream = doc["packages"][1] + assert wheel["externalRefs"][0]["referenceLocator"] == ( + "pkg:pypi/test-pkg@1.0.0?repository_url=https://mirror.example.com/simple" + ) + # Upstream never gets repository_url + assert upstream["externalRefs"][0]["referenceLocator"] == "pkg:pypi/test-pkg@1.0.0" + _validate_spdx(doc) + + +def test_generate_sbom_upstream_purl_override(tmp_path: pathlib.Path) -> None: + """Verify upstream purl override for GitHub-sourced packages.""" + ctx = make_sbom_ctx( + tmp_path, + sbom_settings=SbomSettings(repository_url="https://packages.redhat.com"), + package_overrides={ + "purl": {"upstream": 
"pkg:github/vllm-project/bart-plugin@v0.2.0"}, + }, + ) + doc = sbom.generate_sbom( + ctx=ctx, + req=Requirement("test-pkg==0.2.0"), + version=Version("0.2.0"), + ) + + wheel = doc["packages"][0] + upstream = doc["packages"][1] + # Downstream has repository_url qualifier + assert ( + "repository_url=https://packages.redhat.com" + in (wheel["externalRefs"][0]["referenceLocator"]) + ) + # Upstream identity comes from the override purl + assert upstream["name"] == "bart-plugin" + assert upstream["versionInfo"] == "v0.2.0" + assert upstream["externalRefs"][0]["referenceLocator"] == ( + "pkg:github/vllm-project/bart-plugin@v0.2.0" + ) _validate_spdx(doc) @@ -127,15 +193,15 @@ def test_generate_sbom_canonicalizes_name(tmp_path: pathlib.Path) -> None: version=Version("1.0.0"), ) - pkg = doc["packages"][0] - assert pkg["name"] == "my-package" + wheel = doc["packages"][0] + assert wheel["name"] == "my-package" assert doc["name"] == "my-package-1.0.0" - assert pkg["externalRefs"][0]["referenceLocator"] == "pkg:pypi/my-package@1.0.0" + assert "pkg:pypi/my-package@1.0.0" in (wheel["externalRefs"][0]["referenceLocator"]) _validate_spdx(doc) -def test_generate_sbom_describes_relationship(tmp_path: pathlib.Path) -> None: - """Verify the DESCRIBES relationship exists.""" +def test_generate_sbom_relationships(tmp_path: pathlib.Path) -> None: + """Verify DESCRIBES and GENERATED_FROM relationships.""" ctx = make_sbom_ctx(tmp_path, sbom_settings=SbomSettings()) doc = sbom.generate_sbom( ctx=ctx, @@ -144,10 +210,30 @@ def test_generate_sbom_describes_relationship(tmp_path: pathlib.Path) -> None: ) rels = doc["relationships"] - assert len(rels) == 1 + assert len(rels) == 2 assert rels[0]["spdxElementId"] == "SPDXRef-DOCUMENT" assert rels[0]["relationshipType"] == "DESCRIBES" assert rels[0]["relatedSpdxElement"] == "SPDXRef-wheel" + assert rels[1]["spdxElementId"] == "SPDXRef-wheel" + assert rels[1]["relationshipType"] == "GENERATED_FROM" + assert rels[1]["relatedSpdxElement"] == 
"SPDXRef-upstream" + _validate_spdx(doc) + + +def test_generate_sbom_upstream_supplier(tmp_path: pathlib.Path) -> None: + """Verify upstream package always has supplier NOASSERTION.""" + settings = SbomSettings(supplier="Organization: Red Hat") + ctx = make_sbom_ctx(tmp_path, sbom_settings=settings) + doc = sbom.generate_sbom( + ctx=ctx, + req=Requirement("numpy==1.26.0"), + version=Version("1.26.0"), + ) + + wheel = doc["packages"][0] + upstream = doc["packages"][1] + assert wheel["supplier"] == "Organization: Red Hat" + assert upstream["supplier"] == "NOASSERTION" _validate_spdx(doc) diff --git a/tests/testdata/sbom-context/overrides/settings.yaml b/tests/testdata/sbom-context/overrides/settings.yaml new file mode 100644 index 00000000..2dd57860 --- /dev/null +++ b/tests/testdata/sbom-context/overrides/settings.yaml @@ -0,0 +1,5 @@ +sbom: + supplier: "Organization: ExampleCo" + namespace: "https://www.example.com" + creators: + - "Organization: ExampleCo" diff --git a/tests/testdata/sbom-context/overrides/settings/test_purl_fields.yaml b/tests/testdata/sbom-context/overrides/settings/test_purl_fields.yaml new file mode 100644 index 00000000..5683140d --- /dev/null +++ b/tests/testdata/sbom-context/overrides/settings/test_purl_fields.yaml @@ -0,0 +1,4 @@ +purl: + type: generic + name: custom-name + upstream: "pkg:github/org/repo@v1.0.0"