Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions src/fromager/sdist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""Helpers to normalize arbitrary source directories into valid sdist layout.

Git clones and non-standard tarballs (e.g. GitHub release assets) lack the
``PKG-INFO`` file and standardized directory naming that PEP 517 build
backends expect. These helpers bridge that gap *before* the build backend
is available -- only the package name and version are required.
"""

from __future__ import annotations

import logging
import pathlib
import shutil
import tarfile

from packaging.version import Version

from . import dependencies, overrides, tarballs

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

PKG_INFO_TEMPLATE = """\
Metadata-Version: 2.2
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason to bump the metadata version?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LalatenduMohanty mentioned it in the issue #554 (comment)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which packages are affected and do they use custom build_sdist plugin overrides?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jlarkin09 you should be able to run fromager+ with this PR against downstream bootstrap and build. We should at least run an analysis to see if this will break some packages.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tiran You asked to bump to metadata version 2.2 in your original issue description.

Name: {name}
Version: {version}
Summary: {summary}
"""


def _write_pkg_info(
    directory: pathlib.Path,
    name: str,
    version: Version,
) -> pathlib.Path:
    """Write a stub ``PKG-INFO`` into *directory* if one does not exist.

    An existing ``PKG-INFO`` file is never modified.  When the file is
    missing, a stub is rendered from ``PKG_INFO_TEMPLATE`` using *name*,
    ``str(version)`` and ``dependencies.STUB_PKG_INFO_SUMMARY``.

    Args:
        directory: Directory that should contain the ``PKG-INFO`` file.
        name: Distribution name written to the ``Name`` field.
        version: Package version written to the ``Version`` field.

    Returns:
        The path to the ``PKG-INFO`` file.
    """
    pkg_info_file = directory / "PKG-INFO"
    if pkg_info_file.is_file():
        # Real metadata (or an earlier stub) is already present; keep it.
        return pkg_info_file

    logger.info("writing stub PKG-INFO in %s", directory)
    content = PKG_INFO_TEMPLATE.format(
        name=name,
        version=str(version),
        summary=dependencies.STUB_PKG_INFO_SUMMARY,
    )
    # Write UTF-8 explicitly instead of relying on the locale's default
    # encoding, so the generated metadata is identical on every platform.
    pkg_info_file.write_text(content, encoding="utf-8")
    return pkg_info_file


def make_sdist_directory(
    source_dir: pathlib.Path,
    name: str,
    version: Version,
    *,
    build_dir: pathlib.Path | None = None,
) -> pathlib.Path:
    """Normalize *source_dir* into a valid sdist directory layout.

    The directory is renamed to ``{normalized_name}-{version}`` (using
    :func:`~fromager.overrides.pkgname_to_override_module`) and a stub
    ``PKG-INFO`` is written if missing.  When *build_dir* differs from
    the source root a second ``PKG-INFO`` is placed there for
    ``setuptools-scm`` compatibility.

    Args:
        source_dir: Path to the source directory (git clone or unpacked
            tarball).
        name: Distribution name (e.g. ``req.name``).
        version: Package version.
        build_dir: Optional non-standard build directory inside
            *source_dir*.  Receives its own ``PKG-INFO`` copy.  If it is
            located below *source_dir* it is rebased onto the renamed
            directory; otherwise it is used unchanged.

    Returns:
        Path to the (possibly renamed) source directory.

    Raises:
        RuntimeError: If the directory cannot be renamed, including the
            case where a different file or directory already occupies
            the target name.
    """
    normalized_name = overrides.pkgname_to_override_module(name)
    expected_name = f"{normalized_name}-{version}"

    if source_dir.name != expected_name:
        old_source_dir = source_dir
        desired = source_dir.parent / expected_name
        logger.info(
            "renaming source directory %s -> %s",
            source_dir.name,
            expected_name,
        )
        try:
            # shutil.move() moves the source *into* an existing destination
            # directory instead of replacing it, which would leave us
            # pointing at stale content.  Refuse that case explicitly.  The
            # samefile() check keeps case-only renames working on
            # case-insensitive filesystems, where ``desired`` resolves to
            # the source directory itself.
            if desired.exists() and not desired.samefile(source_dir):
                raise FileExistsError(f"{desired} already exists")
            shutil.move(str(source_dir), str(desired))
        except OSError as err:
            # shutil.Error is a subclass of OSError, so this also covers
            # failures reported by shutil itself.
            raise RuntimeError(
                f"Could not rename {source_dir} to {desired}: {err}"
            ) from err
        source_dir = desired

        # Rebase build_dir so it tracks the renamed parent directory.
        if build_dir is not None and build_dir.is_relative_to(old_source_dir):
            build_dir = source_dir / build_dir.relative_to(old_source_dir)

    _write_pkg_info(source_dir, name, version)

    if build_dir is not None and build_dir != source_dir:
        _write_pkg_info(build_dir, name, version)

    return source_dir


def repack_as_sdist(
    source_dir: pathlib.Path,
    name: str,
    version: Version,
    output_dir: pathlib.Path,
    *,
    build_dir: pathlib.Path | None = None,
) -> pathlib.Path:
    """Repack *source_dir* into a standards-compliant sdist tarball.

    Calls :func:`make_sdist_directory` to normalize the layout first,
    then creates ``{normalized_name}-{version}.tar.gz`` in *output_dir*
    via :func:`fromager.tarballs.tar_reproducible`.  The normalized name
    comes from :func:`~fromager.overrides.pkgname_to_override_module`.
    An existing archive with the same file name is replaced.

    Args:
        source_dir: Path to the source directory.
        name: Distribution name.
        version: Package version.
        output_dir: Directory where the tarball is written.
        build_dir: Optional non-standard build subdirectory.  When set
            the tarball is rooted at *build_dir* (matching
            :func:`~fromager.sources.default_build_sdist` behavior).
            A *build_dir* below *source_dir* follows the directory when
            it is renamed; any other *build_dir* is used as given.

    Returns:
        Path to the created ``.tar.gz`` file.

    Raises:
        RuntimeError: Propagated from :func:`make_sdist_directory` when
            the source directory cannot be renamed.
    """
    old_source_dir = source_dir
    source_dir = make_sdist_directory(source_dir, name, version, build_dir=build_dir)

    # Rebase build_dir after a potential rename inside make_sdist_directory.
    # The is_relative_to() guard mirrors make_sdist_directory: a build_dir
    # outside the source tree is left alone instead of raising ValueError
    # from relative_to().
    if (
        build_dir is not None
        and source_dir != old_source_dir
        and build_dir.is_relative_to(old_source_dir)
    ):
        build_dir = source_dir / build_dir.relative_to(old_source_dir)

    tar_root = build_dir if build_dir is not None else source_dir
    normalized_name = overrides.pkgname_to_override_module(name)
    sdist_filename = output_dir / f"{normalized_name}-{version}.tar.gz"

    # Mode "x:gz" below refuses to overwrite, so drop any previous archive.
    sdist_filename.unlink(missing_ok=True)

    with tarfile.open(sdist_filename, "x:gz", format=tarfile.PAX_FORMAT) as tar:
        # NOTE(review): prefix presumably strips the parent path so member
        # names start with the tar_root directory name -- confirm against
        # tarballs.tar_reproducible.
        tarballs.tar_reproducible(
            tar=tar,
            basedir=tar_root,
            prefix=tar_root.parent,
        )

    logger.info("created sdist archive %s", sdist_filename)
    return sdist_filename
70 changes: 31 additions & 39 deletions src/fromager/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
packagesettings,
pyproject,
resolver,
sdist,
tarballs,
vendor_rust,
)
Expand Down Expand Up @@ -540,6 +541,15 @@ def prepare_source(
source_root_dir=source_root_dir,
version=version,
)
source_root_dir = sdist.make_sdist_directory(
source_root_dir,
req.name,
version,
)
pbi = ctx.package_build_info(req)
build_dir = pbi.build_dir(source_root_dir)
if build_dir != source_root_dir:
sdist._write_pkg_info(build_dir, req.name, version)
else:
logger.info(f"preparing source for {req} from {source_filename}")
prepare_source_details = overrides.find_and_invoke(
Expand Down Expand Up @@ -698,31 +708,27 @@ def default_build_sdist(
build_env: build_environment.BuildEnvironment,
build_dir: pathlib.Path,
) -> pathlib.Path:
# It seems like the "correct" way to do this would be to run the
# PEP 517 API in the source tree we have modified. However, quite
# a few packages assume their source distribution is being built
# from a source code repository checkout and those throw an error
# when we use the interface to try to rebuild the sdist. Since we
# know what we have is an exploded tarball, we just tar it back
# up.
#
# For cases where the PEP 517 approach works, use
# pep517_build_sdist().
"""Rebuild an sdist by re-tarring a previously unpacked source tree.

For cases where the PEP 517 approach works, use
:func:`pep517_build_sdist` instead. Many packages assume the sdist
is built from a repository checkout and error out when the PEP 517
interface is used, so this function simply tars the tree back up.
"""
sdist.make_sdist_directory(
sdist_root_dir,
req.name,
version,
build_dir=build_dir,
)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are discarding the return value. If the directory is renamed (because the source directory name doesn't match
{normalized_name}-{version}), both sdist_root_dir and build_dir still point to the old, now-nonexistent paths.

sdist_filename = ctx.sdists_builds / f"{req.name}-{version}.tar.gz"
if sdist_filename.exists():
sdist_filename.unlink()
ensure_pkg_info(
ctx=ctx,
req=req,
version=version,
sdist_root_dir=sdist_root_dir,
build_dir=build_dir,
)
# The format argument is specified based on
# https://peps.python.org/pep-0517/#build-sdist.
with tarfile.open(sdist_filename, "x:gz", format=tarfile.PAX_FORMAT) as sdist:
with tarfile.open(sdist_filename, "x:gz", format=tarfile.PAX_FORMAT) as sdist_tar:
tarballs.tar_reproducible(
tar=sdist,
tar=sdist_tar,
basedir=build_dir,
prefix=build_dir.parent,
)
Expand Down Expand Up @@ -754,14 +760,6 @@ def pep517_build_sdist(
return ctx.sdists_builds / sdist_filename


PKG_INFO_CONTENT = """\
Metadata-Version: 1.0
Name: {name}
Version: {version}
Summary: {summary}
"""


def ensure_pkg_info(
*,
ctx: context.WorkContext,
Expand All @@ -772,11 +770,11 @@ def ensure_pkg_info(
) -> bool:
"""Ensure that sdist has a PKG-INFO file.

Returns True if PKG-INFO is present, False if file is missing. The
function also updates build_dir if package has a non-standard build
directory. Every sdist must have a PKG-INFO file in the first directory.
The additional PKG-INFO file in build_dir is required for projects
with a non-standard layout and projects using setuptools-scm.
Delegates to :func:`fromager.sdist._write_pkg_info` to create stub
files when missing.

Returns True if PKG-INFO was already present in all directories,
False if any file had to be created.
"""
had_pkg_info = True
directories = [sdist_root_dir]
Expand All @@ -788,13 +786,7 @@ def ensure_pkg_info(
logger.warning(
f"PKG-INFO file is missing from {directory}, creating stub file"
)
pkg_info_file.write_text(
PKG_INFO_CONTENT.format(
name=req.name,
version=str(version),
summary=dependencies.STUB_PKG_INFO_SUMMARY,
)
)
sdist._write_pkg_info(directory, req.name, version)
had_pkg_info = False
return had_pkg_info

Expand Down
Loading
Loading