Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.git
.github
.ipython
.jupyter
.pytest_cache
.venv
.vscode
CODE_OF_CONDUCT.md
CONTRIBUTING.md
Dockerfile
LICENSE
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: uv sync --dev --group local
run: uv sync --dev

- name: Run local tests
shell: bash
Expand Down
24 changes: 16 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
# Dockerfile for running dlt pipelines

# Dockerfile is based heavily on the example uv dockerfile:
# https://github.com/astral-sh/uv-docker-example

# Use a Python image with uv pre-installed
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
FROM ghcr.io/astral-sh/uv:python3.13-trixie-slim

# Install the project into `/app`
WORKDIR /app
# Keep apt non-interactive during the build only. ARG (unlike ENV) is not
# persisted into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# Install tini (the init process the entrypoint script execs) and git, then
# remove the apt package lists in the same layer so they do not bloat the image.
# NOTE(review): git is presumably needed for git-sourced dependencies -- confirm.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        git \
        tini \
    && rm -rf /var/lib/apt/lists/*

# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1
Expand All @@ -16,6 +24,9 @@ ENV UV_NO_DEV=1
# Ensure installed tools can be executed out of the box
ENV UV_TOOL_BIN_DIR=/usr/local/bin

# Install the project into `/app`
WORKDIR /app

# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
Expand All @@ -35,10 +46,7 @@ ENV PATH="/app/.venv/bin:$PATH"
RUN groupadd --system --gid 999 nonroot \
&& useradd --system --gid 999 --uid 999 --create-home nonroot

COPY --chmod=+x ./scripts/entrypoint.sh /app/
# Use the non-root user to run our application
USER nonroot

# Reset the entrypoint, don't invoke `uv`
ENTRYPOINT []

# CMD ["uv", "run", "python", "--version"]
# Run the dispatcher script that was copied into /app. The absolute path keeps
# the entrypoint valid even when the working directory is overridden at runtime
# (e.g. `docker run --workdir ...`).
ENTRYPOINT ["/app/entrypoint.sh"]
28 changes: 15 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@ authors = [
]

dependencies = [
"biopython>=1.86",
"click>=8.3.1",
"dlt[deltalake,filesystem,parquet]>=1.21.0",
"lxml>=6.0.2",
"pydantic>=2.12.5",
"pydantic-settings>=2.12.0",
"ruff>=0.14.14",
]

[project.scripts]
Expand All @@ -25,35 +23,40 @@ uniref_pipeline = "cdm_data_loader_utils.pipelines.uniref_pipeline:cli"

[dependency-groups]
dev = [
"berdl-notebook-utils>=0.0.1",
"biopython>=1.86",
"pytest>=9.0.2",
"pytest-asyncio>=1.3.0",
"pytest-cov>=7.0.0",
"pytest-env>=1.2.0",
"ruff>=0.14.14",
]
experimental = [
"mutmut>=3.4.0",
]
local = [
"berdl-notebook-utils>=0.0.1",
]
minio = [
"boto3[crt]>=1.42.0",
"tqdm>=4.67.3",
]
models = [
"genson>=1.3.0",
"json2python-models>=0.3.1",
]
pipeline = []
xml = [
"xmlschema>=4.3.1",
"xsdata[cli,lxml]>=26.1",
]

[project.optional-dependencies]
# for minio interactions -- see utils/minio.py for more details
minio = [
"boto3[crt]>=1.42.0",
"tqdm>=4.67.3",
]

biopython = [
"biopython>=1.86",
]

[tool.ruff]
line-length = 120
target-version = "py313"

# Exclude a variety of commonly ignored directories.
exclude = [
"__pypackages__",
Expand Down Expand Up @@ -127,7 +130,7 @@ select = [
"SLOT", # flake8-slots
"SIM", # flake8-simplify
"TID", # flake8-tidy-imports
"TCH", # flake8-type-checking
"TC", # flake8-type-checking
"INT", # flake8-gettext
"ARG", # flake8-unused-arguments
"PTH", # flake8-use-pathlib
Expand All @@ -149,7 +152,6 @@ select = [

# Allow autofix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []

ignore = [
# D200: unnecessary-multiline-docstring
Expand Down
26 changes: 26 additions & 0 deletions scripts/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#
# Container entrypoint: dispatch to one of the project's pipeline CLIs.
#
# Usage: entrypoint.sh {uniref|uniprot} [args...]
#
# The first argument selects the pipeline; every remaining argument is passed
# through unchanged. The selected pipeline is exec'd under tini, so this shell
# is replaced by tini rather than lingering as a parent process.
#
# Exit status: 1 when no command or an unknown command is given; otherwise
# whatever the exec'd command returns.
set -euo pipefail

# Ensure at least one argument is provided
if [ "$#" -eq 0 ]; then
    # Usage errors are diagnostics: send them to stderr so they are not mixed
    # into any stdout a caller may be capturing.
    echo "Usage: $0 {uniref|uniprot} [args...]" >&2
    exit 1
fi

cmd="$1"
shift

# Map the sub-command onto its console script name.
case "$cmd" in
    uniref | uniprot)
        pipeline="${cmd}_pipeline"
        ;;
    *)
        echo "Error: unknown command '$cmd'; valid commands are 'uniref' or 'uniprot'." >&2
        exit 1
        ;;
esac

# Run the selected pipeline with any additional arguments via tini
exec /usr/bin/tini -- uv run "$pipeline" "$@"
36 changes: 19 additions & 17 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading