Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 105 additions & 29 deletions .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,36 @@ on:
release:
type: [published]

permissions:
contents: read
id-token: write

env:
TEST_TAG: dessimozlab/fastoma:test
REGISTRY_IMAGE: dessimozlab/fastoma

jobs:

build:

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- platform: linux/amd64
runner: ubuntu-latest
- platform: linux/arm64
runner: ubuntu-24.04-arm

runs-on: ${{ matrix.runner }}

steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV

- name: Checkout
uses: actions/checkout@v6.0.1
uses: actions/checkout@v6
with:
submodules: recursive

Expand All @@ -26,8 +44,7 @@ jobs:
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: |
dessimozlab/fastoma
images: ${{ env.REGISTRY_IMAGE }}
# generate Docker tags based on the following events/attributes
tags: |
type=schedule
Expand All @@ -47,40 +64,99 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Build and export to docker for testing
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

- name: Build and push by digest only
id: build
uses: docker/build-push-action@v6
with:
context: .
load: true
tags: ${{ env.TEST_TAG }}
platforms: ${{ matrix.platform }}
tags: ${{ env.REGISTRY_IMAGE }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,push-by-digest=true,name-canonical=true,push=true
provenance: mode=max
sbom: true

- name: Export digest
run: |
mkdir -p ${{ runner.temp }}/digests
digest="${{ steps.build.outputs.digest }}"
touch "${{ runner.temp }}/digests/${digest#sha256:}"

#- name: Test
# run: |
# docker run --rm -i -v $PWD/tests:/input -v $PWD/tests/:/reads -v $PWD/output:/out -v $PWD/run:/run ${{ env.TEST_TAG }} --tree --standalone_path /input/marker_genes --dna_reference /input/cds-marker_genes.fasta.gz --reads /reads/sample_1.fastq --output_path /out
# if [ ! -f output/tree_sample_1.nwk ] ; then exit 1; fi
- name: Upload digest
uses: actions/upload-artifact@v7
with:
name: digests-${{ env.PLATFORM_PAIR }}
path: ${{ runner.temp }}/digests/*
if-no-files-found: error
retention-days: 1

merge:
runs-on: ubuntu-latest
needs:
- build
steps:
- name: Download digests
uses: actions/download-artifact@v8
with:
path: ${{ runner.temp }}/digests
pattern: digests-*
merge-multiple: true

- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

- name: Set platforms
id: set_platforms
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
tags: |
type=schedule
type=ref,event=branch
type=ref,event=pr
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=semver,pattern={{major}}
type=sha
labels: |
org.opencontainers.image.source=${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}

- name: Create manifest list and push
working-directory: ${{ runner.temp }}/digests
run: |
echo "github ref: ${GITHUB_REF}"
if [[ "${GITHUB_REF##*/}" == "main" || "${GITHUB_REF##*/}" == "dev" || "${GITHUB_REF}" == "refs/tags/"* ]]; then
echo "platforms=linux/amd64,linux/arm64" >> $GITHUB_OUTPUT
else
echo "platforms=linux/amd64" >> $GITHUB_OUTPUT
fi

- name: Build and push
uses: docker/build-push-action@v6
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)

- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}

publish-doc:
runs-on: ubuntu-latest
needs:
- merge

steps:
- name: Checkout
uses: actions/checkout@v6

- name: Update repo description
uses: peter-evans/dockerhub-description@v5
with:
context: .
platforms: ${{ steps.set_platforms.outputs.platforms }}
push: true
#${{ github.event_name != 'push' && github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
repository: ${{ env.REGISTRY_IMAGE }}
readme-filepath: ./README.md
16 changes: 9 additions & 7 deletions .github/workflows/nf-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ on:
env:
NFT_DIFF: "pdiff"
NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2"
NFT_VER: "0.9.2"
NFT_WORKDIR: "${{ github.workspace }}/nf-test-work"
NFT_VER: "0.9.4"
NFT_WORKDIR: "/tmp/nft-test"
NXF_WORK: "/tmp/nxf-work"
NXF_ANSI_LOG: false
NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity
NXF_CONDA_CACHEDIR: "/tmp/conda"

concurrency:
group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
Expand All @@ -40,15 +42,15 @@ jobs:

steps:
- name: Check out pipeline code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v4
uses: actions/checkout@0c366fd6a839edf440554fa01a7085ccba70ac98 # v4
with:
fetch-depth: 0

- uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v5
with:
python-version: "3.11"

- uses: actions/setup-java@f2beeb24e141e01a676f977032f5a29d81c9e27e # v4
- uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v4
with:
distribution: "temurin"
java-version: "17"
Expand Down Expand Up @@ -78,7 +80,7 @@ jobs:

- name: Set up miniconda
if: matrix.profile == 'conda'
uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3
uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3
with:
miniconda-version: "latest"
auto-update-conda: true
Expand Down Expand Up @@ -143,7 +145,7 @@ jobs:

- name: Upload test results
if: always() # run even if tests fail
uses: actions/upload-artifact@v6
uses: actions/upload-artifact@v7
with:
name: nf-test-results-${{ matrix.filter }}-${{ matrix.profile }}-${{ matrix.NXF_VER }}-${{ matrix.shard }}
path: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish-pypi-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
id-token: write

steps:
- uses: actions/checkout@v6.0.1
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
Expand Down
22 changes: 14 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,21 @@ RUN apt-get update \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /src
RUN pip install --upgrade hatch pip
RUN pip install --upgrade pip \
&& pip install "hatch<1.17" "virtualenv<20.26"
COPY pyproject.toml .
RUN python -m venv /app \
&& hatch dep show requirements --all > requirements.txt \
&& /app/bin/pip install wheel setuptools \
&& /app/bin/pip install -r requirements.txt

RUN hatch dep show requirements --all > requirements.txt \
&& pip install wheel setuptools -r requirements.txt

COPY . .
RUN ls -la \
&& hatch build \
&& ls -la dist/ \
RUN hatch build \
&& ls -la dist/

# Create a clean venv for runtime and install the wheel
RUN python -m venv /app \
&& /app/bin/pip install --upgrade pip wheel setuptools \
&& /app/bin/pip install -r requirements.txt \
&& /app/bin/pip install dist/*.whl


Expand All @@ -44,3 +48,5 @@ RUN apt-get update \

COPY --from=builder /app /app
ENV PATH="/app/bin:$PATH"

RUN python -c "import FastOMA; print(FastOMA.__version__)"
14 changes: 10 additions & 4 deletions FastOMA/_infer_subhog.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from ._utils_subhog import MSAFilter, MSAFilterElbow, MSAFilterTrimAL

from .zoo.utils import unique
from .zoo.wrappers import WrapperError

low_so_detection = True # detection of proteins with low species overlap score in gene tree
fragment_detection = True # this also need to be consistent in _hog_class.py
Expand Down Expand Up @@ -76,10 +77,15 @@ def read_infer_xml_rhog(rhogid, inferhog_concurrent_on, pickles_rhog_folder, pi
species_names_rhog = list(set(species_names_rhog))
logger.info("Number of unique species in rHOG " + rhogid + " is " + str(len(species_names_rhog)) + ".")

if inferhog_concurrent_on: # for big HOG we use parallelization at the level taxonomic level using concurrent
infer_hogs_concurrent(species_tree, rhogid, pickles_subhog_folder_all, rhogs_fa_folder, conf_infer_subhhogs)
else:
infer_hogs_for_rhog_levels_recursively(species_tree, rhogid, pickles_subhog_folder_all, rhogs_fa_folder, conf_infer_subhhogs)
try:
if inferhog_concurrent_on: # for big HOG we use parallelization at the level taxonomic level using concurrent
infer_hogs_concurrent(species_tree, rhogid, pickles_subhog_folder_all, rhogs_fa_folder, conf_infer_subhhogs)
else:
infer_hogs_for_rhog_levels_recursively(species_tree, rhogid, pickles_subhog_folder_all, rhogs_fa_folder, conf_infer_subhhogs)
except WrapperError as e:
logger.exception("Error of external tool during subhog inference: %s", str(e))
sys.exit(getattr(e, "exit_code", 1))


##### Now read the final pickle file for this rootHOG
root_node_name = species_tree.name
Expand Down
16 changes: 15 additions & 1 deletion FastOMA/zoo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,18 @@ def unique(seq):
return [x for x in seq if x not in seen and not seen.add(x)]



def summarize_long_message(message: str, head_chars: int = 1000, tail_lines: int = 50) -> str:
    """
    Summarize a potentially long message string.

    Returns the message unchanged when it is short enough; otherwise
    returns the first ``head_chars`` characters (suffixed with an ellipsis
    when truncated), a ``...`` separator line, and the last ``tail_lines``
    lines of the message.  A ``None`` message is treated as empty.
    """
    text = message or ""
    all_lines = text.splitlines()
    head_truncated = len(text) > head_chars
    tail_truncated = len(all_lines) > tail_lines
    if not head_truncated and not tail_truncated:
        return text
    head = text[:head_chars] + ("…" if head_truncated else "")
    tail_source = all_lines[-tail_lines:] if tail_truncated else all_lines
    tail = "\n".join(tail_source)
    return f"{head}\n...\n{tail}"
4 changes: 3 additions & 1 deletion FastOMA/zoo/wrappers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
class WrapperError(Exception):
    """Raised when an external wrapped tool (aligner, tree builder, ...) fails.

    Carries the process exit status so callers can propagate it, e.g. via
    ``sys.exit(err.exit_code)``.

    :param message: human-readable description of the failure.
    :param exit_code: exit status to report to the caller; defaults to 1.
    """

    def __init__(self, message, exit_code=1):
        super().__init__(message)
        # Preserve the tool's exit status (e.g. 137 for OOM-kill,
        # 128+signal for signal termination) for the caller.
        self.exit_code = exit_code


32 changes: 31 additions & 1 deletion FastOMA/zoo/wrappers/aligners/mafft.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .base_aligner import Aligner, AlignmentInput, DataType
from ...seq_utils.utils import iter_seqrecs_from_any
from ...wrappers import WrapperError
from ...utils import summarize_long_message
from ..options import StringOption, FlagOption, IntegerOption, FloatOption, MultiOption, OptionSet
import tempfile
import logging
Expand Down Expand Up @@ -120,7 +121,8 @@ def __call__(self, *args, **kwargs):
logger.debug('Output of Mafft: stdout={}; stderr={}'.format(output, error))
if len(output) == 0 and len(error) > 0:
logger.warning('is MAFFT_BINARIES set correctly: {}'.format(os.getenv('MAFFT_BINARIES', '')))
raise WrapperError('Mafft did not compute any alignments. StdErr: {}'.format(error))
logger.warning("Mafft did not compute any alignments. StdErr:\n%s", summarize_long_message(error))
raise WrapperError('Mafft did not compute any alignments')
self.result = self._read_result(output) # store result
self.stdout = output
self.stderr = error
Expand All @@ -140,6 +142,20 @@ def _call(self, filename, *args, **kwargs):
"""
self.cli('{} {}'.format(self.command(), filename),
wait=True)

ret = self.cli.process.returncode
if ret != 0:
logger.error('Mafft returned non-zero exit status: {}'.format(ret))
logger.error('Output of Mafft:\n\n%s\nstdout=\n%s\n%s\n\n%s\nstderr=\n%s\n%s\n\n',
"=" * 30, "=" * 30, summarize_long_message(self.cli.get_stdout()),
"=" * 30, "=" * 30, summarize_long_message(self.cli.get_stderr()))
if ret < 0:
sig = -ret
raise WrapperError(f'Mafft was terminated by signal {sig}', exit_code=128 + sig)
else:
if ret == 1 and (was_oom_killed() or "Killed" in self.cli.get_stderr()):
raise WrapperError(f'Mafft was killed by the kernel due to running out of memory', exit_code=137)
raise WrapperError(f'Mafft exited with code {ret}', exit_code=ret)
return self.cli.get_stdout(), self.cli.get_stderr()

def command(self):
Expand Down Expand Up @@ -334,3 +350,17 @@ def get_default_options():
StringOption('--merge', '', active=False),
IntegerOption('--thread', -1, active=False),
])


def was_oom_killed():
    """
    Check if the process's cgroup recorded an out-of-memory kill.

    Reads the cgroup-v2 ``memory.events`` file and returns ``True`` when
    its ``oom_kill`` counter is positive.  Returns ``False`` when the file
    is missing (e.g. cgroup-v1 hosts), unreadable, or malformed — this is
    used on an error-reporting path, so it must never raise.
    """
    try:
        with open("/sys/fs/cgroup/memory.events") as f:
            for line in f:
                if line.startswith("oom_kill"):
                    fields = line.split()
                    # guard against a malformed line with no counter value
                    return len(fields) > 1 and int(fields[1]) > 0
    except (OSError, ValueError):
        # OSError covers FileNotFoundError (cgroup v1) and PermissionError;
        # ValueError covers a non-integer counter field.
        return False
    return False
Loading
Loading