Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Introducing a new subworkflow to generate a truth VCF with an ensemble approach. Test VCFs are merged and, according to the ensembl_truth rule (the minimum number of callers that must agree), a new truth set is created. This approach is especially important for somatic benchmarks, where a truth set is often missing. [#276](https://github.com/nf-core/variantbenchmarking/pull/276)
- Syntax health, Channel to channel, versions to topic channels. Version update for happy, bedops, picard, ucsc, datavzrd, truvari tools [#277](https://github.com/nf-core/variantbenchmarking/pull/277)
- Bedfile index checking and smoother vcf comparisons [#290](https://github.com/nf-core/variantbenchmarking/pull/290)
- Adding nf-tests to local modules [#295](https://github.com/nf-core/variantbenchmarking/pull/295/), thanks to @aaryanjaitly for initiating some of the processes.
- Replace local module VARIANT_EXTRACTOR with nf-core module VARIANTEXTRACTOR [#295](https://github.com/nf-core/variantbenchmarking/pull/295/)

### `Fixed`

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ The workflow involves several key processes to ensure reliable and reproducible
This initial step ensures consistent formatting and alignment of variants in test and truth VCF files for accurate comparison.

- Subsample if input vcf is multisample ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))
- Homogenization of multi-allelic variants, MNPs and SVs (including imprecise paired breakends and single breakends) ([variant-extractor](https://github.com/EUCANCan/variant-extractor))
- Homogenization of multi-allelic variants, MNPs and SVs to BND (including imprecise paired breakends and single breakends) ([variant-extractor](https://github.com/EUCANCan/variant-extractor))
- Reformatting VCF files from different SV callers ([svync](https://github.com/nvnieuwk/svync))
- Standardize SV variants to BND ([SVTK standardize](https://github.com/broadinstitute/gatk-sv/blob/main/src/svtk/scripts/svtk))
- Decompose SVs to BND ([rtgtools svdecompose](https://cn.animalgenome.org/bioinfo/resources/manuals/RTGOperationsManual.pdf))
Expand Down
4 changes: 3 additions & 1 deletion bin/merge_sompy_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
# Copyright 2025 - GHGA
# Author: Kuebra Narci - @kubranarci
'''
Generates a CSV file from a VCF
Merges CSV files based on CHROM and POS, handling dynamic GT columns.
Each GT column is renamed to include the sample name
(e.g., Sample1_GT). Missing GT values are filled with './.'.
Expected usage:
$ python merge_sompy_features.py $csvs --output ${prefix}.${meta.tag}.csv
Use --help for more information.
Expand Down
17 changes: 8 additions & 9 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ process {

// sv_vcf_conversions

withName: VARIANT_EXTRACTOR {
ext.prefix = { input.baseName - ".vcf" + ".variantextract" }
withName: VARIANTEXTRACTOR {
ext.prefix = { vcf.baseName - ".vcf" + ".variantextract" }
publishDir = [
enabled: false
]
Expand Down Expand Up @@ -409,7 +409,7 @@ process {
]
}

withName: "SPLIT_SOMPY_FEATURES" {
withName: "SOMPY_FEATURES_SPLIT" {
ext.prefix = {params.truth_id ? "${meta.id}.${params.truth_id}.${meta.caller}" : "${meta.id}.truth.${meta.caller}" }
publishDir = [
enabled: false
Expand Down Expand Up @@ -618,7 +618,7 @@ process {
]
}

withName: PLOTS {
withName: PLOTS_METRICS {
ext.prefix = {"${meta.benchmark_tool}_mqc"}
publishDir = [
path: {"${params.outdir}/${params.variant_type}/summary/plots/${meta.benchmark_tool}"},
Expand All @@ -627,7 +627,7 @@ process {
]
}

withName: PLOT_UPSET {
withName: PLOTS_UPSET {
ext.prefix = {"upset_${meta.id}"}
publishDir = [
path: {"${params.outdir}/${params.variant_type}/summary/plots/${meta.id}"},
Expand Down Expand Up @@ -662,7 +662,7 @@ process {
]
}

withName: PLOT_SVLEN_DIST {
withName: PLOTS_SVLEN_DIST {
ext.prefix = {"${meta.id}.${meta.tag}"}
publishDir = [
path: {"${params.outdir}/${params.variant_type}/summary/plots/${meta.id}"},
Expand All @@ -686,7 +686,6 @@ process {
]
}


withName: VCF_TO_CSV {
ext.prefix = {"${meta.id}.${meta.tag}"}
publishDir = [
Expand All @@ -696,7 +695,8 @@ process {
]
}

withName: MERGE_SOMPY_FEATURES {
withName: SOMPY_FEATURES_MERGE {
ext.prefix = {"${meta.id}.${meta.tag}"}
publishDir = [
path: {"${params.outdir}/${params.variant_type}/summary/comparisons/${meta.id}"},
pattern: "*{csv}",
Expand All @@ -705,7 +705,6 @@ process {
}

// VCF2BED tools

withName: "SVTK_VCF2BED" {
ext.args = {"--no-samples"}
publishDir = [
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@
"git_sha": "b1c64e862fb5ed7f13b6f8f3ef8f04e700d41438",
"installed_by": ["modules"]
},
"variantextractor": {
"branch": "master",
"git_sha": "b61b31006844eba0086187863095d79f8fb58220",
"installed_by": ["modules"]
},
"wittyer": {
"branch": "master",
"git_sha": "3d2cd05f05038ead04ec6d8cbcdc26e1608025db",
Expand Down
27 changes: 9 additions & 18 deletions modules/local/custom/bedtools_intersect_bench/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ process BEDTOOLS_INTERSECT_BENCH {
tuple val(meta),path("*FP.bed") , emit: fp
tuple val(meta),path("*FN.bed") , emit: fn
tuple val(meta),path("*converted.bed"), emit: out_bed, optional: true
path "versions.yml" , emit: versions
tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), emit: versions_python, topic: versions
tuple val("${task.process}"), val('bedtools'), eval("bedtools --version | sed 's/bedtools v//g'"), emit: versions_bedtools, topic: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -36,25 +37,15 @@ process BEDTOOLS_INTERSECT_BENCH {
$format \\
$params.genome \\
$args

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
END_VERSIONS
"""
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${meta.id}_stats.txt
touch ${meta.id}_TP_comp.bed
touch ${meta.id}_TP_base.bed
touch ${meta.id}_FP.bed
touch ${meta.id}_FN.bed
touch ${meta.id}_converted.bed

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
END_VERSIONS
touch ${prefix}_stats.csv
touch ${prefix}_TP_comp.bed
touch ${prefix}_TP_base.bed
touch ${prefix}_FP.bed
touch ${prefix}_FN.bed
touch ${prefix}_converted.bed
"""

}
107 changes: 107 additions & 0 deletions modules/local/custom/bedtools_intersect_bench/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Module metadata for the local BEDTOOLS_INTERSECT_BENCH process
# (modules/local/custom/bedtools_intersect_bench/main.nf).
name: "bedtools_intersect_bench"
description: "Benchmarks BED files using bedtools intersect, generating TP, FP, FN intervals and summary statistics."
keywords:
  - bedtools
  - intersect
  - benchmark
  - intervals
  - python
  - pandas
# Software used by the module. The process itself only reports versions for
# python and bedtools (see the versions_python / versions_bedtools outputs).
tools:
  - python:
      description: "Python programming language"
      homepage: "https://www.python.org"
      licence: ["Python-2.0"]
  - pybedtools:
      description: "Python wrapper for bedtools"
      homepage: "https://daler.github.io/pybedtools/"
      licence: ["GPL-2.0"]
  - bedtools:
      description: "A powerful toolset for genome arithmetic."
      homepage: "https://bedtools.readthedocs.io/"
      licence: ["MIT"]

# One input tuple: [ meta, truth, test ].
# NOTE(review): tests/main.nf.test passes a `.vcf` file as `truth`, while the
# pattern below says "*.bed" — confirm which formats the process accepts.
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information.
          Must contain `id` and `caller`.
          e.g. `[ id:'test', caller:'cnvkit' ]`
    - truth:
        type: file
        description: Truth or baseline BED file.
        pattern: "*.bed"
    - test:
        type: file
        description: Test or comparison BED file.
        pattern: "*.bed"

# One entry per output channel; each file channel is emitted together with meta.
output:
  summary:
    - meta:
        type: map
        description: Groovy Map containing sample information.
    - csv:
        type: file
        description: Summary statistics of the intersection.
        pattern: "*stats.csv"
  tp_base:
    - meta:
        type: map
        description: Groovy Map containing sample information.
    - bed:
        type: file
        description: True positive base intervals.
        pattern: "*TP_base.bed"
  tp_comp:
    - meta:
        type: map
        description: Groovy Map containing sample information.
    - bed:
        type: file
        description: True positive comparison intervals.
        pattern: "*TP_comp.bed"
  fp:
    - meta:
        type: map
        description: Groovy Map containing sample information.
    - bed:
        type: file
        description: False positive intervals.
        pattern: "*FP.bed"
  fn:
    - meta:
        type: map
        description: Groovy Map containing sample information.
    - bed:
        type: file
        description: False negative intervals.
        pattern: "*FN.bed"
  # Declared `optional: true` in main.nf, so this channel may be empty.
  out_bed:
    - meta:
        type: map
        description: Groovy Map containing sample information.
    - bed:
        type: file
        description: Converted BED file.
        pattern: "*converted.bed"
  # Version tuples: (process name, tool name, version string captured via eval).
  versions_python:
    - versions:
        type: tuple
        description: Tuple containing process name software name and python version string.
  versions_bedtools:
    - versions:
        type: tuple
        description: Tuple containing process name software name and bedtools version string.

# Both version outputs above are also published to the `versions` topic channel.
topics:
  versions:
    - versions:
        type: tuple
        description: Tuple containing process name software name and version string.

authors:
  - "@kubranarci"
maintainers:
  - "@kubranarci"
61 changes: 61 additions & 0 deletions modules/local/custom/bedtools_intersect_bench/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// nf-test suite for the local BEDTOOLS_INTERSECT_BENCH process.
// Contains one real run and one stub run; both feed the same input tuple and
// compare every output channel against the stored snapshot.
nextflow_process {

    name "Test Process BEDTOOLS_INTERSECT_BENCH"
    script "../main.nf"
    process "BEDTOOLS_INTERSECT_BENCH"

    // Tags used to select this suite from the nf-test command line.
    tag "modules"
    tag "modules_local"
    tag "bedtools_intersect_bench"

    // Real execution of the process script.
    test("Standard bedtools intersect bench") {

        when {
            // Pipeline params set for this test (the process script reads params.genome).
            params {
                genome = "GRCh38"
                truth_id = "truth"
            }
            process {
                // input[0] is the [ meta, truth, test ] tuple.
                // NOTE(review): params.test_data_base is not defined in this file —
                // presumably provided by the nf-test config; confirm.
                """
                input[0] = [
                    [ id:'test_intersect', caller:'manta' ],
                    file(params.test_data_base + '/testdata/nf-test/intersect/rtgtools.TP_base.vcf', checkIfExists: true),
                    file(params.test_data_base + '/testdata/nf-test/intersect/test14.bed', checkIfExists: true)
                ]
                """
            }
        }

        then {
            // The run must succeed and all outputs must match the snapshot.
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Same inputs, but executed with `-stub` so only the process stub block runs.
    test("Standard bedtools intersect bench - stub") {
        options "-stub"
        when {
            params {
                genome = "GRCh38"
                truth_id = "truth"
            }
            process {
                """
                input[0] = [
                    [ id:'test_intersect', caller:'manta' ],
                    file(params.test_data_base + '/testdata/nf-test/intersect/rtgtools.TP_base.vcf', checkIfExists: true),
                    file(params.test_data_base + '/testdata/nf-test/intersect/test14.bed', checkIfExists: true)
                ]
                """
            }
        }
        then {
            // The run must succeed and all outputs must match the snapshot.
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
Loading
Loading