nf-core · kubranarci · Mar 24, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Introducing a new subworkflow to generate truth vcf with an ensemble approach. Test VCFs are being merged and according to ensembl_truth rule (the minimum number of callers to agree) a new truth set is created. This apporach is especially important and needed for somatic benchmarks where truth is often missing. [#276](https://github.com/nf-core/variantbenchmarking/pull/276)
 - Syntax health, Channel to channel, versions to topic channels. Version update for happy, bedops, picard, ucsc, datavzrd, truvari tools [#277](https://github.com/nf-core/variantbenchmarking/pull/277)
 - Bedfile index checking and smoother vcf comparisions [#290](https://github.com/nf-core/variantbenchmarking/pull/290)
+- Adding nf-tests to local modules [#295](https://github.com/nf-core/variantbenchmarking/pull/295/), thanks to @aaryanjaitly for initiating some of the processes.
+- Replace local module VARIANT_EXTRACTOR with nf-core module VARIANTEXTRACTOR [#295](https://github.com/nf-core/variantbenchmarking/pull/295/)
 
 ### `Fixed`
 

diff --git a/README.md b/README.md
@@ -39,7 +39,7 @@ The workflow involves several key processes to ensure reliable and reproducible
 This initial step ensures consistent formatting and alignment of variants in test and truth VCF files for accurate comparison.
 
 - Subsample if input vcf is multisample ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))
-- Homogenization of multi-allelic variants, MNPs and SVs (including imprecise paired breakends and single breakends) ([variant-extractor](https://github.com/EUCANCan/variant-extractor))
+- Homogenization of multi-allelic variants and MNPs, and conversion of SVs to BND (including imprecise paired breakends and single breakends) ([variant-extractor](https://github.com/EUCANCan/variant-extractor))
 - Reformatting VCF files from different SV callers ([svync](https://github.com/nvnieuwk/svync))
 - Standardize SV variants to BND ([SVTK standardize](https://github.com/broadinstitute/gatk-sv/blob/main/src/svtk/scripts/svtk))
 - Decompose SVs to BND [rtgtools svdecompose](https://cn.animalgenome.org/bioinfo/resources/manuals/RTGOperationsManual.pdf)

diff --git a/bin/merge_sompy_features.py b/bin/merge_sompy_features.py
@@ -3,7 +3,9 @@
 # Copyright 2025 - GHGA
 # Author: Kuebra Narci - @kubranarci
 '''
-Generates a CSV file from a VCF
+Merges CSV files based on CHROM and POS, handling dynamic GT columns.
+Each GT column is renamed to include the sample name
+(e.g., Sample1_GT). Missing GT values are filled with './.'.
 Expected usage:
     $ python merge_sompy_features.py  $csvs --output ${prefix}.${meta.tag}.csv
 Use --help for more information.

diff --git a/conf/modules.config b/conf/modules.config
@@ -82,8 +82,8 @@ process {
 
     // sv_vcf_conversions
 
-    withName: VARIANT_EXTRACTOR {
-        ext.prefix = { input.baseName - ".vcf" + ".variantextract" }
+    withName: VARIANTEXTRACTOR {
+        ext.prefix = { vcf.baseName - ".vcf" + ".variantextract" }
         publishDir = [
             enabled: false
         ]
@@ -409,7 +409,7 @@ process {
         ]
     }
 
-    withName: "SPLIT_SOMPY_FEATURES" {
+    withName: "SOMPY_FEATURES_SPLIT" {
         ext.prefix = {params.truth_id ? "${meta.id}.${params.truth_id}.${meta.caller}" : "${meta.id}.truth.${meta.caller}" }
         publishDir = [
             enabled: false
@@ -618,7 +618,7 @@ process {
         ]
     }
 
-    withName: PLOTS {
+    withName: PLOTS_METRICS {
         ext.prefix = {"${meta.benchmark_tool}_mqc"}
         publishDir = [
             path: {"${params.outdir}/${params.variant_type}/summary/plots/${meta.benchmark_tool}"},
@@ -627,7 +627,7 @@ process {
         ]
     }
 
-     withName: PLOT_UPSET {
+     withName: PLOTS_UPSET {
         ext.prefix = {"upset_${meta.id}"}
         publishDir = [
             path: {"${params.outdir}/${params.variant_type}/summary/plots/${meta.id}"},
@@ -662,7 +662,7 @@ process {
         ]
     }
 
-    withName: PLOT_SVLEN_DIST {
+    withName: PLOTS_SVLEN_DIST {
         ext.prefix = {"${meta.id}.${meta.tag}"}
         publishDir = [
             path: {"${params.outdir}/${params.variant_type}/summary/plots/${meta.id}"},
@@ -686,7 +686,6 @@ process {
         ]
     }
 
-
     withName: VCF_TO_CSV {
         ext.prefix = {"${meta.id}.${meta.tag}"}
         publishDir = [
@@ -696,7 +695,8 @@ process {
         ]
     }
 
-    withName: MERGE_SOMPY_FEATURES {
+    withName: SOMPY_FEATURES_MERGE {
+        ext.prefix = {"${meta.id}.${meta.tag}"}
         publishDir = [
             path: {"${params.outdir}/${params.variant_type}/summary/comparisons/${meta.id}"},
             pattern: "*{csv}",
@@ -705,7 +705,6 @@ process {
     }
 
     // VCF2BED tools
-
     withName: "SVTK_VCF2BED" {
         ext.args   = {"--no-samples"}
         publishDir = [

diff --git a/modules.json b/modules.json
@@ -190,6 +190,11 @@
                         "git_sha": "b1c64e862fb5ed7f13b6f8f3ef8f04e700d41438",
                         "installed_by": ["modules"]
                     },
+                    "variantextractor": {
+                        "branch": "master",
+                        "git_sha": "b61b31006844eba0086187863095d79f8fb58220",
+                        "installed_by": ["modules"]
+                    },
                     "wittyer": {
                         "branch": "master",
                         "git_sha": "3d2cd05f05038ead04ec6d8cbcdc26e1608025db",

diff --git a/modules/local/custom/bedtools_intersect_bench/main.nf b/modules/local/custom/bedtools_intersect_bench/main.nf
@@ -17,7 +17,8 @@ process BEDTOOLS_INTERSECT_BENCH {
     tuple val(meta),path("*FP.bed")       , emit: fp
     tuple val(meta),path("*FN.bed")       , emit: fn
     tuple val(meta),path("*converted.bed"), emit: out_bed, optional: true
-    path "versions.yml"                   , emit: versions
+    tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), emit: versions_python, topic: versions
+    tuple val("${task.process}"), val('bedtools'), eval("bedtools --version | sed 's/bedtools v//g'"), emit: versions_bedtools, topic: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -36,25 +37,15 @@ process BEDTOOLS_INTERSECT_BENCH {
         $format \\
         $params.genome \\
         $args
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        python: \$(python --version | sed 's/Python //g')
-    END_VERSIONS
     """
     stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${meta.id}_stats.txt
-    touch ${meta.id}_TP_comp.bed
-    touch ${meta.id}_TP_base.bed
-    touch ${meta.id}_FP.bed
-    touch ${meta.id}_FN.bed
-    touch ${meta.id}_converted.bed
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        python: \$(python --version | sed 's/Python //g')
-    END_VERSIONS
+    touch ${prefix}_stats.csv
+    touch ${prefix}_TP_comp.bed
+    touch ${prefix}_TP_base.bed
+    touch ${prefix}_FP.bed
+    touch ${prefix}_FN.bed
+    touch ${prefix}_converted.bed
     """
-
 }
diff --git a/modules/local/custom/bedtools_intersect_bench/meta.yml b/modules/local/custom/bedtools_intersect_bench/meta.yml
@@ -0,0 +1,107 @@
+name: "bedtools_intersect_bench"
+description: "Benchmarks BED files using bedtools intersect, generating TP, FP, FN intervals and summary statistics."
+keywords:
+  - bedtools
+  - intersect
+  - benchmark
+  - intervals
+  - python
+  - pandas
+tools:
+  - python:
+      description: "Python programming language"
+      homepage: "https://www.python.org"
+      licence: ["Python-2.0"]
+  - pybedtools:
+      description: "Python wrapper for bedtools"
+      homepage: "https://daler.github.io/pybedtools/"
+      licence: ["GPL-2.0"]
+  - bedtools:
+      description: "A powerful toolset for genome arithmetic."
+      homepage: "https://bedtools.readthedocs.io/"
+      licence: ["MIT"]
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information.
+          Must contain `id` and `caller`.
+          e.g. `[ id:'test', caller:'cnvkit' ]`
+    - truth:
+        type: file
+        description: Truth or baseline BED file.
+        pattern: "*.bed"
+    - test:
+        type: file
+        description: Test or comparison BED file.
+        pattern: "*.bed"
+
+output:
+  summary:
+    - meta:
+        type: map
+        description: Groovy Map containing sample information.
+    - csv:
+        type: file
+        description: Summary statistics of the intersection.
+        pattern: "*stats.csv"
+  tp_base:
+    - meta:
+        type: map
+        description: Groovy Map containing sample information.
+    - bed:
+        type: file
+        description: True positive base intervals.
+        pattern: "*TP_base.bed"
+  tp_comp:
+    - meta:
+        type: map
+        description: Groovy Map containing sample information.
+    - bed:
+        type: file
+        description: True positive comparison intervals.
+        pattern: "*TP_comp.bed"
+  fp:
+    - meta:
+        type: map
+        description: Groovy Map containing sample information.
+    - bed:
+        type: file
+        description: False positive intervals.
+        pattern: "*FP.bed"
+  fn:
+    - meta:
+        type: map
+        description: Groovy Map containing sample information.
+    - bed:
+        type: file
+        description: False negative intervals.
+        pattern: "*FN.bed"
+  out_bed:
+    - meta:
+        type: map
+        description: Groovy Map containing sample information.
+    - bed:
+        type: file
+        description: Converted BED file.
+        pattern: "*converted.bed"
+  versions_python:
+    - versions:
+        type: tuple
+        description: Tuple containing the process name, the software name, and the python version string.
+  versions_bedtools:
+    - versions:
+        type: tuple
+        description: Tuple containing the process name, the software name, and the bedtools version string.
+
+topics:
+  versions:
+    - versions:
+        type: tuple
+        description: Tuple containing the process name, the software name, and the version string.
+
+authors:
+  - "@kubranarci"
+maintainers:
+  - "@kubranarci"
diff --git a/modules/local/custom/bedtools_intersect_bench/tests/main.nf.test b/modules/local/custom/bedtools_intersect_bench/tests/main.nf.test
@@ -0,0 +1,61 @@
+nextflow_process {
+
+    name "Test Process BEDTOOLS_INTERSECT_BENCH"
+    script "../main.nf"
+    process "BEDTOOLS_INTERSECT_BENCH"
+
+    tag "modules"
+    tag "modules_local"
+    tag "bedtools_intersect_bench"
+
+    test("Standard bedtools intersect bench") {
+
+        when {
+            params {
+                genome = "GRCh38"
+                truth_id = "truth"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test_intersect', caller:'manta' ],
+                    file(params.test_data_base + '/testdata/nf-test/intersect/rtgtools.TP_base.vcf', checkIfExists: true),
+                    file(params.test_data_base + '/testdata/nf-test/intersect/test14.bed', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("Standard bedtools intersect bench - stub") {
+        options "-stub"
+        when {
+            params {
+                genome = "GRCh38"
+                truth_id = "truth"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test_intersect', caller:'manta' ],
+                    file(params.test_data_base + '/testdata/nf-test/intersect/rtgtools.TP_base.vcf', checkIfExists: true),
+                    file(params.test_data_base + '/testdata/nf-test/intersect/test14.bed', checkIfExists: true)
+                ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}