diff --git a/modules/nf-core/poppunk/createdb/environment.yml b/modules/nf-core/poppunk/createdb/environment.yml
new file mode 100644
index 000000000000..208c53a6fa4c
--- /dev/null
+++ b/modules/nf-core/poppunk/createdb/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::poppunk=2.7.8"
diff --git a/modules/nf-core/poppunk/createdb/main.nf b/modules/nf-core/poppunk/createdb/main.nf
new file mode 100644
index 000000000000..d9a4ca55963f
--- /dev/null
+++ b/modules/nf-core/poppunk/createdb/main.nf
@@ -0,0 +1,41 @@
+// Create a PopPUNK database (k-mer sketches and pairwise distances) from the assemblies listed in r_file.
+process POPPUNK_CREATEDB {
+    tag "$meta.id"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/poppunk:2.7.8--py310h4d0eb5b_0' :
+        'quay.io/biocontainers/poppunk:2.7.8--py310h4d0eb5b_0' }"
+    input:
+    tuple val(meta), path(r_file), path(fasta)
+    output:
+    tuple val(meta), path("${prefix}"), emit: db
+    tuple val(meta), path("${prefix}/${prefix}.h5"), emit: h5
+    tuple val(meta), path("${prefix}/${prefix}.dists.*"), emit: dists
+    tuple val("${task.process}"), val('poppunk'), eval("poppunk --version 2>&1 | head -1 | sed 's/^.* //'"), topic: versions, emit: versions_poppunk
+    when:
+    task.ext.when == null || task.ext.when
+    script:
+    def args = task.ext.args ?: ''
+    // No `def` here: the output block references prefix, so it must not be a script-local variable
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    poppunk \\
+        --create-db \\
+        --r-files ${r_file} \\
+        --output ${prefix} \\
+        --threads ${task.cpus} \\
+        --qc-keep \\
+        ${args}
+    """
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    mkdir -p ${prefix}
+    touch ${prefix}/${prefix}.h5
+    touch ${prefix}/${prefix}.dists.pkl
+    touch ${prefix}/${prefix}.dists.npy
+    touch ${prefix}/${prefix}.refs
+    touch ${prefix}/qcreport.txt
+    """
+}
diff --git a/modules/nf-core/poppunk/createdb/meta.yml b/modules/nf-core/poppunk/createdb/meta.yml
new file mode 100644
index 000000000000..dac35374351d
--- /dev/null
+++ b/modules/nf-core/poppunk/createdb/meta.yml
@@ -0,0 +1,104 @@
+name: "poppunk_createdb"
+description: Create a PopPUNK database of kmer sketches and pairwise distances
+  from a set of assemblies
+keywords:
+  - genomics
+  - bacteria
+  - clustering
+  - sketching
+  - poppunk
+tools:
+  - "poppunk":
+      description: "PopPUNK (POPulation Partitioning Using Nucleotide Kmers) - rapid
+        bacterial population analysis. Note: this module always passes --qc-keep;
+        additional options can be supplied via task.ext.args."
+      homepage: "https://poppunk.bacpop.org/index.html"
+      documentation: "https://poppunk.bacpop.org/index.html"
+      tool_dev_url: "https://github.com/bacpop/PopPUNK"
+      doi: "10.1101/gr.241455.118"
+      licence:
+        - "Apache-2.0"
+      identifier: biotools:poppunk
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - r_file:
+        type: file
+        description: |
+          Tab-separated file listing sample names and paths to their assembly FASTA files.
+          Format: <sample_name><TAB><path_to_fasta>, one sample per line
+        pattern: "*.txt"
+        ontologies:
+          - edam: http://edamontology.org/format_3475
+    - fasta:
+        type: file
+        description: List of assembly FASTA files for all samples listed in the
+          r_file
+        pattern: "*.{fa,fa.gz,fasta,fasta.gz,fna,fna.gz}"
+        ontologies:
+          - edam: http://edamontology.org/format_1929
+output:
+  db:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - ${prefix}:
+          type: directory
+          description: Directory containing the full PopPUNK database output files
+  h5:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - ${prefix}/${prefix}.h5:
+          type: file
+          description: HDF5 file containing the k-mer sketches for all input
+            genomes
+          pattern: "*.h5"
+          ontologies:
+            - edam: http://edamontology.org/format_3590
+
+  dists:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - ${prefix}/${prefix}.dists.*:
+          type: file
+          description: Pairwise distance matrix files (numpy and pickle format)
+          pattern: "*.dists.{npy,pkl}"
+          ontologies:
+            - edam: http://edamontology.org/format_4003
+            - edam: http://edamontology.org/format_4002
+  versions_poppunk:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - poppunk:
+          type: string
+          description: The name of the tool
+      - poppunk --version 2>&1 | head -1 | sed 's/^.* //':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - poppunk:
+          type: string
+          description: The name of the tool
+      - poppunk --version 2>&1 | head -1 | sed 's/^.* //':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@cwoodside1278"
+maintainers:
+  - "@cwoodside1278"
diff --git a/modules/nf-core/poppunk/createdb/tests/main.nf.test b/modules/nf-core/poppunk/createdb/tests/main.nf.test
new file mode 100644
index 000000000000..679e2ae0d567
--- /dev/null
+++ b/modules/nf-core/poppunk/createdb/tests/main.nf.test
@@ -0,0 +1,41 @@
+nextflow_process {
+    name "Test Process POPPUNK_CREATEDB"
+    script "../main.nf"
+    process "POPPUNK_CREATEDB"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "poppunk"
+    tag "poppunk/createdb"
+
+    // PopPUNK requires a minimum number of sufficiently diverse same-species genomes
+    // to compute meaningful k-mer distances. No suitable tiny test data exists in
+    // nf-core/test-datasets, so only a stub test is provided here.
+    // To test with real data, run manually with multiple same-species genome FASTAs,
+    // e.g. multiple H. influenzae strains from NCBI.
+
+    test("haemophilus_influenzae - fasta - createdb - stub") {
+        options "-stub"
+        when {
+            process {
+                """
+                def fasta1 = file(params.modules_testdata_base_path + 'genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz', checkIfExists: true)
+                def fasta2 = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/fasta/haemophilus_influenzae.fna.gz', checkIfExists: true)
+                def rfile = file(workDir.toString() + '/rfile.txt')
+                rfile.text = ["sample1", "genome.fna.gz"].join("\t") + System.lineSeparator() +
+                    ["sample2", "haemophilus_influenzae.fna.gz"].join("\t") + System.lineSeparator()
+                input[0] = [
+                    [ id:'test' ],
+                    rfile,
+                    [fasta1, fasta2]
+                ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/poppunk/createdb/tests/main.nf.test.snap b/modules/nf-core/poppunk/createdb/tests/main.nf.test.snap
new file mode 100644
index 000000000000..110d72e9afc3
--- /dev/null
+++ b/modules/nf-core/poppunk/createdb/tests/main.nf.test.snap
@@ -0,0 +1,53 @@
+{
+    "haemophilus_influenzae - fasta - createdb - stub": {
+        "content": [
+            {
+                "db": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "qcreport.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.dists.npy:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.dists.pkl:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.refs:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "dists": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test.dists.npy:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.dists.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "h5": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_poppunk": [
+                    [
+                        "POPPUNK_CREATEDB",
+                        "poppunk",
+                        "2.7.8"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-05-01T19:38:13.774846004",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    }
+}
\ No newline at end of file