Skip to content

Commit 3926e27

Browse files
committed
adding run seqtk pipeline
1 parent e8f24fe commit 3926e27

File tree

1 file changed

+109
-0
lines changed

1 file changed

+109
-0
lines changed
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
* Pipeline to run SEQTK QA
5+
*
6+
* @author
7+
* Ernesto Lowy <ernesto.lowy@gmail.com>
8+
*/
9+
10+
// Fallback values used when an option is not supplied on the command line
def defaults = [min_length: 70]

// Register the pipeline parameters together with their default values;
// min_length is later forwarded to the QA script as -min_length
params.min_length = defaults.min_length
params.help = false
19+
20+
//print usage
21+
// When --help is given, print the usage message and stop before any
// input validation or process definition is reached.
if (params.help) {
    log.info ''
    log.info 'Pipeline to run FASTQA on a FASTQ file'
    log.info '---------------------------------'
    log.info 'FASTQA is based on SEQTK .'
    log.info ''
    log.info 'Usage: '
    // Scripts are launched through the `run` subcommand of the nextflow CLI.
    // NOTE(review): assumes this script is saved as run_seqtk_qa.nf - confirm.
    log.info ' nextflow run run_seqtk_qa.nf --pwd $DB_PASS --output_dir ./out --dbname elowy_hgdp_03102018 --runs runs.txt'
    log.info ''
    log.info 'Options:'
    log.info ' --help Show this message and exit.'
    log.info ' --pwd PWD Password for connecting the ReseqTrack DB.'
    log.info ' --dbname DBNAME ReseqTrack DB name.'
    log.info ' --host HOSTNAME Hostname of the MySQL ReseqTrack DB.'
    log.info ' --user USER Username of the MySQL ReseqTrack DB.'
    log.info ' --port PORT Port number of the MySQL ReseqTrack DB.'
    log.info ' --min_length MIN_LENGTH Minimum sequence length.'
    log.info ' --program PROGRAM Path to seqtk binary.'
    log.info ' --reseqtrack RESEQTRACK Path to the reseqtrack repository.'
    log.info ' --queue QUEUE Name of the queue to use on the LSF (eg, production-rh74, research-rh74).'
    log.info ' --output_dir OUTPUT_DIR Directory that will contain the output files.'
    log.info ' --runs RUN_FILE File containing runs to be analyzed'
    log.info ' The format of RUN_FILE is:'
    log.info ' name'
    log.info ' ERR0001'
    log.info ' ERR0002'
    log.info ' ERR0003'
    log.info ''
    // Exit status is kept at 1, as in the original script
    exit 1
}
51+
52+
/*
 * Validate input file
 *
 * --runs is mandatory. Check that it was supplied before handing it to
 * file(), so that a missing option is reported with a clear message, and
 * then check that the file it points to actually exists.
 */
if( !params.runs ) exit 1, "Missing mandatory option --runs RUN_FILE (use --help for usage)"

run_file = file(params.runs)
if( !run_file.exists() ) exit 1, "Missing file with run ids: ${run_file}"
57+
58+
/*
 * Build the channel of run ids: run_file is parsed as CSV with a header
 * row, and the value of the 'name' column of each record is emitted.
 */
runs_ch = Channel
    .fromPath(run_file)
    .splitCsv(header: true)
    .map { record -> record.name }
66+
67+
process runFastqSimpleQA {
    /*
     * Runs the ReseqTrack perl script fastq_simple_qa_by_seqtk.pl once for
     * every run id received from runs_ch. The script's standard output is
     * redirected to the file out_fastqa, and the run id is emitted again on
     * the run_id channel.
     */

    // Each task is submitted to the LSF queue given by --queue
    executor 'lsf'
    queue params.queue
    cpus 1
    memory '2 GB'

    input:
    val run_name from runs_ch

    output:
    file out_fastqa
    val run_name into run_id

    script:
    """
    perl ${params.reseqtrack}/scripts/qc/fastq_simple_qa_by_seqtk.pl \
        -dbhost ${params.host} \
        -dbname ${params.dbname} \
        -dbuser ${params.user} \
        -dbpass ${params.pwd} \
        -dbport ${params.port} \
        -run_id $run_name \
        -collection_type FASTQ \
        -new_collection_type FQ_OK \
        -min_length ${params.min_length} \
        -output_dir ${params.output_dir} \
        -program ${params.program} \
        -clobber 1> out_fastqa
    """
}
91+
92+
process moveFinalFile {
    /*
     * Renames the received out_fastqa file to <run_id>.seqtk.out and
     * publishes the renamed file into the directory given by
     * params.output_dir.
     */

    // saveAs keeps the published file name unchanged
    publishDir "${params.output_dir}", saveAs: { fname -> "$fname" }

    // No explicit `from` is declared for either input.
    // NOTE(review): presumably read from the channels named run_id and
    // out_fastqa produced by runFastqSimpleQA - confirm.
    input:
    val run_id
    file out_fastqa

    output:
    file "${run_id}.seqtk.out"

    script:
    """
    mv ${out_fastqa} ${run_id}.seqtk.out
    """
}

0 commit comments

Comments
 (0)