Skip to content

Commit f2d78b2

Browse files
committed
Added test data
1 parent 614887b commit f2d78b2

10 files changed

+60
-4
lines changed

.gitignore

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
/__pycache__/
22
*.pyc
33
*.pyo
4-
*.vcf
5-
*.vcf.gz
6-
*.vcf.gz.tbi
4+
/*.vcf
5+
/*.vcf.gz
6+
/*.vcf.gz.tbi
77
/*.rp
88
/*.rpdb
99
/.*/
1010
/*.sh
11+
/test/actual_output.vcf

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,16 @@ python3 svimmer input_vcfs chrA chrB chrC ...
1515

1616
where `input_vcfs` is a file listing tabix-indexed, bgzipped VCF files (one path per line) and `chrA chrB chrC ...` are the chromosomes to merge. For further details, see the help page:
1717

18-
```
18+
```sh
1919
python3 svimmer -h
2020
```
2121

22+
## Test data example
23+
24+
```sh
25+
python3 svimmer test_vcfs chr20 > test/actual_output.vcf
26+
diff test/actual_output.vcf test/expected_output.vcf
27+
```
28+
2229
## License
2330
GNU GPLv3
1.79 KB
Binary file not shown.
393 Bytes
Binary file not shown.
1.72 KB
Binary file not shown.
377 Bytes
Binary file not shown.
1.99 KB
Binary file not shown.
446 Bytes
Binary file not shown.

test/expected_output.vcf

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
##fileformat=VCFv4.1
2+
##fileDate=20210107
3+
##contig=<ID=chr20,length=64444167>
4+
##INFO=<ID=NUM_MERGED_SVS,Number=1,Type=Integer,Description="Number of merged SVs.">
5+
##INFO=<ID=STDDEV_POS,Number=2,Type=Integer,Description="Std. dev of begin and end positions.">
6+
#CHROM POS ID REF ALT QUAL FILTER INFO
7+
chr20 41196482 . T TTATAAATATATATTATATATAATATATATTTATATAAATATATATTATATATAAATATATATTATATATAATATATATTTATATAAATATATATTATATATTATATAAATATTATATATAATATATATTTA 0 . END=41196482;SVTYPE=INS;SVLEN=131;CIGAR=1M131I;CIPOS=0,20;HOMLEN=20;HOMSEQ=TATAAATATATATTATATAT;NUM_MERGED_SVS=3;STDDEV_POS=0.00,0.00
8+
chr20 41257715 . AATTCTCCTGCCTCAGCCTCCTTAGTAGCTGGGACTACAGGCACACGCCACCATGCCTGGCTAAGTTTTCGTATTTTTAGTAGAGACGGGGTTTCACCATGCTAGCCAGGCTGGTCTCGAACTCCTGACCTTGTGATCTGCCCACCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTTAGCCACCACGCCCAACCCTTTTTTTTTTTGAGACGGAGTTTTGCTCTTGTCAGCCAGGCTGGGGTACAGTGGCACAGCTCACTGCAACCTCCACCTCCCAGGTTCAAGTG A 0 . END=41258003;SVTYPE=DEL;SVLEN=-288;CIGAR=1M288D;CIPOS=0,9;HOMLEN=9;HOMSEQ=ATTCTCCTG;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
9+
chr20 41277701 . C <DEL> 0 . END=41278993;SVTYPE=DEL;SVLEN=-1292;IMPRECISE;CIPOS=-393,393;CIEND=-511,511;NUM_MERGED_SVS=3;STDDEV_POS=12.49,71.07
10+
chr20 41694772 . A AATGTCAAATTGCGATAAATAGAACACCCCCACCCCCAGGATGCTACTAGAGAGAATAATGGAGGAAAATTGATACATTAGATTAGAATAAT 0 . END=41694772;SVTYPE=INS;SVLEN=91;CIGAR=1M91I;CIPOS=0,46;HOMLEN=46;HOMSEQ=ATGTCAAATTGCGATAAATAGAACACCCCCACCCCCAGGATGCTAC;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
11+
chr20 41912429 . AACTTCCACCTCCCGGGTTCAAGCAATTCTCCTTCCTCAGCCTCCCGAGTAGCTGGG A 0 . END=41912485;SVTYPE=DEL;SVLEN=-56;CIGAR=1M56D;CIPOS=0,3;HOMLEN=3;HOMSEQ=ACT;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
12+
chr20 42213265 . G GTTTCACATCATTAAAAGTTGTAGAATCTTGGATTCTCTGGCTGGAAAGGCCCTCCCA 0 . END=42213265;SVTYPE=INS;SVLEN=57;CIGAR=1M57I;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
13+
chr20 42633975 . ATATTATATATTATATTATATATATTATATATATATAATATATATATAATATATATATTATATATATTATATATATATTATAAATATATAATATATATATAATATAT A 0 . END=42634081;SVTYPE=DEL;SVLEN=-106;CIGAR=1M106D;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
14+
chr20 43162622 . TCTCCATGGTCCATCACTCTGGAGGCAGCAAGGAGTCAAGCTTCCAGGATCTGGTCTGGGGCTGCCTGGCTCCCCTTCCCTTCAGCCACACCAGTCAGTCACCCATGGTTCCAACCACACCAAAGGGTTTGGCTTTTCATGATGACCCGCTGGACAGCAGCTTGGCTCAGGCTGCTCTTCTGTCTG T 0 . END=43162807;SVTYPE=DEL;SVLEN=-185;CIGAR=1M185D;CIPOS=0,4;HOMLEN=4;HOMSEQ=CTCC;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
15+
chr20 43395954 . A <DEL> 0 . END=43397152;SVTYPE=DEL;SVLEN=-1198;CIPOS=0,18;CIEND=0,18;HOMLEN=18;HOMSEQ=TGGTAAAACCCCATTTCT;NUM_MERGED_SVS=3;STDDEV_POS=0.00,0.00
16+
chr20 43643224 . A <DEL> 0 . END=43645948;SVTYPE=DEL;SVLEN=-2724;CIPOS=0,35;CIEND=0,35;HOMLEN=35;HOMSEQ=CCTGTAATCCCAGCACTTTGGGAGGCCGAGGCAGG;NUM_MERGED_SVS=3;STDDEV_POS=0.00,0.00
17+
chr20 43696486 . A <DUP:TANDEM> 0 . END=43696990;SVTYPE=DUP;SVLEN=504;CIPOS=0,23;CIEND=0,23;HOMLEN=23;HOMSEQ=CTGGGATTACAGGCGTGAGCCAC;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
18+
chr20 44047485 . TTAACATCTGCACTAAGAAAAATTCCTCTGCCTTGGGATCCTGTTGATCTGTGACCTTACCCCCAACCCTGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTAAATGGATTAAGGGCGGTGCAAGATGTGCTTTGTTAAACAGATGCTTGAAGGCAGCATGCTCGTTAAGAGTCATCACCAATCCCTAATCTCAAGTAATCAGGGACACAAACACTGCGGAAGGCCGCAGGGTCCTCTGCCTAGGAAAACCAGAGACCTTTGTTCACTTGTTTATCTGCTGACCTTCCCTCCACTATTGTCCCATGACCCTGCCAAATCCCCCTCTGTGAGAAACACCCAAGAATTATCAATAAAAAAAAATTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA T 0 . END=44047884;SVTYPE=DEL;SVLEN=-399;CIGAR=1M399D;CIPOS=0,3;HOMLEN=3;HOMSEQ=TAA;NUM_MERGED_SVS=3;STDDEV_POS=0.00,0.00
19+
chr20 44134468 . ATAAATATATATTTATAATATATATTTATTATATATAAATAATATATATTTATAATAAATATATATAAATAAATATATATTTATAATATATATAAATAAATATATATTTATTATAAATATATATAAATATATATTTATTATAAATATAATAAATATATATTTATTATAAATATATAAATATTTATTATAAATATATATAAATATATATTTATAAATATTATAAATATATATACATTAATATATATTATAAATATATATT AAA 0 . END=44134726;SVTYPE=DEL;SVLEN=-258;CIGAR=1M2I258D;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
20+
chr20 44404776 . T <INS> 0 . END=44404776;SVTYPE=INS;CIPOS=0,4;CIEND=0,4;HOMLEN=4;HOMSEQ=GTGT;LEFT_SVINSSEQ=GTGTTTGTTGATGTGTGTGTATGTGTGCGTGTGTGTGAATTGTGTGT;RIGHT_SVINSSEQ=TGTGTGTTGATGTGTGTGGAGTGTGTGGTGTGTGTGGTGTGTGGGTGTGTGGTGTGTGTGTGTGTTTGTGTGTGGTGTGTGTGCGTGTGTGTGGACTGTGTGGTGCGTGTGTGTGCTTGTGTGTGGACTGTGGATTGTGTGGTGTGTGTGTGCGCAC;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
21+
chr20 44677204 . G G]chr20:44683574] 0 . SVTYPE=BND;MATEID=MantaBND:2404:0:1:0:0:0:1;CIPOS=0,2;HOMLEN=2;HOMSEQ=CT;BND_DEPTH=19;MATE_BND_DEPTH=38;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
22+
chr20 44680251 . C [chr20:44683272[C 0 . SVTYPE=BND;MATEID=MantaBND:2409:0:1:0:0:0:1;CIPOS=0,4;HOMLEN=4;HOMSEQ=AGGG;BND_DEPTH=19;MATE_BND_DEPTH=47;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
23+
chr20 44683268 . C [chr20:44680255[C 0 . SVTYPE=BND;MATEID=MantaBND:2409:0:1:0:0:0:0;CIPOS=0,4;HOMLEN=4;HOMSEQ=CTGA;BND_DEPTH=47;MATE_BND_DEPTH=19;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
24+
chr20 44683572 . C C]chr20:44677206] 0 . SVTYPE=BND;MATEID=MantaBND:2404:0:1:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=AG;BND_DEPTH=38;MATE_BND_DEPTH=19;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
25+
chr20 44764203 . C CATCCATCCATCCATCCATCCATCCGTCCATCCATTTATCCATCCATCCATCCATCCATCCATCCATCCATCCATTTATCCATCCGTCCATTTATCCATCCATCCATCCACCCACCCATCCATCCATCCATCCACTT 0 . END=44764203;SVTYPE=INS;SVLEN=136;CIGAR=1M136I;CIPOS=0,9;HOMLEN=9;HOMSEQ=ATCCATCCA;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
26+
chr20 45575422 . A <DEL> 0 . END=45578660;SVTYPE=DEL;SVLEN=-3238;CIPOS=0,3;CIEND=0,3;HOMLEN=3;HOMSEQ=GCA;NUM_MERGED_SVS=2;STDDEV_POS=0.00,0.00
27+
chr20 45725015 . T TATACACACATATATATATATATATATATACACATATATATATATATACAC 0 . END=45725015;SVTYPE=INS;SVLEN=50;CIGAR=1M50I;CIPOS=0,3;HOMLEN=3;HOMSEQ=ATA;NUM_MERGED_SVS=3;STDDEV_POS=1.15,1.15
28+
chr20 45730773 . CTCACTGCAACCTCCGCTTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCCGGGATTACAGGCACGTGCCACCACACCCAGCTAATTTTTCTATTTTTAGTA C 0 . END=45730885;SVTYPE=DEL;SVLEN=-112;CIGAR=1M112D;NUM_MERGED_SVS=2;STDDEV_POS=0.00,0.00
29+
chr20 45738910 . CTATATATAGTTATATATATAGTTATAGTTTACAAAACTATATATAGTTATATATAGTTATAGTTTACAAAACTATATATAGTTA C 0 . END=45738994;SVTYPE=DEL;SVLEN=-84;CIGAR=1M84D;CIPOS=0,14;HOMLEN=14;HOMSEQ=TATATATAGTTATA;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
30+
chr20 45906708 . CGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGTTGGGCATGGTGGTGGGCGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCGCTTGATCCTGGGAGGTGGAGGTTGCAGGGAGCCAGGATTGCACCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCCATCTCAAAAAAAAAAAAAAAAAAAAAAGATGGCATGAGGTTCCTTTCCCTTGGTTTCTCCTACCTCCTCCTTCGCTTCCCATGCCTGTGTGTTAGGTGTGATGGGAAAATACCTTTGCCCCACAGTAGACAGAGGTCATGGCTTAGAAAAAGGGAATTCATGGCCAGGCACAGTGGCTCATGCCTATAATCCCAGCACTTTGGGAGGCCAAGGTGGGCAGATCAGGAGGTCAGGAGATCGAGACCATCCCGGCCAACAT C 0 . END=45907144;SVTYPE=DEL;SVLEN=-436;CIGAR=1M436D;CIPOS=0,24;HOMLEN=24;HOMSEQ=GGTGAAACCCCGTCTCTACTAAAA;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
31+
chr20 47156115 . CACACACACACACATATATATATATATATATATATATATATATATATATAT C 0 . END=47156165;SVTYPE=DEL;SVLEN=-50;CIGAR=1M50D;CIPOS=0,1;HOMLEN=1;HOMSEQ=A;NUM_MERGED_SVS=2;STDDEV_POS=0.00,0.00
32+
chr20 47390323 . CGGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCCGGACTGCGGACTGCAGTGGCGCAATCTCGGCTCACTGCAAGCTCCGCTTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCCGCCACCGCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCTTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCATGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCGCCTCCTGT CTG 0 . END=47390651;SVTYPE=DEL;SVLEN=-328;CIGAR=1M2I328D;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
33+
chr20 47481053 . T TTCTTTCCTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTC 0 . END=47481053;SVTYPE=INS;SVLEN=50;CIGAR=1M50I;CIPOS=0,1;HOMLEN=1;HOMSEQ=T;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
34+
chr20 47525638 . CGGGGCGGCTGGCCGGACGGGGGGGCTGACCCCCCCCACCTCCCTCCCGGACGGGGCGGCTGGCCGGGCGGGGGGCTGACCCCCCCACCTCCATCCCGGAGGGGGCGGCTGGCCGGGCGGGGGGCTGACCCCCCCCACCTCCCTCCCAGACGGGGCGGCTGGCCGGGCAGAGGGGCTCCTCACTTCCCAGTAGGGGCGGCCGGGCAGAGGTGCCCCTCACTTCCCGGATGGGG CAGGT 0 . END=47525870;SVTYPE=DEL;SVLEN=-232;CIGAR=1M4I232D;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
35+
chr20 48010935 . C CATCACCATCATCACAACCACCATCGTCAATATCACCATCACTTTCATAATCACCACCACCACAACCACCATCAATATCACCATCACCTTAATCATCACCACAACCACCATCAAT 0 . END=48010935;SVTYPE=INS;SVLEN=114;CIGAR=1M114I;CIPOS=0,10;HOMLEN=10;HOMSEQ=ATCACCATCA;NUM_MERGED_SVS=3;STDDEV_POS=0.00,0.00
36+
chr20 48214211 . G G]chr20:48598465] 0 . SVTYPE=BND;MATEID=MantaBND:2577:0:1:0:0:0:0;IMPRECISE;CIPOS=-478,478;BND_DEPTH=22;MATE_BND_DEPTH=27;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
37+
chr20 48214631 . C [chr20:48597827[C 0 . SVTYPE=BND;MATEID=MantaBND:2577:0:2:0:0:0:0;IMPRECISE;CIPOS=-472,473;BND_DEPTH=28;MATE_BND_DEPTH=31;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
38+
chr20 48412705 . T TTCTTTCTTCTTAAGGTGTTACAGAAGGGCAAAGTCCATCTTCTGTTCCTTCCTTCCTTCTG 0 . END=48412705;SVTYPE=INS;SVLEN=61;CIGAR=1M61I;CIPOS=0,59;HOMLEN=59;HOMSEQ=TCTTTCTTCTTAAGGTGTTACAGAAGGGCAAAGTCCATCTTCTGTTCCTTCCTTCCTTC;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
39+
chr20 48597827 . A [chr20:48214631[A 0 . SVTYPE=BND;MATEID=MantaBND:2577:0:2:0:0:0:1;IMPRECISE;CIPOS=-292,292;BND_DEPTH=31;MATE_BND_DEPTH=28;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
40+
chr20 48598465 . C C]chr20:48214211] 0 . SVTYPE=BND;MATEID=MantaBND:2577:0:1:0:0:0:1;IMPRECISE;CIPOS=-311,311;BND_DEPTH=27;MATE_BND_DEPTH=22;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
41+
chr20 48811623 . CCCCCC CACACGTGTGCATGCACACTACTGACCAGACCTGGATACACACACACACGTGCATGCACACTACTGACCAGACCTGGATACACACACACACACACGTGTGCATGCACACTACTGACCAGACCTGGATA 0 . END=48811628;SVTYPE=INS;SVLEN=127;CIGAR=1M127I5D;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
42+
chr20 49108128 . CAAAAAAAAAAAAAGGCCAGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGACCGAGGCGGGTGGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACAAGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGCGGTGGCGGGCGCGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAAGCGGAGCTTGCAGTGAGCCGAGATTGCGCCACTGCAGTCCGCAGTCCGGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAA C 0 . END=49108453;SVTYPE=DEL;SVLEN=-325;CIGAR=1M325D;CIPOS=0,13;HOMLEN=13;HOMSEQ=AAAAAAAAAAAAA;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
43+
chr20 49126606 . A <INS> 0 . END=49126641;SVTYPE=INS;LEFT_SVINSSEQ=AAAAAAAAAAAACTAAAAAATCAAACCAAAATAACCCAAAAAACTCAAAAAAAAAAAAAA;RIGHT_SVINSSEQ=GCTAAAGGGAGTTATGACCACCAAACTGCCCCCCAGCCTGGGAAAAAAAGAAAACCCCCTTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAACAAAAAAACTTAAAAATCTAACCGATATTAGCCAAGACACTC;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
44+
chr20 49834407 . ATATATATATACTGTATATATATACTGTATATATATATATATACTGTATATATATATACTGTG A 0 . END=49834469;SVTYPE=DEL;SVLEN=-62;CIGAR=1M62D;CIPOS=0,10;HOMLEN=10;HOMSEQ=TATATATATA;NUM_MERGED_SVS=1;STDDEV_POS=0.00,0.00
45+
chr20 49927198 . CAAAGGTTGCAGTGAGCCGAGATGGTACCACTGCACTCCAGCCTGGGAACAAAGTGAGACTCCATCTCAAAAAAAAAAATA C 0 . END=49927278;SVTYPE=DEL;SVLEN=-80;CIGAR=1M80D;CIPOS=0,2;HOMLEN=2;HOMSEQ=AA;NUM_MERGED_SVS=3;STDDEV_POS=0.00,0.00

test_vcfs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
test/HG002_30x_Manta_v1.6.0_test_data.vcf.gz
2+
test/HG003_30x_Manta_v1.6.0_test_data.vcf.gz
3+
test/HG004_30x_Manta_v1.6.0_test_data.vcf.gz

0 commit comments

Comments
 (0)