Skip to content

Commit 198d78a

Browse files
committed
Fix: Allow non-PHRED quality scores e.g. Solexa
Solexa FASTQ quality scores are not PHRED scores, and as such can be negative. Relax some of the checks regarding qualities.
1 parent ee0a192 commit 198d78a

File tree

5 files changed

+21
-10
lines changed

5 files changed

+21
-10
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
55
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
66

7+
## [2.1.2]
8+
### Bugfix
9+
* Allow non-PHRED quality scores, such as Solexa scores, which can be negative (#104)
10+
11+
## [2.1.1]
12+
### Bugfix
13+
* Fix doc examples for writer with do-syntax (#100)
14+
715
## [2.1.0]
816
### Additions
917
* Implement `Base.copy!` for `FASTQRecord` and `FASTARecord`

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "FASTX"
22
uuid = "c2308a5c-f048-11e8-3e8a-31650f418d12"
33
authors = ["Sabrina J. Ward <sabrinajward@protonmail.com>", "Jakob N. Nissen <jakobnybonissen@gmail.com>"]
4-
version = "2.1.1"
4+
version = "2.1.2"
55

66
[weakdeps]
77
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"

src/fastq/quality.jl

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@
99
"""
1010
QualityEncoding(range::StepRange{Char}, offset::Integer)
1111
12-
FASTQ PHRED quality encoding scheme. `QualityEncoding` objects are used to
12+
FASTQ quality encoding scheme. `QualityEncoding` objects are used to
1313
interpret the quality scores of FASTQ records.
1414
`range` is a range of allowed ASCII chars in the encoding, e.g. `'!':'~'` for
1515
the most common encoding scheme.
16-
The offset is the PHRED offset.
16+
The offset is the ASCII offset, i.e. a character with ASCII value `x` encodes
17+
the value `x - offset`.
1718
1819
See also: [`quality_scores`](@ref)
1920
@@ -44,9 +45,7 @@ struct QualityEncoding
4445
elseif high > 127
4546
error("Quality encoding only works with ASCII charsets")
4647
elseif offset < 0
47-
error("Quality offset must be non-negative")
48-
elseif low < offset
49-
error("Low end of in quality encoding range cannot be less than offset")
48+
error("Quality offset must be non-negative")
5049
else
5150
return new(low, high, off)
5251
end
@@ -57,7 +56,7 @@ end
5756
const SANGER_QUAL_ENCODING = QualityEncoding('!':'~', 33)
5857

5958
"Solexa (Solexa+64) quality score encoding"
60-
const SOLEXA_QUAL_ENCODING = QualityEncoding('@':'~', 64)
59+
const SOLEXA_QUAL_ENCODING = QualityEncoding(';':'~', 64)
6160

6261
"Illumina 1.3 (Phred+64) quality score encoding"
6362
const ILLUMINA13_QUAL_ENCODING = QualityEncoding('@':'~', 64)

test/fastq/TestFASTQ.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ module TestFASTQ
44
const OFFSET = 33
55

66
using ReTest
7+
using FASTX: FASTQ
78
using FASTX.FASTQ: Record, Reader, Writer, identifier, description,
89
sequence, quality, quality_scores, QualityEncoding, quality_header!, validate_fastq
910
using BioSequences: LongDNA, LongRNA, LongAA, @dna_str, @rna_str, @aa_str
@@ -41,4 +42,4 @@ include("record.jl")
4142
include("io.jl")
4243
include("specimens.jl")
4344

44-
end # module TestFASTQ
45+
end # module TestFASTQ

test/fastq/record.jl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,6 @@ end
191191

192192
# QualityEncoding
193193
@test_throws Exception QualityEncoding('B':'A', 10)
194-
@test_throws Exception QualityEncoding('A':'A', 90)
195194
@test_throws Exception QualityEncoding('a':'A', 10)
196195
@test_throws Exception QualityEncoding('Z':'Y', 10)
197196
@test_throws Exception QualityEncoding('A':'B', -1)
@@ -208,6 +207,10 @@ end
208207
@test_throws BoundsError quality_scores(records[2], 2:5)
209208
@test_throws BoundsError quality_scores(records[2], 5:5)
210209

210+
# Solexa encoding is weird in that it can be negative
211+
rec = Record("abc", "TAG", [20, 0, -5]; offset=64)
212+
@test collect(quality_scores(rec, FASTQ.SOLEXA_QUAL_ENCODING)) == [20, 0, -5]
213+
211214
# Custom quality encoding
212215
CustomQE = QualityEncoding('A':'Z', 12)
213216
good = parse(Record, "@a\naaaaaa\n+\nAKPZJO")
@@ -285,4 +288,4 @@ end
285288
@test isequal(cp, records[2])
286289
end
287290

288-
end # testset Record
291+
end # testset Record

0 commit comments

Comments
 (0)