Skip to content

Commit e86372a

Browse files
authored
Merge pull request #9 from spyoungtech/fix-eof-offset
fix errant EOF token when document ends with multibyte character
2 parents 61b94c2 + 9f28d95 commit e86372a

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

src/tokenize.rs

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -547,7 +547,7 @@ impl <'input> Tokenizer<'input> {
547 547
match maybe_next {
548 548
None => {
549 549
match maybe_last {
550-
Some((last_idx, _)) => Ok((last_idx+1, TokType::EOF, last_idx+1)),
550+
Some((last_idx, last_char)) => Ok((last_idx + last_char.len_utf8(), TokType::EOF, last_idx + last_char.len_utf8())),
551 551
None => Ok((0, TokType::EOF, 0)),
552 552
}
553 553
}
@@ -797,4 +797,13 @@ mod test {
797 797
let expected = Tokens{source: text, tok_spans: vec![(0, LeftBrace, 1), (1, Name, 3), (3, Colon, 4), (4, Integer, 5), (5, Comma, 6), (6, Name, 8), (8, Colon, 9), (9, Integer, 10), (10, Comma, 11), (11, Name, 18), (18, Colon, 19), (19, Integer, 20), (20, RightBrace, 21), (21, EOF, 21)]};
798 798
assert_eq!(toks, expected)
799 799
}
800+
801+
#[test]
802+
fn test_eof_after_multibyte() {
803+
let text = r#"ë"#;
804+
let toks = tokenize_str(text).unwrap();
805+
let expected = Tokens{source: text, tok_spans: vec![(0, Name, 2), (2, EOF, 2)]};
806+
assert_eq!(toks, expected)
807+
808+
}
800 809
}

0 commit comments

Comments
 (0)