Skip to content

Commit 7b49632

Browse files
committed
progress
1 parent 8c0678a commit 7b49632

File tree

3 files changed

+39
-12
lines changed

3 files changed

+39
-12
lines changed

crates/pgt_lexer_new/src/lexed.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub struct Lexed<'a> {
2828
pub(crate) kind: Vec<SyntaxKind>,
2929
pub(crate) start: Vec<u32>,
3030
pub(crate) error: Vec<LexError>,
31+
pub(crate) line_ending_counts: Vec<usize>,
3132
}
3233

3334
impl<'a> Lexed<'a> {
@@ -52,6 +53,13 @@ impl<'a> Lexed<'a> {
5253
self.kind[idx]
5354
}
5455

56+
/// Returns the number of line endings in the token at the given index
57+
pub fn line_ending_count(&self, idx: usize) -> usize {
58+
assert!(idx < self.len());
59+
assert!(self.kind(idx) == SyntaxKind::LINE_ENDING);
60+
self.line_ending_counts[idx]
61+
}
62+
5563
/// Returns the text range of token at the given index
5664
pub fn range(&self, idx: usize) -> TextRange {
5765
let range = self.text_range(idx);

crates/pgt_lexer_new/src/lexer.rs

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ pub struct Lexer<'a> {
1010
start: Vec<u32>,
1111
error: Vec<LexError>,
1212
offset: usize,
13+
/// we store line ending counts outside of SyntaxKind because of the u16 representation of SyntaxKind
14+
line_ending_counts: Vec<usize>,
1315
}
1416

1517
impl<'a> Lexer<'a> {
@@ -21,6 +23,7 @@ impl<'a> Lexer<'a> {
2123
start: Vec::new(),
2224
error: Vec::new(),
2325
offset: 0,
26+
line_ending_counts: Vec::new(),
2427
}
2528
}
2629

@@ -32,21 +35,35 @@ impl<'a> Lexer<'a> {
3235
}
3336

3437
// Add EOF token
35-
self.push(SyntaxKind::EOF, 0, None);
38+
self.push(SyntaxKind::EOF, 0, None, None);
3639

3740
Lexed {
3841
text: self.text,
3942
kind: self.kind,
4043
start: self.start,
4144
error: self.error,
45+
line_ending_counts: self.line_ending_counts,
4246
}
4347
}
4448

45-
fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) {
49+
fn push(
50+
&mut self,
51+
kind: SyntaxKind,
52+
len: usize,
53+
err: Option<&str>,
54+
line_ending_count: Option<usize>,
55+
) {
4656
self.kind.push(kind);
4757
self.start.push(self.offset as u32);
4858
self.offset += len;
4959

60+
assert!(
61+
kind != SyntaxKind::LINE_ENDING || line_ending_count.is_some(),
62+
"Line ending token must have a line ending count"
63+
);
64+
65+
self.line_ending_counts.push(line_ending_count.unwrap_or(0));
66+
5067
if let Some(err) = err {
5168
let token = (self.kind.len() - 1) as u32;
5269
let msg = err.to_owned();
@@ -56,6 +73,7 @@ impl<'a> Lexer<'a> {
5673

5774
fn extend_token(&mut self, kind: &pgt_tokenizer::TokenKind, token_text: &str) {
5875
let mut err = "";
76+
let mut line_ending_count = None;
5977

6078
let syntax_kind = {
6179
match kind {
@@ -68,8 +86,10 @@ impl<'a> Lexer<'a> {
6886
}
6987
pgt_tokenizer::TokenKind::Space => SyntaxKind::SPACE,
7088
pgt_tokenizer::TokenKind::Tab => SyntaxKind::TAB,
71-
pgt_tokenizer::TokenKind::Newline => SyntaxKind::NEWLINE,
72-
pgt_tokenizer::TokenKind::CarriageReturn => SyntaxKind::CARRIAGE_RETURN,
89+
pgt_tokenizer::TokenKind::LineEnding { count } => {
90+
line_ending_count = Some(*count);
91+
SyntaxKind::LINE_ENDING
92+
}
7393
pgt_tokenizer::TokenKind::VerticalTab => SyntaxKind::VERTICAL_TAB,
7494
pgt_tokenizer::TokenKind::FormFeed => SyntaxKind::FORM_FEED,
7595
pgt_tokenizer::TokenKind::Ident => {
@@ -121,7 +141,7 @@ impl<'a> Lexer<'a> {
121141
};
122142

123143
let err = if err.is_empty() { None } else { Some(err) };
124-
self.push(syntax_kind, token_text.len(), err);
144+
self.push(syntax_kind, token_text.len(), err, line_ending_count);
125145
}
126146

127147
fn extend_literal(&mut self, len: usize, kind: &pgt_tokenizer::LiteralKind) {
@@ -182,6 +202,6 @@ impl<'a> Lexer<'a> {
182202
};
183203

184204
let err = if err.is_empty() { None } else { Some(err) };
185-
self.push(syntax_kind, len, err);
205+
self.push(syntax_kind, len, err, None);
186206
}
187207
}

crates/pgt_lexer_new_codegen/src/syntax_kind.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@ use quote::{format_ident, quote};
55
use crate::keywords::{KeywordKinds, keyword_kinds};
66

77
const WHITESPACE: &[&str] = &[
8-
"SPACE", // " "
9-
"TAB", // "\t"
10-
"NEWLINE", // "\n"
11-
"CARRIAGE_RETURN", // "\r"
12-
"VERTICAL_TAB", // "\x0B"
13-
"FORM_FEED", // "\x0C"
8+
"SPACE", // " "
9+
"TAB", // "\t"
10+
"VERTICAL_TAB", // "\x0B"
11+
"FORM_FEED", // "\x0C"
12+
"LINE_ENDING", // "\n" or "\r" in any combination
1413
];
1514

1615
const PUNCT: &[(&str, &str)] = &[

0 commit comments

Comments
 (0)