Skip to content

Commit fb1594c

Browse files
committed
progress
1 parent 259d13b commit fb1594c

File tree

22 files changed

+891
-891
lines changed

22 files changed

+891
-891
lines changed

Cargo.lock

Lines changed: 23 additions & 23 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,17 +74,17 @@ pgt_fs = { path = "./crates/pgt_fs", version = "0.0.0" }
7474
pgt_lexer = { path = "./crates/pgt_lexer", version = "0.0.0" }
7575
pgt_lexer_codegen = { path = "./crates/pgt_lexer_codegen", version = "0.0.0" }
7676
pgt_lexer_new = { path = "./crates/pgt_lexer_new", version = "0.0.0" }
77+
pgt_lexer_new_codegen = { path = "./crates/pgt_lexer_new_codegen", version = "0.0.0" }
7778
pgt_lsp = { path = "./crates/pgt_lsp", version = "0.0.0" }
7879
pgt_markup = { path = "./crates/pgt_markup", version = "0.0.0" }
79-
pgt_parser = { path = "./crates/pgt_parser", version = "0.0.0" }
80-
pgt_parser_codegen = { path = "./crates/pgt_parser_codegen", version = "0.0.0" }
8180
pgt_query_ext = { path = "./crates/pgt_query_ext", version = "0.0.0" }
8281
pgt_query_ext_codegen = { path = "./crates/pgt_query_ext_codegen", version = "0.0.0" }
8382
pgt_query_proto_parser = { path = "./crates/pgt_query_proto_parser", version = "0.0.0" }
8483
pgt_schema_cache = { path = "./crates/pgt_schema_cache", version = "0.0.0" }
8584
pgt_statement_splitter = { path = "./crates/pgt_statement_splitter", version = "0.0.0" }
8685
pgt_text_edit = { path = "./crates/pgt_text_edit", version = "0.0.0" }
8786
pgt_text_size = { path = "./crates/pgt_text_size", version = "0.0.0" }
87+
pgt_tokenizer = { path = "./crates/pgt_tokenizer", version = "0.0.0" }
8888
pgt_treesitter_queries = { path = "./crates/pgt_treesitter_queries", version = "0.0.0" }
8989
pgt_typecheck = { path = "./crates/pgt_typecheck", version = "0.0.0" }
9090
pgt_workspace = { path = "./crates/pgt_workspace", version = "0.0.0" }

crates/pgt_lexer_new/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ version = "0.0.0"
1212

1313

1414
[dependencies]
15+
pgt_lexer_new_codegen.workspace = true
16+
pgt_tokenizer.workspace = true
1517

1618
[dev-dependencies]
1719
insta.workspace = true

crates/pgt_lexer_new/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Heavily inspired by and copied from [squawk_lexer](https://github.com/sbdchd/squawk/tree/9acfecbbb7f3c7eedcbaf060e7b25f9afa136db3/crates/squawk_lexer). Thanks for making all the hard work MIT-licensed!
1+
Heavily inspired by and copied from [squawk_parser](https://github.com/sbdchd/squawk/tree/9acfecbbb7f3c7eedcbaf060e7b25f9afa136db3/crates/squawk_parser). Thanks for making all the hard work MIT-licensed!
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pgt_lexer_new_codegen::syntax_kind_codegen!();

crates/pgt_parser/src/lexed_str.rs renamed to crates/pgt_lexer_new/src/lexed_str.rs

Lines changed: 51 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
use std::ops;
44

5-
use pgt_lexer_new::tokenize;
5+
use pgt_tokenizer::tokenize;
66

77
use crate::SyntaxKind;
88

@@ -109,7 +109,7 @@ impl<'a> Converter<'a> {
109109
}
110110
}
111111

112-
fn extend_token(&mut self, kind: &pgt_lexer_new::TokenKind, token_text: &str) {
112+
fn extend_token(&mut self, kind: &pgt_tokenizer::TokenKind, token_text: &str) {
113113
// A note on an intended tradeoff:
114114
// We drop some useful information here (see patterns with double dots `..`)
115115
// Storing that info in `SyntaxKind` is not possible due to its layout requirements of
@@ -118,22 +118,22 @@ impl<'a> Converter<'a> {
118118

119119
let syntax_kind = {
120120
match kind {
121-
pgt_lexer_new::TokenKind::LineComment => SyntaxKind::COMMENT,
122-
pgt_lexer_new::TokenKind::BlockComment { terminated } => {
121+
pgt_tokenizer::TokenKind::LineComment => SyntaxKind::COMMENT,
122+
pgt_tokenizer::TokenKind::BlockComment { terminated } => {
123123
if !terminated {
124124
err = "Missing trailing `*/` symbols to terminate the block comment";
125125
}
126126
SyntaxKind::COMMENT
127127
}
128128

129129
// whitespace
130-
pgt_lexer_new::TokenKind::Space => SyntaxKind::SPACE,
131-
pgt_lexer_new::TokenKind::Tab => SyntaxKind::TAB,
132-
pgt_lexer_new::TokenKind::Newline => SyntaxKind::NEWLINE,
133-
pgt_lexer_new::TokenKind::CarriageReturn => SyntaxKind::CARRIAGE_RETURN,
134-
pgt_lexer_new::TokenKind::VerticalTab => SyntaxKind::VERTICAL_TAB,
135-
pgt_lexer_new::TokenKind::FormFeed => SyntaxKind::FORM_FEED,
136-
pgt_lexer_new::TokenKind::Ident => {
130+
pgt_tokenizer::TokenKind::Space => SyntaxKind::SPACE,
131+
pgt_tokenizer::TokenKind::Tab => SyntaxKind::TAB,
132+
pgt_tokenizer::TokenKind::Newline => SyntaxKind::NEWLINE,
133+
pgt_tokenizer::TokenKind::CarriageReturn => SyntaxKind::CARRIAGE_RETURN,
134+
pgt_tokenizer::TokenKind::VerticalTab => SyntaxKind::VERTICAL_TAB,
135+
pgt_tokenizer::TokenKind::FormFeed => SyntaxKind::FORM_FEED,
136+
pgt_tokenizer::TokenKind::Ident => {
137137
// TODO: check for max identifier length
138138
//
139139
// see: https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
@@ -146,43 +146,43 @@ impl<'a> Converter<'a> {
146146
// see: https://github.com/postgres/postgres/blob/e032e4c7ddd0e1f7865b246ec18944365d4f8614/src/include/pg_config_manual.h#L29
147147
SyntaxKind::from_keyword(token_text).unwrap_or(SyntaxKind::IDENT)
148148
}
149-
pgt_lexer_new::TokenKind::Literal { kind, .. } => {
149+
pgt_tokenizer::TokenKind::Literal { kind, .. } => {
150150
self.extend_literal(token_text.len(), kind);
151151
return;
152152
}
153-
pgt_lexer_new::TokenKind::Semi => SyntaxKind::SEMICOLON,
154-
pgt_lexer_new::TokenKind::Comma => SyntaxKind::COMMA,
155-
pgt_lexer_new::TokenKind::Dot => SyntaxKind::DOT,
156-
pgt_lexer_new::TokenKind::OpenParen => SyntaxKind::L_PAREN,
157-
pgt_lexer_new::TokenKind::CloseParen => SyntaxKind::R_PAREN,
158-
pgt_lexer_new::TokenKind::OpenBracket => SyntaxKind::L_BRACK,
159-
pgt_lexer_new::TokenKind::CloseBracket => SyntaxKind::R_BRACK,
160-
pgt_lexer_new::TokenKind::At => SyntaxKind::AT,
161-
pgt_lexer_new::TokenKind::Pound => SyntaxKind::POUND,
162-
pgt_lexer_new::TokenKind::Tilde => SyntaxKind::TILDE,
163-
pgt_lexer_new::TokenKind::Question => SyntaxKind::QUESTION,
164-
pgt_lexer_new::TokenKind::Colon => SyntaxKind::COLON,
165-
pgt_lexer_new::TokenKind::Eq => SyntaxKind::EQ,
166-
pgt_lexer_new::TokenKind::Bang => SyntaxKind::BANG,
167-
pgt_lexer_new::TokenKind::Lt => SyntaxKind::L_ANGLE,
168-
pgt_lexer_new::TokenKind::Gt => SyntaxKind::R_ANGLE,
169-
pgt_lexer_new::TokenKind::Minus => SyntaxKind::MINUS,
170-
pgt_lexer_new::TokenKind::And => SyntaxKind::AMP,
171-
pgt_lexer_new::TokenKind::Or => SyntaxKind::PIPE,
172-
pgt_lexer_new::TokenKind::Plus => SyntaxKind::PLUS,
173-
pgt_lexer_new::TokenKind::Star => SyntaxKind::STAR,
174-
pgt_lexer_new::TokenKind::Slash => SyntaxKind::SLASH,
175-
pgt_lexer_new::TokenKind::Caret => SyntaxKind::CARET,
176-
pgt_lexer_new::TokenKind::Percent => SyntaxKind::PERCENT,
177-
pgt_lexer_new::TokenKind::Unknown => SyntaxKind::ERROR,
178-
pgt_lexer_new::TokenKind::UnknownPrefix => {
153+
pgt_tokenizer::TokenKind::Semi => SyntaxKind::SEMICOLON,
154+
pgt_tokenizer::TokenKind::Comma => SyntaxKind::COMMA,
155+
pgt_tokenizer::TokenKind::Dot => SyntaxKind::DOT,
156+
pgt_tokenizer::TokenKind::OpenParen => SyntaxKind::L_PAREN,
157+
pgt_tokenizer::TokenKind::CloseParen => SyntaxKind::R_PAREN,
158+
pgt_tokenizer::TokenKind::OpenBracket => SyntaxKind::L_BRACK,
159+
pgt_tokenizer::TokenKind::CloseBracket => SyntaxKind::R_BRACK,
160+
pgt_tokenizer::TokenKind::At => SyntaxKind::AT,
161+
pgt_tokenizer::TokenKind::Pound => SyntaxKind::POUND,
162+
pgt_tokenizer::TokenKind::Tilde => SyntaxKind::TILDE,
163+
pgt_tokenizer::TokenKind::Question => SyntaxKind::QUESTION,
164+
pgt_tokenizer::TokenKind::Colon => SyntaxKind::COLON,
165+
pgt_tokenizer::TokenKind::Eq => SyntaxKind::EQ,
166+
pgt_tokenizer::TokenKind::Bang => SyntaxKind::BANG,
167+
pgt_tokenizer::TokenKind::Lt => SyntaxKind::L_ANGLE,
168+
pgt_tokenizer::TokenKind::Gt => SyntaxKind::R_ANGLE,
169+
pgt_tokenizer::TokenKind::Minus => SyntaxKind::MINUS,
170+
pgt_tokenizer::TokenKind::And => SyntaxKind::AMP,
171+
pgt_tokenizer::TokenKind::Or => SyntaxKind::PIPE,
172+
pgt_tokenizer::TokenKind::Plus => SyntaxKind::PLUS,
173+
pgt_tokenizer::TokenKind::Star => SyntaxKind::STAR,
174+
pgt_tokenizer::TokenKind::Slash => SyntaxKind::SLASH,
175+
pgt_tokenizer::TokenKind::Caret => SyntaxKind::CARET,
176+
pgt_tokenizer::TokenKind::Percent => SyntaxKind::PERCENT,
177+
pgt_tokenizer::TokenKind::Unknown => SyntaxKind::ERROR,
178+
pgt_tokenizer::TokenKind::UnknownPrefix => {
179179
err = "unknown literal prefix";
180180
SyntaxKind::IDENT
181181
}
182-
pgt_lexer_new::TokenKind::Eof => SyntaxKind::EOF,
183-
pgt_lexer_new::TokenKind::Backtick => SyntaxKind::BACKTICK,
184-
pgt_lexer_new::TokenKind::PositionalParam => SyntaxKind::POSITIONAL_PARAM,
185-
pgt_lexer_new::TokenKind::QuotedIdent { terminated } => {
182+
pgt_tokenizer::TokenKind::Eof => SyntaxKind::EOF,
183+
pgt_tokenizer::TokenKind::Backtick => SyntaxKind::BACKTICK,
184+
pgt_tokenizer::TokenKind::PositionalParam => SyntaxKind::POSITIONAL_PARAM,
185+
pgt_tokenizer::TokenKind::QuotedIdent { terminated } => {
186186
if !terminated {
187187
err = "Missing trailing \" to terminate the quoted identifier"
188188
}
@@ -195,17 +195,17 @@ impl<'a> Converter<'a> {
195195
self.push(syntax_kind, token_text.len(), err);
196196
}
197197

198-
fn extend_literal(&mut self, len: usize, kind: &pgt_lexer_new::LiteralKind) {
198+
fn extend_literal(&mut self, len: usize, kind: &pgt_tokenizer::LiteralKind) {
199199
let mut err = "";
200200

201201
let syntax_kind = match *kind {
202-
pgt_lexer_new::LiteralKind::Int { empty_int, base: _ } => {
202+
pgt_tokenizer::LiteralKind::Int { empty_int, base: _ } => {
203203
if empty_int {
204204
err = "Missing digits after the integer base prefix";
205205
}
206206
SyntaxKind::INT_NUMBER
207207
}
208-
pgt_lexer_new::LiteralKind::Float {
208+
pgt_tokenizer::LiteralKind::Float {
209209
empty_exponent,
210210
base: _,
211211
} => {
@@ -214,43 +214,43 @@ impl<'a> Converter<'a> {
214214
}
215215
SyntaxKind::FLOAT_NUMBER
216216
}
217-
pgt_lexer_new::LiteralKind::Str { terminated } => {
217+
pgt_tokenizer::LiteralKind::Str { terminated } => {
218218
if !terminated {
219219
err = "Missing trailing `'` symbol to terminate the string literal";
220220
}
221221
// TODO: rust-analyzer checks for un-escaped strings, we should too
222222
SyntaxKind::STRING
223223
}
224-
pgt_lexer_new::LiteralKind::ByteStr { terminated } => {
224+
pgt_tokenizer::LiteralKind::ByteStr { terminated } => {
225225
if !terminated {
226226
err = "Missing trailing `'` symbol to terminate the hex bit string literal";
227227
}
228228
// TODO: rust-analyzer checks for un-escaped strings, we should too
229229
SyntaxKind::BYTE_STRING
230230
}
231-
pgt_lexer_new::LiteralKind::BitStr { terminated } => {
231+
pgt_tokenizer::LiteralKind::BitStr { terminated } => {
232232
if !terminated {
233233
err = "Missing trailing `\'` symbol to terminate the bit string literal";
234234
}
235235
// TODO: rust-analyzer checks for un-escaped strings, we should too
236236
SyntaxKind::BIT_STRING
237237
}
238-
pgt_lexer_new::LiteralKind::DollarQuotedString { terminated } => {
238+
pgt_tokenizer::LiteralKind::DollarQuotedString { terminated } => {
239239
if !terminated {
240240
// TODO: we could be fancier and say the ending string we're looking for
241241
err = "Unterminated dollar quoted string literal";
242242
}
243243
// TODO: rust-analyzer checks for un-escaped strings, we should too
244244
SyntaxKind::DOLLAR_QUOTED_STRING
245245
}
246-
pgt_lexer_new::LiteralKind::UnicodeEscStr { terminated } => {
246+
pgt_tokenizer::LiteralKind::UnicodeEscStr { terminated } => {
247247
if !terminated {
248248
err = "Missing trailing `'` symbol to terminate the unicode escape string literal";
249249
}
250250
// TODO: rust-analyzer checks for un-escaped strings, we should too
251251
SyntaxKind::BYTE_STRING
252252
}
253-
pgt_lexer_new::LiteralKind::EscStr { terminated } => {
253+
pgt_tokenizer::LiteralKind::EscStr { terminated } => {
254254
if !terminated {
255255
err = "Missing trailing `\'` symbol to terminate the escape string literal";
256256
}

0 commit comments

Comments
 (0)