Commit d33d56a

progress
1 parent 7b49632 commit d33d56a

File tree: 21 files changed (+613 / -827 lines)


.claude/settings.local.json

Lines changed: 2 additions & 1 deletion
@@ -4,7 +4,8 @@
       "Bash(grep:*)",
       "Bash(rg:*)",
       "Bash(cargo test:*)",
-      "Bash(cargo run:*)"
+      "Bash(cargo run:*)",
+      "Bash(cargo check:*)"
     ],
     "deny": []
   }

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default.

crates/pgt_lexer_new/src/lexed.rs

Lines changed: 3 additions & 3 deletions
@@ -31,10 +31,10 @@ pub struct Lexed<'a> {
     pub(crate) line_ending_counts: Vec<usize>,
 }
 
-impl<'a> Lexed<'a> {
-    /// Returns the number of tokens (excluding EOF)
+impl Lexed<'_> {
+    /// Returns the number of tokens
     pub fn len(&self) -> usize {
-        self.kind.len() - 1
+        self.kind.len()
     }
 
     /// Returns true if there are no tokens
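
The behavioural part of this hunk is easy to miss: len() previously subtracted one to hide the trailing EOF token and now reports the full token count. A minimal sketch of what this means for callers, assuming token indices are plain usize values and that range() is the accessor the splitter diagnostics below rely on (neither detail is spelled out in this hunk):

// Hypothetical caller of Lexed::len(); with this change the loop also
// visits the trailing EOF token, so code that relied on len() stopping
// short of EOF needs adjusting.
for idx in 0..lexed.len() {
    let range = lexed.range(idx); // text range of the token at `idx`
    // ... inspect the token at `range` ...
}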

crates/pgt_lexer_new/src/lexer.rs

Lines changed: 1 addition & 0 deletions
@@ -124,6 +124,7 @@ impl<'a> Lexer<'a> {
             pgt_tokenizer::TokenKind::Caret => SyntaxKind::CARET,
             pgt_tokenizer::TokenKind::Percent => SyntaxKind::PERCENT,
             pgt_tokenizer::TokenKind::Unknown => SyntaxKind::ERROR,
+            pgt_tokenizer::TokenKind::Backslash => SyntaxKind::BACKSLASH,
             pgt_tokenizer::TokenKind::UnknownPrefix => {
                 err = "unknown literal prefix";
                 SyntaxKind::IDENT
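
This arm pairs with the ("\\", "BACKSLASH") entry added to the codegen PUNCT table further down, so a lone backslash now maps to a dedicated SyntaxKind. A rough sketch of the expected effect; lex and kind are placeholders for the crate's real constructor and accessor, which this commit does not show:

// Placeholder API names (`lex`, `kind`); only the Backslash -> BACKSLASH
// mapping itself comes from the diff above.
let lexed = lex(r"\");
assert_eq!(lexed.kind(0), SyntaxKind::BACKSLASH);
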
Lines changed: 7 additions & 167 deletions
@@ -1,88 +1,13 @@
 // from https://github.com/sbdchd/squawk/blob/ac9f90c3b2be8d2c46fd5454eb48975afd268dbe/crates/xtask/src/keywords.rs
 use anyhow::{Context, Ok, Result};
-use enum_iterator::{Sequence, all};
-use std::{
-    collections::{HashMap, HashSet},
-    path,
-};
+use std::path;
 
-struct KeywordMeta {
-    pub(crate) category: KeywordCategory,
-    pub(crate) label: KeywordLabel,
-}
-
-enum KeywordLabel {
-    As,
-    Bare,
-}
-
-/// related:
-/// - [postgres/src/backend/utils/adt/misc.c](https://github.com/postgres/postgres/blob/08691ea958c2646b6aadefff878539eb0b860bb0/src/backend/utils/adt/misc.c#L452-L467/)
-/// - [postgres docs: sql keywords appendix](https://www.postgresql.org/docs/17/sql-keywords-appendix.html)
-///
-/// The header file isn't enough though because `json_scalar` can be a function
-/// name, but `between` cannot be
-///
-/// The Postgres parser special cases certain calls like `json_scalar`:
-/// <https://github.com/postgres/postgres/blob/028b4b21df26fee67b3ce75c6f14fcfd3c7cf2ee/src/backend/parser/gram.y#L15684C8-L16145>
-///
-/// | Category     | Column | Table | Function | Type |
-/// |--------------|--------|-------|----------|------|
-/// | Unreserved   | Y      | Y     | Y        | Y    |
-/// | Reserved     | N      | N     | N        | N    |
-/// | ColName      | Y      | Y     | N        | Y    |
-/// | TypeFuncName | N      | N     | Y        | Y    |
-///
-#[derive(Clone, Copy)]
-enum KeywordCategory {
-    Unreserved,
-    Reserved,
-    ColName,
-    TypeFuncName,
-}
-
-#[derive(Sequence, PartialEq)]
-enum KWType {
-    ColumnTable,
-    Type,
-}
-
-impl std::fmt::Display for KWType {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(match self {
-            KWType::ColumnTable => "COLUMN_OR_TABLE_KEYWORDS",
-            KWType::Type => "TYPE_KEYWORDS",
-        })
-    }
-}
-
-fn keyword_allowed(cat: KeywordCategory, kw_type: KWType) -> bool {
-    match cat {
-        KeywordCategory::Unreserved => match kw_type {
-            KWType::ColumnTable => true,
-            KWType::Type => true,
-        },
-        KeywordCategory::Reserved => match kw_type {
-            KWType::ColumnTable => false,
-            KWType::Type => false,
-        },
-        KeywordCategory::ColName => match kw_type {
-            KWType::ColumnTable => true,
-            KWType::Type => true,
-        },
-        KeywordCategory::TypeFuncName => match kw_type {
-            KWType::ColumnTable => false,
-            KWType::Type => true,
-        },
-    }
-}
-
-fn parse_header() -> Result<HashMap<String, KeywordMeta>> {
+fn parse_header() -> Result<Vec<String>> {
     // use the environment variable set by the build script to locate the kwlist.h file
     let kwlist_file = path::PathBuf::from(env!("PG_QUERY_KWLIST_PATH"));
     let data = std::fs::read_to_string(kwlist_file).context("Failed to read kwlist.h")?;
 
-    let mut keywords = HashMap::new();
+    let mut keywords = Vec::new();
 
     for line in data.lines() {
         if line.starts_with("PG_KEYWORD") {
@@ -94,24 +19,9 @@ fn parse_header() -> Result<HashMap<String, KeywordMeta>> {
             let row_items: Vec<&str> = line.split(',').collect();
 
             match row_items[..] {
-                [name, _value, category, is_bare_label] => {
-                    let label = match is_bare_label.trim() {
-                        "AS_LABEL" => KeywordLabel::As,
-                        "BARE_LABEL" => KeywordLabel::Bare,
-                        unexpected => anyhow::bail!("Unexpected label: {}", unexpected),
-                    };
-
-                    let category = match category.trim() {
-                        "UNRESERVED_KEYWORD" => KeywordCategory::Unreserved,
-                        "RESERVED_KEYWORD" => KeywordCategory::Reserved,
-                        "COL_NAME_KEYWORD" => KeywordCategory::ColName,
-                        "TYPE_FUNC_NAME_KEYWORD" => KeywordCategory::TypeFuncName,
-                        unexpected => anyhow::bail!("Unexpected category: {}", unexpected),
-                    };
-
-                    let meta = KeywordMeta { category, label };
+                [name, _value, _category, _is_bare_label] => {
                     let name = name.trim().replace('\"', "");
-                    keywords.insert(name, meta);
+                    keywords.push(name);
                 }
                 _ => anyhow::bail!("Problem reading kwlist.h row"),
             }
@@ -123,81 +33,11 @@ fn parse_header() -> Result<HashMap<String, KeywordMeta>> {
 
 pub(crate) struct KeywordKinds {
     pub(crate) all_keywords: Vec<String>,
-    pub(crate) bare_label_keywords: Vec<String>,
-    pub(crate) unreserved_keywords: Vec<String>,
-    pub(crate) reserved_keywords: Vec<String>,
-    pub(crate) col_table_keywords: Vec<String>,
-    pub(crate) type_keywords: Vec<String>,
 }
 
 pub(crate) fn keyword_kinds() -> Result<KeywordKinds> {
-    let keywords = parse_header()?;
-    let mut bare_label_keywords = keywords
-        .iter()
-        .filter(|(_key, value)| match value.label {
-            KeywordLabel::As => false,
-            KeywordLabel::Bare => true,
-        })
-        .map(|(key, _value)| key.to_owned())
-        .collect::<Vec<String>>();
-    bare_label_keywords.sort();
-
-    let mut unreserved_keywords = keywords
-        .iter()
-        .filter(|(_key, value)| matches!(value.category, KeywordCategory::Unreserved))
-        .map(|(key, _value)| key.to_owned())
-        .collect::<Vec<String>>();
-    unreserved_keywords.sort();
-
-    let mut reserved_keywords = keywords
-        .iter()
-        .filter(|(_key, value)| matches!(value.category, KeywordCategory::Reserved))
-        .map(|(key, _value)| key.to_owned())
-        .collect::<Vec<String>>();
-    reserved_keywords.sort();
-
-    let mut all_keywords = keywords
-        .keys()
-        .map(|key| key.to_owned())
-        .collect::<Vec<String>>();
+    let mut all_keywords = parse_header()?;
     all_keywords.sort();
 
-    let mut col_table_tokens = HashSet::new();
-    let mut type_tokens = HashSet::new();
-    for (key, meta) in &keywords {
-        for variant in all::<KWType>() {
-            match variant {
-                KWType::ColumnTable => {
-                    if keyword_allowed(meta.category, variant) {
-                        col_table_tokens.insert(key);
-                    }
-                }
-                KWType::Type => {
-                    if keyword_allowed(meta.category, variant) {
-                        type_tokens.insert(key);
-                    }
-                }
-            }
-        }
-    }
-
-    let mut col_table_keywords = col_table_tokens
-        .iter()
-        .map(|x| x.to_string())
-        .collect::<Vec<String>>();
-    col_table_keywords.sort();
-    let mut type_keywords = type_tokens
-        .iter()
-        .map(|x| x.to_string())
-        .collect::<Vec<String>>();
-    type_keywords.sort();
-
-    Ok(KeywordKinds {
-        all_keywords,
-        bare_label_keywords,
-        unreserved_keywords,
-        reserved_keywords,
-        col_table_keywords,
-        type_keywords,
-    })
+    Ok(KeywordKinds { all_keywords })
 }
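
The simplification is easier to see with a concrete kwlist.h row in mind; a representative line looks like PG_KEYWORD("abort", ABORT_P, UNRESERVED_KEYWORD, BARE_LABEL). The generator used to keep the category and bare-label columns, and now only the keyword name survives. A minimal sketch of the per-row extraction, assuming the PG_KEYWORD( wrapper has already been stripped by the lines elided between the first two hunks:

// Sketch of the simplified field extraction. The input assumes the
// surrounding PG_KEYWORD(...) macro text has already been removed,
// which this diff does not show.
let args = r#""abort", ABORT_P, UNRESERVED_KEYWORD, BARE_LABEL"#;
let row_items: Vec<&str> = args.split(',').collect();
if let [name, _value, _category, _is_bare_label] = row_items[..] {
    let name = name.trim().replace('\"', "");
    assert_eq!(name, "abort"); // only the keyword text is kept now
}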

crates/pgt_lexer_new_codegen/src/syntax_kind.rs

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ const PUNCT: &[(&str, &str)] = &[
     ("+", "PLUS"),
     ("*", "STAR"),
     ("/", "SLASH"),
+    ("\\", "BACKSLASH"),
     ("^", "CARET"),
     ("%", "PERCENT"),
     ("_", "UNDERSCORE"),

crates/pgt_statement_splitter/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ version = "0.0.0"
 [dependencies]
 pgt_diagnostics = { workspace = true }
 pgt_lexer.workspace = true
+pgt_lexer_new.workspace = true
 pgt_query_ext.workspace = true
 pgt_text_size.workspace = true
 regex.workspace = true

crates/pgt_statement_splitter/src/diagnostics.rs

Lines changed: 22 additions & 0 deletions
@@ -1,6 +1,9 @@
 use pgt_diagnostics::{Diagnostic, MessageAndDescription};
+use pgt_lexer_new::{LexDiagnostic, Lexed};
 use pgt_text_size::TextRange;
 
+use crate::splitter::SplitError;
+
 /// A specialized diagnostic for the statement splitter parser.
 ///
 /// Parser diagnostics are always **errors**.
@@ -23,3 +26,22 @@ impl SplitDiagnostic {
         }
     }
 }
+
+impl From<LexDiagnostic> for SplitDiagnostic {
+    fn from(lex_diagnostic: LexDiagnostic) -> Self {
+        Self {
+            span: Some(lex_diagnostic.span),
+            message: lex_diagnostic.message,
+        }
+    }
+}
+
+impl SplitDiagnostic {
+    pub fn from_split_error(split_error: SplitError, lexed: &Lexed) -> Self {
+        let range = lexed.range(split_error.token);
+        Self {
+            span: Some(range),
+            message: MessageAndDescription::from(split_error.msg),
+        }
+    }
+}
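
Taken together, the two constructors cover the splitter's two error sources: lexer diagnostics already carry a span and convert via From, while splitter errors only know the offending token and need the Lexed to resolve it to a range. A rough usage sketch; the collections and variable names are illustrative, only the two conversions come from the diff:

// Hypothetical call site inside the statement splitter.
let mut diagnostics: Vec<SplitDiagnostic> = Vec::new();

// Lexer errors: span and message are taken over as-is.
diagnostics.extend(lex_diagnostics.into_iter().map(SplitDiagnostic::from));

// Splitter errors: the token index is resolved to a TextRange via the Lexed.
for err in split_errors {
    diagnostics.push(SplitDiagnostic::from_split_error(err, &lexed));
}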
