diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 00000000..85429d0c --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,13 @@ +{ + "permissions": { + "allow": [ + "Bash(grep:*)", + "Bash(rg:*)", + "Bash(cargo test:*)", + "Bash(cargo run:*)", + "Bash(cargo check:*)", + "Bash(cargo fmt:*)" + ], + "deny": [] + } +} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 41f807d1..074ed19b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -334,6 +334,12 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bindgen" version = "0.66.1" @@ -747,9 +753,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.3" +version = "1.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27f657647bcff5394bf56c7317665bbf790a137a50eaaa5c6bfbb9e27a518f2d" +checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" dependencies = [ "shlex", ] @@ -1363,6 +1369,12 @@ dependencies = [ "spin", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -2113,6 +2125,40 @@ dependencies = [ "value-bag", ] +[[package]] +name = "logos" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab6f536c1af4c7cc81edf73da1f8029896e7e1e16a219ef09b184e76a296f3db" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "189bbfd0b61330abea797e5e9276408f2edbe4f822d7ad08685d67419aafb34e" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax 0.8.5", + "rustc_version", + "syn 2.0.90", +] + +[[package]] +name = "logos-derive" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebfe8e1a19049ddbfccbd14ac834b215e11b85b90bab0c2dba7c7b92fb5d5cba" +dependencies = [ + "logos-codegen", +] + [[package]] name = "lsp-types" version = "0.94.1" @@ -2160,6 +2206,28 @@ dependencies = [ "autocfg", ] +[[package]] +name = "miette" +version = "7.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f98efec8807c63c752b5bd61f862c165c115b0a35685bdcfd9238c7aeb592b7" +dependencies = [ + "cfg-if", + "miette-derive", + "unicode-width", +] + +[[package]] +name = "miette-derive" +version = "7.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "mimalloc" version = "0.1.43" @@ -2694,20 +2762,23 @@ name = "pgt_lexer" version = "0.0.0" dependencies = [ "insta", - "pg_query", "pgt_diagnostics", "pgt_lexer_codegen", "pgt_text_size", - "regex", + "pgt_tokenizer", ] [[package]] name = "pgt_lexer_codegen" version = "0.0.0" dependencies = [ - "pgt_query_proto_parser", + "anyhow", + "convert_case", "proc-macro2", + 
"prost-reflect", + "protox", "quote", + "ureq", ] [[package]] @@ -2755,20 +2826,9 @@ dependencies = [ "petgraph", "pg_query", "pgt_diagnostics", - "pgt_lexer", - "pgt_query_ext_codegen", "pgt_text_size", ] -[[package]] -name = "pgt_query_ext_codegen" -version = "0.0.0" -dependencies = [ - "pgt_query_proto_parser", - "proc-macro2", - "quote", -] - [[package]] name = "pgt_query_proto_parser" version = "0.0.0" @@ -2851,6 +2911,13 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "pgt_tokenizer" +version = "0.0.0" +dependencies = [ + "insta", +] + [[package]] name = "pgt_treesitter_queries" version = "0.0.0" @@ -3194,6 +3261,18 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "prost-reflect" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37587d5a8a1b3dc9863403d084fc2254b91ab75a702207098837950767e2260b" +dependencies = [ + "logos", + "miette", + "prost", + "prost-types", +] + [[package]] name = "prost-types" version = "0.13.5" @@ -3239,6 +3318,33 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "protox" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "424c2bd294b69c49b949f3619362bc3c5d28298cd1163b6d1a62df37c16461aa" +dependencies = [ + "bytes", + "miette", + "prost", + "prost-reflect", + "prost-types", + "protox-parse", + "thiserror 2.0.6", +] + +[[package]] +name = "protox-parse" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57927f9dbeeffcce7192404deee6157a640cbb3fe8ac11eabbe571565949ab75" +dependencies = [ + "logos", + "miette", + "prost-types", + "thiserror 2.0.6", +] + [[package]] name = "pulldown-cmark" version = "0.12.2" @@ -3405,6 +3511,20 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rsa" version = "0.9.7" @@ -3458,6 +3578,15 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "0.37.28" @@ -3485,6 +3614,41 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "rustls" +version = "0.23.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.20" @@ -3539,6 +3703,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + [[package]] name = "serde" version = "1.0.215" @@ -4647,6 +4817,28 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "url", + "webpki-roots 0.26.11", +] + [[package]] name = "url" version = "2.5.4" @@ -4844,6 +5036,24 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.1", +] + +[[package]] +name = "webpki-roots" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8782dd5a41a24eed3a4f40b606249b3e236ca61adf1f25ea4d45c73de122b502" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" version = "4.4.2" diff --git a/Cargo.toml b/Cargo.toml index fe00d7ca..b5d6dd01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,9 @@ slotmap = "1.0.7" smallvec = { version = "1.13.2", features = ["union", "const_new", "serde"] } strum = { version = "0.27.1", features = ["derive"] } # this will use tokio if available, otherwise async-std +convert_case = "0.6.0" +prost-reflect = "0.15.3" +protox = "0.8.0" sqlx = { version = "0.8.2", features = ["runtime-tokio", "runtime-async-std", "postgres", "json"] } syn = "1.0.109" termcolor = "1.4.1" @@ -72,12 +75,12 @@ pgt_lexer_codegen = { path = "./crates/pgt_lexer_codegen", version = "0 pgt_lsp = { path = "./crates/pgt_lsp", version = "0.0.0" } pgt_markup = { path = "./crates/pgt_markup", version = "0.0.0" } pgt_query_ext = { path = "./crates/pgt_query_ext", version = "0.0.0" } -pgt_query_ext_codegen = { path = "./crates/pgt_query_ext_codegen", version = "0.0.0" } pgt_query_proto_parser = { path = "./crates/pgt_query_proto_parser", version = "0.0.0" } pgt_schema_cache = { path = "./crates/pgt_schema_cache", version = "0.0.0" } pgt_statement_splitter = { path = "./crates/pgt_statement_splitter", version = "0.0.0" } pgt_text_edit = { path = "./crates/pgt_text_edit", version = "0.0.0" } pgt_text_size = { path = "./crates/pgt_text_size", version = "0.0.0" } +pgt_tokenizer = { path = "./crates/pgt_tokenizer", version = "0.0.0" } pgt_treesitter_queries = { path = "./crates/pgt_treesitter_queries", version = "0.0.0" } pgt_typecheck = { path = "./crates/pgt_typecheck", version = "0.0.0" } pgt_workspace = { path = 
"./crates/pgt_workspace", version = "0.0.0" } diff --git a/crates/pgt_diagnostics/src/display/message.rs b/crates/pgt_diagnostics/src/display/message.rs index 3cf9be3f..20c039a9 100644 --- a/crates/pgt_diagnostics/src/display/message.rs +++ b/crates/pgt_diagnostics/src/display/message.rs @@ -47,6 +47,15 @@ impl From for MessageAndDescription { } } +impl From<&str> for MessageAndDescription { + fn from(description: &str) -> Self { + Self { + message: markup! { {description} }.to_owned(), + description: description.into(), + } + } +} + impl From for MessageAndDescription { fn from(message: MarkupBuf) -> Self { let description = markup_to_string(&message); diff --git a/crates/pgt_lexer/Cargo.toml b/crates/pgt_lexer/Cargo.toml index 4b218588..7f4ada43 100644 --- a/crates/pgt_lexer/Cargo.toml +++ b/crates/pgt_lexer/Cargo.toml @@ -12,16 +12,12 @@ version = "0.0.0" [dependencies] -regex = "1.9.1" - -pg_query.workspace = true pgt_diagnostics.workspace = true pgt_lexer_codegen.workspace = true - -pgt_text_size.workspace = true +pgt_text_size.workspace = true +pgt_tokenizer.workspace = true [dev-dependencies] insta.workspace = true [lib] -doctest = false diff --git a/crates/pgt_lexer/README.md b/crates/pgt_lexer/README.md index ec61c7b2..57bdaa34 100644 --- a/crates/pgt_lexer/README.md +++ b/crates/pgt_lexer/README.md @@ -1,8 +1 @@ -# pgt_lexer - -The `pgt_lexer` crate exposes the `lex` method, which turns an SQL query text into a `Vec>`: the base for the `pg_parser` and most of pgtools's operations. - -A token is always of a certain `SyntaxKind` kind. That `SyntaxKind` enum is derived from `libpg_query`'s protobuf file. - -The SQL query text is mostly lexed using the `pg_query::scan` method (`pg_query` is just a Rust wrapper around `libpg_query`). -However, that method does not parse required whitespace tokens, so the `lex` method takes care of parsing those and merging them into the result. +Heavily inspired by and copied from [squawk_parser](https://github.com/sbdchd/squawk/tree/9acfecbbb7f3c7eedcbaf060e7b25f9afa136db3/crates/squawk_parser). Thanks for making all the hard work MIT-licensed! diff --git a/crates/pgt_lexer/src/codegen.rs b/crates/pgt_lexer/src/codegen.rs deleted file mode 100644 index 6c750590..00000000 --- a/crates/pgt_lexer/src/codegen.rs +++ /dev/null @@ -1,3 +0,0 @@ -use pgt_lexer_codegen::lexer_codegen; - -lexer_codegen!(); diff --git a/crates/pgt_lexer/src/codegen/mod.rs b/crates/pgt_lexer/src/codegen/mod.rs new file mode 100644 index 00000000..c4e67bc5 --- /dev/null +++ b/crates/pgt_lexer/src/codegen/mod.rs @@ -0,0 +1 @@ +pub mod syntax_kind; diff --git a/crates/pgt_lexer/src/codegen/syntax_kind.rs b/crates/pgt_lexer/src/codegen/syntax_kind.rs new file mode 100644 index 00000000..f50398ec --- /dev/null +++ b/crates/pgt_lexer/src/codegen/syntax_kind.rs @@ -0,0 +1 @@ +pgt_lexer_codegen::syntax_kind_codegen!(); diff --git a/crates/pgt_lexer/src/diagnostics.rs b/crates/pgt_lexer/src/diagnostics.rs deleted file mode 100644 index 9516387a..00000000 --- a/crates/pgt_lexer/src/diagnostics.rs +++ /dev/null @@ -1,67 +0,0 @@ -use pgt_diagnostics::{Diagnostic, MessageAndDescription}; -use pgt_text_size::TextRange; - -/// A specialized diagnostic for scan errors. -/// -/// Scan diagnostics are always **fatal errors**. 
-#[derive(Clone, Debug, Diagnostic, PartialEq)] -#[diagnostic(category = "syntax", severity = Fatal)] -pub struct ScanError { - /// The location where the error is occurred - #[location(span)] - span: Option, - #[message] - #[description] - pub message: MessageAndDescription, -} - -impl ScanError { - pub fn from_pg_query_err(err: pg_query::Error, input: &str) -> Vec { - let err_msg = err.to_string(); - let re = regex::Regex::new(r#"at or near "(.*?)""#).unwrap(); - let mut diagnostics = Vec::new(); - - for captures in re.captures_iter(&err_msg) { - if let Some(matched) = captures.get(1) { - let search_term = matched.as_str(); - for (idx, _) in input.match_indices(search_term) { - let from = idx; - let to = from + search_term.len(); - diagnostics.push(ScanError { - span: Some(TextRange::new( - from.try_into().unwrap(), - to.try_into().unwrap(), - )), - message: MessageAndDescription::from(err_msg.clone()), - }); - } - } - } - - if diagnostics.is_empty() { - diagnostics.push(ScanError { - span: None, - message: MessageAndDescription::from(err_msg), - }); - } - - diagnostics - } -} - -#[cfg(test)] -mod tests { - use crate::lex; - - #[test] - fn finds_all_occurrences() { - let input = - "select 1443ddwwd33djwdkjw13331333333333; select 1443ddwwd33djwdkjw13331333333333;"; - let diagnostics = lex(input).unwrap_err(); - assert_eq!(diagnostics.len(), 2); - assert_eq!(diagnostics[0].span.unwrap().start(), 7.into()); - assert_eq!(diagnostics[0].span.unwrap().end(), 39.into()); - assert_eq!(diagnostics[1].span.unwrap().start(), 48.into()); - assert_eq!(diagnostics[1].span.unwrap().end(), 80.into()); - } -} diff --git a/crates/pgt_lexer/src/lexed.rs b/crates/pgt_lexer/src/lexed.rs new file mode 100644 index 00000000..6f0a273f --- /dev/null +++ b/crates/pgt_lexer/src/lexed.rs @@ -0,0 +1,107 @@ +use pgt_diagnostics::{Diagnostic, MessageAndDescription}; +use pgt_text_size::TextRange; + +use crate::SyntaxKind; + +/// Internal error type used during lexing +#[derive(Debug, Clone)] +pub struct LexError { + pub msg: String, + pub token: u32, +} + +/// A specialized diagnostic for lex errors. 
+#[derive(Clone, Debug, Diagnostic, PartialEq)] +#[diagnostic(category = "syntax", severity = Error)] +pub struct LexDiagnostic { + /// The location where the error occurred + #[location(span)] + pub span: TextRange, + #[message] + #[description] + pub message: MessageAndDescription, +} + +/// Result of lexing a string, providing access to tokens and diagnostics +pub struct Lexed<'a> { + pub(crate) text: &'a str, + pub(crate) kind: Vec<SyntaxKind>, + pub(crate) start: Vec<u32>, + pub(crate) error: Vec<LexError>, + pub(crate) line_ending_counts: Vec<usize>, +} + +impl Lexed<'_> { + /// Returns the number of tokens + pub fn len(&self) -> usize { + self.kind.len() + } + + /// Returns true if there are no tokens + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns an iterator over token kinds + pub fn tokens(&self) -> impl Iterator<Item = SyntaxKind> + '_ { + self.kind.iter().copied() + } + + /// Returns the kind of token at the given index + pub fn kind(&self, idx: usize) -> SyntaxKind { + assert!( + idx < self.len(), + "expected index < {}, got {}", + self.len(), + idx + ); + self.kind[idx] + } + + /// Returns the number of line endings in the token at the given index + pub fn line_ending_count(&self, idx: usize) -> usize { + assert!( + idx < self.len(), + "expected index < {}, got {}", + self.len(), + idx + ); + assert!(self.kind(idx) == SyntaxKind::LINE_ENDING); + self.line_ending_counts[idx] + } + + /// Returns the text range of the token at the given index + pub fn range(&self, idx: usize) -> TextRange { + self.text_range(idx) + } + + /// Returns the text of the token at the given index + pub fn text(&self, idx: usize) -> &str { + self.range_text(idx..idx + 1) + } + + /// Returns all lexing errors with their text ranges + pub fn errors(&self) -> Vec<LexDiagnostic> { + self.error + .iter() + .map(|it| LexDiagnostic { + message: it.msg.as_str().into(), + span: self.text_range(it.token as usize), + }) + .collect() + } + + pub(crate) fn text_range(&self, i: usize) -> TextRange { + assert!(i < self.len()); + let lo = self.start[i]; + let hi = self.start[i + 1]; + TextRange::new(lo.into(), hi.into()) + } + + fn range_text(&self, r: std::ops::Range<usize>) -> &str { + assert!(r.start < r.end && r.end <= self.len()); + let lo = self.start[r.start] as usize; + let hi = self.start[r.end] as usize; + &self.text[lo..hi] + } +} diff --git a/crates/pgt_lexer/src/lexer.rs b/crates/pgt_lexer/src/lexer.rs new file mode 100644 index 00000000..db4b4ae2 --- /dev/null +++ b/crates/pgt_lexer/src/lexer.rs @@ -0,0 +1,208 @@ +use pgt_tokenizer::tokenize; + +use crate::SyntaxKind; +use crate::lexed::{LexError, Lexed}; + +/// Lexer that processes input text into tokens and diagnostics +pub struct Lexer<'a> { + text: &'a str, + kind: Vec<SyntaxKind>, + start: Vec<u32>, + error: Vec<LexError>, + offset: usize, + /// we store line ending counts outside of SyntaxKind because of the u16 representation of SyntaxKind + line_ending_counts: Vec<usize>, +} + +impl<'a> Lexer<'a> { + /// Create a new lexer for the given text + pub fn new(text: &'a str) -> Self { + Self { + text, + kind: Vec::new(), + start: Vec::new(), + error: Vec::new(), + offset: 0, + line_ending_counts: Vec::new(), + } + } + + /// Lex the input text and return the result + pub fn lex(mut self) -> Lexed<'a> { + for token in tokenize(&self.text[self.offset..]) { + let token_text = &self.text[self.offset..][..token.len as usize]; + self.extend_token(&token.kind, token_text); + } + + // Add EOF token + self.push(SyntaxKind::EOF, 0, None, None); + + Lexed { + text: self.text, + kind: self.kind, + start: self.start, + error: self.error, +
line_ending_counts: self.line_ending_counts, + } + } + + fn push( + &mut self, + kind: SyntaxKind, + len: usize, + err: Option<&str>, + line_ending_count: Option, + ) { + self.kind.push(kind); + self.start.push(self.offset as u32); + self.offset += len; + + assert!( + kind != SyntaxKind::LINE_ENDING || line_ending_count.is_some(), + "Line ending token must have a line ending count" + ); + + self.line_ending_counts.push(line_ending_count.unwrap_or(0)); + + if let Some(err) = err { + let token = (self.kind.len() - 1) as u32; + let msg = err.to_owned(); + self.error.push(LexError { msg, token }); + } + } + + fn extend_token(&mut self, kind: &pgt_tokenizer::TokenKind, token_text: &str) { + let mut err = ""; + let mut line_ending_count = None; + + let syntax_kind = { + match kind { + pgt_tokenizer::TokenKind::LineComment => SyntaxKind::COMMENT, + pgt_tokenizer::TokenKind::BlockComment { terminated } => { + if !terminated { + err = "Missing trailing `*/` symbols to terminate the block comment"; + } + SyntaxKind::COMMENT + } + pgt_tokenizer::TokenKind::Space => SyntaxKind::SPACE, + pgt_tokenizer::TokenKind::Tab => SyntaxKind::TAB, + pgt_tokenizer::TokenKind::LineEnding { count } => { + line_ending_count = Some(*count); + SyntaxKind::LINE_ENDING + } + pgt_tokenizer::TokenKind::VerticalTab => SyntaxKind::VERTICAL_TAB, + pgt_tokenizer::TokenKind::FormFeed => SyntaxKind::FORM_FEED, + pgt_tokenizer::TokenKind::Ident => { + SyntaxKind::from_keyword(token_text).unwrap_or(SyntaxKind::IDENT) + } + pgt_tokenizer::TokenKind::Literal { kind, .. } => { + self.extend_literal(token_text.len(), kind); + return; + } + pgt_tokenizer::TokenKind::Semi => SyntaxKind::SEMICOLON, + pgt_tokenizer::TokenKind::Comma => SyntaxKind::COMMA, + pgt_tokenizer::TokenKind::Dot => SyntaxKind::DOT, + pgt_tokenizer::TokenKind::OpenParen => SyntaxKind::L_PAREN, + pgt_tokenizer::TokenKind::CloseParen => SyntaxKind::R_PAREN, + pgt_tokenizer::TokenKind::OpenBracket => SyntaxKind::L_BRACK, + pgt_tokenizer::TokenKind::CloseBracket => SyntaxKind::R_BRACK, + pgt_tokenizer::TokenKind::At => SyntaxKind::AT, + pgt_tokenizer::TokenKind::Pound => SyntaxKind::POUND, + pgt_tokenizer::TokenKind::Tilde => SyntaxKind::TILDE, + pgt_tokenizer::TokenKind::Question => SyntaxKind::QUESTION, + pgt_tokenizer::TokenKind::Colon => SyntaxKind::COLON, + pgt_tokenizer::TokenKind::Eq => SyntaxKind::EQ, + pgt_tokenizer::TokenKind::Bang => SyntaxKind::BANG, + pgt_tokenizer::TokenKind::Lt => SyntaxKind::L_ANGLE, + pgt_tokenizer::TokenKind::Gt => SyntaxKind::R_ANGLE, + pgt_tokenizer::TokenKind::Minus => SyntaxKind::MINUS, + pgt_tokenizer::TokenKind::And => SyntaxKind::AMP, + pgt_tokenizer::TokenKind::Or => SyntaxKind::PIPE, + pgt_tokenizer::TokenKind::Plus => SyntaxKind::PLUS, + pgt_tokenizer::TokenKind::Star => SyntaxKind::STAR, + pgt_tokenizer::TokenKind::Slash => SyntaxKind::SLASH, + pgt_tokenizer::TokenKind::Caret => SyntaxKind::CARET, + pgt_tokenizer::TokenKind::Percent => SyntaxKind::PERCENT, + pgt_tokenizer::TokenKind::Unknown => SyntaxKind::ERROR, + pgt_tokenizer::TokenKind::Backslash => SyntaxKind::BACKSLASH, + pgt_tokenizer::TokenKind::UnknownPrefix => { + err = "unknown literal prefix"; + SyntaxKind::IDENT + } + pgt_tokenizer::TokenKind::Eof => SyntaxKind::EOF, + pgt_tokenizer::TokenKind::Backtick => SyntaxKind::BACKTICK, + pgt_tokenizer::TokenKind::PositionalParam => SyntaxKind::POSITIONAL_PARAM, + pgt_tokenizer::TokenKind::QuotedIdent { terminated } => { + if !terminated { + err = "Missing trailing \" to terminate the quoted identifier" + } + 
SyntaxKind::IDENT + } + } + }; + + let err = if err.is_empty() { None } else { Some(err) }; + self.push(syntax_kind, token_text.len(), err, line_ending_count); + } + + fn extend_literal(&mut self, len: usize, kind: &pgt_tokenizer::LiteralKind) { + let mut err = ""; + + let syntax_kind = match *kind { + pgt_tokenizer::LiteralKind::Int { empty_int, base: _ } => { + if empty_int { + err = "Missing digits after the integer base prefix"; + } + SyntaxKind::INT_NUMBER + } + pgt_tokenizer::LiteralKind::Float { + empty_exponent, + base: _, + } => { + if empty_exponent { + err = "Missing digits after the exponent symbol"; + } + SyntaxKind::FLOAT_NUMBER + } + pgt_tokenizer::LiteralKind::Str { terminated } => { + if !terminated { + err = "Missing trailing `'` symbol to terminate the string literal"; + } + SyntaxKind::STRING + } + pgt_tokenizer::LiteralKind::ByteStr { terminated } => { + if !terminated { + err = "Missing trailing `'` symbol to terminate the hex bit string literal"; + } + SyntaxKind::BYTE_STRING + } + pgt_tokenizer::LiteralKind::BitStr { terminated } => { + if !terminated { + err = "Missing trailing `'` symbol to terminate the bit string literal"; + } + SyntaxKind::BIT_STRING + } + pgt_tokenizer::LiteralKind::DollarQuotedString { terminated } => { + if !terminated { + err = "Unterminated dollar quoted string literal"; + } + SyntaxKind::DOLLAR_QUOTED_STRING + } + pgt_tokenizer::LiteralKind::UnicodeEscStr { terminated } => { + if !terminated { + err = "Missing trailing `'` symbol to terminate the unicode escape string literal"; + } + SyntaxKind::BYTE_STRING + } + pgt_tokenizer::LiteralKind::EscStr { terminated } => { + if !terminated { + err = "Missing trailing `'` symbol to terminate the escape string literal"; + } + SyntaxKind::ESC_STRING + } + }; + + let err = if err.is_empty() { None } else { Some(err) }; + self.push(syntax_kind, len, err, None); + } +} diff --git a/crates/pgt_lexer/src/lib.rs b/crates/pgt_lexer/src/lib.rs index 32bbdd42..2d8779a7 100644 --- a/crates/pgt_lexer/src/lib.rs +++ b/crates/pgt_lexer/src/lib.rs @@ -1,191 +1,14 @@ mod codegen; -pub mod diagnostics; +mod lexed; +mod lexer; -use diagnostics::ScanError; -use pg_query::protobuf::{KeywordKind, ScanToken}; -use pgt_text_size::{TextLen, TextRange, TextSize}; -use regex::Regex; -use std::{collections::VecDeque, sync::LazyLock}; +pub use crate::codegen::syntax_kind::SyntaxKind; +pub use crate::lexed::{LexDiagnostic, Lexed}; +pub use crate::lexer::Lexer; -pub use crate::codegen::SyntaxKind; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum TokenType { - Whitespace, - NoKeyword, - UnreservedKeyword, - ColNameKeyword, - TypeFuncNameKeyword, - ReservedKeyword, -} - -impl From<&ScanToken> for TokenType { - fn from(token: &ScanToken) -> TokenType { - match token.token { - // SqlComment | CComment - 275 | 276 => TokenType::Whitespace, - _ => match token.keyword_kind() { - KeywordKind::NoKeyword => TokenType::NoKeyword, - KeywordKind::UnreservedKeyword => TokenType::UnreservedKeyword, - KeywordKind::ColNameKeyword => TokenType::ColNameKeyword, - KeywordKind::TypeFuncNameKeyword => TokenType::TypeFuncNameKeyword, - KeywordKind::ReservedKeyword => TokenType::ReservedKeyword, - }, - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Token { - pub kind: SyntaxKind, - pub text: String, - pub span: TextRange, - pub token_type: TokenType, -} - -impl Token { - pub fn eof(pos: usize) -> Token { - Token { - kind: SyntaxKind::Eof, - text: "".to_string(), - span: TextRange::at(TextSize::try_from(pos).unwrap(), 
TextSize::from(0)), - token_type: TokenType::Whitespace, - } - } -} - -pub static WHITESPACE_TOKENS: &[SyntaxKind] = &[ - SyntaxKind::Whitespace, - SyntaxKind::Tab, - SyntaxKind::Newline, - SyntaxKind::SqlComment, - SyntaxKind::CComment, -]; - -static PATTERN_LEXER: LazyLock = LazyLock::new(|| { - #[cfg(windows)] - { - // On Windows, treat \r\n as a single newline token - Regex::new(r"(?P +)|(?P(\r\n|\n)+)|(?P\t+)").unwrap() - } - #[cfg(not(windows))] - { - // On other platforms, just check for \n - Regex::new(r"(?P +)|(?P\n+)|(?P\t+)").unwrap() - } -}); - -fn whitespace_tokens(input: &str) -> VecDeque { - let mut tokens = VecDeque::new(); - - for cap in PATTERN_LEXER.captures_iter(input) { - if let Some(whitespace) = cap.name("whitespace") { - tokens.push_back(Token { - token_type: TokenType::Whitespace, - kind: SyntaxKind::Whitespace, - text: whitespace.as_str().to_string(), - span: TextRange::new( - TextSize::from(u32::try_from(whitespace.start()).unwrap()), - TextSize::from(u32::try_from(whitespace.end()).unwrap()), - ), - }); - } else if let Some(newline) = cap.name("newline") { - tokens.push_back(Token { - token_type: TokenType::Whitespace, - kind: SyntaxKind::Newline, - text: newline.as_str().to_string(), - span: TextRange::new( - TextSize::from(u32::try_from(newline.start()).unwrap()), - TextSize::from(u32::try_from(newline.end()).unwrap()), - ), - }); - } else if let Some(tab) = cap.name("tab") { - tokens.push_back(Token { - token_type: TokenType::Whitespace, - kind: SyntaxKind::Tab, - text: tab.as_str().to_string(), - span: TextRange::new( - TextSize::from(u32::try_from(tab.start()).unwrap()), - TextSize::from(u32::try_from(tab.end()).unwrap()), - ), - }); - } else { - panic!("No match"); - }; - } - - tokens -} - -/// Turn a string of potentially valid sql code into a list of tokens, including their range in the source text. -/// -/// The implementation is primarily using libpg_querys `scan` method, and fills in the gaps with tokens that are not parsed by the library, e.g. whitespace. 
-pub fn lex(text: &str) -> Result, Vec> { - let mut whitespace_tokens = whitespace_tokens(text); - - // tokens from pg_query.rs - let mut pgt_query_tokens = match pg_query::scan(text) { - Ok(r) => r.tokens.into_iter().collect::>(), - Err(err) => return Err(ScanError::from_pg_query_err(err, text)), - }; - - // merge the two token lists - let mut tokens: Vec = Vec::new(); - let mut pos = TextSize::from(0); - - while pos < text.text_len() { - if !pgt_query_tokens.is_empty() - && TextSize::from(u32::try_from(pgt_query_tokens[0].start).unwrap()) == pos - { - let pgt_query_token = pgt_query_tokens.pop_front().unwrap(); - - // the lexer returns byte indices, so we need to slice - let token_text = &text[usize::try_from(pgt_query_token.start).unwrap() - ..usize::try_from(pgt_query_token.end).unwrap()]; - - let len = token_text.text_len(); - let has_whitespace = token_text.contains(" ") || token_text.contains("\n"); - tokens.push(Token { - token_type: TokenType::from(&pgt_query_token), - kind: SyntaxKind::from(&pgt_query_token), - text: token_text.to_string(), - span: TextRange::new( - TextSize::from(u32::try_from(pgt_query_token.start).unwrap()), - TextSize::from(u32::try_from(pgt_query_token.end).unwrap()), - ), - }); - pos += len; - - if has_whitespace { - while !whitespace_tokens.is_empty() - && whitespace_tokens[0].span.start() < TextSize::from(u32::from(pos)) - { - whitespace_tokens.pop_front(); - } - } - - continue; - } - - if !whitespace_tokens.is_empty() - && whitespace_tokens[0].span.start() == TextSize::from(u32::from(pos)) - { - let whitespace_token = whitespace_tokens.pop_front().unwrap(); - let len = whitespace_token.text.text_len(); - tokens.push(whitespace_token); - pos += len; - continue; - } - - let usize_pos = usize::from(pos); - panic!( - "No token found at position {:?}: '{:?}'", - pos, - text.get(usize_pos..usize_pos + 1) - ); - } - - Ok(tokens) +/// Lex the input string into tokens and diagnostics +pub fn lex(input: &str) -> Lexed { + Lexer::new(input).lex() } #[cfg(test)] @@ -193,101 +16,106 @@ mod tests { use super::*; #[test] - fn test_special_chars() { - let input = "insert into c (name, full_name) values ('Å', 1);"; - let tokens = lex(input).unwrap(); - assert!(!tokens.is_empty()); - } + fn test_basic_lexing() { + let input = "SELECT * FROM users WHERE id = 1;"; + let lexed = lex(input); + + // Check we have tokens + assert!(!lexed.is_empty()); + + // Iterate over tokens and collect identifiers + let mut identifiers = Vec::new(); + for (idx, kind) in lexed.tokens().enumerate() { + if kind == SyntaxKind::IDENT { + identifiers.push((lexed.text(idx), lexed.range(idx))); + } + } - #[test] - fn test_tab_tokens() { - let input = "select\t1"; - let tokens = lex(input).unwrap(); - assert_eq!(tokens[1].kind, SyntaxKind::Tab); + // Should find at least "users" and "id" as identifiers + assert!(identifiers.len() >= 2); } #[test] - fn test_newline_tokens() { - let input = "select\n1"; - let tokens = lex(input).unwrap(); - assert_eq!(tokens[1].kind, SyntaxKind::Newline); + fn test_lexing_with_errors() { + let input = "SELECT 'unterminated string"; + let lexed = lex(input); + + // Should have tokens + assert!(!lexed.is_empty()); + + // Should have an error for unterminated string + let errors = lexed.errors(); + assert!(!errors.is_empty()); + // Check the error message exists + assert!(!errors[0].message.to_string().is_empty()); } #[test] - fn test_consecutive_newlines() { - // Test with multiple consecutive newlines - #[cfg(windows)] - let input = "select\r\n\r\n1"; - 
#[cfg(not(windows))] - let input = "select\n\n1"; - - let tokens = lex(input).unwrap(); - - // Check that we have exactly one newline token between "select" and "1" - assert_eq!(tokens[0].kind, SyntaxKind::Select); - assert_eq!(tokens[1].kind, SyntaxKind::Newline); - assert_eq!(tokens[2].kind, SyntaxKind::Iconst); + fn test_token_ranges() { + let input = "SELECT id"; + let lexed = lex(input); + + // First token should be a keyword (SELECT gets parsed as a keyword) + let _first_kind = lexed.kind(0); + assert_eq!(u32::from(lexed.range(0).start()), 0); + assert_eq!(u32::from(lexed.range(0).end()), 6); + assert_eq!(lexed.text(0), "SELECT"); + + // Find the id token + for (idx, kind) in lexed.tokens().enumerate() { + if kind == SyntaxKind::IDENT && lexed.text(idx) == "id" { + assert_eq!(u32::from(lexed.range(idx).start()), 7); + assert_eq!(u32::from(lexed.range(idx).end()), 9); + } + } } #[test] - fn test_whitespace_tokens() { - let input = "select 1"; - let tokens = lex(input).unwrap(); - assert_eq!(tokens[1].kind, SyntaxKind::Whitespace); + fn test_empty_input() { + let input = ""; + let lexed = lex(input); + assert_eq!(lexed.len(), 1); + assert_eq!(lexed.kind(0), SyntaxKind::EOF); } #[test] - fn test_lexer() { - let input = "select 1; \n -- some comment \n select 2\t"; - - let tokens = lex(input).unwrap(); - let mut tokens_iter = tokens.iter(); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Select); - assert_eq!(token.text, "select"); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Whitespace); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Iconst); - assert_eq!(token.text, "1"); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Ascii59); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Whitespace); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Newline); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Whitespace); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::SqlComment); - assert_eq!(token.text, "-- some comment "); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Newline); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Whitespace); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Select); - assert_eq!(token.text, "select"); - - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Whitespace); + fn test_whitespace_handling() { + let input = " SELECT \n id "; + let lexed = lex(input); + + // Collect non-whitespace tokens + let mut non_whitespace = Vec::new(); + for (idx, kind) in lexed.tokens().enumerate() { + if !matches!( + kind, + SyntaxKind::SPACE | SyntaxKind::TAB | SyntaxKind::LINE_ENDING | SyntaxKind::EOF + ) { + non_whitespace.push(lexed.text(idx)); + } + } - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Iconst); - assert_eq!(token.text, "2"); + assert_eq!(non_whitespace.len(), 2); // SELECT and id + } - let token = tokens_iter.next().unwrap(); - assert_eq!(token.kind, SyntaxKind::Tab); + #[test] + fn finds_lex_errors() { + // Test with unterminated block comment + let input = "/* unterminated comment"; + let lexed = lex(input); + let errors = lexed.errors(); + + // Should have error for unterminated block comment + assert!(!errors.is_empty()); + 
assert!(errors[0].message.to_string().contains("Missing trailing")); + assert!(errors[0].span.start() < errors[0].span.end()); + + // Test with unterminated string + let input2 = "SELECT 'unterminated string"; + let lexed2 = lex(input2); + let errors2 = lexed2.errors(); + + // Should have error for unterminated string + assert!(!errors2.is_empty()); + assert!(errors2[0].message.to_string().contains("Missing trailing")); } } diff --git a/crates/pgt_lexer_codegen/Cargo.toml b/crates/pgt_lexer_codegen/Cargo.toml index c5878646..b50465b0 100644 --- a/crates/pgt_lexer_codegen/Cargo.toml +++ b/crates/pgt_lexer_codegen/Cargo.toml @@ -10,12 +10,16 @@ name = "pgt_lexer_codegen" repository.workspace = true version = "0.0.0" - [dependencies] -pgt_query_proto_parser.workspace = true -proc-macro2.workspace = true -quote = "1.0.33" +anyhow = { workspace = true } +convert_case = { workspace = true } +proc-macro2.workspace = true +prost-reflect = { workspace = true } +protox = { workspace = true } +quote.workspace = true + +[build-dependencies] +ureq = "2.9" [lib] -doctest = false proc-macro = true diff --git a/crates/pgt_lexer_codegen/README.md b/crates/pgt_lexer_codegen/README.md index 843ac2f8..57bdaa34 100644 --- a/crates/pgt_lexer_codegen/README.md +++ b/crates/pgt_lexer_codegen/README.md @@ -1,7 +1 @@ -# pgt_lexer_codegen - -This crate is responsible for reading `libpg_query`'s protobuf file and turning it into the Rust enum `SyntaxKind`. - -It does so by reading the file from the installed git submodule, parsing it with a protobuf parser, and using a procedural macro to generate the enum. - -Rust requires procedural macros to be defined in a different crate than where they're used, hence this \_codegen crate. +Heavily inspired by and copied from [squawk_parser](https://github.com/sbdchd/squawk/tree/9acfecbbb7f3c7eedcbaf060e7b25f9afa136db3/crates/squawk_parser). Thanks for making all the hard work MIT-licensed! 
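A minimal usage sketch of the reworked lexer API introduced above, for orientation while reviewing; it only uses the `lex`, `Lexed`, `LexDiagnostic`, and `SyntaxKind` items exported from `crates/pgt_lexer/src/lib.rs` in this diff and is illustrative rather than part of the patch:

```rust
use pgt_lexer::{SyntaxKind, lex};

fn main() {
    // Lexing no longer returns a Result: errors surface as diagnostics on `Lexed`.
    let lexed = lex("SELECT id FROM users WHERE id = 1;");

    // Tokens are addressed by index; kind, text and range are separate lookups.
    for (idx, kind) in lexed.tokens().enumerate() {
        if kind == SyntaxKind::IDENT {
            println!("ident {:?} at {:?}", lexed.text(idx), lexed.range(idx));
        }
    }

    // Lex errors (unterminated strings, comments, ...) come back as `LexDiagnostic`s.
    for diag in lexed.errors() {
        eprintln!("lex error at {:?}", diag.span);
    }
}
```

Compared to the old `lex`, whitespace and keyword handling now come from `pgt_tokenizer` plus the generated `SyntaxKind::from_keyword`, so there is no `pg_query::scan` round-trip and no separate whitespace-merging pass.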
diff --git a/crates/pgt_lexer_codegen/build.rs b/crates/pgt_lexer_codegen/build.rs new file mode 100644 index 00000000..70c9635d --- /dev/null +++ b/crates/pgt_lexer_codegen/build.rs @@ -0,0 +1,49 @@ +use std::env; +use std::fs; +use std::io::Write; +use std::path::PathBuf; + +// TODO make this selectable via feature flags +static LIBPG_QUERY_TAG: &str = "17-6.1.0"; + +/// Downloads the `kwlist.h` file from the specified version of `libpg_query` +fn main() -> Result<(), Box<dyn std::error::Error>> { + let version = LIBPG_QUERY_TAG.to_string(); + + // Check for the postgres header file in the source tree first + let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?); + let headers_dir = manifest_dir.join("postgres").join(&version); + let kwlist_path = headers_dir.join("kwlist.h"); + + // Only download if the file doesn't exist + if !kwlist_path.exists() { + println!( + "cargo:warning=Downloading kwlist.h for libpg_query {}", + version + ); + + fs::create_dir_all(&headers_dir)?; + + let proto_url = format!( + "https://raw.githubusercontent.com/pganalyze/libpg_query/{}/src/postgres/include/parser/kwlist.h", + version + ); + + let response = ureq::get(&proto_url).call()?; + let content = response.into_string()?; + + let mut file = fs::File::create(&kwlist_path)?; + file.write_all(content.as_bytes())?; + + println!("cargo:warning=Successfully downloaded kwlist.h"); + } + + println!( + "cargo:rustc-env=PG_QUERY_KWLIST_PATH={}", + kwlist_path.display() + ); + + println!("cargo:rerun-if-changed={}", kwlist_path.display()); + + Ok(()) +} diff --git a/crates/pgt_lexer_codegen/postgres/17-6.1.0/kwlist.h b/crates/pgt_lexer_codegen/postgres/17-6.1.0/kwlist.h new file mode 100644 index 00000000..658d7ff6 --- /dev/null +++ b/crates/pgt_lexer_codegen/postgres/17-6.1.0/kwlist.h @@ -0,0 +1,518 @@ +/*------------------------------------------------------------------------- + * + * kwlist.h + * + * The keyword lists are kept in their own source files for use by + * automatic tools. The exact representation of a keyword is determined + * by the PG_KEYWORD macro, which is not defined in this file; it can + * be defined by the caller for special purposes. + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/parser/kwlist.h + * + *------------------------------------------------------------------------- + */ + +/* there is deliberately not an #ifndef KWLIST_H here */ + +/* + * List of keyword (name, token-value, category, bare-label-status) entries. + * + * Note: gen_keywordlist.pl requires the entries to appear in ASCII order.
+ */ + +/* name, value, category, is-bare-label */ +PG_KEYWORD("abort", ABORT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("absent", ABSENT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("absolute", ABSOLUTE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("access", ACCESS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("action", ACTION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("add", ADD_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("admin", ADMIN, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("after", AFTER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("aggregate", AGGREGATE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("all", ALL, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("also", ALSO, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("alter", ALTER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("always", ALWAYS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("analyse", ANALYSE, RESERVED_KEYWORD, BARE_LABEL) /* British spelling */ +PG_KEYWORD("analyze", ANALYZE, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("and", AND, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("any", ANY, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("array", ARRAY, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("as", AS, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("asc", ASC, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("asensitive", ASENSITIVE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("assertion", ASSERTION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("assignment", ASSIGNMENT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("asymmetric", ASYMMETRIC, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("at", AT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("atomic", ATOMIC, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("attach", ATTACH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("attribute", ATTRIBUTE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("authorization", AUTHORIZATION, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("backward", BACKWARD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("before", BEFORE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("begin", BEGIN_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("between", BETWEEN, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("bigint", BIGINT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("binary", BINARY, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("bit", BIT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("boolean", BOOLEAN_P, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("both", BOTH, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("breadth", BREADTH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cast", CAST, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("catalog", CATALOG_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("chain", CHAIN, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("char", CHAR_P, COL_NAME_KEYWORD, AS_LABEL) +PG_KEYWORD("character", CHARACTER, COL_NAME_KEYWORD, AS_LABEL) +PG_KEYWORD("characteristics", CHARACTERISTICS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("check", CHECK, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("checkpoint", CHECKPOINT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("class", CLASS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("close", CLOSE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cluster", 
CLUSTER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("coalesce", COALESCE, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("collate", COLLATE, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("collation", COLLATION, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("column", COLUMN, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("columns", COLUMNS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("comment", COMMENT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("comments", COMMENTS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("commit", COMMIT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("committed", COMMITTED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("compression", COMPRESSION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("concurrently", CONCURRENTLY, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("conditional", CONDITIONAL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("configuration", CONFIGURATION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("conflict", CONFLICT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("connection", CONNECTION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("constraint", CONSTRAINT, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("constraints", CONSTRAINTS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("content", CONTENT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("continue", CONTINUE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("conversion", CONVERSION_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("copy", COPY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cost", COST, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("create", CREATE, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("cross", CROSS, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("csv", CSV, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cube", CUBE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("current", CURRENT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("current_catalog", CURRENT_CATALOG, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("current_date", CURRENT_DATE, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("current_role", CURRENT_ROLE, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("current_schema", CURRENT_SCHEMA, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("current_time", CURRENT_TIME, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("current_timestamp", CURRENT_TIMESTAMP, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("current_user", CURRENT_USER, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cursor", CURSOR, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cycle", CYCLE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("data", DATA_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("database", DATABASE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("day", DAY_P, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("deallocate", DEALLOCATE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("dec", DEC, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("decimal", DECIMAL_P, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("declare", DECLARE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("default", DEFAULT, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("defaults", DEFAULTS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("deferrable", DEFERRABLE, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("deferred", DEFERRED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("definer", DEFINER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("delete", DELETE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("delimiter", DELIMITER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("delimiters", DELIMITERS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("depends", DEPENDS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("depth", DEPTH, UNRESERVED_KEYWORD, 
BARE_LABEL) +PG_KEYWORD("desc", DESC, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("detach", DETACH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("dictionary", DICTIONARY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("disable", DISABLE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("discard", DISCARD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("distinct", DISTINCT, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("do", DO, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("document", DOCUMENT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("domain", DOMAIN_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("double", DOUBLE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("drop", DROP, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("each", EACH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("else", ELSE, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("empty", EMPTY_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("enable", ENABLE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("encoding", ENCODING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("encrypted", ENCRYPTED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("end", END_P, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("enum", ENUM_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("error", ERROR_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("escape", ESCAPE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("event", EVENT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("except", EXCEPT, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("exclude", EXCLUDE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("excluding", EXCLUDING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("exclusive", EXCLUSIVE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("execute", EXECUTE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("exists", EXISTS, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("explain", EXPLAIN, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("expression", EXPRESSION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("extension", EXTENSION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("external", EXTERNAL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("extract", EXTRACT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("false", FALSE_P, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("family", FAMILY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("fetch", FETCH, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("filter", FILTER, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("finalize", FINALIZE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("first", FIRST_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("float", FLOAT_P, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("following", FOLLOWING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("for", FOR, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("force", FORCE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("foreign", FOREIGN, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("format", FORMAT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("forward", FORWARD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("freeze", FREEZE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("from", FROM, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("full", FULL, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("function", FUNCTION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("functions", FUNCTIONS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("generated", GENERATED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("global", GLOBAL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("grant", GRANT, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("granted", GRANTED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("greatest", GREATEST, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("group", GROUP_P, 
RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("grouping", GROUPING, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("groups", GROUPS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("handler", HANDLER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("having", HAVING, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("header", HEADER_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("hold", HOLD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("implicit", IMPLICIT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("import", IMPORT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("in", IN_P, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("include", INCLUDE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("including", INCLUDING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("increment", INCREMENT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("indent", INDENT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("index", INDEX, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("indexes", INDEXES, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("inherit", INHERIT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("inherits", INHERITS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("initially", INITIALLY, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("inline", INLINE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("inner", INNER_P, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("inout", INOUT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("input", INPUT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("insensitive", INSENSITIVE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("insert", INSERT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("instead", INSTEAD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("int", INT_P, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("integer", INTEGER, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("intersect", INTERSECT, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("interval", INTERVAL, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("into", INTO, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("invoker", INVOKER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("is", IS, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("isnull", ISNULL, TYPE_FUNC_NAME_KEYWORD, AS_LABEL) +PG_KEYWORD("isolation", ISOLATION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("join", JOIN, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json", JSON, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_array", JSON_ARRAY, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_arrayagg", JSON_ARRAYAGG, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_exists", JSON_EXISTS, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_object", JSON_OBJECT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_objectagg", JSON_OBJECTAGG, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_query", JSON_QUERY, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_scalar", JSON_SCALAR, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_serialize", JSON_SERIALIZE, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_table", JSON_TABLE, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("json_value", JSON_VALUE, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("keep", KEEP, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("key", KEY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("keys", KEYS, UNRESERVED_KEYWORD, BARE_LABEL) 
+PG_KEYWORD("label", LABEL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("language", LANGUAGE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("large", LARGE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("last", LAST_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("lateral", LATERAL_P, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("leading", LEADING, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("leakproof", LEAKPROOF, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("least", LEAST, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("left", LEFT, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("level", LEVEL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("like", LIKE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("limit", LIMIT, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("listen", LISTEN, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("load", LOAD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("local", LOCAL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("localtime", LOCALTIME, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("localtimestamp", LOCALTIMESTAMP, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("location", LOCATION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("lock", LOCK_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("locked", LOCKED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("logged", LOGGED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("mapping", MAPPING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("match", MATCH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("matched", MATCHED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("materialized", MATERIALIZED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("maxvalue", MAXVALUE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("merge", MERGE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("merge_action", MERGE_ACTION, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("method", METHOD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("minute", MINUTE_P, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("minvalue", MINVALUE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("mode", MODE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("month", MONTH_P, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("move", MOVE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("name", NAME_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("names", NAMES, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("national", NATIONAL, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("natural", NATURAL, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("nchar", NCHAR, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("nested", NESTED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("new", NEW, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("next", NEXT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("nfc", NFC, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("nfd", NFD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("nfkc", NFKC, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("nfkd", NFKD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("no", NO, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("none", NONE, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("normalize", NORMALIZE, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("normalized", NORMALIZED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("not", NOT, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("nothing", NOTHING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("notify", NOTIFY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("notnull", NOTNULL, TYPE_FUNC_NAME_KEYWORD, AS_LABEL) +PG_KEYWORD("nowait", NOWAIT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("null", NULL_P, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("nullif", NULLIF, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("nulls", NULLS_P, 
UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("numeric", NUMERIC, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("object", OBJECT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("of", OF, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("off", OFF, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("offset", OFFSET, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("oids", OIDS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("old", OLD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("omit", OMIT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("on", ON, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("only", ONLY, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("operator", OPERATOR, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("option", OPTION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("options", OPTIONS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("or", OR, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("order", ORDER, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("ordinality", ORDINALITY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("others", OTHERS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("out", OUT_P, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("outer", OUTER_P, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("over", OVER, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("overlaps", OVERLAPS, TYPE_FUNC_NAME_KEYWORD, AS_LABEL) +PG_KEYWORD("overlay", OVERLAY, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("overriding", OVERRIDING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("owned", OWNED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("owner", OWNER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("parallel", PARALLEL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("parameter", PARAMETER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("parser", PARSER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("partial", PARTIAL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("partition", PARTITION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("passing", PASSING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("password", PASSWORD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("path", PATH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("placing", PLACING, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("plan", PLAN, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("plans", PLANS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("policy", POLICY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("position", POSITION, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("preceding", PRECEDING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("precision", PRECISION, COL_NAME_KEYWORD, AS_LABEL) +PG_KEYWORD("prepare", PREPARE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("prepared", PREPARED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("preserve", PRESERVE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("primary", PRIMARY, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("prior", PRIOR, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("privileges", PRIVILEGES, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("procedural", PROCEDURAL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("procedure", PROCEDURE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("procedures", PROCEDURES, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("program", PROGRAM, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("publication", PUBLICATION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("quote", QUOTE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("quotes", QUOTES, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("range", RANGE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("read", READ, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("real", REAL, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("reassign", REASSIGN, 
UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("recheck", RECHECK, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("recursive", RECURSIVE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("ref", REF_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("references", REFERENCES, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("referencing", REFERENCING, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("refresh", REFRESH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("reindex", REINDEX, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("relative", RELATIVE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("release", RELEASE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("rename", RENAME, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("repeatable", REPEATABLE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("replace", REPLACE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("replica", REPLICA, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("reset", RESET, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("restart", RESTART, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("restrict", RESTRICT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("return", RETURN, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("returning", RETURNING, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("returns", RETURNS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("revoke", REVOKE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("right", RIGHT, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("role", ROLE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("rollback", ROLLBACK, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("rollup", ROLLUP, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("routine", ROUTINE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("routines", ROUTINES, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("row", ROW, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("rows", ROWS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("rule", RULE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("savepoint", SAVEPOINT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("scalar", SCALAR, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("schema", SCHEMA, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("schemas", SCHEMAS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("scroll", SCROLL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("search", SEARCH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("second", SECOND_P, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("security", SECURITY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("select", SELECT, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("sequence", SEQUENCE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("sequences", SEQUENCES, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("serializable", SERIALIZABLE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("server", SERVER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("session", SESSION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("session_user", SESSION_USER, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("set", SET, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("setof", SETOF, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("sets", SETS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("share", SHARE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("show", SHOW, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("similar", SIMILAR, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("simple", SIMPLE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("skip", SKIP, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("smallint", SMALLINT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("snapshot", SNAPSHOT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("some", SOME, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("source", SOURCE, 
UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("sql", SQL_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("stable", STABLE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("standalone", STANDALONE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("start", START, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("statement", STATEMENT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("statistics", STATISTICS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("stdin", STDIN, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("stdout", STDOUT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("storage", STORAGE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("stored", STORED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("strict", STRICT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("string", STRING_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("strip", STRIP_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("subscription", SUBSCRIPTION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("substring", SUBSTRING, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("support", SUPPORT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("symmetric", SYMMETRIC, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("sysid", SYSID, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("system", SYSTEM_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("system_user", SYSTEM_USER, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("table", TABLE, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("tables", TABLES, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("tablesample", TABLESAMPLE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("tablespace", TABLESPACE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("target", TARGET, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("temp", TEMP, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("template", TEMPLATE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("temporary", TEMPORARY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("text", TEXT_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("then", THEN, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("ties", TIES, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("time", TIME, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("timestamp", TIMESTAMP, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("to", TO, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("trailing", TRAILING, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("transaction", TRANSACTION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("transform", TRANSFORM, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("treat", TREAT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("trigger", TRIGGER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("trim", TRIM, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("true", TRUE_P, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("truncate", TRUNCATE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("trusted", TRUSTED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("type", TYPE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("types", TYPES_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("uescape", UESCAPE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("unbounded", UNBOUNDED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("uncommitted", UNCOMMITTED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("unconditional", UNCONDITIONAL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("unencrypted", UNENCRYPTED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("union", UNION, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("unique", UNIQUE, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("unknown", UNKNOWN, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("unlisten", UNLISTEN, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("unlogged", UNLOGGED, UNRESERVED_KEYWORD, BARE_LABEL) 
+PG_KEYWORD("until", UNTIL, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("update", UPDATE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("user", USER, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("using", USING, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("vacuum", VACUUM, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("valid", VALID, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("validate", VALIDATE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("validator", VALIDATOR, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("value", VALUE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("values", VALUES, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("varchar", VARCHAR, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("variadic", VARIADIC, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("varying", VARYING, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("verbose", VERBOSE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("version", VERSION_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("view", VIEW, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("views", VIEWS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("volatile", VOLATILE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("when", WHEN, RESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("where", WHERE, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("whitespace", WHITESPACE_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("window", WINDOW, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("with", WITH, RESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("within", WITHIN, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("without", WITHOUT, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("work", WORK, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("wrapper", WRAPPER, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("write", WRITE, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("xml", XML_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlattributes", XMLATTRIBUTES, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlconcat", XMLCONCAT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlelement", XMLELEMENT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlexists", XMLEXISTS, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlforest", XMLFOREST, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlnamespaces", XMLNAMESPACES, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlparse", XMLPARSE, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlpi", XMLPI, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlroot", XMLROOT, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmlserialize", XMLSERIALIZE, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("xmltable", XMLTABLE, COL_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("year", YEAR_P, UNRESERVED_KEYWORD, AS_LABEL) +PG_KEYWORD("yes", YES_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("zone", ZONE, UNRESERVED_KEYWORD, BARE_LABEL) diff --git a/crates/pgt_lexer_codegen/src/keywords.rs b/crates/pgt_lexer_codegen/src/keywords.rs new file mode 100644 index 00000000..f0104c8d --- /dev/null +++ b/crates/pgt_lexer_codegen/src/keywords.rs @@ -0,0 +1,43 @@ +// from https://github.com/sbdchd/squawk/blob/ac9f90c3b2be8d2c46fd5454eb48975afd268dbe/crates/xtask/src/keywords.rs +use anyhow::{Context, Ok, Result}; +use std::path; + +fn parse_header() -> Result> { + // use the environment variable set by the build script to locate the kwlist.h file + let kwlist_file = path::PathBuf::from(env!("PG_QUERY_KWLIST_PATH")); + let data = std::fs::read_to_string(kwlist_file).context("Failed to read kwlist.h")?; + + let mut keywords = Vec::new(); + + for line in data.lines() { + if line.starts_with("PG_KEYWORD") { + let line = line + .split(&['(', ')']) + .nth(1) + .context("Invalid kwlist.h structure")?; + + 
let row_items: Vec<&str> = line.split(',').collect(); + + match row_items[..] { + [name, _value, _category, _is_bare_label] => { + let name = name.trim().replace('\"', ""); + keywords.push(name); + } + _ => anyhow::bail!("Problem reading kwlist.h row"), + } + } + } + + Ok(keywords) +} + +pub(crate) struct KeywordKinds { + pub(crate) all_keywords: Vec<String>, +} + +pub(crate) fn keyword_kinds() -> Result<KeywordKinds> { + let mut all_keywords = parse_header()?; + all_keywords.sort(); + + Ok(KeywordKinds { all_keywords }) +} diff --git a/crates/pgt_lexer_codegen/src/lib.rs b/crates/pgt_lexer_codegen/src/lib.rs index 8f492e4b..b620b6a6 100644 --- a/crates/pgt_lexer_codegen/src/lib.rs +++ b/crates/pgt_lexer_codegen/src/lib.rs @@ -1,29 +1,9 @@ +mod keywords; mod syntax_kind; -use pgt_query_proto_parser::ProtoParser; -use quote::quote; -use std::{env, path, path::Path}; +use syntax_kind::syntax_kind_mod; #[proc_macro] -pub fn lexer_codegen(_item: proc_macro::TokenStream) -> proc_macro::TokenStream { - let parser = ProtoParser::new(&proto_file_path()); - let proto_file = parser.parse(); - - let syntax_kind = syntax_kind::syntax_kind_mod(&proto_file); - - quote! { - use pg_query::{protobuf, protobuf::ScanToken, protobuf::Token, NodeEnum, NodeRef}; - - #syntax_kind - } - .into() -} - -fn proto_file_path() -> path::PathBuf { - Path::new(env!("CARGO_MANIFEST_DIR")) - .ancestors() - .nth(2) - .unwrap() - .join("libpg_query/protobuf/pg_query.proto") - .to_path_buf() +pub fn syntax_kind_codegen(_input: proc_macro::TokenStream) -> proc_macro::TokenStream { + syntax_kind_mod().into() } diff --git a/crates/pgt_lexer_codegen/src/syntax_kind.rs b/crates/pgt_lexer_codegen/src/syntax_kind.rs index 091b1e02..07b7a419 100644 --- a/crates/pgt_lexer_codegen/src/syntax_kind.rs +++ b/crates/pgt_lexer_codegen/src/syntax_kind.rs @@ -1,111 +1,121 @@ -use std::collections::HashSet; - -use pgt_query_proto_parser::{Node, ProtoFile, Token}; -use proc_macro2::{Ident, Literal}; +use convert_case::{Case, Casing}; +use proc_macro2::TokenStream; use quote::{format_ident, quote}; -pub fn syntax_kind_mod(proto_file: &ProtoFile) -> proc_macro2::TokenStream { - let custom_node_names = custom_node_names(); - let custom_node_identifiers = custom_node_identifiers(&custom_node_names); - - let node_identifiers = node_identifiers(&proto_file.nodes); +use crate::keywords::{KeywordKinds, keyword_kinds}; + +const WHITESPACE: &[&str] = &[ + "SPACE", // " " + "TAB", // "\t" + "VERTICAL_TAB", // "\x0B" + "FORM_FEED", // "\x0C" + "LINE_ENDING", // "\n" or "\r" in any combination +]; + +const PUNCT: &[(&str, &str)] = &[ + ("$", "DOLLAR"), + (";", "SEMICOLON"), + (",", "COMMA"), + ("(", "L_PAREN"), + (")", "R_PAREN"), + ("[", "L_BRACK"), + ("]", "R_BRACK"), + ("<", "L_ANGLE"), + (">", "R_ANGLE"), + ("@", "AT"), + ("#", "POUND"), + ("~", "TILDE"), + ("?", "QUESTION"), + ("&", "AMP"), + ("|", "PIPE"), + ("+", "PLUS"), + ("*", "STAR"), + ("/", "SLASH"), + ("\\", "BACKSLASH"), + ("^", "CARET"), + ("%", "PERCENT"), + ("_", "UNDERSCORE"), + (".", "DOT"), + (":", "COLON"), + ("=", "EQ"), + ("!", "BANG"), + ("-", "MINUS"), + ("`", "BACKTICK"), +]; + +const EXTRA: &[&str] = &["POSITIONAL_PARAM", "ERROR", "COMMENT", "EOF"]; + +const LITERALS: &[&str] = &[ + "BIT_STRING", + "BYTE_STRING", + "DOLLAR_QUOTED_STRING", + "ESC_STRING", + "FLOAT_NUMBER", + "INT_NUMBER", + "NULL", + "STRING", + "IDENT", +]; + +pub fn syntax_kind_mod() -> proc_macro2::TokenStream { + let keywords = keyword_kinds().expect("Failed to get keyword kinds"); + + let KeywordKinds { all_keywords, .. 
} = keywords; + + let mut enum_variants: Vec<TokenStream> = Vec::new(); + let mut from_kw_match_arms: Vec<TokenStream> = Vec::new(); + + // collect keywords + for kw in &all_keywords { + if kw.to_uppercase().contains("WHITESPACE") { + continue; // Skip whitespace as it is handled separately + } - let token_identifiers = token_identifiers(&proto_file.tokens); - let token_value_literals = token_value_literals(&proto_file.tokens); + let kind_ident = format_ident!("{}_KW", kw.to_case(Case::UpperSnake)); - let syntax_kind_from_impl = - syntax_kind_from_impl(&node_identifiers, &token_identifiers, &token_value_literals); + enum_variants.push(quote! { #kind_ident }); + from_kw_match_arms.push(quote! { + #kw => Some(SyntaxKind::#kind_ident) + }); + } - let mut enum_variants = HashSet::new(); - enum_variants.extend(&custom_node_identifiers); - enum_variants.extend(&node_identifiers); - enum_variants.extend(&token_identifiers); - let unique_enum_variants = enum_variants.into_iter().collect::<Vec<_>>(); + // collect extra keywords + EXTRA.iter().for_each(|&name| { + let variant_name = format_ident!("{}", name); + enum_variants.push(quote! { #variant_name }); + }); + + // collect whitespace variants + WHITESPACE.iter().for_each(|&name| { + let variant_name = format_ident!("{}", name); + enum_variants.push(quote! { #variant_name }); + }); + + // collect punctuations + PUNCT.iter().for_each(|&(_ascii_name, variant)| { + let variant_name = format_ident!("{}", variant); + enum_variants.push(quote! { #variant_name }); + }); + + // collect literals + LITERALS.iter().for_each(|&name| { + let variant_name = format_ident!("{}", name); + enum_variants.push(quote! { #variant_name }); + }); quote! { - /// An u32 enum of all valid syntax elements (nodes and tokens) of the postgres - /// sql dialect, and a few custom ones that are not parsed by pg_query.rs, such - /// as `Whitespace`. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] - #[repr(u32)] + #[repr(u16)] pub enum SyntaxKind { - #(#unique_enum_variants),*, - } - - #syntax_kind_from_impl - } -} - -fn custom_node_names() -> Vec<&'static str> { - vec![ - "SourceFile", - "Comment", - "Whitespace", - "Newline", - "Tab", - "Stmt", - "Eof", - ] -} - -fn custom_node_identifiers(custom_node_names: &[&str]) -> Vec<Ident> { - custom_node_names - .iter() - .map(|&node_name| format_ident!("{}", node_name)) - .collect() -} - -fn node_identifiers(nodes: &[Node]) -> Vec<Ident> { - nodes - .iter() - .map(|node| format_ident!("{}", &node.name)) - .collect() -} - -fn token_identifiers(tokens: &[Token]) -> Vec<Ident> { - tokens - .iter() - .map(|token| format_ident!("{}", &token.name)) - .collect() -} - -fn token_value_literals(tokens: &[Token]) -> Vec<Literal> { - tokens - .iter() - .map(|token| Literal::i32_unsuffixed(token.value)) - .collect() -} - -fn syntax_kind_from_impl( - node_identifiers: &[Ident], - token_identifiers: &[Ident], - token_value_literals: &[Literal], -) -> proc_macro2::TokenStream { - quote! 
{ - /// Converts a `pg_query` node to a `SyntaxKind` - impl From<&NodeEnum> for SyntaxKind { - fn from(node: &NodeEnum) -> SyntaxKind { - match node { - #(NodeEnum::#node_identifiers(_) => SyntaxKind::#node_identifiers),* - } - } - - } - - impl From<Token> for SyntaxKind { - fn from(token: Token) -> SyntaxKind { - match i32::from(token) { - #(#token_value_literals => SyntaxKind::#token_identifiers),*, - _ => panic!("Unknown token: {:?}", token), - } - } + #(#enum_variants),*, } - impl From<&ScanToken> for SyntaxKind { - fn from(token: &ScanToken) -> SyntaxKind { - match token.token { - #(#token_value_literals => SyntaxKind::#token_identifiers),*, - _ => panic!("Unknown token: {:?}", token.token), + impl SyntaxKind { + pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> { + let lower_ident = ident.to_ascii_lowercase(); + match lower_ident.as_str() { + #(#from_kw_match_arms),*, + _ => None } } } diff --git a/crates/pgt_query_ext/Cargo.toml b/crates/pgt_query_ext/Cargo.toml index c6754b67..3e6b57c1 100644 --- a/crates/pgt_query_ext/Cargo.toml +++ b/crates/pgt_query_ext/Cargo.toml @@ -14,11 +14,9 @@ version = "0.0.0" [dependencies] petgraph = "0.6.4" -pg_query.workspace = true -pgt_diagnostics.workspace = true -pgt_lexer.workspace = true -pgt_query_ext_codegen.workspace = true -pgt_text_size.workspace = true +pg_query.workspace = true +pgt_diagnostics.workspace = true +pgt_text_size.workspace = true [lib] doctest = false diff --git a/crates/pgt_query_ext/src/codegen.rs b/crates/pgt_query_ext/src/codegen.rs deleted file mode 100644 index 8278383b..00000000 --- a/crates/pgt_query_ext/src/codegen.rs +++ /dev/null @@ -1 +0,0 @@ -pgt_query_ext_codegen::codegen!(); diff --git a/crates/pgt_query_ext/src/lib.rs b/crates/pgt_query_ext/src/lib.rs index c1f5fb49..a087ec60 100644 --- a/crates/pgt_query_ext/src/lib.rs +++ b/crates/pgt_query_ext/src/lib.rs @@ -9,16 +9,11 @@ //! - `get_node_properties` to get the properties of a node //! - `get_nodes` to get all the nodes in the AST as a petgraph tree //! - `ChildrenIterator` to iterate over the children of a node -mod codegen; pub mod diagnostics; pub use pg_query::protobuf; pub use pg_query::{Error, NodeEnum, Result}; -pub use codegen::{ - ChildrenIterator, Node, TokenProperty, get_location, get_node_properties, get_nodes, -}; - pub fn parse(sql: &str) -> Result<NodeEnum> { pg_query::parse(sql).map(|parsed| { parsed diff --git a/crates/pgt_query_ext_codegen/src/get_location.rs b/crates/pgt_query_ext_codegen/src/get_location.rs deleted file mode 100644 index fa6fa8b2..00000000 --- a/crates/pgt_query_ext_codegen/src/get_location.rs +++ /dev/null @@ -1,122 +0,0 @@ -use pgt_query_proto_parser::{FieldType, Node, ProtoFile}; -use proc_macro2::{Ident, TokenStream}; -use quote::{format_ident, quote}; - -pub fn get_location_mod(proto_file: &ProtoFile) -> proc_macro2::TokenStream { - let manual_node_names = manual_node_names(); - - let node_identifiers = node_identifiers(&proto_file.nodes, &manual_node_names); - let location_idents = location_idents(&proto_file.nodes, &manual_node_names); - - quote! { - /// Returns the location of a node - pub fn get_location(node: &NodeEnum) -> Option<usize> { - let loc = get_location_internal(node); - if loc.is_some() { - usize::try_from(loc.unwrap()).ok() - } else { - None - } - } - - fn get_location_internal(node: &NodeEnum) -> Option<i32> { - let location = match node { - // for some nodes, the location of the node itself is after their children location. - // we implement the logic for those nodes manually. 
- // if you add one, make sure to add its name to `manual_node_names()`. - NodeEnum::BoolExpr(n) => { - let a = n.args.iter().min_by(|a, b| { - let loc_a = get_location_internal(&a.node.as_ref().unwrap()); - let loc_b = get_location_internal(&b.node.as_ref().unwrap()); - loc_a.cmp(&loc_b) - }); - get_location_internal(&a.unwrap().node.as_ref().unwrap()) - }, - NodeEnum::AExpr(n) => get_location_internal(&n.lexpr.as_ref().unwrap().node.as_ref().unwrap()), - NodeEnum::WindowDef(n) => { - if n.partition_clause.len() > 0 || n.order_clause.len() > 0 { - // the location is not correct if its the definition clause, e.g. for - // window w as (partition by a order by b) - // the location is the start of the `partition` token - None - } else { - Some(n.location) - } - }, - NodeEnum::CollateClause(n) => get_location_internal(&n.arg.as_ref().unwrap().node.as_ref().unwrap()), - NodeEnum::TypeCast(n) => get_location_internal(&n.arg.as_ref().unwrap().node.as_ref().unwrap()), - NodeEnum::ColumnDef(n) => if n.colname.len() > 0 { - Some(n.location) - } else { - None - }, - NodeEnum::NullTest(n) => if n.arg.is_some() { - get_location_internal(&n.arg.as_ref().unwrap().node.as_ref().unwrap()) - } else { - Some(n.location) - }, - NodeEnum::PublicationObjSpec(n) => { - match &n.pubtable { - Some(pubtable) => match &pubtable.relation { - Some(range_var) => Some(range_var.location), - None => Some(n.location), - }, - None => Some(n.location), - } - }, - NodeEnum::BooleanTest(n) => { - if n.arg.is_some() { - get_location_internal(&n.arg.as_ref().unwrap().node.as_ref().unwrap()) - } else { - Some(n.location) - } - }, - #(NodeEnum::#node_identifiers(n) => #location_idents),* - }; - if location.is_some() && location.unwrap() < 0 { - None - } else { - location - } - } - } -} - -fn manual_node_names() -> Vec<&'static str> { - vec![ - "BoolExpr", - "AExpr", - "WindowDef", - "CollateClause", - "TypeCast", - "ColumnDef", - "NullTest", - "PublicationObjSpec", - ] -} - -fn location_idents(nodes: &[Node], exclude_nodes: &[&str]) -> Vec { - nodes - .iter() - .filter(|n| !exclude_nodes.contains(&n.name.as_str())) - .map(|node| { - if node - .fields - .iter() - .any(|n| n.name == "location" && n.field_type == FieldType::Int32) - { - quote! { Some(n.location) } - } else { - quote! { None } - } - }) - .collect() -} - -fn node_identifiers(nodes: &[Node], exclude_nodes: &[&str]) -> Vec { - nodes - .iter() - .filter(|n| !exclude_nodes.contains(&n.name.as_str())) - .map(|node| format_ident!("{}", &node.name)) - .collect() -} diff --git a/crates/pgt_query_ext_codegen/src/get_node_properties.rs b/crates/pgt_query_ext_codegen/src/get_node_properties.rs deleted file mode 100644 index 9581304b..00000000 --- a/crates/pgt_query_ext_codegen/src/get_node_properties.rs +++ /dev/null @@ -1,1006 +0,0 @@ -use pgt_query_proto_parser::{FieldType, Node, ProtoFile}; -use proc_macro2::{Ident, TokenStream}; -use quote::{format_ident, quote}; - -pub fn get_node_properties_mod(proto_file: &ProtoFile) -> proc_macro2::TokenStream { - let node_identifiers = node_identifiers(&proto_file.nodes); - let node_handlers = node_handlers(&proto_file.nodes); - - quote! 
{ - #[derive(Debug, Clone, PartialEq)] - pub struct TokenProperty { - pub value: Option, - pub kind: Option, - } - - impl TokenProperty { - pub fn new(value: Option, kind: Option) -> TokenProperty { - if value.is_none() && kind.is_none() { - panic!("TokenProperty must have either value or kind"); - } - TokenProperty { value, kind } - } - } - - impl From for TokenProperty { - fn from(value: i32) -> TokenProperty { - TokenProperty { - value: Some(value.to_string()), - kind: None, - } - } - } - - impl From for TokenProperty { - fn from(value: u32) -> TokenProperty { - TokenProperty { - value: Some(value.to_string()), - kind: None, - } - } - } - - - impl From for TokenProperty { - fn from(value: i64) -> TokenProperty { - TokenProperty { - value: Some(value.to_string()), - kind: None, - } - } - } - - impl From for TokenProperty { - fn from(value: u64) -> TokenProperty { - TokenProperty { - value: Some(value.to_string()), - kind: None, - } - } - } - - impl From for TokenProperty { - fn from(value: f64) -> TokenProperty { - TokenProperty { - value: Some(value.to_string()), - kind: None, - } - } - } - - impl From for TokenProperty { - fn from(value: bool) -> TokenProperty { - TokenProperty { - value: Some(value.to_string()), - kind: None, - } - } - } - - impl From for TokenProperty { - fn from(value: String) -> TokenProperty { - assert!(value.len() > 0, "String property value has length 0"); - TokenProperty { - value: Some(value.to_lowercase()), - kind: None, - } - } - } - - - impl From<&pg_query::protobuf::Integer> for TokenProperty { - fn from(node: &pg_query::protobuf::Integer) -> TokenProperty { - TokenProperty { - value: Some(node.ival.to_string()), - kind: Some(SyntaxKind::Iconst) - } - } - } - - impl From<&pg_query::protobuf::Boolean> for TokenProperty { - fn from(node: &pg_query::protobuf::Boolean) -> TokenProperty { - TokenProperty { - value: Some(node.boolval.to_string()), - kind: match node.boolval { - true => Some(SyntaxKind::TrueP), - false => Some(SyntaxKind::FalseP), - } - } - } - } - - impl From for TokenProperty { - fn from(kind: SyntaxKind) -> TokenProperty { - TokenProperty { - value: None, - kind: Some(kind), - } - } - } - - impl From for TokenProperty { - fn from(token: Token) -> TokenProperty { - TokenProperty { - value: None, - kind: Some(SyntaxKind::from(token)), - } - } - } - - pub fn get_node_properties(node: &NodeEnum, parent: Option<&NodeEnum>) -> Vec { - let mut tokens: Vec = Vec::new(); - - match node { - #(NodeEnum::#node_identifiers(n) => {#node_handlers}),*, - }; - - tokens - } - - } -} - -fn node_identifiers(nodes: &[Node]) -> Vec { - nodes - .iter() - .map(|node| format_ident!("{}", &node.name)) - .collect() -} - -fn node_handlers(nodes: &[Node]) -> Vec { - nodes - .iter() - .map(|node| { - let string_property_handlers = string_property_handlers(node); - let custom_handlers = custom_handlers(node); - quote! { - #custom_handlers - #(#string_property_handlers)* - } - }) - .collect() -} - -fn custom_handlers(node: &Node) -> TokenStream { - match node.name.as_str() { - "SelectStmt" => quote! 
{ - tokens.push(TokenProperty::from(Token::Select)); - if n.distinct_clause.len() > 0 { - tokens.push(TokenProperty::from(Token::Distinct)); - } - if n.values_lists.len() > 0 { - tokens.push(TokenProperty::from(Token::Values)); - } - if n.from_clause.len() > 0 { - tokens.push(TokenProperty::from(Token::From)); - } - if n.where_clause.is_some() { - tokens.push(TokenProperty::from(Token::Where)); - } - if n.group_clause.len() > 0 { - tokens.push(TokenProperty::from(Token::GroupP)); - tokens.push(TokenProperty::from(Token::By)); - } - match n.op() { - protobuf::SetOperation::Undefined => {}, - protobuf::SetOperation::SetopNone => {}, - protobuf::SetOperation::SetopUnion => tokens.push(TokenProperty::from(Token::Union)), - protobuf::SetOperation::SetopIntersect => tokens.push(TokenProperty::from(Token::Intersect)), - protobuf::SetOperation::SetopExcept => tokens.push(TokenProperty::from(Token::Except)), - _ => panic!("Unknown SelectStmt op {:#?}", n.op()), - } - if n.all { - tokens.push(TokenProperty::from(Token::All)); - } - }, - "BoolExpr" => quote! { - match n.boolop() { - protobuf::BoolExprType::AndExpr => tokens.push(TokenProperty::from(Token::And)), - protobuf::BoolExprType::OrExpr => tokens.push(TokenProperty::from(Token::Or)), - protobuf::BoolExprType::NotExpr => tokens.push(TokenProperty::from(Token::Not)), - _ => panic!("Unknown BoolExpr {:#?}", n.boolop()), - } - }, - "JoinExpr" => quote! { - tokens.push(TokenProperty::from(Token::Join)); - tokens.push(TokenProperty::from(Token::On)); - match n.jointype() { - protobuf::JoinType::JoinInner => tokens.push(TokenProperty::from(Token::InnerP)), - protobuf::JoinType::JoinLeft => tokens.push(TokenProperty::from(Token::Left)), - protobuf::JoinType::JoinFull => tokens.push(TokenProperty::from(Token::Full)), - protobuf::JoinType::JoinRight => tokens.push(TokenProperty::from(Token::Right)), - _ => panic!("Unknown JoinExpr jointype {:#?}", n.jointype()), - } - - }, - "ResTarget" => quote! { - if n.name.len() > 0 { - tokens.push(TokenProperty::from(Token::As)); - } - }, - "Integer" => quote! { - tokens.push(TokenProperty::from(n)); - }, - "DefElem" => quote! { - match n.defname.as_str() { - "location" => { - tokens.push(TokenProperty::from(Token::Default)); - }, - "connection_limit" => { - tokens.push(TokenProperty::from(Token::Limit)); - tokens.push(TokenProperty::from(Token::Iconst)); - }, - "owner" => { - tokens.push(TokenProperty::from(Token::Owner)); - } - _ => {} - } - match n.defaction() { - protobuf::DefElemAction::DefelemUnspec => tokens.push(TokenProperty::from(Token::Ascii61)), - _ => panic!("Unknown DefElem {:#?}", n.defaction()), - } - }, - "Alias" => quote! { - tokens.push(TokenProperty::from(Token::As)); - }, - "CollateClause" => quote! { - tokens.push(TokenProperty::from(Token::Collate)); - }, - "AExpr" => quote! { - match n.kind() { - protobuf::AExprKind::AexprOp => {}, // do nothing - protobuf::AExprKind::AexprOpAny => tokens.push(TokenProperty::from(Token::Any)), - protobuf::AExprKind::AexprIn => tokens.push(TokenProperty::from(Token::InP)), - _ => panic!("Unknown AExpr kind {:#?}", n.kind()), - } - }, - "WindowDef" => quote! { - if n.partition_clause.len() > 0 || n.order_clause.len() > 0 { - tokens.push(TokenProperty::from(Token::Window)); - tokens.push(TokenProperty::from(Token::As)); - } - if n.partition_clause.len() > 0 { - tokens.push(TokenProperty::from(Token::Partition)); - tokens.push(TokenProperty::from(Token::By)); - } - }, - "Boolean" => quote! { - tokens.push(TokenProperty::from(n)); - }, - "AStar" => quote! 
{ - tokens.push(TokenProperty::from(Token::Ascii42)); - }, - "FuncCall" => quote! { - if n.funcname.len() == 1 && n.args.len() == 0 { - // check if count(*) - if let Some(node) = &n.funcname[0].node { - if let NodeEnum::String(n) = node { - if n.sval == "count" { - tokens.push(TokenProperty::from(Token::Ascii42)); - } - } - } - } - if n.agg_filter.is_some() { - tokens.push(TokenProperty::from(Token::Filter)); - tokens.push(TokenProperty::from(Token::Where)); - } - if n.over.is_some() { - tokens.push(TokenProperty::from(Token::Over)); - } - }, - "SqlvalueFunction" => quote! { - match n.op() { - protobuf::SqlValueFunctionOp::SvfopCurrentRole => tokens.push(TokenProperty::from(Token::CurrentRole)), - protobuf::SqlValueFunctionOp::SvfopCurrentUser => tokens.push(TokenProperty::from(Token::CurrentUser)), - _ => panic!("Unknown SqlvalueFunction {:#?}", n.op()), - } - }, - "SortBy" => quote! { - tokens.push(TokenProperty::from(Token::Order)); - tokens.push(TokenProperty::from(Token::By)); - match n.sortby_dir() { - protobuf::SortByDir::SortbyAsc => tokens.push(TokenProperty::from(Token::Asc)), - protobuf::SortByDir::SortbyDesc => tokens.push(TokenProperty::from(Token::Desc)), - _ => {} - } - }, - "AConst" => quote! { - if n.isnull { - tokens.push(TokenProperty::from(Token::NullP)); - } - }, - "AlterTableStmt" => quote! { - tokens.push(TokenProperty::from(Token::Alter)); - tokens.push(TokenProperty::from(Token::Table)); - }, - "AlterTableCmd" => quote! { - match n.subtype() { - protobuf::AlterTableType::AtColumnDefault => { - tokens.push(TokenProperty::from(Token::Alter)); - tokens.push(TokenProperty::from(Token::Column)); - tokens.push(TokenProperty::from(Token::Set)); - tokens.push(TokenProperty::from(Token::Default)); - }, - protobuf::AlterTableType::AtAddConstraint => tokens.push(TokenProperty::from(Token::AddP)), - protobuf::AlterTableType::AtAlterColumnType => { - tokens.push(TokenProperty::from(Token::Alter)); - tokens.push(TokenProperty::from(Token::Column)); - tokens.push(TokenProperty::from(Token::TypeP)); - }, - protobuf::AlterTableType::AtDropColumn => { - tokens.push(TokenProperty::from(Token::Drop)); - tokens.push(TokenProperty::from(Token::Column)); - }, - _ => panic!("Unknown AlterTableCmd {:#?}", n.subtype()), - } - }, - "VariableSetStmt" => quote! { - tokens.push(TokenProperty::from(Token::Set)); - match n.kind() { - protobuf::VariableSetKind::VarSetValue => tokens.push(TokenProperty::from(Token::To)), - _ => panic!("Unknown VariableSetStmt {:#?}", n.kind()), - } - }, - "CreatePolicyStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::Policy)); - tokens.push(TokenProperty::from(Token::On)); - if n.roles.len() > 0 { - tokens.push(TokenProperty::from(Token::To)); - } - if n.qual.is_some() { - tokens.push(TokenProperty::from(Token::Using)); - } - if n.with_check.is_some() { - tokens.push(TokenProperty::from(Token::With)); - tokens.push(TokenProperty::from(Token::Check)); - } - }, - "CopyStmt" => quote! { - tokens.push(TokenProperty::from(Token::Copy)); - tokens.push(TokenProperty::from(Token::From)); - }, - "RenameStmt" => quote! { - tokens.push(TokenProperty::from(Token::Alter)); - tokens.push(TokenProperty::from(Token::Table)); - tokens.push(TokenProperty::from(Token::Rename)); - tokens.push(TokenProperty::from(Token::To)); - }, - "Constraint" => quote! 
{ - match n.contype() { - protobuf::ConstrType::ConstrNotnull => { - tokens.push(TokenProperty::from(Token::Not)); - tokens.push(TokenProperty::from(Token::NullP)); - }, - protobuf::ConstrType::ConstrDefault => tokens.push(TokenProperty::from(Token::Default)), - protobuf::ConstrType::ConstrCheck => tokens.push(TokenProperty::from(Token::Check)), - protobuf::ConstrType::ConstrPrimary => { - tokens.push(TokenProperty::from(Token::Primary)); - tokens.push(TokenProperty::from(Token::Key)); - }, - protobuf::ConstrType::ConstrForeign => tokens.push(TokenProperty::from(Token::References)), - protobuf::ConstrType::ConstrUnique => tokens.push(TokenProperty::from(Token::Unique)), - _ => panic!("Unknown Constraint {:#?}", n.contype()), - }; - if n.options.len() > 0 { - tokens.push(TokenProperty::from(Token::With)); - } - }, - "PartitionSpec" => quote! { - tokens.push(TokenProperty::from(Token::Partition)); - tokens.push(TokenProperty::from(Token::By)); - }, - "InsertStmt" => quote! { - tokens.push(TokenProperty::from(Token::Insert)); - tokens.push(TokenProperty::from(Token::Into)); - }, - "DeleteStmt" => quote! { - tokens.push(TokenProperty::from(Token::DeleteP)); - tokens.push(TokenProperty::from(Token::From)); - if n.where_clause.is_some() { - tokens.push(TokenProperty::from(Token::Where)); - } - if n.using_clause.len() > 0 { - tokens.push(TokenProperty::from(Token::Using)); - } - }, - "ViewStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::View)); - if n.query.is_some() { - tokens.push(TokenProperty::from(Token::As)); - // check if SelectStmt with WithClause with recursive set to true - if let Some(NodeEnum::SelectStmt(select_stmt)) = n.query.as_ref().and_then(|query| query.node.as_ref()) { - if select_stmt.with_clause.is_some() && select_stmt.with_clause.as_ref().unwrap().recursive { - tokens.push(TokenProperty::from(Token::Recursive)); - } - } - } - if n.replace { - tokens.push(TokenProperty::from(Token::Or)); - tokens.push(TokenProperty::from(Token::Replace)); - } - if let Some(n) = &n.view { - match n.relpersistence.as_str() { - // Temporary - "t" => tokens.push(TokenProperty::from(Token::Temporary)), - _ => {}, - } - } - match n.with_check_option() { - protobuf::ViewCheckOption::LocalCheckOption => { - tokens.push(TokenProperty::from(Token::With)); - tokens.push(TokenProperty::from(Token::Local)); - tokens.push(TokenProperty::from(Token::Check)); - tokens.push(TokenProperty::from(Token::Option)); - }, - protobuf::ViewCheckOption::CascadedCheckOption => { - tokens.push(TokenProperty::from(Token::With)); - tokens.push(TokenProperty::from(Token::Cascaded)); - tokens.push(TokenProperty::from(Token::Check)); - tokens.push(TokenProperty::from(Token::Option)); - }, - _ => {} - } - }, - "CreateStmt" => quote! 
{ - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::Table)); - if n.tablespacename.len() > 0 { - tokens.push(TokenProperty::from(Token::Tablespace)); - } - if n.options.len() > 0 { - tokens.push(TokenProperty::from(Token::With)); - } - if n.if_not_exists { - tokens.push(TokenProperty::from(Token::IfP)); - tokens.push(TokenProperty::from(Token::Not)); - tokens.push(TokenProperty::from(Token::Exists)); - } - if n.partbound.is_some() { - tokens.push(TokenProperty::from(Token::Partition)); - tokens.push(TokenProperty::from(Token::Of)); - tokens.push(TokenProperty::from(Token::For)); - tokens.push(TokenProperty::from(Token::Values)); - } - if let Some(n) = &n.relation { - match n.relpersistence.as_str() { - // Unlogged - "u" => tokens.push(TokenProperty::from(Token::Unlogged)), - // Temporary - "t" => tokens.push(TokenProperty::from(Token::Temporary)), - _ => {}, - } - if n.inh { - tokens.push(TokenProperty::from(Token::Inherits)); - } - } - }, - "TableLikeClause" => quote! { - tokens.push(TokenProperty::from(Token::Like)); - // CREATE_TABLE_LIKE_ALL - if n.options == 0x7FFFFFFF { - tokens.push(TokenProperty::from(Token::Including)); - tokens.push(TokenProperty::from(Token::All)); - } else { - tokens.push(TokenProperty::from(Token::Excluding)); - tokens.push(TokenProperty::from(Token::All)); - } - }, - "TransactionStmt" => quote! { - match n.kind() { - protobuf::TransactionStmtKind::TransStmtBegin => tokens.push(TokenProperty::from(Token::BeginP)), - protobuf::TransactionStmtKind::TransStmtCommit => tokens.push(TokenProperty::from(Token::Commit)), - _ => panic!("Unknown TransactionStmt {:#?}", n.kind()) - } - }, - "PartitionBoundSpec" => quote! { - tokens.push(TokenProperty::from(Token::From)); - tokens.push(TokenProperty::from(Token::To)); - }, - "CaseExpr" => quote! { - tokens.push(TokenProperty::from(Token::Case)); - tokens.push(TokenProperty::from(Token::EndP)); - if n.defresult.is_some() { - tokens.push(TokenProperty::from(Token::Else)); - } - }, - "NullTest" => quote! { - match n.nulltesttype() { - protobuf::NullTestType::IsNull => tokens.push(TokenProperty::from(Token::Is)), - protobuf::NullTestType::IsNotNull => { - tokens.push(TokenProperty::from(Token::Is)); - tokens.push(TokenProperty::from(Token::Not)); - }, - _ => panic!("Unknown NullTest {:#?}", n.nulltesttype()), - } - tokens.push(TokenProperty::from(Token::NullP)); - }, - "CreateFunctionStmt" => quote! 
{ - tokens.push(TokenProperty::from(Token::Create)); - if n.is_procedure { - tokens.push(TokenProperty::from(Token::Procedure)); - } else { - tokens.push(TokenProperty::from(Token::Function)); - } - if n.replace { - tokens.push(TokenProperty::from(Token::Or)); - tokens.push(TokenProperty::from(Token::Replace)); - } - if let Some(return_type) = &n.return_type { - tokens.push(TokenProperty::from(Token::Returns)); - if return_type.setof { - tokens.push(TokenProperty::from(Token::Setof)); - } - } - for option in &n.options { - if let Some(NodeEnum::DefElem(node)) = &option.node { - if node.defname == "strict" { - if let Some(NodeEnum::Boolean(node)) = - node.arg.as_ref().and_then(|arg| arg.node.as_ref()) - { - if node.boolval { - tokens.push(TokenProperty::from(Token::NullP)); - tokens.push(TokenProperty::from(Token::On)); - tokens.push(TokenProperty::from(Token::NullP)); - tokens.push(TokenProperty::from(Token::InputP)); - } else { - tokens.push(TokenProperty::from(Token::On)); - tokens.push(TokenProperty::from(Token::NullP)); - tokens.push(TokenProperty::from(Token::InputP)); - } - } - } - } - } - }, - "FunctionParameter" => quote! { - match n.mode() { - protobuf::FunctionParameterMode::FuncParamIn => tokens.push(TokenProperty::from(Token::InP)), - protobuf::FunctionParameterMode::FuncParamOut => tokens.push(TokenProperty::from(Token::OutP)), - protobuf::FunctionParameterMode::FuncParamInout => tokens.push(TokenProperty::from(Token::Inout)), - protobuf::FunctionParameterMode::FuncParamVariadic => tokens.push(TokenProperty::from(Token::Variadic)), - // protobuf::FunctionParameterMode::FuncParamTable => tokens.push(TokenProperty::from(Token::Table)), - protobuf::FunctionParameterMode::FuncParamDefault => {}, // do nothing - _ => panic!("Unknown FunctionParameter {:#?}", n.mode()), - }; - if n.defexpr.is_some() { - tokens.push(TokenProperty::from(Token::Default)); - } - }, - "NamedArgExpr" => quote! { - // => - tokens.push(TokenProperty::from(Token::EqualsGreater)); - }, - "CaseWhen" => quote! { - tokens.push(TokenProperty::from(Token::When)); - tokens.push(TokenProperty::from(Token::Then)); - }, - "TypeCast" => quote! { - tokens.push(TokenProperty::from(Token::Typecast)); - }, - "CreateDomainStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::DomainP)); - if n.type_name.is_some() { - tokens.push(TokenProperty::from(Token::As)); - } - }, - "List" => quote! 
{ - if parent.is_some() { - // if parent is `DefineStmt`, we need to check whether an ORDER BY needs to be added - if let NodeEnum::DefineStmt(define_stmt) = parent.unwrap() { - // there *seems* to be an integer node in the last position of the DefineStmt args that - // defines whether the list contains an order by statement - let integer = define_stmt.args.last() - .and_then(|node| node.node.as_ref()) - .and_then(|node| if let NodeEnum::Integer(n) = node { Some(n.ival) } else { None }); - if integer.is_none() { - panic!("DefineStmt of type ObjectAggregate has no integer node in last position of args"); - } - // if the integer is 1, then there is an order by statement - // we add it to the `List` node because that seems to make most sense based off the grammar definition - // ref: https://github.com/postgres/postgres/blob/REL_15_STABLE/src/backend/parser/gram.y#L8355 - // ``` - // aggr_args: - // | '(' aggr_args_list ORDER BY aggr_args_list ')' - // ``` - if integer.unwrap() == 1 { - tokens.push(TokenProperty::from(Token::Order)); - tokens.push(TokenProperty::from(Token::By)); - } - } - } - }, - "DefineStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - if n.replace { - tokens.push(TokenProperty::from(Token::Or)); - tokens.push(TokenProperty::from(Token::Replace)); - } - match n.kind() { - protobuf::ObjectType::ObjectAggregate => { - tokens.push(TokenProperty::from(Token::Aggregate)); - - // n.args is always an array with two nodes - assert_eq!(n.args.len(), 2, "DefineStmt of type ObjectAggregate does not have exactly 2 args"); - // the first is either a List or a Node { node: None } - - if let Some(node) = &n.args.first() { - if node.node.is_none() { - // if first element is a Node { node: None }, then it's "*" - tokens.push(TokenProperty::from(Token::Ascii42)); - } } - // if its a list, we handle it in the handler for `List` - }, - protobuf::ObjectType::ObjectType => { - tokens.push(TokenProperty::from(Token::TypeP)); - }, - _ => panic!("Unknown DefineStmt {:#?}", n.kind()), - } - }, - "CreateSchemaStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::Schema)); - if n.if_not_exists { - tokens.push(TokenProperty::from(Token::IfP)); - tokens.push(TokenProperty::from(Token::Not)); - tokens.push(TokenProperty::from(Token::Exists)); - } - if n.authrole.is_some() { - tokens.push(TokenProperty::from(Token::Authorization)); - } - }, - "CreateEnumStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::TypeP)); - tokens.push(TokenProperty::from(Token::As)); - tokens.push(TokenProperty::from(Token::EnumP)); - }, - "CreateCastStmt" => quote! 
{ - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::Cast)); - tokens.push(TokenProperty::from(Token::As)); - if n.inout { - tokens.push(TokenProperty::from(Token::With)); - tokens.push(TokenProperty::from(Token::Inout)); - } else if n.func.is_some() { - tokens.push(TokenProperty::from(Token::With)); - tokens.push(TokenProperty::from(Token::Function)); - } else { - tokens.push(TokenProperty::from(Token::Without)); - tokens.push(TokenProperty::from(Token::Function)); - } - match n.context() { - protobuf::CoercionContext::CoercionImplicit => { - tokens.push(TokenProperty::from(Token::As)); - tokens.push(TokenProperty::from(Token::ImplicitP)); - }, - protobuf::CoercionContext::CoercionAssignment => { - tokens.push(TokenProperty::from(Token::As)); - tokens.push(TokenProperty::from(Token::Assignment)); - }, - protobuf::CoercionContext::CoercionPlpgsql => {}, - protobuf::CoercionContext::CoercionExplicit => {}, - _ => panic!("Unknown CreateCastStmt {:#?}", n.context()) - } - }, - "CreateRangeStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::TypeP)); - tokens.push(TokenProperty::from(Token::As)); - tokens.push(TokenProperty::from(Token::Range)); - }, - "IndexStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - if n.unique { - tokens.push(TokenProperty::from(Token::Unique)); - } - tokens.push(TokenProperty::from(Token::Index)); - if n.concurrent { - tokens.push(TokenProperty::from(Token::Concurrently)); - } - if n.if_not_exists { - tokens.push(TokenProperty::from(Token::IfP)); - tokens.push(TokenProperty::from(Token::Not)); - tokens.push(TokenProperty::from(Token::Exists)); - } - tokens.push(TokenProperty::from(Token::On)); - // access_method is btree by default - if n.access_method.len() > 0 { - tokens.push(TokenProperty::from(Token::Using)); - } - if n.index_including_params.len() > 0 { - tokens.push(TokenProperty::from(Token::Include)); - } - if n.options.len() > 0 { - tokens.push(TokenProperty::from(Token::With)); - } - // table_space is an empty string by default - if n.table_space.len() > 0 { - tokens.push(TokenProperty::from(Token::Tablespace)); - } - }, - "IndexElem" => quote! { - if n.collation.len() > 0 { - tokens.push(TokenProperty::from(Token::Collate)); - } - match n.nulls_ordering() { - protobuf::SortByNulls::SortbyNullsDefault => {}, - protobuf::SortByNulls::SortbyNullsFirst => { - tokens.push(TokenProperty::from(Token::NullsP)); - tokens.push(TokenProperty::from(Token::FirstP)); - }, - protobuf::SortByNulls::SortbyNullsLast => { - tokens.push(TokenProperty::from(Token::NullsP)); - tokens.push(TokenProperty::from(Token::LastP)); - }, - _ => panic!("Unknown IndexElem {:#?}", n.nulls_ordering()), - } - }, - "CreateTableSpaceStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::Tablespace)); - tokens.push(TokenProperty::from(Token::Location)); - if n.owner.is_some() { - tokens.push(TokenProperty::from(Token::Owner)); - } - if n.options.len() > 0 { - tokens.push(TokenProperty::from(Token::With)); - } - }, - "CreatePublicationStmt" => quote! 
{ - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::Publication)); - if n.for_all_tables { - tokens.push(TokenProperty::from(Token::For)); - tokens.push(TokenProperty::from(Token::All)); - tokens.push(TokenProperty::from(Token::Tables)); - } - if let Some(n) = n.options.first() { - tokens.push(TokenProperty::from(Token::With)); - } - if let Some(n) = n.pubobjects.first() { - tokens.push(TokenProperty::from(Token::For)); - if let Some(NodeEnum::PublicationObjSpec(n)) = &n.node { - match n.pubobjtype() { - protobuf::PublicationObjSpecType::PublicationobjTable => { - tokens.push(TokenProperty::from(Token::Table)); - }, - protobuf::PublicationObjSpecType::PublicationobjTablesInSchema => { - tokens.push(TokenProperty::from(Token::Tables)); - tokens.push(TokenProperty::from(Token::InP)); - tokens.push(TokenProperty::from(Token::Schema)); - }, - _ => panic!("Unknown CreatePublicationStmt {:#?}", n.pubobjtype()) - } - } - } - if let Some(n) = n.pubobjects.last() { - if let Some(NodeEnum::PublicationObjSpec(n)) = &n.node { - match n.pubobjtype() { - protobuf::PublicationObjSpecType::PublicationobjTablesInSchema => { - tokens.push(TokenProperty::from(Token::Tables)); - tokens.push(TokenProperty::from(Token::InP)); - tokens.push(TokenProperty::from(Token::Schema)); - }, - _ => {} - } - } - } - }, - "PublicationTable" => quote! { - if n.where_clause.is_some() { - tokens.push(TokenProperty::from(Token::Where)); - } - }, - "BooleanTest" => quote! { - match n.booltesttype() { - protobuf::BoolTestType::IsTrue => { - tokens.push(TokenProperty::from(Token::Is)); - tokens.push(TokenProperty::from(Token::TrueP)); - }, - protobuf::BoolTestType::IsNotTrue => { - tokens.push(TokenProperty::from(Token::Is)); - tokens.push(TokenProperty::from(Token::Not)); - tokens.push(TokenProperty::from(Token::TrueP)); - }, - protobuf::BoolTestType::IsFalse => { - tokens.push(TokenProperty::from(Token::Is)); - tokens.push(TokenProperty::from(Token::FalseP)); - }, - protobuf::BoolTestType::IsNotFalse => { - tokens.push(TokenProperty::from(Token::Is)); - tokens.push(TokenProperty::from(Token::Not)); - tokens.push(TokenProperty::from(Token::FalseP)); - }, - _ => panic!("Unknown BooleanTest {:#?}", n.booltesttype()), - } - }, - "CompositeTypeStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::TypeP)); - tokens.push(TokenProperty::from(Token::As)); - }, - "CreatedbStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::Database)); - }, - "CreateExtensionStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - tokens.push(TokenProperty::from(Token::Extension)); - if n.if_not_exists { - tokens.push(TokenProperty::from(Token::IfP)); - tokens.push(TokenProperty::from(Token::Not)); - tokens.push(TokenProperty::from(Token::Exists)); - } - }, - "CreateConversionStmt" => quote! { - tokens.push(TokenProperty::from(Token::Create)); - if n.def { - tokens.push(TokenProperty::from(Token::Default)); - } - tokens.push(TokenProperty::from(Token::ConversionP)); - if n.for_encoding_name.len() > 0 { - tokens.push(TokenProperty::from(Token::For)); - } - if n.to_encoding_name.len() > 0 { - tokens.push(TokenProperty::from(Token::To)); - } - if n.func_name.len() == 1 { - tokens.push(TokenProperty::from(Token::From)); - } else if n.func_name.len() > 1 { - panic!("Encountered multiple defined func_name elements in CreateConversionStmt"); - } - }, - "CreateTransformStmt" => quote! 
{ - tokens.push(TokenProperty::from(Token::Create)); - if n.replace { - tokens.push(TokenProperty::from(Token::Or)); - tokens.push(TokenProperty::from(Token::Replace)); - } - tokens.push(TokenProperty::from(Token::Transform)); - if n.type_name.is_some() { - tokens.push(TokenProperty::from(Token::For)); - } - tokens.push(TokenProperty::from(Token::Language)); - if n.fromsql.is_some() { - tokens.push(TokenProperty::from(Token::From)); - tokens.push(TokenProperty::from(Token::SqlP)); - tokens.push(TokenProperty::from(Token::With)); - tokens.push(TokenProperty::from(Token::Function)); - } - if n.tosql.is_some() { - tokens.push(TokenProperty::from(Token::To)); - tokens.push(TokenProperty::from(Token::SqlP)); - tokens.push(TokenProperty::from(Token::With)); - tokens.push(TokenProperty::from(Token::Function)); - } - }, - "TypeName" => quote! { - let names = n.names - .iter() - .filter_map(|n| if let Some(NodeEnum::String(s)) = &n.node { Some(s.sval.clone()) } else { None }) - .collect::>(); - - if names.len() == 2 && names[0] == "pg_catalog" { - match names[1].as_str() { - "float8" => { - tokens.push(TokenProperty::from(Token::DoubleP)); - tokens.push(TokenProperty::from(Token::Precision)); - }, - "interval" => { - // Adapted from https://github.com/postgres/postgres/blob/REL_15_STABLE/src/backend/utils/adt/timestamp.c#L1103 - const MONTH: i32 = 1; - const YEAR: i32 = 2; - const DAY: i32 = 3; - const HOUR: i32 = 10; - const MINUTE: i32 = 11; - const SECOND: i32 = 12; - - let fields = &n.typmods.first() - .and_then(|node| node.node.as_ref()) - .and_then(|node| if let NodeEnum::AConst(n) = node { n.val.clone() } else { None }) - .and_then(|node| if let protobuf::a_const::Val::Ival(n) = node { Some(n.ival) } else { None }); - - if let Some(fields) = fields { - match fields.clone() { - // YEAR TO MONTH - i if i == 1 << YEAR | 1 << MONTH => { - tokens.push(TokenProperty::from(Token::To)); - tokens.push(TokenProperty::from(Token::MonthP)); - }, - // DAY TO HOUR - i if i == 1 << DAY | 1 << HOUR => { - tokens.push(TokenProperty::from(Token::To)); - tokens.push(TokenProperty::from(Token::HourP)); - }, - // DAY TO MINUTE - i if i == 1 << DAY | 1 << HOUR | 1 << MINUTE => { - tokens.push(TokenProperty::from(Token::To)); - tokens.push(TokenProperty::from(Token::MinuteP)); - }, - // DAY TO SECOND - i if i == 1 << DAY | 1 << HOUR | 1 << MINUTE | 1 << SECOND => { - tokens.push(TokenProperty::from(Token::To)); - tokens.push(TokenProperty::from(Token::SecondP)); - }, - // HOUR TO MINUTE - i if i == 1 << HOUR | 1 << MINUTE => { - tokens.push(TokenProperty::from(Token::To)); - tokens.push(TokenProperty::from(Token::MinuteP)); - }, - // HOUR TO SECOND - i if i == 1 << HOUR | 1 << MINUTE | 1 << SECOND => { - tokens.push(TokenProperty::from(Token::To)); - tokens.push(TokenProperty::from(Token::SecondP)); - }, - // MINUTE TO SECOND - i if i == 1 << MINUTE | 1 << SECOND => { - tokens.push(TokenProperty::from(Token::To)); - tokens.push(TokenProperty::from(Token::SecondP)); - }, - _ => panic!("Unknown Interval fields {:#?}", fields), - } - } - }, - "timestamptz" => { - tokens.push(TokenProperty::from(Token::Timestamp)); - tokens.push(TokenProperty::from(Token::With)); - tokens.push(TokenProperty::from(Token::Time)); - tokens.push(TokenProperty::from(Token::Zone)); - } - "timetz" => { - tokens.push(TokenProperty::from(Token::Time)); - tokens.push(TokenProperty::from(Token::With)); - tokens.push(TokenProperty::from(Token::Time)); - tokens.push(TokenProperty::from(Token::Zone)); - } - _ => {} - } - } - }, - "TruncateStmt" => 
quote! { - tokens.push(TokenProperty::from(Token::Truncate)); - tokens.push(TokenProperty::from(Token::Table)); - if n.restart_seqs { - tokens.push(TokenProperty::from(Token::Restart)); - tokens.push(TokenProperty::from(Token::IdentityP)); - } else { - tokens.push(TokenProperty::from(Token::ContinueP)); - tokens.push(TokenProperty::from(Token::IdentityP)); - } - match n.behavior { - // DropRestrict - 1 => tokens.push(TokenProperty::from(Token::Restrict)), - // DropCascade - 2 => tokens.push(TokenProperty::from(Token::Cascade)), - _ => {} - } - }, - _ => quote! {}, - } -} - -fn string_property_handlers(node: &Node) -> Vec { - node.fields - .iter() - .filter_map(|field| { - if field.repeated { - return None; - } - let field_name = format_ident!("{}", field.name.as_str()); - match field.field_type { - // just handle string values for now - FieldType::String => Some(quote! { - // most string values are never None, but an empty string - if n.#field_name.len() > 0 { - tokens.push(TokenProperty::from(n.#field_name.to_owned())); - } - }), - _ => None, - } - }) - .collect() -} diff --git a/crates/pgt_query_ext_codegen/src/get_nodes.rs b/crates/pgt_query_ext_codegen/src/get_nodes.rs deleted file mode 100644 index e0381331..00000000 --- a/crates/pgt_query_ext_codegen/src/get_nodes.rs +++ /dev/null @@ -1,141 +0,0 @@ -use pgt_query_proto_parser::{FieldType, Node, ProtoFile}; -use proc_macro2::{Ident, TokenStream}; -use quote::{format_ident, quote}; - -pub fn get_nodes_mod(proto_file: &ProtoFile) -> proc_macro2::TokenStream { - let manual_node_names = manual_node_names(); - - let node_identifiers = node_identifiers(&proto_file.nodes, &manual_node_names); - let node_handlers = node_handlers(&proto_file.nodes, &manual_node_names); - - quote! { - #[derive(Debug, Clone)] - pub struct Node { - pub inner: NodeEnum, - pub depth: usize, - pub properties: Vec, - pub location: Option, - } - - /// Returns all children of the node, recursively - /// location is resolved manually - pub fn get_nodes(node: &NodeEnum) -> StableGraph { - let mut g = StableGraph::::new(); - - let root_node_idx = g.add_node(Node { - inner: node.to_owned(), - depth: 0, - properties: get_node_properties(node, None), - location: get_location(node), - }); - - // Parent node idx, Node, depth - let mut stack: VecDeque<(NodeIndex, NodeEnum, usize)> = - VecDeque::from(vec![(root_node_idx, node.to_owned(), 0)]); - while !stack.is_empty() { - let (parent_idx, node, depth) = stack.pop_front().unwrap(); - let current_depth = depth + 1; - let mut handle_child = |c: NodeEnum| { - if match &c { - // all "simple nodes" are not handled individually but merged with their parent - NodeEnum::String(n) => true, - NodeEnum::Integer(n) => true, - NodeEnum::Float(n) => true, - NodeEnum::Boolean(n) => true, - NodeEnum::BitString(n) => true, - _ => false - } { - g[parent_idx].properties.extend(get_node_properties(&c, Some(&node))); - } else { - let node_idx = g.add_node(Node { - depth: current_depth, - properties: get_node_properties(&c, Some(&node)), - location: get_location(&c), - inner: c.to_owned(), - }); - g.add_edge(parent_idx, node_idx, ()); - stack.push_back((node_idx, c.to_owned(), current_depth)); - } - }; - match &node { - // `AConst` is the only node with a `one of` property, so we handle it manually - // if you need to handle other nodes manually, add them to the `manual_node_names` function below - NodeEnum::AConst(n) => { - if n.val.is_some() { - handle_child(match n.val.to_owned().unwrap() { - pg_query::protobuf::a_const::Val::Ival(v) => 
NodeEnum::Integer(v), - pg_query::protobuf::a_const::Val::Fval(v) => NodeEnum::Float(v), - pg_query::protobuf::a_const::Val::Boolval(v) => NodeEnum::Boolean(v), - pg_query::protobuf::a_const::Val::Sval(v) => NodeEnum::String(v), - pg_query::protobuf::a_const::Val::Bsval(v) => NodeEnum::BitString(v), - }); - } - } - #(NodeEnum::#node_identifiers(n) => {#node_handlers}),*, - }; - } - g - } - } -} - -fn manual_node_names() -> Vec<&'static str> { - vec!["AConst"] -} - -fn node_identifiers(nodes: &[Node], exclude_nodes: &[&str]) -> Vec { - nodes - .iter() - .filter(|node| !exclude_nodes.contains(&node.name.as_str())) - .map(|node| format_ident!("{}", &node.name)) - .collect() -} - -fn node_handlers(nodes: &[Node], exclude_nodes: &[&str]) -> Vec { - nodes - .iter() - .filter(|node| !exclude_nodes.contains(&node.name.as_str())) - .map(|node| { - let property_handlers = property_handlers(node); - quote! { - #(#property_handlers)* - } - }) - .collect() -} - -fn property_handlers(node: &Node) -> Vec { - node.fields - .iter() - .filter_map(|field| { - let field_name = format_ident!("{}", field.name.as_str()); - if field.field_type == FieldType::Node && field.repeated { - Some(quote! { - n.#field_name - .iter() - .for_each(|x| if x.node.is_some() { - handle_child(x.node.as_ref().unwrap().to_owned()); - }); - }) - } else if field.field_type == FieldType::Node && !field.is_one_of { - if field.node_name == Some("Node".to_owned()) { - Some(quote! { - if n.#field_name.is_some() { - handle_child(n.#field_name.to_owned().unwrap().node.unwrap()); - } - }) - } else { - let enum_variant_name = - format_ident!("{}", field.enum_variant_name.as_ref().unwrap().as_str()); - Some(quote! { - if n.#field_name.is_some() { - handle_child(NodeEnum::#enum_variant_name(n.#field_name.to_owned().unwrap())); - } - }) - } - } else { - None - } - }) - .collect() -} diff --git a/crates/pgt_query_ext_codegen/src/lib.rs b/crates/pgt_query_ext_codegen/src/lib.rs deleted file mode 100644 index c4f39c0e..00000000 --- a/crates/pgt_query_ext_codegen/src/lib.rs +++ /dev/null @@ -1,48 +0,0 @@ -mod get_location; -mod get_node_properties; -mod get_nodes; -mod node_iterator; - -use get_location::get_location_mod; -use get_node_properties::get_node_properties_mod; -use get_nodes::get_nodes_mod; -use node_iterator::node_iterator_mod; -use pgt_query_proto_parser::ProtoParser; -use quote::quote; -use std::{env, path, path::Path}; - -#[proc_macro] -pub fn codegen(_input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let parser = ProtoParser::new(&proto_file_path()); - let proto_file = parser.parse(); - - let get_location = get_location_mod(&proto_file); - let get_node_properties = get_node_properties_mod(&proto_file); - let get_nodes = get_nodes_mod(&proto_file); - let iterator = node_iterator_mod(&proto_file); - - quote! 
{ - use pgt_lexer::SyntaxKind; - use std::collections::VecDeque; - use pg_query::{protobuf, protobuf::ScanToken, protobuf::Token, NodeEnum, NodeRef}; - use std::cmp::{min, Ordering}; - use std::fmt::{Display, Formatter}; - use petgraph::stable_graph::{StableGraph}; - use petgraph::graph::{NodeIndex}; - - #get_location - #get_node_properties - #get_nodes - #iterator - } - .into() -} - -fn proto_file_path() -> path::PathBuf { - Path::new(env!("CARGO_MANIFEST_DIR")) - .ancestors() - .nth(2) - .unwrap() - .join("libpg_query/protobuf/pg_query.proto") - .to_path_buf() -} diff --git a/crates/pgt_query_ext_codegen/src/node_iterator.rs b/crates/pgt_query_ext_codegen/src/node_iterator.rs deleted file mode 100644 index 526966df..00000000 --- a/crates/pgt_query_ext_codegen/src/node_iterator.rs +++ /dev/null @@ -1,123 +0,0 @@ -use pgt_query_proto_parser::{FieldType, Node, ProtoFile}; -use proc_macro2::{Ident, TokenStream}; -use quote::{format_ident, quote}; - -pub fn node_iterator_mod(proto_file: &ProtoFile) -> proc_macro2::TokenStream { - let manual_node_names = manual_node_names(); - - let node_identifiers = node_identifiers(&proto_file.nodes, &manual_node_names); - let node_handlers = node_handlers(&proto_file.nodes, &manual_node_names); - - quote! { - #[derive(Debug, Clone)] - pub struct ChildrenIterator { - stack: VecDeque<(NodeEnum, usize)>, - nodes: Vec, - } - - impl ChildrenIterator { - pub fn new(root: NodeEnum) -> Self { - Self { - stack: VecDeque::from(vec![(root, 0)]), - nodes: Vec::new(), - } - } - } - - impl Iterator for ChildrenIterator { - type Item = NodeEnum; - - fn next(&mut self) -> Option { - if self.stack.is_empty() { - return None; - } - - let (node, depth) = self.stack.pop_front().unwrap(); - - let current_depth = depth + 1; - - match &node { - // `AConst` is the only node with a `one of` property, so we handle it manually - // if you need to handle other nodes manually, add them to the `manual_node_names` function below - NodeEnum::AConst(n) => { - // if n.val.is_some() { - // let new_node = match n.val.as_ref().unwrap() { - // pg_query::protobuf::a_const::Val::Ival(v) => Box::new(NodeEnum::Integer(v.clone())), - // pg_query::protobuf::a_const::Val::Fval(v) => Box::new(NodeEnum::Float(v.clone())), - // pg_query::protobuf::a_const::Val::Boolval(v) => Box::new(NodeEnum::Boolean(v.clone())), - // pg_query::protobuf::a_const::Val::Sval(v) => Box::new(NodeEnum::String(v.clone())), - // pg_query::protobuf::a_const::Val::Bsval(v) => Box::new(NodeEnum::BitString(v.clone())), - // }; - // self.stack.push_back((&new_node, current_depth)); - // self.boxed_nodes.push(new_node); - // } - } - #(NodeEnum::#node_identifiers(n) => {#node_handlers}),*, - }; - - Some(node) - } - } - } -} - -fn manual_node_names() -> Vec<&'static str> { - vec!["AConst"] -} - -fn node_identifiers(nodes: &[Node], exclude_nodes: &[&str]) -> Vec { - nodes - .iter() - .filter(|node| !exclude_nodes.contains(&node.name.as_str())) - .map(|node| format_ident!("{}", &node.name)) - .collect() -} - -fn node_handlers(nodes: &[Node], exclude_nodes: &[&str]) -> Vec { - nodes - .iter() - .filter(|node| !exclude_nodes.contains(&node.name.as_str())) - .map(|node| { - let property_handlers = property_handlers(node); - quote! { - #(#property_handlers)* - } - }) - .collect() -} - -fn property_handlers(node: &Node) -> Vec { - node.fields - .iter() - .filter_map(|field| { - let field_name = format_ident!("{}", field.name.as_str()); - if field.field_type == FieldType::Node && field.repeated { - Some(quote! 
{ - n.#field_name - .iter() - .for_each(|x| if x.node.is_some() { - self.stack.push_back((x.node.as_ref().unwrap().to_owned(), current_depth)); - }); - }) - } else if field.field_type == FieldType::Node && !field.is_one_of { - if field.node_name == Some("Node".to_owned()) { - Some(quote! { - if n.#field_name.is_some() { - self.stack.push_back((n.#field_name.to_owned().unwrap().node.unwrap(), current_depth)); - } - }) - } else { - let enum_variant_name = - format_ident!("{}", field.enum_variant_name.as_ref().unwrap().as_str()); - Some(quote! { - if n.#field_name.is_some() { - self.stack.push_back((NodeEnum::#enum_variant_name(n.#field_name.to_owned().unwrap()), current_depth)); - } - }) - } - } else { - None - } - }) - .collect() -} diff --git a/crates/pgt_statement_splitter/src/diagnostics.rs b/crates/pgt_statement_splitter/src/diagnostics.rs index bcff6e80..d543d4e5 100644 --- a/crates/pgt_statement_splitter/src/diagnostics.rs +++ b/crates/pgt_statement_splitter/src/diagnostics.rs @@ -1,6 +1,9 @@ use pgt_diagnostics::{Diagnostic, MessageAndDescription}; +use pgt_lexer::{LexDiagnostic, Lexed}; use pgt_text_size::TextRange; +use crate::splitter::SplitError; + /// A specialized diagnostic for the statement splitter parser. /// /// Parser diagnostics are always **errors**. @@ -23,3 +26,22 @@ impl SplitDiagnostic { } } } + +impl From for SplitDiagnostic { + fn from(lex_diagnostic: LexDiagnostic) -> Self { + Self { + span: Some(lex_diagnostic.span), + message: lex_diagnostic.message, + } + } +} + +impl SplitDiagnostic { + pub fn from_split_error(split_error: SplitError, lexed: &Lexed) -> Self { + let range = lexed.range(split_error.token); + Self { + span: Some(range), + message: MessageAndDescription::from(split_error.msg), + } + } +} diff --git a/crates/pgt_statement_splitter/src/lib.rs b/crates/pgt_statement_splitter/src/lib.rs index c53ae78c..de028336 100644 --- a/crates/pgt_statement_splitter/src/lib.rs +++ b/crates/pgt_statement_splitter/src/lib.rs @@ -2,19 +2,40 @@ //! //! This crate provides a function to split a SQL source string into individual statements. 
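The new diagnostics deliberately defer span resolution: a `SplitError` records only the index of the offending token, and `SplitDiagnostic::from_split_error` turns that index into a `TextRange` via `lexed.range(...)` once the lexed data is at hand. A minimal std-only sketch of that pattern, using plain `Range<usize>` stand-ins instead of the real `Lexed`/`TextRange` types:

use std::ops::Range;

// Stand-ins for illustration only; the real code uses pgt_lexer::Lexed and pgt_text_size::TextRange.
struct Lexed {
    ranges: Vec<Range<usize>>, // byte range of every token, in order
}

struct SplitError {
    msg: String,
    token: usize, // token index, resolved to a span only when diagnostics are built
}

struct SplitDiagnostic {
    span: Range<usize>,
    message: String,
}

fn resolve(err: SplitError, lexed: &Lexed) -> SplitDiagnostic {
    SplitDiagnostic {
        span: lexed.ranges[err.token].clone(),
        message: err.msg,
    }
}

fn main() {
    let lexed = Lexed { ranges: vec![0..6, 7..13, 13..14] };
    let err = SplitError { msg: "Expected INTO_KW".into(), token: 1 };
    let diag = resolve(err, &lexed);
    assert_eq!(diag.span, 7..13);
    println!("{} at {:?}", diag.message, diag.span);
}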
pub mod diagnostics; -mod parser; +mod splitter; -use parser::{Parser, ParserResult, source}; -use pgt_lexer::diagnostics::ScanError; +use diagnostics::SplitDiagnostic; +use pgt_lexer::Lexer; +use pgt_text_size::TextRange; +use splitter::{Splitter, source}; -pub fn split(sql: &str) -> Result> { - let tokens = pgt_lexer::lex(sql)?; +pub struct SplitResult { + pub ranges: Vec, + pub errors: Vec, +} + +pub fn split(sql: &str) -> SplitResult { + let lexed = Lexer::new(sql).lex(); + + let mut splitter = Splitter::new(&lexed); + + source(&mut splitter); + + let split_result = splitter.finish(); - let mut parser = Parser::new(tokens); + let mut errors: Vec = lexed.errors().into_iter().map(Into::into).collect(); - source(&mut parser); + errors.extend( + split_result + .errors + .into_iter() + .map(|err| SplitDiagnostic::from_split_error(err, &lexed)), + ); - Ok(parser.finish()) + SplitResult { + ranges: split_result.ranges, + errors, + } } #[cfg(test)] @@ -28,13 +49,13 @@ mod tests { struct Tester { input: String, - parse: ParserResult, + result: SplitResult, } impl From<&str> for Tester { fn from(input: &str) -> Self { Tester { - parse: split(input).expect("Failed to split"), + result: split(input), input: input.to_string(), } } @@ -43,25 +64,25 @@ mod tests { impl Tester { fn expect_statements(&self, expected: Vec<&str>) -> &Self { assert_eq!( - self.parse.ranges.len(), + self.result.ranges.len(), expected.len(), "Expected {} statements for input {}, got {}: {:?}", expected.len(), self.input, - self.parse.ranges.len(), - self.parse + self.result.ranges.len(), + self.result .ranges .iter() .map(|r| &self.input[*r]) .collect::>() ); - for (range, expected) in self.parse.ranges.iter().zip(expected.iter()) { + for (range, expected) in self.result.ranges.iter().zip(expected.iter()) { assert_eq!(*expected, self.input[*range].to_string()); } assert!( - self.parse.ranges.is_sorted_by_key(|r| r.start()), + self.result.ranges.is_sorted_by_key(|r| r.start()), "Ranges are not sorted" ); @@ -70,15 +91,15 @@ mod tests { fn expect_errors(&self, expected: Vec) -> &Self { assert_eq!( - self.parse.errors.len(), + self.result.errors.len(), expected.len(), "Expected {} errors, got {}: {:?}", expected.len(), - self.parse.errors.len(), - self.parse.errors + self.result.errors.len(), + self.result.errors ); - for (err, expected) in self.parse.errors.iter().zip(expected.iter()) { + for (err, expected) in self.result.errors.iter().zip(expected.iter()) { assert_eq!(expected, err); } @@ -93,13 +114,6 @@ mod tests { ); } - #[test] - fn failing_lexer() { - let input = "select 1443ddwwd33djwdkjw13331333333333"; - let res = split(input).unwrap_err(); - assert!(!res.is_empty()); - } - #[test] #[timeout(1000)] fn basic() { @@ -161,7 +175,7 @@ mod tests { Tester::from("\ninsert select 1\n\nselect 3") .expect_statements(vec!["insert select 1", "select 3"]) .expect_errors(vec![SplitDiagnostic::new( - format!("Expected {:?}", SyntaxKind::Into), + format!("Expected {:?}", SyntaxKind::INTO_KW), TextRange::new(8.into(), 14.into()), )]); } diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs deleted file mode 100644 index 241d0c70..00000000 --- a/crates/pgt_statement_splitter/src/parser.rs +++ /dev/null @@ -1,237 +0,0 @@ -mod common; -mod data; -mod ddl; -mod dml; - -pub use common::source; - -use pgt_lexer::{SyntaxKind, Token, WHITESPACE_TOKENS}; -use pgt_text_size::{TextRange, TextSize}; - -use crate::diagnostics::SplitDiagnostic; - -/// Main parser that exposes the `cstree` api, and 
collects errors and statements -/// It is modelled after a Pratt Parser. For a gentle introduction to Pratt Parsing, see https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html -pub struct Parser { - /// The statement ranges are defined by the indices of the start/end tokens - stmt_ranges: Vec<(usize, usize)>, - - /// The syntax errors accumulated during parsing - errors: Vec, - - current_stmt_start: Option, - - tokens: Vec, - - eof_token: Token, - - current_pos: usize, -} - -#[derive(Debug)] -pub struct ParserResult { - /// The ranges of the parsed statements - pub ranges: Vec, - /// The syntax errors accumulated during parsing - pub errors: Vec, -} - -impl Parser { - pub fn new(tokens: Vec) -> Self { - let eof_token = Token::eof(usize::from( - tokens - .last() - .map(|t| t.span.end()) - .unwrap_or(TextSize::from(0)), - )); - - // Place `current_pos` on the first relevant token - let mut current_pos = 0; - while is_irrelevant_token(tokens.get(current_pos).unwrap_or(&eof_token)) { - current_pos += 1; - } - - Self { - stmt_ranges: Vec::new(), - eof_token, - errors: Vec::new(), - current_stmt_start: None, - tokens, - current_pos, - } - } - - pub fn finish(self) -> ParserResult { - ParserResult { - ranges: self - .stmt_ranges - .iter() - .map(|(start_token_pos, end_token_pos)| { - let from = self.tokens.get(*start_token_pos); - let to = self.tokens.get(*end_token_pos).unwrap_or(&self.eof_token); - - TextRange::new(from.unwrap().span.start(), to.span.end()) - }) - .collect(), - errors: self.errors, - } - } - - pub fn start_stmt(&mut self) { - assert!( - self.current_stmt_start.is_none(), - "cannot start statement within statement at {:?}", - self.tokens.get(self.current_stmt_start.unwrap()) - ); - self.current_stmt_start = Some(self.current_pos); - } - - pub fn close_stmt(&mut self) { - assert!( - self.current_stmt_start.is_some(), - "Must start statement before closing it." - ); - - let start_token_pos = self.current_stmt_start.unwrap(); - - assert!( - self.current_pos > start_token_pos, - "Must close the statement on a token that's later than the start token." - ); - - let (end_token_pos, _) = self.find_last_relevant().unwrap(); - - self.stmt_ranges.push((start_token_pos, end_token_pos)); - - self.current_stmt_start = None; - } - - fn current(&self) -> &Token { - match self.tokens.get(self.current_pos) { - Some(token) => token, - None => &self.eof_token, - } - } - - /// Advances the parser to the next relevant token and returns it. - /// - /// NOTE: This will skip irrelevant tokens. - fn advance(&mut self) -> &Token { - // can't reuse any `find_next_relevant` logic because of Mr. Borrow Checker - let (pos, token) = self - .tokens - .iter() - .enumerate() - .skip(self.current_pos + 1) - .find(|(_, t)| is_relevant(t)) - .unwrap_or((self.tokens.len(), &self.eof_token)); - - self.current_pos = pos; - token - } - - fn look_ahead(&self) -> Option<&Token> { - self.tokens - .iter() - .skip(self.current_pos + 1) - .find(|t| is_relevant(t)) - } - - /// Returns `None` if there are no previous relevant tokens - fn look_back(&self) -> Option<&Token> { - self.find_last_relevant().map(|it| it.1) - } - - /// Will advance if the `kind` matches the current token. - /// Otherwise, will add a diagnostic to the internal `errors`. 
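For orientation, the reworked public entry point no longer returns a `Result`: lexer diagnostics and splitter diagnostics are merged into `SplitResult::errors`, and each entry of `ranges` indexes directly into the input text. A minimal usage sketch, assuming the workspace crate as modified in this diff:

fn main() {
    let sql = "select 1;\n\nselect 2;";
    let result = pgt_statement_splitter::split(sql);

    // Each range points back into the original source text.
    for range in &result.ranges {
        println!("statement: {}", &sql[*range]);
    }

    // Lexer and splitter diagnostics arrive together.
    for err in &result.errors {
        eprintln!("{err:?}");
    }
}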
- pub fn expect(&mut self, kind: SyntaxKind) { - if self.current().kind == kind { - self.advance(); - } else { - self.errors.push(SplitDiagnostic::new( - format!("Expected {:#?}", kind), - self.current().span, - )); - } - } - - fn find_last_relevant(&self) -> Option<(usize, &Token)> { - self.tokens - .iter() - .enumerate() - .take(self.current_pos) - .rfind(|(_, t)| is_relevant(t)) - } -} - -#[cfg(windows)] -/// Returns true if the token is relevant for the parsing process -/// -/// On windows, a newline is represented by `\r\n` which is two characters. -fn is_irrelevant_token(t: &Token) -> bool { - WHITESPACE_TOKENS.contains(&t.kind) - // double new lines are relevant, single ones are not - && (t.kind != SyntaxKind::Newline || t.text == "\r\n" || t.text.chars().count() == 1) -} - -#[cfg(not(windows))] -/// Returns true if the token is relevant for the parsing process -fn is_irrelevant_token(t: &Token) -> bool { - WHITESPACE_TOKENS.contains(&t.kind) - // double new lines are relevant, single ones are not - && (t.kind != SyntaxKind::Newline || t.text.chars().count() == 1) -} - -fn is_relevant(t: &Token) -> bool { - !is_irrelevant_token(t) -} - -#[cfg(test)] -mod tests { - use pgt_lexer::SyntaxKind; - - use crate::parser::Parser; - - #[test] - fn advance_works_as_expected() { - let sql = r#" - create table users ( - id serial primary key, - name text, - email text - ); - "#; - let tokens = pgt_lexer::lex(sql).unwrap(); - let total_num_tokens = tokens.len(); - - let mut parser = Parser::new(tokens); - - let expected = vec![ - (SyntaxKind::Create, 2), - (SyntaxKind::Table, 4), - (SyntaxKind::Ident, 6), - (SyntaxKind::Ascii40, 8), - (SyntaxKind::Ident, 11), - (SyntaxKind::Ident, 13), - (SyntaxKind::Primary, 15), - (SyntaxKind::Key, 17), - (SyntaxKind::Ascii44, 18), - (SyntaxKind::NameP, 21), - (SyntaxKind::TextP, 23), - (SyntaxKind::Ascii44, 24), - (SyntaxKind::Ident, 27), - (SyntaxKind::TextP, 29), - (SyntaxKind::Ascii41, 32), - (SyntaxKind::Ascii59, 33), - ]; - - for (kind, pos) in expected { - assert_eq!(parser.current().kind, kind); - assert_eq!(parser.current_pos, pos); - parser.advance(); - } - - assert_eq!(parser.current().kind, SyntaxKind::Eof); - assert_eq!(parser.current_pos, total_num_tokens); - } -} diff --git a/crates/pgt_statement_splitter/src/parser/common.rs b/crates/pgt_statement_splitter/src/parser/common.rs deleted file mode 100644 index 4c4ab986..00000000 --- a/crates/pgt_statement_splitter/src/parser/common.rs +++ /dev/null @@ -1,307 +0,0 @@ -use pgt_lexer::{SyntaxKind, Token, TokenType, WHITESPACE_TOKENS}; - -use super::{ - Parser, - data::at_statement_start, - ddl::{alter, create}, - dml::{cte, delete, insert, select, update}, -}; - -pub fn source(p: &mut Parser) { - loop { - match p.current() { - Token { - kind: SyntaxKind::Eof, - .. - } => { - break; - } - Token { - // we might want to ignore TokenType::NoKeyword here too - // but this will lead to invalid statements to not being picked up - token_type: TokenType::Whitespace, - .. - } => { - p.advance(); - } - Token { - kind: SyntaxKind::Ascii92, - .. 
- } => { - plpgsql_command(p); - } - _ => { - statement(p); - } - } - } -} - -pub(crate) fn statement(p: &mut Parser) { - p.start_stmt(); - match p.current().kind { - SyntaxKind::With => { - cte(p); - } - SyntaxKind::Select => { - select(p); - } - SyntaxKind::Insert => { - insert(p); - } - SyntaxKind::Update => { - update(p); - } - SyntaxKind::DeleteP => { - delete(p); - } - SyntaxKind::Create => { - create(p); - } - SyntaxKind::Alter => { - alter(p); - } - _ => { - unknown(p, &[]); - } - } - p.close_stmt(); -} - -pub(crate) fn parenthesis(p: &mut Parser) { - p.expect(SyntaxKind::Ascii40); - - let mut depth = 1; - - loop { - match p.current().kind { - SyntaxKind::Ascii40 => { - p.advance(); - depth += 1; - } - SyntaxKind::Ascii41 | SyntaxKind::Eof => { - p.advance(); - depth -= 1; - if depth == 0 { - break; - } - } - _ => { - p.advance(); - } - } - } -} - -pub(crate) fn plpgsql_command(p: &mut Parser) { - p.expect(SyntaxKind::Ascii92); - - loop { - match p.current().kind { - SyntaxKind::Newline => { - p.advance(); - break; - } - _ => { - // advance the parser to the next token without ignoring irrelevant tokens - // we would skip a newline with `advance()` - p.current_pos += 1; - } - } - } -} - -pub(crate) fn case(p: &mut Parser) { - p.expect(SyntaxKind::Case); - - loop { - match p.current().kind { - SyntaxKind::EndP => { - p.advance(); - break; - } - _ => { - p.advance(); - } - } - } -} - -pub(crate) fn unknown(p: &mut Parser, exclude: &[SyntaxKind]) { - loop { - match p.current() { - Token { - kind: SyntaxKind::Ascii59, - .. - } => { - p.advance(); - break; - } - Token { - kind: SyntaxKind::Eof, - .. - } => { - break; - } - Token { - kind: SyntaxKind::Newline, - .. - } => { - if p.look_back().is_some_and(|t| t.kind == SyntaxKind::Ascii44) { - p.advance(); - } else { - break; - } - } - Token { - kind: SyntaxKind::Case, - .. - } => { - case(p); - } - Token { - kind: SyntaxKind::Ascii92, - .. - } => { - // pgsql commands e.g. - // - // ``` - // \if test - // ``` - // - // we wait for "\" and check if the previous token is a newline - - // newline is a whitespace, but we do not want to ignore it here - let irrelevant = WHITESPACE_TOKENS - .iter() - .filter(|t| **t != SyntaxKind::Newline) - .collect::>(); - - // go back from the current position without ignoring irrelevant tokens - if p.tokens - .iter() - .take(p.current_pos) - .rev() - .find(|t| !irrelevant.contains(&&t.kind)) - .is_some_and(|t| t.kind == SyntaxKind::Newline) - { - break; - } - p.advance(); - } - Token { - kind: SyntaxKind::Ascii40, - .. - } => { - parenthesis(p); - } - t => match at_statement_start(t.kind, exclude) { - Some(SyntaxKind::Select) => { - let prev = p.look_back().map(|t| t.kind); - if [ - // for policies, with for select - SyntaxKind::For, - // for create view / table as - SyntaxKind::As, - // for create rule - SyntaxKind::On, - // for create rule - SyntaxKind::Also, - // for create rule - SyntaxKind::Instead, - // for UNION - SyntaxKind::Union, - // for UNION ALL - SyntaxKind::All, - // for UNION ... EXCEPT - SyntaxKind::Except, - // for grant - SyntaxKind::Grant, - SyntaxKind::Ascii44, - ] - .iter() - .all(|x| Some(x) != prev.as_ref()) - { - break; - } - - p.advance(); - } - Some(SyntaxKind::Insert) | Some(SyntaxKind::Update) | Some(SyntaxKind::DeleteP) => { - let prev = p.look_back().map(|t| t.kind); - if [ - // for create trigger - SyntaxKind::Before, - SyntaxKind::After, - // for policies, e.g. for insert - SyntaxKind::For, - // e.g. on insert or delete - SyntaxKind::Or, - // e.g. 
INSTEAD OF INSERT - SyntaxKind::Of, - // for create rule - SyntaxKind::On, - // for create rule - SyntaxKind::Also, - // for create rule - SyntaxKind::Instead, - // for grant - SyntaxKind::Grant, - SyntaxKind::Ascii44, - // Do update in INSERT stmt - SyntaxKind::Do, - ] - .iter() - .all(|x| Some(x) != prev.as_ref()) - { - break; - } - p.advance(); - } - Some(SyntaxKind::With) => { - let next = p.look_ahead().map(|t| t.kind); - if [ - // WITH ORDINALITY should not start a new statement - SyntaxKind::Ordinality, - // WITH CHECK should not start a new statement - SyntaxKind::Check, - // TIMESTAMP WITH TIME ZONE should not start a new statement - SyntaxKind::Time, - SyntaxKind::Grant, - SyntaxKind::Admin, - SyntaxKind::Inherit, - SyntaxKind::Set, - ] - .iter() - .all(|x| Some(x) != next.as_ref()) - { - break; - } - p.advance(); - } - - Some(SyntaxKind::Create) => { - let prev = p.look_back().map(|t| t.kind); - if [ - // for grant - SyntaxKind::Grant, - SyntaxKind::Ascii44, - ] - .iter() - .all(|x| Some(x) != prev.as_ref()) - { - break; - } - - p.advance(); - } - Some(_) => { - break; - } - None => { - p.advance(); - } - }, - } - } -} diff --git a/crates/pgt_statement_splitter/src/parser/ddl.rs b/crates/pgt_statement_splitter/src/parser/ddl.rs deleted file mode 100644 index d9f233c2..00000000 --- a/crates/pgt_statement_splitter/src/parser/ddl.rs +++ /dev/null @@ -1,15 +0,0 @@ -use pgt_lexer::SyntaxKind; - -use super::{Parser, common::unknown}; - -pub(crate) fn create(p: &mut Parser) { - p.expect(SyntaxKind::Create); - - unknown(p, &[SyntaxKind::With]); -} - -pub(crate) fn alter(p: &mut Parser) { - p.expect(SyntaxKind::Alter); - - unknown(p, &[SyntaxKind::Alter]); -} diff --git a/crates/pgt_statement_splitter/src/parser/dml.rs b/crates/pgt_statement_splitter/src/parser/dml.rs deleted file mode 100644 index 015c50b6..00000000 --- a/crates/pgt_statement_splitter/src/parser/dml.rs +++ /dev/null @@ -1,59 +0,0 @@ -use pgt_lexer::SyntaxKind; - -use super::{ - Parser, - common::{parenthesis, unknown}, -}; - -pub(crate) fn cte(p: &mut Parser) { - p.expect(SyntaxKind::With); - - loop { - p.expect(SyntaxKind::Ident); - p.expect(SyntaxKind::As); - parenthesis(p); - - if p.current().kind == SyntaxKind::Ascii44 { - p.advance(); - } else { - break; - } - } - - unknown( - p, - &[ - SyntaxKind::Select, - SyntaxKind::Insert, - SyntaxKind::Update, - SyntaxKind::DeleteP, - SyntaxKind::Merge, - ], - ); -} - -pub(crate) fn select(p: &mut Parser) { - p.expect(SyntaxKind::Select); - - unknown(p, &[]); -} - -pub(crate) fn insert(p: &mut Parser) { - p.expect(SyntaxKind::Insert); - p.expect(SyntaxKind::Into); - - unknown(p, &[SyntaxKind::Select]); -} - -pub(crate) fn update(p: &mut Parser) { - p.expect(SyntaxKind::Update); - - unknown(p, &[]); -} - -pub(crate) fn delete(p: &mut Parser) { - p.expect(SyntaxKind::DeleteP); - p.expect(SyntaxKind::From); - - unknown(p, &[]); -} diff --git a/crates/pgt_statement_splitter/src/splitter.rs b/crates/pgt_statement_splitter/src/splitter.rs new file mode 100644 index 00000000..cfb4716d --- /dev/null +++ b/crates/pgt_statement_splitter/src/splitter.rs @@ -0,0 +1,168 @@ +mod common; +mod data; +mod ddl; +mod dml; + +pub use common::source; + +use pgt_lexer::{Lexed, SyntaxKind}; +use pgt_text_size::TextRange; + +pub struct SplitResult { + pub ranges: Vec, + pub errors: Vec, +} + +pub static TRIVIA_TOKENS: &[SyntaxKind] = &[ + SyntaxKind::SPACE, + SyntaxKind::TAB, + SyntaxKind::VERTICAL_TAB, + SyntaxKind::FORM_FEED, + SyntaxKind::COMMENT, + // LINE_ENDING is relevant +]; + +/// Internal 
error type used during splitting +#[derive(Debug, Clone)] +pub struct SplitError { + pub msg: String, + pub token: usize, +} + +pub struct Splitter<'a> { + lexed: &'a Lexed<'a>, + current_pos: usize, + stmt_ranges: Vec<(usize, usize)>, + errors: Vec, + current_stmt_start: Option, +} + +impl<'a> Splitter<'a> { + pub fn new(lexed: &'a Lexed<'a>) -> Self { + Self { + lexed, + current_pos: 0, + stmt_ranges: Vec::new(), + errors: Vec::new(), + current_stmt_start: None, + } + } + + pub fn finish(self) -> SplitResult { + let ranges = self + .stmt_ranges + .iter() + .map(|(start_token_pos, end_token_pos)| { + let from = self.lexed.range(*start_token_pos).start(); + let to = self.lexed.range(*end_token_pos).end(); + TextRange::new(from, to) + }) + .collect(); + + SplitResult { + ranges, + errors: self.errors, + } + } + + pub fn start_stmt(&mut self) { + assert!( + self.current_stmt_start.is_none(), + "cannot start statement within statement", + ); + self.current_stmt_start = Some(self.current_pos); + } + + pub fn close_stmt(&mut self) { + assert!( + self.current_stmt_start.is_some(), + "Must start statement before closing it." + ); + + let start_token_pos = self.current_stmt_start.unwrap(); + + assert!( + self.current_pos > start_token_pos, + "Must close the statement on a token that's later than the start token: {} > {}", + self.current_pos, + start_token_pos + ); + + let end_token_pos = (0..self.current_pos) + .rev() + .find(|&idx| !self.is_trivia(idx)) + .unwrap(); + + self.stmt_ranges.push((start_token_pos, end_token_pos)); + + self.current_stmt_start = None; + } + + fn current(&self) -> SyntaxKind { + self.lexed.kind(self.current_pos) + } + + fn kind(&self, idx: usize) -> SyntaxKind { + self.lexed.kind(idx) + } + + /// Advances the parser to the next relevant token and returns it. + /// + /// NOTE: This will skip trivia tokens. + fn advance(&mut self) -> SyntaxKind { + let pos = (self.current_pos + 1..self.lexed.len()) + .find(|&idx| !self.is_trivia(idx)) + .expect("lexed should have non-trivia eof token"); + + self.current_pos = pos; + self.lexed.kind(pos) + } + + fn look_ahead(&self, ignore_trivia: bool) -> SyntaxKind { + let pos = if ignore_trivia { + (self.current_pos + 1..self.lexed.len()) + .find(|&idx| !self.is_trivia(idx)) + .expect("lexed should have non-trivia eof token") + } else { + (self.current_pos + 1..self.lexed.len()) + .next() + .expect("lexed should have a eof token") + }; + self.lexed.kind(pos) + } + + /// Returns `None` if there are no previous relevant tokens + fn look_back(&self, ignore_trivia: bool) -> Option { + if ignore_trivia { + (0..self.current_pos) + .rev() + .find(|&idx| !self.is_trivia(idx)) + .map(|idx| self.lexed.kind(idx)) + } else { + (0..self.current_pos) + .next_back() + .map(|idx| self.lexed.kind(idx)) + } + } + + fn is_trivia(&self, idx: usize) -> bool { + match self.lexed.kind(idx) { + k if TRIVIA_TOKENS.contains(&k) => true, + SyntaxKind::LINE_ENDING => self.lexed.line_ending_count(idx) < 2, + _ => false, + } + } + + /// Will advance if the `kind` matches the current token. + /// Otherwise, will add a diagnostic to the internal `errors`. 
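The `Splitter` above never materializes a filtered token list; it walks token indices and skips trivia on the fly in `advance`, `look_ahead`, and `look_back`. A reduced, std-only illustration of that index-skipping pattern (hypothetical `Kind` enum standing in for `SyntaxKind`):

#[derive(Clone, Copy, PartialEq, Debug)]
enum Kind {
    Space,
    Comment,
    Ident,
    Semicolon,
    Eof,
}

fn is_trivia(kind: Kind) -> bool {
    matches!(kind, Kind::Space | Kind::Comment)
}

struct Walker {
    kinds: Vec<Kind>,
    pos: usize,
}

impl Walker {
    /// Jump to the next non-trivia token; the stream is expected to end with a non-trivia Eof.
    fn advance(&mut self) -> Kind {
        self.pos = (self.pos + 1..self.kinds.len())
            .find(|&i| !is_trivia(self.kinds[i]))
            .expect("token stream should end with a non-trivia Eof");
        self.kinds[self.pos]
    }
}

fn main() {
    let mut w = Walker {
        kinds: vec![Kind::Ident, Kind::Space, Kind::Comment, Kind::Semicolon, Kind::Eof],
        pos: 0,
    };
    assert_eq!(w.advance(), Kind::Semicolon); // trivia in between is skipped
    assert_eq!(w.advance(), Kind::Eof);
}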
+ fn expect(&mut self, kind: SyntaxKind) { + if self.current() == kind { + self.advance(); + } else { + self.errors.push(SplitError { + msg: format!("Expected {:#?}", kind), + token: self.current_pos, + }); + } + } +} diff --git a/crates/pgt_statement_splitter/src/splitter/common.rs b/crates/pgt_statement_splitter/src/splitter/common.rs new file mode 100644 index 00000000..4f2cd069 --- /dev/null +++ b/crates/pgt_statement_splitter/src/splitter/common.rs @@ -0,0 +1,275 @@ +use super::TRIVIA_TOKENS; +use pgt_lexer::SyntaxKind; + +use super::{ + Splitter, + data::at_statement_start, + ddl::{alter, create}, + dml::{cte, delete, insert, select, update}, +}; + +pub fn source(p: &mut Splitter) { + loop { + match p.current() { + SyntaxKind::EOF => { + break; + } + kind if TRIVIA_TOKENS.contains(&kind) || kind == SyntaxKind::LINE_ENDING => { + p.advance(); + } + SyntaxKind::BACKSLASH => { + plpgsql_command(p); + } + _ => { + statement(p); + } + } + } +} + +pub(crate) fn statement(p: &mut Splitter) { + p.start_stmt(); + match p.current() { + SyntaxKind::WITH_KW => { + cte(p); + } + SyntaxKind::SELECT_KW => { + select(p); + } + SyntaxKind::INSERT_KW => { + insert(p); + } + SyntaxKind::UPDATE_KW => { + update(p); + } + SyntaxKind::DELETE_KW => { + delete(p); + } + SyntaxKind::CREATE_KW => { + create(p); + } + SyntaxKind::ALTER_KW => { + alter(p); + } + _ => { + unknown(p, &[]); + } + } + p.close_stmt(); +} + +pub(crate) fn parenthesis(p: &mut Splitter) { + p.expect(SyntaxKind::L_PAREN); + + let mut depth = 1; + + loop { + match p.current() { + SyntaxKind::L_PAREN => { + p.advance(); + depth += 1; + } + SyntaxKind::R_PAREN | SyntaxKind::EOF => { + p.advance(); + depth -= 1; + if depth == 0 { + break; + } + } + _ => { + p.advance(); + } + } + } +} + +pub(crate) fn plpgsql_command(p: &mut Splitter) { + p.expect(SyntaxKind::BACKSLASH); + + loop { + match p.current() { + SyntaxKind::LINE_ENDING => { + p.advance(); + break; + } + _ => { + // advance the splitter to the next token without ignoring irrelevant tokens + // we would skip a newline with `advance()` + p.current_pos += 1; + } + } + } +} + +pub(crate) fn case(p: &mut Splitter) { + p.expect(SyntaxKind::CASE_KW); + + loop { + match p.current() { + SyntaxKind::END_KW => { + p.advance(); + break; + } + _ => { + p.advance(); + } + } + } +} + +pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { + loop { + match p.current() { + SyntaxKind::SEMICOLON => { + p.advance(); + break; + } + SyntaxKind::EOF => { + break; + } + SyntaxKind::LINE_ENDING => { + if p.look_back(true).is_some_and(|t| t == SyntaxKind::COMMA) { + p.advance(); + } else { + break; + } + } + SyntaxKind::CASE_KW => { + case(p); + } + SyntaxKind::BACKSLASH => { + // pgsql commands + // we want to check if the previous token non-trivia token is a LINE_ENDING + // we cannot use the is_trivia() method because that would exclude LINE_ENDINGs + // with count > 1 + if (0..p.current_pos) + .rev() + .find_map(|idx| { + let kind = p.kind(idx); + if !TRIVIA_TOKENS.contains(&kind) { + Some(kind) + } else { + None + } + }) + .is_some_and(|t| t == SyntaxKind::LINE_ENDING) + { + break; + } + p.advance(); + } + SyntaxKind::L_PAREN => { + parenthesis(p); + } + t => match at_statement_start(t, exclude) { + Some(SyntaxKind::SELECT_KW) => { + let prev = p.look_back(true); + if [ + // for policies, with for select + SyntaxKind::FOR_KW, + // for create view / table as + SyntaxKind::AS_KW, + // for create rule + SyntaxKind::ON_KW, + // for create rule + SyntaxKind::ALSO_KW, + // for create rule + 
SyntaxKind::INSTEAD_KW, + // for UNION + SyntaxKind::UNION_KW, + // for UNION ALL + SyntaxKind::ALL_KW, + // for UNION ... EXCEPT + SyntaxKind::EXCEPT_KW, + // for grant + SyntaxKind::GRANT_KW, + SyntaxKind::COMMA, + ] + .iter() + .all(|x| Some(x) != prev.as_ref()) + { + break; + } + + p.advance(); + } + Some(SyntaxKind::INSERT_KW) + | Some(SyntaxKind::UPDATE_KW) + | Some(SyntaxKind::DELETE_KW) => { + let prev = p.look_back(true); + if [ + // for create trigger + SyntaxKind::BEFORE_KW, + SyntaxKind::AFTER_KW, + // for policies, e.g. for insert + SyntaxKind::FOR_KW, + // e.g. on insert or delete + SyntaxKind::OR_KW, + // e.g. INSTEAD OF INSERT + SyntaxKind::OF_KW, + // for create rule + SyntaxKind::ON_KW, + // for create rule + SyntaxKind::ALSO_KW, + // for create rule + SyntaxKind::INSTEAD_KW, + // for grant + SyntaxKind::GRANT_KW, + SyntaxKind::COMMA, + // Do update in INSERT stmt + SyntaxKind::DO_KW, + ] + .iter() + .all(|x| Some(x) != prev.as_ref()) + { + break; + } + p.advance(); + } + Some(SyntaxKind::WITH_KW) => { + let next = p.look_ahead(true); + if [ + // WITH ORDINALITY should not start a new statement + SyntaxKind::ORDINALITY_KW, + // WITH CHECK should not start a new statement + SyntaxKind::CHECK_KW, + // TIMESTAMP WITH TIME ZONE should not start a new statement + SyntaxKind::TIME_KW, + SyntaxKind::GRANT_KW, + SyntaxKind::ADMIN_KW, + SyntaxKind::INHERIT_KW, + SyntaxKind::SET_KW, + ] + .iter() + .all(|x| x != &next) + { + break; + } + p.advance(); + } + + Some(SyntaxKind::CREATE_KW) => { + let prev = p.look_back(true); + if [ + // for grant + SyntaxKind::GRANT_KW, + SyntaxKind::COMMA, + ] + .iter() + .all(|x| Some(x) != prev.as_ref()) + { + break; + } + + p.advance(); + } + Some(_) => { + break; + } + None => { + p.advance(); + } + }, + } + } +} diff --git a/crates/pgt_statement_splitter/src/parser/data.rs b/crates/pgt_statement_splitter/src/splitter/data.rs similarity index 62% rename from crates/pgt_statement_splitter/src/parser/data.rs rename to crates/pgt_statement_splitter/src/splitter/data.rs index c0792c39..0827484b 100644 --- a/crates/pgt_statement_splitter/src/parser/data.rs +++ b/crates/pgt_statement_splitter/src/splitter/data.rs @@ -3,15 +3,15 @@ use pgt_lexer::SyntaxKind; // All tokens listed here must be explicitly handled in the `unknown` function to ensure that we do // not break in the middle of another statement that contains a statement start token. 
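Each keyword list above only matters because `at_statement_start` checks a token against a fixed set of statement-start tokens, minus whatever the current statement asks to exclude (see the data.rs hunk that follows). A std-only sketch of that lookup with a hypothetical `Kind` enum:

#[derive(Clone, Copy, PartialEq, Debug)]
enum Kind {
    With,
    Select,
    Insert,
    Update,
    Delete,
    Create,
    Alter,
    Ident,
}

const STATEMENT_START: &[Kind] = &[
    Kind::With,
    Kind::Select,
    Kind::Insert,
    Kind::Update,
    Kind::Delete,
    Kind::Create,
    Kind::Alter,
];

fn at_statement_start(kind: Kind, exclude: &[Kind]) -> Option<Kind> {
    STATEMENT_START
        .iter()
        .copied()
        .filter(|k| !exclude.contains(k))
        .find(|&k| k == kind)
}

fn main() {
    // A CTE body excludes SELECT and friends, so they do not end the current statement.
    assert_eq!(at_statement_start(Kind::Select, &[Kind::Select]), None);
    assert_eq!(at_statement_start(Kind::Create, &[]), Some(Kind::Create));
    assert_eq!(at_statement_start(Kind::Ident, &[]), None);
}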
// -// All of these statements must have a dedicated parser function called from the `statement` function +// All of these statements must have a dedicated splitter function called from the `statement` function static STATEMENT_START_TOKENS: &[SyntaxKind] = &[ - SyntaxKind::With, - SyntaxKind::Select, - SyntaxKind::Insert, - SyntaxKind::Update, - SyntaxKind::DeleteP, - SyntaxKind::Create, - SyntaxKind::Alter, + SyntaxKind::WITH_KW, + SyntaxKind::SELECT_KW, + SyntaxKind::INSERT_KW, + SyntaxKind::UPDATE_KW, + SyntaxKind::DELETE_KW, + SyntaxKind::CREATE_KW, + SyntaxKind::ALTER_KW, ]; pub(crate) fn at_statement_start(kind: SyntaxKind, exclude: &[SyntaxKind]) -> Option<&SyntaxKind> { diff --git a/crates/pgt_statement_splitter/src/splitter/ddl.rs b/crates/pgt_statement_splitter/src/splitter/ddl.rs new file mode 100644 index 00000000..449288aa --- /dev/null +++ b/crates/pgt_statement_splitter/src/splitter/ddl.rs @@ -0,0 +1,15 @@ +use pgt_lexer::SyntaxKind; + +use super::{Splitter, common::unknown}; + +pub(crate) fn create(p: &mut Splitter) { + p.expect(SyntaxKind::CREATE_KW); + + unknown(p, &[SyntaxKind::WITH_KW]); +} + +pub(crate) fn alter(p: &mut Splitter) { + p.expect(SyntaxKind::ALTER_KW); + + unknown(p, &[SyntaxKind::ALTER_KW]); +} diff --git a/crates/pgt_statement_splitter/src/splitter/dml.rs b/crates/pgt_statement_splitter/src/splitter/dml.rs new file mode 100644 index 00000000..9c833301 --- /dev/null +++ b/crates/pgt_statement_splitter/src/splitter/dml.rs @@ -0,0 +1,59 @@ +use pgt_lexer::SyntaxKind; + +use super::{ + Splitter, + common::{parenthesis, unknown}, +}; + +pub(crate) fn cte(p: &mut Splitter) { + p.expect(SyntaxKind::WITH_KW); + + loop { + p.expect(SyntaxKind::IDENT); + p.expect(SyntaxKind::AS_KW); + parenthesis(p); + + if p.current() == SyntaxKind::COMMA { + p.advance(); + } else { + break; + } + } + + unknown( + p, + &[ + SyntaxKind::SELECT_KW, + SyntaxKind::INSERT_KW, + SyntaxKind::UPDATE_KW, + SyntaxKind::DELETE_KW, + SyntaxKind::MERGE_KW, + ], + ); +} + +pub(crate) fn select(p: &mut Splitter) { + p.expect(SyntaxKind::SELECT_KW); + + unknown(p, &[]); +} + +pub(crate) fn insert(p: &mut Splitter) { + p.expect(SyntaxKind::INSERT_KW); + p.expect(SyntaxKind::INTO_KW); + + unknown(p, &[SyntaxKind::SELECT_KW]); +} + +pub(crate) fn update(p: &mut Splitter) { + p.expect(SyntaxKind::UPDATE_KW); + + unknown(p, &[]); +} + +pub(crate) fn delete(p: &mut Splitter) { + p.expect(SyntaxKind::DELETE_KW); + p.expect(SyntaxKind::FROM_KW); + + unknown(p, &[]); +} diff --git a/crates/pgt_statement_splitter/tests/statement_splitter_tests.rs b/crates/pgt_statement_splitter/tests/statement_splitter_tests.rs index e0534725..a4cf3259 100644 --- a/crates/pgt_statement_splitter/tests/statement_splitter_tests.rs +++ b/crates/pgt_statement_splitter/tests/statement_splitter_tests.rs @@ -22,7 +22,7 @@ fn test_statement_splitter() { let contents = fs::read_to_string(&path).unwrap(); - let split = pgt_statement_splitter::split(&contents).expect("Failed to split"); + let split = pgt_statement_splitter::split(&contents); assert_eq!( split.ranges.len(), diff --git a/crates/pgt_query_ext_codegen/Cargo.toml b/crates/pgt_tokenizer/Cargo.toml similarity index 62% rename from crates/pgt_query_ext_codegen/Cargo.toml rename to crates/pgt_tokenizer/Cargo.toml index c3a0f20d..9cd4bf5e 100644 --- a/crates/pgt_query_ext_codegen/Cargo.toml +++ b/crates/pgt_tokenizer/Cargo.toml @@ -6,17 +6,14 @@ edition.workspace = true homepage.workspace = true keywords.workspace = true license.workspace = true -name = 
"pgt_query_ext_codegen" +name = "pgt_tokenizer" repository.workspace = true version = "0.0.0" [dependencies] -proc-macro2.workspace = true -quote.workspace = true -pgt_query_proto_parser.workspace = true +[dev-dependencies] +insta.workspace = true [lib] -doctest = false -proc-macro = true diff --git a/crates/pgt_tokenizer/README.md b/crates/pgt_tokenizer/README.md new file mode 100644 index 00000000..8fc21d34 --- /dev/null +++ b/crates/pgt_tokenizer/README.md @@ -0,0 +1 @@ +Heavily inspired by and copied from [squawk_lexer](https://github.com/sbdchd/squawk/tree/9acfecbbb7f3c7eedcbaf060e7b25f9afa136db3/crates/squawk_lexer). Thanks for making all the hard work MIT-licensed! diff --git a/crates/pgt_tokenizer/src/cursor.rs b/crates/pgt_tokenizer/src/cursor.rs new file mode 100644 index 00000000..64710f29 --- /dev/null +++ b/crates/pgt_tokenizer/src/cursor.rs @@ -0,0 +1,73 @@ +use std::str::Chars; + +/// Peekable iterator over a char sequence. +/// +/// Next characters can be peeked via `first` method, +/// and position can be shifted forward via `bump` method. +/// based on: +/// - +/// - +/// +pub(crate) struct Cursor<'a> { + /// Iterator over chars. Slightly faster than a &str. + chars: Chars<'a>, + len_remaining: usize, +} + +pub(crate) const EOF_CHAR: char = '\0'; + +impl<'a> Cursor<'a> { + pub(crate) fn new(input: &'a str) -> Cursor<'a> { + Cursor { + len_remaining: input.len(), + chars: input.chars(), + } + } + + /// Peeks the next symbol from the input stream without consuming it. + /// If requested position doesn't exist, `EOF_CHAR` is returned. + /// However, getting `EOF_CHAR` doesn't always mean actual end of file, + /// it should be checked with `is_eof` method. + pub(crate) fn first(&self) -> char { + // `.next()` optimizes better than `.nth(0)` + self.chars.clone().next().unwrap_or(EOF_CHAR) + } + + /// Peeks the second next symbol from the input stream without consuming it. + /// If requested position doesn't exist, `EOF_CHAR` is returned. + /// However, getting `EOF_CHAR` doesn't always mean actual end of file, + /// it should be checked with `is_eof` method. + pub(crate) fn second(&self) -> char { + self.chars.clone().nth(1).unwrap_or(EOF_CHAR) + } + + /// Checks if there is nothing more to consume. + pub(crate) fn is_eof(&self) -> bool { + self.chars.as_str().is_empty() + } + + /// Returns amount of already consumed symbols. + pub(crate) fn pos_within_token(&self) -> u32 { + (self.len_remaining - self.chars.as_str().len()) as u32 + } + + /// Resets the number of bytes consumed to 0. + pub(crate) fn reset_pos_within_token(&mut self) { + self.len_remaining = self.chars.as_str().len(); + } + + /// Moves to the next character. + pub(crate) fn bump(&mut self) -> Option { + let c = self.chars.next()?; + Some(c) + } + + /// Eats symbols while predicate returns true or until the end of file is reached. + pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) { + // It was tried making optimized version of this for eg. line comments, but + // LLVM can inline all of this and compile it down to fast iteration over bytes. 
+ while predicate(self.first()) && !self.is_eof() { + self.bump(); + } + } +} diff --git a/crates/pgt_tokenizer/src/lib.rs b/crates/pgt_tokenizer/src/lib.rs new file mode 100644 index 00000000..787adcaa --- /dev/null +++ b/crates/pgt_tokenizer/src/lib.rs @@ -0,0 +1,830 @@ +mod cursor; +mod token; +use cursor::{Cursor, EOF_CHAR}; +pub use token::{Base, LiteralKind, Token, TokenKind}; + +// via: https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L346 +// ident_start [A-Za-z\200-\377_] +const fn is_ident_start(c: char) -> bool { + matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '\u{80}'..='\u{FF}') +} + +// ident_cont [A-Za-z\200-\377_0-9\$] +const fn is_ident_cont(c: char) -> bool { + matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9' | '$' | '\u{80}'..='\u{FF}') +} + +// whitespace +// - https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scansup.c#L107-L128 +// - https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L204-L229 + +const fn is_space(c: char) -> bool { + matches!( + c, ' ' // space + ) +} + +const fn is_tab(c: char) -> bool { + matches!( + c, '\t' // tab + ) +} + +const fn is_line_ending(c: char) -> bool { + matches!( + c, + '\n' | '\r' // newline or carriage return + ) +} + +const fn is_vertical_tab(c: char) -> bool { + matches!( + c, '\u{000B}' // vertical tab + ) +} + +const fn is_form_feed(c: char) -> bool { + matches!( + c, '\u{000C}' // form feed + ) +} + +impl Cursor<'_> { + // see: https://github.com/rust-lang/rust/blob/ba1d7f4a083e6402679105115ded645512a7aea8/compiler/rustc_lexer/src/lib.rs#L339 + pub(crate) fn advance_token(&mut self) -> Token { + let Some(first_char) = self.bump() else { + return Token::new(TokenKind::Eof, 0); + }; + let token_kind = match first_char { + // Slash, comment or block comment. + '/' => match self.first() { + '*' => self.block_comment(), + _ => TokenKind::Slash, + }, + '-' => match self.first() { + '-' => self.line_comment(), + _ => TokenKind::Minus, + }, + + c if is_space(c) => { + self.eat_while(is_space); + TokenKind::Space + } + + c if is_tab(c) => { + self.eat_while(is_tab); + TokenKind::Tab + } + + c if is_line_ending(c) => self.line_ending_sequence(c), + + c if is_vertical_tab(c) => { + self.eat_while(is_vertical_tab); + TokenKind::VerticalTab + } + + c if is_form_feed(c) => { + self.eat_while(is_form_feed); + TokenKind::FormFeed + } + + // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE + 'u' | 'U' => match self.first() { + '&' => { + self.bump(); + self.prefixed_string( + |terminated| LiteralKind::UnicodeEscStr { terminated }, + true, + ) + } + _ => self.ident_or_unknown_prefix(), + }, + + // escaped strings + 'e' | 'E' => { + self.prefixed_string(|terminated| LiteralKind::EscStr { terminated }, false) + } + + // bit string + 'b' | 'B' => { + self.prefixed_string(|terminated| LiteralKind::BitStr { terminated }, false) + } + + // hexadecimal byte string + 'x' | 'X' => { + self.prefixed_string(|terminated| LiteralKind::ByteStr { terminated }, false) + } + + // Identifier (this should be checked after other variant that can + // start as identifier). + c if is_ident_start(c) => self.ident(), + + // Numeric literal. + // see: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC + c @ '0'..='9' => { + let literal_kind = self.number(c); + TokenKind::Literal { kind: literal_kind } + } + '.' 
=> match self.first() { + '0'..='9' => { + let literal_kind = self.number('.'); + TokenKind::Literal { kind: literal_kind } + } + _ => TokenKind::Dot, + }, + // One-symbol tokens. + ';' => TokenKind::Semi, + '\\' => TokenKind::Backslash, + ',' => TokenKind::Comma, + '(' => TokenKind::OpenParen, + ')' => TokenKind::CloseParen, + '[' => TokenKind::OpenBracket, + ']' => TokenKind::CloseBracket, + '@' => TokenKind::At, + '#' => TokenKind::Pound, + '~' => TokenKind::Tilde, + '?' => TokenKind::Question, + ':' => TokenKind::Colon, + '$' => { + // Dollar quoted strings + if is_ident_start(self.first()) || self.first() == '$' { + self.dollar_quoted_string() + } else { + // Parameters + while self.first().is_ascii_digit() { + self.bump(); + } + TokenKind::PositionalParam + } + } + '`' => TokenKind::Backtick, + '=' => TokenKind::Eq, + '!' => TokenKind::Bang, + '<' => TokenKind::Lt, + '>' => TokenKind::Gt, + '&' => TokenKind::And, + '|' => TokenKind::Or, + '+' => TokenKind::Plus, + '*' => TokenKind::Star, + '^' => TokenKind::Caret, + '%' => TokenKind::Percent, + + // String literal + '\'' => { + let terminated = self.single_quoted_string(); + let kind = LiteralKind::Str { terminated }; + TokenKind::Literal { kind } + } + + // Quoted indentifiers + '"' => { + let terminated = self.double_quoted_string(); + TokenKind::QuotedIdent { terminated } + } + _ => TokenKind::Unknown, + }; + let res = Token::new(token_kind, self.pos_within_token()); + self.reset_pos_within_token(); + res + } + pub(crate) fn ident(&mut self) -> TokenKind { + self.eat_while(is_ident_cont); + TokenKind::Ident + } + + fn ident_or_unknown_prefix(&mut self) -> TokenKind { + // Start is already eaten, eat the rest of identifier. + self.eat_while(is_ident_cont); + // Known prefixes must have been handled earlier. So if + // we see a prefix here, it is definitely an unknown prefix. + match self.first() { + '#' | '"' | '\'' => TokenKind::UnknownPrefix, + _ => TokenKind::Ident, + } + } + + // see: https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L227 + // comment ("--"{non_newline}*) + pub(crate) fn line_comment(&mut self) -> TokenKind { + self.bump(); + + self.eat_while(|c| c != '\n'); + TokenKind::LineComment + } + + // see: https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L324-L344 + pub(crate) fn block_comment(&mut self) -> TokenKind { + self.bump(); + + let mut depth = 1usize; + while let Some(c) = self.bump() { + match c { + '/' if self.first() == '*' => { + self.bump(); + depth += 1; + } + '*' if self.first() == '/' => { + self.bump(); + depth -= 1; + if depth == 0 { + // This block comment is closed, so for a construction like "/* */ */" + // there will be a successfully parsed block comment "/* */" + // and " */" will be processed separately. + break; + } + } + _ => (), + } + } + + TokenKind::BlockComment { + terminated: depth == 0, + } + } + + // invariant: we care about the number of consecutive newlines so we count them. 
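The `LineEnding { count }` token folds each `\r\n` pair into a single logical break, so downstream code (the statement splitter's trivia check) can simply ask whether a gap contains a blank line, i.e. `count >= 2`. A small std-only check of that counting rule (assumes the input slice holds only '\r' and '\n'):

/// Count logical line breaks, treating "\r\n" as a single break.
fn count_line_breaks(run: &str) -> usize {
    let mut breaks = 0;
    let mut chars = run.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '\r' && chars.peek() == Some(&'\n') {
            chars.next(); // fold the '\n' of a "\r\n" pair into the same break
        }
        breaks += 1;
    }
    breaks
}

fn main() {
    assert_eq!(count_line_breaks("\n"), 1);
    assert_eq!(count_line_breaks("\r\n"), 1); // DOS ending is one logical break
    assert_eq!(count_line_breaks("\r\n\r\n"), 2); // blank line: a statement boundary candidate
    assert_eq!(count_line_breaks("\n\r\n\n"), 3);
}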
+ // + // Postgres considers a DOS-style \r\n sequence as two successive newlines, but we care about + // logical line breaks and consider \r\n as one logical line break + fn line_ending_sequence(&mut self, prev: char) -> TokenKind { + // already consumed first line ending character (\n or \r) + let mut line_breaks = 1; + + // started with \r, check if it's part of \r\n + if prev == '\r' && self.first() == '\n' { + // consume the \n - \r\n still counts as 1 logical line break + self.bump(); + } + + // continue checking for more line endings + loop { + match self.first() { + '\r' if self.second() == '\n' => { + self.bump(); // consume \r + self.bump(); // consume \n + line_breaks += 1; + } + '\n' => { + self.bump(); + line_breaks += 1; + } + '\r' => { + self.bump(); + line_breaks += 1; + } + _ => break, + } + } + + TokenKind::LineEnding { count: line_breaks } + } + + fn prefixed_string( + &mut self, + mk_kind: fn(bool) -> LiteralKind, + allows_double: bool, + ) -> TokenKind { + match self.first() { + '\'' => { + self.bump(); + let terminated = self.single_quoted_string(); + let kind = mk_kind(terminated); + TokenKind::Literal { kind } + } + '"' if allows_double => { + self.bump(); + let terminated = self.double_quoted_string(); + TokenKind::QuotedIdent { terminated } + } + _ => self.ident_or_unknown_prefix(), + } + } + + fn number(&mut self, first_digit: char) -> LiteralKind { + let mut base = Base::Decimal; + if first_digit == '0' { + // Attempt to parse encoding base. + match self.first() { + // https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L403 + 'b' | 'B' => { + base = Base::Binary; + self.bump(); + if !self.eat_decimal_digits() { + return LiteralKind::Int { + base, + empty_int: true, + }; + } + } + // https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L402 + 'o' | 'O' => { + base = Base::Octal; + self.bump(); + if !self.eat_decimal_digits() { + return LiteralKind::Int { + base, + empty_int: true, + }; + } + } + // https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L401 + 'x' | 'X' => { + base = Base::Hexadecimal; + self.bump(); + if !self.eat_hexadecimal_digits() { + return LiteralKind::Int { + base, + empty_int: true, + }; + } + } + // Not a base prefix; consume additional digits. + '0'..='9' | '_' => { + self.eat_decimal_digits(); + } + + // Also not a base prefix; nothing more to do here. + '.' | 'e' | 'E' => {} + + // Just a 0. + _ => { + return LiteralKind::Int { + base, + empty_int: false, + }; + } + } + } else { + // No base prefix, parse number in the usual way. + self.eat_decimal_digits(); + }; + + match self.first() { + '.' 
=> { + // might have stuff after the ., and if it does, it needs to start + // with a number + self.bump(); + let mut empty_exponent = false; + if self.first().is_ascii_digit() { + self.eat_decimal_digits(); + match self.first() { + 'e' | 'E' => { + self.bump(); + empty_exponent = !self.eat_float_exponent(); + } + _ => (), + } + } else { + match self.first() { + 'e' | 'E' => { + self.bump(); + empty_exponent = !self.eat_float_exponent(); + } + _ => (), + } + } + LiteralKind::Float { + base, + empty_exponent, + } + } + 'e' | 'E' => { + self.bump(); + let empty_exponent = !self.eat_float_exponent(); + LiteralKind::Float { + base, + empty_exponent, + } + } + _ => LiteralKind::Int { + base, + empty_int: false, + }, + } + } + + fn single_quoted_string(&mut self) -> bool { + // Parse until either quotes are terminated or error is detected. + loop { + match self.first() { + // Quotes might be terminated. + '\'' => { + self.bump(); + + match self.first() { + // encountered an escaped quote '' + '\'' => { + self.bump(); + } + // encountered terminating quote + _ => return true, + } + } + // End of file, stop parsing. + EOF_CHAR if self.is_eof() => break, + // Skip the character. + _ => { + self.bump(); + } + } + } + // String was not terminated. + false + } + + /// Eats double-quoted string and returns true + /// if string is terminated. + fn double_quoted_string(&mut self) -> bool { + while let Some(c) = self.bump() { + match c { + '"' if self.first() == '"' => { + // Bump again to skip escaped character. + self.bump(); + } + '"' => { + return true; + } + _ => (), + } + } + // End of file reached. + false + } + + // https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING + fn dollar_quoted_string(&mut self) -> TokenKind { + // Get the start sequence of the dollar quote, i.e., 'foo' in + // $foo$hello$foo$ + let mut start = vec![]; + while let Some(c) = self.bump() { + match c { + '$' => { + self.bump(); + break; + } + _ => { + start.push(c); + } + } + } + + // we have a dollar quoted string deliminated with `$$` + if start.is_empty() { + loop { + self.eat_while(|c| c != '$'); + if self.is_eof() { + return TokenKind::Literal { + kind: LiteralKind::DollarQuotedString { terminated: false }, + }; + } + // eat $ + self.bump(); + if self.first() == '$' { + self.bump(); + return TokenKind::Literal { + kind: LiteralKind::DollarQuotedString { terminated: true }, + }; + } + } + } else { + loop { + self.eat_while(|c| c != start[0]); + if self.is_eof() { + return TokenKind::Literal { + kind: LiteralKind::DollarQuotedString { terminated: false }, + }; + } + + // might be the start of our start/end sequence + let mut match_count = 0; + for start_char in &start { + if self.first() == *start_char { + self.bump(); + match_count += 1; + } else { + self.bump(); + break; + } + } + + // closing '$' + let terminated = match_count == start.len(); + if self.first() == '$' && terminated { + self.bump(); + return TokenKind::Literal { + kind: LiteralKind::DollarQuotedString { terminated }, + }; + } + } + } + } + + fn eat_decimal_digits(&mut self) -> bool { + let mut has_digits = false; + loop { + match self.first() { + '_' => { + self.bump(); + } + '0'..='9' => { + has_digits = true; + self.bump(); + } + _ => break, + } + } + has_digits + } + + fn eat_hexadecimal_digits(&mut self) -> bool { + let mut has_digits = false; + loop { + match self.first() { + '_' => { + self.bump(); + } + '0'..='9' | 'a'..='f' | 'A'..='F' => { + has_digits = true; + self.bump(); + } + _ => break, + } + } + has_digits 
+    }
+
+    /// Eats the float exponent. Returns true if at least one digit was found,
+    /// and returns false otherwise.
+    fn eat_float_exponent(&mut self) -> bool {
+        if self.first() == '-' || self.first() == '+' {
+            self.bump();
+        }
+        self.eat_decimal_digits()
+    }
+}
+
+/// Creates an iterator that produces tokens from the input string.
+pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
+    let mut cursor = Cursor::new(input);
+    std::iter::from_fn(move || {
+        let token = cursor.advance_token();
+        if token.kind != TokenKind::Eof {
+            Some(token)
+        } else {
+            None
+        }
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fmt;
+
+    use super::*;
+    use insta::assert_debug_snapshot;
+
+    struct TokenDebug<'a> {
+        content: &'a str,
+        token: Token,
+    }
+    impl fmt::Debug for TokenDebug<'_> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            write!(f, "{:?} @ {:?}", self.content, self.token.kind)
+        }
+    }
+
+    impl<'a> TokenDebug<'a> {
+        fn new(token: Token, input: &'a str, start: u32) -> TokenDebug<'a> {
+            TokenDebug {
+                token,
+                content: &input[start as usize..(start + token.len) as usize],
+            }
+        }
+    }
+
+    fn lex(input: &str) -> Vec<TokenDebug<'_>> {
+        let mut tokens = vec![];
+        let mut start = 0;
+
+        for token in tokenize(input) {
+            let length = token.len;
+            tokens.push(TokenDebug::new(token, input, start));
+            start += length;
+        }
+        tokens
+    }
+    #[test]
+    fn lex_statement() {
+        let result = lex("select 1;");
+        assert_debug_snapshot!(result);
+    }
+
+    #[test]
+    fn block_comment() {
+        let result = lex(r#"
+/*
+ * foo
+ * bar
+*/"#);
+        assert_debug_snapshot!(result);
+    }
+
+    #[test]
+    fn block_comment_unterminated() {
+        let result = lex(r#"
+/*
+ * foo
+ * bar
+ /*
+*/"#);
+        assert_debug_snapshot!(result);
+    }
+
+    #[test]
+    fn line_comment() {
+        let result = lex(r#"
+-- foooooooooooo bar buzz
+"#);
+        assert_debug_snapshot!(result);
+    }
+
+    #[test]
+    fn line_comment_whitespace() {
+        assert_debug_snapshot!(lex(r#"
+select 'Hello' -- This is a comment
+' World';"#))
+    }
+
+    #[test]
+    fn dollar_quoting() {
+        assert_debug_snapshot!(lex(r#"
+$$Dianne's horse$$
+$SomeTag$Dianne's horse$SomeTag$
+
+-- with dollar inside and matching tags
+$foo$hello$world$bar$
+"#))
+    }
+
+    #[test]
+    fn dollar_strings_part2() {
+        assert_debug_snapshot!(lex(r#"
+DO $doblock$
+end
+$doblock$;"#))
+    }
+
+    #[test]
+    fn dollar_quote_mismatch_tags_simple() {
+        assert_debug_snapshot!(lex(r#"
+-- dollar quoting with mismatched tags
+$foo$hello world$bar$
+"#));
+    }
+
+    #[test]
+    fn dollar_quote_mismatch_tags_complex() {
+        assert_debug_snapshot!(lex(r#"
+-- with dollar inside but mismatched tags
+$foo$hello$world$bar$
+"#));
+    }
+
+    #[test]
+    fn numeric() {
+        assert_debug_snapshot!(lex(r#"
+42
+3.5
+4.
+.001 +.123e10 +5e2 +1.925e-3 +1e-10 +1e+10 +1e10 +4664.E+5 +"#)) + } + + #[test] + fn numeric_non_decimal() { + assert_debug_snapshot!(lex(r#" +0b100101 +0B10011001 +0o273 +0O755 +0x42f +0XFFFF +"#)) + } + + #[test] + fn numeric_with_seperators() { + assert_debug_snapshot!(lex(r#" +1_500_000_000 +0b10001000_00000000 +0o_1_755 +0xFFFF_FFFF +1.618_034 +"#)) + } + + #[test] + fn select_with_period() { + assert_debug_snapshot!(lex(r#" +select public.users; +"#)) + } + + #[test] + fn bitstring() { + assert_debug_snapshot!(lex(r#" +B'1001' +b'1001' +X'1FF' +x'1FF' +"#)) + } + + #[test] + fn string() { + assert_debug_snapshot!(lex(r#" +'Dianne''s horse' + +select 'foo '' +bar'; + +select 'foooo' + 'bar'; + + +'foo \\ \n \tbar' + +'forgot to close the string +"#)) + } + + #[test] + fn params() { + assert_debug_snapshot!(lex(r#" +select $1 + $2; + +select $1123123123123; + +select $; +"#)) + } + + #[test] + fn string_with_escapes() { + // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE + + assert_debug_snapshot!(lex(r#" +E'foo' + +e'bar' + +e'\b\f\n\r\t' + +e'\0\11\777' + +e'\x0\x11\xFF' + +e'\uAAAA \UFFFFFFFF' + +"#)) + } + + #[test] + fn string_unicode_escape() { + // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE + + assert_debug_snapshot!(lex(r#" +U&"d\0061t\+000061" + +U&"\0441\043B\043E\043D" + +u&'\0441\043B' + +U&"d!0061t!+000061" UESCAPE '!' +"#)) + } + + #[test] + fn quoted_ident() { + assert_debug_snapshot!(lex(r#" +"hello &1 -world"; + + +"hello-world +"#)) + } + + #[test] + fn quoted_ident_with_escape_quote() { + assert_debug_snapshot!(lex(r#" +"foo "" bar" +"#)) + } +} diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__bitstring.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__bitstring.snap new file mode 100644 index 00000000..ff3eec09 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__bitstring.snap @@ -0,0 +1,16 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\nB'1001'\nb'1001'\nX'1FF'\nx'1FF'\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "B'1001'" @ Literal { kind: BitStr { terminated: true } }, + "\n" @ LineEnding { count: 1 }, + "b'1001'" @ Literal { kind: BitStr { terminated: true } }, + "\n" @ LineEnding { count: 1 }, + "X'1FF'" @ Literal { kind: ByteStr { terminated: true } }, + "\n" @ LineEnding { count: 1 }, + "x'1FF'" @ Literal { kind: ByteStr { terminated: true } }, + "\n" @ LineEnding { count: 1 }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__block_comment.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__block_comment.snap new file mode 100644 index 00000000..22961ecf --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__block_comment.snap @@ -0,0 +1,9 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: result +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "/*\n * foo\n * bar\n*/" @ BlockComment { terminated: true }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__block_comment_unterminated.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__block_comment_unterminated.snap new file mode 100644 index 00000000..4dd6957e --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__block_comment_unterminated.snap @@ -0,0 +1,9 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: result +snapshot_kind: text +--- +[ + "\n" @ 
LineEnding { count: 1 }, + "/*\n * foo\n * bar\n /*\n*/" @ BlockComment { terminated: false }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_quote_mismatch_tags_complex.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_quote_mismatch_tags_complex.snap new file mode 100644 index 00000000..7f6a6649 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_quote_mismatch_tags_complex.snap @@ -0,0 +1,11 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\n-- with dollar inside but mismatched tags\n$foo$hello$world$bar$\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "-- with dollar inside but mismatched tags" @ LineComment, + "\n" @ LineEnding { count: 1 }, + "$foo$hello$world$bar$\n" @ Literal { kind: DollarQuotedString { terminated: false } }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_quote_mismatch_tags_simple.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_quote_mismatch_tags_simple.snap new file mode 100644 index 00000000..9d6d43a0 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_quote_mismatch_tags_simple.snap @@ -0,0 +1,11 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\n-- dollar quoting with mismatched tags\n$foo$hello world$bar$\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "-- dollar quoting with mismatched tags" @ LineComment, + "\n" @ LineEnding { count: 1 }, + "$foo$hello world$bar$\n" @ Literal { kind: DollarQuotedString { terminated: false } }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_quoting.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_quoting.snap new file mode 100644 index 00000000..ad1aa07d --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_quoting.snap @@ -0,0 +1,15 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\n$$Dianne's horse$$\n$SomeTag$Dianne's horse$SomeTag$\n\n-- with dollar inside and matching tags\n$foo$hello$world$bar$\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "$$Dianne's horse$$" @ Literal { kind: DollarQuotedString { terminated: true } }, + "\n" @ LineEnding { count: 1 }, + "$SomeTag$Dianne's horse$SomeTag$" @ Literal { kind: DollarQuotedString { terminated: true } }, + "\n\n" @ LineEnding { count: 2 }, + "-- with dollar inside and matching tags" @ LineComment, + "\n" @ LineEnding { count: 1 }, + "$foo$hello$world$bar$\n" @ Literal { kind: DollarQuotedString { terminated: false } }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_strings_part2.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_strings_part2.snap new file mode 100644 index 00000000..9aa49446 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__dollar_strings_part2.snap @@ -0,0 +1,12 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\nDO $doblock$\nend\n$doblock$;\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "DO" @ Ident, + " " @ Space, + "$doblock$\nend\n$doblock$" @ Literal { kind: DollarQuotedString { terminated: true } }, + ";" @ Semi, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__lex_statement.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__lex_statement.snap new file mode 100644 index 00000000..5679f2a7 --- 
/dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__lex_statement.snap @@ -0,0 +1,11 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: result +snapshot_kind: text +--- +[ + "select" @ Ident, + " " @ Space, + "1" @ Literal { kind: Int { base: Decimal, empty_int: false } }, + ";" @ Semi, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__line_comment.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__line_comment.snap new file mode 100644 index 00000000..1cd8782a --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__line_comment.snap @@ -0,0 +1,10 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: result +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "-- foooooooooooo bar buzz" @ LineComment, + "\n" @ LineEnding { count: 1 }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__line_comment_whitespace.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__line_comment_whitespace.snap new file mode 100644 index 00000000..3cf5fb50 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__line_comment_whitespace.snap @@ -0,0 +1,16 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\nselect 'Hello' -- This is a comment\n' World';\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "select" @ Ident, + " " @ Space, + "'Hello'" @ Literal { kind: Str { terminated: true } }, + " " @ Space, + "-- This is a comment" @ LineComment, + "\n" @ LineEnding { count: 1 }, + "' World'" @ Literal { kind: Str { terminated: true } }, + ";" @ Semi, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__numeric.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__numeric.snap new file mode 100644 index 00000000..95fdb27a --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__numeric.snap @@ -0,0 +1,30 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\n42\n3.5\n4.\n.001\n.123e10\n5e2\n1.925e-3\n1e-10\n1e+10\n1e10\n4664.E+5\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "42" @ Literal { kind: Int { base: Decimal, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "3.5" @ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, + "4." 
@ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, + ".001" @ Literal { kind: Int { base: Decimal, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + ".123e10" @ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, + "5e2" @ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, + "1.925e-3" @ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, + "1e-10" @ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, + "1e+10" @ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, + "1e10" @ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, + "4664.E+5" @ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__numeric_non_decimal.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__numeric_non_decimal.snap new file mode 100644 index 00000000..e4430348 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__numeric_non_decimal.snap @@ -0,0 +1,20 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\n0b100101\n0B10011001\n0o273\n0O755\n0x42f\n0XFFFF\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "0b100101" @ Literal { kind: Int { base: Binary, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "0B10011001" @ Literal { kind: Int { base: Binary, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "0o273" @ Literal { kind: Int { base: Octal, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "0O755" @ Literal { kind: Int { base: Octal, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "0x42f" @ Literal { kind: Int { base: Hexadecimal, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "0XFFFF" @ Literal { kind: Int { base: Hexadecimal, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__numeric_with_seperators.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__numeric_with_seperators.snap new file mode 100644 index 00000000..cd0ecb21 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__numeric_with_seperators.snap @@ -0,0 +1,18 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\n1_500_000_000\n0b10001000_00000000\n0o_1_755\n0xFFFF_FFFF\n1.618_034\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "1_500_000_000" @ Literal { kind: Int { base: Decimal, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "0b10001000_00000000" @ Literal { kind: Int { base: Binary, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "0o_1_755" @ Literal { kind: Int { base: Octal, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "0xFFFF_FFFF" @ Literal { kind: Int { base: Hexadecimal, empty_int: false } }, + "\n" @ LineEnding { count: 1 }, + "1.618_034" @ Literal { kind: Float { base: Decimal, empty_exponent: false } }, + "\n" @ LineEnding { count: 1 }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__params.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__params.snap new file mode 100644 index 00000000..6a436417 --- /dev/null +++ 
b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__params.snap @@ -0,0 +1,27 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\nselect $1 + $2;\n\nselect $1123123123123;\n\nselect $;\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "select" @ Ident, + " " @ Space, + "$1" @ PositionalParam, + " " @ Space, + "+" @ Plus, + " " @ Space, + "$2" @ PositionalParam, + ";" @ Semi, + "\n\n" @ LineEnding { count: 2 }, + "select" @ Ident, + " " @ Space, + "$1123123123123" @ PositionalParam, + ";" @ Semi, + "\n\n" @ LineEnding { count: 2 }, + "select" @ Ident, + " " @ Space, + "$" @ PositionalParam, + ";" @ Semi, + "\n" @ LineEnding { count: 1 }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__quoted_ident.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__quoted_ident.snap new file mode 100644 index 00000000..e1dffb06 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__quoted_ident.snap @@ -0,0 +1,12 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\n\"hello &1 -world\";\n\n\n\"hello-world\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "\"hello &1 -world\"" @ QuotedIdent { terminated: true }, + ";" @ Semi, + "\n\n\n" @ LineEnding { count: 3 }, + "\"hello-world\n" @ QuotedIdent { terminated: false }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__quoted_ident_with_escape_quote.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__quoted_ident_with_escape_quote.snap new file mode 100644 index 00000000..44ff06e5 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__quoted_ident_with_escape_quote.snap @@ -0,0 +1,10 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\n\"foo \"\" bar\"\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "\"foo \"\" bar\"" @ QuotedIdent { terminated: true }, + "\n" @ LineEnding { count: 1 }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__select_with_period.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__select_with_period.snap new file mode 100644 index 00000000..bc03da6a --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__select_with_period.snap @@ -0,0 +1,15 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\nselect public.users;\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "select" @ Ident, + " " @ Space, + "public" @ Ident, + "." 
@ Dot, + "users" @ Ident, + ";" @ Semi, + "\n" @ LineEnding { count: 1 }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__string.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__string.snap new file mode 100644 index 00000000..c7e5b8ba --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__string.snap @@ -0,0 +1,26 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\n'Dianne''s horse'\n\nselect 'foo ''\nbar';\n\nselect 'foooo'\n 'bar';\n\n\n'foo \\\\ \\n \\tbar'\n\n'forgot to close the string\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "'Dianne''s horse'" @ Literal { kind: Str { terminated: true } }, + "\n\n" @ LineEnding { count: 2 }, + "select" @ Ident, + " " @ Space, + "'foo ''\nbar'" @ Literal { kind: Str { terminated: true } }, + ";" @ Semi, + "\n\n" @ LineEnding { count: 2 }, + "select" @ Ident, + " " @ Space, + "'foooo'" @ Literal { kind: Str { terminated: true } }, + "\n" @ LineEnding { count: 1 }, + " " @ Space, + "'bar'" @ Literal { kind: Str { terminated: true } }, + ";" @ Semi, + "\n\n\n" @ LineEnding { count: 3 }, + "'foo \\\\ \\n \\tbar'" @ Literal { kind: Str { terminated: true } }, + "\n\n" @ LineEnding { count: 2 }, + "'forgot to close the string\n" @ Literal { kind: Str { terminated: false } }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__string_unicode_escape.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__string_unicode_escape.snap new file mode 100644 index 00000000..225a208a --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__string_unicode_escape.snap @@ -0,0 +1,20 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\nU&\"d\\0061t\\+000061\"\n\nU&\"\\0441\\043B\\043E\\043D\"\n\nu&'\\0441\\043B'\n\nU&\"d!0061t!+000061\" UESCAPE '!'\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "U&\"d\\0061t\\+000061\"" @ QuotedIdent { terminated: true }, + "\n\n" @ LineEnding { count: 2 }, + "U&\"\\0441\\043B\\043E\\043D\"" @ QuotedIdent { terminated: true }, + "\n\n" @ LineEnding { count: 2 }, + "u&'\\0441\\043B'" @ Literal { kind: UnicodeEscStr { terminated: true } }, + "\n\n" @ LineEnding { count: 2 }, + "U&\"d!0061t!+000061\"" @ QuotedIdent { terminated: true }, + " " @ Space, + "UESCAPE" @ Ident, + " " @ Space, + "'!'" @ Literal { kind: Str { terminated: true } }, + "\n" @ LineEnding { count: 1 }, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__string_with_escapes.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__string_with_escapes.snap new file mode 100644 index 00000000..bbc94048 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__string_with_escapes.snap @@ -0,0 +1,20 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: "lex(r#\"\nE'foo'\n\ne'bar'\n\ne'\\b\\f\\n\\r\\t'\n\ne'\\0\\11\\777'\n\ne'\\x0\\x11\\xFF'\n\ne'\\uAAAA \\UFFFFFFFF'\n\n\"#)" +snapshot_kind: text +--- +[ + "\n" @ LineEnding { count: 1 }, + "E'foo'" @ Literal { kind: EscStr { terminated: true } }, + "\n\n" @ LineEnding { count: 2 }, + "e'bar'" @ Literal { kind: EscStr { terminated: true } }, + "\n\n" @ LineEnding { count: 2 }, + "e'\\b\\f\\n\\r\\t'" @ Literal { kind: EscStr { terminated: true } }, + "\n\n" @ LineEnding { count: 2 }, + "e'\\0\\11\\777'" @ Literal { kind: EscStr { terminated: true } }, + "\n\n" @ LineEnding { count: 2 }, + "e'\\x0\\x11\\xFF'" @ Literal { kind: EscStr { terminated: true } }, + "\n\n" @ 
LineEnding { count: 2 }, + "e'\\uAAAA \\UFFFFFFFF'" @ Literal { kind: EscStr { terminated: true } }, + "\n\n" @ LineEnding { count: 2 }, +] diff --git a/crates/pgt_tokenizer/src/token.rs b/crates/pgt_tokenizer/src/token.rs new file mode 100644 index 00000000..50a7d12a --- /dev/null +++ b/crates/pgt_tokenizer/src/token.rs @@ -0,0 +1,170 @@ +// based on: https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/lib.rs#L58 +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum TokenKind { + /// Used when there's an error of some sort while lexing. + Unknown, + /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid + /// suffix, but may be present here on string and float literals. Users of + /// this type will need to check for and reject that case. + /// + /// See [`LiteralKind`] for more details. + Literal { + kind: LiteralKind, + }, + /// Whitespace characters. + Space, + Tab, + VerticalTab, + FormFeed, + // Handles \n, \r, and sequences + LineEnding { + count: usize, + }, + /// Identifier + /// + /// case-sensitive + Ident, + /// `;` + Semi, + /// End of file + Eof, + /// `/` + Slash, + /// `\` + Backslash, + /// `-- foo` + LineComment, + /// ``` + /// /* + /// foo + /// */ + /// ``` + BlockComment { + terminated: bool, + }, + /// `-` + Minus, + /// `:` + Colon, + /// `.` + Dot, + /// `=` + Eq, + /// `>` + Gt, + /// `&` + And, + /// `<` + Lt, + /// `!` + Bang, + /// `+` + Plus, + /// `~` + Tilde, + /// `#` + Pound, + /// `?` + Question, + /// `|` + Or, + /// `%` + Percent, + /// `^` + Caret, + /// `*` + Star, + /// `` ` `` + Backtick, + /// `@` + At, + /// `]` + CloseBracket, + /// `[` + OpenBracket, + /// `)` + CloseParen, + /// `(` + OpenParen, + /// `,` + Comma, + /// Error case that we need to report later on. + UnknownPrefix, + /// Positional Parameter, e.g., `$1` + /// + /// see: + PositionalParam, + /// Quoted Identifier, e.g., `"update"` in `update "my_table" set "a" = 5;` + /// + /// These are case-sensitive, unlike [`TokenKind::Ident`] + /// + /// see: + QuotedIdent { + terminated: bool, + }, +} + +/// Parsed token. +/// It doesn't contain information about data that has been parsed, +/// only the type of the token and its size. +#[derive(Debug, Clone, Copy)] +pub struct Token { + pub kind: TokenKind, + pub len: u32, +} + +impl Token { + pub(crate) fn new(kind: TokenKind, len: u32) -> Token { + Token { kind, len } + } +} + +/// Base of numeric literal encoding according to its prefix. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Base { + /// Literal starts with "0b". + Binary = 2, + /// Literal starts with "0o". + Octal = 8, + /// Literal doesn't contain a prefix. + Decimal = 10, + /// Literal starts with "0x". + Hexadecimal = 16, +} + +// Enum representing the literal types supported by the lexer. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum LiteralKind {
+    /// Integer Numeric, e.g., `42`
+    ///
+    /// see:
+    Int { base: Base, empty_int: bool },
+    /// Float Numeric, e.g., `1.925e-3`
+    ///
+    /// see:
+    Float { base: Base, empty_exponent: bool },
+    /// String, e.g., `'foo'`
+    ///
+    /// see:
+    Str { terminated: bool },
+    /// Hexadecimal Bit String, e.g., `X'1FF'`
+    ///
+    /// see:
+    ByteStr { terminated: bool },
+    /// Bit String, e.g., `B'1001'`
+    ///
+    /// see:
+    BitStr { terminated: bool },
+    /// Dollar Quoted String, e.g., `$$Dianne's horse$$`
+    ///
+    /// see:
+    DollarQuotedString { terminated: bool },
+    /// Unicode Escape String, e.g., `U&'d\0061t\+000061'`
+    ///
+    /// see:
+    UnicodeEscStr { terminated: bool },
+    /// Escape String, e.g., `E'foo'`
+    ///
+    /// see:
+    EscStr { terminated: bool },
+}
diff --git a/crates/pgt_workspace/src/workspace/server/annotation.rs b/crates/pgt_workspace/src/workspace/server/annotation.rs
index 2fdf32eb..db6a8b3b 100644
--- a/crates/pgt_workspace/src/workspace/server/annotation.rs
+++ b/crates/pgt_workspace/src/workspace/server/annotation.rs
@@ -1,7 +1,7 @@
 use std::sync::Arc;
 
 use dashmap::DashMap;
-use pgt_lexer::{SyntaxKind, WHITESPACE_TOKENS};
+use pgt_lexer::SyntaxKind;
 
 use super::statement_identifier::StatementId;
 
@@ -11,9 +11,18 @@ pub struct StatementAnnotations {
 }
 
 pub struct AnnotationStore {
-    db: DashMap<StatementId, Option<Arc<StatementAnnotations>>>,
+    db: DashMap<StatementId, Arc<StatementAnnotations>>,
 }
 
+const WHITESPACE_TOKENS: [SyntaxKind; 6] = [
+    SyntaxKind::SPACE,
+    SyntaxKind::TAB,
+    SyntaxKind::VERTICAL_TAB,
+    SyntaxKind::FORM_FEED,
+    SyntaxKind::LINE_ENDING,
+    SyntaxKind::EOF,
+];
+
 impl AnnotationStore {
     pub fn new() -> AnnotationStore {
         AnnotationStore { db: DashMap::new() }
@@ -24,26 +33,26 @@ impl AnnotationStore {
         &self,
         statement_id: &StatementId,
         content: &str,
-    ) -> Option<Arc<StatementAnnotations>> {
+    ) -> Arc<StatementAnnotations> {
         if let Some(existing) = self.db.get(statement_id).map(|x| x.clone()) {
             return existing;
         }
 
-        // we swallow the error here because the lexing within the document would have already
- let annotations = pgt_lexer::lex(content).ok().map(|tokens| { - let ends_with_semicolon = tokens - .iter() - .rev() - .find(|token| !WHITESPACE_TOKENS.contains(&token.kind)) - .is_some_and(|token| token.kind == SyntaxKind::Ascii59); - - Arc::new(StatementAnnotations { - ends_with_semicolon, - }) + let lexed = pgt_lexer::lex(content); + + let ends_with_semicolon = (0..lexed.len()) + // Iterate through tokens in reverse to find the last non-whitespace token + .filter(|t| !WHITESPACE_TOKENS.contains(&lexed.kind(*t))) + .next_back() + .map(|t| lexed.kind(t) == SyntaxKind::SEMICOLON) + .unwrap_or(false); + + let annotations = Arc::new(StatementAnnotations { + ends_with_semicolon, }); - self.db.insert(statement_id.clone(), None); + self.db.insert(statement_id.clone(), annotations.clone()); + annotations } @@ -80,8 +89,7 @@ mod tests { let annotations = store.get_annotations(&statement_id, content); - assert!(annotations.is_some()); - assert_eq!(annotations.unwrap().ends_with_semicolon, *expected); + assert_eq!(annotations.ends_with_semicolon, *expected); } } } diff --git a/crates/pgt_workspace/src/workspace/server/change.rs b/crates/pgt_workspace/src/workspace/server/change.rs index 62e3da03..cc455134 100644 --- a/crates/pgt_workspace/src/workspace/server/change.rs +++ b/crates/pgt_workspace/src/workspace/server/change.rs @@ -445,9 +445,7 @@ fn get_affected(content: &str, range: TextRange) -> &str { #[cfg(test)] mod tests { - use super::*; - use pgt_diagnostics::Diagnostic; use pgt_text_size::TextRange; use crate::workspace::{ChangeFileParams, ChangeParams}; @@ -462,9 +460,7 @@ mod tests { } fn assert_document_integrity(d: &Document) { - let ranges = pgt_statement_splitter::split(&d.content) - .expect("Unexpected scan error") - .ranges; + let ranges = pgt_statement_splitter::split(&d.content).ranges; assert!( ranges.len() == d.positions.len(), @@ -479,16 +475,6 @@ mod tests { ); } - #[test] - fn open_doc_with_scan_error() { - let input = "select id from users;\n\n\n\nselect 1443ddwwd33djwdkjw13331333333333;"; - - let d = Document::new(input.to_string(), 0); - - assert_eq!(d.positions.len(), 0); - assert!(d.has_fatal_error()); - } - #[test] fn comments_at_begin() { let path = PgTPath::new("test.sql"); @@ -621,149 +607,6 @@ mod tests { assert_document_integrity(&d); } - #[test] - fn change_into_scan_error_within_statement() { - let path = PgTPath::new("test.sql"); - let input = "select id from users;\n\n\n\nselect 1;"; - - let mut d = Document::new(input.to_string(), 0); - - assert_eq!(d.positions.len(), 2); - assert!(!d.has_fatal_error()); - - let change = ChangeFileParams { - path: path.clone(), - version: 1, - changes: vec![ChangeParams { - text: "d".to_string(), - range: Some(TextRange::new(33.into(), 33.into())), - }], - }; - - let changed = d.apply_file_change(&change); - - assert_eq!(d.content, "select id from users;\n\n\n\nselect 1d;"); - assert!( - changed - .iter() - .all(|c| matches!(c, StatementChange::Deleted(_))), - "should delete all statements" - ); - assert!(d.positions.is_empty(), "should clear all positions"); - assert_eq!(d.diagnostics.len(), 1, "should return a scan error"); - assert_eq!( - d.diagnostics[0].location().span, - Some(TextRange::new(32.into(), 34.into())), - "should have correct span" - ); - assert!(d.has_fatal_error()); - } - - #[test] - fn change_into_scan_error_across_statements() { - let path = PgTPath::new("test.sql"); - let input = "select id from users;\n\n\n\nselect 1;"; - - let mut d = Document::new(input.to_string(), 0); - - 
assert_eq!(d.positions.len(), 2); - assert!(!d.has_fatal_error()); - - let change = ChangeFileParams { - path: path.clone(), - version: 1, - changes: vec![ChangeParams { - text: "1d".to_string(), - range: Some(TextRange::new(7.into(), 33.into())), - }], - }; - - let changed = d.apply_file_change(&change); - - assert_eq!(d.content, "select 1d;"); - assert!( - changed - .iter() - .all(|c| matches!(c, StatementChange::Deleted(_))), - "should delete all statements" - ); - assert!(d.positions.is_empty(), "should clear all positions"); - assert_eq!(d.diagnostics.len(), 1, "should return a scan error"); - assert_eq!( - d.diagnostics[0].location().span, - Some(TextRange::new(7.into(), 9.into())), - "should have correct span" - ); - assert!(d.has_fatal_error()); - } - - #[test] - fn change_from_invalid_to_invalid() { - let path = PgTPath::new("test.sql"); - let input = "select 1d;"; - - let mut d = Document::new(input.to_string(), 0); - - assert_eq!(d.positions.len(), 0); - assert!(d.has_fatal_error()); - assert_eq!(d.diagnostics.len(), 1); - - let change = ChangeFileParams { - path: path.clone(), - version: 1, - changes: vec![ChangeParams { - text: "2e".to_string(), - range: Some(TextRange::new(7.into(), 9.into())), - }], - }; - - let changed = d.apply_file_change(&change); - - assert_eq!(d.content, "select 2e;"); - assert!(changed.is_empty(), "should not emit any changes"); - assert!(d.positions.is_empty(), "should keep positions empty"); - assert_eq!(d.diagnostics.len(), 1, "should still have a scan error"); - assert_eq!( - d.diagnostics[0].location().span, - Some(TextRange::new(7.into(), 9.into())), - "should have updated span" - ); - assert!(d.has_fatal_error()); - } - - #[test] - fn change_from_invalid_to_valid() { - let path = PgTPath::new("test.sql"); - let input = "select 1d;"; - - let mut d = Document::new(input.to_string(), 0); - - assert_eq!(d.positions.len(), 0); - assert!(d.has_fatal_error()); - assert_eq!(d.diagnostics.len(), 1); - - let change = ChangeFileParams { - path: path.clone(), - version: 1, - changes: vec![ChangeParams { - text: "1".to_string(), - range: Some(TextRange::new(7.into(), 9.into())), - }], - }; - - let changed = d.apply_file_change(&change); - - assert_eq!(d.content, "select 1;"); - assert_eq!(changed.len(), 1, "should emit one change"); - assert!(matches!( - changed[0], - StatementChange::Added(AddedStatement { .. 
}) - )); - assert_eq!(d.positions.len(), 1, "should have one position"); - assert!(d.diagnostics.is_empty(), "should have no diagnostics"); - assert!(!d.has_fatal_error()); - } - #[test] fn within_statements() { let path = PgTPath::new("test.sql"); diff --git a/crates/pgt_workspace/src/workspace/server/document.rs b/crates/pgt_workspace/src/workspace/server/document.rs index ed0ca40f..89516b23 100644 --- a/crates/pgt_workspace/src/workspace/server/document.rs +++ b/crates/pgt_workspace/src/workspace/server/document.rs @@ -62,32 +62,21 @@ pub(crate) fn split_with_diagnostics( offset: Option, ) -> (Vec, Vec) { let o = offset.unwrap_or_else(|| 0.into()); - match pgt_statement_splitter::split(content) { - Ok(parse) => ( - parse.ranges, - parse - .errors - .into_iter() - .map(|err| { - SDiagnostic::new( - err.clone() - .with_file_span(err.location().span.map(|r| r + o)), - ) - }) - .collect(), - ), - Err(errs) => ( - vec![], - errs.into_iter() - .map(|err| { - SDiagnostic::new( - err.clone() - .with_file_span(err.location().span.map(|r| r + o)), - ) - }) - .collect(), - ), - } + let result = pgt_statement_splitter::split(content); + + ( + result.ranges, + result + .errors + .into_iter() + .map(|err| { + SDiagnostic::new( + err.clone() + .with_file_span(err.location().span.map(|r| r + o)), + ) + }) + .collect(), + ) } pub struct StatementIterator<'a> { diff --git a/docs/codegen/src/rules_docs.rs b/docs/codegen/src/rules_docs.rs index 92f0dc42..68db53db 100644 --- a/docs/codegen/src/rules_docs.rs +++ b/docs/codegen/src/rules_docs.rs @@ -442,7 +442,7 @@ fn print_diagnostics( }); // split and parse each statement - let stmts = pgt_statement_splitter::split(code).expect("unexpected parse error"); + let stmts = pgt_statement_splitter::split(code); for stmt in stmts.ranges { match pgt_query_ext::parse(&code[stmt]) { Ok(ast) => { diff --git a/xtask/rules_check/src/lib.rs b/xtask/rules_check/src/lib.rs index 68a6d650..da4b4c73 100644 --- a/xtask/rules_check/src/lib.rs +++ b/xtask/rules_check/src/lib.rs @@ -126,52 +126,47 @@ fn assert_lint( filter, }); - // split and parse each statement - match pgt_statement_splitter::split(code) { - Ok(stmts) => { - for stmt in stmts.ranges { - match pgt_query_ext::parse(&code[stmt]) { - Ok(ast) => { - for rule_diag in analyser.run(pgt_analyser::AnalyserContext { root: &ast }) - { - let diag = pgt_diagnostics::serde::Diagnostic::new(rule_diag); - - let category = diag.category().expect("linter diagnostic has no code"); - let severity = settings.get_severity_from_rule_code(category).expect( + let result = pgt_statement_splitter::split(code); + for stmt in result.ranges { + match pgt_query_ext::parse(&code[stmt]) { + Ok(ast) => { + for rule_diag in analyser.run(pgt_analyser::AnalyserContext { root: &ast }) { + let diag = pgt_diagnostics::serde::Diagnostic::new(rule_diag); + + let category = diag.category().expect("linter diagnostic has no code"); + let severity = settings.get_severity_from_rule_code(category).expect( "If you see this error, it means you need to run cargo codegen-configuration", ); - let error = diag - .with_severity(severity) - .with_file_path(&file_path) - .with_file_source_code(code); - - write_diagnostic(code, error)?; - } - } - Err(e) => { - let error = SyntaxDiagnostic::from(e) - .with_file_path(&file_path) - .with_file_source_code(code); - write_diagnostic(code, error)?; - } - }; + let error = diag + .with_severity(severity) + .with_file_path(&file_path) + .with_file_source_code(code); + + write_diagnostic(code, error)?; + } } - } - Err(errs) => { 
- // Print all diagnostics to help the user - let mut console = pgt_console::EnvConsole::default(); - for err in errs { - console.println( - pgt_console::LogLevel::Error, - markup! { - {PrintDiagnostic::verbose(&err)} - }, - ); + Err(e) => { + let error = SyntaxDiagnostic::from(e) + .with_file_path(&file_path) + .with_file_source_code(code); + write_diagnostic(code, error)?; } - bail!("Analysis of '{group}/{rule}' on the following code block returned a scan diagnostic.\n\n{code}"); + }; + } + if !result.errors.is_empty() { + // Print all diagnostics to help the user + let mut console = pgt_console::EnvConsole::default(); + for err in result.errors { + console.println( + pgt_console::LogLevel::Error, + markup! { + {PrintDiagnostic::verbose(&err)} + }, + ); } - }; + bail!("Analysis of '{group}/{rule}' on the following code block returned a scan diagnostic.\n\n{code}"); + } Ok(()) }
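
Note on using the new tokenizer (illustrative only, not part of the diff above): `pgt_tokenizer::tokenize` yields `Token`s that carry just a kind and a byte length, so a consumer recovers the token text by tracking its own running offset, exactly as the `lex` helper in the tests does. A minimal sketch under that assumption, where `print_tokens` and the sample input are made up for demonstration:

use pgt_tokenizer::tokenize;

fn print_tokens(input: &str) {
    // Each token only stores its length, so keep a running byte offset
    // to slice the matching text back out of the input.
    let mut start = 0usize;
    for token in tokenize(input) {
        let end = start + token.len as usize;
        println!("{:?} @ {:?}", &input[start..end], token.kind);
        start = end;
    }
}

fn main() {
    print_tokens("select 'foo' -- trailing comment\n;");
}

The output format intentionally mirrors the `"content" @ Kind` lines in the snapshot files added by this change.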