From 46901014bd2c9a79f006df8bc90a2c9ddf83cddf Mon Sep 17 00:00:00 2001 From: bohan Date: Tue, 15 Apr 2025 20:33:42 +0800 Subject: [PATCH 1/2] refactor(ecma/lexer): split lexer out of parser --- Cargo.lock | 32 + crates/swc_ecma_lexer/Cargo.toml | 66 ++ .../benches/lexer.rs | 38 +- .../src/error.rs | 2 +- crates/swc_ecma_lexer/src/input.rs | 568 +++++++++++++++ .../src/lexer/comments_buffer.rs | 0 .../src/lexer/input.rs | 0 .../src/lexer/jsx.rs | 0 .../src/lexer/mod.rs | 4 +- .../src/lexer/number.rs | 0 .../src/lexer/state.rs | 7 +- .../src/lexer/table.rs | 0 .../src/lexer/tests.rs | 12 +- .../src/lexer/util.rs | 0 .../src/lexer/whitespace.rs | 0 crates/swc_ecma_lexer/src/lib.rs | 653 ++++++++++++++++++ .../src/token.rs | 31 +- crates/swc_ecma_lexer/src/utils.rs | 111 +++ crates/swc_ecma_parser/Cargo.toml | 6 +- crates/swc_ecma_parser/src/lib.rs | 311 +-------- crates/swc_ecma_parser/src/macros.rs | 308 +-------- crates/swc_ecma_parser/src/parser/expr.rs | 6 +- crates/swc_ecma_parser/src/parser/ident.rs | 12 +- crates/swc_ecma_parser/src/parser/input.rs | 566 +-------------- crates/swc_ecma_parser/src/parser/macros.rs | 12 +- crates/swc_ecma_parser/src/parser/mod.rs | 13 +- crates/swc_ecma_parser/src/parser/object.rs | 4 +- crates/swc_ecma_parser/src/parser/pat.rs | 8 +- crates/swc_ecma_parser/src/parser/util.rs | 108 --- 29 files changed, 1523 insertions(+), 1355 deletions(-) create mode 100644 crates/swc_ecma_lexer/Cargo.toml rename crates/{swc_ecma_parser => swc_ecma_lexer}/benches/lexer.rs (62%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/error.rs (99%) create mode 100644 crates/swc_ecma_lexer/src/input.rs rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/comments_buffer.rs (100%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/input.rs (100%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/jsx.rs (100%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/mod.rs (99%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/number.rs (100%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/state.rs (99%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/table.rs (100%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/tests.rs (99%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/util.rs (100%) rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/lexer/whitespace.rs (100%) create mode 100644 crates/swc_ecma_lexer/src/lib.rs rename crates/{swc_ecma_parser => swc_ecma_lexer}/src/token.rs (96%) create mode 100644 crates/swc_ecma_lexer/src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index fa253356e6ea..5fd827cb8095 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5386,6 +5386,37 @@ dependencies = [ "wide", ] +[[package]] +name = "swc_ecma_lexer" +version = "11.1.2" +dependencies = [ + "arrayvec", + "bitflags 2.6.0", + "codspeed-criterion-compat", + "criterion", + "either", + "new_debug_unreachable", + "num-bigint", + "num-traits", + "phf", + "pretty_assertions", + "rustc-hash 2.1.0", + "serde", + "serde_json", + "smallvec", + "smartstring", + "stacker", + "swc_atoms", + "swc_common", + "swc_ecma_ast", + "swc_ecma_visit", + "swc_malloc", + "testing", + "tracing", + "typed-arena", + "walkdir", +] + [[package]] name = "swc_ecma_lints" version = "12.1.0" @@ -5508,6 +5539,7 @@ dependencies = [ "swc_atoms", "swc_common", "swc_ecma_ast", + "swc_ecma_lexer", "swc_ecma_visit", "swc_malloc", "testing", diff --git a/crates/swc_ecma_lexer/Cargo.toml b/crates/swc_ecma_lexer/Cargo.toml new file mode 100644 index
000000000000..6dfdc29560fe --- /dev/null +++ b/crates/swc_ecma_lexer/Cargo.toml @@ -0,0 +1,66 @@ +[package] +authors = ["강동윤 <kdy1997.dev@gmail.com>"] +description = "Feature-complete es2019 lexer." +documentation = "https://rustdoc.swc.rs/swc_ecma_lexer/" +edition = { workspace = true } +include = ["Cargo.toml", "src/**/*.rs", "examples/**/*.rs"] +license = { workspace = true } +name = "swc_ecma_lexer" +repository = { workspace = true } +version = "11.1.2" + [package.metadata.docs.rs] + all-features = true + rustdoc-args = ["--cfg", "docsrs"] + +[lib] +bench = false + +[features] +# Used for debugging +debug = ["tracing-spans"] +default = ["typescript", "stacker"] +tracing-spans = [] +typescript = [] +verify = ["swc_ecma_visit"] + +[dependencies] +arrayvec = { workspace = true } +bitflags = { workspace = true } +either = { workspace = true } +num-bigint = { workspace = true } +num-traits = { workspace = true } +rustc-hash = { workspace = true } +serde = { workspace = true, features = ["derive"] } +smallvec = { workspace = true } +smartstring = { workspace = true } +tracing = { workspace = true } +typed-arena = { workspace = true } + +new_debug_unreachable = { workspace = true } +phf = { workspace = true, features = ["macros"] } +swc_atoms = { version = "5.0.0", path = "../swc_atoms" } +swc_common = { version = "8.1.0", path = "../swc_common" } +swc_ecma_ast = { version = "8.1.2", path = "../swc_ecma_ast" } +swc_ecma_visit = { version = "8.0.0", path = "../swc_ecma_visit", optional = true } + +[target.'cfg(not(any(target_arch = "wasm32", target_arch = "arm")))'.dependencies] +stacker = { version = "0.1.15", optional = true } + +[dev-dependencies] +criterion = { workspace = true } +pretty_assertions = { workspace = true } +serde_json = { workspace = true } +walkdir = { workspace = true } + +codspeed-criterion-compat = { workspace = true } +swc_ecma_ast = { version = "8.1.2", path = "../swc_ecma_ast", features = [ + "serde-impl", +] } +swc_ecma_visit = { version = "8.0.0", path = "../swc_ecma_visit" } +swc_malloc = { version = "1.2.2", path = "../swc_malloc" } +testing = { version = "9.0.0", path = "../testing" } + +[[bench]] +harness = false +name = "lexer" diff --git a/crates/swc_ecma_parser/benches/lexer.rs b/crates/swc_ecma_lexer/benches/lexer.rs similarity index 62% rename from crates/swc_ecma_parser/benches/lexer.rs rename to crates/swc_ecma_lexer/benches/lexer.rs index 5f3a9e8f8b48..d5e24f35a0f1 100644 --- a/crates/swc_ecma_parser/benches/lexer.rs +++ b/crates/swc_ecma_lexer/benches/lexer.rs @@ -1,8 +1,8 @@ extern crate swc_malloc; use codspeed_criterion_compat::{black_box, criterion_group, criterion_main, Bencher, Criterion}; -use swc_common::FileName; -use swc_ecma_parser::{lexer::Lexer, StringInput, Syntax, TsSyntax}; +use swc_common::{input::StringInput, FileName}; +use swc_ecma_lexer::{lexer::Lexer, Syntax, TsSyntax}; fn bench_module(b: &mut Bencher, syntax: Syntax, src: &'static str) { let _ = ::testing::run_test(false, |cm, _| { @@ -21,14 +21,18 @@ fn bench_module(b: &mut Bencher, syntax: Syntax, src: &'static str) { fn bench_files(c: &mut Criterion) { c.bench_function("es/lexer/colors", |b| { // Copied from ratel-rust - bench_module(b, Default::default(), include_str!("../colors.js")) + bench_module( + b, + Default::default(), + include_str!("../../swc_ecma_parser/colors.js"), + ) }); c.bench_function("es/lexer/angular", |b| { bench_module( b, Default::default(), - include_str!("./files/angular-1.2.5.js"), + include_str!("../../swc_ecma_parser/benches/files/angular-1.2.5.js"), ) }); @@ -36,7
+40,7 @@ fn bench_files(c: &mut Criterion) { bench_module( b, Default::default(), - include_str!("./files/backbone-1.1.0.js"), + include_str!("../../swc_ecma_parser/benches/files/backbone-1.1.0.js"), ) }); @@ -44,7 +48,7 @@ fn bench_files(c: &mut Criterion) { bench_module( b, Default::default(), - include_str!("./files/jquery-1.9.1.js"), + include_str!("../../swc_ecma_parser/benches/files/jquery-1.9.1.js"), ) }); @@ -52,14 +56,14 @@ fn bench_files(c: &mut Criterion) { bench_module( b, Default::default(), - include_str!("./files/jquery.mobile-1.4.2.js"), + include_str!("../../swc_ecma_parser/benches/files/jquery.mobile-1.4.2.js"), ) }); c.bench_function("es/lexer/mootools", |b| { bench_module( b, Default::default(), - include_str!("./files/mootools-1.4.5.js"), + include_str!("../../swc_ecma_parser/benches/files/mootools-1.4.5.js"), ) }); @@ -67,7 +71,7 @@ fn bench_files(c: &mut Criterion) { bench_module( b, Default::default(), - include_str!("./files/underscore-1.5.2.js"), + include_str!("../../swc_ecma_parser/benches/files/underscore-1.5.2.js"), ) }); @@ -75,12 +79,16 @@ fn bench_files(c: &mut Criterion) { bench_module( b, Default::default(), - include_str!("./files/three-0.138.3.js"), + include_str!("../../swc_ecma_parser/benches/files/three-0.138.3.js"), ) }); c.bench_function("es/lexer/yui", |b| { - bench_module(b, Default::default(), include_str!("./files/yui-3.12.0.js")) + bench_module( + b, + Default::default(), + include_str!("../../swc_ecma_parser/benches/files/yui-3.12.0.js"), + ) }); c.bench_function("es/lexer/cal-com", |b| { @@ -90,12 +98,16 @@ fn bench_files(c: &mut Criterion) { tsx: true, ..Default::default() }), - include_str!("./files/cal.com.tsx"), + include_str!("../../swc_ecma_parser/benches/files/cal.com.tsx"), ) }); c.bench_function("es/lexer/typescript", |b| { - bench_module(b, Default::default(), include_str!("./files/typescript.js")) + bench_module( + b, + Default::default(), + include_str!("../../swc_ecma_parser/benches/files/typescript.js"), + ) }); } diff --git a/crates/swc_ecma_parser/src/error.rs b/crates/swc_ecma_lexer/src/error.rs similarity index 99% rename from crates/swc_ecma_parser/src/error.rs rename to crates/swc_ecma_lexer/src/error.rs index dc064eb3c50d..db293dedda6a 100644 --- a/crates/swc_ecma_parser/src/error.rs +++ b/crates/swc_ecma_lexer/src/error.rs @@ -24,7 +24,7 @@ impl Spanned for Error { impl Error { #[cold] - pub(crate) fn new(span: Span, error: SyntaxError) -> Self { + pub fn new(span: Span, error: SyntaxError) -> Self { Self { error: Box::new((span, error)), } diff --git a/crates/swc_ecma_lexer/src/input.rs b/crates/swc_ecma_lexer/src/input.rs new file mode 100644 index 000000000000..9ffe2a6c9e52 --- /dev/null +++ b/crates/swc_ecma_lexer/src/input.rs @@ -0,0 +1,568 @@ +use std::{cell::RefCell, mem, mem::take, rc::Rc}; + +use debug_unreachable::debug_unreachable; +use lexer::TokenContexts; +use swc_common::{BytePos, Span}; +use swc_ecma_ast::EsVersion; + +use crate::{ + error::Error, + lexer::{self}, + tok, + token::*, + Context, Syntax, +}; + +/// Clone should be cheap if you are parsing typescript because typescript +/// syntax requires backtracking. 
+pub trait Tokens: Clone + Iterator<Item = TokenAndSpan> { + fn set_ctx(&mut self, ctx: Context); + fn ctx(&self) -> Context; + fn syntax(&self) -> Syntax; + fn target(&self) -> EsVersion; + + fn start_pos(&self) -> BytePos { + BytePos(0) + } + + fn set_expr_allowed(&mut self, allow: bool); + fn set_next_regexp(&mut self, start: Option<BytePos>); + + fn token_context(&self) -> &lexer::TokenContexts; + fn token_context_mut(&mut self) -> &mut lexer::TokenContexts; + fn set_token_context(&mut self, _c: lexer::TokenContexts); + + /// Implementors should use Rc<RefCell<Vec<Error>>>. + /// + /// It is required because the parser should backtrack while parsing + /// typescript code. + fn add_error(&self, error: Error); + + /// Add an error which is valid syntax in script mode. + /// + /// These errors should be dropped if the program is not a module. + /// + /// Implementors should check for [Context]::Module, and buffer errors if + /// module mode is off. Also, implementors should move errors to the error + /// buffer on set_ctx if the parser mode becomes module mode. + fn add_module_mode_error(&self, error: Error); + + fn end_pos(&self) -> BytePos; + + fn take_errors(&mut self) -> Vec<Error>; + + /// If the program was parsed as a script, this contains the module + /// errors should the program be identified as a module in the future. + fn take_script_module_errors(&mut self) -> Vec<Error>; +} + +#[derive(Clone)] +pub struct TokensInput { + iter: <Vec<TokenAndSpan> as IntoIterator>::IntoIter, + ctx: Context, + syntax: Syntax, + start_pos: BytePos, + target: EsVersion, + token_ctx: TokenContexts, + errors: Rc<RefCell<Vec<Error>>>, + module_errors: Rc<RefCell<Vec<Error>>>, +} + +impl TokensInput { + pub fn new(tokens: Vec<TokenAndSpan>, ctx: Context, syntax: Syntax, target: EsVersion) -> Self { + let start_pos = tokens.first().map(|t| t.span.lo).unwrap_or(BytePos(0)); + + TokensInput { + iter: tokens.into_iter(), + ctx, + syntax, + start_pos, + target, + token_ctx: Default::default(), + errors: Default::default(), + module_errors: Default::default(), + } + } +} + +impl Iterator for TokensInput { + type Item = TokenAndSpan; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next() + } +} + +impl Tokens for TokensInput { + fn set_ctx(&mut self, ctx: Context) { + if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() { + let mut module_errors = self.module_errors.borrow_mut(); + self.errors.borrow_mut().append(&mut *module_errors); + } + self.ctx = ctx; + } + + fn ctx(&self) -> Context { + self.ctx + } + + fn syntax(&self) -> Syntax { + self.syntax + } + + fn target(&self) -> EsVersion { + self.target + } + + fn start_pos(&self) -> BytePos { + self.start_pos + } + + fn set_expr_allowed(&mut self, _: bool) {} + + fn set_next_regexp(&mut self, _: Option<BytePos>) {} + + fn token_context(&self) -> &TokenContexts { + &self.token_ctx + } + + fn token_context_mut(&mut self) -> &mut TokenContexts { + &mut self.token_ctx + } + + fn set_token_context(&mut self, c: TokenContexts) { + self.token_ctx = c; + } + + fn add_error(&self, error: Error) { + self.errors.borrow_mut().push(error); + } + + fn add_module_mode_error(&self, error: Error) { + if self.ctx.contains(Context::Module) { + self.add_error(error); + return; + } + self.module_errors.borrow_mut().push(error); + } + + fn take_errors(&mut self) -> Vec<Error> { + take(&mut self.errors.borrow_mut()) + } + + fn take_script_module_errors(&mut self) -> Vec<Error> { + take(&mut self.module_errors.borrow_mut()) + } + + fn end_pos(&self) -> BytePos { + self.iter + .as_slice() + .last() + .map(|t| t.span.hi) + .unwrap_or(self.start_pos) + } +} + +/// Note: The lexer needs access to the parser's context to lex correctly. +#[derive(Debug)] +pub struct Capturing<I: Tokens> { + inner: I, + captured: Rc<RefCell<Vec<TokenAndSpan>>>, +} + +impl<I: Tokens> Clone for Capturing<I> { + fn clone(&self) -> Self { + Capturing { + inner: self.inner.clone(), + captured: self.captured.clone(), + } + } +} + +impl<I: Tokens> Capturing<I> { + pub fn new(input: I) -> Self { + Capturing { + inner: input, + captured: Default::default(), + } + } + + pub fn tokens(&self) -> Rc<RefCell<Vec<TokenAndSpan>>> { + self.captured.clone() + } + + /// Take captured tokens + pub fn take(&mut self) -> Vec<TokenAndSpan> { + mem::take(&mut *self.captured.borrow_mut()) + } +} + +impl<I: Tokens> Iterator for Capturing<I> { + type Item = TokenAndSpan; + + fn next(&mut self) -> Option<Self::Item> { + let next = self.inner.next(); + + match next { + Some(ts) => { + let mut v = self.captured.borrow_mut(); + + // remove tokens that could change due to backtracking + while let Some(last) = v.last() { + if last.span.lo >= ts.span.lo { + v.pop(); + } else { + break; + } + } + + v.push(ts.clone()); + + Some(ts) + } + None => None, + } + } +} + +impl<I: Tokens> Tokens for Capturing<I> { + fn set_ctx(&mut self, ctx: Context) { + self.inner.set_ctx(ctx) + } + + fn ctx(&self) -> Context { + self.inner.ctx() + } + + fn syntax(&self) -> Syntax { + self.inner.syntax() + } + + fn target(&self) -> EsVersion { + self.inner.target() + } + + fn start_pos(&self) -> BytePos { + self.inner.start_pos() + } + + fn set_expr_allowed(&mut self, allow: bool) { + self.inner.set_expr_allowed(allow) + } + + fn set_next_regexp(&mut self, start: Option<BytePos>) { + self.inner.set_next_regexp(start); + } + + fn token_context(&self) -> &TokenContexts { + self.inner.token_context() + } + + fn token_context_mut(&mut self) -> &mut TokenContexts { + self.inner.token_context_mut() + } + + fn set_token_context(&mut self, c: TokenContexts) { + self.inner.set_token_context(c) + } + + fn add_error(&self, error: Error) { + self.inner.add_error(error); + } + + fn add_module_mode_error(&self, error: Error) { + self.inner.add_module_mode_error(error) + } + + fn take_errors(&mut self) -> Vec<Error> { + self.inner.take_errors() + } + + fn take_script_module_errors(&mut self) -> Vec<Error> { + self.inner.take_script_module_errors() + } + + fn end_pos(&self) -> BytePos { + self.inner.end_pos() + } +} + +/// This struct is responsible for managing current token and peeked token. +#[derive(Clone)] +pub struct Buffer<I: Tokens> { + pub iter: I, + /// Span of the previous token. + pub prev_span: Span, + pub cur: Option<TokenAndSpan>, + /// Peeked token + pub next: Option<TokenAndSpan>, +} + +impl<I: Tokens> Buffer<I> { + pub fn new(lexer: I) -> Self { + let start_pos = lexer.start_pos(); + Buffer { + iter: lexer, + cur: None, + prev_span: Span::new(start_pos, start_pos), + next: None, + } + } + + pub fn store(&mut self, token: Token) { + debug_assert!(self.next.is_none()); + debug_assert!(self.cur.is_none()); + let span = self.prev_span; + + self.cur = Some(TokenAndSpan { + span, + token, + had_line_break: false, + }); + } + + #[allow(dead_code)] + pub fn cur_debug(&self) -> Option<&Token> { + self.cur.as_ref().map(|it| &it.token) + } + + #[cold] + #[inline(never)] + pub fn dump_cur(&mut self) -> String { + match self.cur() { + Some(v) => format!("{:?}", v), + None => "<eof>".to_string(), + } + } + + /// Returns current token. + pub fn bump(&mut self) -> Token { + let prev = match self.cur.take() { + Some(t) => t, + None => unsafe { + debug_unreachable!( + "Current token is `None`.
Parser should not call bump() without knowing \ + current token" + ) + }, + }; + self.prev_span = prev.span; + + prev.token + } + + pub fn knows_cur(&self) -> bool { + self.cur.is_some() + } + + pub fn peek(&mut self) -> Option<&Token> { + debug_assert!( + self.cur.is_some(), + "parser should not call peek() without knowing current token" + ); + + if self.next.is_none() { + self.next = self.iter.next(); + } + + self.next.as_ref().map(|ts| &ts.token) + } + + /// Returns true on eof. + pub fn had_line_break_before_cur(&mut self) -> bool { + self.cur(); + + self.cur + .as_ref() + .map(|it| it.had_line_break) + .unwrap_or_else(|| true) + } + + /// This returns true on eof. + pub fn has_linebreak_between_cur_and_peeked(&mut self) -> bool { + let _ = self.peek(); + self.next + .as_ref() + .map(|item| item.had_line_break) + .unwrap_or({ + // return true on eof. + true + }) + } + + /// Get current token. Returns `None` only on eof. + #[inline] + pub fn cur(&mut self) -> Option<&Token> { + if self.cur.is_none() { + // If we have peeked a token, take it instead of calling lexer.next() + self.cur = self.next.take().or_else(|| self.iter.next()); + } + + match &self.cur { + Some(v) => Some(&v.token), + None => None, + } + } + + #[inline] + pub fn cut_lshift(&mut self) { + debug_assert!( + self.is(&tok!("<<")), + "parser should only call cut_lshift when encountering LShift token" + ); + self.cur = Some(TokenAndSpan { + token: tok!('<'), + span: self.cur_span().with_lo(self.cur_span().lo + BytePos(1)), + had_line_break: false, + }); + } + + pub fn merge_lt_gt(&mut self) { + debug_assert!( + self.is(&tok!('<')) || self.is(&tok!('>')), + "parser should only call merge_lt_gt when encountering '<' or '>' token" + ); + + let span = self.cur_span(); + + if self.peek().is_none() { + return; + } + + let next = self.next.as_ref().unwrap(); + + if span.hi != next.span.lo { + return; + } + + let cur = self.cur.take().unwrap(); + let next = self.next.take().unwrap(); + + let token = match (&cur.token, &next.token) { + (tok!('>'), tok!('>')) => tok!(">>"), + (tok!('>'), tok!('=')) => tok!(">="), + (tok!('>'), tok!(">>")) => tok!(">>>"), + (tok!('>'), tok!(">=")) => tok!(">>="), + (tok!('>'), tok!(">>=")) => tok!(">>>="), + (tok!('<'), tok!('<')) => tok!("<<"), + (tok!('<'), tok!('=')) => tok!("<="), + (tok!('<'), tok!("<=")) => tok!("<<="), + + _ => { + self.cur = Some(cur); + self.next = Some(next); + return; + } + }; + let span = span.with_hi(next.span.hi); + + self.cur = Some(TokenAndSpan { + token, + span, + had_line_break: cur.had_line_break, + }); + } + + #[inline] + pub fn is(&mut self, expected: &Token) -> bool { + match self.cur() { + Some(t) => *expected == *t, + _ => false, + } + } + + #[inline] + pub fn eat(&mut self, expected: &Token) -> bool { + let v = self.is(expected); + if v { + self.bump(); + } + v + } + + /// Returns start of current token. + #[inline] + pub fn cur_pos(&mut self) -> BytePos { + let _ = self.cur(); + self.cur + .as_ref() + .map(|item| item.span.lo) + .unwrap_or_else(|| { + // eof + self.last_pos() + }) + } + + #[inline] + pub fn cur_span(&self) -> Span { + let data = self + .cur + .as_ref() + .map(|item| item.span) + .unwrap_or(self.prev_span); + + Span::new(data.lo, data.hi) + } + + /// Returns last byte position of previous token. + #[inline] + pub fn last_pos(&self) -> BytePos { + self.prev_span.hi + } + + /// Returns span of the previous token. 
+ #[inline] + pub fn prev_span(&self) -> Span { + self.prev_span + } + + #[inline] + pub fn get_ctx(&self) -> Context { + self.iter.ctx() + } + + #[inline] + pub fn set_ctx(&mut self, ctx: Context) { + self.iter.set_ctx(ctx); + } + + #[inline] + pub fn syntax(&self) -> Syntax { + self.iter.syntax() + } + + #[inline] + pub fn target(&self) -> EsVersion { + self.iter.target() + } + + #[inline] + pub fn set_expr_allowed(&mut self, allow: bool) { + self.iter.set_expr_allowed(allow) + } + + #[inline] + pub fn set_next_regexp(&mut self, start: Option<BytePos>) { + self.iter.set_next_regexp(start); + } + + #[inline] + pub fn token_context(&self) -> &lexer::TokenContexts { + self.iter.token_context() + } + + #[inline] + pub fn token_context_mut(&mut self) -> &mut lexer::TokenContexts { + self.iter.token_context_mut() + } + + #[inline] + pub fn set_token_context(&mut self, c: lexer::TokenContexts) { + self.iter.set_token_context(c) + } + + #[inline] + pub fn end_pos(&self) -> BytePos { + self.iter.end_pos() + } +} diff --git a/crates/swc_ecma_parser/src/lexer/comments_buffer.rs b/crates/swc_ecma_lexer/src/lexer/comments_buffer.rs similarity index 100% rename from crates/swc_ecma_parser/src/lexer/comments_buffer.rs rename to crates/swc_ecma_lexer/src/lexer/comments_buffer.rs diff --git a/crates/swc_ecma_parser/src/lexer/input.rs b/crates/swc_ecma_lexer/src/lexer/input.rs similarity index 100% rename from crates/swc_ecma_parser/src/lexer/input.rs rename to crates/swc_ecma_lexer/src/lexer/input.rs diff --git a/crates/swc_ecma_parser/src/lexer/jsx.rs b/crates/swc_ecma_lexer/src/lexer/jsx.rs similarity index 100% rename from crates/swc_ecma_parser/src/lexer/jsx.rs rename to crates/swc_ecma_lexer/src/lexer/jsx.rs diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_lexer/src/lexer/mod.rs similarity index 99% rename from crates/swc_ecma_parser/src/lexer/mod.rs rename to crates/swc_ecma_lexer/src/lexer/mod.rs index b4c170ed08d3..ee638ab9fd27 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_lexer/src/lexer/mod.rs @@ -21,6 +21,7 @@ use self::{ }; use crate::{ error::{Error, SyntaxError}, + tok, token::{BinOpToken, IdentLike, Token, Word}, Context, Syntax, }; @@ -138,7 +139,7 @@ pub struct Lexer<'a> { /// [Some] if comment parsing is enabled. Otherwise [None] comments_buffer: Option<CommentsBuffer>, - pub(crate) ctx: Context, + pub ctx: Context, input: StringInput<'a>, start_pos: BytePos, @@ -791,6 +792,7 @@ impl Lexer<'_> { // 'await' and 'yield' may have the semantics of a reserved word, which means the // lexer should know the context or the parser should handle this error. Our // approach to this problem is the former.
+ if has_escape && self.ctx.is_reserved(&word) { self.error( start, diff --git a/crates/swc_ecma_parser/src/lexer/number.rs b/crates/swc_ecma_lexer/src/lexer/number.rs similarity index 100% rename from crates/swc_ecma_parser/src/lexer/number.rs rename to crates/swc_ecma_lexer/src/lexer/number.rs diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_lexer/src/lexer/state.rs similarity index 99% rename from crates/swc_ecma_parser/src/lexer/state.rs rename to crates/swc_ecma_lexer/src/lexer/state.rs index 9327d77cd3be..a13b891aaa3e 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_lexer/src/lexer/state.rs @@ -2,6 +2,7 @@ use std::mem::take; use smallvec::{smallvec, SmallVec}; use swc_common::{BytePos, Span}; +use swc_ecma_ast::EsVersion; use tracing::trace; use super::{ @@ -13,7 +14,7 @@ use crate::{ input::Tokens, lexer::util::CharExt, token::{BinOpToken, Keyword, Token, TokenAndSpan, TokenKind, WordKind}, - EsVersion, Syntax, + Syntax, *, }; /// State of lexer. @@ -667,7 +668,7 @@ impl State { } #[derive(Clone, Default)] -pub struct TokenContexts(pub(crate) SmallVec<[TokenContext; 128]>); +pub struct TokenContexts(pub SmallVec<[TokenContext; 128]>); impl TokenContexts { /// Returns true if following `LBrace` token is `block statement` according @@ -771,7 +772,7 @@ } #[inline] - pub(crate) fn push(&mut self, t: TokenContext) { + pub fn push(&mut self, t: TokenContext) { self.0.push(t); if cfg!(feature = "debug") { diff --git a/crates/swc_ecma_parser/src/lexer/table.rs b/crates/swc_ecma_lexer/src/lexer/table.rs similarity index 100% rename from crates/swc_ecma_parser/src/lexer/table.rs rename to crates/swc_ecma_lexer/src/lexer/table.rs diff --git a/crates/swc_ecma_parser/src/lexer/tests.rs b/crates/swc_ecma_lexer/src/lexer/tests.rs similarity index 99% rename from crates/swc_ecma_parser/src/lexer/tests.rs rename to crates/swc_ecma_lexer/src/lexer/tests.rs index 4dda678f7ec5..cb7c96cd6370 100644 --- a/crates/swc_ecma_parser/src/lexer/tests.rs +++ b/crates/swc_ecma_lexer/src/lexer/tests.rs @@ -18,7 +18,7 @@ use crate::{ Token::{self, *}, TokenAndSpan, Word, }, - Syntax, + *, }; fn sp(r: Range<usize>) -> Span { @@ -1659,15 +1659,17 @@ fn issue_915_1() { ); } +const COLOR_JS_CODE: &str = include_str!("../../../swc_ecma_parser/colors.js"); + #[bench] fn lex_colors_js(b: &mut Bencher) { - b.bytes = include_str!("../../colors.js").len() as _; + b.bytes = COLOR_JS_CODE.len() as _; b.iter(|| { let _ = with_lexer( Syntax::default(), Default::default(), - include_str!("../../colors.js"), + COLOR_JS_CODE, |lexer| { for t in lexer { black_box(t); @@ -1680,13 +1682,13 @@ fn lex_colors_ts(b: &mut Bencher) { - b.bytes = include_str!("../../colors.js").len() as _; + b.bytes = COLOR_JS_CODE.len() as _; b.iter(|| { let _ = with_lexer( Syntax::Typescript(Default::default()), Default::default(), - include_str!("../../colors.js"), + COLOR_JS_CODE, |lexer| { for t in lexer { black_box(t); diff --git a/crates/swc_ecma_parser/src/lexer/util.rs b/crates/swc_ecma_lexer/src/lexer/util.rs similarity index 100% rename from crates/swc_ecma_parser/src/lexer/util.rs rename to crates/swc_ecma_lexer/src/lexer/util.rs diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_lexer/src/lexer/whitespace.rs similarity index 100% rename from crates/swc_ecma_parser/src/lexer/whitespace.rs rename to crates/swc_ecma_lexer/src/lexer/whitespace.rs diff --git a/crates/swc_ecma_lexer/src/lib.rs
b/crates/swc_ecma_lexer/src/lib.rs new file mode 100644 index 000000000000..74d216b0beed --- /dev/null +++ b/crates/swc_ecma_lexer/src/lib.rs @@ -0,0 +1,653 @@ +#![cfg_attr(docsrs, feature(doc_cfg))] +#![cfg_attr(test, feature(test))] +#![deny(clippy::all)] +#![deny(unused)] +#![allow(clippy::nonminimal_bool)] +#![allow(clippy::too_many_arguments)] +#![allow(clippy::unnecessary_unwrap)] +#![allow(clippy::vec_box)] +#![allow(clippy::wrong_self_convention)] +#![allow(clippy::match_like_matches_macro)] + +use serde::{Deserialize, Serialize}; + +pub mod lexer; + +use input::Tokens; +pub use lexer::*; + +#[macro_use] +pub mod token; +pub mod error; +pub mod input; +mod utils; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(deny_unknown_fields, tag = "syntax")] +pub enum Syntax { + /// Standard + #[serde(rename = "ecmascript")] + Es(EsSyntax), + /// This variant requires the cargo feature `typescript` to be enabled. + #[cfg(feature = "typescript")] + #[cfg_attr(docsrs, doc(cfg(feature = "typescript")))] + #[serde(rename = "typescript")] + Typescript(TsSyntax), +} + +impl Default for Syntax { + fn default() -> Self { + Syntax::Es(Default::default()) + } +} + +impl Syntax { + pub fn auto_accessors(self) -> bool { + match self { + Syntax::Es(EsSyntax { + auto_accessors: true, + .. + }) => true, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => true, + _ => false, + } + } + + pub fn import_attributes(self) -> bool { + match self { + Syntax::Es(EsSyntax { + import_attributes, .. + }) => import_attributes, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => true, + } + } + + /// Should we parse jsx? + pub fn jsx(self) -> bool { + match self { + Syntax::Es(EsSyntax { jsx: true, .. }) => true, + #[cfg(feature = "typescript")] + Syntax::Typescript(TsSyntax { tsx: true, .. }) => true, + _ => false, + } + } + + pub fn fn_bind(self) -> bool { + matches!(self, Syntax::Es(EsSyntax { fn_bind: true, .. })) + } + + pub fn decorators(self) -> bool { + match self { + Syntax::Es(EsSyntax { + decorators: true, .. + }) => true, + #[cfg(feature = "typescript")] + Syntax::Typescript(TsSyntax { + decorators: true, .. + }) => true, + _ => false, + } + } + + pub fn decorators_before_export(self) -> bool { + match self { + Syntax::Es(EsSyntax { + decorators_before_export: true, + .. + }) => true, + #[cfg(feature = "typescript")] + Syntax::Typescript(..) => true, + _ => false, + } + } + + /// Should we parse typescript? + #[cfg(not(feature = "typescript"))] + pub const fn typescript(self) -> bool { + false + } + + /// Should we parse typescript? + #[cfg(feature = "typescript")] + pub const fn typescript(self) -> bool { + matches!(self, Syntax::Typescript(..)) + } + + pub fn export_default_from(self) -> bool { + matches!( + self, + Syntax::Es(EsSyntax { + export_default_from: true, + .. + }) + ) + } + + pub fn dts(self) -> bool { + match self { + #[cfg(feature = "typescript")] + Syntax::Typescript(t) => t.dts, + _ => false, + } + } + + pub fn allow_super_outside_method(self) -> bool { + match self { + Syntax::Es(EsSyntax { + allow_super_outside_method, + .. + }) => allow_super_outside_method, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => true, + } + } + + pub fn allow_return_outside_function(self) -> bool { + match self { + Syntax::Es(EsSyntax { + allow_return_outside_function, + .. 
+ }) => allow_return_outside_function, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => false, + } + } + + pub fn early_errors(self) -> bool { + match self { + #[cfg(feature = "typescript")] + Syntax::Typescript(t) => !t.no_early_errors, + Syntax::Es(..) => true, + } + } + + pub fn disallow_ambiguous_jsx_like(self) -> bool { + match self { + #[cfg(feature = "typescript")] + Syntax::Typescript(t) => t.disallow_ambiguous_jsx_like, + _ => false, + } + } + + pub fn explicit_resource_management(&self) -> bool { + match self { + Syntax::Es(EsSyntax { + explicit_resource_management: using_decl, + .. + }) => *using_decl, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => true, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TsSyntax { + #[serde(default)] + pub tsx: bool, + + #[serde(default)] + pub decorators: bool, + + /// `.d.ts` + #[serde(skip, default)] + pub dts: bool, + + #[serde(skip, default)] + pub no_early_errors: bool, + + /// babel: `disallowAmbiguousJSXLike` + /// Even when JSX parsing is not enabled, this option disallows using syntax + /// that would be ambiguous with JSX (`<X> y` type assertions and + /// `<X>()=>{}` type arguments) + /// see: https://babeljs.io/docs/en/babel-plugin-transform-typescript#disallowambiguousjsxlike + #[serde(skip, default)] + pub disallow_ambiguous_jsx_like: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct EsSyntax { + #[serde(default)] + pub jsx: bool, + + /// Support function bind expression. + #[serde(rename = "functionBind")] + #[serde(default)] + pub fn_bind: bool, + + /// Enable decorators. + #[serde(default)] + pub decorators: bool, + + /// babel: `decorators.decoratorsBeforeExport` + /// + /// Effective only if `decorator` is true. + #[serde(rename = "decoratorsBeforeExport")] + #[serde(default)] + pub decorators_before_export: bool, + + #[serde(default)] + pub export_default_from: bool, + + /// Stage 3. + #[serde(default, alias = "importAssertions")] + pub import_attributes: bool, + + #[serde(default, rename = "allowSuperOutsideMethod")] + pub allow_super_outside_method: bool, + + #[serde(default, rename = "allowReturnOutsideFunction")] + pub allow_return_outside_function: bool, + + #[serde(default)] + pub auto_accessors: bool, + + #[serde(default)] + pub explicit_resource_management: bool, +} + +bitflags::bitflags! { + #[derive(Debug, Clone, Copy, Default)] + pub struct Context: u32 { + + /// `true` while backtracking + const IgnoreError = 1 << 0; + + /// Is in module code? + const Module = 1 << 1; + const CanBeModule = 1 << 2; + const Strict = 1 << 3; + + const ForLoopInit = 1 << 4; + const ForAwaitLoopInit = 1 << 5; + + const IncludeInExpr = 1 << 6; + /// If true, await expression is parsed, and "await" is treated as a + /// keyword. + const InAsync = 1 << 7; + /// If true, yield expression is parsed, and "yield" is treated as a + /// keyword. + const InGenerator = 1 << 8; + + /// If true, await is treated as a keyword. + const InStaticBlock = 1 << 9; + + const IsContinueAllowed = 1 << 10; + const IsBreakAllowed = 1 << 11; + + const InType = 1 << 12; + /// Typescript extension. + const ShouldNotLexLtOrGtAsType = 1 << 13; + /// Typescript extension. + const InDeclare = 1 << 14; + + /// If true, `:` should not be treated as a type annotation.
+ const InCondExpr = 1 << 15; + const WillExpectColonForCond = 1 << 16; + + const InClass = 1 << 17; + + const InClassField = 1 << 18; + + const InFunction = 1 << 19; + + /// This indicates current scope or the scope out of arrow function is + /// function declaration or function expression or not. + const InsideNonArrowFunctionScope = 1 << 20; + + const InParameters = 1 << 21; + + const HasSuperClass = 1 << 22; + + const InPropertyName = 1 << 23; + + const InForcedJsxContext = 1 << 24; + + // If true, allow super.x and super[x] + const AllowDirectSuper = 1 << 25; + + const IgnoreElseClause = 1 << 26; + + const DisallowConditionalTypes = 1 << 27; + + const AllowUsingDecl = 1 << 28; + + const TopLevel = 1 << 29; + } +} + +#[cfg(test)] +fn with_test_sess<F, Ret>(src: &str, f: F) -> Result<Ret, ::testing::StdErr> +where + F: FnOnce(&swc_common::errors::Handler, swc_common::input::StringInput<'_>) -> Result<Ret, ()>, +{ + use swc_common::FileName; + + ::testing::run_test(false, |cm, handler| { + let fm = cm.new_source_file(FileName::Real("testing".into()).into(), src.into()); + + f(handler, (&*fm).into()) + }) +} + +#[macro_export] +macro_rules! tok { + ('`') => { + $crate::token::Token::BackQuote + }; + // (';') => { Token::Semi }; + ('@') => { + $crate::token::Token::At + }; + ('#') => { + $crate::token::Token::Hash + }; + + ('&') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::BitAnd) + }; + ('|') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::BitOr) + }; + ('^') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::BitXor) + }; + ('+') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::Add) + }; + ('-') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::Sub) + }; + ("??") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::NullishCoalescing) + }; + ('~') => { + $crate::token::Token::Tilde + }; + ('!') => { + $crate::token::Token::Bang + }; + ("&&") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::LogicalAnd) + }; + ("||") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::LogicalOr) + }; + ("&&=") => { + $crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::AndAssign) + }; + ("||=") => { + $crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::OrAssign) + }; + ("??=") => { + $crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::NullishAssign) + }; + + ("==") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::EqEq) + }; + ("===") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::EqEqEq) + }; + ("!=") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::NotEq) + }; + ("!==") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::NotEqEq) + }; + + (',') => { + $crate::token::Token::Comma + }; + ('?') => { + $crate::token::Token::QuestionMark + }; + (':') => { + $crate::token::Token::Colon + }; + ('.') => { + $crate::token::Token::Dot + }; + ("=>") => { + $crate::token::Token::Arrow + }; + ("...") => { + $crate::token::Token::DotDotDot + }; + ("${") => { + $crate::token::Token::DollarLBrace + }; + + ('+') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::Add) + }; + ('-') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::Sub) + }; + ('*') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::Mul) + }; + ('/') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::Div) + }; + ("/=") => { + $crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::DivAssign) + }; + ('%') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::Mod) + }; + ('~') => { +
$crate::token::Token::Tilde + }; + ('<') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::Lt) + }; + ("<<") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::LShift) + }; + ("<=") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::LtEq) + }; + ("<<=") => { + $crate::token::Token::AssignOp($crate::token::AssignOp::LShiftAssign) + }; + ('>') => { + $crate::token::Token::BinOp($crate::token::BinOpToken::Gt) + }; + (">>") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::RShift) + }; + (">>>") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::ZeroFillRShift) + }; + (">=") => { + $crate::token::Token::BinOp($crate::token::BinOpToken::GtEq) + }; + (">>=") => { + $crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::RShiftAssign) + }; + (">>>=") => { + $crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::ZeroFillRShiftAssign) + }; + + ("++") => { + $crate::token::Token::PlusPlus + }; + ("--") => { + $crate::token::Token::MinusMinus + }; + + ('=') => { + $crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::Assign) + }; + + ('(') => { + $crate::token::Token::LParen + }; + (')') => { + $crate::token::Token::RParen + }; + ('{') => { + $crate::token::Token::LBrace + }; + ('}') => { + $crate::token::Token::RBrace + }; + ('[') => { + $crate::token::Token::LBracket + }; + (']') => { + $crate::token::Token::RBracket + }; + + ("await") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Await)) + }; + ("break") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Break)) + }; + ("case") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Case)) + }; + ("catch") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Catch)) + }; + ("class") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Class)) + }; + ("const") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Const)) + }; + ("continue") => { + $crate::token::Token::Word($crate::token::Word::Keyword( + $crate::token::Keyword::Continue, + )) + }; + ("debugger") => { + $crate::token::Token::Word($crate::token::Word::Keyword( + $crate::token::Keyword::Debugger, + )) + }; + ("default") => { + $crate::token::Token::Word($crate::token::Word::Keyword( + $crate::token::Keyword::Default_, + )) + }; + ("delete") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Delete)) + }; + ("do") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Do)) + }; + ("else") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Else)) + }; + ("export") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Export)) + }; + ("extends") => { + $crate::token::Token::Word($crate::token::Word::Keyword( + $crate::token::Keyword::Extends, + )) + }; + ("false") => { + $crate::token::Token::Word($crate::token::Word::False) + }; + ("finally") => { + $crate::token::Token::Word($crate::token::Word::Keyword( + $crate::token::Keyword::Finally, + )) + }; + ("for") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::For)) + }; + ("function") => { + $crate::token::Token::Word($crate::token::Word::Keyword( + $crate::token::Keyword::Function, + )) + }; + ("if") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::If)) + }; + ("in") 
=> { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::In)) + }; + ("instanceof") => { + $crate::token::Token::Word($crate::token::Word::Keyword( + $crate::token::Keyword::InstanceOf, + )) + }; + ("import") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Import)) + }; + ("let") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Let)) + }; + ("new") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::New)) + }; + ("null") => { + $crate::token::Token::Word($crate::token::Word::Null) + }; + + ("return") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Return)) + }; + ("super") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Super)) + }; + ("switch") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Switch)) + }; + ("this") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::This)) + }; + ("throw") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Throw)) + }; + ("true") => { + $crate::token::Token::Word($crate::token::Word::True) + }; + ("try") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Try)) + }; + ("typeof") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::TypeOf)) + }; + ("var") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Var)) + }; + ("void") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Void)) + }; + ("while") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::While)) + }; + ("with") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::With)) + }; + ("yield") => { + $crate::token::Token::Word($crate::token::Word::Keyword($crate::token::Keyword::Yield)) + }; + + // ---------- + // JSX + // ---------- + (JSXTagStart) => { + $crate::token::Token::JSXTagStart + }; + + (JSXTagEnd) => { + $crate::token::Token::JSXTagEnd + }; + + ($tt:tt) => { + $crate::token::Token::Word($crate::token::Word::Ident($crate::token::IdentLike::Known( + known_ident!($tt), + ))) + }; +} diff --git a/crates/swc_ecma_parser/src/token.rs b/crates/swc_ecma_lexer/src/token.rs similarity index 96% rename from crates/swc_ecma_parser/src/token.rs rename to crates/swc_ecma_lexer/src/token.rs index 5654995f402f..cc11205a0a4e 100644 --- a/crates/swc_ecma_parser/src/token.rs +++ b/crates/swc_ecma_lexer/src/token.rs @@ -29,30 +29,33 @@ macro_rules! define_known_ident { } #[allow(unused)] + #[macro_export] macro_rules! known_ident_token { $( ($value) => { - crate::token::TokenKind::Word(crate::token::WordKind::Ident( - crate::token::IdentKind::Known(crate::token::KnownIdent::$name), + $crate::token::TokenKind::Word($crate::token::WordKind::Ident( + $crate::token::IdentKind::Known($crate::token::KnownIdent::$name), )) }; )* } #[allow(unused)] + #[macro_export] macro_rules! known_ident { $( ($value) => { - crate::token::KnownIdent::$name + $crate::token::KnownIdent::$name }; )* } #[allow(unused)] + #[macro_export] macro_rules! ident_like { $( ($value) => { - crate::token::IdentLike::Known( - crate::token::KnownIdent::$name + $crate::token::IdentLike::Known( + $crate::token::KnownIdent::$name ) }; )* @@ -64,10 +67,7 @@ macro_rules! 
define_known_ident { )* }; - - - - impl From<KnownIdent> for Atom { + impl From<KnownIdent> for swc_atoms::Atom { fn from(s: KnownIdent) -> Self { match s { @@ -311,7 +311,7 @@ pub enum Token { } impl Token { - pub(crate) fn kind(&self) -> TokenKind { + pub fn kind(&self) -> TokenKind { match self { Self::Arrow => TokenKind::Arrow, Self::Hash => TokenKind::Hash, @@ -353,7 +353,7 @@ } impl TokenKind { - pub(crate) const fn before_expr(self) -> bool { + pub const fn before_expr(self) -> bool { match self { Self::Word(w) => w.before_expr(), Self::BinOp(w) => w.before_expr(), @@ -377,7 +377,7 @@ } } - pub(crate) const fn starts_expr(self) -> bool { + pub const fn starts_expr(self) -> bool { match self { Self::Word(w) => w.starts_expr(), Self::BinOp(w) => w.starts_expr(), @@ -712,12 +712,13 @@ impl Display for KnownIdent { } } +#[macro_export] macro_rules! declare_keyword { ($( $name:ident => $value:tt, )*) => { impl Keyword { - pub(crate) fn into_atom(self) -> Atom { + pub fn into_atom(self) -> Atom { match self { $(Keyword::$name => atom!($value),)* } @@ -919,7 +920,7 @@ impl TokenKind { /// Returns true if `self` can follow keyword let. /// /// e.g. `let a = xx;`, `let {a:{}} = 1` - pub(crate) fn follows_keyword_let(self, _strict: bool) -> bool { + pub fn follows_keyword_let(self, _strict: bool) -> bool { match self { Self::Word(WordKind::Keyword(Keyword::Let)) | TokenKind::LBrace @@ -933,7 +934,7 @@ } impl Word { - pub(crate) fn cow(&self) -> Cow<Atom> { + pub fn cow(&self) -> Cow<Atom> { match self { Word::Keyword(k) => Cow::Owned(k.into_atom()), Word::Ident(IdentLike::Known(w)) => Cow::Owned((*w).into()), diff --git a/crates/swc_ecma_lexer/src/utils.rs b/crates/swc_ecma_lexer/src/utils.rs new file mode 100644 index 000000000000..8ac48300412e --- /dev/null +++ b/crates/swc_ecma_lexer/src/utils.rs @@ -0,0 +1,111 @@ +use swc_atoms::Atom; + +use super::Context; +use crate::token::*; + +impl Context { + pub fn is_reserved(self, word: &Word) -> bool { + match *word { + Word::Keyword(Keyword::Let) => self.contains(Context::Strict), + Word::Keyword(Keyword::Await) => { + self.contains(Context::InAsync) + || self.contains(Context::InStaticBlock) + || self.contains(Context::Strict) + } + Word::Keyword(Keyword::Yield) => { + self.contains(Context::InGenerator) || self.contains(Context::Strict) + } + + Word::Null + | Word::True + | Word::False + | Word::Keyword(Keyword::Break) + | Word::Keyword(Keyword::Case) + | Word::Keyword(Keyword::Catch) + | Word::Keyword(Keyword::Continue) + | Word::Keyword(Keyword::Debugger) + | Word::Keyword(Keyword::Default_) + | Word::Keyword(Keyword::Do) + | Word::Keyword(Keyword::Export) + | Word::Keyword(Keyword::Else) + | Word::Keyword(Keyword::Finally) + | Word::Keyword(Keyword::For) + | Word::Keyword(Keyword::Function) + | Word::Keyword(Keyword::If) + | Word::Keyword(Keyword::Return) + | Word::Keyword(Keyword::Switch) + | Word::Keyword(Keyword::Throw) + | Word::Keyword(Keyword::Try) + | Word::Keyword(Keyword::Var) + | Word::Keyword(Keyword::Const) + | Word::Keyword(Keyword::While) + | Word::Keyword(Keyword::With) + | Word::Keyword(Keyword::New) + | Word::Keyword(Keyword::This) + | Word::Keyword(Keyword::Super) + | Word::Keyword(Keyword::Class) + | Word::Keyword(Keyword::Extends) + | Word::Keyword(Keyword::Import) + | Word::Keyword(Keyword::In) + | Word::Keyword(Keyword::InstanceOf) + | Word::Keyword(Keyword::TypeOf) + | Word::Keyword(Keyword::Void) + | Word::Keyword(Keyword::Delete) => true, + + // Future reserved word
Word::Ident(IdentLike::Known(known_ident!("enum"))) => true, + + Word::Ident(IdentLike::Known( + known_ident!("implements") + | known_ident!("package") + | known_ident!("protected") + | known_ident!("interface") + | known_ident!("private") + | known_ident!("public"), + )) if self.contains(Context::Strict) => true, + + _ => false, + } + } + + #[cfg_attr(not(feature = "verify"), inline(always))] + pub fn is_reserved_word(self, word: &Atom) -> bool { + if !cfg!(feature = "verify") { + return false; + } + + match &**word { + "let" => self.contains(Context::Strict), + // SyntaxError in the module only, not in the strict. + // ```JavaScript + // function foo() { + // "use strict"; + // let await = 1; + // } + // ``` + "await" => { + self.contains(Context::InAsync) + || self.contains(Context::InStaticBlock) + || self.contains(Context::Module) + } + "yield" => self.contains(Context::InGenerator) || self.contains(Context::Strict), + + "null" | "true" | "false" | "break" | "case" | "catch" | "continue" | "debugger" + | "default" | "do" | "export" | "else" | "finally" | "for" | "function" | "if" + | "return" | "switch" | "throw" | "try" | "var" | "const" | "while" | "with" + | "new" | "this" | "super" | "class" | "extends" | "import" | "in" | "instanceof" + | "typeof" | "void" | "delete" => true, + + // Future reserved word + "enum" => true, + + "implements" | "package" | "protected" | "interface" | "private" | "public" + if self.contains(Context::Strict) => + { + true + } + + _ => false, + } + } +} diff --git a/crates/swc_ecma_parser/Cargo.toml b/crates/swc_ecma_parser/Cargo.toml index 2e9a62741c92..8ce61d2d0d84 100644 --- a/crates/swc_ecma_parser/Cargo.toml +++ b/crates/swc_ecma_parser/Cargo.toml @@ -22,7 +22,7 @@ debug = ["tracing-spans"] default = ["typescript", "stacker"] tracing-spans = [] typescript = [] -verify = ["swc_ecma_visit"] +verify = ["swc_ecma_visit", "swc_ecma_lexer/verify"] [dependencies] arrayvec = { workspace = true } @@ -42,6 +42,7 @@ phf = { workspace = true, features = ["macros"] } swc_atoms = { version = "5.0.0", path = "../swc_atoms" } swc_common = { version = "8.1.0", path = "../swc_common" } swc_ecma_ast = { version = "8.1.2", path = "../swc_ecma_ast" } +swc_ecma_lexer = { version = "11.1.2", path = "../swc_ecma_lexer" } swc_ecma_visit = { version = "8.0.0", path = "../swc_ecma_visit", optional = true } [target.'cfg(not(any(target_arch = "wasm32", target_arch = "arm")))'.dependencies] @@ -71,9 +72,6 @@ name = "typescript" harness = false name = "compare" -[[bench]] -harness = false -name = "lexer" [[bench]] harness = false diff --git a/crates/swc_ecma_parser/src/lib.rs b/crates/swc_ecma_parser/src/lib.rs index b6154ad7ead5..ebd196bffe57 100644 --- a/crates/swc_ecma_parser/src/lib.rs +++ b/crates/swc_ecma_parser/src/lib.rs @@ -124,322 +124,19 @@ #![allow(clippy::wrong_self_convention)] #![allow(clippy::match_like_matches_macro)] -use error::Error; -use lexer::Lexer; -use serde::{Deserialize, Serialize}; pub use swc_common::input::{Input, StringInput}; use swc_common::{comments::Comments, input::SourceFileInput, SourceFile}; use swc_ecma_ast::*; +use swc_ecma_lexer::error::Error; pub use self::parser::*; #[macro_use] mod macros; -#[macro_use] -pub mod token; -pub mod error; -pub mod lexer; -mod parser; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] -#[serde(deny_unknown_fields, tag = "syntax")] -pub enum Syntax { - /// Standard - #[serde(rename = "ecmascript")] - Es(EsSyntax), - /// This variant requires the cargo feature `typescript` to be enabled. 
- #[cfg(feature = "typescript")] - #[cfg_attr(docsrs, doc(cfg(feature = "typescript")))] - #[serde(rename = "typescript")] - Typescript(TsSyntax), -} - -impl Default for Syntax { - fn default() -> Self { - Syntax::Es(Default::default()) - } -} - -impl Syntax { - fn auto_accessors(self) -> bool { - match self { - Syntax::Es(EsSyntax { - auto_accessors: true, - .. - }) => true, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => true, - _ => false, - } - } - - pub fn import_attributes(self) -> bool { - match self { - Syntax::Es(EsSyntax { - import_attributes, .. - }) => import_attributes, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => true, - } - } - - /// Should we parse jsx? - pub fn jsx(self) -> bool { - match self { - Syntax::Es(EsSyntax { jsx: true, .. }) => true, - #[cfg(feature = "typescript")] - Syntax::Typescript(TsSyntax { tsx: true, .. }) => true, - _ => false, - } - } - - pub fn fn_bind(self) -> bool { - matches!(self, Syntax::Es(EsSyntax { fn_bind: true, .. })) - } - - pub fn decorators(self) -> bool { - match self { - Syntax::Es(EsSyntax { - decorators: true, .. - }) => true, - #[cfg(feature = "typescript")] - Syntax::Typescript(TsSyntax { - decorators: true, .. - }) => true, - _ => false, - } - } - - pub fn decorators_before_export(self) -> bool { - match self { - Syntax::Es(EsSyntax { - decorators_before_export: true, - .. - }) => true, - #[cfg(feature = "typescript")] - Syntax::Typescript(..) => true, - _ => false, - } - } - - /// Should we parse typescript? - #[cfg(not(feature = "typescript"))] - pub const fn typescript(self) -> bool { - false - } - - /// Should we parse typescript? - #[cfg(feature = "typescript")] - pub const fn typescript(self) -> bool { - matches!(self, Syntax::Typescript(..)) - } - - pub fn export_default_from(self) -> bool { - matches!( - self, - Syntax::Es(EsSyntax { - export_default_from: true, - .. - }) - ) - } - - pub fn dts(self) -> bool { - match self { - #[cfg(feature = "typescript")] - Syntax::Typescript(t) => t.dts, - _ => false, - } - } - - pub(crate) fn allow_super_outside_method(self) -> bool { - match self { - Syntax::Es(EsSyntax { - allow_super_outside_method, - .. - }) => allow_super_outside_method, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => true, - } - } - - pub(crate) fn allow_return_outside_function(self) -> bool { - match self { - Syntax::Es(EsSyntax { - allow_return_outside_function, - .. - }) => allow_return_outside_function, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => false, - } - } - - pub(crate) fn early_errors(self) -> bool { - match self { - #[cfg(feature = "typescript")] - Syntax::Typescript(t) => !t.no_early_errors, - Syntax::Es(..) => true, - } - } - - fn disallow_ambiguous_jsx_like(self) -> bool { - match self { - #[cfg(feature = "typescript")] - Syntax::Typescript(t) => t.disallow_ambiguous_jsx_like, - _ => false, - } - } - - pub fn explicit_resource_management(&self) -> bool { - match self { - Syntax::Es(EsSyntax { - explicit_resource_management: using_decl, - .. 
- }) => *using_decl, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => true, - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct TsSyntax { - #[serde(default)] - pub tsx: bool, - - #[serde(default)] - pub decorators: bool, - - /// `.d.ts` - #[serde(skip, default)] - pub dts: bool, - - #[serde(skip, default)] - pub no_early_errors: bool, +use swc_ecma_lexer::Lexer; +pub use swc_ecma_lexer::{error, lexer, token, Context, EsSyntax, Syntax, TsSyntax}; - /// babel: `disallowAmbiguousJSXLike` - /// Even when JSX parsing is not enabled, this option disallows using syntax - /// that would be ambiguous with JSX (`<X> y` type assertions and - /// `<X>()=>{}` type arguments) - /// see: https://babeljs.io/docs/en/babel-plugin-transform-typescript#disallowambiguousjsxlike - #[serde(skip, default)] - pub disallow_ambiguous_jsx_like: bool, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct EsSyntax { - #[serde(default)] - pub jsx: bool, - - /// Support function bind expression. - #[serde(rename = "functionBind")] - #[serde(default)] - pub fn_bind: bool, - - /// Enable decorators. - #[serde(default)] - pub decorators: bool, - - /// babel: `decorators.decoratorsBeforeExport` - /// - /// Effective only if `decorator` is true. - #[serde(rename = "decoratorsBeforeExport")] - #[serde(default)] - pub decorators_before_export: bool, - - #[serde(default)] - pub export_default_from: bool, - - /// Stage 3. - #[serde(default, alias = "importAssertions")] - pub import_attributes: bool, - - #[serde(default, rename = "allowSuperOutsideMethod")] - pub allow_super_outside_method: bool, - - #[serde(default, rename = "allowReturnOutsideFunction")] - pub allow_return_outside_function: bool, - - #[serde(default)] - pub auto_accessors: bool, - - #[serde(default)] - pub explicit_resource_management: bool, -} - -use bitflags::bitflags; - -bitflags! { - #[derive(Debug, Clone, Copy, Default)] - pub struct Context: u32 { - - /// `true` while backtracking - const IgnoreError = 1 << 0; - - /// Is in module code? - const Module = 1 << 1; - const CanBeModule = 1 << 2; - const Strict = 1 << 3; - - const ForLoopInit = 1 << 4; - const ForAwaitLoopInit = 1 << 5; - - const IncludeInExpr = 1 << 6; - /// If true, await expression is parsed, and "await" is treated as a - /// keyword. - const InAsync = 1 << 7; - /// If true, yield expression is parsed, and "yield" is treated as a - /// keyword. - const InGenerator = 1 << 8; - - /// If true, await is treated as a keyword. - const InStaticBlock = 1 << 9; - - const IsContinueAllowed = 1 << 10; - const IsBreakAllowed = 1 << 11; - - const InType = 1 << 12; - /// Typescript extension. - const ShouldNotLexLtOrGtAsType = 1 << 13; - /// Typescript extension. - const InDeclare = 1 << 14; - - /// If true, `:` should not be treated as a type annotation. - const InCondExpr = 1 << 15; - const WillExpectColonForCond = 1 << 16; - - const InClass = 1 << 17; - - const InClassField = 1 << 18; - - const InFunction = 1 << 19; - - /// This indicates current scope or the scope out of arrow function is - /// function declaration or function expression or not.
-        const InsideNonArrowFunctionScope = 1 << 20;
-
-        const InParameters = 1 << 21;
-
-        const HasSuperClass = 1 << 22;
-
-        const InPropertyName = 1 << 23;
-
-        const InForcedJsxContext = 1 << 24;
-
-        // If true, allow super.x and super[x]
-        const AllowDirectSuper = 1 << 25;
-
-        const IgnoreElseClause = 1 << 26;
-
-        const DisallowConditionalTypes = 1 << 27;
-
-        const AllowUsingDecl = 1 << 28;
-
-        const TopLevel = 1 << 29;
-    }
-}
+mod parser;
 
 #[cfg(test)]
 fn with_test_sess<F, Ret>(src: &str, f: F) -> Result<Ret, ::testing::StdErr>
diff --git a/crates/swc_ecma_parser/src/macros.rs b/crates/swc_ecma_parser/src/macros.rs
index 46e35eb5a068..7c203cb53af7 100644
--- a/crates/swc_ecma_parser/src/macros.rs
+++ b/crates/swc_ecma_parser/src/macros.rs
@@ -1,314 +1,8 @@
-#[allow(unused)]
-macro_rules! tok {
-    ('`') => {
-        crate::token::Token::BackQuote
-    };
-    // (';') => { Token::Semi };
-    ('@') => {
-        crate::token::Token::At
-    };
-    ('#') => {
-        crate::token::Token::Hash
-    };
-
-    ('&') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::BitAnd)
-    };
-    ('|') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::BitOr)
-    };
-    ('^') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::BitXor)
-    };
-    ('+') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::Add)
-    };
-    ('-') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::Sub)
-    };
-    ("??") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::NullishCoalescing)
-    };
-    ('~') => {
-        crate::token::Token::Tilde
-    };
-    ('!') => {
-        crate::token::Token::Bang
-    };
-    ("&&") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::LogicalAnd)
-    };
-    ("||") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::LogicalOr)
-    };
-    ("&&=") => {
-        crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::AndAssign)
-    };
-    ("||=") => {
-        crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::OrAssign)
-    };
-    ("??=") => {
-        crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::NullishAssign)
-    };
-
-    ("==") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::EqEq)
-    };
-    ("===") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::EqEqEq)
-    };
-    ("!=") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::NotEq)
-    };
-    ("!==") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::NotEqEq)
-    };
-
-    (',') => {
-        crate::token::Token::Comma
-    };
-    ('?') => {
-        crate::token::Token::QuestionMark
-    };
-    (':') => {
-        crate::token::Token::Colon
-    };
-    ('.') => {
-        crate::token::Token::Dot
-    };
-    ("=>") => {
-        crate::token::Token::Arrow
-    };
-    ("...") => {
-        crate::token::Token::DotDotDot
-    };
-    ("${") => {
-        crate::token::Token::DollarLBrace
-    };
-
-    ('+') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::Add)
-    };
-    ('-') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::Sub)
-    };
-    ('*') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::Mul)
-    };
-    ('/') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::Div)
-    };
-    ("/=") => {
-        crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::DivAssign)
-    };
-    ('%') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::Mod)
-    };
-    ('~') => {
-        crate::token::Token::Tilde
-    };
-    ('<') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::Lt)
-    };
-    ("<<") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::LShift)
-    };
-    ("<=") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::LtEq)
-    };
-    ("<<=") => {
-        crate::token::Token::AssignOp(crate::token::AssignOp::LShiftAssign)
-    };
-    ('>') => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::Gt)
-    };
-    (">>") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::RShift)
-    };
-    (">>>") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::ZeroFillRShift)
-    };
-    (">=") => {
-        crate::token::Token::BinOp(crate::token::BinOpToken::GtEq)
-    };
-    (">>=") => {
-        crate::token::Token::AssignOp(crate::AssignOp::RShiftAssign)
-    };
-    (">>>=") => {
-        crate::token::Token::AssignOp(crate::AssignOp::ZeroFillRShiftAssign)
-    };
-
-    ("++") => {
-        crate::token::Token::PlusPlus
-    };
-    ("--") => {
-        crate::token::Token::MinusMinus
-    };
-
-    ('=') => {
-        crate::token::Token::AssignOp(swc_ecma_ast::AssignOp::Assign)
-    };
-
-    ('(') => {
-        crate::token::Token::LParen
-    };
-    (')') => {
-        crate::token::Token::RParen
-    };
-    ('{') => {
-        crate::token::Token::LBrace
-    };
-    ('}') => {
-        crate::token::Token::RBrace
-    };
-    ('[') => {
-        crate::token::Token::LBracket
-    };
-    (']') => {
-        crate::token::Token::RBracket
-    };
-
-    ("await") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Await))
-    };
-    ("break") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Break))
-    };
-    ("case") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Case))
-    };
-    ("catch") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Catch))
-    };
-    ("class") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Class))
-    };
-    ("const") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Const))
-    };
-    ("continue") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Continue))
-    };
-    ("debugger") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Debugger))
-    };
-    ("default") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Default_))
-    };
-    ("delete") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Delete))
-    };
-    ("do") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Do))
-    };
-    ("else") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Else))
-    };
-    ("export") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Export))
-    };
-    ("extends") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Extends))
-    };
-    ("false") => {
-        crate::token::Token::Word(crate::token::Word::False)
-    };
-    ("finally") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Finally))
-    };
-    ("for") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::For))
-    };
-    ("function") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Function))
-    };
-    ("if") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::If))
-    };
-    ("in") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::In))
-    };
-    ("instanceof") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(
-            crate::token::Keyword::InstanceOf,
-        ))
-    };
-    ("import") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Import))
-    };
-    ("let") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Let))
-    };
-    ("new") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::New))
-    };
-    ("null") => {
-        crate::token::Token::Word(crate::token::Word::Null)
-    };
-
-    ("return") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Return))
-    };
-    ("super") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Super))
-    };
-    ("switch") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Switch))
-    };
-    ("this") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::This))
-    };
-    ("throw") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Throw))
-    };
-    ("true") => {
-        crate::token::Token::Word(crate::token::Word::True)
-    };
-    ("try") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Try))
-    };
-    ("typeof") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::TypeOf))
-    };
-    ("var") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Var))
-    };
-    ("void") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Void))
-    };
-    ("while") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::While))
-    };
-    ("with") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::With))
-    };
-    ("yield") => {
-        crate::token::Token::Word(crate::token::Word::Keyword(crate::token::Keyword::Yield))
-    };
-
-    // ----------
-    // JSX
-    // ----------
-    (JSXTagStart) => {
-        crate::token::Token::JSXTagStart
-    };
-
-    (JSXTagEnd) => {
-        crate::token::Token::JSXTagEnd
-    };
-
-    ($tt:tt) => {
-        crate::token::Token::Word(crate::token::Word::Ident(crate::token::IdentLike::Known(
-            known_ident!($tt),
-        )))
-    };
-}
-
 macro_rules! token_including_semi {
     (';') => {
         Token::Semi
     };
     ($t:tt) => {
-        tok!($t)
+        swc_ecma_lexer::tok!($t)
     };
 }
diff --git a/crates/swc_ecma_parser/src/parser/expr.rs b/crates/swc_ecma_parser/src/parser/expr.rs
index f0193900013b..7d892181d465 100644
--- a/crates/swc_ecma_parser/src/parser/expr.rs
+++ b/crates/swc_ecma_parser/src/parser/expr.rs
@@ -127,7 +127,7 @@ impl<I: Tokens> Parser<I> {
         }
 
         self.state.potential_arrow_start = match *cur!(self, true) {
-            Word(Word::Ident(..)) | tok!('(') | tok!("yield") => Some(cur_pos!(self)),
+            Token::Word(Word::Ident(..)) | tok!('(') | tok!("yield") => Some(cur_pos!(self)),
             _ => None,
         };
 
@@ -2022,12 +2022,12 @@ impl<I: Tokens> Parser<I> {
         let start = cur_pos!(self);
 
         let v = match cur!(self, true) {
-            Word(Word::Null) => {
+            Token::Word(Word::Null) => {
                 bump!(self);
                 let span = span!(self, start);
                 Lit::Null(Null { span })
             }
-            Word(Word::True) | Word(Word::False) => {
+            Token::Word(Word::True) | Token::Word(Word::False) => {
                 let value = is!(self, "true");
                 bump!(self);
                 let span = span!(self, start);
diff --git a/crates/swc_ecma_parser/src/parser/ident.rs b/crates/swc_ecma_parser/src/parser/ident.rs
index 99a26921f923..6856847dafb3 100644
--- a/crates/swc_ecma_parser/src/parser/ident.rs
+++ b/crates/swc_ecma_parser/src/parser/ident.rs
@@ -1,9 +1,9 @@
 //! 12.1 Identifiers
 use either::Either;
 use swc_atoms::atom;
+use swc_ecma_lexer::token::{IdentLike, Keyword};
 
 use super::*;
-use crate::token::{IdentLike, Keyword};
 
 impl<I: Tokens> Parser<I> {
     pub(super) fn parse_maybe_private_name(&mut self) -> PResult<Either<PrivateName, Ident>> {
@@ -64,8 +64,8 @@ impl<I: Tokens> Parser<I> {
         let start = cur_pos!(self);
 
         let w = match cur!(self, true) {
-            Word(..) => match bump!(self) {
-                Word(w) => w.into(),
+            Token::Word(..) => match bump!(self) {
=> match bump!(self) { + Token::Word(w) => w.into(), _ => unreachable!(), }, @@ -87,7 +87,7 @@ impl Parser { Lit::Str(str_lit) => ModuleExportName::Str(str_lit), _ => unreachable!(), }, - Ok(&Word(..)) => ModuleExportName::Ident(self.parse_ident_name()?.into()), + Ok(&Token::Word(..)) => ModuleExportName::Ident(self.parse_ident_name()?.into()), _ => { unexpected!(self, "identifier or string"); } @@ -105,8 +105,8 @@ impl Parser { let word = self.parse_with(|p| { let w = match cur!(p, true) { - &Word(..) => match bump!(p) { - Word(w) => w, + &Token::Word(..) => match bump!(p) { + Token::Word(w) => w, _ => unreachable!(), }, _ => syntax_error!(p, SyntaxError::ExpectedIdent), diff --git a/crates/swc_ecma_parser/src/parser/input.rs b/crates/swc_ecma_parser/src/parser/input.rs index 33bc7d47709c..1f086b2a0b8e 100644 --- a/crates/swc_ecma_parser/src/parser/input.rs +++ b/crates/swc_ecma_parser/src/parser/input.rs @@ -1,301 +1,6 @@ -use std::{cell::RefCell, mem, mem::take, rc::Rc}; - -use debug_unreachable::debug_unreachable; -use lexer::TokenContexts; -use swc_common::{BytePos, Span}; +use swc_ecma_lexer::input::Tokens; use super::Parser; -use crate::{ - error::Error, - lexer::{self}, - token::*, - Context, EsVersion, Syntax, -}; - -/// Clone should be cheap if you are parsing typescript because typescript -/// syntax requires backtracking. -pub trait Tokens: Clone + Iterator { - fn set_ctx(&mut self, ctx: Context); - fn ctx(&self) -> Context; - fn syntax(&self) -> Syntax; - fn target(&self) -> EsVersion; - - fn start_pos(&self) -> BytePos { - BytePos(0) - } - - fn set_expr_allowed(&mut self, allow: bool); - fn set_next_regexp(&mut self, start: Option); - - fn token_context(&self) -> &lexer::TokenContexts; - fn token_context_mut(&mut self) -> &mut lexer::TokenContexts; - fn set_token_context(&mut self, _c: lexer::TokenContexts); - - /// Implementors should use Rc>>. - /// - /// It is required because parser should backtrack while parsing typescript - /// code. - fn add_error(&self, error: Error); - - /// Add an error which is valid syntax in script mode. - /// - /// This errors should be dropped if it's not a module. - /// - /// Implementor should check for if [Context].module, and buffer errors if - /// module is false. Also, implementors should move errors to the error - /// buffer on set_ctx if the parser mode become module mode. - fn add_module_mode_error(&self, error: Error); - - fn end_pos(&self) -> BytePos; - - fn take_errors(&mut self) -> Vec; - - /// If the program was parsed as a script, this contains the module - /// errors should the program be identified as a module in the future. 
-    fn take_script_module_errors(&mut self) -> Vec<Error>;
-}
-
-#[derive(Clone)]
-pub struct TokensInput {
-    iter: <Vec<TokenAndSpan> as IntoIterator>::IntoIter,
-    ctx: Context,
-    syntax: Syntax,
-    start_pos: BytePos,
-    target: EsVersion,
-    token_ctx: TokenContexts,
-    errors: Rc<RefCell<Vec<Error>>>,
-    module_errors: Rc<RefCell<Vec<Error>>>,
-}
-
-impl TokensInput {
-    pub fn new(tokens: Vec<TokenAndSpan>, ctx: Context, syntax: Syntax, target: EsVersion) -> Self {
-        let start_pos = tokens.first().map(|t| t.span.lo).unwrap_or(BytePos(0));
-
-        TokensInput {
-            iter: tokens.into_iter(),
-            ctx,
-            syntax,
-            start_pos,
-            target,
-            token_ctx: Default::default(),
-            errors: Default::default(),
-            module_errors: Default::default(),
-        }
-    }
-}
-
-impl Iterator for TokensInput {
-    type Item = TokenAndSpan;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.iter.next()
-    }
-}
-
-impl Tokens for TokensInput {
-    fn set_ctx(&mut self, ctx: Context) {
-        if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() {
-            let mut module_errors = self.module_errors.borrow_mut();
-            self.errors.borrow_mut().append(&mut *module_errors);
-        }
-        self.ctx = ctx;
-    }
-
-    fn ctx(&self) -> Context {
-        self.ctx
-    }
-
-    fn syntax(&self) -> Syntax {
-        self.syntax
-    }
-
-    fn target(&self) -> EsVersion {
-        self.target
-    }
-
-    fn start_pos(&self) -> BytePos {
-        self.start_pos
-    }
-
-    fn set_expr_allowed(&mut self, _: bool) {}
-
-    fn set_next_regexp(&mut self, _: Option<BytePos>) {}
-
-    fn token_context(&self) -> &TokenContexts {
-        &self.token_ctx
-    }
-
-    fn token_context_mut(&mut self) -> &mut TokenContexts {
-        &mut self.token_ctx
-    }
-
-    fn set_token_context(&mut self, c: TokenContexts) {
-        self.token_ctx = c;
-    }
-
-    fn add_error(&self, error: Error) {
-        self.errors.borrow_mut().push(error);
-    }
-
-    fn add_module_mode_error(&self, error: Error) {
-        if self.ctx.contains(Context::Module) {
-            self.add_error(error);
-            return;
-        }
-        self.module_errors.borrow_mut().push(error);
-    }
-
-    fn take_errors(&mut self) -> Vec<Error> {
-        take(&mut self.errors.borrow_mut())
-    }
-
-    fn take_script_module_errors(&mut self) -> Vec<Error> {
-        take(&mut self.module_errors.borrow_mut())
-    }
-
-    fn end_pos(&self) -> BytePos {
-        self.iter
-            .as_slice()
-            .last()
-            .map(|t| t.span.hi)
-            .unwrap_or(self.start_pos)
-    }
-}
-
-/// Note: Lexer need access to parser's context to lex correctly.
-#[derive(Debug)]
-pub struct Capturing<I: Tokens> {
-    inner: I,
-    captured: Rc<RefCell<Vec<TokenAndSpan>>>,
-}
-
-impl<I: Tokens> Clone for Capturing<I> {
-    fn clone(&self) -> Self {
-        Capturing {
-            inner: self.inner.clone(),
-            captured: self.captured.clone(),
-        }
-    }
-}
-
-impl<I: Tokens> Capturing<I> {
-    pub fn new(input: I) -> Self {
-        Capturing {
-            inner: input,
-            captured: Default::default(),
-        }
-    }
-
-    pub fn tokens(&self) -> Rc<RefCell<Vec<TokenAndSpan>>> {
-        self.captured.clone()
-    }
-
-    /// Take captured tokens
-    pub fn take(&mut self) -> Vec<TokenAndSpan> {
-        mem::take(&mut *self.captured.borrow_mut())
-    }
-}
-
-impl<I: Tokens> Iterator for Capturing<I> {
-    type Item = TokenAndSpan;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let next = self.inner.next();
-
-        match next {
-            Some(ts) => {
-                let mut v = self.captured.borrow_mut();
-
-                // remove tokens that could change due to backtracing
-                while let Some(last) = v.last() {
-                    if last.span.lo >= ts.span.lo {
-                        v.pop();
-                    } else {
-                        break;
-                    }
-                }
-
-                v.push(ts.clone());
-
-                Some(ts)
-            }
-            None => None,
-        }
-    }
-}
-
-impl<I: Tokens> Tokens for Capturing<I> {
-    fn set_ctx(&mut self, ctx: Context) {
-        self.inner.set_ctx(ctx)
-    }
-
-    fn ctx(&self) -> Context {
-        self.inner.ctx()
-    }
-
-    fn syntax(&self) -> Syntax {
-        self.inner.syntax()
-    }
-
-    fn target(&self) -> EsVersion {
-        self.inner.target()
-    }
-
-    fn start_pos(&self) -> BytePos {
-        self.inner.start_pos()
-    }
-
-    fn set_expr_allowed(&mut self, allow: bool) {
-        self.inner.set_expr_allowed(allow)
-    }
-
-    fn set_next_regexp(&mut self, start: Option<BytePos>) {
-        self.inner.set_next_regexp(start);
-    }
-
-    fn token_context(&self) -> &TokenContexts {
-        self.inner.token_context()
-    }
-
-    fn token_context_mut(&mut self) -> &mut TokenContexts {
-        self.inner.token_context_mut()
-    }
-
-    fn set_token_context(&mut self, c: TokenContexts) {
-        self.inner.set_token_context(c)
-    }
-
-    fn add_error(&self, error: Error) {
-        self.inner.add_error(error);
-    }
-
-    fn add_module_mode_error(&self, error: Error) {
-        self.inner.add_module_mode_error(error)
-    }
-
-    fn take_errors(&mut self) -> Vec<Error> {
-        self.inner.take_errors()
-    }
-
-    fn take_script_module_errors(&mut self) -> Vec<Error> {
-        self.inner.take_script_module_errors()
-    }
-
-    fn end_pos(&self) -> BytePos {
-        self.inner.end_pos()
-    }
-}
-
-/// This struct is responsible for managing current token and peeked token.
-#[derive(Clone)]
-pub(super) struct Buffer<I: Tokens> {
-    iter: I,
-    /// Span of the previous token.
-    prev_span: Span,
-    cur: Option<TokenAndSpan>,
-    /// Peeked token
-    next: Option<TokenAndSpan>,
-}
 
 impl<I: Tokens> Parser<I> {
     pub fn input(&mut self) -> &mut I {
@@ -306,272 +11,3 @@ impl<I: Tokens> Parser<I> {
         &self.input.iter
     }
 }
-
-impl<I: Tokens> Buffer<I> {
-    pub fn new(lexer: I) -> Self {
-        let start_pos = lexer.start_pos();
-        Buffer {
-            iter: lexer,
-            cur: None,
-            prev_span: Span::new(start_pos, start_pos),
-            next: None,
-        }
-    }
-
-    pub fn store(&mut self, token: Token) {
-        debug_assert!(self.next.is_none());
-        debug_assert!(self.cur.is_none());
-        let span = self.prev_span;
-
-        self.cur = Some(TokenAndSpan {
-            span,
-            token,
-            had_line_break: false,
-        });
-    }
-
-    #[allow(dead_code)]
-    pub fn cur_debug(&self) -> Option<&Token> {
-        self.cur.as_ref().map(|it| &it.token)
-    }
-
-    #[cold]
-    #[inline(never)]
-    pub fn dump_cur(&mut self) -> String {
-        match self.cur() {
-            Some(v) => format!("{:?}", v),
-            None => "<eof>".to_string(),
-        }
-    }
-
-    /// Returns current token.
-    pub fn bump(&mut self) -> Token {
-        let prev = match self.cur.take() {
-            Some(t) => t,
-            None => unsafe {
-                debug_unreachable!(
-                    "Current token is `None`. Parser should not call bump() without knowing \
-                     current token"
-                )
-            },
-        };
-        self.prev_span = prev.span;
-
-        prev.token
-    }
-
-    pub fn knows_cur(&self) -> bool {
-        self.cur.is_some()
-    }
-
-    pub fn peek(&mut self) -> Option<&Token> {
-        debug_assert!(
-            self.cur.is_some(),
-            "parser should not call peek() without knowing current token"
-        );
-
-        if self.next.is_none() {
-            self.next = self.iter.next();
-        }
-
-        self.next.as_ref().map(|ts| &ts.token)
-    }
-
-    /// Returns true on eof.
-    pub fn had_line_break_before_cur(&mut self) -> bool {
-        self.cur();
-
-        self.cur
-            .as_ref()
-            .map(|it| it.had_line_break)
-            .unwrap_or_else(|| true)
-    }
-
-    /// This returns true on eof.
-    pub fn has_linebreak_between_cur_and_peeked(&mut self) -> bool {
-        let _ = self.peek();
-        self.next
-            .as_ref()
-            .map(|item| item.had_line_break)
-            .unwrap_or({
-                // return true on eof.
-                true
-            })
-    }
-
-    /// Get current token. Returns `None` only on eof.
-    #[inline]
-    pub fn cur(&mut self) -> Option<&Token> {
-        if self.cur.is_none() {
-            // If we have peeked a token, take it instead of calling lexer.next()
-            self.cur = self.next.take().or_else(|| self.iter.next());
-        }
-
-        match &self.cur {
-            Some(v) => Some(&v.token),
-            None => None,
-        }
-    }
-
-    #[inline]
-    pub fn cut_lshift(&mut self) {
-        debug_assert!(
-            self.is(&tok!("<<")),
-            "parser should only call cut_lshift when encountering LShift token"
-        );
-        self.cur = Some(TokenAndSpan {
-            token: tok!('<'),
-            span: self.cur_span().with_lo(self.cur_span().lo + BytePos(1)),
-            had_line_break: false,
-        });
-    }
-
-    pub fn merge_lt_gt(&mut self) {
-        debug_assert!(
-            self.is(&tok!('<')) || self.is(&tok!('>')),
-            "parser should only call merge_lt_gt when encountering '<' or '>' token"
-        );
-
-        let span = self.cur_span();
-
-        if self.peek().is_none() {
-            return;
-        }
-
-        let next = self.next.as_ref().unwrap();
-
-        if span.hi != next.span.lo {
-            return;
-        }
-
-        let cur = self.cur.take().unwrap();
-        let next = self.next.take().unwrap();
-
-        let token = match (&cur.token, &next.token) {
-            (tok!('>'), tok!('>')) => tok!(">>"),
-            (tok!('>'), tok!('=')) => tok!(">="),
-            (tok!('>'), tok!(">>")) => tok!(">>>"),
-            (tok!('>'), tok!(">=")) => tok!(">>="),
-            (tok!('>'), tok!(">>=")) => tok!(">>>="),
-            (tok!('<'), tok!('<')) => tok!("<<"),
-            (tok!('<'), tok!('=')) => tok!("<="),
-            (tok!('<'), tok!("<=")) => tok!("<<="),
-
-            _ => {
-                self.cur = Some(cur);
-                self.next = Some(next);
-                return;
-            }
-        };
-        let span = span.with_hi(next.span.hi);
-
-        self.cur = Some(TokenAndSpan {
-            token,
-            span,
-            had_line_break: cur.had_line_break,
-        });
-    }
-
-    #[inline]
-    pub fn is(&mut self, expected: &Token) -> bool {
-        match self.cur() {
-            Some(t) => *expected == *t,
-            _ => false,
-        }
-    }
-
-    #[inline]
-    pub fn eat(&mut self, expected: &Token) -> bool {
-        let v = self.is(expected);
-        if v {
-            self.bump();
-        }
-        v
-    }
-
-    /// Returns start of current token.
-    #[inline]
-    pub fn cur_pos(&mut self) -> BytePos {
-        let _ = self.cur();
-        self.cur
-            .as_ref()
-            .map(|item| item.span.lo)
-            .unwrap_or_else(|| {
-                // eof
-                self.last_pos()
-            })
-    }
-
-    #[inline]
-    pub fn cur_span(&self) -> Span {
-        let data = self
-            .cur
-            .as_ref()
-            .map(|item| item.span)
-            .unwrap_or(self.prev_span);
-
-        Span::new(data.lo, data.hi)
-    }
-
-    /// Returns last byte position of previous token.
-    #[inline]
-    pub fn last_pos(&self) -> BytePos {
-        self.prev_span.hi
-    }
-
-    /// Returns span of the previous token.
-    #[inline]
-    pub fn prev_span(&self) -> Span {
-        self.prev_span
-    }
-
-    #[inline]
-    pub(crate) fn get_ctx(&self) -> Context {
-        self.iter.ctx()
-    }
-
-    #[inline]
-    pub(crate) fn set_ctx(&mut self, ctx: Context) {
-        self.iter.set_ctx(ctx);
-    }
-
-    #[inline]
-    pub fn syntax(&self) -> Syntax {
-        self.iter.syntax()
-    }
-
-    #[inline]
-    pub fn target(&self) -> EsVersion {
-        self.iter.target()
-    }
-
-    #[inline]
-    pub(crate) fn set_expr_allowed(&mut self, allow: bool) {
-        self.iter.set_expr_allowed(allow)
-    }
-
-    #[inline]
-    pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
-        self.iter.set_next_regexp(start);
-    }
-
-    #[inline]
-    pub(crate) fn token_context(&self) -> &lexer::TokenContexts {
-        self.iter.token_context()
-    }
-
-    #[inline]
-    pub(crate) fn token_context_mut(&mut self) -> &mut lexer::TokenContexts {
-        self.iter.token_context_mut()
-    }
-
-    #[inline]
-    pub(crate) fn set_token_context(&mut self, c: lexer::TokenContexts) {
-        self.iter.set_token_context(c)
-    }
-
-    #[inline]
-    pub(crate) fn end_pos(&self) -> BytePos {
-        self.iter.end_pos()
-    }
-}
diff --git a/crates/swc_ecma_parser/src/parser/macros.rs b/crates/swc_ecma_parser/src/parser/macros.rs
index c3ec7517a8da..9cb276571c24 100644
--- a/crates/swc_ecma_parser/src/parser/macros.rs
+++ b/crates/swc_ecma_parser/src/parser/macros.rs
@@ -19,7 +19,7 @@ macro_rules! is {
     ($p:expr, BindingIdent) => {{
         let ctx = $p.ctx();
         match $p.input.cur() {
-            Some(&Word(ref w)) => !ctx.is_reserved(w),
+            Some(&Token::Word(ref w)) => !ctx.is_reserved(w),
             _ => false,
         }
     }};
@@ -27,14 +27,14 @@
     ($p:expr, IdentRef) => {{
         let ctx = $p.ctx();
         match $p.input.cur() {
-            Some(&Word(ref w)) => !ctx.is_reserved(w),
+            Some(&Token::Word(ref w)) => !ctx.is_reserved(w),
             _ => false,
         }
     }};
 
     ($p:expr,IdentName) => {{
         match $p.input.cur() {
-            Some(&Word(..)) => true,
+            Some(&Token::Word(..)) => true,
             _ => false,
         }
     }};
@@ -84,7 +84,7 @@ macro_rules! peeked_is {
     ($p:expr, BindingIdent) => {{
         let ctx = $p.ctx();
         match peek!($p) {
-            Some(&Word(ref w)) => !ctx.is_reserved(w),
+            Some(&Token::Word(ref w)) => !ctx.is_reserved(w),
             _ => false,
         }
     }};
@@ -92,14 +92,14 @@ macro_rules! peeked_is {
     ($p:expr, IdentRef) => {{
         let ctx = $p.ctx();
         match peek!($p) {
-            Some(&Word(ref w)) => !ctx.is_reserved(w),
+            Some(&Token::Word(ref w)) => !ctx.is_reserved(w),
             _ => false,
         }
     }};
 
     ($p:expr,IdentName) => {{
         match peek!($p) {
-            Some(&Word(..)) => true,
+            Some(&Token::Word(..)) => true,
             _ => false,
         }
     }};
diff --git a/crates/swc_ecma_parser/src/parser/mod.rs b/crates/swc_ecma_parser/src/parser/mod.rs
index d305e769c659..7178f10ffd26 100644
--- a/crates/swc_ecma_parser/src/parser/mod.rs
+++ b/crates/swc_ecma_parser/src/parser/mod.rs
@@ -7,15 +7,16 @@ use rustc_hash::FxHashMap;
 use swc_atoms::Atom;
 use swc_common::{comments::Comments, input::StringInput, BytePos, Span};
 use swc_ecma_ast::*;
-
-pub use self::input::{Capturing, Tokens, TokensInput};
-use self::{input::Buffer, util::ParseObject};
-use crate::{
+pub use swc_ecma_lexer::input::{Capturing, Tokens, TokensInput};
+use swc_ecma_lexer::{
     error::SyntaxError,
-    lexer::Lexer,
+    input::Buffer,
     token::{Token, Word},
-    Context, EsVersion, Syntax, TsSyntax,
+    Lexer, *,
 };
+
+use self::util::ParseObject;
+use crate::{Context, EsVersion, Syntax, TsSyntax};
 #[cfg(test)]
 extern crate test;
 #[cfg(test)]
diff --git a/crates/swc_ecma_parser/src/parser/object.rs b/crates/swc_ecma_parser/src/parser/object.rs
index e3bbf45a486c..5667d4ffecbc 100644
--- a/crates/swc_ecma_parser/src/parser/object.rs
+++ b/crates/swc_ecma_parser/src/parser/object.rs
@@ -69,8 +69,8 @@ impl<I: Tokens> Parser<I> {
                 }),
                 _ => unreachable!(),
             },
-            Word(..) => match bump!(p) {
-                Word(w) => PropName::Ident(IdentName::new(w.into(), span!(p, start))),
+            Token::Word(..) => match bump!(p) {
+                Token::Word(w) => PropName::Ident(IdentName::new(w.into(), span!(p, start))),
                 _ => unreachable!(),
             },
             tok!('[') => {
diff --git a/crates/swc_ecma_parser/src/parser/pat.rs b/crates/swc_ecma_parser/src/parser/pat.rs
index a74f2ebfa460..ddbbd0566790 100644
--- a/crates/swc_ecma_parser/src/parser/pat.rs
+++ b/crates/swc_ecma_parser/src/parser/pat.rs
@@ -5,7 +5,7 @@ use swc_common::Spanned;
 use super::{util::ExprExt, *};
 use crate::{
     parser::{class_and_fn::is_not_this, expr::AssignTargetOrSpread},
-    token::{IdentLike, Keyword},
+    token::{IdentLike, Keyword, Token},
 };
 
 impl<I: Tokens> Parser<I> {
@@ -59,7 +59,9 @@
         trace_cur!(self, parse_binding_pat_or_ident);
 
         match *cur!(self, true) {
-            tok!("yield") | Word(..) => self.parse_binding_ident(disallow_let).map(Pat::from),
+            tok!("yield") | Token::Word(..) => {
+                self.parse_binding_ident(disallow_let).map(Pat::from)
+            }
             tok!('[') => self.parse_array_binding_pat(),
             tok!('{') => self.parse_object(),
             // tok!('(') => {
@@ -162,7 +164,7 @@ impl<I: Tokens> Parser<I> {
         let has_modifier = self.syntax().typescript()
             && matches!(
                 *cur!(self, false)?,
-                Word(Word::Ident(IdentLike::Known(
+                Token::Word(Word::Ident(IdentLike::Known(
                     known_ident!("public")
                         | known_ident!("protected")
                         | known_ident!("private")
diff --git a/crates/swc_ecma_parser/src/parser/util.rs b/crates/swc_ecma_parser/src/parser/util.rs
index 154664edc19a..63104ca6fda6 100644
--- a/crates/swc_ecma_parser/src/parser/util.rs
+++ b/crates/swc_ecma_parser/src/parser/util.rs
@@ -1,112 +1,4 @@
 use super::*;
-use crate::token::{IdentLike, Keyword};
-
-impl Context {
-    pub(crate) fn is_reserved(self, word: &Word) -> bool {
-        match *word {
-            Word::Keyword(Keyword::Let) => self.contains(Context::Strict),
-            Word::Keyword(Keyword::Await) => {
-                self.contains(Context::InAsync)
-                    || self.contains(Context::InStaticBlock)
-                    || self.contains(Context::Strict)
-            }
-            Word::Keyword(Keyword::Yield) => {
-                self.contains(Context::InGenerator) || self.contains(Context::Strict)
-            }
-
-            Word::Null
-            | Word::True
-            | Word::False
-            | Word::Keyword(Keyword::Break)
-            | Word::Keyword(Keyword::Case)
-            | Word::Keyword(Keyword::Catch)
-            | Word::Keyword(Keyword::Continue)
-            | Word::Keyword(Keyword::Debugger)
-            | Word::Keyword(Keyword::Default_)
-            | Word::Keyword(Keyword::Do)
-            | Word::Keyword(Keyword::Export)
-            | Word::Keyword(Keyword::Else)
-            | Word::Keyword(Keyword::Finally)
-            | Word::Keyword(Keyword::For)
-            | Word::Keyword(Keyword::Function)
-            | Word::Keyword(Keyword::If)
-            | Word::Keyword(Keyword::Return)
-            | Word::Keyword(Keyword::Switch)
-            | Word::Keyword(Keyword::Throw)
-            | Word::Keyword(Keyword::Try)
-            | Word::Keyword(Keyword::Var)
-            | Word::Keyword(Keyword::Const)
-            | Word::Keyword(Keyword::While)
-            | Word::Keyword(Keyword::With)
-            | Word::Keyword(Keyword::New)
-            | Word::Keyword(Keyword::This)
-            | Word::Keyword(Keyword::Super)
-            | Word::Keyword(Keyword::Class)
-            | Word::Keyword(Keyword::Extends)
-            | Word::Keyword(Keyword::Import)
-            | Word::Keyword(Keyword::In)
-            | Word::Keyword(Keyword::InstanceOf)
-            | Word::Keyword(Keyword::TypeOf)
-            | Word::Keyword(Keyword::Void)
-            | Word::Keyword(Keyword::Delete) => true,
-
-            // Future reserved word
-            Word::Ident(IdentLike::Known(known_ident!("enum"))) => true,
-
-            Word::Ident(IdentLike::Known(
-                known_ident!("implements")
-                | known_ident!("package")
-                | known_ident!("protected")
-                | known_ident!("interface")
-                | known_ident!("private")
-                | known_ident!("public"),
-            )) if self.contains(Context::Strict) => true,
-
-            _ => false,
-        }
-    }
-
-    #[cfg_attr(not(feature = "verify"), inline(always))]
-    pub fn is_reserved_word(self, word: &Atom) -> bool {
-        if !cfg!(feature = "verify") {
-            return false;
-        }
-
-        match &**word {
-            "let" => self.contains(Context::Strict),
-            // SyntaxError in the module only, not in the strict.
-            // ```JavaScript
-            // function foo() {
-            //     "use strict";
-            //     let await = 1;
-            // }
-            // ```
-            "await" => {
-                self.contains(Context::InAsync)
-                    || self.contains(Context::InStaticBlock)
-                    || self.contains(Context::Module)
-            }
-            "yield" => self.contains(Context::InGenerator) || self.contains(Context::Strict),
-
-            "null" | "true" | "false" | "break" | "case" | "catch" | "continue" | "debugger"
-            | "default" | "do" | "export" | "else" | "finally" | "for" | "function" | "if"
-            | "return" | "switch" | "throw" | "try" | "var" | "const" | "while" | "with"
-            | "new" | "this" | "super" | "class" | "extends" | "import" | "in" | "instanceof"
-            | "typeof" | "void" | "delete" => true,
-
-            // Future reserved word
-            "enum" => true,
-
-            "implements" | "package" | "protected" | "interface" | "private" | "public"
-                if self.contains(Context::Strict) =>
-            {
-                true
-            }
-
-            _ => false,
-        }
-    }
-}
 
 impl<I: Tokens> Parser<I> {
     /// Original context is restored when returned guard is dropped.

From 6b0e4fc1190daad236d85a77de121cf8adad1e41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?=
Date: Wed, 16 Apr 2025 14:46:09 -0700
Subject: [PATCH 2/2] Create tame-dots-own.md

---
 .changeset/tame-dots-own.md | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 .changeset/tame-dots-own.md

diff --git a/.changeset/tame-dots-own.md b/.changeset/tame-dots-own.md
new file mode 100644
index 000000000000..abfc516fd45f
--- /dev/null
+++ b/.changeset/tame-dots-own.md
@@ -0,0 +1,7 @@
+---
+swc_ecma_lexer: patch
+swc_ecma_parser: patch
+swc_core: patch
+---
+
+refactor(ecma/lexer): split lexer of parser