1
1
use std:: borrow:: Cow ;
2
2
3
3
use char:: { Char , CharExt } ;
4
+ use comments_buffer:: { BufferedComment , BufferedCommentKind } ;
4
5
use either:: Either :: { self , Left , Right } ;
5
6
use num_bigint:: BigInt as BigIntValue ;
6
7
use num_traits:: { Num as NumTrait , ToPrimitive } ;
@@ -9,6 +10,7 @@ use smartstring::{LazyCompact, SmartString};
9
10
use state:: State ;
10
11
use swc_atoms:: Atom ;
11
12
use swc_common:: {
13
+ comments:: { Comment , CommentKind } ,
12
14
input:: { Input , StringInput } ,
13
15
BytePos , Span ,
14
16
} ;
@@ -49,8 +51,6 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
49
51
/// Returns a `swc_atoms::Atom` for `s`, which may be anything convertible into a `Cow<str>`.
/// NOTE(review): presumably interns the string via the implementor's atom store — confirm.
fn atom < ' b > ( & self , s : impl Into < Cow < ' b , str > > ) -> swc_atoms:: Atom ;
50
52
/// Hands a lexer `Error` to the implementor through a shared (`&self`) reference.
/// NOTE(review): presumably the error is collected for later reporting rather than raised — confirm.
fn push_error ( & self , error : crate :: error:: Error ) ;
51
53
/// Returns a reference-counted, interior-mutable handle (`Rc<RefCell<String>>`) to a `String`.
/// NOTE(review): presumably a reusable scratch buffer shared by the lexing routines — confirm.
fn buf ( & self ) -> std:: rc:: Rc < std:: cell:: RefCell < String > > ;
52
- // TODO: invest why there has regression if implement this by trait
53
- fn skip_block_comment ( & mut self ) ;
54
54
55
55
#[ inline( always) ]
56
56
#[ allow( clippy:: misnamed_getters) ]
@@ -198,14 +198,14 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
198
198
while idx < len {
199
199
let b = * unsafe { bytes. get_unchecked ( idx) } ;
200
200
if b == b'\r' || b == b'\n' {
201
- self . state_mut ( ) . set_had_line_break ( true ) ;
201
+ self . state_mut ( ) . mark_had_line_break ( ) ;
202
202
break ;
203
203
} else if b > 127 {
204
204
// non-ASCII case: Check for Unicode line termination characters
205
205
let s = unsafe { input_str. get_unchecked ( idx..) } ;
206
206
if let Some ( first_char) = s. chars ( ) . next ( ) {
207
207
if first_char == '\u{2028}' || first_char == '\u{2029}' {
208
- self . state_mut ( ) . set_had_line_break ( true ) ;
208
+ self . state_mut ( ) . mark_had_line_break ( ) ;
209
209
break ;
210
210
}
211
211
idx += first_char. len_utf8 ( ) - 1 ; // `-1` because `idx` will be incremented
@@ -250,6 +250,117 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
250
250
}
251
251
}
252
252
253
+ /// Expects current char to be '/' and next char to be '*'.
254
+ fn skip_block_comment ( & mut self ) {
255
+ let start = self . cur_pos ( ) ;
256
+
257
+ debug_assert_eq ! ( self . cur( ) , Some ( '/' ) ) ;
258
+ debug_assert_eq ! ( self . peek( ) , Some ( '*' ) ) ;
259
+
260
+ self . input_mut ( ) . bump_bytes ( 2 ) ;
261
+
262
+ // jsdoc
263
+ let slice_start = self . cur_pos ( ) ;
264
+
265
+ // Check if there's an asterisk at the beginning (JSDoc style)
266
+ let mut was_star = if self . input ( ) . is_byte ( b'*' ) {
267
+ self . bump ( ) ;
268
+ true
269
+ } else {
270
+ false
271
+ } ;
272
+
273
+ let mut is_for_next =
274
+ self . state ( ) . had_line_break ( ) || !self . state ( ) . can_have_trailing_comment ( ) ;
275
+
276
+ // Optimization for finding block comment end position
277
+ let input_str = self . input ( ) . as_str ( ) ;
278
+ let bytes = input_str. as_bytes ( ) ;
279
+ let mut pos = 0 ;
280
+ let len = bytes. len ( ) ;
281
+ let mut should_mark_had_line_break = false ;
282
+
283
+ // Byte-based scanning for faster search
284
+ while pos < len {
285
+ let b = * unsafe { bytes. get_unchecked ( pos) } ;
286
+
287
+ if was_star && b == b'/' {
288
+ if should_mark_had_line_break {
289
+ self . state_mut ( ) . mark_had_line_break ( ) ;
290
+ }
291
+ // Found comment end: "*/"
292
+ self . input_mut ( ) . bump_bytes ( pos + 1 ) ;
293
+
294
+ let end = self . cur_pos ( ) ;
295
+
296
+ self . skip_space :: < false > ( ) ;
297
+
298
+ // Check if this is a comment before semicolon
299
+ if !self . state ( ) . had_line_break ( ) && self . input ( ) . is_byte ( b';' ) {
300
+ is_for_next = false ;
301
+ }
302
+
303
+ if self . comments_buffer ( ) . is_some ( ) {
304
+ let src = unsafe {
305
+ // Safety: We got slice_start and end from self.input so those are valid.
306
+ self . input_mut ( ) . slice ( slice_start, end)
307
+ } ;
308
+ let s = & src[ ..src. len ( ) - 2 ] ;
309
+ let cmt = Comment {
310
+ kind : CommentKind :: Block ,
311
+ span : Span :: new ( start, end) ,
312
+ text : self . atom ( s) ,
313
+ } ;
314
+
315
+ let _ = self . input ( ) . peek ( ) ;
316
+ if is_for_next {
317
+ self . comments_buffer_mut ( )
318
+ . unwrap ( )
319
+ . push_pending_leading ( cmt) ;
320
+ } else {
321
+ let pos = self . state ( ) . prev_hi ( ) ;
322
+ self . comments_buffer_mut ( ) . unwrap ( ) . push ( BufferedComment {
323
+ kind : BufferedCommentKind :: Trailing ,
324
+ pos,
325
+ comment : cmt,
326
+ } ) ;
327
+ }
328
+ }
329
+
330
+ return ;
331
+ }
332
+
333
+ // Check for line break characters - ASCII case
334
+ if b == b'\r' || b == b'\n' {
335
+ should_mark_had_line_break = true ;
336
+ }
337
+ // Check for Unicode line breaks (rare case)
338
+ else if b > 127 {
339
+ let remaining = & input_str[ pos..] ;
340
+ if let Some ( c) = remaining. chars ( ) . next ( ) {
341
+ if c == '\u{2028}' || c == '\u{2029}' {
342
+ should_mark_had_line_break = true ;
343
+ }
344
+ // Skip multibyte characters
345
+ pos += c. len_utf8 ( ) - 1 ; // `-1` will incrumented below
346
+ }
347
+ }
348
+
349
+ was_star = b == b'*' ;
350
+ pos += 1 ;
351
+ }
352
+
353
+ if should_mark_had_line_break {
354
+ self . state_mut ( ) . mark_had_line_break ( ) ;
355
+ }
356
+
357
+ // If we reached here, it's an unterminated block comment
358
+ self . input_mut ( ) . bump_bytes ( len) ; // skip remaining
359
+ let end = self . input ( ) . end_pos ( ) ;
360
+ let span = Span :: new ( end, end) ;
361
+ self . emit_error_span ( span, SyntaxError :: UnterminatedBlockComment )
362
+ }
363
+
253
364
/// Skip comments or whitespaces.
254
365
///
255
366
/// See https://tc39.github.io/ecma262/#sec-white-space
@@ -270,7 +381,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
270
381
271
382
self . input_mut ( ) . bump_bytes ( offset as usize ) ;
272
383
if newline {
273
- self . state_mut ( ) . set_had_line_break ( true ) ;
384
+ self . state_mut ( ) . mark_had_line_break ( ) ;
274
385
}
275
386
276
387
if LEX_COMMENTS && self . input ( ) . is_byte ( b'/' ) {
@@ -1213,7 +1324,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
1213
1324
1214
1325
cooked_slice_start = self . cur_pos ( ) ;
1215
1326
} else if c. is_line_terminator ( ) {
1216
- self . state_mut ( ) . set_had_line_break ( true ) ;
1327
+ self . state_mut ( ) . mark_had_line_break ( ) ;
1217
1328
1218
1329
consume_cooked ! ( ) ;
1219
1330
0 commit comments