Skip to content
Draft
7 changes: 7 additions & 0 deletions .changeset/eighty-pigs-drum.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
swc_common: patch
swc_ecma_lexer: major
swc_ecma_compat_es2015: patch
---

refactor(es/parser): remove the span swap in the parser
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
x Unexpected escape sequence in reserved word: static
,-[$DIR/tests/fixture/issues-2xxx/2844/input/index.js:1:1]
1 | class X { st\u0061tic y() { } }
: ^^^^^^^^^^^
`----
x Unexpected token `<lexing error: Error { error: (11..22, EscapeInReservedWord { word: "static" }) }>`. Expected identifier, string literal, numeric literal or [ for the computed key
,-[$DIR/tests/fixture/issues-2xxx/2844/input/index.js:1:1]
1 | class X { st\u0061tic y() { } }
: ^^^^^^^^^^^
`----
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//// [parserStrictMode1.ts]
//! x `static` cannot be used as an identifier in strict mode
//! x Expression expected
//! ,-[4:1]
//! 1 | foo1();
//! 2 | foo1();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//// [parserStrictMode1.ts]
//! x `static` cannot be used as an identifier in strict mode
//! x Expression expected
//! ,-[4:1]
//! 1 | foo1();
//! 2 | foo1();
Expand Down
8 changes: 7 additions & 1 deletion crates/swc_common/src/syntax_pos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,12 +406,18 @@ impl Span {
Span { lo, hi }
}

#[inline]
pub fn new_with_checked(lo: BytePos, hi: BytePos) -> Self {
debug_assert!(lo <= hi, "lo: {lo:#?}, hi: {hi:#?}");
Span { lo, hi }
}

#[inline]
pub fn with_lo(&self, lo: BytePos) -> Span {
Span::new(lo, self.hi)
}

#[inline]
#[inline(always)]
pub fn hi(self) -> BytePos {
self.hi
}
Expand Down
2 changes: 1 addition & 1 deletion crates/swc_ecma_compat_es2015/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2642,7 +2642,7 @@ impl Generator {
.push(expr);
}
return Invalid {
span: Span::new(BytePos(label.0 as _), BytePos(label.0 as _)),
span: Span::new_with_checked(BytePos(label.0 as _), BytePos(label.0 as _)),
}
.into();
}
Expand Down
102 changes: 75 additions & 27 deletions crates/swc_ecma_lexer/src/common/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
let s = unsafe { self.input_slice(slice_start, end) };
let cmt = swc_common::comments::Comment {
kind: swc_common::comments::CommentKind::Line,
span: Span::new(start, end),
span: Span::new_with_checked(start, end),
text: self.atom(s),
};

Expand Down Expand Up @@ -298,7 +298,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
};
let cmt = swc_common::comments::Comment {
kind: swc_common::comments::CommentKind::Line,
span: Span::new(start, end),
span: Span::new_with_checked(start, end),
text: self.atom(s),
};

Expand Down Expand Up @@ -371,7 +371,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
self.state_mut().mark_had_line_break();
}
let end_pos = self.input().end_pos();
let span = Span::new(end_pos, end_pos);
let span = Span::new_with_checked(end_pos, end_pos);
self.emit_error_span(span, SyntaxError::UnterminatedBlockComment);
return;
}
Expand Down Expand Up @@ -407,7 +407,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
let s = &src[..src.len() - 2];
let cmt = Comment {
kind: CommentKind::Block,
span: Span::new(start, end),
span: Span::new_with_checked(start, end),
text: self.atom(s),
};

Expand Down Expand Up @@ -642,20 +642,20 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
}

/// Reads an integer, octal integer, or floating-point number
fn read_number(
fn read_number<const START_WITH_DOT: bool, const START_WITH_ZERO: bool>(
&mut self,
starts_with_dot: bool,
) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
debug_assert!(!(START_WITH_DOT && START_WITH_ZERO));
debug_assert!(self.cur().is_some());

let start = self.cur_pos();
let mut has_underscore = false;

let lazy_integer = if starts_with_dot {
let lazy_integer = if START_WITH_DOT {
// first char is '.'
debug_assert!(
self.cur().is_some_and(|c| c == '.'),
"read_number(starts_with_dot = true) expects current char to be '.'"
"read_number<START_WITH_DOT = true> expects current char to be '.'"
);
LazyInteger {
start,
Expand All @@ -664,7 +664,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
has_underscore: false,
}
} else {
let starts_with_zero = self.cur().unwrap() == '0';
debug_assert!(!START_WITH_DOT);
debug_assert!(!START_WITH_ZERO || self.cur().unwrap() == '0');

// Use read_number_no_dot to support long numbers.
let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
Expand All @@ -683,7 +684,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
}

if starts_with_zero {
if START_WITH_ZERO {
// TODO: I guess it would be okay if I don't use -ffast-math
// (or something like that), but needs review.
if s.as_bytes().iter().all(|&c| c == b'0') {
Expand Down Expand Up @@ -736,8 +737,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
if has_dot {
self.bump();

// equal: if starts_with_dot { debug_assert!(xxxx) }
debug_assert!(!starts_with_dot || self.cur().is_some_and(|cur| cur.is_ascii_digit()));
// equal: if START_WITH_DOT { debug_assert!(xxxx) }
debug_assert!(!START_WITH_DOT || self.cur().is_some_and(|cur| cur.is_ascii_digit()));

// Read numbers after dot
self.read_digits::<_, (), 10>(|_, _, _| Ok(((), true)), true, &mut has_underscore)?;
Expand Down Expand Up @@ -807,7 +808,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
.checked_mul(radix as u32)
.and_then(|v| v.checked_add(val))
.ok_or_else(|| {
let span = Span::new(start, start);
let span = Span::new_with_checked(start, start);
crate::error::Error::new(span, SyntaxError::InvalidUnicodeEscape)
})?;

Expand Down Expand Up @@ -1788,9 +1789,9 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
}
};
if next.is_ascii_digit() {
return self.read_number(true).map(|v| match v {
return self.read_number::<true, false>().map(|v| match v {
Left((value, raw)) => Self::Token::num(value, raw, self),
Right((value, raw)) => Self::Token::bigint(value, raw, self),
Right(_) => unreachable!("read_number should not return bigint for leading dot"),
});
}

Expand Down Expand Up @@ -1847,7 +1848,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
Some('o') | Some('O') => self.read_radix_number::<8>(),
Some('b') | Some('B') => self.read_radix_number::<2>(),
_ => {
return self.read_number(false).map(|v| match v {
return self.read_number::<false, true>().map(|v| match v {
Left((value, raw)) => Self::Token::num(value, raw, self),
Right((value, raw)) => Self::Token::bigint(value, raw, self),
});
Expand Down Expand Up @@ -2110,20 +2111,16 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
}
}

/// This can be used if there's no keyword starting with the first
/// character.
fn read_word_with(
fn read_keyword_with(
&mut self,
convert: &dyn Fn(&str) -> Option<Self::Token>,
) -> LexResult<Option<Self::Token>> {
debug_assert!(self.cur().is_some());

let start = self.cur_pos();
let (word, has_escape) = self.read_word_as_str_with(|l, s, _, can_be_known| {
if can_be_known {
if let Some(word) = convert(s) {
return word;
}
let (word, has_escape) = self.read_keyword_as_str_with(|l, s, _, _| {
if let Some(word) = convert(s) {
return word;
}
let atom = l.atom(s);
Self::Token::unknown_ident(atom, l)
Expand All @@ -2133,20 +2130,71 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
// 'await' and 'yield' may have semantic of reserved word, which means lexer
// should know context or parser should handle this error. Our approach to this
// problem is former one.

if has_escape && word.is_reserved(self.ctx()) {
let word = word.into_atom(self).unwrap();
self.error(start, SyntaxError::EscapeInReservedWord { word })?
} else {
Ok(Some(word))
}
}

/// This is a performant version of [Lexer::read_word_as_str_with] for
/// reading keywords. We should make sure the first byte is a valid
/// ASCII.
///
/// Strategy: a fast path scans a run of ASCII identifier-continue bytes via
/// `byte_search!`, bailing out to `read_word_as_str_with_slow_path` as soon
/// as a `\` escape or a non-ASCII byte appears. On the fast path the word is
/// handed to `convert` with `has_escape == false` — escape handling lives
/// entirely in the slow path.
fn read_keyword_as_str_with<F, Ret>(&mut self, convert: F) -> LexResult<(Ret, bool)>
where
F: FnOnce(&mut Self, &str, bool, bool) -> Ret,
{
let slice_start = self.cur_pos();
// Stays `false` throughout: any escape sequence diverts to the slow path
// before this value is returned from the fast path.
let has_escape = false;

// Fast path: try to scan ASCII identifier using byte_search
// Performance optimization: check if first char disqualifies as keyword
// NOTE(review): no such first-char check appears below — the comment above
// looks stale; confirm against the call sites.
// Advance past first byte (the caller guarantees it is valid ASCII — see
// the doc comment).
self.bump();

// Use byte_search to quickly scan to end of ASCII identifier
let next_byte = byte_search! {
lexer: self,
table: NOT_ASCII_ID_CONTINUE_TABLE,
handle_eof: {
// Reached EOF, entire remainder is identifier
let end = self.cur_pos();
let s = unsafe {
// Safety: slice_start and end are valid position because we got them from
// `self.input`
self.input_slice(slice_start, end)
};

return Ok((convert(self, s, false, true), false));
},
};

// Check if we hit end of identifier or need to fall back to slow path
if !next_byte.is_ascii() {
// Hit Unicode character, fall back to slow path from current position
self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
} else if next_byte == b'\\' {
// Hit escape sequence, fall back to slow path from current position
self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
} else {
// Hit end of identifier (non-continue ASCII char)
let end = self.cur_pos();
let s = unsafe {
// Safety: slice_start and end are valid position because we got them from
// `self.input`
self.input_slice(slice_start, end)
};

return Ok((convert(self, s, has_escape, true), has_escape));
}
}
}

/// Returns an empty (zero-width) span located at `p` (`lo == hi == p`).
///
/// Fix: the pasted diff left both the removed `Span::new` line and the added
/// `Span::new_with_checked` line in the body; keep only the post-change,
/// checked variant so the function has a single statement. The ordering
/// invariant trivially holds (`p <= p`), so `new_with_checked` cannot fire
/// its debug assertion here.
pub fn pos_span(p: BytePos) -> Span {
    Span::new_with_checked(p, p)
}

/// Returns a span starting at `p` and covering `len` bytes.
///
/// Fix: the pasted diff left both the removed `Span::new` line and the added
/// `Span::new_with_checked` line in the body; keep only the post-change,
/// checked variant so the function has a single statement. Since
/// `p + BytePos(len) >= p` for any `u32` length (barring wraparound of
/// `BytePos` addition — TODO confirm overflow semantics of `BytePos + BytePos`),
/// the debug assertion in `new_with_checked` holds.
pub fn fixed_len_span(p: BytePos, len: u32) -> Span {
    Span::new_with_checked(p, p + BytePos(len))
}
2 changes: 1 addition & 1 deletion crates/swc_ecma_lexer/src/common/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ pub trait TokenFactory<'a, TokenAndSpan, I: Tokens<TokenAndSpan>>: Sized + Parti
fn unknown_ident(value: Atom, lexer: &mut Self::Lexer) -> Self;
fn is_unknown_ident(&self) -> bool;
fn take_unknown_ident(self, buffer: &mut Self::Buffer) -> Atom;
fn take_unknown_ident_ref<'b>(&'b self, buffer: &'b mut Self::Buffer) -> &'b Atom;
fn take_unknown_ident_ref<'b>(&'b self, buffer: &'b Self::Buffer) -> &'b Atom;

fn is_known_ident(&self) -> bool;
fn take_known_ident(&self) -> Atom;
Expand Down
2 changes: 1 addition & 1 deletion crates/swc_ecma_lexer/src/common/parser/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ pub trait Buffer<'a> {
.get_cur()
.map(|item| item.span())
.unwrap_or(self.prev_span());
Span::new(data.lo, data.hi)
Span::new_with_checked(data.lo, data.hi)
}

/// Returns last byte position of previous token.
Expand Down
9 changes: 4 additions & 5 deletions crates/swc_ecma_lexer/src/common/parser/class_and_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1508,9 +1508,8 @@ fn parse_class_body<'a, P: Parser<'a>>(p: &mut P) -> PResult<Vec<ClassMember>> {
while !eof!(p) && !p.input_mut().is(&P::Token::RBRACE) {
if p.input_mut().eat(&P::Token::SEMI) {
let span = p.input().prev_span();
elems.push(ClassMember::Empty(EmptyStmt {
span: Span::new(span.lo, span.hi),
}));
debug_assert!(span.lo <= span.hi);
elems.push(ClassMember::Empty(EmptyStmt { span }));
continue;
}
let elem = p.do_inside_of_context(Context::AllowDirectSuper, parse_class_member)?;
Expand Down Expand Up @@ -1667,12 +1666,12 @@ fn parse_class_inner<'a, P: Parser<'a>>(
} else {
expect!(p, &P::Token::RBRACE);
}
let end = p.last_pos();

let span = p.span(class_start);
Ok((
ident,
Box::new(Class {
span: Span::new(class_start, end),
span,
decorators,
is_abstract: false,
type_params,
Expand Down
Loading
Loading