Skip to content
Draft
7 changes: 7 additions & 0 deletions .changeset/eighty-pigs-drum.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
swc_common: patch
swc_ecma_lexer: major
swc_ecma_compat_es2015: patch
---

refactor(es/parser): remove the span swap in the parser
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
x Unexpected escape sequence in reserved word: static
,-[$DIR/tests/fixture/issues-2xxx/2844/input/index.js:1:1]
1 | class X { st\u0061tic y() { } }
: ^^^^^^^^^^^
`----
x Unexpected token `<lexing error: Error { error: (11..22, EscapeInReservedWord { word: "static" }) }>`. Expected identifier, string literal, numeric literal or [ for the computed key
,-[$DIR/tests/fixture/issues-2xxx/2844/input/index.js:1:1]
1 | class X { st\u0061tic y() { } }
: ^^^^^^^^^^^
`----
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//// [parserStrictMode1.ts]
//! x `static` cannot be used as an identifier in strict mode
//! x Expression expected
//! ,-[4:1]
//! 1 | foo1();
//! 2 | foo1();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//// [parserStrictMode1.ts]
//! x `static` cannot be used as an identifier in strict mode
//! x Expression expected
//! ,-[4:1]
//! 1 | foo1();
//! 2 | foo1();
Expand Down
8 changes: 7 additions & 1 deletion crates/swc_common/src/syntax_pos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,12 +406,18 @@ impl Span {
Span { lo, hi }
}

#[inline]
pub fn new_with_checked(lo: BytePos, hi: BytePos) -> Self {
debug_assert!(lo <= hi, "lo: {lo:#?}, hi: {hi:#?}");
Span { lo, hi }
}

#[inline]
pub fn with_lo(&self, lo: BytePos) -> Span {
Span::new(lo, self.hi)
}

#[inline]
#[inline(always)]
pub fn hi(self) -> BytePos {
self.hi
}
Expand Down
2 changes: 1 addition & 1 deletion crates/swc_ecma_compat_es2015/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2642,7 +2642,7 @@ impl Generator {
.push(expr);
}
return Invalid {
span: Span::new(BytePos(label.0 as _), BytePos(label.0 as _)),
span: Span::new_with_checked(BytePos(label.0 as _), BytePos(label.0 as _)),
}
.into();
}
Expand Down
102 changes: 75 additions & 27 deletions crates/swc_ecma_lexer/src/common/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
let s = unsafe { self.input_slice(slice_start, end) };
let cmt = swc_common::comments::Comment {
kind: swc_common::comments::CommentKind::Line,
span: Span::new(start, end),
span: Span::new_with_checked(start, end),
text: self.atom(s),
};

Expand Down Expand Up @@ -298,7 +298,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
};
let cmt = swc_common::comments::Comment {
kind: swc_common::comments::CommentKind::Line,
span: Span::new(start, end),
span: Span::new_with_checked(start, end),
text: self.atom(s),
};

Expand Down Expand Up @@ -371,7 +371,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
self.state_mut().mark_had_line_break();
}
let end_pos = self.input().end_pos();
let span = Span::new(end_pos, end_pos);
let span = Span::new_with_checked(end_pos, end_pos);
self.emit_error_span(span, SyntaxError::UnterminatedBlockComment);
return;
}
Expand Down Expand Up @@ -407,7 +407,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
let s = &src[..src.len() - 2];
let cmt = Comment {
kind: CommentKind::Block,
span: Span::new(start, end),
span: Span::new_with_checked(start, end),
text: self.atom(s),
};

Expand Down Expand Up @@ -642,20 +642,20 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
}

/// Reads an integer, octal integer, or floating-point number
fn read_number(
fn read_number<const START_WITH_DOT: bool, const START_WITH_ZERO: bool>(
&mut self,
starts_with_dot: bool,
) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
debug_assert!(!(START_WITH_DOT && START_WITH_ZERO));
debug_assert!(self.cur().is_some());

let start = self.cur_pos();
let mut has_underscore = false;

let lazy_integer = if starts_with_dot {
let lazy_integer = if START_WITH_DOT {
// first char is '.'
debug_assert!(
self.cur().is_some_and(|c| c == '.'),
"read_number(starts_with_dot = true) expects current char to be '.'"
"read_number<START_WITH_DOT = true> expects current char to be '.'"
);
LazyInteger {
start,
Expand All @@ -664,7 +664,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
has_underscore: false,
}
} else {
let starts_with_zero = self.cur().unwrap() == '0';
debug_assert!(!START_WITH_DOT);
debug_assert!(!START_WITH_ZERO || self.cur().unwrap() == '0');

// Use read_number_no_dot to support long numbers.
let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
Expand All @@ -683,7 +684,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
}

if starts_with_zero {
if START_WITH_ZERO {
// TODO: I guess it would be okay if I don't use -ffast-math
// (or something like that), but needs review.
if s.as_bytes().iter().all(|&c| c == b'0') {
Expand Down Expand Up @@ -736,8 +737,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
if has_dot {
self.bump();

// equal: if starts_with_dot { debug_assert!(xxxx) }
debug_assert!(!starts_with_dot || self.cur().is_some_and(|cur| cur.is_ascii_digit()));
// equal: if START_WITH_DOT { debug_assert!(xxxx) }
debug_assert!(!START_WITH_DOT || self.cur().is_some_and(|cur| cur.is_ascii_digit()));

// Read numbers after dot
self.read_digits::<_, (), 10>(|_, _, _| Ok(((), true)), true, &mut has_underscore)?;
Expand Down Expand Up @@ -807,7 +808,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
.checked_mul(radix as u32)
.and_then(|v| v.checked_add(val))
.ok_or_else(|| {
let span = Span::new(start, start);
let span = Span::new_with_checked(start, start);
crate::error::Error::new(span, SyntaxError::InvalidUnicodeEscape)
})?;

Expand Down Expand Up @@ -1788,9 +1789,9 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
}
};
if next.is_ascii_digit() {
return self.read_number(true).map(|v| match v {
return self.read_number::<true, false>().map(|v| match v {
Left((value, raw)) => Self::Token::num(value, raw, self),
Right((value, raw)) => Self::Token::bigint(value, raw, self),
Right(_) => unreachable!("read_number should not return bigint for leading dot"),
});
}

Expand Down Expand Up @@ -1847,7 +1848,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
Some('o') | Some('O') => self.read_radix_number::<8>(),
Some('b') | Some('B') => self.read_radix_number::<2>(),
_ => {
return self.read_number(false).map(|v| match v {
return self.read_number::<false, true>().map(|v| match v {
Left((value, raw)) => Self::Token::num(value, raw, self),
Right((value, raw)) => Self::Token::bigint(value, raw, self),
});
Expand Down Expand Up @@ -2110,20 +2111,16 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
}
}

/// This can be used if there's no keyword starting with the first
/// character.
fn read_word_with(
fn read_keyword_with(
&mut self,
convert: &dyn Fn(&str) -> Option<Self::Token>,
) -> LexResult<Option<Self::Token>> {
debug_assert!(self.cur().is_some());

let start = self.cur_pos();
let (word, has_escape) = self.read_word_as_str_with(|l, s, _, can_be_known| {
if can_be_known {
if let Some(word) = convert(s) {
return word;
}
let (word, has_escape) = self.read_keyword_as_str_with(|l, s, _, _| {
if let Some(word) = convert(s) {
return word;
}
let atom = l.atom(s);
Self::Token::unknown_ident(atom, l)
Expand All @@ -2133,20 +2130,71 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
// 'await' and 'yield' may have semantic of reserved word, which means lexer
// should know context or parser should handle this error. Our approach to this
// problem is former one.

if has_escape && word.is_reserved(self.ctx()) {
let word = word.into_atom(self).unwrap();
self.error(start, SyntaxError::EscapeInReservedWord { word })?
} else {
Ok(Some(word))
}
}

/// This is a performant version of [Lexer::read_word_as_str_with] for
/// reading keywords. We should make sure the first byte is a valid
/// ASCII.
///
/// Strategy: a fast path scans a run of ASCII identifier-continue bytes via
/// `byte_search!`, bailing out to `read_word_as_str_with_slow_path` as soon
/// as a `\` escape or a non-ASCII byte appears. On the fast path the word is
/// handed to `convert` with `has_escape == false` — escape handling lives
/// entirely in the slow path.
fn read_keyword_as_str_with<F, Ret>(&mut self, convert: F) -> LexResult<(Ret, bool)>
where
F: FnOnce(&mut Self, &str, bool, bool) -> Ret,
{
let slice_start = self.cur_pos();
// Stays `false` throughout: any escape sequence diverts to the slow path
// before this value is returned from the fast path.
let has_escape = false;

// Fast path: try to scan ASCII identifier using byte_search
// Performance optimization: check if first char disqualifies as keyword
// NOTE(review): no such first-char check appears below — the comment above
// looks stale; confirm against the call sites.
// Advance past first byte (the caller guarantees it is valid ASCII — see
// the doc comment).
self.bump();

// Use byte_search to quickly scan to end of ASCII identifier
let next_byte = byte_search! {
lexer: self,
table: NOT_ASCII_ID_CONTINUE_TABLE,
handle_eof: {
// Reached EOF, entire remainder is identifier
let end = self.cur_pos();
let s = unsafe {
// Safety: slice_start and end are valid position because we got them from
// `self.input`
self.input_slice(slice_start, end)
};

return Ok((convert(self, s, false, true), false));
},
};

// Check if we hit end of identifier or need to fall back to slow path
if !next_byte.is_ascii() {
// Hit Unicode character, fall back to slow path from current position
self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
} else if next_byte == b'\\' {
// Hit escape sequence, fall back to slow path from current position
self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
} else {
// Hit end of identifier (non-continue ASCII char)
let end = self.cur_pos();
let s = unsafe {
// Safety: slice_start and end are valid position because we got them from
// `self.input`
self.input_slice(slice_start, end)
};

return Ok((convert(self, s, has_escape, true), has_escape));
}
}
}

/// Returns an empty (zero-width) span located at `p` (`lo == hi == p`).
///
/// Fix: the pasted diff left both the removed `Span::new` line and the added
/// `Span::new_with_checked` line in the body; keep only the post-change,
/// checked variant so the function has a single statement. The ordering
/// invariant trivially holds (`p <= p`), so `new_with_checked` cannot fire
/// its debug assertion here.
pub fn pos_span(p: BytePos) -> Span {
    Span::new_with_checked(p, p)
}

/// Returns a span starting at `p` and covering `len` bytes.
///
/// Fix: the pasted diff left both the removed `Span::new` line and the added
/// `Span::new_with_checked` line in the body; keep only the post-change,
/// checked variant so the function has a single statement. Since
/// `p + BytePos(len) >= p` for any `u32` length (barring wraparound of
/// `BytePos` addition — TODO confirm overflow semantics of `BytePos + BytePos`),
/// the debug assertion in `new_with_checked` holds.
pub fn fixed_len_span(p: BytePos, len: u32) -> Span {
    Span::new_with_checked(p, p + BytePos(len))
}
2 changes: 1 addition & 1 deletion crates/swc_ecma_lexer/src/common/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ pub trait TokenFactory<'a, TokenAndSpan, I: Tokens<TokenAndSpan>>: Sized + Parti
fn unknown_ident(value: Atom, lexer: &mut Self::Lexer) -> Self;
fn is_unknown_ident(&self) -> bool;
fn take_unknown_ident(self, buffer: &mut Self::Buffer) -> Atom;
fn take_unknown_ident_ref<'b>(&'b self, buffer: &'b mut Self::Buffer) -> &'b Atom;
fn take_unknown_ident_ref<'b>(&'b self, buffer: &'b Self::Buffer) -> &'b Atom;

fn is_known_ident(&self) -> bool;
fn take_known_ident(&self) -> Atom;
Expand Down
2 changes: 1 addition & 1 deletion crates/swc_ecma_lexer/src/common/parser/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ pub trait Buffer<'a> {
.get_cur()
.map(|item| item.span())
.unwrap_or(self.prev_span());
Span::new(data.lo, data.hi)
Span::new_with_checked(data.lo, data.hi)
}

/// Returns last byte position of previous token.
Expand Down
9 changes: 4 additions & 5 deletions crates/swc_ecma_lexer/src/common/parser/class_and_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1508,9 +1508,8 @@ fn parse_class_body<'a, P: Parser<'a>>(p: &mut P) -> PResult<Vec<ClassMember>> {
while !eof!(p) && !p.input_mut().is(&P::Token::RBRACE) {
if p.input_mut().eat(&P::Token::SEMI) {
let span = p.input().prev_span();
elems.push(ClassMember::Empty(EmptyStmt {
span: Span::new(span.lo, span.hi),
}));
debug_assert!(span.lo <= span.hi);
elems.push(ClassMember::Empty(EmptyStmt { span }));
continue;
}
let elem = p.do_inside_of_context(Context::AllowDirectSuper, parse_class_member)?;
Expand Down Expand Up @@ -1667,12 +1666,12 @@ fn parse_class_inner<'a, P: Parser<'a>>(
} else {
expect!(p, &P::Token::RBRACE);
}
let end = p.last_pos();

let span = p.span(class_start);
Ok((
ident,
Box::new(Class {
span: Span::new(class_start, end),
span,
decorators,
is_abstract: false,
type_params,
Expand Down
Loading
Loading