Skip to content

Commit 6468f42

Browse files
committed
Support multiple non-overlapping recursions
1 parent cbdebe5 commit 6468f42

File tree

3 files changed

+65
-33
lines changed

3 files changed

+65
-33
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33
[![npm version][npm-version-src]][npm-version-href]
44
[![bundle][bundle-src]][bundle-href]
55

6-
This is an official plugin for [Regex+](https://github.com/slevithan/regex) that adds support for recursive matching up to a specified max depth *N*, where *N* can be between 2 and 100. Generated regexes are native `RegExp` instances, and support all JavaScript regular expression features except numbered backreferences (support could be added in future versions).
6+
This is an official plugin for [Regex+](https://github.com/slevithan/regex) that adds support for recursive matching up to a specified max depth *N*, where *N* can be between 2 and 100. Generated regexes are native JavaScript `RegExp` instances, and support all regular expression features except numbered backreferences (support could be added in future versions).
77

88
Recursive matching is added to a regex via one of the following (the recursion depth limit is provided in place of *`N`*):
99

1010
- `(?R=N)` — Recursively match the entire regex at this position.
1111
- `\g<name&R=N>` or `\g<number&R=N>` — Recursively match the contents of the group referenced by name or number at this position.
1212
- The `\g` subroutine must be *within* the referenced group.
1313

14-
Named captures and backreferences are supported within recursion, and are independent per depth level. So e.g. `groups.name` on a match object is the value captured by group `name` at the top level of the recursion stack.
14+
Multiple uses of recursion within the same pattern are allowed if they are non-overlapping. Named captures and backreferences are supported within recursion, and are independent per depth level. For example, `groups.name` on a match object is the value captured by group `name` at the top level of the recursion stack.
1515

1616
## Install and use
1717

spec/recursion-spec.js

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,6 @@ import {regex} from 'regex';
22
import {recursion} from '../src/index.js';
33

44
describe('recursion', () => {
5-
it('should throw for invalid and unsupported recursion depths', () => {
6-
const values = ['-2', '0', '1', '02', '+2', '2.5', '101', 'a', null];
7-
for (const value of values) {
8-
expect(() => regex({plugins: [recursion]})({raw: [`a(?R=${value})?b`]})).toThrow();
9-
expect(() => regex({plugins: [recursion]})({raw: [`(?<r>a\\g<r&R=${value}>?b)`]})).toThrow();
10-
}
11-
});
12-
135
it('should allow recursion depths 2-100', () => {
146
const values = ['2', '100'];
157
for (const value of values) {
@@ -18,7 +10,15 @@ describe('recursion', () => {
1810
}
1911
});
2012

21-
// Just documenting current behavior; this could be supported in the future
13+
it('should throw for invalid and unsupported recursion depths', () => {
14+
const values = ['-2', '0', '1', '02', '+2', '2.5', '101', 'a', 'null'];
15+
for (const value of values) {
16+
expect(() => regex({plugins: [recursion]})({raw: [`a(?R=${value})?b`]})).toThrow();
17+
expect(() => regex({plugins: [recursion]})({raw: [`(?<r>a\\g<r&R=${value}>?b)`]})).toThrow();
18+
}
19+
});
20+
21+
// Documenting current behavior; this could be supported in the future
2222
it('should throw for numbered backrefs in interpolated regexes when using recursion', () => {
2323
expect(() => regex({plugins: [recursion]})`a(?R=2)?b${/()\1/}`).toThrow();
2424
expect(() => regex({plugins: [recursion]})`(?<n>a|\g<n&R=2>${/()\1/})`).toThrow();
@@ -34,15 +34,26 @@ describe('recursion', () => {
3434
});
3535

3636
it('should not modify escaped recursion operators', () => {
37+
expect(() => regex({plugins: [recursion]})`a\(?R=2)?b`).toThrow();
3738
expect('a\\g<r&R=2>b').toMatch(regex({plugins: [recursion]})`^(?<r>a\\g<r&R=2>?b)$`);
3839
expect('a\\a\\bb').toMatch(regex({plugins: [recursion]})`^(?<r>a\\\g<r&R=2>?b)$`);
3940
});
4041

42+
it('should not modify recursion-like syntax in character classes', () => {
43+
expect(() => regex({plugins: [recursion]})`a[(?R=2)]b`).toThrow();
44+
expect(() => regex({plugins: [recursion]})`(?<r>a[\g<r&R=2>]b)`).toThrow();
45+
});
46+
4147
describe('global', () => {
4248
it('should match global recursion', () => {
4349
expect(regex({plugins: [recursion]})`a(?R=2)?b`.exec('aabb')?.[0]).toBe('aabb');
4450
});
4551

52+
it('should throw for overlapping global recursions', () => {
53+
expect(() => regex({plugins: [recursion]})`a(?R=2)?b(?R=2)?`).toThrow();
54+
expect(() => regex({plugins: [recursion]})`(a(?R=2)?)(b(?R=2)?)`).toThrow();
55+
});
56+
4657
it('should have backrefs refer to their own recursion depth', () => {
4758
expect(regex({plugins: [recursion]})`(?<w>\w)0(?R=2)?1\k<w>`.exec('a0b01b1a')?.[0]).toBe('a0b01b1a');
4859
expect(regex({plugins: [recursion]})`(?<w>\w)0(?R=2)?1\k<w>`.test('a0b01a1b')).toBeFalse();
@@ -55,20 +66,24 @@ describe('recursion', () => {
5566
expect('aab').not.toMatch(regex({plugins: [recursion]})`^(?<r>a\g<r&R=2>?b)$`);
5667
});
5768

69+
it('should match multiple direct, nonoverlapping recursions', () => {
70+
expect('aabbcddee').toMatch(regex({plugins: [recursion]})`^(?<a>a\g<a&R=2>?b)c(?<b>d\g<b&R=2>?e)$`);
71+
expect('aabbcddee').toMatch(regex({plugins: [recursion]})`^(?<r>(?<a>a\g<a&R=2>?b)c(?<b>d\g<b&R=2>?e))$`);
72+
expect('aabbcddee').toMatch(regex({plugins: [recursion]})`^(?<r>(?<a>a\g<r&R=2>?b))c(?<b>d\g<b&R=2>?e)$`);
73+
});
74+
5875
it('should throw for multiple direct, overlapping recursions', () => {
5976
expect(() => regex({plugins: [recursion]})`a(?R=2)?(?<r>a\g<r&R=2>?)`).toThrow();
6077
expect(() => regex({plugins: [recursion]})`(?<r>a\g<r&R=2>?\g<r&R=2>?)`).toThrow();
61-
});
62-
63-
it('should throw for multiple direct, nonoverlapping recursions', () => {
64-
expect(() => regex({plugins: [recursion]})`(?<r1>a\g<r1&R=2>?)(?<r2>a\g<r2&R=2>?)`).toThrow();
78+
expect(() => regex({plugins: [recursion]})`(?<a>(?<b>a\g<b&R=2>?)\g<a&R=2>)`).toThrow();
6579
});
6680

6781
it('should throw for indirect recursion', () => {
6882
expect(() => regex({plugins: [recursion]})`(?<a>\g<b&R=2>)(?<b>a\g<a&R=2>?)`).toThrow();
6983
expect(() => regex({plugins: [recursion]})`\g<a&R=2>(?<a>\g<b&R=2>)(?<b>a\g<a&R=2>?)`).toThrow();
7084
expect(() => regex({plugins: [recursion]})`(?<a>\g<b&R=2>)(?<b>\g<c&R=2>)(?<c>a\g<a&R=2>?)`).toThrow();
7185
expect(() => regex({plugins: [recursion]})`(?<a>(?<b>a\g<a&R=2>?)\g<b&R=2>)`).toThrow();
86+
expect(() => regex({plugins: [recursion]})`(?<a>(?<b>a\g<b&R=2>?)\g<b&R=2>)`).toThrow();
7287
expect(() => regex({plugins: [recursion]})`(?<a>\g<b&R=2>(?<b>a\g<a&R=2>?))`).toThrow();
7388
});
7489

src/index.js

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ const gRToken = String.raw`\\g<(?<gRNameOrNum>[^>&]+)&R=(?<gRDepth>[^>]+)>`;
44
const recursiveToken = String.raw`\(\?R=(?<rDepth>[^\)]+)\)|${gRToken}`;
55
const namedCapturingDelim = String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`;
66
const token = new RegExp(String.raw`${namedCapturingDelim}|${recursiveToken}|\\?.`, 'gsu');
7+
const overlappingRecursionMsg = 'Cannot use multiple overlapping recursions';
78

89
/**
910
@param {string} expression
@@ -28,13 +29,14 @@ export function recursion(expression) {
2829
// duplicated by recursion and refer to a group inside the expression being recursed.
2930
// Additionally, numbered backrefs inside and outside of the recursed expression would need to
3031
// be adjusted based on any capturing groups added by recursion.
31-
throw new Error(`Numbered backrefs cannot be used with recursion; use named backref`);
32+
throw new Error(`Numbered backrefs cannot be used with recursion`);
3233
}
3334
if (hasUnescaped(expression, String.raw`\(\?\(DEFINE\)`, Context.DEFAULT)) {
3435
throw new Error(`DEFINE groups cannot be used with recursion`);
3536
}
3637
const groupContentsStartPos = new Map();
3738
const openGroups = [];
39+
let hasRecursed = false;
3840
let numCharClassesOpen = 0;
3941
let numCaptures = 0;
4042
let match;
@@ -48,24 +50,45 @@ export function recursion(expression) {
4850
// `(?R=N)`
4951
if (rDepth) {
5052
assertMaxInBounds(rDepth);
53+
if (hasRecursed) {
54+
throw new Error(overlappingRecursionMsg);
55+
}
5156
const pre = expression.slice(0, match.index);
5257
const post = expression.slice(token.lastIndex);
53-
assertNoFollowingRecursion(post);
58+
if (hasUnescaped(post, recursiveToken, Context.DEFAULT)) {
59+
throw new Error(overlappingRecursionMsg);
60+
}
61+
// No need to parse further
5462
return makeRecursive(pre, post, +rDepth, false);
55-
// `\g<name&R=N>`, `\g<N&R=N>`
63+
// `\g<name&R=N>`, `\g<number&R=N>`
5664
} else if (gRNameOrNum) {
5765
assertMaxInBounds(gRDepth);
58-
assertNoFollowingRecursion(expression.slice(token.lastIndex));
59-
if (!openGroups.some(g => g.name === gRNameOrNum || g.num === +gRNameOrNum)) {
60-
throw new Error(`Recursion via \\g<${gRNameOrNum}&R=${gRDepth}> must be used within the referenced group`);
66+
let isWithinReffedGroup = false;
67+
for (const g of openGroups) {
68+
if (g.name === gRNameOrNum || g.num === +gRNameOrNum) {
69+
isWithinReffedGroup = true;
70+
if (g.hasRecursedWithin) {
71+
throw new Error(overlappingRecursionMsg);
72+
}
73+
break;
74+
}
75+
}
76+
if (!isWithinReffedGroup) {
77+
throw new Error(`Recursive \\g cannot be used outside the referenced group "\\g<${gRNameOrNum}&R=${gRDepth}>"`);
6178
}
6279
const startPos = groupContentsStartPos.get(gRNameOrNum);
63-
const recursiveGroupContents = getGroupContents(expression, startPos);
64-
const pre = expression.slice(startPos, match.index);
65-
const post = recursiveGroupContents.slice(pre.length + m.length);
66-
return expression.slice(0, startPos) +
67-
makeRecursive(pre, post, +gRDepth, true) +
68-
expression.slice(startPos + recursiveGroupContents.length);
80+
const groupContents = getGroupContents(expression, startPos);
81+
const groupContentsPre = expression.slice(startPos, match.index);
82+
const groupContentsPost = groupContents.slice(groupContentsPre.length + m.length);
83+
const expansion = makeRecursive(groupContentsPre, groupContentsPost, +gRDepth, true);
84+
const pre = expression.slice(0, startPos);
85+
const post = expression.slice(startPos + groupContents.length);
86+
// Modify the string we're looping over
87+
expression = `${pre}${expansion}${post}`;
88+
// Step forward for the next loop iteration
89+
token.lastIndex += expansion.length - m.length - groupContentsPre.length - groupContentsPost.length;
90+
openGroups.forEach(g => g.hasRecursedWithin = true);
91+
hasRecursed = true;
6992
} else if (captureName) {
7093
numCaptures++;
7194
groupContentsStartPos.set(String(numCaptures), token.lastIndex);
@@ -107,12 +130,6 @@ function assertMaxInBounds(max) {
107130
}
108131
}
109132

110-
function assertNoFollowingRecursion(remainingExpression) {
111-
if (hasUnescaped(remainingExpression, recursiveToken, Context.DEFAULT)) {
112-
throw new Error('Recursion can only be used once per regex');
113-
}
114-
}
115-
116133
/**
117134
@param {string} pre
118135
@param {string} post

0 commit comments

Comments
 (0)