Skip to content

Commit 5faf91d

Browse files
committed
Don't include duplicated captures in result array when using subclass option
1 parent 313bd96 commit 5faf91d

File tree

2 files changed

+44
-10
lines changed

2 files changed

+44
-10
lines changed

spec/recursion-spec.js

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,26 @@ describe('recursion', () => {
124124
});
125125
});
126126

127+
describe('subclass option', () => {
128+
it('should exclude duplicate numbered captures', () => {
129+
// Subpattern recursion
130+
expect(regex({plugins: [recursion], subclass: false, disable: {n: true}})`((a)\g<1&R=2>?)`.exec('aa')).toHaveSize(4);
131+
expect(regex({plugins: [recursion], subclass: true, disable: {n: true}})`((a)\g<1&R=2>?)`.exec('aa')).toHaveSize(3);
132+
// Global recursion
133+
expect(regex({plugins: [recursion], subclass: false, disable: {n: true}})`(a)(?R=2)?`.exec('aa')).toHaveSize(3);
134+
expect(regex({plugins: [recursion], subclass: true, disable: {n: true}})`(a)(?R=2)?`.exec('aa')).toHaveSize(2);
135+
});
136+
137+
it('should exclude duplicate named captures', () => {
138+
// Subpattern recursion
139+
expect(regex({plugins: [recursion], subclass: false})`(?<r>(?<d>a)\g<r&R=2>?)`.exec('aa')).toHaveSize(4);
140+
expect(regex({plugins: [recursion], subclass: true})`(?<r>(?<d>a)\g<r&R=2>?)`.exec('aa')).toHaveSize(3);
141+
// Global recursion
142+
expect(regex({plugins: [recursion], subclass: false})`(?<d>a)(?R=2)?`.exec('aa')).toHaveSize(3);
143+
expect(regex({plugins: [recursion], subclass: true})`(?<d>a)(?R=2)?`.exec('aa')).toHaveSize(2);
144+
});
145+
});
146+
127147
describe('readme examples', () => {
128148
it('should match an equal number of two different subpatterns', () => {
129149
expect(regex({plugins: [recursion]})`a(?R=50)?b`.exec('test aaaaaabbb')[0]).toBe('aaabbb');

src/index.js

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,18 @@ const recursiveToken = r`\(\?R=(?<rDepth>[^\)]+)\)|${gRToken}`;
66
const namedCapturingDelim = r`\(\?<(?![=!])(?<captureName>[^>]+)>`;
77
const token = new RegExp(r`${namedCapturingDelim}|${recursiveToken}|\(\?|\\?.`, 'gsu');
88
const overlappingRecursionMsg = 'Cannot use multiple overlapping recursions';
9+
// Marker inserted right after a capturing group's opening delimiter when `useEmulationGroups` is on; see <github.com/slevithan/regex/blob/main/src/subclass.js>
10+
const emulationGroupMarker = '$E$';
911

1012
/**
1113
@param {string} expression
14+
@param {{
15+
flags?: string;
16+
useEmulationGroups?: boolean;
17+
}} [data]
1218
@returns {string}
1319
*/
14-
export function recursion(expression) {
20+
export function recursion(expression, data) {
1521
// Keep the initial fail-check (which avoids unneeded processing) as fast as possible by testing
1622
// without the accuracy improvement of using `hasUnescaped` with default `Context`
1723
if (!(new RegExp(recursiveToken, 'su').test(expression))) {
@@ -20,6 +26,7 @@ export function recursion(expression) {
2026
if (hasUnescaped(expression, r`\(\?\(DEFINE\)`, Context.DEFAULT)) {
2127
throw new Error('DEFINE groups cannot be used with recursion');
2228
}
29+
const useEmulationGroups = !!data?.useEmulationGroups;
2330
const hasNumberedBackref = hasUnescaped(expression, r`\\[1-9]`, Context.DEFAULT);
2431
const groupContentsStartPos = new Map();
2532
const openGroups = [];
@@ -57,7 +64,7 @@ export function recursion(expression) {
5764
throw new Error(overlappingRecursionMsg);
5865
}
5966
// No need to parse further
60-
return makeRecursive(pre, post, +rDepth, false);
67+
return makeRecursive(pre, post, +rDepth, false, useEmulationGroups);
6168
// `\g<name&R=N>`, `\g<number&R=N>`
6269
} else if (gRNameOrNum) {
6370
assertMaxInBounds(gRDepth);
@@ -84,7 +91,7 @@ export function recursion(expression) {
8491
}
8592
const groupContentsPre = expression.slice(startPos, match.index);
8693
const groupContentsPost = groupContents.slice(groupContentsPre.length + m.length);
87-
const expansion = makeRecursive(groupContentsPre, groupContentsPost, +gRDepth, true);
94+
const expansion = makeRecursive(groupContentsPre, groupContentsPost, +gRDepth, true, useEmulationGroups);
8895
const pre = expression.slice(0, startPos);
8996
const post = expression.slice(startPos + groupContents.length);
9097
// Modify the string we're looping over
@@ -139,9 +146,10 @@ function assertMaxInBounds(max) {
139146
@param {string} post
140147
@param {number} maxDepth
141148
@param {boolean} isSubpattern
149+
@param {boolean} useEmulationGroups
142150
@returns {string}
143151
*/
144-
function makeRecursive(pre, post, maxDepth, isSubpattern) {
152+
function makeRecursive(pre, post, maxDepth, isSubpattern, useEmulationGroups) {
145153
const namesInRecursed = new Set();
146154
// Avoid this work if not needed
147155
if (isSubpattern) {
@@ -153,35 +161,41 @@ function makeRecursive(pre, post, maxDepth, isSubpattern) {
153161
// Depth 2: 'pre(?:pre(?:)post)post'
154162
// Depth 3: 'pre(?:pre(?:pre(?:)post)post)post'
155163
return `${pre}${
156-
repeatWithDepth(`(?:${pre}`, reps, (isSubpattern ? namesInRecursed : null))
164+
repeatWithDepth(`(?:${pre}`, reps, (isSubpattern ? namesInRecursed : null), 'forward', useEmulationGroups)
157165
}(?:)${
158-
repeatWithDepth(`${post})`, reps, (isSubpattern ? namesInRecursed : null), 'backward')
166+
repeatWithDepth(`${post})`, reps, (isSubpattern ? namesInRecursed : null), 'backward', useEmulationGroups)
159167
}${post}`;
160168
}
161169

162170
/**
163171
@param {string} expression
164172
@param {number} reps
165173
@param {Set<string> | null} namesInRecursed
166-
@param {'forward' | 'backward'} [direction]
174+
@param {'forward' | 'backward'} direction
175+
@param {boolean} useEmulationGroups
167176
@returns {string}
168177
*/
169-
function repeatWithDepth(expression, reps, namesInRecursed, direction = 'forward') {
178+
function repeatWithDepth(expression, reps, namesInRecursed, direction, useEmulationGroups) {
170179
const startNum = 2;
171180
const depthNum = i => direction === 'backward' ? reps - i + startNum - 1 : i + startNum;
172181
let result = '';
173182
for (let i = 0; i < reps; i++) {
174183
const captureNum = depthNum(i);
175184
result += replaceUnescaped(
176185
expression,
177-
r`${namedCapturingDelim}|\\k<(?<backref>[^>]+)>`,
186+
r`${namedCapturingDelim}|\\k<(?<backref>[^>]+)>${useEmulationGroups ? r`|\((?!\?)` : ''}`,
178187
({0: m, groups: {captureName, backref}}) => {
179188
if (backref && namesInRecursed && !namesInRecursed.has(backref)) {
180189
// Don't alter backrefs to groups outside the recursed subpattern
181190
return m;
182191
}
192+
if (m === '(') {
193+
return `(${emulationGroupMarker}`;
194+
}
183195
const suffix = `_$${captureNum}`;
184-
return captureName ? `(?<${captureName}${suffix}>` : r`\k<${backref}${suffix}>`;
196+
return captureName ?
197+
`(?<${captureName}${suffix}>${useEmulationGroups ? emulationGroupMarker : ''}` :
198+
r`\k<${backref}${suffix}>`;
185199
},
186200
Context.DEFAULT
187201
);

0 commit comments

Comments
 (0)