Skip to content

Commit 6791615

Browse files
muellerj2, alexprabhatbara, and StephanTLavavej
authored
<regex>: Make capture groups in negative lookahead assertions always match nothing (#5366)
Co-authored-by: alexprabhatbara <[email protected]> Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent 5a1cbde commit 6791615

File tree

2 files changed

+65
-11
lines changed

2 files changed

+65
-11
lines changed

stl/inc/regex

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,6 +1670,8 @@ private:
16701670
_Tgt_state_t<_It> _Res;
16711671
vector<_Loop_vals_t> _Loop_vals;
16721672

1673+
bool _Do_assert(_Node_assert*);
1674+
bool _Do_neg_assert(_Node_assert*);
16731675
bool _Do_if(_Node_if*);
16741676
bool _Do_rep0(_Node_rep*, bool);
16751677
bool _Do_rep(_Node_rep*, bool, int);
@@ -3142,6 +3144,28 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Tidy() noexcept { // free memory
31423144
_Root = nullptr;
31433145
}
31443146

3147+
// Handles a positive assertion node: tries to match the node's child pattern
// starting at the current position and reports whether it matched.
// Returns true if the child pattern matched, false otherwise.
template <class _BidIt, class _Elem, class _RxTraits, class _It>
3148+
bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Do_assert(_Node_assert* _Node) { // apply assert node
3149+
// Only the current position is saved; no other part of _Tgt_state is copied.
_It _Ch = _Tgt_state._Cur;
3150+
if (_Match_pat(_Node->_Child)) {
3151+
// The child matched: rewind the position so the assertion itself consumes no input.
// Everything else in _Tgt_state is left exactly as the child match left it.
_Tgt_state._Cur = _Ch;
3152+
return true;
3153+
} else {
3154+
// The child did not match: _Tgt_state is not restored in this function.
// NOTE(review): presumably the caller's failure handling deals with that -- confirm.
return false;
3155+
}
3156+
}
3157+
3158+
// Handles a negative assertion node: the assertion succeeds exactly when the
// node's child pattern does NOT match starting at the current position.
// Returns true if the child pattern failed to match, false if it matched.
template <class _BidIt, class _Elem, class _RxTraits, class _It>
3159+
bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Do_neg_assert(_Node_assert* _Node) { // apply negative assert node
3160+
// Snapshot the state before attempting the child pattern.
// NOTE(review): the snapshot type is _Bt_state_t<_It>; which parts of _Tgt_state it captures
// is not visible here -- confirm it covers the position and the capture-group bookkeeping.
_Bt_state_t<_It> _St = _Tgt_state;
3161+
if (!_Match_pat(_Node->_Child)) {
3162+
// The child failed, so the assertion holds. Restore the whole snapshot (not just the position)
// so that nothing recorded during the failed attempt remains in _Tgt_state.
_Tgt_state = _St;
3163+
return true;
3164+
} else {
3165+
// The child matched, so the assertion fails: _Tgt_state is not restored in this function.
return false;
3166+
}
3167+
}
3168+
31453169
template <class _BidIt, class _Elem, class _RxTraits, class _It>
31463170
bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Do_if(_Node_if* _Node) { // apply if node
31473171
_Tgt_state_t<_It> _St = _Tgt_state;
@@ -3597,20 +3621,15 @@ bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Match_pat(_Node_base* _Nx) { // c
35973621
case _N_end_group:
35983622
break;
35993623

3600-
case _N_neg_assert:
36013624
case _N_assert:
36023625
{ // check assert
3603-
_It _Ch = _Tgt_state._Cur;
3604-
bool _Neg = _Nx->_Kind == _N_neg_assert;
3605-
_Bt_state_t<_It> _St = _Tgt_state;
3606-
if (_Match_pat(static_cast<_Node_assert*>(_Nx)->_Child) == _Neg) {
3607-
// restore initial state and indicate failure
3608-
_Tgt_state = _St;
3609-
_Failed = true;
3610-
} else {
3611-
_Tgt_state._Cur = _Ch;
3612-
}
3626+
_Failed = !_Do_assert(static_cast<_Node_assert*>(_Nx));
3627+
break;
3628+
}
36133629

3630+
case _N_neg_assert:
3631+
{ // check negative assert
3632+
_Failed = !_Do_neg_assert(static_cast<_Node_assert*>(_Nx));
36143633
break;
36153634
}
36163635

tests/std/tests/VSO_0000000_regex_use/test.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,40 @@ void test_gh_5214() {
11501150
}
11511151
}
11521152

1153+
// Regression test for lookahead assertions that contain a capture group.
// Both patterns are anchored with ^ and $ and require exactly two characters after the assertion.
// NOTE(review): presumably {-1, -1} means "group did not participate" and {0, 1} is the
// matched range of the group -- confirm against the test_regex helper.
void test_gh_5245() {
1154+
// GH-5245: <regex>: Successful negative lookahead assertions
1155+
// sometimes mistakenly assign matches to capture groups
1156+
{
1157+
// Negative lookahead: the input must not start with "ab"; the group (a) lives inside the assertion.
test_regex neg_assert(&g_regexTester, "^(?!(a)b)..$");
1158+
neg_assert.should_search_fail("ab"); // rejected by the negative assertion
1159+
neg_assert.should_search_match_capture_groups("ac", "ac", match_default, {{-1, -1}}); // test the fix
1160+
neg_assert.should_search_match_capture_groups("cb", "cb", match_default, {{-1, -1}}); // never captures
1161+
1162+
// These 3-character and 4-character tests verify that after a lookahead assertion, we reset the position:
1163+
neg_assert.should_search_fail("abb");
1164+
neg_assert.should_search_fail("acc");
1165+
neg_assert.should_search_fail("cbb");
1166+
neg_assert.should_search_fail("abab");
1167+
neg_assert.should_search_fail("abcc");
1168+
neg_assert.should_search_fail("accc");
1169+
}
1170+
1171+
{
1172+
// Positive lookahead: the input must start with "ab"; the group (a) lives inside the assertion.
test_regex pos_assert(&g_regexTester, "^(?=(a)b)..$");
1173+
pos_assert.should_search_match_capture_groups("ab", "ab", match_default, {{0, 1}}); // capture group retained
1174+
pos_assert.should_search_fail("ac"); // rejected by the positive assertion midway through
1175+
pos_assert.should_search_fail("cb"); // rejected by the positive assertion immediately
1176+
1177+
// These 3-character and 4-character tests verify that after a lookahead assertion, we reset the position:
1178+
pos_assert.should_search_fail("abb");
1179+
pos_assert.should_search_fail("acc");
1180+
pos_assert.should_search_fail("cbb");
1181+
pos_assert.should_search_fail("abab");
1182+
pos_assert.should_search_fail("abcc");
1183+
pos_assert.should_search_fail("accc");
1184+
}
1185+
}
1186+
11531187
void test_gh_5253() {
11541188
// GH-5253 cleaned up parsing logic for quantifiers that were applied to single characters
11551189
g_regexTester.should_match("abbb", "ab*");
@@ -1436,6 +1470,7 @@ int main() {
14361470
test_gh_5167();
14371471
test_gh_5192();
14381472
test_gh_5214();
1473+
test_gh_5245();
14391474
test_gh_5253();
14401475
test_gh_5362();
14411476
test_gh_5364();

0 commit comments

Comments
 (0)