Skip to content

Commit 023485c

Browse files
authored
<regex>: Move saved match state to the heap (#5682)
1 parent a433b37 commit 023485c

File tree

1 file changed: 78 additions and 39 deletions

stl/inc/regex

Lines changed: 78 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1574,7 +1574,7 @@ public:
15741574
};
15751575

15761576
struct _Loop_vals_v2_t { // storage for loop administration
1577-
void* _Loop_iter = nullptr;
1577+
size_t _Loop_frame_idx = 0;
15781578
int _Loop_idx = 0;
15791579
unsigned int _Group_first = 0;
15801580
};
@@ -1736,10 +1736,14 @@ public:
17361736
_Full = _Full_match;
17371737
_Max_complexity_count = _REGEX_MAX_COMPLEXITY_COUNT;
17381738
_Max_stack_count = _REGEX_MAX_STACK_COUNT;
1739+
_Frames_count = 0;
17391740

17401741
_Matched = false;
17411742

1742-
if (!_Match_pat(_Rep)) {
1743+
bool _Succeeded = _Match_pat(_Rep);
1744+
_STL_INTERNAL_CHECK(_Frames_count == 0);
1745+
1746+
if (!_Succeeded) {
17431747
return false;
17441748
}
17451749

@@ -1784,6 +1788,11 @@ private:
17841788
_Tgt_state_t<_It> _Tgt_state;
17851789
_Tgt_state_t<_It> _Res;
17861790
vector<_Loop_vals_v2_t> _Loop_vals;
1791+
vector<_Tgt_state_t<_It>> _Frames;
1792+
size_t _Frames_count;
1793+
1794+
size_t _Push_frame();
1795+
void _Pop_frame(size_t);
17871796

17881797
bool _Do_assert(_Node_assert*);
17891798
bool _Do_neg_assert(_Node_assert*);
@@ -3338,6 +3347,22 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Tidy() noexcept { // free memory
33383347
_Root = nullptr;
33393348
}
33403349

3350+
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3351+
size_t _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Push_frame() {
3352+
if (_Frames_count >= _Frames.size()) {
3353+
_Frames.push_back(_Tgt_state);
3354+
} else {
3355+
_Frames[_Frames_count] = _Tgt_state;
3356+
}
3357+
return _Frames_count++;
3358+
}
3359+
3360+
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3361+
void _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Pop_frame(size_t _Idx) {
3362+
_STL_INTERNAL_CHECK(_Idx + 1 == _Frames_count);
3363+
_Frames_count = _Idx;
3364+
}
3365+
33413366
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
33423367
bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_assert(_Node_assert* _Node) { // apply assert node
33433368
_It _Ch = _Tgt_state._Cur;
@@ -3352,34 +3377,37 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_assert(_Node_assert*
33523377
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
33533378
bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_neg_assert(_Node_assert* _Node) {
33543379
// apply negative assert node
3355-
_Bt_state_t<_It> _St = _Tgt_state;
3356-
if (!_Match_pat(_Node->_Child)) {
3357-
_Tgt_state = _St;
3358-
return true;
3359-
} else {
3360-
return false;
3380+
const size_t _Frame_idx = _Push_frame();
3381+
bool _Succeeded = !_Match_pat(_Node->_Child);
3382+
if (_Succeeded) {
3383+
const _Bt_state_t<_It>& _St = _Frames[_Frame_idx];
3384+
_Tgt_state = _St;
33613385
}
3386+
_Pop_frame(_Frame_idx);
3387+
return _Succeeded;
33623388
}
33633389

33643390
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
33653391
bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_if(_Node_if* _Node) { // apply if node
3366-
_Tgt_state_t<_It> _St = _Tgt_state;
3392+
const size_t _Frame_idx = _Push_frame();
33673393

33683394
// look for the first match
33693395
for (; _Node; _Node = _Node->_Child) { // process one branch of if
3370-
_Tgt_state = _St; // rewind to where the alternation starts in input
3396+
_Tgt_state = _Frames[_Frame_idx]; // rewind to where the alternation starts in input
33713397
if (_Match_pat(_Node->_Next)) { // try to match this branch
33723398
break;
33733399
}
33743400
}
33753401

33763402
// if none of the if branches matched, fail to match
33773403
if (!_Node) {
3404+
_Pop_frame(_Frame_idx);
33783405
return false;
33793406
}
33803407

33813408
// if we aren't looking for the longest match, that's it
33823409
if (!_Longest) {
3410+
_Pop_frame(_Frame_idx);
33833411
return true;
33843412
}
33853413

@@ -3390,32 +3418,36 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_if(_Node_if* _Node) {
33903418
break;
33913419
}
33923420

3393-
_Tgt_state = _St;
3421+
_Tgt_state = _Frames[_Frame_idx];
33943422
(void) _Match_pat(_Node->_Next);
33953423
}
3424+
_Pop_frame(_Frame_idx);
33963425
return true;
33973426
}
33983427

33993428
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
34003429
bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node, bool _Greedy) {
34013430
// apply repetition to loop with no nested if/do
3402-
int _Ix = 0;
3403-
_Tgt_state_t<_It> _St = _Tgt_state;
3431+
int _Ix = 0;
3432+
const size_t _Frame_idx = _Push_frame();
34043433

34053434
if (0 < _Node->_Min) {
34063435
// GH-5365: We can avoid resetting capture groups for the first iteration
34073436
// because we know that a simple repetition of this loop was not encountered before.
34083437
if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
3438+
_Pop_frame(_Frame_idx);
34093439
return false;
3410-
} else if (_Tgt_state._Cur == _St._Cur) { // matches empty string
3440+
} else if (_Tgt_state._Cur == _Frames[_Frame_idx]._Cur) { // matches empty string
34113441
// loop is branchless, so it will only ever match empty strings
34123442
// -> skip all other matches as they don't change state and immediately try tail
3443+
_Pop_frame(_Frame_idx);
34133444
return _Match_pat(_Node->_End_rep->_Next);
34143445
} else { // loop never matches the empty string
34153446
for (_Ix = 1; _Ix < _Node->_Min; ++_Ix) { // do minimum number of reps
34163447
// GH-5365: We have to reset the capture groups from the second iteration on.
3417-
_Tgt_state._Grp_valid = _St._Grp_valid;
3448+
_Tgt_state._Grp_valid = _Frames[_Frame_idx]._Grp_valid;
34183449
if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
3450+
_Pop_frame(_Frame_idx);
34193451
return false;
34203452
}
34213453
}
@@ -3429,6 +3461,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34293461

34303462
if (_Match_pat(_Node->_End_rep->_Next)) {
34313463
if (!_Greedy) {
3464+
_Pop_frame(_Frame_idx);
34323465
return true; // go with current match
34333466
}
34343467

@@ -3439,20 +3472,22 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34393472

34403473
if (_Ix == 0 && _Node->_Max != 0) {
34413474
_Tgt_state._Cur = _Saved_pos;
3442-
_Tgt_state._Grp_valid = _St._Grp_valid;
3475+
_Tgt_state._Grp_valid = _Frames[_Frame_idx]._Grp_valid;
34433476

34443477
if (!_Match_pat(_Node->_Next)) { // rep match failed, we are done
34453478
_Done = true;
34463479
} else if (_Saved_pos == _Tgt_state._Cur) { // match empty, try no more repetitions
34473480
_Done = true;
34483481
// we only potentially accept/try tail for POSIX
34493482
if ((_Sflags & regex_constants::_Any_posix) && _Match_pat(_Node->_End_rep->_Next)) {
3483+
_Pop_frame(_Frame_idx);
34503484
return true; // go with current match
34513485
}
34523486
} else {
34533487
_Saved_pos = _Tgt_state._Cur;
34543488
if (_Match_pat(_Node->_End_rep->_Next)) {
34553489
if (!_Greedy) {
3490+
_Pop_frame(_Frame_idx);
34563491
return true; // go with current match
34573492
}
34583493

@@ -3467,7 +3502,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34673502
if (!_Done) {
34683503
while (_Node->_Max == -1 || _Ix++ < _Node->_Max) { // try another rep/tail match
34693504
_Tgt_state._Cur = _Saved_pos;
3470-
_Tgt_state._Grp_valid = _St._Grp_valid;
3505+
_Tgt_state._Grp_valid = _Frames[_Frame_idx]._Grp_valid;
34713506
if (!_Match_pat(_Node->_Next) || _Tgt_state._Cur == _Saved_pos) {
34723507
break; // rep match failed, quit loop
34733508
}
@@ -3476,6 +3511,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34763511
_Saved_pos = _Tgt_state._Cur;
34773512
if (_Match_pat(_Node->_End_rep->_Next)) {
34783513
if (!_Greedy) {
3514+
_Pop_frame(_Frame_idx);
34793515
return true; // go with current match
34803516
}
34813517

@@ -3489,22 +3525,24 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34893525
if (_Matched0) { // record final match
34903526
_Tgt_state = _Final;
34913527
}
3528+
3529+
_Pop_frame(_Frame_idx);
34923530
return _Matched0;
34933531
}
34943532

34953533
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
34963534
bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node, bool _Greedy, int _Init_idx) {
34973535
// apply repetition
3498-
bool _Matched0 = false;
3499-
_Tgt_state_t<_It> _St = _Tgt_state;
3500-
_Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3501-
int _Loop_idx_sav = _Psav->_Loop_idx;
3502-
_It* _Loop_iter_sav = static_cast<_It*>(_Psav->_Loop_iter);
3503-
bool _Progress = _Init_idx == 0 || *_Loop_iter_sav != _St._Cur;
3536+
bool _Matched0 = false;
3537+
_Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3538+
const int _Loop_idx_sav = _Psav->_Loop_idx;
3539+
const size_t _Loop_frame_idx_sav = _Psav->_Loop_frame_idx;
3540+
const size_t _Frame_idx = _Push_frame();
3541+
const bool _Progress = _Init_idx == 0 || _Frames[_Loop_frame_idx_sav]._Cur != _Tgt_state._Cur;
35043542

35053543
if (_Init_idx < _Node->_Min) { // try another required match
3506-
_Psav->_Loop_iter = _STD addressof(_St._Cur);
3507-
_Psav->_Loop_idx = _Progress ? _Init_idx + 1 : _Node->_Min; // try only one more match after an empty match
3544+
_Psav->_Loop_frame_idx = _Frame_idx;
3545+
_Psav->_Loop_idx = _Progress ? _Init_idx + 1 : _Node->_Min; // try only one more match after an empty match
35083546
_STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
35093547
_Tgt_state._Grp_valid.end(), false);
35103548
_Matched0 = _Match_pat(_Node->_Next);
@@ -3517,35 +3555,35 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
35173555
_Matched0 = _Match_pat(_Node->_End_rep->_Next);
35183556

35193557
// try to match with one more repetition
3520-
_Tgt_state = _St;
3521-
_Psav->_Loop_idx = _Init_idx + 1;
3522-
_Psav->_Loop_iter = _STD addressof(_St._Cur);
3558+
_Tgt_state = _Frames[_Frame_idx];
3559+
_Psav->_Loop_idx = _Init_idx + 1;
3560+
_Psav->_Loop_frame_idx = _Frame_idx;
35233561
if (_Match_pat(_Node->_Next)) { // always call _Match_pat, even when _Matched0 is already true
35243562
_Matched0 = true;
35253563
}
35263564
} else if (!_Greedy) { // not greedy, favor minimum number of reps
35273565
_Matched0 = _Match_pat(_Node->_End_rep->_Next);
35283566
if (!_Matched0) { // tail failed, try another rep
3529-
_Tgt_state = _St;
3530-
_Psav->_Loop_idx = _Init_idx + 1;
3531-
_Psav->_Loop_iter = _STD addressof(_St._Cur);
3567+
_Tgt_state = _Frames[_Frame_idx];
3568+
_Psav->_Loop_idx = _Init_idx + 1;
3569+
_Psav->_Loop_frame_idx = _Frame_idx;
35323570
_STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
35333571
_Tgt_state._Grp_valid.end(), false);
35343572
_Matched0 = _Match_pat(_Node->_Next);
35353573
}
35363574
} else { // greedy, favor maximum number of reps,
35373575
// so try another rep
3538-
_Psav->_Loop_idx = _Init_idx + 1;
3539-
_Psav->_Loop_iter = _STD addressof(_St._Cur);
3576+
_Psav->_Loop_idx = _Init_idx + 1;
3577+
_Psav->_Loop_frame_idx = _Frame_idx;
35403578
_STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
35413579
_Tgt_state._Grp_valid.end(), false);
35423580
_Matched0 = _Match_pat(_Node->_Next);
35433581

35443582
if (!_Matched0) { // rep failed, try tail
3545-
_Psav->_Loop_idx = _Loop_idx_sav;
3546-
_Psav->_Loop_iter = _Loop_iter_sav;
3547-
_Tgt_state = _St;
3548-
_Matched0 = _Match_pat(_Node->_End_rep->_Next);
3583+
_Psav->_Loop_idx = _Loop_idx_sav;
3584+
_Psav->_Loop_frame_idx = _Loop_frame_idx_sav;
3585+
_Tgt_state = _Frames[_Frame_idx];
3586+
_Matched0 = _Match_pat(_Node->_End_rep->_Next);
35493587
}
35503588
}
35513589
} else if (_Init_idx == 1 && (_Sflags & regex_constants::_Any_posix)) {
@@ -3554,8 +3592,9 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
35543592
_Matched0 = _Match_pat(_Node->_End_rep->_Next);
35553593
}
35563594

3557-
_Psav->_Loop_idx = _Loop_idx_sav;
3558-
_Psav->_Loop_iter = _Loop_iter_sav;
3595+
_Psav->_Loop_idx = _Loop_idx_sav;
3596+
_Psav->_Loop_frame_idx = _Loop_frame_idx_sav;
3597+
_Pop_frame(_Frame_idx);
35593598
return _Matched0;
35603599
}
35613600

0 commit comments

Comments
 (0)