@@ -1493,9 +1493,10 @@ public:
1493
1493
_Node_end_rep& operator=(const _Node_end_rep&) = delete;
1494
1494
};
1495
1495
1496
- struct _Loop_vals_t { // storage for loop administration
1497
- int _Loop_idx;
1496
+ struct _Loop_vals_v2_t { // per-repetition bookkeeping, indexed by _Node_rep::_Loop_number
1498
1497
void* _Loop_iter; // type-erased pointer to the _It position saved by _Do_rep before matching the loop body
1498
+ int _Loop_idx; // iteration count handed back to _Do_rep when the matching _N_end_rep is reached
1499
+ unsigned int _Group_first; // first capture group to invalidate per iteration; 0 = not determined yet
1499
1500
};
1500
1501
1501
1502
class _Node_rep : public _Node_base { // node that marks the beginning of a repetition
@@ -1681,13 +1682,15 @@ public:
1681
1682
private:
1682
1683
_Tgt_state_t<_It> _Tgt_state;
1683
1684
_Tgt_state_t<_It> _Res;
1684
- vector<_Loop_vals_t > _Loop_vals;
1685
+ vector<_Loop_vals_v2_t > _Loop_vals;
1685
1686
1686
1687
bool _Do_assert(_Node_assert*);
1687
1688
bool _Do_neg_assert(_Node_assert*);
1688
1689
bool _Do_if(_Node_if*);
1689
1690
bool _Do_rep0(_Node_rep*, bool);
1690
1691
bool _Do_rep(_Node_rep*, bool, int);
1692
+ bool _Do_rep_first(_Node_rep*);
1693
+ bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
1691
1694
bool _Do_class(_Node_base*);
1692
1695
bool _Match_pat(_Node_base*);
1693
1696
bool _Better_match();
@@ -3235,6 +3238,13 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
3235
3238
_Tgt_state_t<_It> _St = _Tgt_state;
3236
3239
3237
3240
for (; _Ix < _Node->_Min; ++_Ix) { // do minimum number of reps
3241
+ // GH-5365: We have to reset the capture groups from the second iteration on.
3242
+ // We can avoid the reset for the first iteration
3243
+ // because we know that a simple repetition was not encountered before.
3244
+ if (_Ix > 0) {
3245
+ _Tgt_state._Grp_valid = _St._Grp_valid;
3246
+ }
3247
+
3238
3248
_It _Cur = _Tgt_state._Cur;
3239
3249
if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
3240
3250
_Tgt_state = _St;
@@ -3290,17 +3300,12 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
3290
3300
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3291
3301
bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node, bool _Greedy, int _Init_idx) {
3292
3302
// apply repetition
3293
- if (_Node->_Simple_loop == 1) {
3294
- return _Do_rep0(_Node, _Greedy);
3295
- }
3296
-
3297
- bool _Matched0 = false;
3298
- _Tgt_state_t<_It> _St = _Tgt_state;
3299
- _Loop_vals_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3300
- int _Loop_idx_sav = _Psav->_Loop_idx;
3301
- _It* _Loop_iter_sav = static_cast<_It*>(_Psav->_Loop_iter);
3302
-
3303
- bool _Progress = _Init_idx == 0 || *_Loop_iter_sav != _St._Cur;
3303
+ bool _Matched0 = false;
3304
+ _Tgt_state_t<_It> _St = _Tgt_state;
3305
+ _Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3306
+ int _Loop_idx_sav = _Psav->_Loop_idx;
3307
+ _It* _Loop_iter_sav = static_cast<_It*>(_Psav->_Loop_iter);
3308
+ bool _Progress = _Init_idx == 0 || *_Loop_iter_sav != _St._Cur;
3304
3309
3305
3310
if (0 <= _Node->_Max && _Node->_Max <= _Init_idx) {
3306
3311
_Matched0 = _Match_pat(_Node->_End_rep->_Next); // reps done, try tail
@@ -3310,7 +3315,9 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
3310
3315
} else { // try another required match
3311
3316
_Psav->_Loop_idx = _Init_idx + 1;
3312
3317
_Psav->_Loop_iter = _STD addressof(_St._Cur);
3313
- _Matched0 = _Match_pat(_Node->_Next);
3318
+ _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3319
+ _Tgt_state._Grp_valid.end(), false);
3320
+ _Matched0 = _Match_pat(_Node->_Next);
3314
3321
}
3315
3322
} else if (_Longest) { // longest, try any number of repetitions
3316
3323
@@ -3332,13 +3339,17 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
3332
3339
_Tgt_state = _St;
3333
3340
_Psav->_Loop_idx = _Init_idx + 1;
3334
3341
_Psav->_Loop_iter = _STD addressof(_St._Cur);
3335
- _Matched0 = _Match_pat(_Node->_Next);
3342
+ _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3343
+ _Tgt_state._Grp_valid.end(), false);
3344
+ _Matched0 = _Match_pat(_Node->_Next);
3336
3345
}
3337
3346
} else { // greedy, favor maximum number of reps
3338
3347
if (_Progress) { // try another rep
3339
3348
_Psav->_Loop_idx = _Init_idx + 1;
3340
3349
_Psav->_Loop_iter = _STD addressof(_St._Cur);
3341
- _Matched0 = _Match_pat(_Node->_Next);
3350
+ _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3351
+ _Tgt_state._Grp_valid.end(), false);
3352
+ _Matched0 = _Match_pat(_Node->_Next);
3342
3353
}
3343
3354
3344
3355
if ((_Progress || 1 >= _Init_idx) && !_Matched0) { // rep failed, try tail
@@ -3358,6 +3369,127 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
3358
3369
return _Matched0;
3359
3370
}
3360
3371
3372
+ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3373
+ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep* _Node) {
3374
+ bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
3375
+ // Entry point for a repetition node: simple loops are handled entirely by _Do_rep0.
3376
+ if (_Node->_Simple_loop == 1) {
3377
+ return _Do_rep0(_Node, _Greedy);
3378
+ }
3379
+ _Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3380
+
3381
+ // Determine the first capture group inside the repetition (used later to reset capture groups),
3382
+ // unless it was already determined. No capture group reset is performed for POSIX regexes,
3383
+ // so we prevent any reset by setting the first capture group to the number of capture groups _Ncap.
3384
+ if (_Psav->_Group_first == 0) { // 0 is treated as "not determined yet"
3385
+ if ((_Sflags
3386
+ & (regex_constants::basic | regex_constants::extended | regex_constants::grep | regex_constants::egrep
3387
+ | regex_constants::awk))
3388
+ || !_Find_first_inner_capture_group(_Node->_Next, _Psav)) {
3389
+ _Psav->_Group_first = _Ncap; // POSIX grammar, or no capture group found in the loop body
3390
+ }
3391
+ }
3392
+
3393
+ return _Do_rep(_Node, _Greedy, 0); // start the general loop with iteration index 0
3394
+ }
3395
+
3396
+ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3397
+ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Find_first_inner_capture_group(
3398
+ _Node_base* _Nx, _Loop_vals_v2_t* _Loop_state) { // scan from _Nx; on the first _N_capture store its index
3399
+ if (0 < _Max_stack_count && --_Max_stack_count <= 0) { // recursion budget, restored before returning
3400
+ _Xregex_error(regex_constants::error_stack);
3401
+ }
3402
+
3403
+ bool _Found_group = false; // return value: true once _Loop_state->_Group_first has been set
3404
+ while (_Nx) {
3405
+ switch (_Nx->_Kind) {
3406
+ case _N_nop:
3407
+ case _N_bol:
3408
+ case _N_eol:
3409
+ case _N_wbound:
3410
+ case _N_dot:
3411
+ case _N_str:
3412
+ case _N_class:
3413
+ case _N_group:
3414
+ case _N_end_group:
3415
+ case _N_end_capture:
3416
+ case _N_back:
3417
+ case _N_begin:
3418
+ break; // nothing to record here; continue with the next node
3419
+
3420
+ case _N_assert:
3421
+ case _N_neg_assert:
3422
+ { // descend into the assertion's sub-pattern; recursing on _Nx itself would never terminate
3423
+ if (_Find_first_inner_capture_group(static_cast<_Node_assert*>(_Nx)->_Child, _Loop_state)) {
3424
+ _Found_group = true;
3425
+ _Nx = nullptr;
3426
+ }
3427
+ break;
3428
+ }
3429
+
3430
+ case _N_capture:
3431
+ { // found it: remember the group's index and stop scanning
3432
+ _Node_capture* _Node = static_cast<_Node_capture*>(_Nx);
3433
+ _Loop_state->_Group_first = _Node->_Idx;
3434
+ _Found_group = true;
3435
+ _Nx = nullptr;
3436
+ break;
3437
+ }
3438
+
3439
+ case _N_if:
3440
+ {
3441
+ _Node_if* _Node = static_cast<_Node_if*>(_Nx);
3442
+ for (; _Node != nullptr; _Node = _Node->_Child) { // search each branch in order
3443
+ if (_Find_first_inner_capture_group(_Node->_Next, _Loop_state)) {
3444
+ _Found_group = true;
3445
+ _Nx = nullptr;
3446
+ break;
3447
+ }
3448
+ }
3449
+
3450
+ if (_Nx != nullptr) { // continue search after the branches of the _N_if node
3451
+ _Nx = static_cast<_Node_if*>(_Nx)->_Endif;
3452
+ }
3453
+ break;
3454
+ }
3455
+
3456
+ case _N_rep:
3457
+ { // nested repetition: fill in its own loop state while searching its body
3458
+ _Node_rep* _Inner_rep = static_cast<_Node_rep*>(_Nx);
3459
+ _Loop_vals_v2_t* _Inner_loop_state = &_Loop_vals[_Inner_rep->_Loop_number];
3460
+ if (_Find_first_inner_capture_group(_Inner_rep->_Next, _Inner_loop_state)) {
3461
+ _Loop_state->_Group_first = _Inner_loop_state->_Group_first;
3462
+ _Found_group = true;
3463
+ _Nx = nullptr;
3464
+ } else {
3465
+ _Inner_loop_state->_Group_first = _Ncap; // no group inside the inner loop
3466
+ _Nx = _Inner_rep->_End_rep; // resume scanning after the inner loop
3467
+ }
3468
+ break;
3469
+ }
3470
+
3471
+ case _N_end_assert:
3472
+ case _N_endif:
3473
+ case _N_end_rep:
3474
+ case _N_end:
3475
+ case _N_none:
3476
+ default:
3477
+ _Nx = nullptr; // stop scanning at these node kinds
3478
+ break;
3479
+ }
3480
+
3481
+ if (_Nx) {
3482
+ _Nx = _Nx->_Next;
3483
+ }
3484
+ }
3485
+
3486
+ if (0 < _Max_stack_count) { // give back the recursion budget taken on entry
3487
+ ++_Max_stack_count;
3488
+ }
3489
+
3490
+ return _Found_group;
3491
+ }
3492
+
3361
3493
template <class _BidIt1, class _BidIt2, class _Pr>
3362
3494
_BidIt1 _Cmp_chrange(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2, _BidIt2 _End2, _Pr _Pred) {
3363
3495
// compare character ranges
@@ -3695,15 +3827,6 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
3695
3827
{ // record current position
3696
3828
_Node_capture* _Node = static_cast<_Node_capture*>(_Nx);
3697
3829
_Tgt_state._Grps[_Node->_Idx]._Begin = _Tgt_state._Cur;
3698
- if (!(_Sflags
3699
- & (regex_constants::basic | regex_constants::extended | regex_constants::grep
3700
- | regex_constants::egrep | regex_constants::awk))) {
3701
- // CodeQL [SM02323] Comparing unchanging unsigned int _Node->_Idx to decreasing size_t _Idx is safe.
3702
- for (size_t _Idx = _Tgt_state._Grp_valid.size(); _Node->_Idx < _Idx;) {
3703
- _Tgt_state._Grp_valid[--_Idx] = false;
3704
- }
3705
- }
3706
-
3707
3830
break;
3708
3831
}
3709
3832
@@ -3752,7 +3875,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
3752
3875
break;
3753
3876
3754
3877
case _N_rep:
3755
- if (!_Do_rep (static_cast<_Node_rep*>(_Nx), (_Nx->_Flags & _Fl_greedy) != 0, 0 )) {
3878
+ if (!_Do_rep_first (static_cast<_Node_rep*>(_Nx))) {
3756
3879
_Failed = true;
3757
3880
}
3758
3881
@@ -3761,10 +3884,9 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
3761
3884
3762
3885
case _N_end_rep:
3763
3886
{
3764
- _Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
3765
- _Loop_vals_t* _Psav = &_Loop_vals[_Nr->_Loop_number];
3766
-
3767
- if (_Nr->_Simple_loop == 0 && !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Psav->_Loop_idx)) {
3887
+ _Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
3888
+ if (_Nr->_Simple_loop == 0
3889
+ && !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Loop_vals[_Nr->_Loop_number]._Loop_idx)) { // resume with this loop's stored iteration count
3768
3890
_Failed = true; // recurse only if loop contains if/do
3769
3891
}
3770
3892
0 commit comments