@@ -121,10 +121,11 @@ namespace regex_constants {
121
121
_Gmask = 0x3F,
122
122
_Any_posix = basic | extended | grep | egrep | awk,
123
123
124
- icase = 0x0100,
125
- nosubs = 0x0200,
126
- optimize = 0x0400,
127
- collate = 0x0800
124
+ icase = 0x0100,
125
+ nosubs = 0x0200,
126
+ optimize = 0x0400,
127
+ collate = 0x0800,
128
+ multiline = 0x1000
128
129
};
129
130
130
131
_BITMASK_OPS(_EXPORT_STD, syntax_option_type)
@@ -1666,6 +1667,15 @@ public:
1666
1667
if (_Re->_Flags & _Fl_begin_needs_d) {
1667
1668
_Char_class_d = _Lookup_char_class(static_cast<_Elem>('D'));
1668
1669
}
1670
+
1671
+ // sanitize multiline mode setting
1672
+ #ifdef _REGEX_MAKE_MULTILINE_MODE_DEFAULT
1673
+ _Sflags |= regex_constants::multiline; // old matcher applied multiline mode for all grammars
1674
+ #else // ^^^ defined(_REGEX_MAKE_MULTILINE_MODE_DEFAULT) / !defined(_REGEX_MAKE_MULTILINE_MODE_DEFAULT) vvv
1675
+ if (_Sflags & regex_constants::_Any_posix) { // multiline mode is ECMAScript-only
1676
+ _Sflags &= ~regex_constants::multiline;
1677
+ }
1678
+ #endif // ^^^ !defined(_REGEX_MAKE_MULTILINE_MODE_DEFAULT) ^^^
1669
1679
}
1670
1680
1671
1681
void _Setf(regex_constants::match_flag_type _Mf) { // set specified flags
@@ -1920,6 +1930,7 @@ public:
1920
1930
static constexpr flag_type awk = regex_constants::awk;
1921
1931
static constexpr flag_type grep = regex_constants::grep;
1922
1932
static constexpr flag_type egrep = regex_constants::egrep;
1933
+ static constexpr flag_type multiline = regex_constants::multiline;
1923
1934
1924
1935
basic_regex() = default; // construct empty object
1925
1936
@@ -3833,6 +3844,11 @@ typename _RxTraits::char_class_type _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Al
3833
3844
return _Traits.lookup_classname(_Ptr, _Ptr + 1, (_Sflags & regex_constants::icase) != 0);
3834
3845
}
3835
3846
3847
+ template <class _Elem>
3848
+ bool _Is_ecmascript_line_terminator(_Elem _Ch) { // true iff _Ch equals _Meta_nl, _Meta_cr, _Meta_ls, or _Meta_ps
3849
+ return _Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps;
3850
+ }
3851
+
3836
3852
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3837
3853
bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _Nx) { // check for match
3838
3854
if (0 < _Max_stack_count && --_Max_stack_count <= 0) {
@@ -3852,18 +3868,19 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
3852
3868
case _N_bol:
3853
3869
if ((_Mflags & regex_constants::match_prev_avail)
3854
3870
|| _Tgt_state._Cur != _Begin) { // if --_Cur is valid, check for preceding newline
3855
- _Failed = *_Prev_iter(_Tgt_state._Cur) != _Meta_nl;
3871
+ _Failed = !(_Sflags & regex_constants::multiline)
3872
+ || !_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_Tgt_state._Cur));
3856
3873
} else {
3857
3874
_Failed = (_Mflags & regex_constants::match_not_bol) != 0;
3858
3875
}
3859
-
3860
3876
break;
3861
3877
3862
3878
case _N_eol:
3863
3879
if (_Tgt_state._Cur == _End) {
3864
3880
_Failed = (_Mflags & regex_constants::match_not_eol) != 0;
3865
3881
} else {
3866
- _Failed = *_Tgt_state._Cur != _Meta_nl;
3882
+ _Failed =
3883
+ !(_Sflags & regex_constants::multiline) || !_STD _Is_ecmascript_line_terminator(*_Tgt_state._Cur);
3867
3884
}
3868
3885
3869
3886
break;
@@ -3881,7 +3898,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
3881
3898
if (_Ch == _Elem()) {
3882
3899
_Failed = true;
3883
3900
}
3884
- } else if (_Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps) { // ECMAScript
3901
+ } else if (_STD _Is_ecmascript_line_terminator( _Ch)) {
3885
3902
_Failed = true;
3886
3903
}
3887
3904
@@ -4054,30 +4071,55 @@ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
4054
4071
_BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg, _BidIt _Last, _Node_base* _Node_arg) {
4055
4072
// skip until possible match
4056
4073
// assumes --_First_arg is valid
4057
- _Node_base* _Nx = _Node_arg ? _Node_arg : _Rep;
4074
+ constexpr char _Line_terminators_char[] = {static_cast<char>(_Meta_cr), static_cast<char>(_Meta_nl)};
4075
+ constexpr wchar_t _Line_terminators_wchar_t[] = {static_cast<wchar_t>(_Meta_cr), static_cast<wchar_t>(_Meta_nl),
4076
+ static_cast<wchar_t>(_Meta_ls), static_cast<wchar_t>(_Meta_ps)};
4077
+ _Node_base* _Nx = _Node_arg ? _Node_arg : _Rep;
4058
4078
4059
4079
while (_First_arg != _Last && _Nx) { // check current node
4060
4080
switch (_Nx->_Kind) { // handle current node's type
4061
4081
case _N_nop:
4062
4082
break;
4063
4083
4064
- case _N_bol:
4065
- { // check for embedded newline
4066
- // return iterator to character just after the newline; for input like "\nabc"
4067
- // matching "^abc", _First_arg could be pointing at 'a', so we need to check
4068
- // --_First_arg for '\n'
4069
- if (*_Prev_iter(_First_arg) != _Meta_nl) {
4070
- _First_arg = _STD find(_First_arg, _Last, _Meta_nl);
4084
+ case _N_bol: // check for beginning anchor
4085
+ if (_Sflags & regex_constants::multiline) {
4086
+ // multiline mode: check for embedded line terminator
4087
+ // return iterator to character just after the newline; for input like "\nabc"
4088
+ // matching "^abc", _First_arg could be pointing at 'a', so we need to check
4089
+ // --_First_arg for '\n'
4090
+ if (!_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_First_arg))) {
4091
+ if constexpr (sizeof(_Elem) == 1) {
4092
+ _First_arg = _STD find_first_of(
4093
+ _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char));
4094
+ } else {
4095
+ _First_arg = _STD find_first_of(
4096
+ _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t));
4097
+ }
4098
+
4071
4099
if (_First_arg != _Last) {
4072
4100
++_First_arg;
4073
4101
}
4074
4102
}
4075
4103
4076
4104
return _First_arg;
4105
+ } else {
4106
+ // non-multiline mode: never matches because --_First_arg is valid
4107
+ return _Last;
4077
4108
}
4078
4109
4079
4110
case _N_eol:
4080
- return _STD find(_First_arg, _Last, _Meta_nl);
4111
+ if (_Sflags & regex_constants::multiline) {
4112
+ // multiline mode: matches at next line terminator or end of input
4113
+ if constexpr (sizeof(_Elem) == 1) {
4114
+ return _STD find_first_of(
4115
+ _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char));
4116
+ } else {
4117
+ return _STD find_first_of(
4118
+ _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t));
4119
+ }
4120
+ } else {
4121
+ return _Last; // non-multiline mode: matches at end of input or not at all
4122
+ }
4081
4123
4082
4124
case _N_str:
4083
4125
{ // check for string match
0 commit comments