@@ -33,6 +33,16 @@ _STL_DISABLE_CLANG_WARNINGS
33
33
#pragma push_macro("new")
34
34
#undef new
35
35
36
+ // Controls whether LWG-2503 "multiline option should be added to syntax_option_type" is implemented.
37
+ // Defining this to 0 requests Standard behavior:
38
+ // * For ECMAScript, matching is non-multiline by default, but regex_constants::multiline can be requested.
39
+ // * For POSIX grammars, matching is non-multiline, and regex_constants::multiline is ignored (N5008 [tab:re.synopt]).
40
+ // Defining this to 1 requests legacy behavior:
41
+ // * For all grammars, matching is multiline, and regex_constants::multiline is redundant.
42
+ #ifndef _REGEX_LEGACY_MULTILINE_MODE
43
+ #define _REGEX_LEGACY_MULTILINE_MODE 0
44
+ #endif // !defined(_REGEX_LEGACY_MULTILINE_MODE)
45
+
36
46
#ifndef _REGEX_MAX_COMPLEXITY_COUNT
37
47
#define _REGEX_MAX_COMPLEXITY_COUNT 10000000L // set to 0 to disable
38
48
#endif // !defined(_REGEX_MAX_COMPLEXITY_COUNT)
@@ -121,10 +131,11 @@ namespace regex_constants {
121
131
_Gmask = 0x3F,
122
132
_Any_posix = basic | extended | grep | egrep | awk,
123
133
124
- icase = 0x0100,
125
- nosubs = 0x0200,
126
- optimize = 0x0400,
127
- collate = 0x0800
134
+ icase = 0x0100,
135
+ nosubs = 0x0200,
136
+ optimize = 0x0400,
137
+ collate = 0x0800,
138
+ multiline = 0x1000
128
139
};
129
140
130
141
_BITMASK_OPS(_EXPORT_STD, syntax_option_type)
@@ -1666,6 +1677,15 @@ public:
1666
1677
if (_Re->_Flags & _Fl_begin_needs_d) {
1667
1678
_Char_class_d = _Lookup_char_class(static_cast<_Elem>('D'));
1668
1679
}
1680
+
1681
+ // sanitize multiline mode setting
1682
+ #if _REGEX_LEGACY_MULTILINE_MODE
1683
+ _Sflags |= regex_constants::multiline; // old matcher applied multiline mode for all grammars
1684
+ #else // ^^^ _REGEX_LEGACY_MULTILINE_MODE / !_REGEX_LEGACY_MULTILINE_MODE vvv
1685
+ if (_Sflags & regex_constants::_Any_posix) { // multiline mode is ECMAScript-only
1686
+ _Sflags &= ~regex_constants::multiline;
1687
+ }
1688
+ #endif // ^^^ !_REGEX_LEGACY_MULTILINE_MODE ^^^
1669
1689
}
1670
1690
1671
1691
void _Setf(regex_constants::match_flag_type _Mf) { // set specified flags
@@ -1920,6 +1940,7 @@ public:
1920
1940
static constexpr flag_type awk = regex_constants::awk;
1921
1941
static constexpr flag_type grep = regex_constants::grep;
1922
1942
static constexpr flag_type egrep = regex_constants::egrep;
1943
+ static constexpr flag_type multiline = regex_constants::multiline;
1923
1944
1924
1945
basic_regex() = default; // construct empty object
1925
1946
@@ -3833,6 +3854,11 @@ typename _RxTraits::char_class_type _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Al
3833
3854
return _Traits.lookup_classname(_Ptr, _Ptr + 1, (_Sflags & regex_constants::icase) != 0);
3834
3855
}
3835
3856
3857
+ template <class _Elem>
3858
+ bool _Is_ecmascript_line_terminator(_Elem _Ch) { // is _Ch equal to _Meta_nl, _Meta_cr, _Meta_ls, or _Meta_ps?
3859
+ return _Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps;
3860
+ }
3861
+
3836
3862
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3837
3863
bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _Nx) { // check for match
3838
3864
if (0 < _Max_stack_count && --_Max_stack_count <= 0) {
@@ -3852,18 +3878,19 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
3852
3878
case _N_bol:
3853
3879
if ((_Mflags & regex_constants::match_prev_avail)
3854
3880
|| _Tgt_state._Cur != _Begin) { // if --_Cur is valid, check for preceding newline
3855
- _Failed = *_Prev_iter(_Tgt_state._Cur) != _Meta_nl;
3881
+ _Failed = !(_Sflags & regex_constants::multiline)
3882
+ || !_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_Tgt_state._Cur));
3856
3883
} else {
3857
3884
_Failed = (_Mflags & regex_constants::match_not_bol) != 0;
3858
3885
}
3859
-
3860
3886
break;
3861
3887
3862
3888
case _N_eol:
3863
3889
if (_Tgt_state._Cur == _End) {
3864
3890
_Failed = (_Mflags & regex_constants::match_not_eol) != 0;
3865
3891
} else {
3866
- _Failed = *_Tgt_state._Cur != _Meta_nl;
3892
+ _Failed =
3893
+ !(_Sflags & regex_constants::multiline) || !_STD _Is_ecmascript_line_terminator(*_Tgt_state._Cur);
3867
3894
}
3868
3895
3869
3896
break;
@@ -3881,7 +3908,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
3881
3908
if (_Ch == _Elem()) {
3882
3909
_Failed = true;
3883
3910
}
3884
- } else if (_Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps) { // ECMAScript
3911
+ } else if (_STD _Is_ecmascript_line_terminator( _Ch)) {
3885
3912
_Failed = true;
3886
3913
}
3887
3914
@@ -4054,30 +4081,55 @@ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
4054
4081
_BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg, _BidIt _Last, _Node_base* _Node_arg) {
4055
4082
// skip until possible match
4056
4083
// assumes --_First_arg is valid
4057
- _Node_base* _Nx = _Node_arg ? _Node_arg : _Rep;
4084
+ static constexpr char _Line_terminators_char[] = {static_cast<char>(_Meta_cr), static_cast<char>(_Meta_nl)};
4085
+ static constexpr wchar_t _Line_terminators_wchar_t[] = {static_cast<wchar_t>(_Meta_cr),
4086
+ static_cast<wchar_t>(_Meta_nl), static_cast<wchar_t>(_Meta_ls), static_cast<wchar_t>(_Meta_ps)};
4087
+ _Node_base* _Nx = _Node_arg ? _Node_arg : _Rep;
4058
4088
4059
4089
while (_First_arg != _Last && _Nx) { // check current node
4060
4090
switch (_Nx->_Kind) { // handle current node's type
4061
4091
case _N_nop:
4062
4092
break;
4063
4093
4064
- case _N_bol:
4065
- { // check for embedded newline
4066
- // return iterator to character just after the newline; for input like "\nabc"
4067
- // matching "^abc", _First_arg could be pointing at 'a', so we need to check
4068
- // --_First_arg for '\n'
4069
- if (*_Prev_iter(_First_arg) != _Meta_nl) {
4070
- _First_arg = _STD find(_First_arg, _Last, _Meta_nl);
4094
+ case _N_bol: // check for beginning anchor
4095
+ if (_Sflags & regex_constants::multiline) {
4096
+ // multiline mode: check for embedded line terminator
4097
+ // return iterator to character just after the newline; for input like "\nabc"
4098
+ // matching "^abc", _First_arg could be pointing at 'a', so we need to check
4099
+ // --_First_arg for '\n'
4100
+ if (!_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_First_arg))) {
4101
+ if constexpr (sizeof(_Elem) == 1) {
4102
+ _First_arg = _STD find_first_of(
4103
+ _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char));
4104
+ } else {
4105
+ _First_arg = _STD find_first_of(
4106
+ _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t));
4107
+ }
4108
+
4071
4109
if (_First_arg != _Last) {
4072
4110
++_First_arg;
4073
4111
}
4074
4112
}
4075
4113
4076
4114
return _First_arg;
4115
+ } else {
4116
+ // non-multiline mode: never matches because --_First_arg is valid
4117
+ return _Last;
4077
4118
}
4078
4119
4079
4120
case _N_eol:
4080
- return _STD find(_First_arg, _Last, _Meta_nl);
4121
+ if (_Sflags & regex_constants::multiline) {
4122
+ // multiline mode: matches at next line terminator or end of input
4123
+ if constexpr (sizeof(_Elem) == 1) {
4124
+ return _STD find_first_of(
4125
+ _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char));
4126
+ } else {
4127
+ return _STD find_first_of(
4128
+ _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t));
4129
+ }
4130
+ } else {
4131
+ return _Last; // non-multiline mode: matches at end of input or not at all
4132
+ }
4081
4133
4082
4134
case _N_str:
4083
4135
{ // check for string match
0 commit comments