Skip to content

Commit 63fe8a2

Browse files
<regex>: Make negated character class escapes not match characters not included in the negated character class (#5214)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent 30dd267 commit 63fe8a2

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

stl/inc/regex

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1524,7 +1524,7 @@ public:
15241524
void _Add_class();
15251525
void _Add_char_to_class(_Elem _Ch);
15261526
void _Add_range2(_Elem, _Elem);
1527-
void _Add_named_class(_Regex_traits_base::char_class_type, bool = false);
1527+
void _Add_named_class(typename _RxTraits::char_class_type, bool);
15281528
void _Add_equiv(_FwdIt, _FwdIt, _Difft);
15291529
void _Add_coll(_FwdIt, _FwdIt, _Difft);
15301530
_Node_base* _Begin_group();
@@ -1548,7 +1548,7 @@ private:
15481548
bool _Beg_expr(_Node_base*) const;
15491549
void _Add_char_to_bitmap(_Elem _Ch);
15501550
void _Add_char_to_array(_Elem _Ch);
1551-
void _Add_elts(_Node_class<_Elem, _RxTraits>*, _Regex_traits_base::char_class_type, bool);
1551+
void _Add_elts(_Node_class<_Elem, _RxTraits>*, typename _RxTraits::char_class_type, bool);
15521552
void _Char_to_elts(_FwdIt, _FwdIt, _Difft, _Sequence<_Elem>**);
15531553

15541554
_Root_node* _Root;
@@ -2935,7 +2935,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_range2(const _Elem _Arg0, const _E
29352935

29362936
template <class _FwdIt, class _Elem, class _RxTraits>
29372937
void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_elts(
2938-
_Node_class<_Elem, _RxTraits>* _Node, _Regex_traits_base::char_class_type _Cl, bool _Negate) {
2938+
_Node_class<_Elem, _RxTraits>* _Node, typename _RxTraits::char_class_type _Cl, bool _Negate) {
29392939
// add characters in named class to set
29402940
for (unsigned int _Ch = 0; _Ch < _Bmp_max; ++_Ch) { // add elements or their inverse
29412941
bool _Matches = _Traits.isctype(static_cast<_Elem>(_Ch), _Cl);
@@ -2950,12 +2950,12 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_elts(
29502950
}
29512951

29522952
template <class _FwdIt, class _Elem, class _RxTraits>
2953-
void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_named_class(typename _Regex_traits_base::char_class_type _Cl,
2953+
void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_named_class(typename _RxTraits::char_class_type _Cl,
29542954
bool _Negate) { // add contents of named class to bracket expression
29552955
_Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current);
29562956
_Add_elts(_Node, _Cl, _Negate);
2957-
if (_Bmp_max < static_cast<unsigned int>(_STD _Max_limit<_Elem>())) {
2958-
_Node->_Classes = static_cast<_Regex_traits_base::char_class_type>(_Node->_Classes | _Cl);
2957+
if (_Bmp_max <= _STD _Max_limit<typename _RxTraits::_Uelem>() && !_Negate) {
2958+
_Node->_Classes = static_cast<typename _RxTraits::char_class_type>(_Node->_Classes | _Cl);
29592959
}
29602960
}
29612961

@@ -4023,7 +4023,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ex_class(
40234023
_Error(regex_constants::error_ctype);
40244024
}
40254025

4026-
_Nfa._Add_named_class(_Cls);
4026+
_Nfa._Add_named_class(_Cls, false);
40274027
} else if (_End_arg == _Meta_equal) { // process =
40284028
if (_Beg == _Pat) {
40294029
_Error(regex_constants::error_collate);

tests/std/tests/VSO_0000000_regex_use/test.cpp

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -730,6 +730,24 @@ void test_gh_5192() {
730730
}
731731
}
732732

733+
void test_gh_5214() {
734+
// GH-5214 makes negated character class escapes not match characters not included in the negated character class
735+
{
736+
const test_wregex neg_word_regex(&g_regexTester, LR"([\W])");
737+
neg_word_regex.should_search_fail(L"\u0100"); // U+0100 LATIN CAPITAL LETTER A WITH MACRON
738+
}
739+
740+
{
741+
const test_wregex neg_space_regex(&g_regexTester, LR"([\S])");
742+
neg_space_regex.should_search_fail(L"\u2028"); // U+2028 LINE SEPARATOR
743+
}
744+
745+
{
746+
const test_wregex neg_digit_regex(&g_regexTester, LR"([\D])");
747+
neg_digit_regex.should_search_fail(L"\u0662"); // U+0662 ARABIC-INDIC DIGIT TWO
748+
}
749+
}
750+
733751
int main() {
734752
test_dev10_449367_case_insensitivity_should_work();
735753
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
@@ -763,6 +781,7 @@ int main() {
763781
test_gh_5160();
764782
test_gh_5167();
765783
test_gh_5192();
784+
test_gh_5214();
766785

767786
return g_regexTester.result();
768787
}

0 commit comments

Comments
 (0)