Skip to content

Commit 5a1cbde

Browse files
<regex>: Allow initial ] to start character ranges in POSIX regular expressions (#5364)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent 13f1f16 commit 5a1cbde

File tree

2 files changed

+51
-9
lines changed

2 files changed

+51
-9
lines changed

stl/inc/regex

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1734,7 +1734,7 @@ private:
17341734
void _Do_ex_class(_Meta_type);
17351735
bool _CharacterClassEscape(bool);
17361736
_Prs_ret _ClassEscape2();
1737-
_Prs_ret _ClassAtom();
1737+
_Prs_ret _ClassAtom(bool);
17381738
void _ClassRanges();
17391739
void _CharacterClass();
17401740
bool _IdentityEscape();
@@ -4129,7 +4129,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape2() { // check for class
41294129
}
41304130

41314131
template <class _FwdIt, class _Elem, class _RxTraits>
4132-
_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom() { // check for class atom
4132+
_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { // check for class atom
41334133
if (_Mchar == _Meta_esc) { // check for valid escape sequence
41344134
_Next();
41354135
if (_L_flags & _L_grp_esc) {
@@ -4152,7 +4152,10 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom() { // check for class at
41524152
_Val = _Meta_lsq;
41534153
return _Prs_chr;
41544154
}
4155-
} else if (_Mchar == _Meta_rsq || _Mchar == _Meta_eos) {
4155+
} else if ((_Mchar == _Meta_rsq
4156+
&& (!(_L_flags & _L_brk_rstr)
4157+
|| !_Initial)) // initial ] does not close the class when it is not special
4158+
|| _Mchar == _Meta_eos) {
41564159
return _Prs_none;
41574160
} else { // handle ordinary character
41584161
_Val = _Char;
@@ -4165,10 +4168,12 @@ template <class _FwdIt, class _Elem, class _RxTraits>
41654168
void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid class ranges
41664169
_Prs_ret _Ret;
41674170

4171+
bool _Initial = true;
41684172
for (;;) { // process characters through end of bracket expression
4169-
if ((_Ret = _ClassAtom()) == _Prs_none) {
4173+
if ((_Ret = _ClassAtom(_Initial)) == _Prs_none) {
41704174
return;
41714175
}
4176+
_Initial = false;
41724177

41734178
if (_Ret == _Prs_chr && _Val == 0 && !(_L_flags & _L_bzr_chr)) {
41744179
_Error(regex_constants::error_escape);
@@ -4178,7 +4183,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid clas
41784183
_Next();
41794184
_Elem _Chr1 = static_cast<_Elem>(_Val);
41804185
const bool _Set_preceding = _Ret == _Prs_set;
4181-
if ((_Ret = _ClassAtom()) == _Prs_none) { // treat - as ordinary character
4186+
if ((_Ret = _ClassAtom(false)) == _Prs_none) { // treat - as ordinary character
41824187
if (!_Set_preceding) {
41834188
_Nfa._Add_char_to_class(_Chr1);
41844189
}
@@ -4227,10 +4232,6 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClass() { // add bracket expre
42274232
_Next();
42284233
}
42294234

4230-
if ((_L_flags & _L_brk_rstr) && _Mchar == _Meta_rsq) { // insert initial ] when not special
4231-
_Nfa._Add_char_to_class(_Meta_rsq);
4232-
_Next();
4233-
}
42344235
_ClassRanges();
42354236
}
42364237

tests/std/tests/VSO_0000000_regex_use/test.cpp

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1360,6 +1360,46 @@ void test_gh_5362() {
13601360
test_gh_5362_grep();
13611361
}
13621362

1363+
void test_gh_5364() {
1364+
// GH-5364 <regex>: Allow initial ] to start character ranges in POSIX regular expressions
1365+
for (syntax_option_type option : {basic, extended, awk, grep, egrep}) {
1366+
g_regexTester.should_match("]", "[]-_]", option);
1367+
g_regexTester.should_match("^", "[]-_]", option);
1368+
g_regexTester.should_match("_", "[]-_]", option);
1369+
g_regexTester.should_not_match("-", "[]-_]", option);
1370+
1371+
g_regexTester.should_not_match("]", "[^]-_]", option);
1372+
g_regexTester.should_not_match("^", "[^]-_]", option);
1373+
g_regexTester.should_not_match("_", "[^]-_]", option);
1374+
g_regexTester.should_match("-", "[^]-_]", option);
1375+
1376+
g_regexTester.should_match("]", "[]a]", option);
1377+
g_regexTester.should_match("a", "[]a]", option);
1378+
g_regexTester.should_not_match("_", "[]a]", option);
1379+
g_regexTester.should_not_match("a]", "[]a]", option);
1380+
g_regexTester.should_not_match("]a", "[]a]", option);
1381+
g_regexTester.should_not_match("__", "[]a]", option);
1382+
1383+
g_regexTester.should_not_match("]", "[^]a]", option);
1384+
g_regexTester.should_not_match("a", "[^]a]", option);
1385+
g_regexTester.should_match("_", "[^]a]", option);
1386+
g_regexTester.should_not_match("a]", "[^]a]", option);
1387+
g_regexTester.should_not_match("]a", "[^]a]", option);
1388+
g_regexTester.should_not_match("__", "[^]a]", option);
1389+
1390+
g_regexTester.should_throw("[]", error_brack, option);
1391+
g_regexTester.should_throw("[^]", error_brack, option);
1392+
}
1393+
1394+
g_regexTester.should_throw("[]-_]", error_brack, ECMAScript);
1395+
g_regexTester.should_throw("[^]-_]", error_brack, ECMAScript);
1396+
g_regexTester.should_throw("[]a]", error_brack, ECMAScript);
1397+
g_regexTester.should_throw("[^]a]", error_brack, ECMAScript);
1398+
1399+
g_regexTester.should_not_match("c", "[]", ECMAScript);
1400+
g_regexTester.should_match("c", "[^]", ECMAScript);
1401+
}
1402+
13631403
int main() {
13641404
test_dev10_449367_case_insensitivity_should_work();
13651405
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
@@ -1398,6 +1438,7 @@ int main() {
13981438
test_gh_5214();
13991439
test_gh_5253();
14001440
test_gh_5362();
1441+
test_gh_5364();
14011442

14021443
return g_regexTester.result();
14031444
}

0 commit comments

Comments
 (0)