@@ -636,6 +636,91 @@ void test_gh_731() {
636
636
}
637
637
}
638
638
639
+ void test_gh_992 () {
640
+ // GH-992 <regex> mishandles locale-based character classes outside of the char range
// Exercises the negated class escapes \W, \S and \D when they appear INSIDE a bracket
// expression, using wide-character inputs. Sample characters: U+0078 (x), U+03C7 (Greek chi),
// '3', U+0662 (Arabic-Indic digit two), ' ' and U+2009 (thin space).
// should_search_match(input, expected) and should_search_fail(input) are test_wregex helpers
// defined outside this view; presumably they check that a search over `input` yields `expected`
// or finds nothing, respectively -- TODO confirm against the helper.
// NOTE(review): this view is a rendered diff; the spaces right after opening quotes and after
// \x escapes in the literals may be rendering artifacts -- confirm against the real file.
641
+ { // [\W]*: between Y and Z only the ' ' and U+2009 inputs are expected to match; letter/digit inputs are expected to fail
642
+ const test_wregex neg_w_regex (&g_regexTester, LR"( Y[\W]*Z)" );
643
+ neg_w_regex.should_search_match (L" xxxY Zxxx" , L" Y Z" );
644
+ neg_w_regex.should_search_match (L" xxxY \x2009 Zxxx" , L" Y \x2009 Z" ); // U+2009 THIN SPACE
645
+ neg_w_regex.should_search_fail (L" xxxY \x0078 Zxxx" ); // U+0078 LATIN SMALL LETTER X
646
+ neg_w_regex.should_search_fail (L" xxxY \x03C7 Zxxx" ); // U+03C7 GREEK SMALL LETTER CHI
647
+ neg_w_regex.should_search_fail (L" xxxY 3 Zxxx" );
648
+ neg_w_regex.should_search_fail (L" xxxY \x0662 Zxxx" ); // U+0662 ARABIC-INDIC DIGIT TWO
649
+ }
650
+ { // [\S]*: letter and digit inputs are expected to match; the ' ' and U+2009 inputs are expected to fail
651
+ const test_wregex neg_s_regex (&g_regexTester, LR"( Y[\S]*Z)" );
652
+ neg_s_regex.should_search_match (L" xxxYxx\x0078 xxxZxxx" , L" Yxx\x0078 xxxZ" ); // U+0078 LATIN SMALL LETTER X
653
+ neg_s_regex.should_search_match (L" xxxYxx\x03C7 xxxZxxx" , L" Yxx\x03C7 xxxZ" ); // U+03C7 GREEK SMALL LETTER CHI
654
+ neg_s_regex.should_search_match (L" xxxYxx3xxxZxxx" , L" Yxx3xxxZ" );
655
+ neg_s_regex.should_search_match (L" xxxYxx\x0662 xxxZxxx" , L" Yxx\x0662 xxxZ" ); // U+0662 ARABIC-INDIC DIGIT TWO
656
+ neg_s_regex.should_search_fail (L" xxxYxx xxxZxxx" );
657
+ neg_s_regex.should_search_fail (L" xxxYxx\x2009 xxxZxxx" ); // U+2009 THIN SPACE
658
+ }
659
+ for (const wstring& pattern : {LR"( Y[\D]*Z)" , LR"( Y[\W\D]*Z)" }) { // both patterns get identical expectations: only the two digit inputs are expected to fail
660
+ const test_wregex neg_d_regex (&g_regexTester, pattern);
661
+ neg_d_regex.should_search_match (L" xxxYxx\x0078 xxxZxxx" , L" Yxx\x0078 xxxZ" ); // U+0078 LATIN SMALL LETTER X
662
+ neg_d_regex.should_search_match (L" xxxYxx\x03C7 xxxZxxx" , L" Yxx\x03C7 xxxZ" ); // U+03C7 GREEK SMALL LETTER CHI
663
+ neg_d_regex.should_search_match (L" xxxYxx xxxZxxx" , L" Yxx xxxZ" );
664
+ neg_d_regex.should_search_match (L" xxxYxx\x2009 xxxZxxx" , L" Yxx\x2009 xxxZ" ); // U+2009 THIN SPACE
665
+ neg_d_regex.should_search_fail (L" xxxYxx3xxxZxxx" );
666
+ neg_d_regex.should_search_fail (L" xxxYxx\x0662 xxxZxxx" ); // U+0662 ARABIC-INDIC DIGIT TWO
667
+ }
668
+ for (const wstring& pattern : {LR"( Y[\W\S]*Z)" , LR"( Y[\S\D]*Z)" , LR"( Y[\W\S\D]*Z)" }) { // unions of negated classes: every sample input below is expected to match
669
+ const test_wregex class_matches_all_regex (&g_regexTester, pattern);
670
+ class_matches_all_regex.should_search_match (
671
+ L" xxxYxx\x0078 xxxZxxx" , L" Yxx\x0078 xxxZ" ); // U+0078 LATIN SMALL LETTER X
672
+ class_matches_all_regex.should_search_match (
673
+ L" xxxYxx\x03C7 xxxZxxx" , L" Yxx\x03C7 xxxZ" ); // U+03C7 GREEK SMALL LETTER CHI
674
+ class_matches_all_regex.should_search_match (L" xxxYxx xxxZxxx" , L" Yxx xxxZ" );
675
+ class_matches_all_regex.should_search_match (L" xxxYxx\x2009 xxxZxxx" , L" Yxx\x2009 xxxZ" ); // U+2009 THIN SPACE
676
+ class_matches_all_regex.should_search_match (L" xxxYxx3xxxZxxx" , L" Yxx3xxxZ" );
677
+ class_matches_all_regex.should_search_match (
678
+ L" xxxYxx\x0662 xxxZxxx" , L" Yxx\x0662 xxxZ" ); // U+0662 ARABIC-INDIC DIGIT TWO
679
+ }
680
+ { // pattern is just [\W] with no literal prefix; the "_skip" name presumably refers to the matcher's skip-ahead logic -- TODO confirm
681
+ const test_wregex neg_w_regex_skip (&g_regexTester, LR"( [\W])" );
682
+ neg_w_regex_skip.should_search_match (L" xxxx\x2009 xxxx" , L" \x2009 " ); // U+2009 THIN SPACE
683
+ neg_w_regex_skip.should_search_fail (L" xxxx\x03C7 xxxx" ); // U+03C7 GREEK SMALL LETTER CHI
684
+ neg_w_regex_skip.should_search_fail (L" xxxx\x0662 xxxx" ); // U+0662 ARABIC-INDIC DIGIT TWO
685
+ }
686
+ { // pattern is just [\S]: the chi and Arabic-Indic digit inputs are expected to match, the U+2009 input to fail
687
+ const test_wregex neg_s_regex_skip (&g_regexTester, LR"( [\S])" );
688
+ neg_s_regex_skip.should_search_match (L" \x03C7 " , L" \x03C7 " ); // U+03C7 GREEK SMALL LETTER CHI
689
+ neg_s_regex_skip.should_search_match (L" \x0662 " , L" \x0662 " ); // U+0662 ARABIC-INDIC DIGIT TWO
690
+ neg_s_regex_skip.should_search_fail (L" \x2009 " ); // U+2009 THIN SPACE
691
+ }
692
+ { // pattern is just [\D]; inputs are built as two pieces, presumably so the digits 253 are not absorbed into the preceding \x escape
693
+ const test_wregex neg_d_regex_skip (&g_regexTester, LR"( [\D])" );
694
+ neg_d_regex_skip.should_search_match (L" 1623\x03C7 " s + L" 253" , L" \x03C7 " ); // U+03C7 GREEK SMALL LETTER CHI
695
+ neg_d_regex_skip.should_search_match (L" 1623\x2009 " s + L" 253" , L" \x2009 " ); // U+2009 THIN SPACE
696
+ neg_d_regex_skip.should_search_fail (L" 1623\x0662 " s + L" 253" ); // U+0662 ARABIC-INDIC DIGIT TWO
697
+ }
698
+ { // [^\W]: negated bracket around a negated class; chi and Arabic-Indic digit inputs expected to match, U+2009 input to fail
699
+ const test_wregex double_negative_w (&g_regexTester, LR"( [^\W])" );
700
+ double_negative_w.should_search_match (L" \x03C7 " , L" \x03C7 " ); // U+03C7 GREEK SMALL LETTER CHI
701
+ double_negative_w.should_search_match (L" \x0662 " , L" \x0662 " ); // U+0662 ARABIC-INDIC DIGIT TWO
702
+ double_negative_w.should_search_fail (L" \x2009 " ); // U+2009 THIN SPACE
703
+ }
704
+ { // [^\S]: only the U+2009 input is expected to match
705
+ const test_wregex double_negative_s (&g_regexTester, LR"( [^\S])" );
706
+ double_negative_s.should_search_fail (L" \x03C7 " ); // U+03C7 GREEK SMALL LETTER CHI
707
+ double_negative_s.should_search_fail (L" \x0662 " ); // U+0662 ARABIC-INDIC DIGIT TWO
708
+ double_negative_s.should_search_match (L" \x2009 " , L" \x2009 " ); // U+2009 THIN SPACE
709
+ }
710
+ { // [^\D]: only the Arabic-Indic digit input is expected to match
711
+ const test_wregex double_negative_d (&g_regexTester, LR"( [^\D])" );
712
+ double_negative_d.should_search_fail (L" \x03C7 " ); // U+03C7 GREEK SMALL LETTER CHI
713
+ double_negative_d.should_search_match (L" \x0662 " , L" \x0662 " ); // U+0662 ARABIC-INDIC DIGIT TWO
714
+ double_negative_d.should_search_fail (L" \x2009 " ); // U+2009 THIN SPACE
715
+ }
716
+ for (const wstring& pattern : {LR"( [\w\W])" , LR"( [\s\S])" , LR"( [\d\D])" }) { // a class together with its negation: all three sample inputs are expected to match
717
+ const test_wregex omni_regex (&g_regexTester, pattern);
718
+ omni_regex.should_search_match (L" \x03C7 " , L" \x03C7 " ); // U+03C7 GREEK SMALL LETTER CHI
719
+ omni_regex.should_search_match (L" \x0662 " , L" \x0662 " ); // U+0662 ARABIC-INDIC DIGIT TWO
720
+ omni_regex.should_search_match (L" \x2009 " , L" \x2009 " ); // U+2009 THIN SPACE
721
+ }
722
+ }
723
+
639
724
void test_gh_993 () {
640
725
// GH-993 regex::icase is not handled correctly for some input.
641
726
{
@@ -751,11 +836,51 @@ void test_gh_5058() {
751
836
void test_gh_5160 () {
752
837
// GH-5160 fixed mishandled negated character class escapes
753
838
// outside character class definitions
// In this diff view, the '-' lines are the previous body (a single Y\S*Z case) and the '+'
// lines are its replacement: one scope each for \W, \S and \D used as bare escapes (not inside
// a bracket expression), first between Y and Z and then as the whole pattern.
// should_search_match(input, expected) and should_search_fail(input) are test_wregex helpers
// defined outside this view; presumably they check that a search over `input` yields `expected`
// or finds nothing, respectively -- TODO confirm against the helper.
// NOTE(review): the spaces right after opening quotes and after \x escapes in the literals may
// be rendering artifacts of this view -- confirm against the real file.
754
- const test_wregex neg_regex (&g_regexTester, LR"( Y\S*Z)" );
755
- neg_regex.should_search_match (L" xxxYxx\x0078 xxxZxxx" , L" Yxx\x0078 xxxZ" ); // U+0078 LATIN SMALL LETTER X
756
- neg_regex.should_search_match (L" xxxYxx\x03C7 xxxZxxx" , L" Yxx\x03C7 xxxZ" ); // U+03C7 GREEK SMALL LETTER CHI
757
- neg_regex.should_search_fail (L" xxxYxx xxxZxxx" );
758
- neg_regex.should_search_fail (L" xxxYxx\x2009 xxxZxxx" ); // U+2009 THIN SPACE
839
+ { // \W*: between Y and Z only the ' ' and U+2009 inputs are expected to match; letter/digit inputs are expected to fail
840
+ const test_wregex neg_w_regex (&g_regexTester, LR"( Y\W*Z)" );
841
+ neg_w_regex.should_search_match (L" xxxY Zxxx" , L" Y Z" );
842
+ neg_w_regex.should_search_match (L" xxxY \x2009 Zxxx" , L" Y \x2009 Z" ); // U+2009 THIN SPACE
843
+ neg_w_regex.should_search_fail (L" xxxY \x0078 Zxxx" ); // U+0078 LATIN SMALL LETTER X
844
+ neg_w_regex.should_search_fail (L" xxxY \x03C7 Zxxx" ); // U+03C7 GREEK SMALL LETTER CHI
845
+ neg_w_regex.should_search_fail (L" xxxY 3 Zxxx" );
846
+ neg_w_regex.should_search_fail (L" xxxY \x0662 Zxxx" ); // U+0662 ARABIC-INDIC DIGIT TWO
847
+ }
848
+ { // \S*: letter and digit inputs are expected to match; the ' ' and U+2009 inputs are expected to fail
849
+ const test_wregex neg_s_regex (&g_regexTester, LR"( Y\S*Z)" );
850
+ neg_s_regex.should_search_match (L" xxxYxx\x0078 xxxZxxx" , L" Yxx\x0078 xxxZ" ); // U+0078 LATIN SMALL LETTER X
851
+ neg_s_regex.should_search_match (L" xxxYxx\x03C7 xxxZxxx" , L" Yxx\x03C7 xxxZ" ); // U+03C7 GREEK SMALL LETTER CHI
852
+ neg_s_regex.should_search_match (L" xxxYxx3xxxZxxx" , L" Yxx3xxxZ" );
853
+ neg_s_regex.should_search_match (L" xxxYxx\x0662 xxxZxxx" , L" Yxx\x0662 xxxZ" ); // U+0662 ARABIC-INDIC DIGIT TWO
854
+ neg_s_regex.should_search_fail (L" xxxYxx xxxZxxx" );
855
+ neg_s_regex.should_search_fail (L" xxxYxx\x2009 xxxZxxx" ); // U+2009 THIN SPACE
856
+ }
857
+ { // \D*: only the two digit inputs are expected to fail
858
+ const test_wregex neg_d_regex (&g_regexTester, LR"( Y\D*Z)" );
859
+ neg_d_regex.should_search_match (L" xxxYxx\x0078 xxxZxxx" , L" Yxx\x0078 xxxZ" ); // U+0078 LATIN SMALL LETTER X
860
+ neg_d_regex.should_search_match (L" xxxYxx\x03C7 xxxZxxx" , L" Yxx\x03C7 xxxZ" ); // U+03C7 GREEK SMALL LETTER CHI
861
+ neg_d_regex.should_search_match (L" xxxYxx xxxZxxx" , L" Yxx xxxZ" );
862
+ neg_d_regex.should_search_match (L" xxxYxx\x2009 xxxZxxx" , L" Yxx\x2009 xxxZ" ); // U+2009 THIN SPACE
863
+ neg_d_regex.should_search_fail (L" xxxYxx3xxxZxxx" );
864
+ neg_d_regex.should_search_fail (L" xxxYxx\x0662 xxxZxxx" ); // U+0662 ARABIC-INDIC DIGIT TWO
865
+ }
866
+ { // pattern is just \W with no literal prefix; the "_skip" name presumably refers to the matcher's skip-ahead logic -- TODO confirm
867
+ const test_wregex neg_w_regex_skip (&g_regexTester, LR"( \W)" );
868
+ neg_w_regex_skip.should_search_match (L" xxxx\x2009 xxxx" , L" \x2009 " ); // U+2009 THIN SPACE
869
+ neg_w_regex_skip.should_search_fail (L" xxxx\x03C7 xxxx" ); // U+03C7 GREEK SMALL LETTER CHI
870
+ neg_w_regex_skip.should_search_fail (L" xxxx\x0662 xxxx" ); // U+0662 ARABIC-INDIC DIGIT TWO
871
+ }
872
+ { // pattern is just \S: the chi and Arabic-Indic digit inputs are expected to match, the U+2009 input to fail
873
+ const test_wregex neg_s_regex_skip (&g_regexTester, LR"( \S)" );
874
+ neg_s_regex_skip.should_search_match (L" \x03C7 " , L" \x03C7 " ); // U+03C7 GREEK SMALL LETTER CHI
875
+ neg_s_regex_skip.should_search_match (L" \x0662 " , L" \x0662 " ); // U+0662 ARABIC-INDIC DIGIT TWO
876
+ neg_s_regex_skip.should_search_fail (L" \x2009 " ); // U+2009 THIN SPACE
877
+ }
878
+ { // pattern is just \D; inputs are built as two pieces, presumably so the digits 253 are not absorbed into the preceding \x escape
879
+ const test_wregex neg_d_regex_skip (&g_regexTester, LR"( \D)" );
880
+ neg_d_regex_skip.should_search_match (L" 1623\x03C7 " s + L" 253" , L" \x03C7 " ); // U+03C7 GREEK SMALL LETTER CHI
881
+ neg_d_regex_skip.should_search_match (L" 1623\x2009 " s + L" 253" , L" \x2009 " ); // U+2009 THIN SPACE
882
+ neg_d_regex_skip.should_search_fail (L" 1623\x0662 " s + L" 253" ); // U+0662 ARABIC-INDIC DIGIT TWO
883
+ }
759
884
}
760
885
761
886
void test_gh_5165_syntax_option (const syntax_option_type basic_or_grep) {
@@ -1526,6 +1651,7 @@ int main() {
1526
1651
test_VSO_226914_word_boundaries ();
1527
1652
test_construction_from_nullptr_and_zero ();
1528
1653
test_gh_731 ();
1654
+ test_gh_992 ();
1529
1655
test_gh_993 ();
1530
1656
test_gh_4995 ();
1531
1657
test_gh_5058 ();
0 commit comments