microsoft · StephanTLavavej · Jun 14, 2025 · May 23, 2025 · Jun 9, 2025 · Jun 9, 2025
@@ -31,6 +31,7 @@ void bm_lorem_search(benchmark::State& state, const char* pattern) {
     }
 }
 
+BENCHMARK_CAPTURE(bm_lorem_search, "^bibe", "^bibe")->Arg(2)->Arg(3)->Arg(4); // same literal, anchored with ^
 BENCHMARK_CAPTURE(bm_lorem_search, "bibe", "bibe")->Arg(2)->Arg(3)->Arg(4);
 BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)", "(bibe)")->Arg(2)->Arg(3)->Arg(4);
 BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)+", "(bibe)+")->Arg(2)->Arg(3)->Arg(4);

@@ -33,6 +33,16 @@ _STL_DISABLE_CLANG_WARNINGS
 #pragma push_macro("new")
 #undef new
 
+// Controls whether LWG-2503 "multiline option should be added to syntax_option_type" is implemented.
+// Defining this to 0 requests Standard behavior:
+// * For ECMAScript, matching is non-multiline by default, but regex_constants::multiline can be requested.
+// * For POSIX grammars, matching is non-multiline, and regex_constants::multiline is ignored (N5008 [tab:re.synopt]).
+// Defining this to 1 requests legacy behavior:
+// * For all grammars, matching is multiline, and regex_constants::multiline is redundant.
+#ifndef _REGEX_LEGACY_MULTILINE_MODE
+#define _REGEX_LEGACY_MULTILINE_MODE 0
+#endif // !defined(_REGEX_LEGACY_MULTILINE_MODE)
+
 #ifndef _REGEX_MAX_COMPLEXITY_COUNT
 #define _REGEX_MAX_COMPLEXITY_COUNT 10000000L // set to 0 to disable
 #endif // !defined(_REGEX_MAX_COMPLEXITY_COUNT)
@@ -121,10 +131,11 @@ namespace regex_constants {
         _Gmask     = 0x3F,
         _Any_posix = basic | extended | grep | egrep | awk,
 
-        icase    = 0x0100,
-        nosubs   = 0x0200,
-        optimize = 0x0400,
-        collate  = 0x0800
+        icase     = 0x0100,
+        nosubs    = 0x0200,
+        optimize  = 0x0400,
+        collate   = 0x0800,
+        multiline = 0x1000
     };
 
     _BITMASK_OPS(_EXPORT_STD, syntax_option_type)
@@ -1666,6 +1677,15 @@ public:
         if (_Re->_Flags & _Fl_begin_needs_d) {
             _Char_class_d = _Lookup_char_class(static_cast<_Elem>('D'));
         }
+
+// sanitize multiline mode setting
+#if _REGEX_LEGACY_MULTILINE_MODE
+        _Sflags |= regex_constants::multiline; // old matcher applied multiline mode for all grammars
+#else // ^^^ _REGEX_LEGACY_MULTILINE_MODE / !_REGEX_LEGACY_MULTILINE_MODE vvv
+        if (_Sflags & regex_constants::_Any_posix) { // multiline mode is ECMAScript-only
+            _Sflags &= ~regex_constants::multiline;
+        }
+#endif // ^^^ !_REGEX_LEGACY_MULTILINE_MODE ^^^
     }
 
     void _Setf(regex_constants::match_flag_type _Mf) { // set specified flags
@@ -1920,6 +1940,7 @@ public:
     static constexpr flag_type awk        = regex_constants::awk;
     static constexpr flag_type grep       = regex_constants::grep;
     static constexpr flag_type egrep      = regex_constants::egrep;
+    static constexpr flag_type multiline  = regex_constants::multiline;
 
     basic_regex() = default; // construct empty object
 
@@ -3833,6 +3854,11 @@ typename _RxTraits::char_class_type _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Al
     return _Traits.lookup_classname(_Ptr, _Ptr + 1, (_Sflags & regex_constants::icase) != 0);
 }
 
+template <class _Elem>
+bool _Is_ecmascript_line_terminator(_Elem _Ch) { // check whether _Ch is an ECMAScript line terminator
+    return _Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps;
+}
+
 template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
 bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _Nx) { // check for match
     if (0 < _Max_stack_count && --_Max_stack_count <= 0) {
@@ -3852,18 +3878,19 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
         case _N_bol:
             if ((_Mflags & regex_constants::match_prev_avail)
                 || _Tgt_state._Cur != _Begin) { // if --_Cur is valid, check for preceding newline
-                _Failed = *_Prev_iter(_Tgt_state._Cur) != _Meta_nl;
+                _Failed = !(_Sflags & regex_constants::multiline)
+                       || !_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_Tgt_state._Cur));
             } else {
                 _Failed = (_Mflags & regex_constants::match_not_bol) != 0;
             }
-
             break;
 
         case _N_eol:
             if (_Tgt_state._Cur == _End) {
                 _Failed = (_Mflags & regex_constants::match_not_eol) != 0;
             } else {
-                _Failed = *_Tgt_state._Cur != _Meta_nl;
+                _Failed =
+                    !(_Sflags & regex_constants::multiline) || !_STD _Is_ecmascript_line_terminator(*_Tgt_state._Cur);
             }
 
             break;
@@ -3881,7 +3908,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
                     if (_Ch == _Elem()) {
                         _Failed = true;
                     }
-                } else if (_Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps) { // ECMAScript
+                } else if (_STD _Is_ecmascript_line_terminator(_Ch)) {
                     _Failed = true;
                 }
 
@@ -4054,30 +4081,55 @@ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
 _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg, _BidIt _Last, _Node_base* _Node_arg) {
     // skip until possible match
     // assumes --_First_arg is valid
-    _Node_base* _Nx = _Node_arg ? _Node_arg : _Rep;
+    static constexpr char _Line_terminators_char[]       = {static_cast<char>(_Meta_cr), static_cast<char>(_Meta_nl)};
+    static constexpr wchar_t _Line_terminators_wchar_t[] = {static_cast<wchar_t>(_Meta_cr),
+        static_cast<wchar_t>(_Meta_nl), static_cast<wchar_t>(_Meta_ls), static_cast<wchar_t>(_Meta_ps)};
+    _Node_base* _Nx                                      = _Node_arg ? _Node_arg : _Rep;
 
     while (_First_arg != _Last && _Nx) { // check current node
         switch (_Nx->_Kind) { // handle current node's type
         case _N_nop:
             break;
 
-        case _N_bol:
-            { // check for embedded newline
-              // return iterator to character just after the newline; for input like "\nabc"
-              // matching "^abc", _First_arg could be pointing at 'a', so we need to check
-              // --_First_arg for '\n'
-                if (*_Prev_iter(_First_arg) != _Meta_nl) {
-                    _First_arg = _STD find(_First_arg, _Last, _Meta_nl);
+        case _N_bol: // check for beginning anchor
+            if (_Sflags & regex_constants::multiline) {
+                // multiline mode: check for embedded line terminator
+                // return iterator to character just after the line terminator; for input like "\nabc"
+                // matching "^abc", _First_arg could be pointing at 'a', so we need to check
+                // --_First_arg for a line terminator
+                if (!_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_First_arg))) {
+                    if constexpr (sizeof(_Elem) == 1) {
+                        _First_arg = _STD find_first_of(
+                            _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char));
+                    } else {
+                        _First_arg = _STD find_first_of(
+                            _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t));
+                    }
+
                     if (_First_arg != _Last) {
                         ++_First_arg;
                     }
                 }
 
                 return _First_arg;
+            } else {
+                // non-multiline mode: never matches because --_First_arg is valid
+                return _Last;
             }
 
         case _N_eol:
-            return _STD find(_First_arg, _Last, _Meta_nl);
+            if (_Sflags & regex_constants::multiline) {
+                // multiline mode: matches at next line terminator or end of input
+                if constexpr (sizeof(_Elem) == 1) {
+                    return _STD find_first_of(
+                        _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char));
+                } else {
+                    return _STD find_first_of(
+                        _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t));
+                }
+            } else {
+                return _Last; // non-multiline mode: matches at end of input or not at all
+            }
 
         case _N_str:
             { // check for string match

@@ -575,12 +575,6 @@ std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp FAIL
 
 
 # *** MISSING LWG ISSUE RESOLUTIONS ***
-# LWG-2503 "multiline option should be added to syntax_option_type"
-std/re/re.alg/re.alg.search/no_update_pos.pass.cpp FAIL
-std/re/re.const/re.matchflag/match_multiline.pass.cpp FAIL
-std/re/re.const/re.matchflag/match_not_eol.pass.cpp FAIL
-std/re/re.const/re.synopt/syntax_option_type.pass.cpp FAIL
-
 # LWG-2532 "Satisfying a promise at thread exit" (Open)
 std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp FAIL
 std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp FAIL

@@ -154,6 +154,7 @@ tests\Dev11_1140665_unique_ptr_array_conversions
 tests\Dev11_1150223_shared_mutex
 tests\Dev11_1158803_regex_thread_safety
 tests\Dev11_1180290_filesystem_error_code
+tests\GH_000073_regex_multiline_escape_hatch
 tests\GH_000140_adl_proof_comparison
 tests\GH_000140_adl_proof_construction
 tests\GH_000140_adl_proof_views

@@ -0,0 +1,4 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\usual_matrix.lst
@@ -0,0 +1,107 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#define _REGEX_LEGACY_MULTILINE_MODE 1
+
+#include <cstddef>
+#include <cstdio>
+#include <regex>
+#include <string>
+
+#include <test_regex_support.hpp>
+
+using namespace std;
+using namespace std::regex_constants;
+
+regex_fixture g_regexTester;
+
+void test_VSO_225160_match_bol_flag() {
+    // Legacy multiline mode: ^ also matches after '\n' in all listed grammars; match_not_bol only blocks the start
+    for (syntax_option_type syntax : {syntax_option_type{}, ECMAScript, basic, grep, extended, egrep, awk}) {
+        const test_regex emptyAnchor(&g_regexTester, R"(^)", syntax);
+        emptyAnchor.should_search_match("", "");
+        emptyAnchor.should_search_fail("", match_not_bol);
+        emptyAnchor.should_search_match("\n", "");
+        emptyAnchor.should_search_match("\n", "", match_not_bol); // still matches just after the '\n'
+
+        const test_regex beginCd(&g_regexTester, R"(^cd)", syntax);
+        beginCd.should_search_match("ab\ncdefg", "cd");
+        beginCd.should_search_match("ab\ncdefg", "cd", match_not_bol);
+
+        beginCd.should_search_match("cdefg", "cd");
+        beginCd.should_search_fail("cdefg", match_not_bol); // the only "cd" sits at the very start
+        beginCd.should_search_match("\ncdefg", "cd");
+        beginCd.should_search_match("\ncdefg", "cd", match_not_bol);
+
+        beginCd.should_search_fail("ab\nxcdefg"); // "cd" does not begin a line
+        beginCd.should_search_fail("ab\nxcdefg", match_not_bol);
+    }
+}
+
+void test_VSO_225160_match_eol_flag() {
+    // Legacy multiline mode: $ also matches before '\n' in all listed grammars; match_not_eol only blocks the end
+    for (syntax_option_type syntax : {syntax_option_type{}, ECMAScript, basic, grep, extended, egrep, awk}) {
+        const test_regex emptyAnchor(&g_regexTester, R"($)", syntax);
+        emptyAnchor.should_search_match("", "");
+        emptyAnchor.should_search_fail("", match_not_eol);
+        emptyAnchor.should_search_match("\n", "");
+        emptyAnchor.should_search_match("\n", "", match_not_eol); // still matches just before the '\n'
+
+        const test_regex cdEnd(&g_regexTester, R"(cd$)", syntax);
+        cdEnd.should_search_match("abcd\nefg", "cd");
+        cdEnd.should_search_match("abcd\nefg", "cd", match_not_eol);
+
+        cdEnd.should_search_match("abcd", "cd");
+        cdEnd.should_search_fail("abcd", match_not_eol); // the only "cd" sits at the very end
+        cdEnd.should_search_match("abcd\n", "cd");
+        cdEnd.should_search_match("abcd\n", "cd", match_not_eol);
+
+        cdEnd.should_search_fail("abcdx\nefg"); // "cd" does not end a line
+        cdEnd.should_search_fail("abcdx\nefg", match_not_eol);
+    }
+}
+
+void test_gh_73() { // anchor behavior with _REGEX_LEGACY_MULTILINE_MODE defined to 1, for every listed grammar
+    for (syntax_option_type syntax : {syntax_option_type{}, ECMAScript, basic, grep, extended, egrep, awk}) {
+        { // ^a$: both anchors may match next to an embedded '\n'
+            test_regex a_anchored_on_both_sides(&g_regexTester, "^a$", syntax);
+            a_anchored_on_both_sides.should_search_match("a", "a");
+            a_anchored_on_both_sides.should_search_match("b\na", "a");
+            a_anchored_on_both_sides.should_search_match("a\nb", "a");
+            a_anchored_on_both_sides.should_search_fail("a\nb", match_not_bol); // only 'a' is at the very start
+            a_anchored_on_both_sides.should_search_fail("b\na", match_not_eol); // only 'a' is at the very end
+        }
+
+        { // ^a: match_not_bol rejects only the start of the input, not positions after '\n'
+            test_regex a_anchored_front(&g_regexTester, "^a", syntax);
+            a_anchored_front.should_search_match("a", "a");
+            a_anchored_front.should_search_match("a\n", "a");
+            a_anchored_front.should_search_match("a\nb", "a");
+            a_anchored_front.should_search_match("b\na", "a");
+            a_anchored_front.should_search_match("\na", "a");
+            a_anchored_front.should_search_fail("a", match_not_bol);
+            a_anchored_front.should_search_match("\na", "a", match_not_bol);
+            a_anchored_front.should_search_match("b\na", "a", match_not_bol);
+        }
+
+        { // a$: match_not_eol rejects only the end of the input, not positions before '\n'
+            test_regex a_anchored_back(&g_regexTester, "a$", syntax);
+            a_anchored_back.should_search_match("a", "a");
+            a_anchored_back.should_search_match("\na", "a");
+            a_anchored_back.should_search_match("b\na", "a");
+            a_anchored_back.should_search_match("a\nb", "a");
+            a_anchored_back.should_search_match("a\n", "a");
+            a_anchored_back.should_search_fail("a", match_not_eol);
+            a_anchored_back.should_search_match("a\n", "a", match_not_eol);
+            a_anchored_back.should_search_match("a\nb", "a", match_not_eol);
+        }
+    }
+}
+
+int main() { // run all legacy-multiline-mode anchor tests
+    test_VSO_225160_match_bol_flag();
+    test_VSO_225160_match_eol_flag();
+    test_gh_73();
+
+    return g_regexTester.result(); // exit status comes from the shared fixture
+}
@@ -387,9 +387,9 @@ void test_VSO_180466_regex_search_missing_Unchecked_call() {
 }
 
 void test_VSO_226914_match_prev_avail() {
-    // N.B. assumes our nonstandard multiline behavior. See also: LWG-2343, LWG-2503
+    // test exercises multiline mode
     const char bol_haystack[] = {'\n', 'a'};
-    const regex bol_anchor(R"(^a)");
+    const regex bol_anchor(R"(^a)", regex_constants::multiline);
     assert(regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor));
     assert(!regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor, match_not_bol));
     assert(regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor, match_prev_avail));