From c350ee15107b9784511f956b00fbc1608bae60dc Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 18 Jan 2025 16:04:42 -0700 Subject: [PATCH] utf8.h: Add UNICODE_IS_NONCHAR_GIVEN_NOT_SUPER If we already know the input is not above the Unicode maximum code point (i.e., it is not a "super" code point), we can save a conditional when checking whether it is a Unicode non-character code point. --- regexec.c | 2 +- utf8.c | 2 +- utf8.h | 8 +++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/regexec.c b/regexec.c index 7a7d0d495846..ca903a1466be 100644 --- a/regexec.c +++ b/regexec.c @@ -11658,7 +11658,7 @@ Perl_is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const PERL_ARGS_ASSERT_IS_GRAPHEME; if ( UNLIKELY(UNICODE_IS_SUPER(cp)) - || UNLIKELY(UNICODE_IS_NONCHAR(cp))) + || UNLIKELY(UNICODE_IS_NONCHAR_GIVEN_NOT_SUPER(cp))) { /* These are considered graphemes */ return true; diff --git a/utf8.c b/utf8.c index 90ec77026412..c577df458f3b 100644 --- a/utf8.c +++ b/utf8.c @@ -1937,7 +1937,7 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, possible_problems |= UTF8_GOT_SUPER; } } - else if (UNLIKELY(UNICODE_IS_NONCHAR(uv))) { + else if (UNLIKELY(UNICODE_IS_NONCHAR_GIVEN_NOT_SUPER(uv))) { if (flags & (UTF8_DISALLOW_NONCHAR|UTF8_WARN_NONCHAR)) { possible_problems |= UTF8_GOT_NONCHAR; } diff --git a/utf8.h b/utf8.h index 97c31f1d33b1..93b4fa1b5ed3 100644 --- a/utf8.h +++ b/utf8.h @@ -1112,11 +1112,13 @@ non-character code points * the Unicode legal max */ #define UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv) \ UNLIKELY(((UV) (uv) & 0xFFFE) == 0xFFFE) +#define UNICODE_IS_NONCHAR_GIVEN_NOT_SUPER(uv) \ + ( UNLIKELY(UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv)) \ + || UNLIKELY(UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv))) #define UNICODE_IS_NONCHAR(uv) \ - ( UNLIKELY(UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv)) \ - || ( UNLIKELY(UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv)) \ - && LIKELY(! UNICODE_IS_SUPER(uv)))) + ( LIKELY(! 
UNICODE_IS_SUPER(uv)) \ + && UNLIKELY(UNICODE_IS_NONCHAR_GIVEN_NOT_SUPER(uv))) /* =for apidoc Am|bool|UTF8_IS_NONCHAR|const U8 *s|const U8 *e