Skip to content

Commit 9c40b48

Browse files
Fix vectorized ranges::find with unreachable_sentinel to properly mask the beginning and handle unaligned pointers (#4450)
1 parent 0407db6 commit 9c40b48

File tree

2 files changed, with 65 additions and 21 deletions.

2 files changed

+65
-21
lines changed

stl/src/vector_algorithms.cpp

Lines changed: 11 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1844,7 +1844,10 @@ namespace {
18441844
template <class _Traits, class _Ty>
18451845
const void* __stdcall __std_find_trivial_unsized_impl(const void* _First, const _Ty _Val) noexcept {
18461846
#ifndef _M_ARM64EC
1847-
if (_Use_avx2()) {
1847+
if ((reinterpret_cast<uintptr_t>(_First) & (sizeof(_Ty) - 1)) != 0) {
1848+
// _First isn't aligned to sizeof(_Ty), so we need to use the scalar fallback below.
1849+
// This can happen with 8-byte elements on x86's 4-aligned stack. It can also happen with packed structs.
1850+
} else if (_Use_avx2()) {
18481851
_Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414
18491852

18501853
// We read by vector-sized pieces, and we align pointers to vector-sized boundary.
@@ -1862,27 +1865,20 @@ namespace {
18621865
unsigned int _Bingo = static_cast<unsigned int>(_mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)));
18631866

18641867
_Bingo &= _Mask;
1865-
if (_Bingo != 0) {
1866-
unsigned long _Offset = _tzcnt_u32(_Bingo);
1867-
_Advance_bytes(_First, _Offset);
1868-
return _First;
1869-
}
18701868

18711869
for (;;) {
1872-
_Data = _mm256_load_si256(static_cast<const __m256i*>(_First));
1873-
_Bingo = static_cast<unsigned int>(_mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)));
1874-
18751870
if (_Bingo != 0) {
18761871
unsigned long _Offset = _tzcnt_u32(_Bingo);
18771872
_Advance_bytes(_First, _Offset);
18781873
return _First;
18791874
}
18801875

18811876
_Advance_bytes(_First, 32);
1882-
}
1883-
}
18841877

1885-
if (_Traits::_Sse_available()) {
1878+
_Data = _mm256_load_si256(static_cast<const __m256i*>(_First));
1879+
_Bingo = static_cast<unsigned int>(_mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)));
1880+
}
1881+
} else if (_Traits::_Sse_available()) {
18861882
// We read by vector-sized pieces, and we align pointers to vector-sized boundary.
18871883
// From start partial piece we mask out matches that don't belong to the range.
18881884
// This makes sure we never cross page boundary, thus we read 'as if' sequentially.
@@ -1898,17 +1894,8 @@ namespace {
18981894
unsigned int _Bingo = static_cast<unsigned int>(_mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)));
18991895

19001896
_Bingo &= _Mask;
1901-
if (_Bingo != 0) {
1902-
unsigned long _Offset;
1903-
_BitScanForward(&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable]
1904-
_Advance_bytes(_First, _Offset);
1905-
return _First;
1906-
}
19071897

19081898
for (;;) {
1909-
_Data = _mm_load_si128(static_cast<const __m128i*>(_First));
1910-
_Bingo = static_cast<unsigned int>(_mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)));
1911-
19121899
if (_Bingo != 0) {
19131900
unsigned long _Offset;
19141901
_BitScanForward(&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable]
@@ -1917,6 +1904,9 @@ namespace {
19171904
}
19181905

19191906
_Advance_bytes(_First, 16);
1907+
1908+
_Data = _mm_load_si128(static_cast<const __m128i*>(_First));
1909+
_Bingo = static_cast<unsigned int>(_mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)));
19201910
}
19211911
}
19221912
#endif // !_M_ARM64EC

tests/std/tests/VSO_0000000_vector_algorithms/test.cpp

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,53 @@ void test_find(mt19937_64& gen) {
140140
}
141141
}
142142

143+
#if _HAS_CXX20
144+
template <class T, size_t N>
145+
struct NormalArrayWrapper {
146+
T m_arr[N];
147+
};
148+
149+
// Also test GH-4454 "vector_algorithms.cpp: __std_find_trivial_unsized_impl assumes N-byte elements are N-aligned"
150+
#pragma pack(push, 1)
151+
template <class T, size_t N>
152+
struct PackedArrayWrapper {
153+
uint8_t m_ignored; // to misalign the following array
154+
T m_arr[N];
155+
};
156+
#pragma pack(pop)
157+
158+
// GH-4449 <xutility>: ranges::find with unreachable_sentinel / __std_find_trivial_unsized_1 gives wrong result
159+
template <class T, template <class, size_t> class ArrayWrapper>
160+
void test_gh_4449_impl() {
161+
constexpr T desired_val{11};
162+
constexpr T unwanted_val{22};
163+
164+
ArrayWrapper<T, 256> wrapper;
165+
auto& arr = wrapper.m_arr;
166+
167+
constexpr int mid1 = 64;
168+
constexpr int mid2 = 192;
169+
170+
ranges::fill(arr, arr + mid1, desired_val);
171+
ranges::fill(arr + mid1, arr + mid2, unwanted_val);
172+
ranges::fill(arr + mid2, end(arr), desired_val);
173+
174+
for (int idx = mid1; idx <= mid2; ++idx) { // when idx == mid2, the value is immediately found
175+
const auto where = ranges::find(arr + idx, unreachable_sentinel, desired_val);
176+
177+
assert(where == arr + mid2);
178+
179+
arr[idx] = desired_val; // get ready for the next iteration
180+
}
181+
}
182+
183+
template <class T>
184+
void test_gh_4449() {
185+
test_gh_4449_impl<T, NormalArrayWrapper>();
186+
test_gh_4449_impl<T, PackedArrayWrapper>();
187+
}
188+
#endif // _HAS_CXX20
189+
143190
#if _HAS_CXX23
144191
template <class T>
145192
void test_case_find_last(const vector<T>& input, T v) {
@@ -371,6 +418,13 @@ void test_vector_algorithms(mt19937_64& gen) {
371418
test_find<long long>(gen);
372419
test_find<unsigned long long>(gen);
373420

421+
#if _HAS_CXX20
422+
test_gh_4449<uint8_t>();
423+
test_gh_4449<uint16_t>();
424+
test_gh_4449<uint32_t>();
425+
test_gh_4449<uint64_t>();
426+
#endif // _HAS_CXX20
427+
374428
#if _HAS_CXX23
375429
test_find_last<char>(gen);
376430
test_find_last<signed char>(gen);

0 commit comments

Comments
 (0)