@@ -1844,7 +1844,10 @@ namespace {
1844
1844
template <class _Traits , class _Ty >
1845
1845
const void * __stdcall __std_find_trivial_unsized_impl (const void * _First, const _Ty _Val) noexcept {
1846
1846
#ifndef _M_ARM64EC
1847
- if (_Use_avx2 ()) {
1847
+ if ((reinterpret_cast <uintptr_t >(_First) & (sizeof (_Ty) - 1 )) != 0 ) {
1848
+ // _First isn't aligned to sizeof(_Ty), so we need to use the scalar fallback below.
1849
+ // This can happen with 8-byte elements on x86's 4-aligned stack. It can also happen with packed structs.
1850
+ } else if (_Use_avx2 ()) {
1848
1851
_Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414
1849
1852
1850
1853
// We read in vector-sized pieces, and we align pointers to a vector-sized boundary.
@@ -1862,27 +1865,20 @@ namespace {
1862
1865
unsigned int _Bingo = static_cast <unsigned int >(_mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand)));
1863
1866
1864
1867
_Bingo &= _Mask;
1865
- if (_Bingo != 0 ) {
1866
- unsigned long _Offset = _tzcnt_u32 (_Bingo);
1867
- _Advance_bytes (_First, _Offset);
1868
- return _First;
1869
- }
1870
1868
1871
1869
for (;;) {
1872
- _Data = _mm256_load_si256 (static_cast <const __m256i*>(_First));
1873
- _Bingo = static_cast <unsigned int >(_mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand)));
1874
-
1875
1870
if (_Bingo != 0 ) {
1876
1871
unsigned long _Offset = _tzcnt_u32 (_Bingo);
1877
1872
_Advance_bytes (_First, _Offset);
1878
1873
return _First;
1879
1874
}
1880
1875
1881
1876
_Advance_bytes (_First, 32 );
1882
- }
1883
- }
1884
1877
1885
- if (_Traits::_Sse_available ()) {
1878
+ _Data = _mm256_load_si256 (static_cast <const __m256i*>(_First));
1879
+ _Bingo = static_cast <unsigned int >(_mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand)));
1880
+ }
1881
+ } else if (_Traits::_Sse_available ()) {
1886
1882
// We read in vector-sized pieces, and we align pointers to a vector-sized boundary.
1887
1883
// In the initial partial piece, we mask out matches that don't belong to the range.
1888
1884
// This ensures we never cross a page boundary, so we read 'as if' sequentially.
@@ -1898,17 +1894,8 @@ namespace {
1898
1894
unsigned int _Bingo = static_cast <unsigned int >(_mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand)));
1899
1895
1900
1896
_Bingo &= _Mask;
1901
- if (_Bingo != 0 ) {
1902
- unsigned long _Offset;
1903
- _BitScanForward (&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable]
1904
- _Advance_bytes (_First, _Offset);
1905
- return _First;
1906
- }
1907
1897
1908
1898
for (;;) {
1909
- _Data = _mm_load_si128 (static_cast <const __m128i*>(_First));
1910
- _Bingo = static_cast <unsigned int >(_mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand)));
1911
-
1912
1899
if (_Bingo != 0 ) {
1913
1900
unsigned long _Offset;
1914
1901
_BitScanForward (&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable]
@@ -1917,6 +1904,9 @@ namespace {
1917
1904
}
1918
1905
1919
1906
_Advance_bytes (_First, 16 );
1907
+
1908
+ _Data = _mm_load_si128 (static_cast <const __m128i*>(_First));
1909
+ _Bingo = static_cast <unsigned int >(_mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand)));
1920
1910
}
1921
1911
}
1922
1912
#endif // !_M_ARM64EC
0 commit comments