@@ -227,14 +227,13 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs
227
227
if (_Byte_length (_First, _Last) >= 64 && _bittest (&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
228
228
const void * _Stop_at = _First;
229
229
_Advance_bytes (_Stop_at, _Byte_length (_First, _Last) >> 6 << 5 );
230
+ const __m256i _Shuf = _mm256_set_epi32 (0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 );
230
231
do {
231
232
_Advance_bytes (_Last, -32 );
232
233
const __m256i _Left = _mm256_loadu_si256 (static_cast <__m256i*>(_First));
233
234
const __m256i _Right = _mm256_loadu_si256 (static_cast <__m256i*>(_Last));
234
- const __m256i _Left_perm = _mm256_permute4x64_epi64 (_Left, _MM_SHUFFLE (1 , 0 , 3 , 2 ));
235
- const __m256i _Right_perm = _mm256_permute4x64_epi64 (_Right, _MM_SHUFFLE (1 , 0 , 3 , 2 ));
236
- const __m256i _Left_reversed = _mm256_shuffle_epi32 (_Left_perm, _MM_SHUFFLE (0 , 1 , 2 , 3 ));
237
- const __m256i _Right_reversed = _mm256_shuffle_epi32 (_Right_perm, _MM_SHUFFLE (0 , 1 , 2 , 3 ));
235
+ const __m256i _Left_reversed = _mm256_permutevar8x32_epi32 (_Left, _Shuf);
236
+ const __m256i _Right_reversed = _mm256_permutevar8x32_epi32 (_Right, _Shuf);
238
237
_mm256_storeu_si256 (static_cast <__m256i*>(_First), _Right_reversed);
239
238
_mm256_storeu_si256 (static_cast <__m256i*>(_Last), _Left_reversed);
240
239
_Advance_bytes (_First, 32 );
@@ -384,11 +383,11 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
384
383
if (_Byte_length (_First, _Last) >= 32 && _bittest (&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
385
384
const void * _Stop_at = _Dest;
386
385
_Advance_bytes (_Stop_at, _Byte_length (_First, _Last) >> 5 << 5 );
386
+ const __m256i _Shuf = _mm256_set_epi32 (0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 );
387
387
do {
388
388
_Advance_bytes (_Last, -32 );
389
389
const __m256i _Block = _mm256_loadu_si256 (static_cast <const __m256i*>(_Last));
390
- const __m256i _Block_permuted = _mm256_permute4x64_epi64 (_Block, _MM_SHUFFLE (1 , 0 , 3 , 2 ));
391
- const __m256i _Block_reversed = _mm256_shuffle_epi32 (_Block_permuted, _MM_SHUFFLE (0 , 1 , 2 , 3 ));
390
+ const __m256i _Block_reversed = _mm256_permutevar8x32_epi32 (_Block, _Shuf);
392
391
_mm256_storeu_si256 (static_cast <__m256i*>(_Dest), _Block_reversed);
393
392
_Advance_bytes (_Dest, 32 );
394
393
} while (_Dest != _Stop_at);
0 commit comments