Skip to content

Commit 6c86f50

Browse files
committed
[vector] Expand _Copy_vbool to also handle non-aligned iterators
1 parent 608b47a commit 6c86f50

File tree

2 files changed

+678
-444
lines changed
  • stl/inc
  • tests/std/tests/GH_000625_vector_bool_optimization

2 files changed

+678
-444
lines changed

stl/inc/vector

Lines changed: 121 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -2893,54 +2893,140 @@ template <class _InIt, class _OutIt>
28932893
_CONSTEXPR20 _OutIt _Copy_vbool(_InIt _First, _InIt _Last, _OutIt _Dest) {
28942894
// copy [_First, _Last) to [_Dest, ...)
28952895

2896-
// Slow path as _First and _Dest are not aligned
2897-
if (_First._Myoff != _Dest._Myoff) {
2898-
for (; _First != _Last; ++_Dest, (void) ++_First) {
2899-
*_Dest = *_First;
2900-
}
2896+
const auto _DestEnd = _Dest + (_Last - _First);
2897+
_Vbase* _UFirst = const_cast<_Vbase*>(_First._Myptr);
2898+
_Vbase* _UDest = const_cast<_Vbase*>(_Dest._Myptr);
2899+
_Vbase* _ULast = const_cast<_Vbase*>(_Last._Myptr);
29012900

2902-
return _Dest;
2903-
}
2901+
const bool _IsRightShift = _Dest._Myoff < _First._Myoff;
2902+
const auto _SourceShift = _IsRightShift ? _First._Myoff - _Dest._Myoff : _Dest._Myoff - _First._Myoff;
29042903

2905-
_Vbase* _UFirst = const_cast<_Vbase*>(_First._Myptr);
2906-
_Vbase* _UDest = const_cast<_Vbase*>(_Dest._Myptr);
2907-
_Vbase* _ULast = const_cast<_Vbase*>(_Last._Myptr);
2904+
const auto _SourceMask = _Vbase(-1) << _First._Myoff;
2905+
const auto _DestMask = _Dest._Myoff == 0 ? _Vbase(0) : _Vbase(-1) >> (_VBITS - _Dest._Myoff);
2906+
const auto _LastMask = _Last._Myoff == 0 ? _Vbase(0) : _Vbase(-1) >> (_VBITS - _Last._Myoff);
2907+
const auto _LastDestMask = _IsRightShift ? _Vbase(-1) << (_Last._Myoff - _SourceShift) //
2908+
: _Vbase(-1) << (_Last._Myoff + _SourceShift);
29082909

29092910
// Fast path for less than _VBITS
2911+
// Always needed in case we only copy a range within one char
29102912
if (_UFirst == _ULast) {
2911-
const auto _SourceMask = (_Vbase(-1) << _First._Myoff) & (_Vbase(-1) >> (_VBITS - _Last._Myoff));
2912-
const auto _DestMask = _Vbase(-1) ^ _SourceMask;
2913-
*_UDest = (*_UDest & _DestMask) | (*_UFirst & _SourceMask);
2914-
return _Dest + (_Last - _First);
2915-
}
2913+
const auto _SourceVal = _IsRightShift ? (*_UFirst & _SourceMask & _LastMask) >> _SourceShift //
2914+
: (*_UFirst & _SourceMask & _LastMask) << _SourceShift;
2915+
*_UDest = (*_UDest & (_DestMask | _LastDestMask)) | _SourceVal;
2916+
2917+
if (_DestEnd._Myptr == _Dest._Myptr) {
2918+
return _DestEnd;
2919+
}
29162920

2917-
if (_First._Myoff != 0) {
2918-
const auto _FirstSourceMask = _Vbase(-1) << _First._Myoff;
2919-
const auto _FirstDestMask = _Vbase(-1) ^ _FirstSourceMask;
2920-
*_UDest = (*_UDest & _FirstDestMask) | (*_UFirst & _FirstSourceMask);
2921-
++_UFirst;
29222921
++_UDest;
2922+
const auto _CarryShift = _Last._Myoff - _DestEnd._Myoff;
2923+
const auto _CarryMask = _Vbase(-1) << _CarryShift;
2924+
const auto _CarryVal = (*_UFirst & _CarryMask & _LastMask) >> _CarryShift;
2925+
2926+
const auto _DestEndMask = _Vbase(-1) << _DestEnd._Myoff;
2927+
*_UDest = (*_UDest & _DestEndMask) | _CarryVal;
2928+
return _DestEnd;
29232929
}
29242930

2925-
if (true
2931+
// _First and _Dest have matching char alignment, so use memmove
2932+
const auto _UnalignedFirstBits = _First._Myoff & _Vbase(7);
2933+
const auto _UnalignedDestBits = _Dest._Myoff & _Vbase(7);
2934+
if (_UnalignedFirstBits == _UnalignedDestBits) {
2935+
const auto _UnalignedLastBits = _Last._Myoff & _Vbase(7);
2936+
2937+
// What is the Strict Aliasing Rule and Why do we care?
2938+
char* _UFirst_ch = reinterpret_cast<char*>(_UFirst) + (_First._Myoff - _UnalignedFirstBits) / 8;
2939+
char* _UDest_ch = reinterpret_cast<char*>(_UDest) + (_Dest._Myoff - _UnalignedFirstBits) / 8;
2940+
char* _ULast_ch = reinterpret_cast<char*>(_ULast) + (_Last._Myoff - _UnalignedLastBits) / 8;
2941+
2942+
// Copy bits until the next char alignment
2943+
if (_UnalignedFirstBits != 0) {
2944+
const auto _SourceBitMask = static_cast<char>(UCHAR_MAX << _UnalignedFirstBits);
2945+
const auto _DestBitMask = static_cast<char>(UCHAR_MAX >> (8 - _UnalignedFirstBits));
2946+
*_UDest_ch = (*_UDest_ch & _DestBitMask) | (*_UFirst_ch & _SourceBitMask);
2947+
++_UFirst_ch;
2948+
++_UDest_ch;
2949+
}
2950+
2951+
if (true
29262952
#ifdef __cpp_lib_is_constant_evaluated
2927-
&& !_STD is_constant_evaluated()
2953+
&& !_STD is_constant_evaluated()
29282954
#endif // __cpp_lib_is_constant_evaluated
2929-
) {
2930-
_UDest = _Copy_memmove(_UFirst, _ULast, _UDest);
2931-
} else {
2932-
for (; _UFirst != _ULast; ++_UDest, (void) ++_UFirst) {
2933-
*_UDest = *_UFirst;
2955+
) {
2956+
_UDest_ch = _Copy_memmove(_UFirst_ch, _ULast_ch, _UDest_ch);
2957+
} else {
2958+
for (; _UFirst_ch != _ULast_ch; ++_UDest_ch, (void) ++_UFirst_ch) {
2959+
*_UDest_ch = *_UFirst_ch;
2960+
}
29342961
}
2935-
}
29362962

2937-
if (_Last._Myoff != 0) {
2938-
const auto _LastSourceMask = _Vbase(-1) >> (_VBITS - _Last._Myoff);
2939-
const auto _LastDestMask = _Vbase(-1) ^ _LastSourceMask;
2940-
*_UDest = (*_UDest & _LastDestMask) | (*_ULast & _LastSourceMask);
2963+
// Copy remaining last bits, shifts needed as we are already in
2964+
if (_UnalignedLastBits != 0) {
2965+
const auto _SourceBitMask = static_cast<char>(UCHAR_MAX >> (8 - _UnalignedLastBits));
2966+
const auto _DestBitMask = static_cast<char>(UCHAR_MAX << _UnalignedLastBits);
2967+
*_UDest_ch = (*_UDest_ch & _DestBitMask) | (*_ULast_ch & _SourceBitMask);
2968+
}
2969+
return _DestEnd;
29412970
}
29422971

2943-
return _Dest + (_Last - _First);
2972+
// Unaligned _UFirst and _ULast require a two step copy with carry
2973+
const auto _FirstSourceVal = _IsRightShift ? (*_UFirst & _SourceMask) >> _SourceShift //
2974+
: (*_UFirst & _SourceMask) << _SourceShift;
2975+
*_UDest = (*_UDest & _DestMask) | _FirstSourceVal;
2976+
2977+
const auto _CarryShift = _VBITS - _SourceShift;
2978+
if (_IsRightShift) {
2979+
// Source : | | |
2980+
// Dest : | | |
2981+
// ^^^^^^^ ^^^^^^^^^^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^
2982+
// _CarryVal _SourceVal _CarryVal _SourceVal
2983+
const auto _SourceCarryMask = _Vbase(-1) >> _CarryShift;
2984+
const auto _DestCarryMask = _Vbase(-1) >> _SourceShift;
2985+
2986+
++_UFirst;
2987+
for (; _UFirst != _ULast; ++_UFirst) {
2988+
const auto _CarryVal = (*_UFirst & _SourceCarryMask) << _CarryShift;
2989+
*_UDest = (*_UDest & _DestCarryMask) | _CarryVal;
2990+
2991+
++_UDest;
2992+
const auto _SourceVal = (*_UFirst & _SourceMask) >> _SourceShift;
2993+
*_UDest = _SourceVal;
2994+
}
2995+
2996+
if (_Last._Myoff != 0) {
2997+
const auto _LastSourceCarryMask = _SourceCarryMask & _LastMask;
2998+
const auto _LastDestCarryMask = _DestCarryMask | (_Vbase(-1) << (_VBITS - _Last._Myoff));
2999+
const auto _CarryVal = (*_UFirst & _LastSourceCarryMask) << _CarryShift;
3000+
*_UDest = (*_UDest & _LastDestCarryMask) | _CarryVal;
3001+
}
3002+
3003+
if (_Last._Myoff > _SourceShift) {
3004+
const auto _LastSourceMask = _SourceMask & _LastMask;
3005+
const auto _SourceVal = (*_UFirst & _LastSourceMask) >> _SourceShift;
3006+
++_UDest;
3007+
*_UDest = (*_UDest & _LastDestMask) | _SourceVal;
3008+
}
3009+
} else {
3010+
// Source : | | |
3011+
// Dest : | | |
3012+
// ^^^^^^^ ^^^^^^^^^^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^
3013+
// _CarryVal _SourceVal _CarryVal _SourceVal
3014+
const auto _SourceCarryMask = _Vbase(-1) << _CarryShift;
3015+
3016+
auto _CarryVal = (*_UFirst & _SourceCarryMask) >> _CarryShift;
3017+
++_UFirst;
3018+
++_UDest;
3019+
for (; _UFirst != _ULast; ++_UFirst, (void) ++_UDest) {
3020+
const auto _SourceVal = *_UFirst << _SourceShift;
3021+
*_UDest = _CarryVal | _SourceVal;
3022+
3023+
_CarryVal = (*_UFirst & _SourceCarryMask) >> _CarryShift;
3024+
}
3025+
3026+
const auto _SourceVal = _Last._Myoff != 0 ? (*_UFirst & _LastMask) << _SourceShift : _Vbase(0);
3027+
*_UDest = (*_UDest & _LastDestMask) | _CarryVal | _SourceVal;
3028+
}
3029+
return _DestEnd;
29443030
}
29453031

29463032
template <class _BidIt1, class _BidIt2>
@@ -2972,8 +3058,6 @@ _CONSTEXPR20 _BidIt2 _Copy_backward_vbool(_BidIt1 _First, _BidIt1 _Last, _BidIt2
29723058
const auto _LastSourceMask = _Vbase(-1) >> _LastShift;
29733059
const auto _LastDestMask = _Vbase(-1) ^ _LastSourceMask;
29743060
*_UDest = (*_UDest & _LastDestMask) | (*_ULast & _LastSourceMask);
2975-
--_ULast;
2976-
--_UDest;
29773061
}
29783062

29793063
if (true
@@ -2985,6 +3069,8 @@ _CONSTEXPR20 _BidIt2 _Copy_backward_vbool(_BidIt1 _First, _BidIt1 _Last, _BidIt2
29853069
_UDest = _Copy_backward_memmove(_UFirst + 1, _ULast, _UDest);
29863070
--_UDest;
29873071
} else {
3072+
--_UDest;
3073+
--_ULast;
29883074
for (; _UFirst != _ULast; --_UDest, (void) --_ULast) {
29893075
*_UDest = *_ULast;
29903076
}

0 commit comments

Comments
 (0)