Skip to content

Commit 35dbfdc

Browse files
committed
WIP copy
1 parent 669cbde commit 35dbfdc

File tree

1 file changed

+135
-38
lines changed

1 file changed

+135
-38
lines changed

stl/inc/vector

Lines changed: 135 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2893,54 +2893,151 @@ template <class _InIt, class _OutIt>
28932893
_CONSTEXPR20 _OutIt _Copy_vbool(_InIt _First, _InIt _Last, _OutIt _Dest) {
28942894
// copy [_First, _Last) to [_Dest, ...)
28952895

2896-
// Slow path as _First and _Dest are not aligned
2897-
if (_First._Myoff != _Dest._Myoff) {
2898-
for (; _First != _Last; ++_Dest, (void) ++_First) {
2899-
*_Dest = *_First;
2900-
}
2901-
2902-
return _Dest;
2903-
}
2904-
2905-
_Vbase* _UFirst = const_cast<_Vbase*>(_First._Myptr);
2906-
_Vbase* _UDest = const_cast<_Vbase*>(_Dest._Myptr);
2907-
_Vbase* _ULast = const_cast<_Vbase*>(_Last._Myptr);
2896+
const auto _DestEnd = _Dest + (_Last - _First);
2897+
_Vbase* _UFirst = const_cast<_Vbase*>(_First._Myptr);
2898+
_Vbase* _UDest = const_cast<_Vbase*>(_Dest._Myptr);
2899+
_Vbase* _ULast = const_cast<_Vbase*>(_Last._Myptr);
29082900

29092901
// Fast path for less than _VBITS
2902+
// Always needed in case we only copy a range within one char
2903+
const bool _IsRightShift = _Dest._Myoff < _First._Myoff;
2904+
const auto _SourceShift = _IsRightShift ? _First._Myoff - _Dest._Myoff : _Dest._Myoff - _First._Myoff;
29102905
if (_UFirst == _ULast) {
2911-
const auto _SourceMask = (_Vbase(-1) << _First._Myoff) & (_Vbase(-1) >> (_VBITS - _Last._Myoff));
2912-
const auto _DestMask = _Vbase(-1) ^ _SourceMask;
2913-
*_UDest = (*_UDest & _DestMask) | (*_UFirst & _SourceMask);
2914-
return _Dest + (_Last - _First);
2915-
}
2906+
const bool _NeedsCarry = _DestEnd._Myptr != _Dest._Myptr;
2907+
const auto _SourceMask = (_Vbase(-1) << _First._Myoff) & (_Vbase(-1) >> _VBITS - _Last._Myoff);
2908+
const auto _SourceVal = _IsRightShift ? (*_UFirst & _SourceMask) >> _SourceShift //
2909+
: (*_UFirst & _SourceMask) << _SourceShift;
2910+
2911+
const auto _DestMask = (_Vbase(-1) << _Dest._Myoff);
2912+
*_UDest = (*_UDest & _DestMask) | _SourceVal;
2913+
2914+
if (!_NeedsCarry) {
2915+
return _DestEnd;
2916+
}
29162917

2917-
if (_First._Myoff != 0) {
2918-
const auto _FirstSourceMask = _Vbase(-1) << _First._Myoff;
2919-
const auto _FirstDestMask = _Vbase(-1) ^ _FirstSourceMask;
2920-
*_UDest = (*_UDest & _FirstDestMask) | (*_UFirst & _FirstSourceMask);
2921-
++_UFirst;
29222918
++_UDest;
2919+
const auto _CarryMask = (_Vbase(-1) << _Last._Myoff - _DestEnd._Myptr) & (_Vbase(-1) >> _VBITS - _Last._Myoff);
2920+
const auto _CarryVal = (*_UFirst & _CarryMask) >> _Last._Myoff - _DestEnd._Myptr;
2921+
2922+
const auto _DestCarryMask = _Vbase(-1) << _DestEnd._Myptr;
2923+
*_UDest = (*_UDest & _DestCarryMask) | _CarryVal;
2924+
return _DestEnd;
29232925
}
29242926

2925-
if (true
2927+
// _First and _Dest have matching char alignment, so use memmove
2928+
if (_First._Myoff & _Vbase(7) == _Dest._Myoff & _Vbase(7)) {
2929+
const auto _UnalignedFirstBits = _First._Myoff & _Vbase(7);
2930+
const auto _UnalignedLastBits = _Last._Myoff & _Vbase(7);
2931+
2932+
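// char may alias any object type, so accessing the _Vbase storage through char* does not violate strict aliasing (see "What is the Strict Aliasing Rule and Why do we care?")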
2933+
char* _UFirst_ch = reinterpret_cast<char*>(_UFirst) + (_First._Myoff - _UnalignedFirstBits) / sizeof(_Vbase);
2934+
char* _UDest_ch = reinterpret_cast<char*>(_UDest) + (_Dest._Myoff - _UnalignedFirstBits) / sizeof(_Vbase);
2935+
char* _ULast_ch = reinterpret_cast<char*>(_ULast) + (_Last._Myoff - _UnalignedLastBits) / sizeof(_Vbase);
2936+
2937+
// Copy bits until the next char alignment
2938+
if (_UnalignedFirstBits != 0) {
2939+
const auto _SourceMask = static_cast<char>(UCHAR_MAX << _UnalignedFirstBits);
2940+
const auto _DestMask = static_cast<char>(UCHAR_MAX >> 8 - _UnalignedFirstBits);
2941+
*_UDest_ch = (*_UDest_ch & _DestMask) | (*_UFirst_ch & _SourceMask);
2942+
++_UFirst_ch;
2943+
++_UDest_ch;
2944+
}
2945+
2946+
if (true
29262947
#ifdef __cpp_lib_is_constant_evaluated
2927-
&& !_STD is_constant_evaluated()
2948+
&& !_STD is_constant_evaluated()
29282949
#endif // __cpp_lib_is_constant_evaluated
2929-
) {
2930-
_UDest = _Copy_memmove(_UFirst, _ULast, _UDest);
2931-
} else {
2932-
for (; _UFirst != _ULast; ++_UDest, (void) ++_UFirst) {
2933-
*_UDest = *_UFirst;
2950+
) {
2951+
_UDest_ch = _Copy_memmove(_UFirst_ch, _ULast_ch, _UDest_ch);
2952+
} else {
2953+
for (; _UFirst_ch != _ULast_ch; ++_UDest_ch, (void) ++_UFirst_ch) {
2954+
*_UDest_ch = *_UFirst_ch;
2955+
}
29342956
}
2935-
}
29362957

2937-
if (_Last._Myoff != 0) {
2938-
const auto _LastSourceMask = _Vbase(-1) >> (_VBITS - _Last._Myoff);
2939-
const auto _LastDestMask = _Vbase(-1) ^ _LastSourceMask;
2940-
*_UDest = (*_UDest & _LastDestMask) | (*_ULast & _LastSourceMask);
2958+
// Copy remaining last bits; no shifts needed as source and destination are already in matching char alignment
2959+
if (_UnalignedLastBits != 0) {
2960+
const auto _SourceMask = static_cast<char>(UCHAR_MAX >> 8 - _UnalignedLastBits);
2961+
const auto _DestMask = static_cast<char>(UCHAR_MAX << _UnalignedLastBits);
2962+
*_UDest_ch = (*_UDest_ch & _DestMask) | (*_UFirst_ch & _SourceMask);
2963+
}
2964+
return _DestEnd;
29412965
}
29422966

2943-
return _Dest + (_Last - _First);
2967+
// _UFirst and _UDest are not aligned with each other, which requires a two-step copy with carry
2968+
const auto _FirstSourceMask = _Vbase(-1) << _First._Myoff;
2969+
const auto _FirstSourceVal = _IsRightShift ? (*_UFirst & _FirstSourceMask) >> _SourceShift //
2970+
: (*_UFirst & _FirstSourceMask) << _SourceShift;
2971+
2972+
const auto _FirstDestMask = _IsRightShift ? (_Vbase(-1) << _Dest._Myoff) | (_Vbase(-1) << _VBITS - _SourceShift) //
2973+
: (_Vbase(-1) << _Dest._Myoff);
2974+
*_UDest = (*_UDest & _FirstDestMask) | _FirstSourceVal;
2975+
2976+
if (_IsRightShift) {
2977+
// Source : | | |
2978+
// Dest : | | |
2979+
// ^^^^^^^ ^^^^^^^^^^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^
2980+
// _CarryVal _SourceVal _CarryVal _SourceVal
2981+
const auto _SourceCarryMask = _Vbase(-1) >> _VBITS - _SourceShift;
2982+
const auto _DestCarryMask = _Vbase(-1) << _VBITS - _SourceShift;
2983+
const auto _SourceMask = _Vbase(-1) << _SourceShift;
2984+
const auto _DestMask = _Vbase(-1) >> _SourceShift;
2985+
2986+
++_UFirst;
2987+
for (; _UFirst != _ULast; ++_UFirst) {
2988+
const auto _CarryVal = (*_UFirst & _SourceCarryMask) << _VBITS - _SourceShift;
2989+
*_UDest = (*_UDest & _DestCarryMask) | _CarryVal;
2990+
2991+
++_UDest;
2992+
const auto _SourceVal = (*_UFirst & _SourceMask) >> _SourceShift;
2993+
*_UDest = (*_UDest & _DestMask) | _SourceVal;
2994+
}
2995+
2996+
// Copy remainder of _Carry from _ULast
2997+
if (_Last._Myoff != 0) {
2998+
const auto _LastSourceCarryMask = _SourceCarryMask & (_Vbase(-1) >> _Last._Myoff);
2999+
const auto _LastDestCarryMask = _Vbase(-1) << _VBITS - (_STD min)(_SourceShift, _Last._Myoff);
3000+
const auto _CarryVal = (*_UFirst & _LastSourceCarryMask) << _VBITS - _SourceShift;
3001+
*_UDest = (*_UDest & _LastDestCarryMask) | _CarryVal;
3002+
}
3003+
3004+
// Copy last bits from _ULast only if it was not already done in _Carry as we need to increment _UDest
3005+
if (_Last._Myoff > _SourceShift) {
3006+
const auto _LastSourceMask = _SourceMask & (_Vbase(-1) >> _VBITS - _Last._Myoff);
3007+
const auto _LastDestMask = _Vbase(-1) >> _Last._Myoff - _SourceShift;
3008+
const auto _SourceVal = (*_UFirst & _LastSourceMask) >> _SourceShift;
3009+
++_UDest;
3010+
*_UDest = (*_UDest & _LastDestMask) | _SourceVal;
3011+
}
3012+
} else {
3013+
// Source : | | |
3014+
// Dest : | | |
3015+
// ^^^^^^^ ^^^^^^^^^^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^
3016+
// _CarryVal _SourceVal _CarryVal _SourceVal
3017+
const auto _CarryMask = _Vbase(-1) << _VBITS - _SourceShift;
3018+
3019+
auto _CarryVal = (*_UFirst & _CarryMask) >> _VBITS - _SourceShift;
3020+
++_First;
3021+
++_UDest;
3022+
for (; _UFirst != _ULast; ++_UFirst, (void) ++_UDest) {
3023+
const auto _SourceVal = *_UFirst << _SourceShift;
3024+
*_UDest = _CarryVal | _SourceVal;
3025+
3026+
_CarryVal = (*_UFirst & _CarryMask) >> _VBITS - _SourceShift;
3027+
}
3028+
3029+
// Copy remainder of _Carry from _ULast
3030+
if (_Last._Myoff != 0) {
3031+
const auto _LastSourceMask = _Vbase(-1) >> _VBITS - _Last._Myoff;
3032+
const auto _LastDestMask = _Vbase(-1) >> _Last._Myoff + _SourceShift;
3033+
const auto _SourceVal = (*_UFirst & _LastSourceMask) << _SourceShift;
3034+
*_UDest = (*_UDest & _LastDestMask) | _CarryVal | _SourceVal;
3035+
} else {
3036+
const auto _LastDestMask = _Vbase(-1) >> _SourceShift;
3037+
*_UDest = (*_UDest & _LastDestMask) | _CarryVal;
3038+
}
3039+
}
3040+
return _DestEnd;
29443041
}
29453042

29463043
template <class _BidIt1, class _BidIt2>
@@ -3114,8 +3211,8 @@ _NODISCARD _CONSTEXPR20 _InIt _Find_vbool(_InIt _First, const _InIt _Last, const
31143211
if (_UFirst == _ULast) {
31153212
const auto _Mask = (_Vbase(-1) << _First._Myoff) & (_Vbase(-1) >> (_VBITS - _Last._Myoff));
31163213
const auto _SelectVal = _Val ? *_UFirst : static_cast<_Vbase>(~*_UFirst);
3117-
const auto _Count = _Countr_zero(_SelectVal & _Mask) - _First._Myoff;
3118-
return _First + _Count;
3214+
const auto _Count = _Countr_zero(_SelectVal & _Mask);
3215+
return _First + (_Count - _First._Myoff);
31193216
}
31203217

31213218
_Iter_diff_t<_InIt> _TotalCount = 0;
@@ -3142,7 +3239,7 @@ _NODISCARD _CONSTEXPR20 _InIt _Find_vbool(_InIt _First, const _InIt _Last, const
31423239
const auto _LastMask = _Vbase(-1) >> (_VBITS - _Last._Myoff);
31433240
const auto _SelectVal = _Val ? *_UFirst : static_cast<_Vbase>(~*_UFirst);
31443241
const auto _Count = _Countr_zero(_SelectVal & _LastMask);
3145-
if (_Count != _VBITS) {
3242+
if (_Count < _Last._Myoff) {
31463243
return _First + (_TotalCount + _Count);
31473244
}
31483245
}

0 commit comments

Comments
 (0)