@@ -2893,54 +2893,140 @@ template <class _InIt, class _OutIt>
2893
2893
_CONSTEXPR20 _OutIt _Copy_vbool(_InIt _First, _InIt _Last, _OutIt _Dest) {
2894
2894
// copy [_First, _Last) to [_Dest, ...)
2895
2895
2896
- // Slow path as _First and _Dest are not aligned
2897
- if (_First._Myoff != _Dest._Myoff) {
2898
- for (; _First != _Last; ++_Dest, (void) ++_First) {
2899
- *_Dest = *_First;
2900
- }
2896
+ const auto _DestEnd = _Dest + (_Last - _First);
2897
+ _Vbase* _UFirst = const_cast<_Vbase*>(_First._Myptr);
2898
+ _Vbase* _UDest = const_cast<_Vbase*>(_Dest._Myptr);
2899
+ _Vbase* _ULast = const_cast<_Vbase*>(_Last._Myptr);
2901
2900
2902
- return _Dest;
2903
- }
2901
+ const bool _IsRightShift = _Dest._Myoff < _First._Myoff ;
2902
+ const auto _SourceShift = _IsRightShift ? _First._Myoff - _Dest._Myoff : _Dest._Myoff - _First._Myoff;
2904
2903
2905
- _Vbase* _UFirst = const_cast<_Vbase*>(_First._Myptr);
2906
- _Vbase* _UDest = const_cast<_Vbase*>(_Dest._Myptr);
2907
- _Vbase* _ULast = const_cast<_Vbase*>(_Last._Myptr);
2904
+ const auto _SourceMask = _Vbase(-1) << _First._Myoff;
2905
+ const auto _DestMask = _Dest._Myoff == 0 ? _Vbase(0) : _Vbase(-1) >> (_VBITS - _Dest._Myoff);
2906
+ const auto _LastMask = _Last._Myoff == 0 ? _Vbase(0) : _Vbase(-1) >> (_VBITS - _Last._Myoff);
2907
+ const auto _LastDestMask = _IsRightShift ? _Vbase(-1) << (_Last._Myoff - _SourceShift) //
2908
+ : _Vbase(-1) << (_Last._Myoff + _SourceShift);
2908
2909
2909
2910
// Fast path for less than _VBITS
2911
+ // Always needed in case the copied range lies within a single _Vbase element
2910
2912
if (_UFirst == _ULast) {
2911
- const auto _SourceMask = (_Vbase(-1) << _First._Myoff) & (_Vbase(-1) >> (_VBITS - _Last._Myoff));
2912
- const auto _DestMask = _Vbase(-1) ^ _SourceMask;
2913
- *_UDest = (*_UDest & _DestMask) | (*_UFirst & _SourceMask);
2914
- return _Dest + (_Last - _First);
2915
- }
2913
+ const auto _SourceVal = _IsRightShift ? (*_UFirst & _SourceMask & _LastMask) >> _SourceShift //
2914
+ : (*_UFirst & _SourceMask & _LastMask) << _SourceShift;
2915
+ *_UDest = (*_UDest & (_DestMask | _LastDestMask)) | _SourceVal;
2916
+
2917
+ if (_DestEnd._Myptr == _Dest._Myptr) {
2918
+ return _DestEnd;
2919
+ }
2916
2920
2917
- if (_First._Myoff != 0) {
2918
- const auto _FirstSourceMask = _Vbase(-1) << _First._Myoff;
2919
- const auto _FirstDestMask = _Vbase(-1) ^ _FirstSourceMask;
2920
- *_UDest = (*_UDest & _FirstDestMask) | (*_UFirst & _FirstSourceMask);
2921
- ++_UFirst;
2922
2921
++_UDest;
2922
+ const auto _CarryShift = _Last._Myoff - _DestEnd._Myoff;
2923
+ const auto _CarryMask = _Vbase(-1) << _CarryShift;
2924
+ const auto _CarryVal = (*_UFirst & _CarryMask & _LastMask) >> _CarryShift;
2925
+
2926
+ const auto _DestEndMask = _Vbase(-1) << _DestEnd._Myoff;
2927
+ *_UDest = (*_UDest & _DestEndMask) | _CarryVal;
2928
+ return _DestEnd;
2923
2929
}
2924
2930
2925
- if (true
2931
+ // _First and _Dest have matching char alignment, so use memmove
2932
+ const auto _UnalignedFirstBits = _First._Myoff & _Vbase(7);
2933
+ const auto _UnalignedDestBits = _Dest._Myoff & _Vbase(7);
2934
+ if (_UnalignedFirstBits == _UnalignedDestBits) {
2935
+ const auto _UnalignedLastBits = _Last._Myoff & _Vbase(7);
2936
+
2937
+ // View the _Vbase storage as char so whole bytes can be copied; access through char* does not violate strict aliasing
2938
+ char* _UFirst_ch = reinterpret_cast<char*>(_UFirst) + (_First._Myoff - _UnalignedFirstBits) / 8;
2939
+ char* _UDest_ch = reinterpret_cast<char*>(_UDest) + (_Dest._Myoff - _UnalignedFirstBits) / 8;
2940
+ char* _ULast_ch = reinterpret_cast<char*>(_ULast) + (_Last._Myoff - _UnalignedLastBits) / 8;
2941
+
2942
+ // Copy bits until the next char alignment
2943
+ if (_UnalignedFirstBits != 0) {
2944
+ const auto _SourceBitMask = static_cast<char>(UCHAR_MAX << _UnalignedFirstBits);
2945
+ const auto _DestBitMask = static_cast<char>(UCHAR_MAX >> (8 - _UnalignedFirstBits));
2946
+ *_UDest_ch = (*_UDest_ch & _DestBitMask) | (*_UFirst_ch & _SourceBitMask);
2947
+ ++_UFirst_ch;
2948
+ ++_UDest_ch;
2949
+ }
2950
+
2951
+ if (true
2926
2952
#ifdef __cpp_lib_is_constant_evaluated
2927
- && !_STD is_constant_evaluated()
2953
+ && !_STD is_constant_evaluated()
2928
2954
#endif // __cpp_lib_is_constant_evaluated
2929
- ) {
2930
- _UDest = _Copy_memmove(_UFirst, _ULast, _UDest);
2931
- } else {
2932
- for (; _UFirst != _ULast; ++_UDest, (void) ++_UFirst) {
2933
- *_UDest = *_UFirst;
2955
+ ) {
2956
+ _UDest_ch = _Copy_memmove(_UFirst_ch, _ULast_ch, _UDest_ch);
2957
+ } else {
2958
+ for (; _UFirst_ch != _ULast_ch; ++_UDest_ch, (void) ++_UFirst_ch) {
2959
+ *_UDest_ch = *_UFirst_ch;
2960
+ }
2934
2961
}
2935
- }
2936
2962
2937
- if (_Last._Myoff != 0) {
2938
- const auto _LastSourceMask = _Vbase(-1) >> (_VBITS - _Last._Myoff);
2939
- const auto _LastDestMask = _Vbase(-1) ^ _LastSourceMask;
2940
- *_UDest = (*_UDest & _LastDestMask) | (*_ULast & _LastSourceMask);
2963
+ // Copy the remaining low bits of the last, partially covered char
2964
+ if (_UnalignedLastBits != 0) {
2965
+ const auto _SourceBitMask = static_cast<char>(UCHAR_MAX >> (8 - _UnalignedLastBits));
2966
+ const auto _DestBitMask = static_cast<char>(UCHAR_MAX << _UnalignedLastBits);
2967
+ *_UDest_ch = (*_UDest_ch & _DestBitMask) | (*_ULast_ch & _SourceBitMask);
2968
+ }
2969
+ return _DestEnd;
2941
2970
}
2942
2971
2943
- return _Dest + (_Last - _First);
2972
+ // _First and _Dest have different bit offsets, so each destination element needs a two step copy with carry
2973
+ const auto _FirstSourceVal = _IsRightShift ? (*_UFirst & _SourceMask) >> _SourceShift //
2974
+ : (*_UFirst & _SourceMask) << _SourceShift;
2975
+ *_UDest = (*_UDest & _DestMask) | _FirstSourceVal;
2976
+
2977
+ const auto _CarryShift = _VBITS - _SourceShift;
2978
+ if (_IsRightShift) {
2979
+ // Source : | | |
2980
+ // Dest : | | |
2981
+ // ^^^^^^^ ^^^^^^^^^^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^
2982
+ // _CarryVal _SourceVal _CarryVal _SourceVal
2983
+ const auto _SourceCarryMask = _Vbase(-1) >> _CarryShift;
2984
+ const auto _DestCarryMask = _Vbase(-1) >> _SourceShift;
2985
+
2986
+ ++_UFirst;
2987
+ for (; _UFirst != _ULast; ++_UFirst) {
2988
+ const auto _CarryVal = (*_UFirst & _SourceCarryMask) << _CarryShift;
2989
+ *_UDest = (*_UDest & _DestCarryMask) | _CarryVal;
2990
+
2991
+ ++_UDest;
2992
+ const auto _SourceVal = (*_UFirst & _SourceMask) >> _SourceShift;
2993
+ *_UDest = _SourceVal;
2994
+ }
2995
+
2996
+ if (_Last._Myoff != 0) {
2997
+ const auto _LastSourceCarryMask = _SourceCarryMask & _LastMask;
2998
+ const auto _LastDestCarryMask = _DestCarryMask | (_Vbase(-1) << (_VBITS - _Last._Myoff));
2999
+ const auto _CarryVal = (*_UFirst & _LastSourceCarryMask) << _CarryShift;
3000
+ *_UDest = (*_UDest & _LastDestCarryMask) | _CarryVal;
3001
+ }
3002
+
3003
+ if (_Last._Myoff > _SourceShift) {
3004
+ const auto _LastSourceMask = _SourceMask & _LastMask;
3005
+ const auto _SourceVal = (*_UFirst & _LastSourceMask) >> _SourceShift;
3006
+ ++_UDest;
3007
+ *_UDest = (*_UDest & _LastDestMask) | _SourceVal;
3008
+ }
3009
+ } else {
3010
+ // Source : | | |
3011
+ // Dest : | | |
3012
+ // ^^^^^^^ ^^^^^^^^^^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^
3013
+ // _CarryVal _SourceVal _CarryVal _SourceVal
3014
+ const auto _SourceCarryMask = _Vbase(-1) << _CarryShift;
3015
+
3016
+ auto _CarryVal = (*_UFirst & _SourceCarryMask) >> _CarryShift;
3017
+ ++_UFirst;
3018
+ ++_UDest;
3019
+ for (; _UFirst != _ULast; ++_UFirst, (void) ++_UDest) {
3020
+ const auto _SourceVal = *_UFirst << _SourceShift;
3021
+ *_UDest = _CarryVal | _SourceVal;
3022
+
3023
+ _CarryVal = (*_UFirst & _SourceCarryMask) >> _CarryShift;
3024
+ }
3025
+
3026
+ const auto _SourceVal = _Last._Myoff != 0 ? (*_UFirst & _LastMask) << _SourceShift : _Vbase(0);
3027
+ *_UDest = (*_UDest & _LastDestMask) | _CarryVal | _SourceVal;
3028
+ }
3029
+ return _DestEnd;
2944
3030
}
2945
3031
2946
3032
template <class _BidIt1, class _BidIt2>
@@ -2972,8 +3058,6 @@ _CONSTEXPR20 _BidIt2 _Copy_backward_vbool(_BidIt1 _First, _BidIt1 _Last, _BidIt2
2972
3058
const auto _LastSourceMask = _Vbase(-1) >> _LastShift;
2973
3059
const auto _LastDestMask = _Vbase(-1) ^ _LastSourceMask;
2974
3060
*_UDest = (*_UDest & _LastDestMask) | (*_ULast & _LastSourceMask);
2975
- --_ULast;
2976
- --_UDest;
2977
3061
}
2978
3062
2979
3063
if (true
@@ -2985,6 +3069,8 @@ _CONSTEXPR20 _BidIt2 _Copy_backward_vbool(_BidIt1 _First, _BidIt1 _Last, _BidIt2
2985
3069
_UDest = _Copy_backward_memmove(_UFirst + 1, _ULast, _UDest);
2986
3070
--_UDest;
2987
3071
} else {
3072
+ --_UDest;
3073
+ --_ULast;
2988
3074
for (; _UFirst != _ULast; --_UDest, (void) --_ULast) {
2989
3075
*_UDest = *_ULast;
2990
3076
}
0 commit comments