@@ -2893,54 +2893,151 @@ template <class _InIt, class _OutIt>
2893
2893
_CONSTEXPR20 _OutIt _Copy_vbool(_InIt _First, _InIt _Last, _OutIt _Dest) {
    // copy the bits of [_First, _Last) to [_Dest, ...) and return the end of the written range
    //
    // Each iterator is used through its word pointer (_Myptr) and the bit offset inside that word (_Myoff).
    // Two strategies are used:
    //   * source and destination start at the same bit offset: mask the partial first/last words and copy the
    //     whole words in between with _Copy_memmove (plain loop during constant evaluation);
    //   * otherwise: assemble each destination word from one or two source words with shifts.
    // The shifted path walks forward, so overlapping ranges are only handled when _Dest precedes _First
    // (the usual copy precondition).
    const auto _Count = _Last - _First;
    if (_Count <= 0) {
        // nothing to copy; also keeps every shift amount below strictly smaller than _VBITS
        return _Dest;
    }

    const auto _DestEnd = _Dest + _Count;
    _Vbase* _UFirst     = const_cast<_Vbase*>(_First._Myptr);
    _Vbase* _UDest      = const_cast<_Vbase*>(_Dest._Myptr);
    _Vbase* _ULast      = const_cast<_Vbase*>(_Last._Myptr);

    if (_First._Myoff == _Dest._Myoff) {
        // Word-aligned: bit i of a source word lands on bit i of the matching destination word
        if (_UFirst == _ULast) {
            // The whole range lives in one word; the range is non-empty, so _Last._Myoff != 0 here
            const auto _SourceMask = (_Vbase(-1) << _First._Myoff) & (_Vbase(-1) >> (_VBITS - _Last._Myoff));
            const auto _DestMask   = _Vbase(-1) ^ _SourceMask;
            *_UDest                = (*_UDest & _DestMask) | (*_UFirst & _SourceMask);
            return _DestEnd;
        }

        if (_First._Myoff != 0) {
            // Partial first word: keep the destination bits below the offset
            const auto _FirstSourceMask = _Vbase(-1) << _First._Myoff;
            const auto _FirstDestMask   = _Vbase(-1) ^ _FirstSourceMask;
            *_UDest                     = (*_UDest & _FirstDestMask) | (*_UFirst & _FirstSourceMask);
            ++_UFirst;
            ++_UDest;
        }

        if (true
#ifdef __cpp_lib_is_constant_evaluated
            && !_STD is_constant_evaluated()
#endif // __cpp_lib_is_constant_evaluated
        ) {
            _UDest = _Copy_memmove(_UFirst, _ULast, _UDest);
        } else {
            for (; _UFirst != _ULast; ++_UDest, (void) ++_UFirst) {
                *_UDest = *_UFirst;
            }
        }

        if (_Last._Myoff != 0) {
            // Partial last word: always read it through _ULast, because the memmove branch above
            // does not advance _UFirst
            const auto _LastSourceMask = _Vbase(-1) >> (_VBITS - _Last._Myoff);
            const auto _LastDestMask   = _Vbase(-1) ^ _LastSourceMask;
            *_UDest                    = (*_UDest & _LastDestMask) | (*_ULast & _LastSourceMask);
        }

        return _DestEnd;
    }

    // Unaligned: fill one destination word per iteration.
    // Invariants: _SrcOff < _Word_bits and _DestOff < _Word_bits; after the first iteration _DestOff is 0.
    constexpr size_t _Word_bits = _VBITS;
    size_t _SrcOff              = static_cast<size_t>(_First._Myoff);
    size_t _DestOff             = static_cast<size_t>(_Dest._Myoff);
    size_t _Remaining           = static_cast<size_t>(_Count);

    while (_Remaining != 0) {
        // Number of bits written into the current destination word, in [1, _Word_bits]
        const size_t _DestRoom = _Word_bits - _DestOff;
        const size_t _Chunk    = _Remaining < _DestRoom ? _Remaining : _DestRoom;

        // Gather _Chunk source bits into the low end of _Bits
        const size_t _SrcRoom = _Word_bits - _SrcOff; // bits left in the current source word
        _Vbase _Bits          = static_cast<_Vbase>(*_UFirst >> _SrcOff);
        if (_Chunk > _SrcRoom) {
            // The chunk straddles two source words; here 0 < _SrcRoom < _Word_bits, so the shift is valid
            // and the next source word is part of the input range
            _Bits |= static_cast<_Vbase>(_UFirst[1] << _SrcRoom);
        }

        // Merge into the destination, preserving every bit outside [_DestOff, _DestOff + _Chunk)
        const _Vbase _LowMask =
            _Chunk == _Word_bits ? _Vbase(-1) : static_cast<_Vbase>(~(_Vbase(-1) << _Chunk));
        const _Vbase _DestMask = static_cast<_Vbase>(_LowMask << _DestOff);
        *_UDest                = static_cast<_Vbase>((*_UDest & ~_DestMask) | ((_Bits << _DestOff) & _DestMask));

        // Advance: the destination always moves to the start of its next word, the source by _Chunk bits
        _Remaining -= _Chunk;
        _SrcOff += _Chunk;
        if (_SrcOff >= _Word_bits) {
            _SrcOff -= _Word_bits;
            ++_UFirst;
        }

        _DestOff = 0;
        ++_UDest;
    }

    return _DestEnd;
}
2945
3042
2946
3043
template <class _BidIt1, class _BidIt2>
@@ -3114,8 +3211,8 @@ _NODISCARD _CONSTEXPR20 _InIt _Find_vbool(_InIt _First, const _InIt _Last, const
3114
3211
if (_UFirst == _ULast) {
3115
3212
const auto _Mask = (_Vbase(-1) << _First._Myoff) & (_Vbase(-1) >> (_VBITS - _Last._Myoff));
3116
3213
const auto _SelectVal = _Val ? *_UFirst : static_cast<_Vbase>(~*_UFirst);
3117
- const auto _Count = _Countr_zero(_SelectVal & _Mask) - _First._Myoff ;
3118
- return _First + _Count;
3214
+ const auto _Count = _Countr_zero(_SelectVal & _Mask);
3215
+ return _First + ( _Count - _First._Myoff) ;
3119
3216
}
3120
3217
3121
3218
_Iter_diff_t<_InIt> _TotalCount = 0;
@@ -3142,7 +3239,7 @@ _NODISCARD _CONSTEXPR20 _InIt _Find_vbool(_InIt _First, const _InIt _Last, const
3142
3239
const auto _LastMask = _Vbase(-1) >> (_VBITS - _Last._Myoff);
3143
3240
const auto _SelectVal = _Val ? *_UFirst : static_cast<_Vbase>(~*_UFirst);
3144
3241
const auto _Count = _Countr_zero(_SelectVal & _LastMask);
3145
- if (_Count != _VBITS ) {
3242
+ if (_Count < _Last._Myoff ) {
3146
3243
return _First + (_TotalCount + _Count);
3147
3244
}
3148
3245
}
0 commit comments