Skip to content

Commit e3ed206

Browse files
Improve search/find_end perf by dropping memcmp (#4654)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent a1f8a58 commit e3ed206

File tree

6 files changed

+197
-39
lines changed

6 files changed

+197
-39
lines changed

benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ add_benchmark(path_lexically_normal src/path_lexically_normal.cpp)
119119
add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp)
120120
add_benchmark(random_integer_generation src/random_integer_generation.cpp)
121121
add_benchmark(replace src/replace.cpp)
122+
add_benchmark(search src/search.cpp)
122123
add_benchmark(std_copy src/std_copy.cpp)
123124
add_benchmark(swap_ranges src/swap_ranges.cpp)
124125

benchmarks/src/replace.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ const char src[] =
1010
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum "
1111
"ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, "
1212
"ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. "
13-
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquet "
13+
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam "
1414
"velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate "
1515
"ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam "
1616
"eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero "
@@ -34,7 +34,7 @@ const char src[] =
3434
"montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum "
3535
"justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum "
3636
"vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium "
37-
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquam malesuada est at dignissim. Pellentesque finibus "
37+
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus "
3838
"sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis.";
3939

4040
template <class T>

benchmarks/src/search.cpp

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
4+
#include <algorithm>
5+
#include <benchmark/benchmark.h>
6+
#include <cstdint>
7+
#include <cstring>
8+
#include <functional>
9+
#include <string>
10+
#include <vector>
11+
12+
const char src_haystack[] =
13+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum "
14+
"ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, "
15+
"ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. "
16+
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam "
17+
"velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate "
18+
"ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam "
19+
"eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero "
20+
"accumsan velit elementum, eu laoreet metus convallis. Donec pellentesque lacus ut iaculis iaculis. Curabitur orci "
21+
"elit, bibendum sit amet feugiat at, iaculis sit amet massa. Maecenas imperdiet lacus at vehicula iaculis. Donec "
22+
"volutpat nunc sit amet accumsan tempor. Quisque pretium vestibulum ultricies. Suspendisse potenti. Aenean at diam "
23+
"iaculis, condimentum felis venenatis, condimentum erat. Nam quis elit dui. Duis quis odio vitae metus hendrerit "
24+
"rhoncus ut et magna. Cras ac augue quis nibh pharetra sagittis. Donec ullamcorper vel eros semper pretium. Proin "
25+
"vel sollicitudin eros. Nulla sollicitudin mattis turpis id suscipit. Aliquam sed risus velit. Aliquam iaculis nec "
26+
"nibh ac egestas. Duis finibus semper est sed consequat. Sed in sapien quis nibh dignissim mattis. Vestibulum nec "
27+
"metus sodales, euismod mauris ac, sollicitudin libero. Maecenas non arcu ac velit ullamcorper fringilla et quis "
28+
"nulla. Curabitur posuere leo eget ipsum tincidunt dignissim. Cras ultricies suscipit neque, quis suscipit tortor "
29+
"venenatis non. Cras nisl mi, bibendum in vulputate quis, vestibulum ornare enim. Nunc hendrerit placerat dui, "
30+
"aliquam mollis sem convallis et. Integer vitae urna diam. Phasellus et imperdiet est. Maecenas auctor facilisis "
31+
"nibh non commodo. Suspendisse iaculis quam id bibendum feugiat. Pellentesque felis erat, egestas a libero ac, "
32+
"laoreet consectetur elit. Cras ut suscipit ex. Etiam gravida sem quis ex porta, eu lacinia tortor fermentum. "
33+
"Nulla consequat odio enim, sed condimentum est sagittis a. Quisque nec commodo tellus. Phasellus elementum "
34+
"feugiat dolor et feugiat. Praesent sed mattis tortor. In vitae sodales purus. Morbi accumsan, ligula et interdum "
35+
"lacinia, leo risus suscipit urna, non luctus mi justo eu ipsum. Curabitur venenatis pretium orci id porttitor. "
36+
"Quisque dapibus nisl sit amet elit lobortis sagittis. Orci varius natoque penatibus et magnis dis parturient "
37+
"montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum "
38+
"justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum "
39+
"vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium "
40+
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus "
41+
"sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis.";
42+
43+
// Pattern that every benchmark in this file looks for in src_haystack. As a char array initialized from a string
// literal it has 8 elements: the 7 letters followed by the terminating '\0'.
const char src_needle[] = "aliquet";
44+
45+
// Baseline benchmark: times the C library's strstr() looking for src_needle in src_haystack.
void c_strstr(benchmark::State& state) {
    // The iterator-pair construction copies each whole array, so both strings also store the literal's '\0' as their
    // last character. strstr() never sees it as data: it reads the buffers through c_str() and stops at that '\0'.
    const auto text    = std::string(std::begin(src_haystack), std::end(src_haystack));
    const auto pattern = std::string(std::begin(src_needle), std::end(src_needle));

    for (auto _ : state) {
        // Keep the optimizer from treating the inputs as known constants or discarding the result.
        benchmark::DoNotOptimize(text);
        benchmark::DoNotOptimize(pattern);
        auto found = std::strstr(text.c_str(), pattern.c_str());
        benchmark::DoNotOptimize(found);
    }
}
56+
57+
template <class T>
58+
void classic_search(benchmark::State& state) {
59+
const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack));
60+
const std::vector<T> needle(std::begin(src_needle), std::end(src_needle));
61+
62+
for (auto _ : state) {
63+
benchmark::DoNotOptimize(haystack);
64+
benchmark::DoNotOptimize(needle);
65+
auto res = std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end());
66+
benchmark::DoNotOptimize(res);
67+
}
68+
}
69+
70+
template <class T>
71+
void ranges_search(benchmark::State& state) {
72+
const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack));
73+
const std::vector<T> needle(std::begin(src_needle), std::end(src_needle));
74+
75+
for (auto _ : state) {
76+
benchmark::DoNotOptimize(haystack);
77+
benchmark::DoNotOptimize(needle);
78+
auto res = std::ranges::search(haystack, needle);
79+
benchmark::DoNotOptimize(res);
80+
}
81+
}
82+
83+
template <class T>
84+
void search_default_searcher(benchmark::State& state) {
85+
const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack));
86+
const std::vector<T> needle(std::begin(src_needle), std::end(src_needle));
87+
88+
for (auto _ : state) {
89+
benchmark::DoNotOptimize(haystack);
90+
benchmark::DoNotOptimize(needle);
91+
auto res = std::search(haystack.begin(), haystack.end(), std::default_searcher{needle.begin(), needle.end()});
92+
benchmark::DoNotOptimize(res);
93+
}
94+
}
95+
96+
BENCHMARK(c_strstr);
97+
98+
BENCHMARK(classic_search<std::uint8_t>);
99+
BENCHMARK(classic_search<std::uint16_t>);
100+
BENCHMARK(classic_search<std::uint32_t>);
101+
BENCHMARK(classic_search<std::uint64_t>);
102+
103+
BENCHMARK(ranges_search<std::uint8_t>);
104+
BENCHMARK(ranges_search<std::uint16_t>);
105+
BENCHMARK(ranges_search<std::uint32_t>);
106+
BENCHMARK(ranges_search<std::uint64_t>);
107+
108+
BENCHMARK(search_default_searcher<std::uint8_t>);
109+
BENCHMARK(search_default_searcher<std::uint16_t>);
110+
BENCHMARK(search_default_searcher<std::uint32_t>);
111+
BENCHMARK(search_default_searcher<std::uint64_t>);
112+
113+
114+
BENCHMARK_MAIN();

stl/inc/algorithm

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ _Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val
200200
template <class _Ty1, class _Ty2>
201201
_Ty1* _Find_first_of_vectorized(
202202
_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, _Ty2* const _Last2) noexcept {
203+
_STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2));
203204
if constexpr (sizeof(_Ty1) == 1) {
204205
return const_cast<_Ty1*>(
205206
static_cast<const _Ty1*>(::__std_find_first_of_trivial_1(_First1, _Last1, _First2, _Last2)));
@@ -2119,15 +2120,6 @@ namespace ranges {
21192120
template <class _InIt1, class _InIt2, class _Pr>
21202121
_NODISCARD _CONSTEXPR20 bool _Equal_rev_pred_unchecked(_InIt1 _First1, _InIt2 _First2, const _InIt2 _Last2, _Pr _Pred) {
21212122
// compare [_First1, ...) to [_First2, _Last2)
2122-
if constexpr (_Equal_memcmp_is_safe<_InIt1, _InIt2, _Pr>) {
2123-
#if _HAS_CXX20
2124-
if (!_STD is_constant_evaluated())
2125-
#endif // _HAS_CXX20
2126-
{
2127-
return _STD _Memcmp_ranges(_First2, _Last2, _First1) == 0;
2128-
}
2129-
}
2130-
21312123
for (; _First2 != _Last2; ++_First1, (void) ++_First2) {
21322124
if (!_Pred(*_First1, *_First2)) {
21332125
return false;

stl/inc/xutility

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5356,7 +5356,7 @@ constexpr bool _Equal_memcmp_is_safe_helper =
53565356

53575357
template <class _Iter1, class _Iter2, class _Pr>
53585358
constexpr bool _Equal_memcmp_is_safe =
5359-
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, _Pr>;
5359+
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, remove_const_t<_Pr>>;
53605360

53615361
template <class _CtgIt1, class _CtgIt2>
53625362
_NODISCARD int _Memcmp_ranges(_CtgIt1 _First1, _CtgIt1 _Last1, _CtgIt2 _First2) {
@@ -6648,35 +6648,12 @@ namespace ranges {
66486648

66496649
_EXPORT_STD inline constexpr _Adjacent_find_fn adjacent_find;
66506650

6651-
template <class _It1, class _It2, class _Se2, class _Pr, class _Pj1, class _Pj2>
6652-
concept _Equal_rev_pred_can_memcmp = is_same_v<_Pj1, identity> && is_same_v<_Pj2, identity>
6653-
&& sized_sentinel_for<_Se2, _It2> && _Equal_memcmp_is_safe<_It1, _It2, _Pr>;
6654-
66556651
template <forward_iterator _It1, input_iterator _It2, sentinel_for<_It2> _Se2, class _Pr, class _Pj1, class _Pj2>
66566652
requires indirectly_comparable<_It1, _It2, _Pr, _Pj1, _Pj2>
66576653
_NODISCARD constexpr pair<bool, _It1> _Equal_rev_pred(
66586654
_It1 _First1, _It2 _First2, const _Se2 _Last2, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) {
66596655
// Returns {true, _First1 + (_Last2 - _First2)} if [_First1, ...) equals [_First2, _Last2), and {false, {}}
66606656
// otherwise.
6661-
constexpr bool _Optimize = _Equal_rev_pred_can_memcmp<_It1, _It2, _Se2, _Pr, _Pj1, _Pj2>;
6662-
if constexpr (_Optimize) {
6663-
if (!_STD is_constant_evaluated()) {
6664-
bool _Ans;
6665-
if constexpr (same_as<_It2, _Se2>) {
6666-
_Ans = _STD _Memcmp_ranges(_First2, _Last2, _First1) == 0;
6667-
} else {
6668-
_Ans = _STD _Memcmp_count(_First1, _First2, static_cast<size_t>(_Last2 - _First2)) == 0;
6669-
}
6670-
6671-
if (_Ans) {
6672-
_First1 += (_Last2 - _First2);
6673-
return {true, _STD move(_First1)};
6674-
} else {
6675-
return {false, _It1 {}};
6676-
}
6677-
}
6678-
}
6679-
66806657
for (; _First2 != _Last2; ++_First1, (void) ++_First2) {
66816658
if (!_STD invoke(_Pred, _STD invoke(_Proj1, *_First1), _STD invoke(_Proj2, *_First2))) {
66826659
return {false, _It1 {}};

tests/std/tests/VSO_0000000_vector_algorithms/test.cpp

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,19 @@ auto last_known_good_find_first_of(FwdItH h_first, FwdItH h_last, FwdItN n_first
170170
return h_first;
171171
}
172172

173+
// Deliberately naive reference implementation that the optimized search algorithms are checked against.
// Returns an iterator to the first position in [h_first, h_last) where [n_first, n_last) occurs, or h_last if there
// is no such position. An empty needle matches at h_first. Both iterator types must support subtraction.
template <class RanItH, class RanItN>
auto last_known_good_search(RanItH h_first, RanItH h_last, RanItN n_first, RanItN n_last) {
    const auto needle_size = n_last - n_first;

    // Only positions with at least needle_size elements remaining can hold a match.
    for (auto candidate = h_first; h_last - candidate >= needle_size; ++candidate) {
        const auto candidate_end = candidate + needle_size;
        if (equal(candidate, candidate_end, n_first, n_last)) {
            return candidate;
        }
    }

    return h_last;
}
185+
173186
template <class T>
174187
void test_case_find(const vector<T>& input, T v) {
175188
auto expected = last_known_good_find(input.begin(), input.end(), v);
@@ -275,12 +288,13 @@ void test_case_find_first_of(const vector<T>& input_haystack, const vector<T>& i
275288

276289
template <class T>
277290
void test_find_first_of(mt19937_64& gen) {
278-
constexpr size_t needleDataCount = 50;
279-
using TD = conditional_t<sizeof(T) == 1, int, T>;
291+
constexpr size_t haystackDataCount = 200;
292+
constexpr size_t needleDataCount = 35;
293+
using TD = conditional_t<sizeof(T) == 1, int, T>;
280294
uniform_int_distribution<TD> dis('a', 'z');
281295
vector<T> input_haystack;
282296
vector<T> input_needle;
283-
input_haystack.reserve(dataCount);
297+
input_haystack.reserve(haystackDataCount);
284298
input_needle.reserve(needleDataCount);
285299

286300
for (;;) {
@@ -292,7 +306,7 @@ void test_find_first_of(mt19937_64& gen) {
292306
test_case_find_first_of(input_haystack, input_needle);
293307
}
294308

295-
if (input_haystack.size() == dataCount) {
309+
if (input_haystack.size() == haystackDataCount) {
296310
break;
297311
}
298312

@@ -312,6 +326,56 @@ void test_find_first_of_containers() {
312326
#endif // _HAS_CXX20
313327
}
314328

329+
template <class T>
330+
void test_case_search(const vector<T>& input_haystack, const vector<T>& input_needle) {
331+
auto expected =
332+
last_known_good_search(input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end());
333+
auto actual = search(input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end());
334+
assert(expected == actual);
335+
#if _HAS_CXX17
336+
auto searcher_actual = search(
337+
input_haystack.begin(), input_haystack.end(), default_searcher{input_needle.begin(), input_needle.end()});
338+
assert(expected == searcher_actual);
339+
#endif // _HAS_CXX17
340+
#if _HAS_CXX20
341+
auto ranges_actual = ranges::search(input_haystack, input_needle);
342+
assert(expected == begin(ranges_actual));
343+
if (expected != input_haystack.end()) {
344+
assert(expected + static_cast<ptrdiff_t>(input_needle.size()) == end(ranges_actual));
345+
} else {
346+
assert(expected == end(ranges_actual));
347+
}
348+
#endif // _HAS_CXX20
349+
}
350+
351+
template <class T>
352+
void test_search(mt19937_64& gen) {
353+
constexpr size_t haystackDataCount = 200;
354+
constexpr size_t needleDataCount = 35;
355+
using TD = conditional_t<sizeof(T) == 1, int, T>;
356+
uniform_int_distribution<TD> dis('0', '9');
357+
vector<T> input_haystack;
358+
vector<T> input_needle;
359+
input_haystack.reserve(haystackDataCount);
360+
input_needle.reserve(needleDataCount);
361+
362+
for (;;) {
363+
input_needle.clear();
364+
365+
test_case_search(input_haystack, input_needle);
366+
for (size_t attempts = 0; attempts < needleDataCount; ++attempts) {
367+
input_needle.push_back(static_cast<T>(dis(gen)));
368+
test_case_search(input_haystack, input_needle);
369+
}
370+
371+
if (input_haystack.size() == haystackDataCount) {
372+
break;
373+
}
374+
375+
input_haystack.push_back(static_cast<T>(dis(gen)));
376+
}
377+
}
378+
315379
template <class T>
316380
void test_min_max_element(mt19937_64& gen) {
317381
using Limits = numeric_limits<T>;
@@ -817,6 +881,16 @@ void test_vector_algorithms(mt19937_64& gen) {
817881
test_find_first_of_containers<const vector<wchar_t>, vector<wchar_t>>();
818882
test_find_first_of_containers<vector<char>, vector<int>>();
819883

884+
test_search<char>(gen);
885+
test_search<signed char>(gen);
886+
test_search<unsigned char>(gen);
887+
test_search<short>(gen);
888+
test_search<unsigned short>(gen);
889+
test_search<int>(gen);
890+
test_search<unsigned int>(gen);
891+
test_search<long long>(gen);
892+
test_search<unsigned long long>(gen);
893+
820894
test_min_max_element<char>(gen);
821895
test_min_max_element<signed char>(gen);
822896
test_min_max_element<unsigned char>(gen);

0 commit comments

Comments
 (0)