From 72c91d195999ca270e7353a17e1ab470e50ca55f Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sun, 24 Jul 2022 22:45:47 +0800 Subject: [PATCH 1/9] optimize ColumnString Append --- clickhouse/columns/string.cpp | 64 ++++++--------------------- clickhouse/columns/string.h | 82 +++++++++++++++++++++++++++++++++-- ut/columns_ut.cpp | 12 +++++ 3 files changed, 104 insertions(+), 54 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index cfd4c061..01ef25dc 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -4,7 +4,6 @@ #include "../base/wire_format.h" namespace { -const size_t DEFAULT_BLOCK_SIZE = 4096; template size_t ComputeTotalSize(const Container & strings, size_t begin = 0, size_t len = -1) @@ -120,47 +119,6 @@ ItemView ColumnFixedString::GetItem(size_t index) const { return ItemView{Type::FixedString, this->At(index)}; } -struct ColumnString::Block -{ - using CharT = typename std::string::value_type; - - explicit Block(size_t starting_capacity) - : size(0), - capacity(starting_capacity), - data_(new CharT[capacity]) - {} - - inline auto GetAvailable() const - { - return capacity - size; - } - - std::string_view AppendUnsafe(std::string_view str) - { - const auto pos = &data_[size]; - - memcpy(pos, str.data(), str.size()); - size += str.size(); - - return std::string_view(pos, str.size()); - } - - auto GetCurrentWritePos() - { - return &data_[size]; - } - - std::string_view ConsumeTailAsStringViewUnsafe(size_t len) - { - const auto start = &data_[size]; - size += len; - return std::string_view(start, len); - } - - size_t size; - const size_t capacity; - std::unique_ptr data_; -}; ColumnString::ColumnString() : Column(Type::CreateString()) @@ -179,18 +137,21 @@ ColumnString::ColumnString(const std::vector & data) } } -ColumnString::~ColumnString() -{} +ColumnString::ColumnString(std::vector&& data) + : Column(Type::CreateString()) +{ + items_.reserve(data.size()); -void ColumnString::Append(std::string_view str) { - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) - { - blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); + for (auto&& d : data) { + append_data_.emplace_back(std::move(d)); + auto& last_data = append_data_.back(); + items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); } - - items_.emplace_back(blocks_.back().AppendUnsafe(str)); } +ColumnString::~ColumnString() +{} + void ColumnString::AppendUnsafe(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); @@ -199,6 +160,8 @@ void ColumnString::AppendUnsafe(std::string_view str) void ColumnString::Clear() { items_.clear(); blocks_.clear(); + append_data_.clear(); + append_data_.shrink_to_fit(); } std::string_view ColumnString::At(size_t n) const { @@ -283,6 +246,7 @@ void ColumnString::Swap(Column& other) { auto & col = dynamic_cast(other); items_.swap(col.items_); blocks_.swap(col.blocks_); + append_data_.swap(col.append_data_); } ItemView ColumnString::GetItem(size_t index) const { diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index d6defe50..cb7638e9 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -6,9 +6,17 @@ #include #include #include +#include +#include +#include namespace clickhouse { +constexpr size_t DEFAULT_BLOCK_SIZE = 4096; + +template +inline constexpr bool always_false_v = false; + /** * Represents column of fixed-length strings. */ @@ -78,12 +86,10 @@ class ColumnString : public Column { ~ColumnString(); explicit ColumnString(const std::vector & data); + explicit ColumnString(std::vector&& data); ColumnString& operator=(const ColumnString&) = delete; ColumnString(const ColumnString&) = delete; - /// Appends one element to the column. - void Append(std::string_view str); - /// Returns element at given row number. std::string_view At(size_t n) const; @@ -116,10 +122,78 @@ class ColumnString : public Column { void AppendUnsafe(std::string_view); private: - struct Block; + struct Block { + using CharT = typename std::string::value_type; + + explicit Block(size_t starting_capacity) + : size(0), + capacity(starting_capacity), + data_(new CharT[capacity]) + {} + + inline auto GetAvailable() const + { + return capacity - size; + } + + std::string_view AppendUnsafe(std::string_view str) + { + const auto pos = &data_[size]; + + memcpy(pos, str.data(), str.size()); + size += str.size(); + + return std::string_view(pos, str.size()); + } + + auto GetCurrentWritePos() + { + return &data_[size]; + } + + std::string_view ConsumeTailAsStringViewUnsafe(size_t len) + { + const auto start = &data_[size]; + size += len; + return std::string_view(start, len); + } + + size_t size; + const size_t capacity; + std::unique_ptr data_; + }; std::vector items_; std::vector blocks_; + std::deque append_data_; + +public: + /// Appends one element to the column. Copy or move str + template + void Append(StringType&& str) { + using str_type = decltype(str); + if (std::is_same_v> && std::is_rvalue_reference_v) { + append_data_.emplace_back(std::move(str)); + auto& last_data = append_data_.back(); + items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); + } + else if constexpr (std::is_convertible_v, std::string_view>) { + auto data_view = std::string_view(str); + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < data_view.length()) { + blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, data_view.size())); + } + items_.emplace_back(blocks_.back().AppendUnsafe(data_view)); + } + else { + static_assert(always_false_v, "the StringType is not correct"); + } + } + + /// Appends one element to the column. + /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server + void AppendNoManagedLifetime(std::string_view str) { + items_.emplace_back(str); + } }; } diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index 9a806e39..16e6aa43 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -111,6 +111,18 @@ TEST(ColumnsCase, StringInit) { ASSERT_EQ(col->At(3), "abcd"); } +TEST(ColumnsCase, StringAppend) { + auto col = std::make_shared(); + std::string data = "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"; + col->Append(data); + col->Append(std::move(data)); + col->Append("11"); + + ASSERT_EQ(col->Size(), 3u); + ASSERT_EQ(col->At(0), "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"); + ASSERT_EQ(col->At(1), "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"); + ASSERT_EQ(col->At(2), "11"); +} TEST(ColumnsCase, TupleAppend){ auto tuple1 = std::make_shared(std::vector({ From 6c58f13e9ad086bbd0332c7fe659e122663e4315 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Mon, 25 Jul 2022 21:06:42 +0800 Subject: [PATCH 2/9] adjust code --- clickhouse/columns/string.cpp | 62 +++++++++++++++++++++++++++ clickhouse/columns/string.h | 79 +++++------------------------------ ut/columns_ut.cpp | 7 ++-- 3 files changed, 77 insertions(+), 71 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 01ef25dc..9607f16c 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -119,6 +119,47 @@ ItemView ColumnFixedString::GetItem(size_t index) const { return ItemView{Type::FixedString, this->At(index)}; } +struct ColumnString::Block +{ + using CharT = typename std::string::value_type; + + explicit Block(size_t starting_capacity) + : size(0), + capacity(starting_capacity), + data_(new CharT[capacity]) + {} + + inline auto GetAvailable() const + { + return capacity - size; + } + + std::string_view AppendUnsafe(std::string_view str) + { + const auto pos = &data_[size]; + + memcpy(pos, str.data(), str.size()); + size += str.size(); + + return std::string_view(pos, str.size()); + } + + auto GetCurrentWritePos() + { + return &data_[size]; + } + + std::string_view ConsumeTailAsStringViewUnsafe(size_t len) + { + const auto start = &data_[size]; + size += len; + return std::string_view(start, len); + } + + size_t size; + const size_t capacity; + std::unique_ptr data_; +}; ColumnString::ColumnString() : Column(Type::CreateString()) @@ -152,6 +193,27 @@ ColumnString::ColumnString(std::vector&& data) ColumnString::~ColumnString() {} +void ColumnString::Append(const std::string_view& str) { + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) + { + blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); + } + + items_.emplace_back(blocks_.back().AppendUnsafe(str)); +} + +void ColumnString::Append(std::string&& steal_value) +{ + append_data_.emplace_back(std::move(steal_value)); + auto& last_data = append_data_.back(); + items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); +} + +void ColumnString::AppendNoManagedLifetime(std::string_view str) +{ + items_.emplace_back(str); +} + void ColumnString::AppendUnsafe(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index cb7638e9..36cb9caf 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -90,6 +90,16 @@ class ColumnString : public Column { ColumnString& operator=(const ColumnString&) = delete; ColumnString(const ColumnString&) = delete; + /// Appends one element to the column. + void Append(const std::string_view& str); + + /// Appends one element to the column. + void Append(std::string&& steal_value); + + /// Appends one element to the column. + /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server + void AppendNoManagedLifetime(std::string_view str); + /// Returns element at given row number. std::string_view At(size_t n) const; @@ -122,78 +132,11 @@ class ColumnString : public Column { void AppendUnsafe(std::string_view); private: - struct Block { - using CharT = typename std::string::value_type; - - explicit Block(size_t starting_capacity) - : size(0), - capacity(starting_capacity), - data_(new CharT[capacity]) - {} - - inline auto GetAvailable() const - { - return capacity - size; - } - - std::string_view AppendUnsafe(std::string_view str) - { - const auto pos = &data_[size]; - - memcpy(pos, str.data(), str.size()); - size += str.size(); - - return std::string_view(pos, str.size()); - } - - auto GetCurrentWritePos() - { - return &data_[size]; - } - - std::string_view ConsumeTailAsStringViewUnsafe(size_t len) - { - const auto start = &data_[size]; - size += len; - return std::string_view(start, len); - } - - size_t size; - const size_t capacity; - std::unique_ptr data_; - }; + struct Block; std::vector items_; std::vector blocks_; std::deque append_data_; - -public: - /// Appends one element to the column. Copy or move str - template - void Append(StringType&& str) { - using str_type = decltype(str); - if (std::is_same_v> && std::is_rvalue_reference_v) { - append_data_.emplace_back(std::move(str)); - auto& last_data = append_data_.back(); - items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); - } - else if constexpr (std::is_convertible_v, std::string_view>) { - auto data_view = std::string_view(str); - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < data_view.length()) { - blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, data_view.size())); - } - items_.emplace_back(blocks_.back().AppendUnsafe(data_view)); - } - else { - static_assert(always_false_v, "the StringType is not correct"); - } - } - - /// Appends one element to the column. - /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server - void AppendNoManagedLifetime(std::string_view str) { - items_.emplace_back(str); - } }; } diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index 16e6aa43..3d045ba2 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -113,14 +113,15 @@ TEST(ColumnsCase, StringInit) { TEST(ColumnsCase, StringAppend) { auto col = std::make_shared(); - std::string data = "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"; + const char* expected = "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"; + std::string data(expected); col->Append(data); col->Append(std::move(data)); col->Append("11"); ASSERT_EQ(col->Size(), 3u); - ASSERT_EQ(col->At(0), "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"); - ASSERT_EQ(col->At(1), "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"); + ASSERT_EQ(col->At(0), expected); + ASSERT_EQ(col->At(1), expected); ASSERT_EQ(col->At(2), "11"); } From e56d9bfa3d4b2e72d3a87ec6af6fd37fac1a3624 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Fri, 29 Jul 2022 20:31:42 +0800 Subject: [PATCH 3/9] remove extra headers and delegate constructor --- clickhouse/columns/string.cpp | 8 ++++---- clickhouse/columns/string.h | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 9607f16c..a6faec9c 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -166,8 +166,8 @@ ColumnString::ColumnString() { } -ColumnString::ColumnString(const std::vector & data) - : Column(Type::CreateString()) +ColumnString::ColumnString(const std::vector& data) + : ColumnString() { items_.reserve(data.size()); blocks_.emplace_back(ComputeTotalSize(data)); @@ -176,10 +176,10 @@ ColumnString::ColumnString(const std::vector & data) { AppendUnsafe(s); } -} +}; ColumnString::ColumnString(std::vector&& data) - : Column(Type::CreateString()) + : ColumnString() { items_.reserve(data.size()); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index 36cb9caf..8b165a2b 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -6,8 +6,6 @@ #include #include #include -#include -#include #include namespace clickhouse { From 87bcac9751bc85ba318529b31b7ac056a2316dba Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sat, 30 Jul 2022 13:39:09 +0800 Subject: [PATCH 4/9] fix gcc clang compile error --- clickhouse/columns/string.cpp | 12 +++++++++++- clickhouse/columns/string.h | 5 ++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index a6faec9c..2694d26a 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -193,7 +193,7 @@ ColumnString::ColumnString(std::vector&& data) ColumnString::~ColumnString() {} -void ColumnString::Append(const std::string_view& str) { +void ColumnString::Append(std::string_view str) { if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) { blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); @@ -209,6 +209,16 @@ void ColumnString::Append(std::string&& steal_value) items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); } +void ColumnString::Append(const char* str) { + auto len = strlen(str); + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) + { + blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); + } + + items_.emplace_back(blocks_.back().AppendUnsafe(str)); +} + void ColumnString::AppendNoManagedLifetime(std::string_view str) { items_.emplace_back(str); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index 8b165a2b..3e726e86 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -89,11 +89,14 @@ class ColumnString : public Column { ColumnString(const ColumnString&) = delete; /// Appends one element to the column. - void Append(const std::string_view& str); + void Append(std::string_view str); /// Appends one element to the column. void Append(std::string&& steal_value); + /// Appends one element to the column. + void Append(const char* str); + /// Appends one element to the column. /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server void AppendNoManagedLifetime(std::string_view str); From 863728525c1b29e10182cea06b7931455dfbd974 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 1 Aug 2022 18:06:29 +0300 Subject: [PATCH 5/9] Minor; removed unnecessary code --- clickhouse/columns/string.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index 3e726e86..f7bff737 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -10,11 +10,6 @@ namespace clickhouse { -constexpr size_t DEFAULT_BLOCK_SIZE = 4096; - -template -inline constexpr bool always_false_v = false; - /** * Represents column of fixed-length strings. */ From 70d3329b1904c983753e7a250cb12b2a4a29cbae Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 1 Aug 2022 18:07:50 +0300 Subject: [PATCH 6/9] Minor: stype + DEFAULT_BLOCK_SIZE --- clickhouse/columns/string.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 2694d26a..1227c113 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -5,9 +5,10 @@ namespace { +constexpr size_t DEFAULT_BLOCK_SIZE = 4096; + template -size_t ComputeTotalSize(const Container & strings, size_t begin = 0, size_t len = -1) -{ +size_t ComputeTotalSize(const Container & strings, size_t begin = 0, size_t len = -1) { size_t result = 0; if (begin < strings.size()) { len = std::min(len, strings.size() - begin); @@ -63,8 +64,7 @@ std::string_view ColumnFixedString::operator [](size_t n) const { return std::string_view(&data_[pos], string_size_); } -size_t ColumnFixedString::FixedSize() const -{ +size_t ColumnFixedString::FixedSize() const { return string_size_; } @@ -202,8 +202,7 @@ void ColumnString::Append(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); } -void ColumnString::Append(std::string&& steal_value) -{ +void ColumnString::Append(std::string&& steal_value) { append_data_.emplace_back(std::move(steal_value)); auto& last_data = append_data_.back(); items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); @@ -211,21 +210,18 @@ void ColumnString::Append(std::string&& steal_value) void ColumnString::Append(const char* str) { auto len = strlen(str); - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) - { + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) { blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); } items_.emplace_back(blocks_.back().AppendUnsafe(str)); } -void ColumnString::AppendNoManagedLifetime(std::string_view str) -{ +void ColumnString::AppendNoManagedLifetime(std::string_view str) { items_.emplace_back(str); } -void ColumnString::AppendUnsafe(std::string_view str) -{ +void ColumnString::AppendUnsafe(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); } From 7bdc11fff7d7ee2a4b3b4d93c22c67ed5fc13fa3 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 4 Aug 2022 16:44:52 +0300 Subject: [PATCH 7/9] Removed Append(const char*) overload Users should call string_view overload instead. --- clickhouse/columns/string.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 1227c113..b1e53244 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -208,15 +208,6 @@ void ColumnString::Append(std::string&& steal_value) { items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); } -void ColumnString::Append(const char* str) { - auto len = strlen(str); - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) { - blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); - } - - items_.emplace_back(blocks_.back().AppendUnsafe(str)); -} - void ColumnString::AppendNoManagedLifetime(std::string_view str) { items_.emplace_back(str); } From f6d3d8c6a17c556d5e8f77f8edfe44de95771fb5 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 4 Aug 2022 16:45:35 +0300 Subject: [PATCH 8/9] Update string.h --- clickhouse/columns/string.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index f7bff737..30066148 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -89,9 +89,6 @@ class ColumnString : public Column { /// Appends one element to the column. void Append(std::string&& steal_value); - /// Appends one element to the column. - void Append(const char* str); - /// Appends one element to the column. /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server void AppendNoManagedLifetime(std::string_view str); From 2bbc74afd19f3b6e3520d1ef1a220ad7c8b2eb15 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sun, 7 Aug 2022 12:21:47 +0800 Subject: [PATCH 9/9] Append(const char*) overload is necessary for gcc and clang --- clickhouse/columns/string.cpp | 9 +++++++++ clickhouse/columns/string.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index b1e53244..8ea362c4 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -202,6 +202,15 @@ void ColumnString::Append(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); } +void ColumnString::Append(const char* str) { + auto len = strlen(str); + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) { + blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); + } + + items_.emplace_back(blocks_.back().AppendUnsafe(str)); +} + void ColumnString::Append(std::string&& steal_value) { append_data_.emplace_back(std::move(steal_value)); auto& last_data = append_data_.back(); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index 30066148..f2216f40 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -86,6 +86,9 @@ class ColumnString : public Column { /// Appends one element to the column. void Append(std::string_view str); + /// Appends one element to the column. + void Append(const char* str); + /// Appends one element to the column. void Append(std::string&& steal_value);