1
1
#include " compressed.h"
2
2
#include " wire_format.h"
3
+ #include " output.h"
3
4
4
5
#include < cityhash/city.h>
5
6
#include < lz4/lz4.h>
6
7
#include < stdexcept>
7
8
#include < system_error>
8
9
10
+ #include < iostream>
11
+
12
+ namespace {
13
+ static const size_t HEADER_SIZE = 9 ;
14
+ static const size_t EXTRA_PREALLOCATE_COMPRESS_BUFFER = 15 ;
15
+ static const uint8_t COMPRESSION_METHOD = 0x82 ;
9
16
#define DBMS_MAX_COMPRESSED_SIZE 0x40000000ULL // 1GB
17
+ }
10
18
11
19
namespace clickhouse {
12
20
13
- CompressedInput::CompressedInput (CodedInputStream * input)
21
+ CompressedInput::CompressedInput (InputStream * input)
14
22
: input_(input)
15
23
{
16
24
}
@@ -50,7 +58,7 @@ bool CompressedInput::Decompress() {
50
58
return false ;
51
59
}
52
60
53
- if (method != 0x82 ) {
61
+ if (method != COMPRESSION_METHOD ) {
54
62
throw std::runtime_error (" unsupported compression method " +
55
63
std::to_string (int (method)));
56
64
} else {
@@ -75,7 +83,7 @@ bool CompressedInput::Decompress() {
75
83
out.Write (&original, sizeof (original));
76
84
}
77
85
78
- if (!WireFormat::ReadBytes (input_, tmp.data () + 9 , compressed - 9 )) {
86
+ if (!WireFormat::ReadBytes (input_, tmp.data () + HEADER_SIZE , compressed - HEADER_SIZE )) {
79
87
return false ;
80
88
} else {
81
89
if (hash != CityHash128 ((const char *)tmp.data (), compressed)) {
@@ -85,7 +93,7 @@ bool CompressedInput::Decompress() {
85
93
86
94
data_ = Buffer (original);
87
95
88
- if (LZ4_decompress_safe ((const char *)tmp.data () + 9 , (char *)data_.data (), compressed - 9 , original) < 0 ) {
96
+ if (LZ4_decompress_safe ((const char *)tmp.data () + HEADER_SIZE , (char *)data_.data (), compressed - HEADER_SIZE , original) < 0 ) {
89
97
throw std::runtime_error (" can't decompress data" );
90
98
} else {
91
99
mem_.Reset (data_.data (), original);
@@ -95,4 +103,64 @@ bool CompressedInput::Decompress() {
95
103
return true ;
96
104
}
97
105
106
+
107
+ CompressedOutput::CompressedOutput (OutputStream * destination, size_t max_compressed_chunk_size)
108
+ : destination_ (destination),
109
+ max_compressed_chunk_size_ (max_compressed_chunk_size)
110
+ {
111
+ }
112
+
113
+ CompressedOutput::~CompressedOutput () {
114
+ Flush ();
115
+ }
116
+
117
+ size_t CompressedOutput::DoWrite (const void * data, size_t len) {
118
+ const size_t original_len = len;
119
+ const size_t max_chunk_size = max_compressed_chunk_size_ ? max_compressed_chunk_size_ : len;
120
+
121
+ while (len > 0 )
122
+ {
123
+ auto to_compress = std::min (len, max_chunk_size);
124
+ if (!Compress (data, to_compress))
125
+ break ;
126
+
127
+ len -= to_compress;
128
+ data = reinterpret_cast <const char *>(data) + to_compress;
129
+ }
130
+
131
+ return original_len - len;
132
+ }
133
+
134
+ void CompressedOutput::DoFlush () {
135
+ destination_->Flush ();
136
+ }
137
+
138
+ bool CompressedOutput::Compress (const void * data, size_t len) {
139
+
140
+ const size_t expected_out_size = LZ4_compressBound (len);
141
+ compressed_buffer_.resize (std::max (compressed_buffer_.size (), expected_out_size + HEADER_SIZE + EXTRA_PREALLOCATE_COMPRESS_BUFFER));
142
+
143
+ const int compressed_size = LZ4_compress_default (
144
+ (const char *)data,
145
+ (char *)compressed_buffer_.data () + HEADER_SIZE,
146
+ len,
147
+ compressed_buffer_.size () - HEADER_SIZE);
148
+
149
+ {
150
+ auto header = compressed_buffer_.data ();
151
+ WriteUnaligned (header, COMPRESSION_METHOD);
152
+ // Compressed data size with header
153
+ WriteUnaligned (header + 1 , static_cast <uint32_t >(compressed_size + HEADER_SIZE));
154
+ // Original data size
155
+ WriteUnaligned (header + 5 , static_cast <uint32_t >(len));
156
+ }
157
+
158
+ WireFormat::WriteFixed (destination_, CityHash128 (
159
+ (const char *)compressed_buffer_.data (), compressed_size + HEADER_SIZE));
160
+ WireFormat::WriteBytes (destination_, compressed_buffer_.data (), compressed_size + HEADER_SIZE);
161
+
162
+ destination_->Flush ();
163
+ return true ;
164
+ }
165
+
98
166
}
0 commit comments