Skip to content

Commit 037555c

Browse files
authored
Allow full range for var-int encoded integers (#471)
Porting https://github.com/ClickHouse/ClickHouse/pull/51905/files
1 parent ea30ae1 commit 037555c

File tree

6 files changed

+201
-146
lines changed

6 files changed

+201
-146
lines changed

src/IO/VarInt.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#include <IO/VarInt.h>
2+
#include <Common/Exception.h>
3+
4+
namespace DB
5+
{
6+
namespace ErrorCodes
7+
{
8+
extern const int ATTEMPT_TO_READ_AFTER_EOF;
9+
}
10+
11+
void throwReadAfterEOF()
12+
{
13+
throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof");
14+
}
15+
16+
}

src/IO/VarInt.h

Lines changed: 98 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -1,138 +1,97 @@
11
#pragma once
22

3-
#include <iostream>
43
#include <base/types.h>
54
#include <IO/ReadBuffer.h>
65
#include <IO/WriteBuffer.h>
76

87

98
namespace DB
109
{
11-
namespace ErrorCodes
12-
{
13-
extern const int ATTEMPT_TO_READ_AFTER_EOF;
14-
}
15-
1610

17-
/** Write UInt64 in variable length format (base128) NOTE Only up to 2^63 - 1 are supported. */
18-
void writeVarUInt(UInt64 x, std::ostream & ostr);
19-
void writeVarUInt(UInt64 x, WriteBuffer & ostr);
20-
char * writeVarUInt(UInt64 x, char * ostr);
11+
/// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding.
2112

13+
[[noreturn]] void throwReadAfterEOF();
2214

23-
/** Read UInt64, written in variable length format (base128) */
24-
void readVarUInt(UInt64 & x, std::istream & istr);
25-
void readVarUInt(UInt64 & x, ReadBuffer & istr);
26-
const char * readVarUInt(UInt64 & x, const char * istr, size_t size);
2715

16+
inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
17+
{
18+
while (x > 0x7F)
19+
{
20+
uint8_t byte = 0x80 | (x & 0x7F);
2821

29-
/** Get the length of UInt64 in VarUInt format */
30-
size_t getLengthOfVarUInt(UInt64 x);
22+
ostr.nextIfAtEnd();
23+
*ostr.position() = byte;
24+
++ostr.position();
3125

32-
/** Get the Int64 length in VarInt format */
33-
size_t getLengthOfVarInt(Int64 x);
26+
x >>= 7;
27+
}
3428

29+
uint8_t final_byte = static_cast<uint8_t>(x);
3530

36-
/** Write Int64 in variable length format (base128) */
37-
template <typename OUT>
38-
inline void writeVarInt(Int64 x, OUT & ostr)
39-
{
40-
writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
31+
ostr.nextIfAtEnd();
32+
*ostr.position() = final_byte;
33+
++ostr.position();
4134
}
4235

43-
inline char * writeVarInt(Int64 x, char * ostr)
36+
inline void writeVarUInt(UInt64 x, std::ostream & ostr)
4437
{
45-
return writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
46-
}
38+
while (x > 0x7F)
39+
{
40+
uint8_t byte = 0x80 | (x & 0x7F);
41+
ostr.put(byte);
4742

43+
x >>= 7;
44+
}
4845

49-
/** Read Int64, written in variable length format (base128) */
50-
template <typename IN>
51-
inline void readVarInt(Int64 & x, IN & istr)
52-
{
53-
readVarUInt(*reinterpret_cast<UInt64*>(&x), istr);
54-
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
46+
uint8_t final_byte = static_cast<uint8_t>(x);
47+
ostr.put(final_byte);
5548
}
5649

57-
inline const char * readVarInt(Int64 & x, const char * istr, size_t size)
50+
inline char * writeVarUInt(UInt64 x, char * ostr)
5851
{
59-
const char * res = readVarUInt(*reinterpret_cast<UInt64*>(&x), istr, size);
60-
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
61-
return res;
62-
}
63-
64-
65-
inline void writeVarT(UInt64 x, std::ostream & ostr) { writeVarUInt(x, ostr); }
66-
inline void writeVarT(Int64 x, std::ostream & ostr) { writeVarInt(x, ostr); }
67-
inline void writeVarT(UInt64 x, WriteBuffer & ostr) { writeVarUInt(x, ostr); }
68-
inline void writeVarT(Int64 x, WriteBuffer & ostr) { writeVarInt(x, ostr); }
69-
inline char * writeVarT(UInt64 x, char * & ostr) { return writeVarUInt(x, ostr); }
70-
inline char * writeVarT(Int64 x, char * & ostr) { return writeVarInt(x, ostr); }
52+
while (x > 0x7F)
53+
{
54+
uint8_t byte = 0x80 | (x & 0x7F);
7155

72-
inline void readVarT(UInt64 & x, std::istream & istr) { readVarUInt(x, istr); }
73-
inline void readVarT(Int64 & x, std::istream & istr) { readVarInt(x, istr); }
74-
inline void readVarT(UInt64 & x, ReadBuffer & istr) { readVarUInt(x, istr); }
75-
inline void readVarT(Int64 & x, ReadBuffer & istr) { readVarInt(x, istr); }
76-
inline const char * readVarT(UInt64 & x, const char * istr, size_t size) { return readVarUInt(x, istr, size); }
77-
inline const char * readVarT(Int64 & x, const char * istr, size_t size) { return readVarInt(x, istr, size); }
56+
*ostr = byte;
57+
++ostr;
7858

59+
x >>= 7;
60+
}
7961

80-
/// For [U]Int32, [U]Int16, size_t.
62+
uint8_t final_byte = static_cast<uint8_t>(x);
8163

82-
inline void readVarUInt(UInt32 & x, ReadBuffer & istr)
83-
{
84-
UInt64 tmp;
85-
readVarUInt(tmp, istr);
86-
x = static_cast<UInt32>(tmp);
87-
}
64+
*ostr = final_byte;
65+
++ostr;
8866

89-
inline void readVarInt(Int32 & x, ReadBuffer & istr)
90-
{
91-
Int64 tmp;
92-
readVarInt(tmp, istr);
93-
x = static_cast<Int32>(tmp);
67+
return ostr;
9468
}
9569

96-
inline void readVarUInt(UInt16 & x, ReadBuffer & istr)
70+
template <typename Out>
71+
inline void writeVarInt(Int64 x, Out & ostr)
9772
{
98-
UInt64 tmp;
99-
readVarUInt(tmp, istr);
100-
x = tmp;
73+
writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
10174
}
10275

103-
inline void readVarInt(Int16 & x, ReadBuffer & istr)
76+
inline char * writeVarInt(Int64 x, char * ostr)
10477
{
105-
Int64 tmp;
106-
readVarInt(tmp, istr);
107-
x = tmp;
78+
return writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
10879
}
10980

110-
template <typename T>
111-
requires (!std::is_same_v<T, UInt64>)
112-
inline void readVarUInt(T & x, ReadBuffer & istr)
81+
namespace impl
11382
{
114-
UInt64 tmp;
115-
readVarUInt(tmp, istr);
116-
x = tmp;
117-
}
118-
11983

120-
[[noreturn]] inline void throwReadAfterEOF()
121-
{
122-
throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
123-
}
124-
125-
template <bool fast>
126-
inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr)
84+
template <bool check_eof>
85+
inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
12786
{
12887
x = 0;
129-
for (size_t i = 0; i < 9; ++i)
88+
for (size_t i = 0; i < 10; ++i)
13089
{
131-
if constexpr (!fast)
132-
if (istr.eof())
90+
if constexpr (check_eof)
91+
if (istr.eof()) [[unlikely]]
13392
throwReadAfterEOF();
13493

135-
UInt64 byte = *istr.position(); /// NOLINT
94+
UInt64 byte = *istr.position();
13695
++istr.position();
13796
x |= (byte & 0x7F) << (7 * i);
13897

@@ -141,18 +100,19 @@ inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr)
141100
}
142101
}
143102

103+
}
104+
144105
inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
145106
{
146-
if (istr.buffer().end() - istr.position() >= 9)
147-
return readVarUIntImpl<true>(x, istr);
148-
return readVarUIntImpl<false>(x, istr);
107+
if (istr.buffer().end() - istr.position() >= 10)
108+
return impl::readVarUInt<false>(x, istr);
109+
return impl::readVarUInt<true>(x, istr);
149110
}
150111

151-
152112
inline void readVarUInt(UInt64 & x, std::istream & istr)
153113
{
154114
x = 0;
155-
for (size_t i = 0; i < 9; ++i)
115+
for (size_t i = 0; i < 10; ++i)
156116
{
157117
UInt64 byte = istr.get();
158118
x |= (byte & 0x7F) << (7 * i);
@@ -167,12 +127,12 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size)
167127
const char * end = istr + size;
168128

169129
x = 0;
170-
for (size_t i = 0; i < 9; ++i)
130+
for (size_t i = 0; i < 10; ++i)
171131
{
172-
if (istr == end)
132+
if (istr == end) [[unlikely]]
173133
throwReadAfterEOF();
174134

175-
UInt64 byte = *istr; /// NOLINT
135+
UInt64 byte = *istr;
176136
++istr;
177137
x |= (byte & 0x7F) << (7 * i);
178138

@@ -183,62 +143,56 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size)
183143
return istr;
184144
}
185145

186-
187-
inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
146+
template <typename In>
147+
inline void readVarInt(Int64 & x, In & istr)
188148
{
189-
for (size_t i = 0; i < 9; ++i)
190-
{
191-
uint8_t byte = x & 0x7F;
192-
if (x > 0x7F)
193-
byte |= 0x80;
194-
195-
ostr.nextIfAtEnd();
196-
*ostr.position() = byte;
197-
++ostr.position();
198-
199-
x >>= 7;
200-
if (!x)
201-
return;
202-
}
149+
readVarUInt(*reinterpret_cast<UInt64*>(&x), istr);
150+
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
203151
}
204152

205-
206-
inline void writeVarUInt(UInt64 x, std::ostream & ostr)
153+
inline const char * readVarInt(Int64 & x, const char * istr, size_t size)
207154
{
208-
for (size_t i = 0; i < 9; ++i)
209-
{
210-
uint8_t byte = x & 0x7F;
211-
if (x > 0x7F)
212-
byte |= 0x80;
213-
214-
ostr.put(byte);
215-
216-
x >>= 7;
217-
if (!x)
218-
return;
219-
}
155+
const char * res = readVarUInt(*reinterpret_cast<UInt64*>(&x), istr, size);
156+
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
157+
return res;
220158
}
221159

222-
223-
inline char * writeVarUInt(UInt64 x, char * ostr)
160+
inline void readVarUInt(UInt32 & x, ReadBuffer & istr)
224161
{
225-
for (size_t i = 0; i < 9; ++i)
226-
{
227-
uint8_t byte = x & 0x7F;
228-
if (x > 0x7F)
229-
byte |= 0x80;
162+
UInt64 tmp;
163+
readVarUInt(tmp, istr);
164+
x = static_cast<UInt32>(tmp);
165+
}
230166

231-
*ostr = byte;
232-
++ostr;
167+
inline void readVarInt(Int32 & x, ReadBuffer & istr)
168+
{
169+
Int64 tmp;
170+
readVarInt(tmp, istr);
171+
x = static_cast<Int32>(tmp);
172+
}
233173

234-
x >>= 7;
235-
if (!x)
236-
return ostr;
237-
}
174+
inline void readVarUInt(UInt16 & x, ReadBuffer & istr)
175+
{
176+
UInt64 tmp;
177+
readVarUInt(tmp, istr);
178+
x = tmp;
179+
}
238180

239-
return ostr;
181+
inline void readVarInt(Int16 & x, ReadBuffer & istr)
182+
{
183+
Int64 tmp;
184+
readVarInt(tmp, istr);
185+
x = tmp;
240186
}
241187

188+
template <typename T>
189+
requires (!std::is_same_v<T, UInt64>)
190+
inline void readVarUInt(T & x, ReadBuffer & istr)
191+
{
192+
UInt64 tmp;
193+
readVarUInt(tmp, istr);
194+
x = tmp;
195+
}
242196

243197
inline size_t getLengthOfVarUInt(UInt64 x)
244198
{
@@ -250,7 +204,8 @@ inline size_t getLengthOfVarUInt(UInt64 x)
250204
: (x < (1ULL << 42) ? 6
251205
: (x < (1ULL << 49) ? 7
252206
: (x < (1ULL << 56) ? 8
253-
: 9)))))));
207+
: (x < (1ULL << 63) ? 9
208+
: 10))))))));
254209
}
255210

256211

0 commit comments

Comments
 (0)