Skip to content

Commit caf45ab

Browse files
committed
Correctly encode unicode strings
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 5b0b335 commit caf45ab

File tree

3 files changed

+68
-4
lines changed

3 files changed

+68
-4
lines changed

src/runtime/encoder_string.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ auto Encoder::FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED(
2020
assert(document.is_string());
2121
const sourcemeta::jsontoolkit::JSON::String value{document.to_string()};
2222
const auto size{value.size()};
23-
assert(document.size() == size);
23+
assert(document.byte_size() == size);
2424
const auto shared{this->cache_.find(value, Cache::Type::Standalone)};
2525

2626
// (1) Write 0x00 if shared, else do nothing
@@ -46,7 +46,7 @@ auto Encoder::ROOF_VARINT_PREFIX_UTF8_STRING_SHARED(
4646
assert(document.is_string());
4747
const sourcemeta::jsontoolkit::JSON::String value{document.to_string()};
4848
const auto size{value.size()};
49-
assert(document.size() == size);
49+
assert(document.byte_size() == size);
5050
assert(size <= options.maximum);
5151
const auto shared{this->cache_.find(value, Cache::Type::Standalone)};
5252

@@ -73,7 +73,7 @@ auto Encoder::BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED(
7373
assert(document.is_string());
7474
const sourcemeta::jsontoolkit::JSON::String value{document.to_string()};
7575
const auto size{value.size()};
76-
assert(document.size() == size);
76+
assert(document.byte_size() == size);
7777
assert(options.minimum <= options.maximum);
7878
assert(is_byte(options.maximum - options.minimum + 1));
7979
assert(is_within(size, options.minimum, options.maximum));
@@ -137,7 +137,7 @@ auto Encoder::PREFIX_VARINT_LENGTH_STRING_SHARED(
137137
Cache::Type::PrefixLengthVarintPlusOne);
138138
} else {
139139
const auto size{value.size()};
140-
assert(document.size() == size);
140+
assert(document.byte_size() == size);
141141
this->cache_.record(value, this->position(),
142142
Cache::Type::PrefixLengthVarintPlusOne);
143143
this->put_varint(size + 1);

test/runtime/decode_string_test.cc

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@ TEST(JSONBinPack_Decoder, FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED_foo_0_foo_3) {
3737
EXPECT_EQ(result2, expected);
3838
}
3939

40+
TEST(JSONBinPack_Decoder, FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED_unicode_1) {
41+
InputByteStream stream{0x04, 0x66, 0x6f, 0xc3, 0xb8};
42+
sourcemeta::jsonbinpack::Decoder decoder{stream};
43+
const auto result = decoder.FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED({1});
44+
const sourcemeta::jsontoolkit::JSON expected{"foø"};
45+
EXPECT_EQ(result, expected);
46+
}
47+
4048
TEST(JSONBinPack_Decoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_foo_4) {
4149
InputByteStream stream{0x02, 0x66, 0x6f, 0x6f};
4250
sourcemeta::jsonbinpack::Decoder decoder{stream};
@@ -61,6 +69,14 @@ TEST(JSONBinPack_Decoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_foo_3_foo_5) {
6169
EXPECT_EQ(result2, expected);
6270
}
6371

72+
TEST(JSONBinPack_Decoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_unicode_4) {
73+
InputByteStream stream{0x01, 0x66, 0x6f, 0xc3, 0xb8};
74+
sourcemeta::jsonbinpack::Decoder decoder{stream};
75+
const auto result = decoder.ROOF_VARINT_PREFIX_UTF8_STRING_SHARED({4});
76+
const sourcemeta::jsontoolkit::JSON expected{"foø"};
77+
EXPECT_EQ(result, expected);
78+
}
79+
6480
TEST(JSONBinPack_Decoder, BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED_foo_3_5) {
6581
InputByteStream stream{0x01, 0x66, 0x6f, 0x6f};
6682
sourcemeta::jsonbinpack::Decoder decoder{stream};
@@ -94,6 +110,14 @@ TEST(JSONBinPack_Decoder,
94110
EXPECT_EQ(result2, expected);
95111
}
96112

113+
TEST(JSONBinPack_Decoder, BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED_unicode_0_6) {
114+
InputByteStream stream{0x05, 0x66, 0x6f, 0xc3, 0xb8};
115+
sourcemeta::jsonbinpack::Decoder decoder{stream};
116+
const auto result = decoder.BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED({0, 6});
117+
const sourcemeta::jsontoolkit::JSON expected{"foø"};
118+
EXPECT_EQ(result, expected);
119+
}
120+
97121
TEST(JSONBinPack_Decoder, RFC3339_DATE_INTEGER_TRIPLET_2014_10_01) {
98122
InputByteStream stream{0xde, 0x07, 0x0a, 0x01};
99123
sourcemeta::jsonbinpack::Decoder decoder{stream};
@@ -155,3 +179,11 @@ TEST(JSONBinPack_Decoder,
155179
EXPECT_EQ(result1, expected);
156180
EXPECT_EQ(result2, expected);
157181
}
182+
183+
TEST(JSONBinPack_Decoder, PREFIX_VARINT_LENGTH_STRING_SHARED_unicode) {
184+
InputByteStream stream{0x05, 0x66, 0x6f, 0xc3, 0xb8};
185+
sourcemeta::jsonbinpack::Decoder decoder{stream};
186+
const auto result = decoder.PREFIX_VARINT_LENGTH_STRING_SHARED({});
187+
const sourcemeta::jsontoolkit::JSON expected{"foø"};
188+
EXPECT_EQ(result, expected);
189+
}

test/runtime/encode_string_test.cc

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ TEST(JSONBinPack_Encoder, FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED_foo_0_foo_3) {
2929
EXPECT_BYTES(stream, {0x04, 0x66, 0x6f, 0x6f, 0x00, 0x01, 0x05});
3030
}
3131

32+
TEST(JSONBinPack_Encoder, FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED_unicode_1) {
33+
const sourcemeta::jsontoolkit::JSON document{"foø"};
34+
OutputByteStream stream{};
35+
sourcemeta::jsonbinpack::Encoder encoder{stream};
36+
encoder.FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED(document, {1});
37+
EXPECT_BYTES(stream, {0x04, 0x66, 0x6f, 0xc3, 0xb8});
38+
}
39+
3240
TEST(JSONBinPack_Encoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_foo_4) {
3341
const sourcemeta::jsontoolkit::JSON document{"foo"};
3442
OutputByteStream stream{};
@@ -46,6 +54,14 @@ TEST(JSONBinPack_Encoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_foo_3_foo_5) {
4654
EXPECT_BYTES(stream, {0x01, 0x66, 0x6f, 0x6f, 0x00, 0x03, 0x05});
4755
}
4856

57+
TEST(JSONBinPack_Encoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_unicode_4) {
58+
const sourcemeta::jsontoolkit::JSON document{"foø"};
59+
OutputByteStream stream{};
60+
sourcemeta::jsonbinpack::Encoder encoder{stream};
61+
encoder.ROOF_VARINT_PREFIX_UTF8_STRING_SHARED(document, {4});
62+
EXPECT_BYTES(stream, {0x01, 0x66, 0x6f, 0xc3, 0xb8});
63+
}
64+
4965
TEST(JSONBinPack_Encoder, BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED_foo_3_5) {
5066
const sourcemeta::jsontoolkit::JSON document{"foo"};
5167
OutputByteStream stream{};
@@ -72,6 +88,14 @@ TEST(JSONBinPack_Encoder,
7288
EXPECT_BYTES(stream, {0x04, 0x66, 0x6f, 0x6f, 0x00, 0x01, 0x05});
7389
}
7490

91+
TEST(JSONBinPack_Encoder, BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED_unicode_0_6) {
92+
const sourcemeta::jsontoolkit::JSON document{"foø"};
93+
OutputByteStream stream{};
94+
sourcemeta::jsonbinpack::Encoder encoder{stream};
95+
encoder.BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED(document, {0, 6});
96+
EXPECT_BYTES(stream, {0x05, 0x66, 0x6f, 0xc3, 0xb8});
97+
}
98+
7599
TEST(JSONBinPack_Encoder, RFC3339_DATE_INTEGER_TRIPLET_2014_10_01) {
76100
const sourcemeta::jsontoolkit::JSON document{"2014-10-01"};
77101
OutputByteStream stream{};
@@ -144,3 +168,11 @@ TEST(JSONBinPack_Encoder,
144168
0x05 // Pointer (6 - 1 = 5)
145169
});
146170
}
171+
172+
TEST(JSONBinPack_Encoder, PREFIX_VARINT_LENGTH_STRING_SHARED_unicode) {
173+
const sourcemeta::jsontoolkit::JSON document{"foø"};
174+
OutputByteStream stream{};
175+
sourcemeta::jsonbinpack::Encoder encoder{stream};
176+
encoder.PREFIX_VARINT_LENGTH_STRING_SHARED(document, {});
177+
EXPECT_BYTES(stream, {0x05, 0x66, 0x6f, 0xc3, 0xb8});
178+
}

0 commit comments

Comments
 (0)