Skip to content

Raise the correct exception in fast_serialize_string #633

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 33 additions & 7 deletions ext/json/ext/generator/generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
#define RB_UNLIKELY(cond) (cond)
#endif

static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError;
static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;

static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend;
static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;

/* Converts in_string to a JSON string (without the wrapping '"'
* characters) in FBuffer out_buffer.
Expand Down Expand Up @@ -735,20 +735,41 @@ static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
fbuffer_append_char(buffer, ']');
}

static int usascii_encindex, utf8_encindex;
static int usascii_encindex, utf8_encindex, binary_encindex;

static int enc_utf8_compatible_p(int enc_idx)
/* Returns 1 when enc_idx equals the cached US-ASCII or UTF-8 encoding index,
 * and 0 for any other encoding index. */
static inline int enc_utf8_compatible_p(int enc_idx)
{
    return enc_idx == usascii_encindex || enc_idx == utf8_encindex;
}

/* Returns a string suitable for UTF-8 serialization.
 *
 * - Strings whose encoding index is US-ASCII or UTF-8 are returned unchanged.
 * - A binary string is duplicated and the copy is re-tagged as UTF-8; if the
 *   copy's code range is 7-bit or valid, the copy is returned.
 * - Anything else is converted by calling `encode(Encoding::UTF_8)` on the
 *   original string; any exception raised by that call propagates.
 */
static inline VALUE ensure_valid_encoding(VALUE str)
{
    const int enc_idx = RB_ENCODING_GET(str);

    if (RB_UNLIKELY(!enc_utf8_compatible_p(enc_idx))) {
        if (enc_idx == binary_encindex) {
            // For historical reasons, binary strings are silently reinterpreted
            // as UTF-8 when their bytes happen to form valid UTF-8.
            // TODO: Deprecate in 2.8.0
            // TODO: Remove in 3.0.0
            VALUE retagged = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
            const int coderange = rb_enc_str_coderange(retagged);

            if (coderange == ENC_CODERANGE_7BIT || coderange == ENC_CODERANGE_VALID) {
                return retagged;
            }
        }

        // Not reinterpretable in place: fall back to a real transcoding.
        return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
    }

    return str;
}

static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
{
if (!enc_utf8_compatible_p(RB_ENCODING_GET(obj))) {
obj = rb_str_export_to_enc(obj, rb_utf8_encoding());
}
obj = ensure_valid_encoding(obj);

fbuffer_append_char(buffer, '"');

Expand Down Expand Up @@ -1462,14 +1483,19 @@ void Init_generator(void)
VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);

rb_global_variable(&Encoding_UTF_8);
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));

i_to_s = rb_intern("to_s");
i_to_json = rb_intern("to_json");
i_new = rb_intern("new");
i_pack = rb_intern("pack");
i_unpack = rb_intern("unpack");
i_create_id = rb_intern("create_id");
i_extend = rb_intern("extend");
i_encode = rb_intern("encode");

usascii_encindex = rb_usascii_encindex();
utf8_encindex = rb_utf8_encindex();
binary_encindex = rb_ascii8bit_encindex();
}
19 changes: 11 additions & 8 deletions ext/json/ext/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -1794,6 +1794,9 @@ static VALUE convert_encoding(VALUE source)
}

if (encindex == binary_encindex) {
// For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
// TODO: Deprecate in 2.8.0
// TODO: Remove in 3.0.0
return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
}

Expand Down Expand Up @@ -1943,15 +1946,15 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
}


#line 1947 "parser.c"
#line 1950 "parser.c"
enum {JSON_start = 1};
enum {JSON_first_final = 10};
enum {JSON_error = 0};

enum {JSON_en_main = 1};


#line 855 "parser.rl"
#line 858 "parser.rl"


/*
Expand All @@ -1969,16 +1972,16 @@ static VALUE cParser_parse(VALUE self)
GET_PARSER;


#line 1973 "parser.c"
#line 1976 "parser.c"
{
cs = JSON_start;
}

#line 872 "parser.rl"
#line 875 "parser.rl"
p = json->source;
pe = p + json->len;

#line 1982 "parser.c"
#line 1985 "parser.c"
{
if ( p == pe )
goto _test_eof;
Expand Down Expand Up @@ -2012,7 +2015,7 @@ case 1:
cs = 0;
goto _out;
tr2:
#line 847 "parser.rl"
#line 850 "parser.rl"
{
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
Expand All @@ -2022,7 +2025,7 @@ cs = 0;
if ( ++p == pe )
goto _test_eof10;
case 10:
#line 2026 "parser.c"
#line 2029 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
Expand Down Expand Up @@ -2111,7 +2114,7 @@ case 9:
_out: {}
}

#line 875 "parser.rl"
#line 878 "parser.rl"

if (cs >= JSON_first_final && p == pe) {
return result;
Expand Down
3 changes: 3 additions & 0 deletions ext/json/ext/parser/parser.rl
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,9 @@ static VALUE convert_encoding(VALUE source)
}

if (encindex == binary_encindex) {
// For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
// TODO: Deprecate in 2.8.0
// TODO: Remove in 3.0.0
return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
}

Expand Down
1 change: 1 addition & 0 deletions lib/json/pure/generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ def generate(obj)
private def fast_serialize_string(string, buf) # :nodoc:
buf << '"'
string = string.encode(::Encoding::UTF_8) unless string.encoding == ::Encoding::UTF_8
raise GeneratorError, "source sequence is illegal/malformed utf-8" unless string.valid_encoding?

if /["\\\x0-\x1f]/n.match?(string)
buf << string.gsub(/["\\\x0-\x1f]/n, MAP)
Expand Down
13 changes: 13 additions & 0 deletions test/json/json_generator_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,19 @@ def test_invalid_encoding_string
"\x82\xAC\xEF".to_json
end
assert_includes error.message, "source sequence is illegal/malformed utf-8"

error = assert_raise(JSON::GeneratorError) do
JSON.dump("\x82\xAC\xEF")
end
assert_includes error.message, "source sequence is illegal/malformed utf-8"

assert_raise(Encoding::UndefinedConversionError) do
"\x82\xAC\xEF".b.to_json
end

assert_raise(Encoding::UndefinedConversionError) do
JSON.dump("\x82\xAC\xEF".b)
end
end

if defined?(JSON::Ext::Generator) and RUBY_PLATFORM != "java"
Expand Down
Loading