Skip to content

Commit dd8b3db

Browse files
authored
Merge pull request #665 from eregon/parse_broken_string
Add test for parsing broken strings and use String#encode instead of rb_str_conv_enc() in parser
2 parents 96397cf + 0f0b16b commit dd8b3db

File tree

5 files changed

+46
-31
lines changed

5 files changed

+46
-31
lines changed

ext/json/ext/parser/parser.c

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,14 @@ static void raise_parse_error(const char *format, const char *start)
8989
rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
9090
}
9191

92-
static VALUE mJSON, mExt, cParser, eNestingError;
92+
static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
9393
static VALUE CNaN, CInfinity, CMinusInfinity;
9494

9595
static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
9696
i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
9797
i_object_class, i_array_class, i_decimal_class,
9898
i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
99-
i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
99+
i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode;
100100

101101
static int binary_encindex;
102102
static int utf8_encindex;
@@ -1797,16 +1797,11 @@ static VALUE convert_encoding(VALUE source)
17971797
}
17981798

17991799
if (encindex == binary_encindex) {
1800-
// For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1801-
VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1802-
switch (rb_enc_str_coderange(utf8_string)) {
1803-
case ENC_CODERANGE_7BIT:
1804-
case ENC_CODERANGE_VALID:
1805-
return utf8_string;
1806-
}
1800+
// For historical reason, we silently reinterpret binary strings as UTF-8
1801+
return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
18071802
}
18081803

1809-
return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());
1804+
return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
18101805
}
18111806

18121807
/*
@@ -1958,15 +1953,15 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
19581953
}
19591954

19601955

1961-
#line 1962 "parser.c"
1956+
#line 1957 "parser.c"
19621957
enum {JSON_start = 1};
19631958
enum {JSON_first_final = 10};
19641959
enum {JSON_error = 0};
19651960

19661961
enum {JSON_en_main = 1};
19671962

19681963

1969-
#line 870 "parser.rl"
1964+
#line 865 "parser.rl"
19701965

19711966

19721967
/*
@@ -1984,16 +1979,16 @@ static VALUE cParser_parse(VALUE self)
19841979
GET_PARSER;
19851980

19861981

1987-
#line 1988 "parser.c"
1982+
#line 1983 "parser.c"
19881983
{
19891984
cs = JSON_start;
19901985
}
19911986

1992-
#line 887 "parser.rl"
1987+
#line 882 "parser.rl"
19931988
p = json->source;
19941989
pe = p + json->len;
19951990

1996-
#line 1997 "parser.c"
1991+
#line 1992 "parser.c"
19971992
{
19981993
if ( p == pe )
19991994
goto _test_eof;
@@ -2027,7 +2022,7 @@ case 1:
20272022
cs = 0;
20282023
goto _out;
20292024
tr2:
2030-
#line 862 "parser.rl"
2025+
#line 857 "parser.rl"
20312026
{
20322027
char *np = JSON_parse_value(json, p, pe, &result, 0);
20332028
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -2037,7 +2032,7 @@ cs = 0;
20372032
if ( ++p == pe )
20382033
goto _test_eof10;
20392034
case 10:
2040-
#line 2041 "parser.c"
2035+
#line 2036 "parser.c"
20412036
switch( (*p) ) {
20422037
case 13: goto st10;
20432038
case 32: goto st10;
@@ -2126,7 +2121,7 @@ case 9:
21262121
_out: {}
21272122
}
21282123

2129-
#line 890 "parser.rl"
2124+
#line 885 "parser.rl"
21302125

21312126
if (cs >= JSON_first_final && p == pe) {
21322127
return result;
@@ -2214,6 +2209,9 @@ void Init_parser(void)
22142209
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
22152210
rb_gc_register_mark_object(CMinusInfinity);
22162211

2212+
rb_global_variable(&Encoding_UTF_8);
2213+
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
2214+
22172215
i_json_creatable_p = rb_intern("json_creatable?");
22182216
i_json_create = rb_intern("json_create");
22192217
i_create_id = rb_intern("create_id");
@@ -2235,6 +2233,7 @@ void Init_parser(void)
22352233
i_try_convert = rb_intern("try_convert");
22362234
i_freeze = rb_intern("freeze");
22372235
i_uminus = rb_intern("-@");
2236+
i_encode = rb_intern("encode");
22382237

22392238
binary_encindex = rb_ascii8bit_encindex();
22402239
utf8_encindex = rb_utf8_encindex();

ext/json/ext/parser/parser.rl

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -87,14 +87,14 @@ static void raise_parse_error(const char *format, const char *start)
8787
rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
8888
}
8989

90-
static VALUE mJSON, mExt, cParser, eNestingError;
90+
static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
9191
static VALUE CNaN, CInfinity, CMinusInfinity;
9292

9393
static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
9494
i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
9595
i_object_class, i_array_class, i_decimal_class,
9696
i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
97-
i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
97+
i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode;
9898

9999
static int binary_encindex;
100100
static int utf8_encindex;
@@ -692,16 +692,11 @@ static VALUE convert_encoding(VALUE source)
692692
}
693693

694694
if (encindex == binary_encindex) {
695-
// For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
696-
VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
697-
switch (rb_enc_str_coderange(utf8_string)) {
698-
case ENC_CODERANGE_7BIT:
699-
case ENC_CODERANGE_VALID:
700-
return utf8_string;
701-
}
695+
// For historical reason, we silently reinterpret binary strings as UTF-8
696+
return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
702697
}
703698

704-
return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());
699+
return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
705700
}
706701

707702
/*
@@ -974,6 +969,9 @@ void Init_parser(void)
974969
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
975970
rb_gc_register_mark_object(CMinusInfinity);
976971

972+
rb_global_variable(&Encoding_UTF_8);
973+
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
974+
977975
i_json_creatable_p = rb_intern("json_creatable?");
978976
i_json_create = rb_intern("json_create");
979977
i_create_id = rb_intern("create_id");
@@ -995,6 +993,7 @@ void Init_parser(void)
995993
i_try_convert = rb_intern("try_convert");
996994
i_freeze = rb_intern("freeze");
997995
i_uminus = rb_intern("-@");
996+
i_encode = rb_intern("encode");
998997

999998
binary_encindex = rb_ascii8bit_encindex();
1000999
utf8_encindex = rb_utf8_encindex();

lib/json/pure/generator.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def utf8_to_json_ascii(string, script_safe = false) # :nodoc:
7474
)/nx) { |c|
7575
c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'"
7676
s = c.encode(::Encoding::UTF_16BE, ::Encoding::UTF_8).unpack('H*')[0]
77-
s.force_encoding(::Encoding::ASCII_8BIT)
77+
s.force_encoding(::Encoding::BINARY)
7878
s.gsub!(/.{4}/n, '\\\\u\&')
7979
s.force_encoding(::Encoding::UTF_8)
8080
}

lib/json/pure/parser.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,9 @@ def convert_encoding(source)
143143
raise TypeError,
144144
"#{source.inspect} is not like a string"
145145
end
146-
if source.encoding != ::Encoding::ASCII_8BIT
146+
if source.encoding != ::Encoding::BINARY
147147
source = source.encode(::Encoding::UTF_8)
148-
source.force_encoding(::Encoding::ASCII_8BIT)
148+
source.force_encoding(::Encoding::BINARY)
149149
end
150150
source
151151
end

test/json/json_parser_test.rb

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,23 @@ def test_parse_some_strings
196196
)
197197
end
198198

199+
if RUBY_ENGINE != "jruby" # https://github.com/ruby/json/issues/138
200+
def test_parse_broken_string
201+
s = parse(%{["\x80"]})[0]
202+
assert_equal("\x80", s)
203+
assert_equal Encoding::UTF_8, s.encoding
204+
assert_equal false, s.valid_encoding?
205+
206+
s = parse(%{["\x80"]}.b)[0]
207+
assert_equal("\x80", s)
208+
assert_equal Encoding::UTF_8, s.encoding
209+
assert_equal false, s.valid_encoding?
210+
211+
input = %{["\x80"]}.dup.force_encoding(Encoding::US_ASCII)
212+
assert_raise(Encoding::InvalidByteSequenceError) { parse(input) }
213+
end
214+
end
215+
199216
def test_parse_big_integers
200217
json1 = JSON(orig = (1 << 31) - 1)
201218
assert_equal orig, parse(json1)

0 commit comments

Comments
 (0)