Skip to content

Commit 55f5929

Browse files
author
Yorhel
committed
Add an ArrayDeserializer to read a JSON array as a stream
This mimics the StreamDeserializer API and implements issue serde-rs#404. Unlike StreamDeserializer, the ArrayDeserializer struct does not itself track the type of the array's elements; instead, next() is generic, so arrays whose values have different types can be deserialized. Unfortunately, this means we cannot implement the Iterator trait.
1 parent bb58e6c commit 55f5929

File tree

2 files changed

+231
-0
lines changed

2 files changed

+231
-0
lines changed

src/de.rs

+112
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,15 @@ impl<'de, R: Read<'de>> Deserializer<R> {
155155
}
156156
}
157157

158+
/// Parse the JSON array as a stream of values.
159+
pub fn into_array(self) -> ArrayDeserializer<'de, R> {
160+
ArrayDeserializer {
161+
de: self,
162+
started: false,
163+
lifetime: PhantomData,
164+
}
165+
}
166+
158167
/// Parse arbitrarily deep JSON structures without any consideration for
159168
/// overflowing the stack.
160169
///
@@ -2169,6 +2178,109 @@ where
21692178
}
21702179
}
21712180

2181+
2182+
2183+
//////////////////////////////////////////////////////////////////////////////
2184+
2185+
/// A streaming JSON array deserializer.
2186+
///
2187+
/// An array deserializer can be created from any JSON deserializer using the
2188+
/// `Deserializer::into_array` method.
2189+
///
2190+
/// The top-level data should be a JSON array, but each array element can consist of any JSON
2191+
/// value. An array deserializer only needs to keep a single array element in memory, and is
2192+
/// therefore preferable over deserializing into a container type such as `Vec` when the complete
2193+
/// array is too large to fit in memory.
2194+
///
2195+
/// ```edition2018
2196+
/// use serde_json::{Deserializer, Value};
2197+
///
2198+
/// fn main() {
2199+
/// let data = "[{\"k\": 3}, 1, \"cool\", \"stuff\", [0, 1, 2]]";
2200+
///
2201+
/// let mut iter = Deserializer::from_str(data).into_array();
2202+
///
2203+
/// while let Some(value) = iter.next::<Value>() {
2204+
/// println!("{}", value.unwrap());
2205+
/// }
2206+
/// }
2207+
/// ```
2208+
pub struct ArrayDeserializer<'de, R> {
2209+
de: Deserializer<R>,
2210+
started: bool, // True if we have consumed the first '['
2211+
lifetime: PhantomData<&'de ()>,
2212+
}
2213+
2214+
impl<'de, R> ArrayDeserializer<'de, R>
2215+
where
2216+
R: read::Read<'de>,
2217+
{
2218+
/// Create a JSON array deserializer from one of the possible serde_json
2219+
/// input sources.
2220+
///
2221+
/// Typically it is more convenient to use one of these methods instead:
2222+
///
2223+
/// - Deserializer::from_str(...).into_array()
2224+
/// - Deserializer::from_bytes(...).into_array()
2225+
/// - Deserializer::from_reader(...).into_array()
2226+
pub fn new(read: R) -> Self {
2227+
ArrayDeserializer {
2228+
de: Deserializer::new(read),
2229+
started: false,
2230+
lifetime: PhantomData,
2231+
}
2232+
}
2233+
2234+
fn end<T: de::Deserialize<'de>>(&mut self) -> Option<Result<T>> {
2235+
self.de.eat_char();
2236+
match self.de.end() {
2237+
Ok(_) => None,
2238+
Err(e) => Some(Err(e)),
2239+
}
2240+
}
2241+
2242+
fn next_value<T: de::Deserialize<'de>>(&mut self) -> Option<Result<T>> {
2243+
match de::Deserialize::deserialize(&mut self.de) {
2244+
Ok(v) => Some(Ok(v)),
2245+
Err(e) => Some(Err(e))
2246+
}
2247+
}
2248+
2249+
/// Return the next element from the array. Returns None if there are no more elements.
2250+
pub fn next<T: de::Deserialize<'de>>(&mut self) -> Option<Result<T>> {
2251+
match self.de.parse_whitespace() {
2252+
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
2253+
Ok(Some(b'[')) if !self.started => {
2254+
self.started = true;
2255+
self.de.eat_char();
2256+
2257+
// We have to peek at the next character here to handle an empty array.
2258+
match self.de.parse_whitespace() {
2259+
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
2260+
Ok(Some(b']')) => self.end(),
2261+
Ok(Some(_)) => self.next_value(),
2262+
Err(e) => Some(Err(e)),
2263+
}
2264+
},
2265+
Ok(Some(b']')) if self.started => self.end(),
2266+
Ok(Some(b',')) if self.started => {
2267+
self.de.eat_char();
2268+
2269+
match self.de.parse_whitespace() {
2270+
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
2271+
Ok(Some(b']')) => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))),
2272+
Ok(Some(_)) => self.next_value(),
2273+
Err(e) => Some(Err(e)),
2274+
}
2275+
},
2276+
Ok(Some(_)) => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))),
2277+
Err(e) => Some(Err(e)),
2278+
}
2279+
}
2280+
}
2281+
2282+
2283+
21722284
//////////////////////////////////////////////////////////////////////////////
21732285

21742286
fn from_trait<'de, R, T>(read: R) -> Result<T>

tests/array.rs

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#![cfg(not(feature = "preserve_order"))]
2+
3+
extern crate serde;
4+
5+
#[macro_use]
6+
extern crate serde_json;
7+
8+
use serde_json::{Deserializer, Value};
9+
10+
// Rustfmt issue https://github.com/rust-lang-nursery/rustfmt/issues/2740
#[cfg_attr(rustfmt, rustfmt_skip)]
macro_rules! test_stream {
    // Run the same test body against all three serde_json input sources:
    // &str, byte slice, and io::Read.
    ($data:expr, |$stream:ident| $test:block) => {
        {
            let de = Deserializer::from_str($data);
            let mut $stream = de.into_array();
            $test
        }
        {
            let de = Deserializer::from_slice($data.as_bytes());
            let mut $stream = de.into_array();
            $test
        }
        {
            let mut bytes = $data.as_bytes();
            let de = Deserializer::from_reader(&mut bytes);
            let mut $stream = de.into_array();
            $test
        }
    };
}
32+
33+
#[test]
fn test_json_array_empty() {
    // An empty array yields no elements at all.
    test_stream!("[]", |stream| {
        assert!(stream.next::<Value>().is_none());
    });
}
41+
42+
#[test]
fn test_json_array_whitespace() {
    // Whitespace before, inside, and after the array is ignored.
    let data = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n";

    test_stream!(data, |stream| {
        let first = stream.next::<Value>().unwrap().unwrap();
        assert_eq!(first["x"], 42);

        let second = stream.next::<Value>().unwrap().unwrap();
        assert_eq!(second["y"], 43);

        assert!(stream.next::<Value>().is_none());
    });
}
54+
55+
#[test]
fn test_json_array_truncated() {
    // Input cut off mid-element: the first element still parses, the second
    // surfaces an EOF error.
    test_stream!("[{\"x\":40},{\"x\":", |stream| {
        assert_eq!(stream.next::<Value>().unwrap().unwrap()["x"], 40);

        assert!(stream.next::<Value>().unwrap().unwrap_err().is_eof());
    });
}
65+
66+
#[test]
fn test_json_array_primitive() {
    // Each element may be a different JSON type, and next() may target a
    // different Rust type per call.
    test_stream!("[{}, true, 1, [], 1.0, \"hey\", null]", |stream| {
        assert_eq!(stream.next::<Value>().unwrap().unwrap(), json!({}));
        assert_eq!(stream.next::<bool>().unwrap().unwrap(), true);
        assert_eq!(stream.next::<u32>().unwrap().unwrap(), 1);
        assert_eq!(stream.next::<Value>().unwrap().unwrap(), json!([]));
        assert_eq!(stream.next::<f32>().unwrap().unwrap(), 1.0);
        assert_eq!(stream.next::<String>().unwrap().unwrap(), "hey");
        assert_eq!(stream.next::<Value>().unwrap().unwrap(), Value::Null);
        assert!(stream.next::<Value>().is_none());
    });
}
88+
89+
#[test]
fn test_json_array_tailing_data() {
    // Garbage after the closing ']' is reported as trailing characters.
    test_stream!("[]e", |stream| {
        let err = stream.next::<Value>().unwrap().unwrap_err();
        assert_eq!(err.to_string(), "trailing characters at line 1 column 3");
    });
}
98+
99+
#[test]
fn test_json_array_tailing_comma() {
    // A comma followed by ']' is rejected: JSON forbids trailing commas.
    test_stream!("[true,]", |stream| {
        assert_eq!(stream.next::<Value>().unwrap().unwrap(), true);

        let err = stream.next::<Value>().unwrap().unwrap_err();
        assert_eq!(err.to_string(), "trailing comma at line 1 column 7");
    });
}
110+
111+
#[test]
fn test_json_array_eof() {
    // Completely empty input fails immediately with an EOF error.
    test_stream!("", |stream| {
        let err = stream.next::<Value>().unwrap().unwrap_err();
        assert_eq!(err.to_string(), "EOF while parsing a value at line 1 column 0");
    });
}

0 commit comments

Comments
 (0)