Skip to content

Commit 80af0b5

Browse files
author
Yorhel
committed
Add an ArrayDeserializer to read a JSON array as a stream
This mimics the StreamDeserializer API and implements issue serde-rs#404.
1 parent 69bfbfc commit 80af0b5

File tree

2 files changed

+243
-0
lines changed

2 files changed

+243
-0
lines changed

src/de.rs

+124
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,19 @@ impl<'de, R: Read<'de>> Deserializer<R> {
155155
}
156156
}
157157

158+
/// Parse the JSON array as an iterator over values of type T.
159+
pub fn into_array_iter<T>(self) -> ArrayDeserializer<'de, R, T>
160+
where
161+
T: de::Deserialize<'de>,
162+
{
163+
ArrayDeserializer {
164+
de: self,
165+
started: false,
166+
output: PhantomData,
167+
lifetime: PhantomData,
168+
}
169+
}
170+
158171
/// Parse arbitrarily deep JSON structures without any consideration for
159172
/// overflowing the stack.
160173
///
@@ -2169,6 +2182,117 @@ where
21692182
}
21702183
}
21712184

2185+
2186+
2187+
//////////////////////////////////////////////////////////////////////////////
2188+
2189+
/// Iterator that deserializes an array into multiple JSON values.
2190+
///
2191+
/// An array deserializer can be created from any JSON deserializer using the
2192+
/// `Deserializer::into_array_iter` method.
2193+
///
2194+
/// The top-level data should be a JSON array, but each array element can consist of any JSON
2195+
/// value. An array deserializer only needs to keep a single array element in memory, and is
2196+
/// therefore preferable over deserializing into a container type such as `Vec` when the complete
2197+
/// array is too large to fit in memory.
2198+
///
2199+
/// ```edition2018
2200+
/// use serde_json::{Deserializer, Value};
2201+
///
2202+
/// fn main() {
2203+
/// let data = "[{\"k\": 3}, 1, \"cool\", \"stuff\", [0, 1, 2]]";
2204+
///
2205+
/// let iter = Deserializer::from_str(data).into_array_iter::<Value>();
2206+
///
2207+
/// for value in iter {
2208+
/// println!("{}", value.unwrap());
2209+
/// }
2210+
/// }
2211+
/// ```
2212+
pub struct ArrayDeserializer<'de, R, T> {
2213+
de: Deserializer<R>,
2214+
started: bool, // True if we have consumed the first '['
2215+
output: PhantomData<T>,
2216+
lifetime: PhantomData<&'de ()>,
2217+
}
2218+
2219+
impl<'de, R, T> ArrayDeserializer<'de, R, T>
2220+
where
2221+
R: read::Read<'de>,
2222+
T: de::Deserialize<'de>,
2223+
{
2224+
/// Create a JSON array deserializer from one of the possible serde_json
2225+
/// input sources.
2226+
///
2227+
/// Typically it is more convenient to use one of these methods instead:
2228+
///
2229+
/// - Deserializer::from_str(...).into_array_iter()
2230+
/// - Deserializer::from_bytes(...).into_array_iter()
2231+
/// - Deserializer::from_reader(...).into_array_iter()
2232+
pub fn new(read: R) -> Self {
2233+
ArrayDeserializer {
2234+
de: Deserializer::new(read),
2235+
started: false,
2236+
output: PhantomData,
2237+
lifetime: PhantomData,
2238+
}
2239+
}
2240+
2241+
fn end(&mut self) -> Option<Result<T>> {
2242+
self.de.eat_char();
2243+
match self.de.end() {
2244+
Ok(_) => None,
2245+
Err(e) => Some(Err(e)),
2246+
}
2247+
}
2248+
2249+
fn next_value(&mut self) -> Option<Result<T>> {
2250+
match de::Deserialize::deserialize(&mut self.de) {
2251+
Ok(v) => Some(Ok(v)),
2252+
Err(e) => Some(Err(e))
2253+
}
2254+
}
2255+
}
2256+
2257+
impl<'de, R, T> Iterator for ArrayDeserializer<'de, R, T>
2258+
where
2259+
R: Read<'de>,
2260+
T: de::Deserialize<'de>,
2261+
{
2262+
type Item = Result<T>;
2263+
2264+
fn next(&mut self) -> Option<Result<T>> {
2265+
match self.de.parse_whitespace() {
2266+
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
2267+
Ok(Some(b'[')) if !self.started => {
2268+
self.started = true;
2269+
self.de.eat_char();
2270+
2271+
// We have to peek at the next character here to handle an empty array.
2272+
match self.de.parse_whitespace() {
2273+
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
2274+
Ok(Some(b']')) => self.end(),
2275+
Ok(Some(_)) => self.next_value(),
2276+
Err(e) => Some(Err(e)),
2277+
}
2278+
},
2279+
Ok(Some(b']')) if self.started => self.end(),
2280+
Ok(Some(b',')) if self.started => {
2281+
self.de.eat_char();
2282+
2283+
match self.de.parse_whitespace() {
2284+
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
2285+
Ok(Some(b']')) => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))),
2286+
Ok(Some(_)) => self.next_value(),
2287+
Err(e) => Some(Err(e)),
2288+
}
2289+
},
2290+
Ok(Some(_)) => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))),
2291+
Err(e) => Some(Err(e)),
2292+
}
2293+
}
2294+
}
2295+
21722296
//////////////////////////////////////////////////////////////////////////////
21732297

21742298
fn from_trait<'de, R, T>(read: R) -> Result<T>

tests/array.rs

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#![cfg(not(feature = "preserve_order"))]
2+
3+
extern crate serde;
4+
5+
#[macro_use]
6+
extern crate serde_json;
7+
8+
use serde_json::{Deserializer, Value};
9+
10+
// Rustfmt issue https://github.com/rust-lang-nursery/rustfmt/issues/2740
11+
#[cfg_attr(rustfmt, rustfmt_skip)]
macro_rules! test_stream {
    // Runs `$test` three times, each time with `$stream` bound to a fresh
    // array iterator over `$data` built from a different input source.
    ($data:expr, $ty:ty, |$stream:ident| $test:block) => {
        // Source 1: string slice.
        {
            let mut $stream = Deserializer::from_str($data).into_array_iter::<$ty>();
            $test
        }
        // Source 2: byte slice.
        {
            let mut $stream = Deserializer::from_slice($data.as_bytes()).into_array_iter::<$ty>();
            $test
        }
        // Source 3: reader over the same bytes.
        {
            let mut bytes = $data.as_bytes();
            let mut $stream = Deserializer::from_reader(&mut bytes).into_array_iter::<$ty>();
            $test
        }
    };
}
32+
33+
#[test]
fn test_json_array_empty() {
    // An empty array produces no items at all.
    let input = "[]";

    test_stream!(input, Value, |items| {
        let first = items.next();
        assert!(first.is_none());
    });
}
41+
42+
#[test]
fn test_json_array_whitespace() {
    // Whitespace before, inside and after the array must be tolerated.
    let input = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n";

    test_stream!(input, Value, |items| {
        let first = items.next().unwrap().unwrap();
        assert_eq!(first["x"], 42);

        let second = items.next().unwrap().unwrap();
        assert_eq!(second["y"], 43);

        assert!(items.next().is_none());
    });
}
54+
55+
#[test]
fn test_json_array_truncated() {
    // The input ends in the middle of the second element.
    let input = "[{\"x\":40},{\"x\":";

    test_stream!(input, Value, |items| {
        let first = items.next().unwrap().unwrap();
        assert_eq!(first["x"], 40);

        let err = items.next().unwrap().unwrap_err();
        assert!(err.is_eof());
    });
}
65+
66+
#[test]
fn test_json_array_primitive() {
    // One element of every JSON value kind, in order.
    let input = "[{}, true, 1, [], 1.0, \"hey\", null]";

    test_stream!(input, Value, |items| {
        {
            // Pull the next element, panicking on end-of-array or error.
            let mut take = || items.next().unwrap().unwrap();

            assert_eq!(take(), json!({}));
            assert_eq!(take(), true);
            assert_eq!(take(), 1);
            assert_eq!(take(), json!([]));
            assert_eq!(take(), 1.0);
            assert_eq!(take(), "hey");
            assert_eq!(take(), Value::Null);
        }

        assert!(items.next().is_none());
    });
}
88+
89+
#[test]
fn test_json_array_tailing_data() {
    // Non-whitespace after the closing `]` must be rejected.
    let input = "[]e";

    test_stream!(input, Value, |items| {
        let err = items.next().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "trailing characters at line 1 column 3");
    });
}
98+
99+
#[test]
fn test_json_array_tailing_comma() {
    // A comma directly before the closing `]` is an error.
    let input = "[true,]";

    test_stream!(input, Value, |items| {
        let first = items.next().unwrap().unwrap();
        assert_eq!(first, true);

        let err = items.next().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "trailing comma at line 1 column 7");
    });
}
110+
111+
#[test]
fn test_json_array_eof() {
    // Completely empty input yields an EOF error as the first item.
    let input = "";

    test_stream!(input, Value, |items| {
        let err = items.next().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "EOF while parsing a value at line 1 column 0");
    });
}

0 commit comments

Comments
 (0)