Skip to content

Commit 684046a

Browse files
committed
Implement serializing CDATA
Closes: #353
1 parent 655691c commit 684046a

File tree

5 files changed

+144
-7
lines changed

5 files changed

+144
-7
lines changed

src/de/simple_type.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,7 @@ impl<'de, 'a> IntoDeserializer<'de, DeError> for SimpleTypeDeserializer<'de, 'a>
786786
#[cfg(test)]
787787
mod tests {
788788
use super::*;
789-
use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer};
789+
use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer, TextFormat};
790790
use crate::se::QuoteLevel;
791791
use crate::utils::{ByteBuf, Bytes};
792792
use serde::de::IgnoredAny;
@@ -824,6 +824,7 @@ mod tests {
824824
writer: String::new(),
825825
target: QuoteTarget::Text,
826826
level: QuoteLevel::Full,
827+
format: TextFormat::Text,
827828
})
828829
.unwrap(),
829830
xml

src/se/content.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
use crate::de::TEXT_KEY;
44
use crate::se::element::{ElementSerializer, Struct, Tuple};
5-
use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer};
5+
use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer, TextFormat};
66
use crate::se::{Indent, QuoteLevel, SeError, WriteResult, XmlName};
77
use serde::ser::{
88
Impossible, Serialize, SerializeSeq, SerializeTuple, SerializeTupleStruct, Serializer,
@@ -71,6 +71,8 @@ pub struct ContentSerializer<'w, 'i, W: Write> {
7171
/// If `true`, then current indent will be written before writing the content,
7272
/// but only if content is not empty. This flag is reset after writing indent.
7373
pub write_indent: bool,
74+
/// Defines how text content should be serialized (as escaped text or CDATA)
75+
pub text_format: TextFormat,
7476
/// If `true`, then primitive types that serializes to a text content without
7577
/// surrounding tag will be allowed, otherwise the [`SeError::Unsupported`]
7678
/// will be returned.
@@ -88,11 +90,11 @@ impl<'w, 'i, W: Write> ContentSerializer<'w, 'i, W> {
8890
/// Turns this serializer into serializer of a text content
8991
#[inline]
9092
pub fn into_simple_type_serializer_impl(self) -> SimpleTypeSerializer<&'w mut W> {
91-
//TODO: Customization point: choose between CDATA and Text representation
9293
SimpleTypeSerializer {
9394
writer: self.writer,
9495
target: QuoteTarget::Text,
9596
level: self.level,
97+
format: self.text_format,
9698
}
9799
}
98100

@@ -119,6 +121,7 @@ impl<'w, 'i, W: Write> ContentSerializer<'w, 'i, W> {
119121
level: self.level,
120122
indent: self.indent.borrow(),
121123
write_indent: self.write_indent,
124+
text_format: self.text_format,
122125
allow_primitive,
123126
expand_empty_elements: self.expand_empty_elements,
124127
}
@@ -600,6 +603,7 @@ pub(super) mod tests {
600603
level: QuoteLevel::Full,
601604
indent: Indent::None,
602605
write_indent: false,
606+
text_format: TextFormat::Text,
603607
allow_primitive: true,
604608
expand_empty_elements: false,
605609
};
@@ -623,6 +627,7 @@ pub(super) mod tests {
623627
level: QuoteLevel::Full,
624628
indent: Indent::None,
625629
write_indent: false,
630+
text_format: TextFormat::Text,
626631
allow_primitive: true,
627632
expand_empty_elements: false,
628633
};
@@ -1070,6 +1075,7 @@ pub(super) mod tests {
10701075
level: QuoteLevel::Full,
10711076
indent: Indent::Owned(Indentation::new(b' ', 2)),
10721077
write_indent: false,
1078+
text_format: TextFormat::Text,
10731079
allow_primitive: true,
10741080
expand_empty_elements: false,
10751081
};
@@ -1093,6 +1099,7 @@ pub(super) mod tests {
10931099
level: QuoteLevel::Full,
10941100
indent: Indent::Owned(Indentation::new(b' ', 2)),
10951101
write_indent: false,
1102+
text_format: TextFormat::Text,
10961103
allow_primitive: true,
10971104
expand_empty_elements: false,
10981105
};

src/se/element.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use crate::de::{TEXT_KEY, VALUE_KEY};
44
use crate::se::content::ContentSerializer;
55
use crate::se::key::QNameSerializer;
6-
use crate::se::simple_type::{QuoteTarget, SimpleSeq, SimpleTypeSerializer};
6+
use crate::se::simple_type::{QuoteTarget, SimpleSeq, SimpleTypeSerializer, TextFormat};
77
use crate::se::text::TextSerializer;
88
use crate::se::{SeError, WriteResult, XmlName};
99
use serde::ser::{
@@ -416,6 +416,7 @@ impl<'w, 'k, W: Write> Struct<'w, 'k, W> {
416416
writer: &mut self.ser.ser.writer,
417417
target: QuoteTarget::DoubleQAttr,
418418
level: self.ser.ser.level,
419+
format: TextFormat::Text,
419420
})?;
420421
self.ser.ser.writer.write_char('"')?;
421422

@@ -443,6 +444,7 @@ impl<'w, 'k, W: Write> Struct<'w, 'k, W> {
443444
indent: self.ser.ser.indent.borrow(),
444445
// If previous field does not require indent, do not write it
445446
write_indent: self.write_indent,
447+
text_format: self.ser.ser.text_format,
446448
allow_primitive: true,
447449
expand_empty_elements: self.ser.ser.expand_empty_elements,
448450
};
@@ -635,6 +637,7 @@ mod tests {
635637
level: QuoteLevel::Full,
636638
indent: Indent::None,
637639
write_indent: false,
640+
text_format: TextFormat::Text,
638641
allow_primitive: true,
639642
expand_empty_elements: false,
640643
},
@@ -661,6 +664,7 @@ mod tests {
661664
level: QuoteLevel::Full,
662665
indent: Indent::None,
663666
write_indent: false,
667+
text_format: TextFormat::Text,
664668
allow_primitive: true,
665669
expand_empty_elements: false,
666670
},
@@ -1356,6 +1360,7 @@ mod tests {
13561360
level: QuoteLevel::Full,
13571361
indent: Indent::Owned(Indentation::new(b' ', 2)),
13581362
write_indent: false,
1363+
text_format: TextFormat::Text,
13591364
allow_primitive: true,
13601365
expand_empty_elements: false,
13611366
},
@@ -1382,6 +1387,7 @@ mod tests {
13821387
level: QuoteLevel::Full,
13831388
indent: Indent::Owned(Indentation::new(b' ', 2)),
13841389
write_indent: false,
1390+
text_format: TextFormat::Text,
13851391
allow_primitive: true,
13861392
expand_empty_elements: false,
13871393
},
@@ -2099,6 +2105,7 @@ mod tests {
20992105
level: QuoteLevel::Full,
21002106
indent: Indent::None,
21012107
write_indent: false,
2108+
text_format: TextFormat::Text,
21022109
allow_primitive: true,
21032110
expand_empty_elements: true,
21042111
},

src/se/mod.rs

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ use serde::serde_if_integer128;
8888
use std::fmt::Write;
8989
use std::str::from_utf8;
9090

91-
pub use self::simple_type::SimpleTypeSerializer;
91+
pub use self::simple_type::{SimpleTypeSerializer, TextFormat};
9292
pub use crate::errors::serialize::SeError;
9393

9494
/// Serialize struct into a `Write`r.
@@ -557,6 +557,7 @@ impl<'w, 'r, W: Write> Serializer<'w, 'r, W> {
557557
level: QuoteLevel::Partial,
558558
indent: Indent::None,
559559
write_indent: false,
560+
text_format: TextFormat::Text,
560561
allow_primitive: true,
561562
expand_empty_elements: false,
562563
},
@@ -623,6 +624,7 @@ impl<'w, 'r, W: Write> Serializer<'w, 'r, W> {
623624
level: QuoteLevel::Partial,
624625
indent: Indent::None,
625626
write_indent: false,
627+
text_format: TextFormat::Text,
626628
allow_primitive: true,
627629
expand_empty_elements: false,
628630
},
@@ -663,6 +665,40 @@ impl<'w, 'r, W: Write> Serializer<'w, 'r, W> {
663665
self
664666
}
665667

668+
/// Set the text format used for serializing text content.
669+
///
670+
/// - [`TextFormat::Text`]: Regular XML escaping (default)
671+
/// - [`TextFormat::CData`]: CDATA sections for text content
672+
///
673+
/// # Examples
674+
///
675+
/// ```
676+
/// # use pretty_assertions::assert_eq;
677+
/// # use serde::Serialize;
678+
/// # use quick_xml::se::{Serializer, TextFormat};
679+
///
680+
/// #[derive(Debug, PartialEq, Serialize)]
681+
/// struct Document {
682+
/// #[serde(rename = "$text")]
683+
/// content: String,
684+
/// }
685+
///
686+
/// let mut buffer = String::new();
687+
/// let mut ser = Serializer::with_root(&mut buffer, Some("doc")).unwrap();
688+
/// ser.text_format(TextFormat::CData);
689+
///
690+
/// let data = Document {
691+
/// content: "Content with <markup> & entities".to_string(),
692+
/// };
693+
///
694+
/// data.serialize(ser).unwrap();
695+
/// assert_eq!(buffer, "<doc><![CDATA[Content with <markup> & entities]]></doc>");
696+
/// ```
697+
pub fn text_format(&mut self, format: simple_type::TextFormat) -> &mut Self {
698+
self.ser.text_format = format;
699+
self
700+
}
701+
666702
/// Configure indent for a serializer
667703
pub fn indent(&mut self, indent_char: char, indent_size: usize) -> &mut Self {
668704
self.ser.indent = Indent::Owned(Indentation::new(indent_char as u8, indent_size));

src/se/simple_type.rs

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,15 @@ pub enum QuoteTarget {
2323
SingleQAttr,
2424
}
2525

/// Selects how text content is written by the serializer.
///
/// The value is carried by `SimpleTypeSerializer` in its `format` field and
/// is consulted when a string is serialized.
///
/// [`TextFormat::Text`] is the default, which is also what
/// [`Default::default`] returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextFormat {
    /// Serialize as regular text content with escaping
    Text,
    /// Serialize as CDATA section without escaping
    CData,
}

// Implemented by hand rather than with `#[derive(Default)]` + `#[default]`
// so that no newer enum-default syntax is required.
impl Default for TextFormat {
    /// Returns [`TextFormat::Text`], the regular escaped representation.
    #[inline]
    fn default() -> Self {
        Self::Text
    }
}
34+
2635
/// Escapes atomic value that could be part of a `xs:list`. All whitespace characters
2736
/// additionally escaped
2837
fn escape_item(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow<'_, str> {
@@ -93,6 +102,37 @@ fn escape_item(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow<'_, s
93102
}
94103
}
95104

105+
/// Writes content as CDATA section, handling the `]]>` sequence by splitting into multiple CDATA sections
106+
fn write_cdata_content<W: Write>(writer: &mut W, value: &str) -> Result<(), SeError> {
107+
if value.is_empty() {
108+
writer.write_str("<![CDATA[]]>")?;
109+
return Ok(());
110+
}
111+
112+
let mut remaining = value;
113+
114+
while !remaining.is_empty() {
115+
writer.write_str("<![CDATA[")?;
116+
117+
// Find the first occurrence of "]]>"
118+
if let Some(pos) = remaining.find("]]>") {
119+
// Write everything up to and including "]]"
120+
writer.write_str(&remaining[..pos + 2])?;
121+
writer.write_str("]]>")?;
122+
123+
// Continue with ">" and the rest
124+
remaining = &remaining[pos + 2..];
125+
} else {
126+
// No "]]>" found, write the rest
127+
writer.write_str(remaining)?;
128+
writer.write_str("]]>")?;
129+
break;
130+
}
131+
}
132+
133+
Ok(())
134+
}
135+
96136
/// Escapes XSD simple type value
97137
fn escape_list(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow<'_, str> {
98138
use QuoteLevel::*;
@@ -404,6 +444,8 @@ pub struct SimpleTypeSerializer<W: Write> {
404444
pub target: QuoteTarget,
405445
/// Defines which XML characters need to be escaped
406446
pub level: QuoteLevel,
447+
/// Format for text content serialization
448+
pub format: TextFormat,
407449
}
408450

409451
impl<W: Write> SimpleTypeSerializer<W> {
@@ -427,8 +469,15 @@ impl<W: Write> Serializer for SimpleTypeSerializer<W> {
427469
write_primitive!();
428470

429471
fn serialize_str(mut self, value: &str) -> Result<Self::Ok, Self::Error> {
430-
if !value.is_empty() {
431-
self.write_str(&escape_list(value, self.target, self.level))?;
472+
match self.format {
473+
TextFormat::CData => {
474+
write_cdata_content(&mut self.writer, value)?;
475+
}
476+
TextFormat::Text => {
477+
if !value.is_empty() {
478+
self.write_str(&escape_list(value, self.target, self.level))?;
479+
}
480+
}
432481
}
433482
Ok(self.writer)
434483
}
@@ -1031,6 +1080,7 @@ mod tests {
10311080
writer: String::new(),
10321081
target: QuoteTarget::Text,
10331082
level: QuoteLevel::Full,
1083+
format: TextFormat::Text,
10341084
};
10351085

10361086
let buffer = $data.serialize(ser).unwrap();
@@ -1050,6 +1100,7 @@ mod tests {
10501100
writer: &mut buffer,
10511101
target: QuoteTarget::Text,
10521102
level: QuoteLevel::Full,
1103+
format: TextFormat::Text,
10531104
};
10541105

10551106
match $data.serialize(ser).unwrap_err() {
@@ -1219,4 +1270,39 @@ mod tests {
12191270
assert_eq!(buffer, "1 2 3");
12201271
}
12211272
}
1273+
1274+
mod cdata {
1275+
use super::*;
1276+
use pretty_assertions::assert_eq;
1277+
1278+
macro_rules! serialize_cdata_as {
1279+
($name:ident: $data:expr => $expected:literal) => {
1280+
#[test]
1281+
fn $name() {
1282+
let ser = SimpleTypeSerializer {
1283+
writer: String::new(),
1284+
target: QuoteTarget::Text,
1285+
level: QuoteLevel::Full,
1286+
format: TextFormat::CData,
1287+
};
1288+
1289+
let buffer = $data.serialize(ser).unwrap();
1290+
assert_eq!(buffer, $expected);
1291+
}
1292+
};
1293+
}
1294+
1295+
serialize_cdata_as!(empty_string: "" => "<![CDATA[]]>");
1296+
serialize_cdata_as!(simple_text: "Hello World" => "<![CDATA[Hello World]]>");
1297+
serialize_cdata_as!(with_markup: "<tag>content</tag>" => "<![CDATA[<tag>content</tag>]]>");
1298+
serialize_cdata_as!(with_ampersand: "Tom & Jerry" => "<![CDATA[Tom & Jerry]]>");
1299+
serialize_cdata_as!(with_quotes: r#"He said "Hello""# => r#"<![CDATA[He said "Hello"]]>"#);
1300+
serialize_cdata_as!(all_xml_chars: "<>&\"'" => "<![CDATA[<>&\"']]>");
1301+
1302+
serialize_cdata_as!(with_cdata_end: "foo]]>bar" => "<![CDATA[foo]]]]><![CDATA[>bar]]>");
1303+
serialize_cdata_as!(multiple_cdata_ends: "a]]>b]]>c" => "<![CDATA[a]]]]><![CDATA[>b]]]]><![CDATA[>c]]>");
1304+
serialize_cdata_as!(starts_with_cdata_end: "]]>hello" => "<![CDATA[]]]]><![CDATA[>hello]]>");
1305+
serialize_cdata_as!(ends_with_cdata_end: "hello]]>" => "<![CDATA[hello]]]]><![CDATA[>]]>");
1306+
serialize_cdata_as!(only_cdata_end: "]]>" => "<![CDATA[]]]]><![CDATA[>]]>");
1307+
}
12221308
}

0 commit comments

Comments
 (0)