diff --git a/.circleci/config.yml b/.circleci/config.yml
index 430d1bb5..094294d0 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -18,7 +18,7 @@ jobs:
command: pip install tox
- run:
name: Run tests
- command: tox
+ command: tox -r
workflows:
version: 2
diff --git a/openformats/formats/android_unescaped.py b/openformats/formats/android_unescaped.py
index c95a1210..76c77ce2 100644
--- a/openformats/formats/android_unescaped.py
+++ b/openformats/formats/android_unescaped.py
@@ -4,6 +4,7 @@
from openformats.formats.android import AndroidHandler
from ..utils.xml import NewDumbXml as DumbXml
+
class AndroidUnescapedHandler(AndroidHandler):
def _create_string(self, name, text, comment, product, child, pluralized=False):
"""Creates a string and returns it. If empty string it returns None.
@@ -88,39 +89,116 @@ def _unprotect_inline_tags(text, protected_tags):
return text
+ @staticmethod
+ def _process_with_cdata_preservation(text, process_func, is_escape=True):
+ """
+ Process text while preserving CDATA sections.
+
+ Text outside CDATA sections is passed through `process_func`. CDATA
+ sections are copied through unchanged except for single and double
+ quotes, which are backslash-escaped or unescaped depending on
+ `is_escape`.
+
+ Args:
+ text (str): The text to process
+ process_func (callable): The processing function to apply to non-CDATA parts
+ is_escape (bool): True for escaping, False for unescaping
+
+ Returns:
+ str: The processed text with CDATA sections preserved
+ """
+ # Fast path: empty/None input, or no CDATA marker at all, is handed to
+ # process_func untouched.
+ if not text or "<![CDATA[" not in text:
+ return process_func(text)
+
+ # Non-greedy so that each CDATA section is matched on its own
+ cdata_pattern = r"<!\[CDATA\[.*?\]\]>"
+
+ # Find all CDATA sections and their positions
+ cdata_matches = list(re.finditer(cdata_pattern, text, re.DOTALL))
+
+ if not cdata_matches:
+ return process_func(text)
+
+ result = []
+ last_end = 0
+
+ for match in cdata_matches:
+ # Process the text before the CDATA section
+ before_cdata = text[last_end : match.start()]
+ if before_cdata:
+ result.append(process_func(before_cdata))
+
+ # Keep the CDATA section almost as-is: we escape/unescape single + double
+ # quotes to be consistent with current handler behavior.
+ # group(0) is the whole section, including the CDATA delimiters.
+ cdata_content = match.group(0)
+ if is_escape:
+ cdata_content = cdata_content.replace(
+ DumbXml.DOUBLE_QUOTES,
+ "".join([DumbXml.BACKSLASH, DumbXml.DOUBLE_QUOTES]),
+ ).replace(
+ DumbXml.SINGLE_QUOTE,
+ "".join([DumbXml.BACKSLASH, DumbXml.SINGLE_QUOTE]),
+ )
+ else:
+ cdata_content = cdata_content.replace(
+ "".join([DumbXml.BACKSLASH, DumbXml.DOUBLE_QUOTES]),
+ DumbXml.DOUBLE_QUOTES,
+ ).replace(
+ "".join([DumbXml.BACKSLASH, DumbXml.SINGLE_QUOTE]),
+ DumbXml.SINGLE_QUOTE,
+ )
+ result.append(cdata_content)
+
+ last_end = match.end()
+
+ # Process any remaining text after the last CDATA section
+ after_cdata = text[last_end:]
+ if after_cdata:
+ result.append(process_func(after_cdata))
+
+ return "".join(result)
+
@staticmethod
def escape(string):
- try:
- string, protected_tags = AndroidUnescapedHandler._protect_inline_tags(
- string
- )
- except Exception as _:
- # Exception handling: If an error occurs during tag protection,
- # escape all special characters. One case of these errors is the
- # presence of '<' symbols without corresponding closing tags, causing
- # parsing errors.
+ """Escape `string` piecewise via `_process_with_cdata_preservation`.
+
+ `_escape` is applied to the parts outside CDATA sections.
+ """
+
+ def _escape(string):
+ # Escape one CDATA-free fragment, keeping inline tags intact when
+ # they can be protected.
+ try:
+ string, protected_tags = AndroidUnescapedHandler._protect_inline_tags(
+ string
+ )
+ except Exception as _:
+ # Exception handling: If an error occurs during tag protection,
+ # escape all special characters. One case of these errors is the
+ # presence of '<' symbols without corresponding closing tags, causing
+ # parsing errors.
+ string = AndroidHandler.escape(string)
+ string = AndroidUnescapedHandler.escape_special_characters(string)
+ string = string.replace("<", "&lt;")
+ return string
+
string = AndroidHandler.escape(string)
string = AndroidUnescapedHandler.escape_special_characters(string)
- string = (
- string.replace("<", "&lt;")
+ return AndroidUnescapedHandler._unprotect_inline_tags(
+ string, protected_tags
)
- return string
- string = AndroidHandler.escape(string)
- string = AndroidUnescapedHandler.escape_special_characters(string)
- return AndroidUnescapedHandler._unprotect_inline_tags(string, protected_tags)
+ return AndroidUnescapedHandler._process_with_cdata_preservation(
+ string, _escape, is_escape=True
+ )
@staticmethod
def unescape(string):
- string = AndroidHandler.unescape(string)
- return (
- string.replace("\\?", "?")
- .replace("\\@", "@")
- .replace("\\t", "\t")
- .replace("\\n", "\n")
- .replace("&gt;", ">")
- .replace("&lt;", "<")
- .replace("&amp;", "&")
+ """Unescape `string` piecewise via `_process_with_cdata_preservation`.
+
+ `_unescape` is applied to the parts outside CDATA sections.
+ """
+
+ def _unescape(string):
+ # Unescape one CDATA-free fragment. "&amp;" is decoded last so that
+ # text such as "&amp;lt;" does not get decoded twice.
+ string = AndroidHandler.unescape(string)
+ return (
+ string.replace("\\?", "?")
+ .replace("\\@", "@")
+ .replace("\\t", "\t")
+ .replace("\\n", "\n")
+ .replace("&gt;", ">")
+ .replace("&lt;", "<")
+ .replace("&amp;", "&")
+ )
+
+ return AndroidUnescapedHandler._process_with_cdata_preservation(
+ string, _unescape, is_escape=False
)
@staticmethod
diff --git a/openformats/tests/formats/android/test_android_unescaped.py b/openformats/tests/formats/android/test_android_unescaped.py
index 746567f2..2e291cff 100644
--- a/openformats/tests/formats/android/test_android_unescaped.py
+++ b/openformats/tests/formats/android/test_android_unescaped.py
@@ -54,24 +54,24 @@ def test_escape(self):
)
def test_escape_lt_character(self):
- rich = '< 20 units'
- raw = '< 20 units'
+ rich = "< 20 units"
+ raw = "< 20 units"
self.assertEqual(
AndroidUnescapedHandler.escape(rich),
raw,
)
- rich = '< 20 & > 50 units'
- raw = '< 20 & > 50 units'
+ rich = "< 20 & > 50 units"
+ raw = "< 20 & > 50 units"
self.assertEqual(
AndroidUnescapedHandler.escape(rich),
raw,
)
- rich = '< 20 & > 50 unitstest'
- raw = '< 20 & > 50 units<xliff:g>test</xliff:g>'
+ rich = "< 20 & > 50 unitstest"
+ raw = "< 20 & > 50 units<xliff:g>test</xliff:g>"
self.assertEqual(
AndroidUnescapedHandler.escape(rich),
@@ -100,3 +100,216 @@ def test_create_string_raises_error(self):
AndroidUnescapedHandler._check_unescaped_characters,
unescaped_string,
)
+
+ # New tests for CDATA functionality
+ def test_process_with_cdata_preservation_no_cdata(self):
+ """Test _process_with_cdata_preservation with text that has no CDATA sections."""
+
+ def dummy_process(text):
+ return text.replace("&", "&")
+
+ text = "Hello & world"
+ result = AndroidUnescapedHandler._process_with_cdata_preservation(
+ text, dummy_process, is_escape=True
+ )
+ self.assertEqual(result, "Hello & world")
+
+ def test_process_with_cdata_preservation_empty_text(self):
+ """Test _process_with_cdata_preservation with empty or None text."""
+
+ def dummy_process(text):
+ if text is None:
+ return None
+ return text.replace("&", "&")
+
+ result = AndroidUnescapedHandler._process_with_cdata_preservation(
+ "", dummy_process, is_escape=True
+ )
+ self.assertEqual(result, "")
+
+ result = AndroidUnescapedHandler._process_with_cdata_preservation(
+ None, dummy_process, is_escape=True
+ )
+ self.assertEqual(result, None)
+
+ def test_process_with_cdata_preservation_single_cdata(self):
+ """Test _process_with_cdata_preservation with a single CDATA section."""
+
+ def dummy_process(text):
+ return text.replace("&", "&")
+
+ text = "Before after"
+ result = AndroidUnescapedHandler._process_with_cdata_preservation(
+ text, dummy_process, is_escape=True
+ )
+ # The & in regular text should be escaped, but CDATA content is only processed for quotes
+ expected = "Before after"
+ self.assertEqual(result, expected)
+
+ def test_process_with_cdata_preservation_multiple_cdata(self):
+ """Test _process_with_cdata_preservation with multiple CDATA sections."""
+
+ def dummy_process(text):
+ return text.replace("&", "&")
+
+ text = "Start & middle & end &"
+ result = AndroidUnescapedHandler._process_with_cdata_preservation(
+ text, dummy_process, is_escape=True
+ )
+ expected = "Start & middle & end &"
+ self.assertEqual(result, expected)
+
+ def test_process_with_cdata_preservation_unescape(self):
+ """Test _process_with_cdata_preservation with unescaping."""
+
+ def dummy_process(text):
+ return text.replace("&", "&")
+
+ text = "Before & after &"
+ result = AndroidUnescapedHandler._process_with_cdata_preservation(
+ text, dummy_process, is_escape=False
+ )
+ expected = "Before & after &"
+ self.assertEqual(result, expected)
+
+ def test_process_with_cdata_preservation_multiline_cdata(self):
+ """Test _process_with_cdata_preservation with multiline CDATA content."""
+
+ def dummy_process(text):
+ return text.replace("&", "&")
+
+ text = """Before & after &"""
+ result = AndroidUnescapedHandler._process_with_cdata_preservation(
+ text, dummy_process, is_escape=True
+ )
+ expected = """Before & after &"""
+ self.assertEqual(result, expected)
+
+ def test_escape_with_cdata_simple(self):
+ """Test escape method with simple CDATA content."""
+ text = "Hello & world &"
+ result = AndroidUnescapedHandler.escape(text)
+ expected = "Hello & world &"
+ self.assertEqual(result, expected)
+
+ def test_escape_with_cdata_complex(self):
+ """Test escape method with complex CDATA content including various characters."""
+ text = "Start @ Bold with \"quotes\" and 'apostrophes']]> end ?"
+ result = AndroidUnescapedHandler.escape(text)
+ expected = "Start \\@ Bold with \\\"quotes\\\" and \\'apostrophes\\']]> end \\?"
+ self.assertEqual(result, expected)
+
+ def test_escape_with_cdata_and_inline_tags(self):
+ """Test escape method with CDATA sections alongside inline tags."""
+ text = (
+ "Text %1$s & more text"
+ )
+ result = AndroidUnescapedHandler.escape(text)
+ expected = "Text %1$s & more text"
+ self.assertEqual(result, expected)
+
+ def test_unescape_with_cdata_simple(self):
+ """Test unescape method with simple CDATA content."""
+ text = "Hello & world &"
+ result = AndroidUnescapedHandler.unescape(text)
+ expected = "Hello & world &"
+ self.assertEqual(result, expected)
+
+ def test_unescape_with_cdata_complex(self):
+ """Test unescape method with complex CDATA content."""
+ text = "Start \\@ Bold with \\\"quotes\\\" and \\'apostrophes\\']]> end \\?"
+ result = AndroidUnescapedHandler.unescape(text)
+ expected = (
+ "Start @ Bold with \"quotes\" and 'apostrophes']]> end ?"
+ )
+ self.assertEqual(result, expected)
+
+ def test_escape_unescape_cdata_roundtrip(self):
+ """Test that escape and unescape are symmetric for CDATA content."""
+ original = (
+ "Text & @ ?"
+ )
+ escaped = AndroidUnescapedHandler.escape(original)
+ unescaped = AndroidUnescapedHandler.unescape(escaped)
+ self.assertEqual(original, unescaped)
+
+ def test_cdata_with_nested_brackets(self):
+ """Test CDATA sections containing nested brackets."""
+ text = "Before after"
+ escaped = AndroidUnescapedHandler.escape(text)
+ unescaped = AndroidUnescapedHandler.unescape(escaped)
+ self.assertEqual(text, unescaped)
+
+ def test_cdata_empty_content(self):
+ """Test CDATA sections with empty content."""
+ text = "Before after &"
+ result = AndroidUnescapedHandler.escape(text)
+ expected = "Before after &"
+ self.assertEqual(result, expected)
+
+ def test_cdata_only_quotes(self):
+ """Test CDATA sections containing only quotes."""
+ text = "Before after"
+ escaped = AndroidUnescapedHandler.escape(text)
+ expected = "Before after"
+ self.assertEqual(escaped, expected)
+
+ unescaped = AndroidUnescapedHandler.unescape(escaped)
+ self.assertEqual(text, unescaped)
+
+ def test_adjacent_cdata_sections(self):
+ """Test adjacent CDATA sections."""
+ text = ""
+ escaped = AndroidUnescapedHandler.escape(text)
+ expected = ""
+ self.assertEqual(escaped, expected)
+
+ unescaped = AndroidUnescapedHandler.unescape(escaped)
+ self.assertEqual(text, unescaped)
+
+ def test_cdata_at_boundaries(self):
+ """Test CDATA sections at text boundaries."""
+ # CDATA at start
+ text = " regular text &"
+ escaped = AndroidUnescapedHandler.escape(text)
+ expected = " regular text &"
+ self.assertEqual(escaped, expected)
+
+ # CDATA at end
+ text = "regular text & "
+ escaped = AndroidUnescapedHandler.escape(text)
+ expected = "regular text & "
+ self.assertEqual(escaped, expected)
+
+ def test_malformed_cdata_like_text(self):
+ """Test text that looks like CDATA but isn't properly formed."""
+ # Missing closing bracket
+ text = "Before after &"
+ result = AndroidUnescapedHandler.escape(text)
+ expected = "Before ![CDATA[content]]> after &"
+ self.assertEqual(result, expected)
+
+ def test_cdata_with_special_android_chars(self):
+ """Test CDATA preservation with Android-specific special characters."""
+ text = (
+ "Before & after @"
+ )
+ escaped = AndroidUnescapedHandler.escape(text)
+ expected = "Before & after \\@"
+ self.assertEqual(escaped, expected)
+
+ unescaped = AndroidUnescapedHandler.unescape(escaped)
+ self.assertEqual(text, unescaped)