diff --git a/.circleci/config.yml b/.circleci/config.yml index 430d1bb5..094294d0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,7 +18,7 @@ jobs: command: pip install tox - run: name: Run tests - command: tox + command: tox -r workflows: version: 2 diff --git a/openformats/formats/android_unescaped.py b/openformats/formats/android_unescaped.py index c95a1210..76c77ce2 100644 --- a/openformats/formats/android_unescaped.py +++ b/openformats/formats/android_unescaped.py @@ -4,6 +4,7 @@ from openformats.formats.android import AndroidHandler from ..utils.xml import NewDumbXml as DumbXml + class AndroidUnescapedHandler(AndroidHandler): def _create_string(self, name, text, comment, product, child, pluralized=False): """Creates a string and returns it. If empty string it returns None. @@ -88,39 +89,116 @@ def _unprotect_inline_tags(text, protected_tags): return text + @staticmethod + def _process_with_cdata_preservation(text, process_func, is_escape=True): + """ + Process text while preserving CDATA sections. + + Args: + text (str): The text to process + process_func (callable): The processing function to apply to non-CDATA parts + is_escape (bool): True for escaping, False for unescaping + + Returns: + str: The processed text with CDATA sections preserved + """ + if not text or "" + + # Find all CDATA sections and their positions + cdata_matches = list(re.finditer(cdata_pattern, text, re.DOTALL)) + + if not cdata_matches: + return process_func(text) + + result = [] + last_end = 0 + + for match in cdata_matches: + # Process the text before the CDATA section + before_cdata = text[last_end : match.start()] + if before_cdata: + result.append(process_func(before_cdata)) + + # Keep the CDATA section almost as-is: we escape/unescape single + double + # quotes to be consistent with current handler behavior (see + # ) + cdata_content = match.group(0) + if is_escape: + cdata_content = cdata_content.replace( + DumbXml.DOUBLE_QUOTES, + "".join([DumbXml.BACKSLASH, DumbXml.DOUBLE_QUOTES]), + ).replace( + DumbXml.SINGLE_QUOTE, + "".join([DumbXml.BACKSLASH, DumbXml.SINGLE_QUOTE]), + ) + else: + cdata_content = cdata_content.replace( + "".join([DumbXml.BACKSLASH, DumbXml.DOUBLE_QUOTES]), + DumbXml.DOUBLE_QUOTES, + ).replace( + "".join([DumbXml.BACKSLASH, DumbXml.SINGLE_QUOTE]), + DumbXml.SINGLE_QUOTE, + ) + result.append(cdata_content) + + last_end = match.end() + + # Process any remaining text after the last CDATA section + after_cdata = text[last_end:] + if after_cdata: + result.append(process_func(after_cdata)) + + return "".join(result) + @staticmethod def escape(string): - try: - string, protected_tags = AndroidUnescapedHandler._protect_inline_tags( - string - ) - except Exception as _: - # Exception handling: If an error occurs during tag protection, - # escape all special characters. One case of these errors is the - # presence of '<' symbols without corresponding closing tags, causing - # parsing errors. + + def _escape(string): + try: + string, protected_tags = AndroidUnescapedHandler._protect_inline_tags( + string + ) + except Exception as _: + # Exception handling: If an error occurs during tag protection, + # escape all special characters. One case of these errors is the + # presence of '<' symbols without corresponding closing tags, causing + # parsing errors. + string = AndroidHandler.escape(string) + string = AndroidUnescapedHandler.escape_special_characters(string) + string = string.replace("<", "<") + return string + string = AndroidHandler.escape(string) string = AndroidUnescapedHandler.escape_special_characters(string) - string = ( - string.replace("<", "<") + return AndroidUnescapedHandler._unprotect_inline_tags( + string, protected_tags ) - return string - string = AndroidHandler.escape(string) - string = AndroidUnescapedHandler.escape_special_characters(string) - return AndroidUnescapedHandler._unprotect_inline_tags(string, protected_tags) + return AndroidUnescapedHandler._process_with_cdata_preservation( + string, _escape, is_escape=True + ) @staticmethod def unescape(string): - string = AndroidHandler.unescape(string) - return ( - string.replace("\\?", "?") - .replace("\\@", "@") - .replace("\\t", "\t") - .replace("\\n", "\n") - .replace(">", ">") - .replace("<", "<") - .replace("&", "&") + + def _unescape(string): + string = AndroidHandler.unescape(string) + return ( + string.replace("\\?", "?") + .replace("\\@", "@") + .replace("\\t", "\t") + .replace("\\n", "\n") + .replace(">", ">") + .replace("<", "<") + .replace("&", "&") + ) + + return AndroidUnescapedHandler._process_with_cdata_preservation( + string, _unescape, is_escape=False ) @staticmethod diff --git a/openformats/tests/formats/android/test_android_unescaped.py b/openformats/tests/formats/android/test_android_unescaped.py index 746567f2..2e291cff 100644 --- a/openformats/tests/formats/android/test_android_unescaped.py +++ b/openformats/tests/formats/android/test_android_unescaped.py @@ -54,24 +54,24 @@ def test_escape(self): ) def test_escape_lt_character(self): - rich = '< 20 units' - raw = '< 20 units' + rich = "< 20 units" + raw = "< 20 units" self.assertEqual( AndroidUnescapedHandler.escape(rich), raw, ) - rich = '< 20 & > 50 units' - raw = '< 20 & > 50 units' + rich = "< 20 & > 50 units" + raw = "< 20 & > 50 units" self.assertEqual( AndroidUnescapedHandler.escape(rich), raw, ) - rich = '< 20 & > 50 unitstest' - raw = '< 20 & > 50 units<xliff:g>test</xliff:g>' + rich = "< 20 & > 50 unitstest" + raw = "< 20 & > 50 units<xliff:g>test</xliff:g>" self.assertEqual( AndroidUnescapedHandler.escape(rich), @@ -100,3 +100,216 @@ def test_create_string_raises_error(self): AndroidUnescapedHandler._check_unescaped_characters, unescaped_string, ) + + # New tests for CDATA functionality + def test_process_with_cdata_preservation_no_cdata(self): + """Test _process_with_cdata_preservation with text that has no CDATA sections.""" + + def dummy_process(text): + return text.replace("&", "&") + + text = "Hello & world" + result = AndroidUnescapedHandler._process_with_cdata_preservation( + text, dummy_process, is_escape=True + ) + self.assertEqual(result, "Hello & world") + + def test_process_with_cdata_preservation_empty_text(self): + """Test _process_with_cdata_preservation with empty or None text.""" + + def dummy_process(text): + if text is None: + return None + return text.replace("&", "&") + + result = AndroidUnescapedHandler._process_with_cdata_preservation( + "", dummy_process, is_escape=True + ) + self.assertEqual(result, "") + + result = AndroidUnescapedHandler._process_with_cdata_preservation( + None, dummy_process, is_escape=True + ) + self.assertEqual(result, None) + + def test_process_with_cdata_preservation_single_cdata(self): + """Test _process_with_cdata_preservation with a single CDATA section.""" + + def dummy_process(text): + return text.replace("&", "&") + + text = "Before after" + result = AndroidUnescapedHandler._process_with_cdata_preservation( + text, dummy_process, is_escape=True + ) + # The & in regular text should be escaped, but CDATA content is only processed for quotes + expected = "Before after" + self.assertEqual(result, expected) + + def test_process_with_cdata_preservation_multiple_cdata(self): + """Test _process_with_cdata_preservation with multiple CDATA sections.""" + + def dummy_process(text): + return text.replace("&", "&") + + text = "Start & middle & end &" + result = AndroidUnescapedHandler._process_with_cdata_preservation( + text, dummy_process, is_escape=True + ) + expected = "Start & middle & end &" + self.assertEqual(result, expected) + + def test_process_with_cdata_preservation_unescape(self): + """Test _process_with_cdata_preservation with unescaping.""" + + def dummy_process(text): + return text.replace("&", "&") + + text = "Before & after &" + result = AndroidUnescapedHandler._process_with_cdata_preservation( + text, dummy_process, is_escape=False + ) + expected = "Before & after &" + self.assertEqual(result, expected) + + def test_process_with_cdata_preservation_multiline_cdata(self): + """Test _process_with_cdata_preservation with multiline CDATA content.""" + + def dummy_process(text): + return text.replace("&", "&") + + text = """Before & after &""" + result = AndroidUnescapedHandler._process_with_cdata_preservation( + text, dummy_process, is_escape=True + ) + expected = """Before & after &""" + self.assertEqual(result, expected) + + def test_escape_with_cdata_simple(self): + """Test escape method with simple CDATA content.""" + text = "Hello & world &" + result = AndroidUnescapedHandler.escape(text) + expected = "Hello & world &" + self.assertEqual(result, expected) + + def test_escape_with_cdata_complex(self): + """Test escape method with complex CDATA content including various characters.""" + text = "Start @ Bold with \"quotes\" and 'apostrophes']]> end ?" + result = AndroidUnescapedHandler.escape(text) + expected = "Start \\@ Bold with \\\"quotes\\\" and \\'apostrophes\\']]> end \\?" + self.assertEqual(result, expected) + + def test_escape_with_cdata_and_inline_tags(self): + """Test escape method with CDATA sections alongside inline tags.""" + text = ( + "Text %1$s & more text" + ) + result = AndroidUnescapedHandler.escape(text) + expected = "Text %1$s & more text" + self.assertEqual(result, expected) + + def test_unescape_with_cdata_simple(self): + """Test unescape method with simple CDATA content.""" + text = "Hello & world &" + result = AndroidUnescapedHandler.unescape(text) + expected = "Hello & world &" + self.assertEqual(result, expected) + + def test_unescape_with_cdata_complex(self): + """Test unescape method with complex CDATA content.""" + text = "Start \\@ Bold with \\\"quotes\\\" and \\'apostrophes\\']]> end \\?" + result = AndroidUnescapedHandler.unescape(text) + expected = ( + "Start @ Bold with \"quotes\" and 'apostrophes']]> end ?" + ) + self.assertEqual(result, expected) + + def test_escape_unescape_cdata_roundtrip(self): + """Test that escape and unescape are symmetric for CDATA content.""" + original = ( + "Text & @ ?" + ) + escaped = AndroidUnescapedHandler.escape(original) + unescaped = AndroidUnescapedHandler.unescape(escaped) + self.assertEqual(original, unescaped) + + def test_cdata_with_nested_brackets(self): + """Test CDATA sections containing nested brackets.""" + text = "Before after" + escaped = AndroidUnescapedHandler.escape(text) + unescaped = AndroidUnescapedHandler.unescape(escaped) + self.assertEqual(text, unescaped) + + def test_cdata_empty_content(self): + """Test CDATA sections with empty content.""" + text = "Before after &" + result = AndroidUnescapedHandler.escape(text) + expected = "Before after &" + self.assertEqual(result, expected) + + def test_cdata_only_quotes(self): + """Test CDATA sections containing only quotes.""" + text = "Before after" + escaped = AndroidUnescapedHandler.escape(text) + expected = "Before after" + self.assertEqual(escaped, expected) + + unescaped = AndroidUnescapedHandler.unescape(escaped) + self.assertEqual(text, unescaped) + + def test_adjacent_cdata_sections(self): + """Test adjacent CDATA sections.""" + text = "" + escaped = AndroidUnescapedHandler.escape(text) + expected = "" + self.assertEqual(escaped, expected) + + unescaped = AndroidUnescapedHandler.unescape(escaped) + self.assertEqual(text, unescaped) + + def test_cdata_at_boundaries(self): + """Test CDATA sections at text boundaries.""" + # CDATA at start + text = " regular text &" + escaped = AndroidUnescapedHandler.escape(text) + expected = " regular text &" + self.assertEqual(escaped, expected) + + # CDATA at end + text = "regular text & " + escaped = AndroidUnescapedHandler.escape(text) + expected = "regular text & " + self.assertEqual(escaped, expected) + + def test_malformed_cdata_like_text(self): + """Test text that looks like CDATA but isn't properly formed.""" + # Missing closing bracket + text = "Before after &" + result = AndroidUnescapedHandler.escape(text) + expected = "Before ![CDATA[content]]> after &" + self.assertEqual(result, expected) + + def test_cdata_with_special_android_chars(self): + """Test CDATA preservation with Android-specific special characters.""" + text = ( + "Before & after @" + ) + escaped = AndroidUnescapedHandler.escape(text) + expected = "Before & after \\@" + self.assertEqual(escaped, expected) + + unescaped = AndroidUnescapedHandler.unescape(escaped) + self.assertEqual(text, unescaped)