Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
command: pip install tox
- run:
name: Run tests
command: tox
command: tox -r

workflows:
version: 2
Expand Down
126 changes: 102 additions & 24 deletions openformats/formats/android_unescaped.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from openformats.formats.android import AndroidHandler
from ..utils.xml import NewDumbXml as DumbXml


class AndroidUnescapedHandler(AndroidHandler):
def _create_string(self, name, text, comment, product, child, pluralized=False):
"""Creates a string and returns it. If empty string it returns None.
Expand Down Expand Up @@ -88,39 +89,116 @@ def _unprotect_inline_tags(text, protected_tags):

return text

@staticmethod
def _process_with_cdata_preservation(text, process_func, is_escape=True):
    """Apply ``process_func`` to ``text`` while leaving CDATA sections alone.

    The text is split around every ``<![CDATA[ ... ]]>`` section. The parts
    outside the sections are passed through ``process_func``; the sections
    themselves are copied over with only their single and double quotes
    (un)escaped.

    Args:
        text (str): The text to process.
        process_func (callable): Called with each non-empty stretch of text
            that lies outside a CDATA section; its return value is used in
            place of that stretch.
        is_escape (bool): When True, quotes inside CDATA sections get a
            leading backslash; when False, that backslash is removed.

    Returns:
        str: The processed text with CDATA sections preserved. When ``text``
        is empty or holds no complete CDATA section, this is simply
        ``process_func(text)``.
    """
    if not text or "<![CDATA[" not in text:
        return process_func(text)

    sections = list(re.finditer(r"<!\[CDATA\[(.*?)\]\]>", text, re.DOTALL))
    if not sections:
        # An opening marker without a matching "]]>" is not a CDATA section,
        # so the whole text is handled as ordinary content.
        return process_func(text)

    # CDATA sections are kept almost as-is: only single and double quotes
    # are escaped/unescaped, to stay consistent with the handler's current
    # treatment of quotes.
    escaped_double = "".join([DumbXml.BACKSLASH, DumbXml.DOUBLE_QUOTES])
    escaped_single = "".join([DumbXml.BACKSLASH, DumbXml.SINGLE_QUOTE])
    if is_escape:
        quote_swaps = (
            (DumbXml.DOUBLE_QUOTES, escaped_double),
            (DumbXml.SINGLE_QUOTE, escaped_single),
        )
    else:
        quote_swaps = (
            (escaped_double, DumbXml.DOUBLE_QUOTES),
            (escaped_single, DumbXml.SINGLE_QUOTE),
        )

    pieces = []
    cursor = 0
    for section in sections:
        # Ordinary text between the previous section and this one.
        leading = text[cursor : section.start()]
        if leading:
            pieces.append(process_func(leading))

        # The full section, including its <![CDATA[ and ]]> delimiters.
        preserved = section.group(0)
        for found, replacement in quote_swaps:
            preserved = preserved.replace(found, replacement)
        pieces.append(preserved)

        cursor = section.end()

    # Ordinary text following the last section.
    trailing = text[cursor:]
    if trailing:
        pieces.append(process_func(trailing))

    return "".join(pieces)

@staticmethod
def escape(string):
    """Escape ``string`` while keeping inline tags and CDATA sections.

    Text outside CDATA sections is escaped by ``_escape`` below; the CDATA
    sections themselves are handled by ``_process_with_cdata_preservation``.

    Args:
        string (str): The text to escape.

    Returns:
        str: The escaped text.
    """

    def _escape(segment):
        # Escape one stretch of text that lies outside any CDATA section.
        try:
            segment, protected_tags = AndroidUnescapedHandler._protect_inline_tags(
                segment
            )
        except Exception:
            # Deliberate best-effort fallback: if an error occurs during tag
            # protection, escape all special characters. One case of these
            # errors is the presence of '<' symbols without corresponding
            # closing tags, causing parsing errors.
            segment = AndroidHandler.escape(segment)
            segment = AndroidUnescapedHandler.escape_special_characters(segment)
            return segment.replace("<", "&lt;")

        segment = AndroidHandler.escape(segment)
        segment = AndroidUnescapedHandler.escape_special_characters(segment)
        # Put the protected inline tags back after escaping.
        return AndroidUnescapedHandler._unprotect_inline_tags(
            segment, protected_tags
        )

    return AndroidUnescapedHandler._process_with_cdata_preservation(
        string, _escape, is_escape=True
    )

@staticmethod
def unescape(string):
    """Unescape ``string`` while keeping CDATA sections.

    Text outside CDATA sections is unescaped by ``_unescape`` below; the
    CDATA sections themselves are handled by
    ``_process_with_cdata_preservation``.

    Args:
        string (str): The text to unescape.

    Returns:
        str: The unescaped text.
    """

    def _unescape(segment):
        # Unescape one stretch of text that lies outside any CDATA section.
        segment = AndroidHandler.unescape(segment)
        # "&amp;" is replaced last so that text such as "&amp;lt;" ends up as
        # the literal "&lt;" rather than being unescaped twice into "<".
        return (
            segment.replace("\\?", "?")
            .replace("\\@", "@")
            .replace("\\t", "\t")
            .replace("\\n", "\n")
            .replace("&gt;", ">")
            .replace("&lt;", "<")
            .replace("&amp;", "&")
        )

    return AndroidUnescapedHandler._process_with_cdata_preservation(
        string, _unescape, is_escape=False
    )

@staticmethod
Expand Down
Loading
Loading