Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 51 additions & 14 deletions openformats/formats/github_markdown_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,23 +116,60 @@ def parse(self, content, **kwargs):
# Ignore any string that does not appear in the template,
# We do this to avoid parsing strings that are not properly
# handled by the Markdown library, such as ```code``` blocks
if string and string in md_template[curr_pos:]:
string_object = OpenString(six.text_type(order),
string,
order=order)
order += 1
stringset.append(string_object)
# Keep track of the index of the last replaced hash
md_template = (
md_template[:curr_pos] + md_template[curr_pos:].replace(
string, string_object.template_replacement, 1)
)

curr_pos = md_template.find(string_object.template_replacement)
curr_pos = curr_pos + len(string_object.template_replacement)
if string and (
bool(re.match(r'^\s*> \[!NOTE]', string))
or string in md_template[curr_pos:]
):
# Special handling for [!NOTE] blocks
# Investigate if issue extends to all indented blocks
if bool(re.match(r'^\s*> \[!NOTE]', string)):
start, end = self.find_fuzzy_substring(string, md_template)
if start is not None and end is not None:
string_object = OpenString(six.text_type(order),
string,
order=order)
order += 1
stringset.append(string_object)
md_template = (
md_template[:start] + string_object.template_replacement
+ md_template[end:]
)
curr_pos = start + len(string_object.template_replacement)
elif string in md_template[curr_pos:]:
string_object = OpenString(six.text_type(order),
string,
order=order)
order += 1
stringset.append(string_object)
# Keep track of the index of the last replaced hash
md_template = (
md_template[:curr_pos] + md_template[curr_pos:].replace(
string, string_object.template_replacement, 1)
)

curr_pos = md_template.find(string_object.template_replacement)
curr_pos = curr_pos + len(string_object.template_replacement)

template = yaml_template + seperator + md_template
return force_newline_type(template, newline_type), stringset

def find_fuzzy_substring(self, pattern, text):
    """Locate ``pattern`` in ``text`` while ignoring whitespace differences.

    ``pattern`` is split into whitespace-delimited tokens. A match is the
    first place in ``text`` where those tokens appear literally and in
    order, separated by one or more whitespace characters of any kind
    (spaces, tabs, newlines), regardless of how they were separated in
    ``pattern``.

    :param pattern: The string to look for.
    :param text: The string to search in.
    :return: A ``(start, end)`` tuple of indices into ``text`` running from
        the first character of the first token to the end of the last
        token, or ``None`` when ``pattern`` contains no non-whitespace
        tokens or the tokens cannot be found. Callers must check for
        ``None`` before unpacking the result.
    """
    # Split pattern into non-whitespace tokens
    tokens = re.findall(r'\S+', pattern)
    if not tokens:
        return None

    # Escape each token literally; join with \s+ (any whitespace)
    core = r'\s+'.join(re.escape(token) for token in tokens)

    # The core starts and ends with a non-whitespace character, so its span
    # already excludes any surrounding indentation. Wrapping it in optional
    # `\s*` groups would not change the reported span; it would only make
    # the search re-consume every whitespace run from each start offset.
    match = re.search(core, text)
    if match is None:
        return None

    return match.span()

def _is_yaml_string(self, string):
"""Return True if the given open string is in YAML format, False otherwise.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,59 @@ def test_parse_non_unicode(self):
content_with_normal_space = self.handler.parse(content=u"# foo bar")
self.assertEqual(
content_with_non_unicode_space[0], content_with_normal_space[0])

def test_parse_indented_note_block(self):
# Parses a document holding one `[!NOTE]` blockquote directly under a heading
# and a second one under a numbered list item, then checks the hashed
# template and the extracted strings.
# NOTE(review): this fixture depends on exact leading whitespace inside the
# triple-quoted string and the expected literals; confirm the indentation
# widths against the upstream file before editing any of them.
indent_content = u"""
Sample heading

> [!NOTE]
> Non-indented block

1. Sample heading

Sample sub-heading

> [!NOTE]
> This is an indented block
"""
# Expected template: one `<hash>_tr` placeholder per extracted string, with
# the list marker and blank lines between entries left in place.
expected_hashed_template = (
"\n9a1c7ee2c7ce38d4bbbaf29ab9f2ac1e_tr"
"\n\n3afcdbfeb6ecfbdd0ba628696e3cc163_tr\n\n"
"1. 247730f9d0d2eaad265a470e32aa0cdf_tr\n\n"
" cdee9bf40a070d58d14dfa3bb61e0032_tr\n\n"
" 7693e302dc09b57483d26522ef25feb4_tr\n"
)
# parse() returns a pair: index 0 is compared against the template, index 1
# is the list of extracted string objects.
parsed_content_indent = self.handler.parse(content=indent_content)
self.assertEqual(parsed_content_indent[0], expected_hashed_template)
self.assertEqual(len(parsed_content_indent[1]), 5)

# Strings are checked in document order; the last two are expected to keep
# their leading whitespace.
self.assertEqual(parsed_content_indent[1][0].string, "Sample heading")
self.assertEqual(
parsed_content_indent[1][1].string, "> [!NOTE]\n> Non-indented block"
)
self.assertEqual(parsed_content_indent[1][2].string, "Sample heading")
self.assertEqual(parsed_content_indent[1][3].string, " Sample sub-heading")
self.assertEqual(
parsed_content_indent[1][4].string,
" > [!NOTE]\n > This is an indented block"
)

def test_find_fuzzy_substring(self):
    # The needle's tokens occur in order inside the haystack, so the handler
    # should report a (start, end) span rather than None.
    # NOTE(review): with the two literals exactly as written here the match
    # would presumably span (5, 21); the expected (5, 27) implies six more
    # whitespace characters inside the matched region of the haystack.
    # Confirm both literals against the upstream file.
    haystack = "Yes. Here is a string that we like"
    needle = "Here is a string"

    found = self.handler.find_fuzzy_substring(needle, haystack)
    assert found is not None
    assert found == (5, 27)

def test_find_fuzzy_substring_no_match_when_extra_token_present(self):
    # The needle carries a token ("not") that the haystack lacks, so the
    # lookup must report no match.
    haystack = "Yes. Here is a string that we like"
    needle = "Here is not a string"

    outcome = self.handler.find_fuzzy_substring(needle, haystack)
    assert outcome is None

def test_empty_pattern_returns_none(self):
    # An empty pattern must yield None instead of a span.
    outcome = self.handler.find_fuzzy_substring("", "anything at all")
    assert outcome is None

class GithubMarkdownV2CustomTestCase(unittest.TestCase):
"""Tests some additional functionality of GithubMarkdownHandlerV2.
Expand Down