transifex · pliakisnick · Oct 1, 2025
diff --git a/openformats/formats/github_markdown_v2.py b/openformats/formats/github_markdown_v2.py
@@ -116,23 +116,60 @@ def parse(self, content, **kwargs):
             # Ignore any string that does not appear in the template,
             # We do this to avoid parsing strings that are not properly
             # handled by the Markdown library, such as ```code``` blocks
-            if string and string in md_template[curr_pos:]:
-                string_object = OpenString(six.text_type(order),
-                                           string,
-                                           order=order)
-                order += 1
-                stringset.append(string_object)
-                # Keep track of the index of the last replaced hash
-                md_template = (
-                    md_template[:curr_pos] + md_template[curr_pos:].replace(
-                        string, string_object.template_replacement, 1)
-                )
-
-                curr_pos = md_template.find(string_object.template_replacement)
-                curr_pos = curr_pos + len(string_object.template_replacement)
+            if string and (
+                bool(re.match(r'^\s*> \[!NOTE]', string)) 
+                or string in md_template[curr_pos:]
+            ):
+                # Special handling for [!NOTE] blocks
+                # TODO: check whether this also affects all indented blocks
+                if bool(re.match(r'^\s*> \[!NOTE]', string)):
+                    start, end = self.find_fuzzy_substring(string, md_template)
+                    if start is not None and end is not None:
+                        string_object = OpenString(six.text_type(order),
+                                                string,
+                                                order=order)
+                        order += 1
+                        stringset.append(string_object)
+                        md_template = (
+                            md_template[:start] + string_object.template_replacement
+                            + md_template[end:]
+                        )
+                        curr_pos = start + len(string_object.template_replacement)
+                elif string in md_template[curr_pos:]:
+                    string_object = OpenString(six.text_type(order),
+                                            string,
+                                            order=order)
+                    order += 1
+                    stringset.append(string_object)
+                    # Keep track of the index of the last replaced hash
+                    md_template = (
+                        md_template[:curr_pos] + md_template[curr_pos:].replace(
+                            string, string_object.template_replacement, 1)
+                    )
+
+                    curr_pos = md_template.find(string_object.template_replacement)
+                    curr_pos = curr_pos + len(string_object.template_replacement)
 
         template = yaml_template + seperator + md_template
         return force_newline_type(template, newline_type), stringset
+
+    def find_fuzzy_substring(self, pattern, text):
+        """Find `pattern` in `text`, treating any whitespace run as equal.
+
+        Leading and trailing whitespace in `pattern` is ignored. Returns the
+        `(start, end)` span of the first match in `text`, or None when
+        `pattern` has no non-whitespace tokens or nothing matches, so
+        callers must check for None before unpacking the result.
+        """
+        tokens = re.findall(r'\S+', pattern)
+        if not tokens:
+            return None
+
+        # Tokens are matched literally; only the whitespace between them
+        # may differ. Surrounding whitespace never affects the token span.
+        core = r'\s+'.join(re.escape(token) for token in tokens)
+        m = re.search(core, text)
+        return m.span() if m else None
 
     def _is_yaml_string(self, string):
         """Return True if the given open string is in YAML format, False otherwise.

diff --git a/openformats/tests/formats/github_markdown_v2/test_github_markdown.py b/openformats/tests/formats/github_markdown_v2/test_github_markdown.py
@@ -40,7 +40,59 @@ def test_parse_non_unicode(self):
         content_with_normal_space = self.handler.parse(content=u"# foo bar")
         self.assertEqual(
             content_with_non_unicode_space[0], content_with_normal_space[0])
+
+    def test_parse_indented_note_block(self):
+        """[!NOTE] blockquotes parse both at top level and inside a list."""
+        source = u"""
+Sample heading
+
+> [!NOTE]
+> Non-indented block
+
+1. Sample heading
+
+    Sample sub-heading
+
+    > [!NOTE]
+    > This is an indented block
+"""
+        template, stringset = self.handler.parse(content=source)
+        self.assertEqual(
+            template,
+            "\n9a1c7ee2c7ce38d4bbbaf29ab9f2ac1e_tr"
+            "\n\n3afcdbfeb6ecfbdd0ba628696e3cc163_tr\n\n"
+            "1. 247730f9d0d2eaad265a470e32aa0cdf_tr\n\n"
+            "   cdee9bf40a070d58d14dfa3bb61e0032_tr\n\n"
+            "    7693e302dc09b57483d26522ef25feb4_tr\n",
+        )
+        self.assertEqual(len(stringset), 5)
+        self.assertEqual(
+            [open_string.string for open_string in stringset],
+            [
+                "Sample heading",
+                "> [!NOTE]\n> Non-indented block",
+                "Sample heading",
+                " Sample sub-heading",
+                " > [!NOTE]\n > This is an indented block",
+            ],
+        )
+
+    def test_find_fuzzy_substring(self):
+        # Runs of whitespace in the text must still match single spaces.
+        substring = "Here is a string"
+        string = "Yes. Here    is a    string that we like"
+
+        span = self.handler.find_fuzzy_substring(substring, string)
+        self.assertEqual(span, (5, 27))
+
+    def test_find_fuzzy_substring_no_match_when_extra_token_present(self):
+        # "not" is absent from the text, so no whitespace-tolerant match exists.
+        pattern = "Here is not a string"
+        text = "Yes. Here    is a    string that we like"
+        self.assertIsNone(self.handler.find_fuzzy_substring(pattern, text))
 
+    def test_empty_pattern_returns_none(self):
+        self.assertIsNone(self.handler.find_fuzzy_substring("", "anything at all"))
 
 class GithubMarkdownV2CustomTestCase(unittest.TestCase):
     """Tests some additional functionality of GithubMarkdownHandlerV2.