Skip to content

Commit 1acd2ed

Browse files
author
Poulami Das
committed
update
1 parent 9285f5f commit 1acd2ed

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

adi_function_app/pre_embedding_cleaner.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,11 +71,10 @@ def clean_text(src_text: str) -> str:
7171
try:
7272
# Define specific patterns for each tag
7373
tag_patterns = {
74-
"figurecontent": r"<!-- FigureContent=(.*?)-->",
74+
"figurecontent": r"<!--.*?FigureContent=(.*?)-->",
7575
"figure": r"<figure>(.*?)</figure>",
7676
"figures": r"\(figures/\d+\)(.*?)\(figures/\d+\)",
77-
"figcaption": r"<figcaption>(.*?)</figcaption>",
78-
"figureidandcontent": r'<!--\s*FigureId="[^"]*"\s*FigureContent="[^"]*"\s*-->'
77+
"figcaption": r"<figcaption>(.*?)</figcaption>"
7978
}
8079
cleaned_text = remove_markdown_tags(src_text, tag_patterns)
8180

0 commit comments

Comments
 (0)