Skip to content

Commit a2be29d

Browse files
changdazhou authored and Bobholamovic committed
support concatenate_markdown_pages (#4622)
* add PP-DocLayoutV2 in official models * support concatenate_markdown_pages
1 parent 682c15b commit a2be29d

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

paddlex/inference/pipelines/paddleocr_vl/pipeline.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -675,6 +675,23 @@ def _worker_vlm():
675675
if thread_vlm.is_alive():
676676
logging.warning("VLM worker did not terminate in time")
677677

678+
def concatenate_markdown_pages(self, markdown_list: list) -> str:
    """
    Concatenate Markdown content from multiple pages into a single document.

    Args:
        markdown_list (list): A list containing Markdown data for each page.
            Each item is indexed with the ``"markdown_texts"`` key to obtain
            that page's text.

    Returns:
        str: The page texts joined in input order. Every page, including the
            first, is preceded by two newline characters, so a non-empty
            result starts with a blank-line separator. An empty list yields
            an empty string.

    Raises:
        KeyError: If an item has no ``"markdown_texts"`` entry.
    """
    # The separator is emitted before each page (not between pages) so the
    # output stays identical to the previous accumulate-with-`+=` version.
    return "".join("\n\n" + res["markdown_texts"] for res in markdown_list)
694+
678695

679696
@pipeline_requires_extra("ocr")
680697
class PaddleOCRVLPipeline(AutoParallelImageSimpleInferencePipeline):

0 commit comments

Comments
 (0)