Skip to content

Commit 0af6510

Browse files
authored
[cherry-pick]mv crop formula from gen_ai_client to pipeline (#4679)
* update docs * compatible with python3.9 * support print parsing_res_list * mv crop formula from gen_ai_client to pipeline
1 parent 9579f20 commit 0af6510

File tree

3 files changed

+34
-43
lines changed

3 files changed

+34
-43
lines changed

paddlex/inference/models/doc_vlm/predictor.py

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -370,46 +370,6 @@ def _switch_inputs_to_device(self, input_dict):
370370
}
371371
return rst_dict
372372

373-
def crop_margin(self, img): # input is an OpenCV image (numpy array)
374-
import cv2
375-
376-
# If the input is a color image, convert it to grayscale
377-
if len(img.shape) == 3:
378-
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
379-
else:
380-
gray = img.copy()
381-
382-
# Convert to the 0-255 range (make sure the dtype is uint8)
383-
if gray.dtype != np.uint8:
384-
gray = gray.astype(np.uint8)
385-
386-
max_val = gray.max()
387-
min_val = gray.min()
388-
389-
if max_val == min_val:
390-
return img
391-
392-
# Normalize and binarize (same logic as the PIL version)
393-
data = (gray - min_val) / (max_val - min_val) * 255
394-
data = data.astype(np.uint8)
395-
396-
# Build the binary image (dark regions become white, bright regions become black)
397-
_, binary = cv2.threshold(data, 200, 255, cv2.THRESH_BINARY_INV)
398-
399-
# Find the coordinates of the non-zero pixels
400-
coords = cv2.findNonZero(binary)
401-
402-
if coords is None: # nothing was found, so return the original image
403-
return img
404-
405-
# Get the bounding box
406-
x, y, w, h = cv2.boundingRect(coords)
407-
408-
# Crop the image
409-
cropped = img[y : y + h, x : x + w]
410-
411-
return cropped
412-
413373
def _genai_client_process(
414374
self,
415375
data,
@@ -425,9 +385,6 @@ def _genai_client_process(
425385

426386
def _process(item):
427387
image = item["image"]
428-
prompt = item["query"]
429-
if prompt == "Formula Recognition:":
430-
image = self.crop_margin(image)
431388
if isinstance(image, str):
432389
if image.startswith("http://") or image.startswith("https://"):
433390
image_url = image

paddlex/inference/pipelines/paddleocr_vl/pipeline.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from .result import PaddleOCRVLBlock, PaddleOCRVLResult
3636
from .uilts import (
3737
convert_otsl_to_html,
38+
crop_margin,
3839
filter_overlap_boxes,
3940
merge_blocks,
4041
tokenize_figure_of_table,
@@ -243,6 +244,7 @@ def get_layout_parsing_results(
243244
text_prompt = "Chart Recognition:"
244245
elif "formula" in block_label and block_label != "formula_number":
245246
text_prompt = "Formula Recognition:"
247+
block_img = crop_margin(block_img)
246248
block_imgs.append(block_img)
247249
text_prompts.append(text_prompt)
248250
figure_token_maps.append(figure_token_map)

paddlex/inference/pipelines/paddleocr_vl/uilts.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -923,3 +923,35 @@ def truncate_repetitive_content(
923923
return most_common_line
924924

925925
return content
926+
927+
928+
def crop_margin(img, threshold=200):
    """Crop the bright margin around the darker content of an image.

    The image is reduced to a single-channel uint8 array and min-max
    stretched to the 0-255 range. Every pixel whose stretched value is at
    most ``threshold`` counts as content, and the tight bounding box of all
    content pixels is cut out of ``img``.

    Args:
        img: Image as a numpy array. A 3-D array with more than one channel
            is converted with ``cv2.COLOR_BGR2GRAY``; a 3-D array with
            exactly one channel uses that channel directly; anything else
            is used as-is. Non-uint8 data is cast with ``astype(np.uint8)``
            before stretching.
        threshold: Upper bound (inclusive) on the stretched intensity of a
            content pixel. Defaults to 200.

    Returns:
        A slice of ``img`` (not a copy) limited to the content bounding box.
        ``img`` itself is returned unchanged when it is empty, has uniform
        intensity, or contains no pixel at or below ``threshold``.
    """
    # A degenerate (zero-size) crop has no min/max; hand it back untouched
    # instead of letting ``gray.min()`` raise ValueError.
    if img.size == 0:
        return img

    if img.ndim == 3 and img.shape[2] != 1:
        # cv2 is imported lazily: it is only needed for colour conversion.
        import cv2

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    elif img.ndim == 3:
        # (H, W, 1): BGR2GRAY rejects single-channel input, so take the
        # only channel directly.
        gray = img[:, :, 0]
    else:
        gray = img

    if gray.dtype != np.uint8:
        gray = gray.astype(np.uint8)

    lo = gray.min()
    hi = gray.max()
    if lo == hi:
        # Uniform image: nothing distinguishes content from margin.
        return img

    # Min-max stretch, truncated to uint8.
    stretched = ((gray - lo) / (hi - lo) * 255).astype(np.uint8)

    # Equivalent to cv2.threshold(..., THRESH_BINARY_INV) followed by
    # findNonZero: a pixel is content when it is NOT above the threshold.
    content = stretched <= threshold
    if not content.any():
        return img

    # Tight bounding box of the content mask (what cv2.boundingRect yields).
    rows = np.flatnonzero(content.any(axis=1))
    cols = np.flatnonzero(content.any(axis=0))
    return img[rows[0] : rows[-1] + 1, cols[0] : cols[-1] + 1]

0 commit comments

Comments
 (0)