Skip to content

Commit 32fe2f7

Browse files
changdazhou authored and TingquanGao committed
support set max_new_tokens
1 parent c1ca660 commit 32fe2f7

File tree

1 file changed

+17
-2
lines changed

1 file changed

+17
-2
lines changed

paddlex/inference/pipelines/paddleocr_vl/pipeline.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -249,10 +249,14 @@ def get_layout_parsing_results(
249249
vlm_block_ids.append((i, j))
250250
drop_figures_set.update(drop_figures)
251251

252+
if vlm_kwargs is None:
253+
vlm_kwargs = {}
254+
elif vlm_kwargs.get("max_new_tokens", None) is None:
255+
vlm_kwargs["max_new_tokens"] = 4096
256+
252257
kwargs = {
253258
"use_cache": True,
254-
"max_new_tokens": 4096,
255-
**(vlm_kwargs or {}),
259+
**vlm_kwargs,
256260
}
257261
vl_rec_results = list(
258262
self.vl_rec_model.predict(
@@ -358,6 +362,7 @@ def predict(
358362
top_p: Optional[float] = None,
359363
min_pixels: Optional[int] = None,
360364
max_pixels: Optional[int] = None,
365+
max_new_tokens: Optional[int] = None,
361366
**kwargs,
362367
) -> PaddleOCRVLResult:
363368
"""
@@ -376,6 +381,15 @@ def predict(
376381
If it's a tuple of two numbers, then they are used separately for width and height respectively.
377382
If it's None, then no unclipping will be performed.
378383
layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to None.
384+
use_queues (Optional[bool], optional): Whether to use queues. Defaults to None.
385+
prompt_label (Optional[Union[str, None]], optional): The label of the prompt in ['ocr', 'formula', 'table', 'chart']. Defaults to None.
386+
format_block_content (Optional[bool]): Whether to format the block content. Default is None.
387+
repetition_penalty (Optional[float]): The repetition penalty parameter used for VL model sampling. Default is None.
388+
temperature (Optional[float]): Temperature parameter used for VL model sampling. Default is None.
389+
top_p (Optional[float]): Top-p parameter used for VL model sampling. Default is None.
390+
min_pixels (Optional[int]): The minimum number of pixels allowed when the VL model preprocesses images. Default is None.
391+
max_pixels (Optional[int]): The maximum number of pixels allowed when the VL model preprocesses images. Default is None.
392+
max_new_tokens (Optional[int]): The maximum number of new tokens. Default is None.
379393
**kwargs (Any): Additional settings to extend functionality.
380394
381395
Returns:
@@ -499,6 +513,7 @@ def _process_vlm(results_cv):
499513
"top_p": top_p,
500514
"min_pixels": min_pixels,
501515
"max_pixels": max_pixels,
516+
"max_new_tokens": max_new_tokens,
502517
},
503518
)
504519
)

0 commit comments

Comments
 (0)