@@ -249,10 +249,14 @@ def get_layout_parsing_results(
249249 vlm_block_ids .append ((i , j ))
250250 drop_figures_set .update (drop_figures )
251251
252+ if vlm_kwargs is None :
253+ vlm_kwargs = {}
254+ elif vlm_kwargs .get ("max_new_tokens" , None ) is None :
255+ vlm_kwargs ["max_new_tokens" ] = 4096
256+
252257 kwargs = {
253258 "use_cache" : True ,
254- "max_new_tokens" : 4096 ,
255- ** (vlm_kwargs or {}),
259+ ** vlm_kwargs ,
256260 }
257261 vl_rec_results = list (
258262 self .vl_rec_model .predict (
@@ -358,6 +362,7 @@ def predict(
358362 top_p : Optional [float ] = None ,
359363 min_pixels : Optional [int ] = None ,
360364 max_pixels : Optional [int ] = None ,
365+ max_new_tokens : Optional [int ] = None ,
361366 ** kwargs ,
362367 ) -> PaddleOCRVLResult :
363368 """
@@ -376,6 +381,15 @@ def predict(
376381 If it's a tuple of two numbers, then they are used separately for width and height respectively.
377382 If it's None, then no unclipping will be performed.
378383 layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to None.
384+ use_queues (Optional[bool], optional): Whether to use queues. Defaults to None.
385+ prompt_label (Optional[str], optional): The prompt label; one of 'ocr', 'formula', 'table', or 'chart'. Defaults to None.
386+ format_block_content (Optional[bool]): Whether to format the block content. Default is None.
387+ repetition_penalty (Optional[float]): The repetition penalty parameter used for VL model sampling. Default is None.
388+ temperature (Optional[float]): Temperature parameter used for VL model sampling. Default is None.
389+ top_p (Optional[float]): Top-p parameter used for VL model sampling. Default is None.
390+ min_pixels (Optional[int]): The minimum number of pixels allowed when the VL model preprocesses images. Default is None.
391+ max_pixels (Optional[int]): The maximum number of pixels allowed when the VL model preprocesses images. Default is None.
392+ max_new_tokens (Optional[int]): The maximum number of new tokens the VL model may generate. Default is None.
379393 **kwargs (Any): Additional settings to extend functionality.
380394
381395 Returns:
@@ -499,6 +513,7 @@ def _process_vlm(results_cv):
499513 "top_p" : top_p ,
500514 "min_pixels" : min_pixels ,
501515 "max_pixels" : max_pixels ,
516+ "max_new_tokens" : max_new_tokens ,
502517 },
503518 )
504519 )
0 commit comments