23
23
from typing import Dict
24
24
from typing import Generator
25
25
from typing import Iterable
26
+ from typing import List
26
27
from typing import Literal
27
28
from typing import Optional
28
29
from typing import Tuple
@@ -485,16 +486,22 @@ def _message_to_generate_content_response(
485
486
486
487
def _get_completion_inputs (
487
488
llm_request : LlmRequest ,
488
- ) -> tuple [Iterable [Message ], Iterable [dict ]]:
489
- """Converts an LlmRequest to litellm inputs.
489
+ ) -> Tuple [
490
+ List [Message ],
491
+ Optional [List [Dict ]],
492
+ Optional [types .SchemaUnion ],
493
+ Optional [Dict ],
494
+ ]:
495
+ """Converts an LlmRequest to litellm inputs and extracts generation params.
490
496
491
497
Args:
492
498
llm_request: The LlmRequest to convert.
493
499
494
500
Returns:
495
- The litellm inputs (message list, tool dictionary and response format).
501
+ The litellm inputs (message list, tool dictionary, response format, and generation params).
496
502
"""
497
- messages = []
503
+ # 1. Construct messages
504
+ messages : List [Message ] = []
498
505
for content in llm_request .contents or []:
499
506
message_param_or_list = _content_to_message_param (content )
500
507
if isinstance (message_param_or_list , list ):
@@ -511,7 +518,8 @@ def _get_completion_inputs(
511
518
),
512
519
)
513
520
514
- tools = None
521
+ # 2. Convert tool declarations
522
+ tools : Optional [List [Dict ]] = None
515
523
if (
516
524
llm_request .config
517
525
and llm_request .config .tools
@@ -522,12 +530,39 @@ def _get_completion_inputs(
522
530
for tool in llm_request .config .tools [0 ].function_declarations
523
531
]
524
532
525
- response_format = None
526
-
527
- if llm_request .config .response_schema :
533
+ # 3. Handle response format
534
+ response_format : Optional [ types . SchemaUnion ] = None
535
+ if llm_request .config and llm_request . config .response_schema :
528
536
response_format = llm_request .config .response_schema
529
537
530
- return messages , tools , response_format
538
+ # 4. Extract generation parameters
539
+ generation_params : Optional [Dict ] = None
540
+ if llm_request .config :
541
+ config_dict = llm_request .config .model_dump (exclude_none = True )
542
+ # Build the LiteLLM completion parameters here,
543
+ # following https://docs.litellm.ai/docs/completion/input.
544
+ generation_params = {}
545
+ param_mapping = {
546
+ "max_output_tokens" : "max_completion_tokens" ,
547
+ "stop_sequences" : "stop" ,
548
+ }
549
+ for key in (
550
+ "temperature" ,
551
+ "max_output_tokens" ,
552
+ "top_p" ,
553
+ "top_k" ,
554
+ "stop_sequences" ,
555
+ "presence_penalty" ,
556
+ "frequency_penalty" ,
557
+ ):
558
+ if key in config_dict :
559
+ mapped_key = param_mapping .get (key , key )
560
+ generation_params [mapped_key ] = config_dict [key ]
561
+
562
+ if not generation_params :
563
+ generation_params = None
564
+
565
+ return messages , tools , response_format , generation_params
531
566
532
567
533
568
def _build_function_declaration_log (
@@ -664,7 +699,9 @@ async def generate_content_async(
664
699
self ._maybe_append_user_content (llm_request )
665
700
logger .debug (_build_request_log (llm_request ))
666
701
667
- messages , tools , response_format = _get_completion_inputs (llm_request )
702
+ messages , tools , response_format , generation_params = (
703
+ _get_completion_inputs (llm_request )
704
+ )
668
705
669
706
if "functions" in self ._additional_args :
670
707
# LiteLLM does not support both tools and functions together.
@@ -678,6 +715,9 @@ async def generate_content_async(
678
715
}
679
716
completion_args .update (self ._additional_args )
680
717
718
+ if generation_params :
719
+ completion_args .update (generation_params )
720
+
681
721
if stream :
682
722
text = ""
683
723
# Track function calls by index
0 commit comments