- Introduce new parameter vllm_server_base_url in GRPOConfig

re-imagined · re-imagined · commit a232a1a96488 · 2025-04-24T01:43:57.000+08:00
- Update `VLLMClient` initialization to support base URL
- Modify existing parameters `vllm_server_host` and `vllm_server_port` to be ignored if base URL is provided
diff --git a/trl/trainer/grpo_config.py b/trl/trainer/grpo_config.py
@@ -85,10 +85,13 @@ class GRPOConfig(TrainingArguments):
         use_vllm (`bool`, *optional*, defaults to `False`):
             Whether to use vLLM for generating completions. If set to `True`, ensure that a GPU is kept unused for
             training, as vLLM will require one for generation. vLLM must be installed (`pip install vllm`).
+        vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`):
+            Base URL for the vLLM server (e.g., "http://localhost:8000"). If provided, vllm_server_host and
+            vllm_server_port are ignored.
         vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`):
-            Host of the vLLM server to connect to.
+            Host of the vLLM server to connect to. Ignored if vllm_server_base_url is provided.
         vllm_server_port (`int`, *optional*, defaults to `8000`):
-            Port of the vLLM server to connect to.
+            Port of the vLLM server to connect to. Ignored if vllm_server_base_url is provided.
         vllm_server_timeout (`float`, *optional*, defaults to `120.0`):
             Total timeout duration in seconds to wait for the vLLM server to be up. If the server is not up after the
             timeout, a `ConnectionError` is raised.
@@ -270,13 +273,20 @@ class GRPOConfig(TrainingArguments):
             "running. To run the server, install vLLM (`pip install vllm`) and run `trl vllm-serve`."
         },
     )
+    vllm_server_base_url: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": "Base URL for the vLLM server (e.g., 'http://localhost:8000'). If provided, vllm_server_host and "
+            "vllm_server_port are ignored."
+        },
+    )
     vllm_server_host: str = field(
         default="0.0.0.0",
-        metadata={"help": "Host of the vLLM server to connect to."},
+        metadata={"help": "Host of the vLLM server to connect to. Ignored if vllm_server_base_url is provided."},
     )
     vllm_server_port: int = field(
         default=8000,
-        metadata={"help": "Port of the vLLM server to connect to."},
+        metadata={"help": "Port of the vLLM server to connect to. Ignored if vllm_server_base_url is provided."},
     )
     vllm_server_timeout: float = field(
         default=120.0,
diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
@@ -603,9 +603,16 @@ def data_collator(features):  # No data collation is needed in GRPO
                 )
 
             if self.accelerator.is_main_process:
-                self.vllm_client = VLLMClient(
-                    args.vllm_server_host, args.vllm_server_port, connection_timeout=args.vllm_server_timeout
-                )
+                if args.vllm_server_base_url is not None:
+                    self.vllm_client = VLLMClient(
+                        base_url=args.vllm_server_base_url, connection_timeout=args.vllm_server_timeout
+                    )
+                else:
+                    self.vllm_client = VLLMClient(
+                        host=args.vllm_server_host, 
+                        server_port=args.vllm_server_port, 
+                        connection_timeout=args.vllm_server_timeout
+                    )
 
             # vLLM specific sampling arguments
             self.guided_decoding_regex = args.vllm_guided_decoding_regex