Support pipeline parallelism in the V1 engine

weiguihua2 · weiguihua2 · commit c0b73ba69200 · 2025-07-10T22:39:03.000+08:00
Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
diff --git a/tests/e2e/multicard/test_pipeline_parallel.py b/tests/e2e/multicard/test_pipeline_parallel.py
@@ -34,11 +34,7 @@
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("tp_size", TENSOR_PARALLELS)
 @pytest.mark.parametrize("pp_size", PIPELINE_PARALLELS)
-@pytest.mark.parametrize("max_tokens", [64])
-@pytest.mark.parametrize("temperature", [0.0])
-@pytest.mark.parametrize("ignore_eos", [True])
-def test_models(model: str, tp_size: int, pp_size: int, max_tokens: int, temperature: int,
-                ignore_eos: bool) -> None:
+def test_models(model: str, tp_size: int, pp_size: int) -> None:
     # Create an LLM.
     llm = LLM(
         model=model,
@@ -49,9 +45,9 @@ def test_models(model: str, tp_size: int, pp_size: int, max_tokens: int, tempera
     )
     # Prepare sampling_parames
     sampling_params = SamplingParams(
-        max_tokens=max_tokens,
-        temperature=temperature,
-        ignore_eos=ignore_eos,
+        max_tokens=64,
+        temperature=0,
+        ignore_eos=True,
     )
 
     # Generate texts from the prompts.