Skip to content

Commit 35c7f37

Browse files
committed
engineV1 support pipeline parallel
Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
1 parent 0b0832a commit 35c7f37

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1404,9 +1404,6 @@ def execute_model(
14041404
scheduler_output, intermediate_tensors))
14051405

14061406
with ProfileExecuteDuration().capture_async("post process"):
1407-
if self.input_batch.pooling_params:
1408-
return self._pool(hidden_states, num_scheduled_tokens,
1409-
num_scheduled_tokens_np)
14101407
# Broadcast PP output for external_launcher (torchrun)
14111408
# to make sure we are synced across pp ranks
14121409
# TODO: Support overlapping micro-batches
@@ -1423,6 +1420,9 @@ def execute_model(
14231420
hidden_states.tensors, all_gather_group=get_tp_group())
14241421
logits = None
14251422
else:
1423+
if self.input_batch.pooling_params:
1424+
return self._pool(hidden_states, num_scheduled_tokens,
1425+
num_scheduled_tokens_np)
14261426
sample_hidden_states = hidden_states[logits_indices]
14271427
logits = self.model.compute_logits(sample_hidden_states, None)
14281428
if broadcast_pp_output:

0 commit comments

Comments
 (0)