Skip to content

Commit ec699e4

Browse files
committed
Revert "fix torchair execute issue on padding data, and mtp padding logic (vllm-project#1160)"
This reverts commit 291c216. Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent 0e6099e commit ec699e4

File tree

2 files changed: +6 additions, −9 deletions

vllm_ascend/attention/mla_v1.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
```diff
@@ -377,10 +377,7 @@ def build(
         seq_lens = seq_lens[:self._num_decode_tokens]
         input_positions = input_positions[:self._num_decode_tokens]
         block_table = block_table[:self._num_decode_tokens, ...]
-        if use_torchair_graph and self.runner.attn_state in [
-                AscendAttentionState.DecodeOnly,
-                AscendAttentionState.SpecDecoding
-        ]:
+        if use_torchair_graph and self.runner.attn_state == AscendAttentionState.DecodeOnly:
             num_seqs = len(seq_lens)
             if graph_pad_size != 0:
                 pad_value = 1
```

vllm_ascend/worker/model_runner_v1.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
```diff
@@ -943,6 +943,11 @@ def _process_reqs(
             self.input_ids_cpu[:total_num_scheduled_tokens], non_blocking=True)
         input_ids = self.input_ids[:num_input_tokens]

+        if (envs_ascend.VLLM_ENABLE_MC2
+                or self.torchair_graph_enabled) and not with_prefill:
+            input_ids = self.input_ids[:padded_batch_size]
+            positions = self.positions[:padded_batch_size]
+
         # prepare the MRoPE for mllm if using multimodal
         num_input_tokens = total_num_scheduled_tokens
         # _prepare_inputs may reorder the batch, so we must gather multi
@@ -980,11 +985,6 @@ def _process_reqs(
         else:
             positions = self.positions[:num_input_tokens]

-        if (envs_ascend.VLLM_ENABLE_MC2
-                or self.torchair_graph_enabled) and not with_prefill:
-            input_ids = self.input_ids[:padded_batch_size]
-            positions = self.positions[:padded_batch_size]
-
         # Run forward pass
         with set_forward_context(attn_metadata,
                                  self.vllm_config,
```

0 commit comments

Comments
 (0)