commit cd9e961 (parent ad92dee)
vllm_ascend/models/qwen3.py
@@ -37,6 +37,7 @@ def pad(tensor, x):
         return F.pad(tensor, (0, 0, 0, pad_size)), pad_size
     return tensor, pad_size
 
+
 def unpad(tensor, pad_size):
     if pad_size > 0:
         return tensor[:-pad_size, :]
@@ -253,7 +254,7 @@ def pre_attention_process(self, hidden_states, residual, pad_size=0):
         hidden_states = tensor_model_parallel_all_gather(hidden_states, 0)
         hidden_states = unpad(hidden_states, pad_size)
         return hidden_states, residual
-
     def pre_mlp_process(self, hidden_states, residual, pad_size=0):
         token_num = hidden_states.size(0)
         if token_num != residual.size(0):
@@ -298,7 +299,7 @@ def forward(self,
                 hidden_states, residual)
         hidden_states, pad_size = self.mlp(hidden_states)
         return hidden_states, residual, pad_size
 
 
 ALL_DECODER_LAYER_TYPES = {
     "attention": CustomQwen3DecoderLayer,