Commit 0fa658a

Add support for qwen3_vl and qwen3_vl_moe
Signed-off-by: booker123456 <945658361@qq.com>
1 parent: 1ce5057

File tree: 2 files changed, +5 -32 lines

vllm_ascend/models/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -15,7 +15,8 @@ def register_model():

     ModelRegistry.register_model(
         "Qwen3VLForConditionalGeneration",
-        "vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen3VLForConditionalGeneration")
+        "vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen3VLForConditionalGeneration"
+    )

     if envs_ascend.USE_OPTIMIZED_MODEL:
         ModelRegistry.register_model(
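Note: vLLM's ModelRegistry accepts a lazy "module:ClassName" string, so the Ascend override is only imported when that architecture is actually requested. Below is a minimal sketch of that registration pattern; _REGISTRY, register_model, and resolve_model here are illustrative stand-ins, not vLLM's real internals.

import importlib

_REGISTRY: dict[str, str] = {}

def register_model(arch: str, target: str) -> None:
    # Nothing is imported at registration time; only the string is stored.
    _REGISTRY[arch] = target

def resolve_model(arch: str) -> type:
    # The class is imported lazily, on first lookup of the architecture.
    module_name, class_name = _REGISTRY[arch].split(":")
    return getattr(importlib.import_module(module_name), class_name)

register_model(
    "Qwen3VLForConditionalGeneration",
    "vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen3VLForConditionalGeneration",
)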

vllm_ascend/models/qwen2_5_vl_without_padding.py

Lines changed: 3 additions & 31 deletions
@@ -38,9 +38,6 @@
     get_act_and_mul_fn)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.model_executor.models.interfaces import (SupportsLoRA,
-                                                   SupportsMultiModal,
-                                                   SupportsPP)
 from vllm.model_executor.models.qwen2_5_vl import (
     Qwen2_5_VisionAttention, Qwen2_5_VisionBlock, Qwen2_5_VisionPatchEmbed,
     Qwen2_5_VisionTransformer, Qwen2_5_VLDummyInputsBuilder,
@@ -339,6 +336,7 @@ class AscendQwen3_VisionPatchEmbed(Qwen3_VisionPatchEmbed):
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = x.matmul(
             self.proj.weight.data.view(self.hidden_size, -1).transpose(0, 1))
+        x = x + self.proj.bias
         return x

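The one added line restores the convolution's bias term, which the matmul rewrite of the Conv3d patch projection otherwise silently drops. A small self-contained check of that equivalence, with made-up sizes standing in for the real Qwen3 vision config:

import torch
import torch.nn as nn

# Hypothetical dimensions for illustration only; the real values come from
# the vision config (hidden_size, in_channels, temporal/spatial patch size).
hidden_size, in_ch, t, p = 8, 3, 2, 4
proj = nn.Conv3d(in_ch, hidden_size, kernel_size=(t, p, p), stride=(t, p, p))

x = torch.randn(5, in_ch * t * p * p)  # five flattened patches
out = x.matmul(proj.weight.data.view(hidden_size, -1).transpose(0, 1))
out = out + proj.bias  # the line this commit adds

# The matmul-plus-bias form matches the convolution applied patch-by-patch.
ref = proj(x.view(5, in_ch, t, p, p)).view(5, hidden_size)
assert torch.allclose(out, ref, atol=1e-5)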

@@ -578,14 +576,10 @@ class AscendQwen3VLMoeForConditionalGeneration(
     })

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
-        nn.Module.__init__(self)
-        SupportsMultiModal.__init__(self)
-        SupportsLoRA.__init__(self)
-        SupportsPP.__init__(self)
+        super().__init__(vllm_config=vllm_config, prefix=prefix)
         config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         multimodal_config = vllm_config.model_config.multimodal_config
-        self.config = config
         self.multimodal_config = multimodal_config
         self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"

@@ -595,26 +589,4 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             quant_config=self._maybe_ignore_quant_config(quant_config),
             prefix=maybe_prefix(prefix, "visual"),
             use_data_parallel=self.use_data_parallel,
-        )
-
-        self.language_model = Qwen3MoeLLMForCausalLM(vllm_config=vllm_config,
-                                                     prefix=maybe_prefix(
-                                                         prefix,
-                                                         "language_model"))
-
-        self.make_empty_intermediate_tensors = (
-            self.language_model.make_empty_intermediate_tensors)
-
-        self.use_deepstack = hasattr(config.vision_config,
-                                     'deepstack_visual_indexes')
-        self.deepstack_num_level = len(
-            config.vision_config.deepstack_visual_indexes
-        ) if self.use_deepstack else 0
-        # register buffer for deepstack
-        self.deepstack_input_embeds = [
-            torch.zeros(vllm_config.scheduler_config.max_num_batched_tokens,
-                        config.text_config.hidden_size)
-            for _ in range(self.deepstack_num_level)
-        ] if self.use_deepstack else None
-        self.visual_dim = config.vision_config.out_hidden_size
-        self.multiscale_dim = self.visual_dim * self.deepstack_num_level
+        )
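The net effect of the two __init__ hunks is a refactor: instead of invoking each mixin's __init__ by hand and rebuilding the language model and deepstack state locally, the Ascend subclass now defers to the upstream parent via super() and overrides only the vision tower. A minimal sketch of that pattern; the classes and attributes below are illustrative stand-ins, not the vLLM ones.

class UpstreamQwen3VLMoe:
    def __init__(self, *, vllm_config: dict, prefix: str = "") -> None:
        # The parent builds everything once: config, language model,
        # deepstack buffers, and a default vision tower.
        self.config = vllm_config["hf_config"]
        self.language_model = "built-by-parent"
        self.visual = "upstream-vision-tower"

class AscendQwen3VLMoe(UpstreamQwen3VLMoe):
    def __init__(self, *, vllm_config: dict, prefix: str = "") -> None:
        super().__init__(vllm_config=vllm_config, prefix=prefix)
        # Override only the device-specific piece; everything the deleted
        # code used to duplicate is now inherited from the parent.
        self.visual = "ascend-vision-tower"

m = AscendQwen3VLMoe(vllm_config={"hf_config": None})
assert m.language_model == "built-by-parent"
assert m.visual == "ascend-vision-tower"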
