    get_act_and_mul_fn)
from vllm.model_executor.layers.layernorm import RMSNorm
from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.model_executor.models.interfaces import (SupportsLoRA,
-                                                   SupportsMultiModal,
-                                                   SupportsPP)
from vllm.model_executor.models.qwen2_5_vl import (
    Qwen2_5_VisionAttention, Qwen2_5_VisionBlock, Qwen2_5_VisionPatchEmbed,
    Qwen2_5_VisionTransformer, Qwen2_5_VLDummyInputsBuilder,
@@ -339,6 +336,7 @@ class AscendQwen3_VisionPatchEmbed(Qwen3_VisionPatchEmbed):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.matmul(
            self.proj.weight.data.view(self.hidden_size, -1).transpose(0, 1))
+        x = x + self.proj.bias
        return x


@@ -578,14 +576,10 @@ class AscendQwen3VLMoeForConditionalGeneration(
        })

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
-        nn.Module.__init__(self)
-        SupportsMultiModal.__init__(self)
-        SupportsLoRA.__init__(self)
-        SupportsPP.__init__(self)
+        super().__init__(vllm_config=vllm_config, prefix=prefix)
        config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config
        quant_config = vllm_config.quant_config
        multimodal_config = vllm_config.model_config.multimodal_config
-        self.config = config
        self.multimodal_config = multimodal_config
        self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"

@@ -595,26 +589,4 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
            quant_config=self._maybe_ignore_quant_config(quant_config),
            prefix=maybe_prefix(prefix, "visual"),
            use_data_parallel=self.use_data_parallel,
-        )
-
-        self.language_model = Qwen3MoeLLMForCausalLM(vllm_config=vllm_config,
-                                                     prefix=maybe_prefix(
-                                                         prefix,
-                                                         "language_model"))
-
-        self.make_empty_intermediate_tensors = (
-            self.language_model.make_empty_intermediate_tensors)
-
-        self.use_deepstack = hasattr(config.vision_config,
-                                     'deepstack_visual_indexes')
-        self.deepstack_num_level = len(
-            config.vision_config.deepstack_visual_indexes
-        ) if self.use_deepstack else 0
-        # register buffer for deepstack
-        self.deepstack_input_embeds = [
-            torch.zeros(vllm_config.scheduler_config.max_num_batched_tokens,
-                        config.text_config.hidden_size)
-            for _ in range(self.deepstack_num_level)
-        ] if self.use_deepstack else None
-        self.visual_dim = config.vision_config.out_hidden_size
-        self.multiscale_dim = self.visual_dim * self.deepstack_num_level
+        )
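The `AscendQwen3_VisionPatchEmbed.forward` hunk projects patches with a plain matmul over the reshaped Conv3d weight, which drops the conv bias unless it is added back explicitly. A minimal sketch of why the new `x = x + self.proj.bias` line is needed; the shapes and tensor names below are made-up illustration values, not taken from the model:

```python
import torch

# Hypothetical shapes for illustration only.
hidden_size, patch_dim, num_patches = 16, 24, 4
weight = torch.randn(hidden_size, patch_dim)   # stands in for proj.weight, flattened
bias = torch.randn(hidden_size)                # stands in for proj.bias
x = torch.randn(num_patches, patch_dim)        # flattened patch pixels

# Projection as done in the Ascend patch embed: matmul against the reshaped weight...
out = x.matmul(weight.view(hidden_size, -1).transpose(0, 1))
# ...plus the bias term that this change restores.
out = out + bias

# Reference: the same projection expressed as a biased linear layer.
ref = torch.nn.functional.linear(x, weight, bias)
assert torch.allclose(out, ref, atol=1e-5)
```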
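The `__init__` hunks replace the explicit per-interface `__init__` calls and the duplicated language-model/deepstack setup with a single `super().__init__(vllm_config=..., prefix=...)`, so the shared construction runs once in the upstream parent class. A minimal sketch of that delegation pattern, using placeholder classes rather than the real vLLM hierarchy:

```python
# Placeholder classes for illustration; not the actual vLLM model classes.
class UpstreamModel:
    def __init__(self, *, vllm_config, prefix: str = ""):
        # Shared setup lives in the parent (config, submodules, buffers, ...).
        self.config = vllm_config
        self.language_model = object()

class AscendModel(UpstreamModel):
    def __init__(self, *, vllm_config, prefix: str = ""):
        # One cooperative call replaces the explicit base-class __init__ calls
        # and the re-implemented submodule construction.
        super().__init__(vllm_config=vllm_config, prefix=prefix)
        # Only platform-specific overrides remain in the subclass.
        self.use_data_parallel = False

model = AscendModel(vllm_config={"dummy": True})
```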