from vllm.model_executor.model_loader.weight_utils import (
    default_weight_loader, sharded_weight_loader)
from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP
-from vllm.model_executor.models.qwen3_next import (Qwen3NextAttention,
-                                                   Qwen3NextDecoderLayer,
-                                                   Qwen3NextForCausalLM,
-                                                   Qwen3NextGatedDeltaNet,
-                                                   Qwen3NextModel,
-                                                   Qwen3NextSparseMoeBlock,
-                                                   fused_gdn_gating)
from vllm.model_executor.models.utils import (
    PPMissingLayer, extract_layer_index, is_pp_missing_parameter,
    make_empty_intermediate_tensors_factory, make_layers, maybe_prefix)
from vllm.model_executor.utils import set_weight_attrs
from vllm.transformers_utils.configs import Qwen3NextConfig
from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata

+from vllm.model_executor.models.qwen3_next import Qwen3NextAttention  # isort: skip
+from vllm.model_executor.models.qwen3_next import Qwen3NextDecoderLayer  # isort: skip
+from vllm.model_executor.models.qwen3_next import Qwen3NextForCausalLM  # isort: skip
+from vllm.model_executor.models.qwen3_next import Qwen3NextGatedDeltaNet  # isort: skip
+from vllm.model_executor.models.qwen3_next import Qwen3NextModel  # isort: skip
+from vllm.model_executor.models.qwen3_next import Qwen3NextSparseMoeBlock  # isort: skip
+from vllm.model_executor.models.qwen3_next import fused_gdn_gating  # isort: skip
+


class CustomQwen3NextGatedDeltaNet(Qwen3NextGatedDeltaNet, MambaBase):
