@@ -2,24 +2,24 @@
 from typing import Optional, Union
 
 import torch
-from torch import nn
-import torch.nn.functional as F
 import torch.distributed as dist
+import torch.nn.functional as F
+from torch import nn
 from transformers import Qwen3Config
+from vllm.attention import AttentionType
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, VllmConfig
-from vllm.attention import AttentionType
-from vllm.distributed import (get_pp_group,
+from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
                               get_tensor_model_parallel_world_size,
-                              get_tensor_model_parallel_rank,
                               tensor_model_parallel_all_gather)
+from vllm.model_executor.layers.linear import (ReplicatedLinear,
+                                               RowParallelLinear)
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
-from vllm.model_executor.layers.linear import RowParallelLinear, ReplicatedLinear
 from vllm.model_executor.models.interfaces import SupportsLoRA, SupportsPP
-from vllm.model_executor.models.qwen2 import Qwen2Model
 from vllm.model_executor.models.qwen2 import Qwen2MLP as Qwen3MLP
+from vllm.model_executor.models.qwen2 import Qwen2Model
 from vllm.model_executor.models.qwen3 import Qwen3Attention
 from vllm.model_executor.models.utils import (AutoWeightsLoader,
                                               PPMissingLayer, maybe_prefix)