|
4 | 4 | import torch.nn as nn
|
5 | 5 | import torchair
|
6 | 6 | import vllm.envs as envs_vllm
|
7 |
| -from vllm.attention.layer import Attention |
8 | 7 | from torchair import patch_for_hcom
|
| 8 | +from vllm.attention.layer import Attention |
9 | 9 | from vllm.config import (VllmConfig, get_layers_from_vllm_config,
|
10 | 10 | set_current_vllm_config)
|
11 | 11 | from vllm.forward_context import BatchDescriptor, get_forward_context
|
| 12 | +from vllm.model_executor.model_loader import get_model_loader |
| 13 | +from vllm.model_executor.model_loader.utils import ( |
| 14 | + process_weights_after_loading, set_default_torch_dtype) |
12 | 15 | from vllm.v1.core.sched.output import SchedulerOutput
|
13 | 16 | from vllm.v1.sample.metadata import SamplingMetadata
|
14 | 17 | from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
|
15 |
| -from vllm.model_executor.model_loader.utils import ( |
16 |
| - process_weights_after_loading, set_default_torch_dtype) |
17 |
| -from vllm.model_executor.model_loader import get_model_loader |
18 | 18 | from vllm_ascend.ascend_config import get_ascend_config
|
19 | 19 | from vllm_ascend.ascend_forward_context import set_ascend_forward_context
|
20 | 20 | from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
|
21 | 21 | from vllm_ascend.spec_decode import MtpProposer
|
22 |
| -from vllm_ascend.torchair.models.torchair_deepseek_mtp import TorchairDeepSeekMTP |
| 22 | +from vllm_ascend.torchair.models.torchair_deepseek_mtp import \ |
| 23 | + TorchairDeepSeekMTP |
23 | 24 | from vllm_ascend.torchair.utils import (TORCHAIR_CACHE_DIR,
|
24 | 25 | TorchairCommonAttentionMetadata)
|
25 | 26 | from vllm_ascend.utils import ProfileExecuteDuration, lmhead_tp_enable
|
|
0 commit comments