 90 |  90 | from vllm_ascend.ascend_config import get_ascend_config
 91 |  91 | from vllm_ascend.ascend_forward_context import set_ascend_forward_context
 92 |  92 | from vllm_ascend.attention.attention_mask import AttentionMaskBuilder
 93 |     | -from vllm_ascend.attention.attention_v1 import (AscendAttentionState,
 94 |     | -                                                 AscendMetadata)
 95 |     | -from vllm_ascend.attention.mla_v1 import AscendMLAMetadata
    |  93 | +from vllm_ascend.attention.attention_v1 import AscendAttentionState
 96 |  94 | from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
 97 |  95 | from vllm_ascend.compilation.acl_graph import ACLGraphWrapper
 98 |  96 | from vllm_ascend.multistream.ms_split import compute_split_seq_index

103 | 101 | from vllm_ascend.spec_decode.eagle_proposer import EagleProposer
104 | 102 | from vllm_ascend.spec_decode.interface import SpecDcodeType
105 | 103 | from vllm_ascend.spec_decode.mtp_proposer import MtpProposer
106 |     | -from vllm_ascend.torchair.torchair_attention import AscendTorchairMetadata
107 |     | -from vllm_ascend.torchair.torchair_mla import AscendMLATorchairMetadata
108 | 104 | from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
109 | 105 |                                AscendSocVersion, ProfileExecuteDuration,
110 | 106 |                                get_ascend_soc_version, is_310p,
@@ -1611,9 +1607,7 @@ def propose_draft_token_ids(
1611 | 1607 |         positions: torch.Tensor,
1612 | 1608 |         num_scheduled_tokens: int,
1613 | 1609 |         hidden_states: torch.Tensor,
1614 |      | -        attn_metadata: Union[AscendMetadata, AscendMLAMetadata,
1615 |      | -                             AscendTorchairMetadata,
1616 |      | -                             AscendMLATorchairMetadata],
     | 1610 | +        attn_metadata: dict[str, Any],
1617 | 1611 |         aux_hidden_states: torch.Tensor = None,
1618 | 1612 |     ) -> Optional[list[list[int]]]:
1619 | 1613 |         if not self.drafter:
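In effect, this commit trims the backend-specific metadata imports and loosens the draft-proposal signature: instead of accepting a Union over every attention-metadata class, propose_draft_token_ids now takes a plain dict[str, Any]. Below is a minimal sketch of the resulting calling pattern, assuming the dict maps attention-layer names to their backend-specific metadata objects; the key scheme, PlaceholderMetadata type, and field name are illustrative assumptions, not vllm-ascend's actual code.

from dataclasses import dataclass
from typing import Any


@dataclass
class PlaceholderMetadata:
    # Hypothetical stand-in for a backend-specific metadata class such as
    # AscendMetadata or AscendMLAMetadata; it exists only to make this run.
    num_actual_tokens: int


def propose_draft_token_ids(attn_metadata: dict[str, Any]) -> None:
    # With a dict, each layer looks up its own metadata by name, so adding
    # a new attention backend means adding entries rather than widening a
    # Union type annotation (and its import list) at every call site.
    for layer_name, metadata in attn_metadata.items():
        print(f"{layer_name}: {metadata.num_actual_tokens} tokens scheduled")


# Hypothetical layer name; real keys would come from the model's attention layers.
propose_draft_token_ids(
    {"model.layers.0.self_attn.attn": PlaceholderMetadata(num_actual_tokens=8)})

Keying metadata by layer name also mirrors how vLLM's v1 engine passes per-layer attention metadata, which is presumably why the MLA- and Torchair-specific imports removed above are no longer needed here.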