Skip to content

Commit 0a8b987

Browse files
[Bugfix] Fix soc_version for 310P
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
1 parent 37a0715 commit 0a8b987

20 files changed

+106
-98
lines changed

Dockerfile.a3

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
5050
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
5151
source /usr/local/Ascend/nnal/atb/set_env.sh && \
5252
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
53+
export SOC_VERSION=Ascend910_9392 && \
5354
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
5455
python3 -m pip cache purge
5556

Dockerfile.a3.openEuler

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
4848
source /usr/local/Ascend/nnal/atb/set_env.sh && \
4949
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
5050
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
51+
export SOC_VERSION=Ascend910_9392 && \
5152
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
5253
python3 -m pip cache purge
5354

examples/disaggregated_prefill_v1/gen_ranktable.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import torch.distributed as dist
66

7-
from vllm_ascend.utils import AscendSocVersion, init_ascend_soc_version, get_ascend_soc_version
7+
from vllm_ascend import _build_info # type: ignore
88

99
parser = argparse.ArgumentParser(
1010
description="Arguments of rank table generator", )
@@ -38,8 +38,7 @@
3838
# and is different from WORLD_SIZE in gen_rank_table.sh.
3939
world_size = os.environ.get("WORLD_SIZE")
4040

41-
init_ascend_soc_version()
42-
soc_info = get_ascend_soc_version()
41+
soc_info = _build_info.__ascend_soc_version__
4342

4443

4544
def get_cmd_stdout(cmd):
@@ -75,7 +74,7 @@ def get_cmd_stdout(cmd):
7574
device_id = local_device_ids[idx]
7675
chip_id = device_id % chips_per_card
7776
card_id = device_id // chips_per_card
78-
if soc_info == AscendSocVersion.A3:
77+
if soc_info == "A3":
7978
device_ip = get_cmd_stdout(
8079
f"{hccn_tool_path} -i {device_id} -vnic -g | grep ipaddr"
8180
).split(":")[1].strip()
@@ -87,15 +86,15 @@ def get_cmd_stdout(cmd):
8786
).split(":")[1].strip()
8887
else:
8988
device_ip = get_cmd_stdout(
90-
f"{hccn_tool_path} -i {device_id} -ip -g | grep ipaddr"
91-
).split(":")[1].strip()
89+
f"{hccn_tool_path} -i {device_id} -ip -g | grep ipaddr").split(
90+
":")[1].strip()
9291

9392
device_info = {
9493
"server_id": local_host,
9594
"device_id": str(device_id),
9695
"device_ip": str(device_ip),
9796
}
98-
if soc_info == AscendSocVersion.A3:
97+
if soc_info == "A3":
9998
device_info.update({
10099
"super_pod_id": str(super_pod_id),
101100
"super_device_id": str(super_device_id)

setup.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
from setuptools.command.install import install
3333
from setuptools_scm import get_version
3434

35+
# Supported SOC_VERSION codes
36+
ASCEND_A2_SOC_VERSION = ["ASCEND910B1"]
37+
ASCEND_A3_SOC_VERSION = ["ASCEND910_9392"]
38+
ASCEND_310P_SOC_VERSION = ["ASCEND310P3"]
39+
3540

3641
def load_module_from_path(module_name, path):
3742
spec = importlib.util.spec_from_file_location(module_name, path)
@@ -91,11 +96,20 @@ def run(self):
9196
raise ValueError(
9297
"SOC version 310 only supports custom kernels. Please set COMPILE_CUSTOM_KERNELS=1 to enable custom kernels."
9398
)
99+
if soc_version in ASCEND_A2_SOC_VERSION:
100+
ascend_soc_version = "A2"
101+
elif soc_version in ASCEND_A3_SOC_VERSION:
102+
ascend_soc_version = "A3"
103+
elif soc_version in ASCEND_310P_SOC_VERSION:
104+
ascend_soc_version = "310P"
105+
else:
106+
ascend_soc_version = "UNDEFINED"
94107

95108
package_dir = os.path.join(ROOT_DIR, "vllm_ascend", "_build_info.py")
96109
with open(package_dir, "w+") as f:
97110
f.write('# Auto-generated file\n')
98111
f.write(f"__soc_version__ = '{soc_version}'\n")
112+
f.write(f"__ascend_soc_version__ = '{ascend_soc_version}'\n")
99113
f.write(
100114
f"__sleep_mode_enabled__ = {envs.COMPILE_CUSTOM_KERNELS}\n")
101115
logging.info(

tests/ut/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,4 @@
2323
adapt_patch(True)
2424

2525
# Register Ascend CustomOp here because the unit tests depend on it
26-
register_ascend_customop()
26+
register_ascend_customop()

tests/ut/models/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def mock_distributed():
9898
return_value=Mock(is_first_rank=False, is_last_rank=False)), \
9999
patch("vllm_ascend.ops.fused_moe.get_current_vllm_config", return_value=mock_vllm_config), \
100100
patch("vllm_ascend.ops.moe.token_dispatcher.torch.distributed.get_rank", return_value=0), \
101-
patch("vllm_ascend.ops.moe.token_dispatcher.get_ascend_soc_version", return_value=None), \
101+
patch("vllm_ascend._build_info.__ascend_soc_version__", return_value=""), \
102102
patch.dict("vllm.distributed.parallel_state.__dict__", _TP=tp_group, _EP=ep_group, _DP=dp_group,
103103
_PP=pp_group), \
104104
patch.dict("vllm_ascend.distributed.parallel_state.__dict__", _MC2=ep_group), \

tests/ut/ops/test_fused_ops.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
AscendUnquantizedFusedMoEMethod)
2929
from vllm_ascend.ops.moe.experts_selector import select_experts
3030
from vllm_ascend.ops.moe.moe_mlp import cumsum_group_list, unified_apply_mlp
31-
from vllm_ascend.utils import AscendSocVersion, adapt_patch
31+
from vllm_ascend.utils import adapt_patch
3232

3333
adapt_patch(True)
3434

@@ -125,7 +125,13 @@ def mock_finalize(hidden_states, **kwargs):
125125
return_value=mock_forward_context_obj), \
126126
patch('vllm_ascend.ops.moe.fused_moe_prepare_and_finalize.get_forward_context',
127127
return_value=mock_forward_context_obj), \
128-
patch("vllm_ascend.utils.get_ascend_soc_version", return_value=AscendSocVersion.A3), \
128+
patch('vllm_ascend.ops.fused_moe.get_current_vllm_config',
129+
return_value=MagicMock(
130+
parallel_config=MagicMock(tensor_parallel_size=2),
131+
scheduler_config=MagicMock(max_num_seqs=4),
132+
model_config=MagicMock(max_model_len=2048)
133+
)), \
134+
patch("vllm_ascend._build_info.__ascend_soc_version__", return_value="A3"), \
129135
patch('vllm_ascend.ops.moe.moe_mlp.get_forward_context',
130136
return_value=mock_forward_context_obj), \
131137
patch('vllm_ascend.ops.moe.moe_comm_method.MC2CommImpl._get_token_dispatcher',
@@ -406,7 +412,8 @@ def test_apply_with_expert_map(self, moe_method, mock_dist_env,
406412
forward_context = mock_dist_env['mock_forward_context_obj']
407413

408414
with patch("vllm_ascend.ops.fused_moe.get_forward_context", return_value=forward_context), \
409-
patch("vllm_ascend.utils.get_ascend_soc_version", return_value=AscendSocVersion.A3):
415+
patch("vllm_ascend._build_info.__ascend_soc_version__", return_value="A3"):
416+
410417
expert_map = torch.tensor([0, 1, 2, -1, -1, -1, -1, -1])
411418
moe_method.ep_size = ep_size
412419
x = torch.randn(8, 2, 2)

tests/ut/ops/test_token_dispatcher.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
from tests.ut.base import TestBase
2323

2424
from vllm_ascend.ops.moe.token_dispatcher import ( # isort: skip
25-
AscendSocVersion, TokenDispatcherWithAll2AllV,
26-
TokenDispatcherWithAllGather, TokenDispatcherWithMC2)
25+
TokenDispatcherWithAll2AllV, TokenDispatcherWithAllGather,
26+
TokenDispatcherWithMC2)
2727

2828

2929
class TestTokenDispatcherWithMC2(TestBase):
@@ -50,10 +50,9 @@ def setUp(self):
5050
return_value=self.forward_context)
5151
self.forward_context_patch.start()
5252

53-
# Mock get_ascend_soc_version()
53+
# Mock __ascend_soc_version__
5454
self.ascend_soc_version_patch = patch(
55-
"vllm_ascend.ops.moe.token_dispatcher.get_ascend_soc_version",
56-
return_value=AscendSocVersion.A3)
55+
"vllm_ascend._build_info.__ascend_soc_version__", new="A3")
5756
self.ascend_soc_version_patch.start()
5857

5958
kwargs = {"with_quant": False, "top_k": 8, "num_experts": 128}

tests/ut/torchair/ops/test_torchair_fused_moe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from vllm_ascend.quantization.quant_config import AscendFusedMoEMethod
2727
from vllm_ascend.torchair.ops.torchair_fused_moe import (
2828
TorchairAscendFusedMoE, TorchairAscendUnquantizedFusedMoEMethod)
29-
from vllm_ascend.utils import AscendSocVersion, adapt_patch # noqa E402
29+
from vllm_ascend.utils import adapt_patch # noqa E402
3030

3131
adapt_patch(True)
3232

@@ -383,7 +383,7 @@ def test_apply_with_expert_map(self, moe_method, mock_dist_env,
383383
forward_context = MagicMock(
384384
fused_moe_state=_get_fused_moe_state(ep_size, is_prefill, True))
385385
with patch("vllm_ascend.torchair.ops.torchair_fused_moe.get_forward_context", return_value=forward_context), \
386-
patch("vllm_ascend.torchair.ops.torchair_fused_moe.get_ascend_soc_version", return_value=AscendSocVersion.A3):
386+
patch("vllm_ascend._build_info.__ascend_soc_version__", return_value="A3"):
387387
expert_map = torch.tensor([0, 1, 2, -1, -1, -1, -1, -1])
388388
moe_method.ep_size = ep_size
389389
x = torch.randn(8, 2, 2)

tests/ut/worker/test_model_runner_v1.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import pytest
1717

1818
from vllm_ascend.ascend_forward_context import MoECommType
19-
from vllm_ascend.utils import AscendSocVersion
2019
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
2120

2221

@@ -25,21 +24,25 @@
2524
"soc_version, enable_expert_parallel, world_size, num_tokens, mc2_tokens_capacity, quant_type, expected_method",
2625
[
2726
# Case 1: Expert parallel is disabled, should always be 'allgather'
28-
(AscendSocVersion.A2, False, 8, 100, 256, None, MoECommType.ALLGATHER),
29-
(AscendSocVersion.A3, False, 16, 500, 256, None, MoECommType.ALLGATHER),
27+
("A2", False, 8, 100, 256, None, "allgather"),
28+
("A3", False, 16, 500, 256, None, "allgather"),
3029
3130
# Case 2: A2 SOC with w4a8_dynamic -> use alltoall when not mc2
32-
(AscendSocVersion.A2, True, 8, 100, 256, "w4a8_dynamic", MoECommType.ALLTOALL),
33-
(AscendSocVersion.A2, True, 16, 257, 256, "w4a8_dynamic", MoECommType.ALLTOALL),
34-
(AscendSocVersion.A2, True, 16, 100, 256, "w4a8_dynamic", MoECommType.MC2), # meets mc2 condition
31+
("A2", True, 8, 100, 256, "w4a8_dynamic", "alltoall"),
32+
("A2", True, 16, 257, 256, "w4a8_dynamic", "alltoall"),
33+
("A2", True, 16, 100, 256, "w4a8_dynamic", "mc2"), # meets mc2 condition
3534
3635
# Case 3: A2 SOC without w4a8_dynamic -> fallback to allgather
37-
(AscendSocVersion.A2, True, 8, 100, 256, None, MoECommType.ALLGATHER),
38-
(AscendSocVersion.A2, True, 16, 257, 256, None, MoECommType.ALLGATHER),
36+
("A2", True, 8, 100, 256, None, "allgather"),
37+
("A2", True, 16, 257, 256, None, "allgather"),
3938
4039
# Case 4: A3 SOC
41-
(AscendSocVersion.A3, True, 8, 100, 256, None, MoECommType.MC2),
42-
(AscendSocVersion.A3, True, 8, 257, 256, None, MoECommType.ALLTOALL),
40+
("A3", True, 8, 100, 256, None, "mc2"),
41+
("A3", True, 8, 257, 256, None, "alltoall"),
42+
43+
# Case 5: 310P SOC -> always falls back to allgather
44+
("310P", True, 8, 100, 256, None, "allgather"),
45+
("310P", True, 8, 257, 256, None, "allgather"),
4346
])
4447
# yapf: enable
4548
def test_select_moe_comm_method(soc_version, enable_expert_parallel,
@@ -65,8 +68,8 @@ def test_select_moe_comm_method(soc_version, enable_expert_parallel,
6568
mock_runner.vllm_config = mock_vllm_config
6669

6770
# Patch the build-time SoC version and the first-rank helper
68-
with patch('vllm_ascend.worker.model_runner_v1.get_ascend_soc_version',
69-
return_value=soc_version), \
71+
with patch('vllm_ascend._build_info.__ascend_soc_version__',
72+
new=soc_version), \
7073
patch('vllm_ascend.worker.model_runner_v1.is_global_first_rank',
7174
return_value=True):
7275

@@ -98,8 +101,8 @@ def test_select_moe_comm_method_unsupported_soc():
98101

99102
unsupported_soc = "UnsupportedSOC"
100103

101-
with patch('vllm_ascend.worker.model_runner_v1.get_ascend_soc_version',
102-
return_value=unsupported_soc), \
104+
with patch('vllm_ascend._build_info.__ascend_soc_version__',
105+
new=unsupported_soc), \
103106
patch('vllm_ascend.worker.model_runner_v1.is_global_first_rank',
104107
return_value=True), \
105108
pytest.raises(ValueError, match=f"Unsupported soc_version: {unsupported_soc}"):

0 commit comments

Comments
 (0)