Skip to content

Commit 7655d3b

Browse files
[Bugfix] Fix soc_version for 310p
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
1 parent 79a910e commit 7655d3b

20 files changed

+107
-98
lines changed

Dockerfile.a3

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
5050
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
5151
source /usr/local/Ascend/nnal/atb/set_env.sh && \
5252
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
53+
export SOC_VERSION=Ascend910_9392 && \
5354
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
5455
python3 -m pip cache purge
5556

Dockerfile.a3.openEuler

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
4848
source /usr/local/Ascend/nnal/atb/set_env.sh && \
4949
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
5050
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
51+
export SOC_VERSION=Ascend910_9392 && \
5152
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
5253
python3 -m pip cache purge
5354

examples/disaggregated_prefill_v1/gen_ranktable.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import torch.distributed as dist
66

7-
from vllm_ascend.utils import AscendSocVersion, init_ascend_soc_version, get_ascend_soc_version
7+
from vllm_ascend import _build_info # type: ignore
88

99
parser = argparse.ArgumentParser(
1010
description="Arguments of rank table generator", )
@@ -38,8 +38,7 @@
3838
# and is different from WORLD_SIZE in gen_rank_table.sh.
3939
world_size = os.environ.get("WORLD_SIZE")
4040

41-
init_ascend_soc_version()
42-
soc_info = get_ascend_soc_version()
41+
soc_info = _build_info.__ascend_soc_version__
4342

4443

4544
def get_cmd_stdout(cmd):
@@ -75,7 +74,7 @@ def get_cmd_stdout(cmd):
7574
device_id = local_device_ids[idx]
7675
chip_id = device_id % chips_per_card
7776
card_id = device_id // chips_per_card
78-
if soc_info == AscendSocVersion.A3:
77+
if soc_info == "A3":
7978
device_ip = get_cmd_stdout(
8079
f"{hccn_tool_path} -i {device_id} -vnic -g | grep ipaddr"
8180
).split(":")[1].strip()
@@ -87,15 +86,15 @@ def get_cmd_stdout(cmd):
8786
).split(":")[1].strip()
8887
else:
8988
device_ip = get_cmd_stdout(
90-
f"{hccn_tool_path} -i {device_id} -ip -g | grep ipaddr"
91-
).split(":")[1].strip()
89+
f"{hccn_tool_path} -i {device_id} -ip -g | grep ipaddr").split(
90+
":")[1].strip()
9291

9392
device_info = {
9493
"server_id": local_host,
9594
"device_id": str(device_id),
9695
"device_ip": str(device_ip),
9796
}
98-
if soc_info == AscendSocVersion.A3:
97+
if soc_info == "A3":
9998
device_info.update({
10099
"super_pod_id": str(super_pod_id),
101100
"super_device_id": str(super_device_id)

setup.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
from setuptools.command.install import install
3333
from setuptools_scm import get_version
3434

35+
# Supported SOC_VERSION codes
36+
ASCEND_A2_SOC_VERSION = ["ASCEND910B1"]
37+
ASCEND_A3_SOC_VERSION = ["ASCEND910_9392"]
38+
ASCEND_310P_SOC_VERSION = ["ASCEND310P3"]
39+
3540

3641
def load_module_from_path(module_name, path):
3742
spec = importlib.util.spec_from_file_location(module_name, path)
@@ -91,11 +96,20 @@ def run(self):
9196
raise ValueError(
9297
"SOC version 310 only supports custom kernels. Please set COMPILE_CUSTOM_KERNELS=1 to enable custom kernels."
9398
)
99+
if soc_version in ASCEND_A2_SOC_VERSION:
100+
ascend_soc_version = "A2"
101+
elif soc_version in ASCEND_A3_SOC_VERSION:
102+
ascend_soc_version = "A3"
103+
elif soc_version in ASCEND_310P_SOC_VERSION:
104+
ascend_soc_version = "310P"
105+
else:
106+
ascend_soc_version = "UNDEFINED"
94107

95108
package_dir = os.path.join(ROOT_DIR, "vllm_ascend", "_build_info.py")
96109
with open(package_dir, "w+") as f:
97110
f.write('# Auto-generated file\n')
98111
f.write(f"__soc_version__ = '{soc_version}'\n")
112+
f.write(f"__ascend_soc_version__ = '{ascend_soc_version}'\n")
99113
f.write(
100114
f"__sleep_mode_enabled__ = {envs.COMPILE_CUSTOM_KERNELS}\n")
101115
logging.info(

tests/ut/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,4 @@
2323
adapt_patch(True)
2424

2525
# register Ascend CustomOp here because uts will use this
26-
register_ascend_customop()
26+
register_ascend_customop()

tests/ut/models/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def mock_distributed():
9898
return_value=Mock(is_first_rank=False, is_last_rank=False)), \
9999
patch("vllm_ascend.ops.fused_moe.get_current_vllm_config", return_value=mock_vllm_config), \
100100
patch("vllm_ascend.ops.moe.token_dispatcher.torch.distributed.get_rank", return_value=0), \
101-
patch("vllm_ascend.ops.moe.token_dispatcher.get_ascend_soc_version", return_value=None), \
101+
patch("vllm_ascend._build_info.__ascend_soc_version__", return_value=""), \
102102
patch.dict("vllm.distributed.parallel_state.__dict__", _TP=tp_group, _EP=ep_group, _DP=dp_group,
103103
_PP=pp_group), \
104104
patch.dict("vllm_ascend.distributed.parallel_state.__dict__", _MC2=ep_group), \

tests/ut/ops/test_fused_ops.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
AscendUnquantizedFusedMoEMethod)
2828
from vllm_ascend.ops.moe.experts_selector import select_experts
2929
from vllm_ascend.ops.moe.moe_mlp import cumsum_group_list, unified_apply_mlp
30-
from vllm_ascend.utils import AscendSocVersion, adapt_patch
30+
from vllm_ascend.utils import adapt_patch
3131

3232
adapt_patch(True)
3333

@@ -109,7 +109,7 @@ def mock_finalize(hidden_states, **kwargs):
109109
scheduler_config=MagicMock(max_num_seqs=4),
110110
model_config=MagicMock(max_model_len=2048)
111111
)), \
112-
patch("vllm_ascend.utils.get_ascend_soc_version", return_value=AscendSocVersion.A3), \
112+
patch("vllm_ascend._build_info.__ascend_soc_version__", return_value="A3"), \
113113
patch('vllm_ascend.ops.moe.moe_mlp.get_forward_context',
114114
return_value=mock_forward_context_obj), \
115115
patch('vllm_ascend.ops.moe.moe_comm_method.MC2CommImpl._get_token_dispatcher',
@@ -387,7 +387,8 @@ def test_apply_with_expert_map(self, moe_method, mock_dist_env,
387387
forward_context = mock_dist_env['mock_forward_context_obj']
388388

389389
with patch("vllm_ascend.ops.fused_moe.get_forward_context", return_value=forward_context), \
390-
patch("vllm_ascend.utils.get_ascend_soc_version", return_value=AscendSocVersion.A3):
390+
patch("vllm_ascend._build_info.__ascend_soc_version__", return_value="A3"):
391+
391392
expert_map = torch.tensor([0, 1, 2, -1, -1, -1, -1, -1])
392393
moe_method.ep_size = ep_size
393394
x = torch.randn(8, 2, 2)

tests/ut/ops/test_token_dispatcher.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
from tests.ut.base import TestBase
2323

2424
from vllm_ascend.ops.moe.token_dispatcher import ( # isort: skip
25-
AscendSocVersion, TokenDispatcherWithAll2AllV,
26-
TokenDispatcherWithAllGather, TokenDispatcherWithMC2)
25+
TokenDispatcherWithAll2AllV, TokenDispatcherWithAllGather,
26+
TokenDispatcherWithMC2)
2727

2828

2929
class TestTokenDispatcherWithMC2(TestBase):
@@ -50,10 +50,9 @@ def setUp(self):
5050
return_value=self.forward_context)
5151
self.forward_context_patch.start()
5252

53-
# Mock get_ascend_soc_version()
53+
# Mock __ascend_soc_version__
5454
self.ascend_soc_version_patch = patch(
55-
"vllm_ascend.ops.moe.token_dispatcher.get_ascend_soc_version",
56-
return_value=AscendSocVersion.A3)
55+
"vllm_ascend._build_info.__ascend_soc_version__", new="A3")
5756
self.ascend_soc_version_patch.start()
5857

5958
kwargs = {"with_quant": False, "top_k": 8, "num_experts": 128}

tests/ut/torchair/ops/test_torchair_fused_moe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from vllm_ascend.quantization.quant_config import AscendFusedMoEMethod
2727
from vllm_ascend.torchair.ops.torchair_fused_moe import (
2828
TorchairAscendFusedMoE, TorchairAscendUnquantizedFusedMoEMethod)
29-
from vllm_ascend.utils import AscendSocVersion, adapt_patch # noqa E402
29+
from vllm_ascend.utils import adapt_patch # noqa E402
3030

3131
adapt_patch(True)
3232

@@ -379,7 +379,7 @@ def test_apply_with_expert_map(self, moe_method, mock_dist_env,
379379
forward_context = MagicMock(
380380
fused_moe_state=_get_fused_moe_state(ep_size, is_prefill, True))
381381
with patch("vllm_ascend.torchair.ops.torchair_fused_moe.get_forward_context", return_value=forward_context), \
382-
patch("vllm_ascend.torchair.ops.torchair_fused_moe.get_ascend_soc_version", return_value=AscendSocVersion.A3):
382+
patch("vllm_ascend._build_info.__ascend_soc_version__", return_value="A3"):
383383
expert_map = torch.tensor([0, 1, 2, -1, -1, -1, -1, -1])
384384
moe_method.ep_size = ep_size
385385
x = torch.randn(8, 2, 2)

tests/ut/worker/test_model_runner_v1.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
import pytest
1717

18-
from vllm_ascend.utils import AscendSocVersion
1918
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
2019

2120

@@ -24,21 +23,25 @@
2423
"soc_version, enable_expert_parallel, world_size, num_tokens, mc2_tokens_capacity, quant_type, expected_method",
2524
[
2625
# Case 1: Expert parallel is disabled, should always be 'allgather'
27-
(AscendSocVersion.A2, False, 8, 100, 256, None, "allgather"),
28-
(AscendSocVersion.A3, False, 16, 500, 256, None, "allgather"),
26+
("A2", False, 8, 100, 256, None, "allgather"),
27+
("A3", False, 16, 500, 256, None, "allgather"),
2928
3029
# Case 2: A2 SOC with w4a8_dynamic -> use alltoall when not mc2
31-
(AscendSocVersion.A2, True, 8, 100, 256, "w4a8_dynamic", "alltoall"),
32-
(AscendSocVersion.A2, True, 16, 257, 256, "w4a8_dynamic", "alltoall"),
33-
(AscendSocVersion.A2, True, 16, 100, 256, "w4a8_dynamic", "mc2"), # meets mc2 condition
30+
("A2", True, 8, 100, 256, "w4a8_dynamic", "alltoall"),
31+
("A2", True, 16, 257, 256, "w4a8_dynamic", "alltoall"),
32+
("A2", True, 16, 100, 256, "w4a8_dynamic", "mc2"), # meets mc2 condition
3433
3534
# Case 3: A2 SOC without w4a8_dynamic -> fallback to allgather
36-
(AscendSocVersion.A2, True, 8, 100, 256, None, "allgather"),
37-
(AscendSocVersion.A2, True, 16, 257, 256, None, "allgather"),
35+
("A2", True, 8, 100, 256, None, "allgather"),
36+
("A2", True, 16, 257, 256, None, "allgather"),
3837
3938
# Case 4: A3 SOC
40-
(AscendSocVersion.A3, True, 8, 100, 256, None, "mc2"),
41-
(AscendSocVersion.A3, True, 8, 257, 256, None, "alltoall"),
39+
("A3", True, 8, 100, 256, None, "mc2"),
40+
("A3", True, 8, 257, 256, None, "alltoall"),
41+
42+
# Case 5: 310P SOC -> allgather
43+
("310P", True, 8, 100, 256, None, "allgather"),
44+
("310P", True, 8, 257, 256, None, "allgather"),
4245
])
4346
# yapf: enable
4447
def test_select_moe_comm_method(soc_version, enable_expert_parallel,
@@ -64,8 +67,8 @@ def test_select_moe_comm_method(soc_version, enable_expert_parallel,
6467
mock_runner.vllm_config = mock_vllm_config
6568

6669
# Patch the helper functions
67-
with patch('vllm_ascend.worker.model_runner_v1.get_ascend_soc_version',
68-
return_value=soc_version), \
70+
with patch('vllm_ascend._build_info.__ascend_soc_version__',
71+
new=soc_version), \
6972
patch('vllm_ascend.worker.model_runner_v1.is_global_first_rank',
7073
return_value=True):
7174

@@ -97,8 +100,8 @@ def test_select_moe_comm_method_unsupported_soc():
97100

98101
unsupported_soc = "UnsupportedSOC"
99102

100-
with patch('vllm_ascend.worker.model_runner_v1.get_ascend_soc_version',
101-
return_value=unsupported_soc), \
103+
with patch('vllm_ascend._build_info.__ascend_soc_version__',
104+
new=unsupported_soc), \
102105
patch('vllm_ascend.worker.model_runner_v1.is_global_first_rank',
103106
return_value=True), \
104107
pytest.raises(ValueError, match=f"Unsupported soc_version: {unsupported_soc}"):

0 commit comments

Comments
 (0)