2 changes: 1 addition & 1 deletion .github/workflows/format_pr_body.yaml
@@ -36,7 +36,7 @@ jobs:

       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=6d8246aaffff3ebec84767e373212a7b8da328e2
+          VLLM_COMMIT=c60e6137f0bf2034853919b3a9d705d7e06b93cf
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
       - name: Checkout repository
6 changes: 3 additions & 3 deletions .github/workflows/vllm_ascend_test.yaml
@@ -82,7 +82,7 @@ jobs:
       VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     steps:
       - name: Install packages
         run: |
@@ -140,7 +140,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: singlecard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
@@ -206,7 +206,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: multicard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
4 changes: 2 additions & 2 deletions .github/workflows/vllm_ascend_test_full.yaml
@@ -72,7 +72,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: singlecard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
@@ -156,7 +156,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: multicard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
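All four matrix entries above, plus the pin in format_pr_body.yaml, reference the same vLLM commit, so a bump has to touch every workflow consistently. As a convenience, here is a minimal sketch of a one-pass bump script; the hashes come from this diff, but the script itself is illustrative and not part of the PR:

import pathlib

# Hashes taken from this PR; adjust when bumping to another vLLM commit.
OLD = "6d8246aaffff3ebec84767e373212a7b8da328e2"
NEW = "c60e6137f0bf2034853919b3a9d705d7e06b93cf"

# Rewrite every workflow file that still pins the old commit.
for wf in pathlib.Path(".github/workflows").glob("*.yaml"):
    text = wf.read_text()
    if OLD in text:
        wf.write_text(text.replace(OLD, NEW))
        print(f"updated {wf}")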
9 changes: 8 additions & 1 deletion tests/e2e/conftest.py
@@ -32,7 +32,14 @@
                           BatchEncoding, BatchFeature)
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
 from vllm import LLM, SamplingParams
-from vllm.config import TaskOption, _get_and_verify_dtype
+
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.10.2"):
+    from vllm.config import TaskOption, _get_and_verify_dtype
+else:
+    from vllm.config.model import TaskOption, _get_and_verify_dtype
+
 from vllm.inputs import TextPrompt
 from vllm.outputs import RequestOutput
 from vllm.transformers_utils.utils import maybe_model_redirect
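The gate above exists because vLLM moved TaskOption and _get_and_verify_dtype from vllm.config to vllm.config.model after v0.10.2, while the CI matrix still tests both versions. The real version check lives in vllm_ascend.utils; purely as an illustration of the shape such a helper can take (an assumption, not the project's actual code):

from importlib.metadata import PackageNotFoundError, version

def vllm_version_is(target: str) -> bool:
    # Compare the installed vllm distribution against a target version
    # string such as "0.10.2".
    try:
        return version("vllm") == target
    except PackageNotFoundError:
        # No installed vllm metadata (e.g. a plain source checkout);
        # treat as a non-match and fall through to the newer code path.
        return False

Because the check runs at import time, the conftest picks the right import path once per process, and the same test suite serves both matrix entries.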
25 changes: 18 additions & 7 deletions vllm_ascend/sample/sampler.py
@@ -1,12 +1,15 @@
 import torch
 import torch_npu
-from vllm.config import LogprobsMode
 from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
 from vllm.v1.sample.sampler import Sampler
 
-from vllm_ascend.utils import is_310p
+from vllm_ascend.utils import is_310p, vllm_version_is
 
-DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
+if vllm_version_is("0.10.2"):
+    from vllm.config import LogprobsMode
+    DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
+else:
+    DEFAULT_LOGPROBS_MODE = "raw_logprobs"
 
 
 class AscendSampler(Sampler):
@@ -65,10 +68,18 @@ def forward_native(self, logits, generators, k, p):
"""Override pytorch native implementation to torch_npu"""
logits = self._apply_top_k_top_p(logits, k, p)
logits_to_return = None
if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
logits_to_return = logits
elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
logits_to_return = logits.log_softmax(dim=-1, dtype=torch.float32)
+        if vllm_version_is("0.10.2"):
+            if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
+                logits_to_return = logits
+            elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
+                logits_to_return = logits.log_softmax(dim=-1,
+                                                      dtype=torch.float32)
+        else:
+            if self.logprobs_mode == "processed_logits":
+                logits_to_return = logits
+            elif self.logprobs_mode == "processed_logprobs":
+                logits_to_return = logits.log_softmax(dim=-1,
+                                                      dtype=torch.float32)
 
         probs = logits.softmax(dim=-1, dtype=torch.float32)
         return random_sample(probs, generators), logits_to_return

Review comment from @Yikun (Collaborator, Author), Sep 21, 2025, on the version-gated branch above:

    >>> from enum import Enum
    >>> class Color(Enum):
    ...     RED = "red"
    ...     GREEN = "green"
    ...     BLUE = "blue"
    ...
    >>> Color.RED
    <Color.RED: 'red'>
    >>> Color.RED == "red"
    False

    Here is the note on why we need this: an Enum member never compares
    equal to its raw string value, so the LogprobsMode comparisons that
    work on vLLM 0.10.2 are always False against the plain strings newer
    vLLM passes in, and the check has to branch on the version.
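To make the gating concrete, here is a small self-contained sketch of the pitfall the review comment demonstrates. The LogprobsMode stand-in and its member values are assumptions for illustration, mirroring the strings the diff compares against:

from enum import Enum

class LogprobsMode(Enum):
    # Stand-in for the enum vLLM 0.10.2 exposes; the values mirror the
    # plain strings newer vLLM uses instead.
    RAW_LOGPROBS = "raw_logprobs"
    PROCESSED_LOGITS = "processed_logits"
    PROCESSED_LOGPROBS = "processed_logprobs"

# An Enum member never compares equal to its raw string value...
assert LogprobsMode.PROCESSED_LOGITS != "processed_logits"
# ...so one unconditional comparison cannot serve both vLLM versions,
# which is why forward_native branches on vllm_version_is instead.
assert LogprobsMode.PROCESSED_LOGITS == LogprobsMode.PROCESSED_LOGITS
assert "processed_logits" == "processed_logits"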