diff --git a/.github/workflows/format_pr_body.yaml b/.github/workflows/format_pr_body.yaml
index 407ce228ab..8b0661a9bf 100644
--- a/.github/workflows/format_pr_body.yaml
+++ b/.github/workflows/format_pr_body.yaml
@@ -36,7 +36,7 @@ jobs:
 
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=6d8246aaffff3ebec84767e373212a7b8da328e2
+          VLLM_COMMIT=c60e6137f0bf2034853919b3a9d705d7e06b93cf
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
 
       - name: Checkout repository
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index 7ffff02bea..c406907ebd 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -82,7 +82,7 @@ jobs:
       VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     steps:
       - name: Install packages
         run: |
@@ -140,7 +140,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: singlecard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
@@ -206,7 +206,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: multicard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml
index ab9992f43d..0c389a58a1 100644
--- a/.github/workflows/vllm_ascend_test_full.yaml
+++ b/.github/workflows/vllm_ascend_test_full.yaml
@@ -72,7 +72,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: singlecard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
@@ -156,7 +156,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
+        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
     name: multicard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 430153ae9d..d0f1b762f9 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -32,7 +32,14 @@
                           BatchEncoding, BatchFeature)
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
 from vllm import LLM, SamplingParams
-from vllm.config import TaskOption, _get_and_verify_dtype
+
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.10.2"):
+    from vllm.config import TaskOption, _get_and_verify_dtype
+else:
+    from vllm.config.model import TaskOption, _get_and_verify_dtype
+
 from vllm.inputs import TextPrompt
 from vllm.outputs import RequestOutput
 from vllm.transformers_utils.utils import maybe_model_redirect
diff --git a/vllm_ascend/sample/sampler.py b/vllm_ascend/sample/sampler.py
index e009e4cd56..6a5c130ffe 100644
--- a/vllm_ascend/sample/sampler.py
+++ b/vllm_ascend/sample/sampler.py
@@ -1,12 +1,15 @@
 import torch
 import torch_npu
-from vllm.config import LogprobsMode
 from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
 from vllm.v1.sample.sampler import Sampler
 
-from vllm_ascend.utils import is_310p
+from vllm_ascend.utils import is_310p, vllm_version_is
 
-DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
+if vllm_version_is("0.10.2"):
+    from vllm.config import LogprobsMode
+    DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
+else:
+    DEFAULT_LOGPROBS_MODE = "raw_logprobs"
 
 
 class AscendSampler(Sampler):
@@ -65,10 +68,18 @@ def forward_native(self, logits, generators, k, p):
         """Override pytorch native implementation to torch_npu"""
         logits = self._apply_top_k_top_p(logits, k, p)
         logits_to_return = None
-        if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
-            logits_to_return = logits
-        elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
-            logits_to_return = logits.log_softmax(dim=-1, dtype=torch.float32)
+        if vllm_version_is("0.10.2"):
+            if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
+                logits_to_return = logits
+            elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
+                logits_to_return = logits.log_softmax(dim=-1,
+                                                      dtype=torch.float32)
+        else:
+            if self.logprobs_mode == "processed_logits":
+                logits_to_return = logits
+            elif self.logprobs_mode == "processed_logprobs":
+                logits_to_return = logits.log_softmax(dim=-1,
+                                                      dtype=torch.float32)
 
         probs = logits.softmax(dim=-1, dtype=torch.float32)
         return random_sample(probs, generators), logits_to_return
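
Note (illustration, not part of the patch): every change above gates on the installed vLLM version with the vllm_version_is helper from vllm_ascend.utils, keeping the old vllm.config import paths and the LogprobsMode enum for v0.10.2 while switching to vllm.config.model and plain string logprobs modes on newer commits. A minimal sketch of that pattern, assuming only that vllm_version_is compares against the installed vLLM release string; the fallback definition is a simplified stand-in, not the real helper:

    # Sketch of the version-gating pattern used throughout this patch.
    try:
        from vllm_ascend.utils import vllm_version_is
    except ImportError:
        import vllm

        def vllm_version_is(target: str) -> bool:
            # Simplified assumption for illustration: exact match against
            # the installed vLLM release string.
            return vllm.__version__ == target

    if vllm_version_is("0.10.2"):
        # vLLM 0.10.2 still exposes the LogprobsMode enum from vllm.config.
        from vllm.config import LogprobsMode
        DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
    else:
        # Newer vLLM commits use the plain string form instead.
        DEFAULT_LOGPROBS_MODE = "raw_logprobs"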