Skip to content

Commit 0e14f4a

Browse files
authored
Merge branch 'vllm-project:main' into main
2 parents 95da7b0 + 53ecd89 commit 0e14f4a

File tree

18 files changed

+851
-104
lines changed

18 files changed

+851
-104
lines changed

tests/e2e/common.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ _err() { _red "Error: $*" && exit 1; }
1414

1515
CURL_TIMEOUT=1
1616
CURL_COOLDOWN=5
17-
CURL_MAX_TRIES=180
17+
CURL_MAX_TRIES=300
1818

1919
function wait_url_ready() {
2020
local serve_name="$1"

tests/e2e/doctests/001-quickstart-test.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ function quickstart_online_test() {
5757
}
5858

5959
_info "====> Start simple_test"
60-
simple_test
60+
time simple_test
6161
_info "====> Start quickstart_offline_test"
62-
quickstart_offline_test
62+
time quickstart_offline_test
6363
_info "====> Start quickstart_online_test"
64-
quickstart_online_test
64+
time quickstart_online_test

tests/e2e/doctests/002-pip-binary-installation-test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,4 @@ function install_binary_test() {
5959
}
6060

6161
_info "====> Start install_binary_test"
62-
install_binary_test
62+
time install_binary_test

tests/e2e/models/configs/DeepSeek-V2-Lite.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ tasks:
33
- name: "gsm8k"
44
metrics:
55
- name: "exact_match,strict-match"
6-
value: 0.375
6+
value: 0.385
77
- name: "exact_match,flexible-extract"
8-
value: 0.375
8+
value: 0.385
99
tensor_parallel_size: 2
10-
batch_size: 8
10+
batch_size: 32
1111
gpu_memory_utilization: 0.7
1212
apply_chat_template: False
1313
fewshot_as_multiturn: False

tests/e2e/run_doctests.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ set -eo errexit
2222
. $(dirname "$0")/common.sh
2323

2424
export VLLM_USE_MODELSCOPE=true
25-
export VLLM_LOGGING_LEVEL=ERROR
2625

2726
_info "====> Start Quickstart test"
2827
. "${SCRIPT_DIR}/doctests/001-quickstart-test.sh"

tests/ut/attention/test_attention_v1.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,41 @@ def test_forward_decode_only_swa(self, mock_fused_infer_attention_score,
435435
mock_fused_infer_attention_score.assert_called_once()
436436
assert output.shape == (10, 8 * 64)
437437

438+
@patch('torch_npu._npu_reshape_and_cache')
439+
@patch('torch_npu._npu_paged_attention')
440+
@patch('torch_npu.npu_fused_infer_attention_score')
441+
def test_forward_decode_only_swa_seq_len_mismatch(
442+
self, mock_fused_infer_attention_score, mock_paged_attention,
443+
mock_npu_reshape_and_cache):
444+
"""Test forward pass in DecodeOnly state when the seq_lens length mismatches the query batch size"""
445+
query = torch.randn(10, 8 * 64)
446+
key = torch.randn(10, 8 * 64)
447+
value = torch.randn(10, 8 * 64)
448+
kv_cache = torch.empty(2, 5, 128, 8, 64)
449+
450+
metadata = self.attn_metadata
451+
metadata.attn_state = AscendAttentionState.DecodeOnly
452+
metadata.seq_lens = torch.tensor([10]) # len(seq_lens) == 1, which != query.size(0) == 10
453+
metadata.block_tables = torch.zeros(1, 5, dtype=torch.long)
454+
metadata.num_actual_tokens = 10
455+
metadata.slot_mapping = torch.zeros(10, dtype=torch.long)
456+
457+
mock_fused_infer_attention_score.return_value = (torch.ones(10, 8,
458+
64), 1)
459+
460+
output = self.impl_swa.forward(self.layer_no_quant,
461+
query,
462+
key,
463+
value,
464+
kv_cache,
465+
metadata,
466+
trace_flag=False)
467+
468+
mock_paged_attention.assert_called_once()
469+
mock_fused_infer_attention_score.assert_not_called()
470+
471+
assert output.shape == (10, 8 * 64)
472+
438473
@patch('vllm_ascend.attention.attention_v1.is_310p', return_value=False)
439474
@patch('torch_npu._npu_reshape_and_cache')
440475
@patch('vllm_ascend.attention.attention_v1.vanilla_chunked_prefill')

0 commit comments

Comments
 (0)