@@ -6,13 +6,14 @@
 from transformers.configuration_utils import PretrainedConfig
 from vllm.config import ModelConfig, VllmConfig
 from vllm.model_executor.layers.rotary_embedding import (
-    DeepseekScalingRotaryEmbedding, RotaryEmbedding)
+    DeepseekScalingRotaryEmbedding, MRotaryEmbedding, RotaryEmbedding)
 
 from tests.ut.base import TestBase
 from vllm_ascend.ascend_forward_context import set_ascend_forward_context
 from vllm_ascend.ops.rotary_embedding import _custom_rotary_embedding_enabled
 
 MODEL = "Qwen3-0.6B"
+MODEL_VL = "Qwen/Qwen2.5-VL-3B-Instruct"
 MAX_NUM_BATCHED_TOKEND = 10000
 
 
@@ -376,3 +377,93 @@ def test_yarn_get_mscale(self, mock_npuplatform):
                 expected,
                 places=6,
                 msg=f"Failed for scale={scale}, mscale={mscale}")
+
+
+class TestAscendMRotaryEmbedding(unittest.TestCase):
+
+    def setUp(self):
+        # Shared tensors and rope hyperparameters for the mrope tests.
+        self.number_tokens = 3
+        self.num_head = 8
+        self.num_kvhead = 8
+        self.head_size = 128
+        self.max_position_embeddings = 128000
+        self.is_neox_style = True
+        self.rope_theta = 1000000.0
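+        # 1-D positions exercise the text-only path; the multimodal mrope
+        # path uses a (3, num_tokens) tensor of t/h/w position indices.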
+        self.positions_1d = torch.tensor([1, 2, 3])
+        self.positions_2d = torch.randint(1, 10, (3, self.number_tokens))
+
+        self.query = torch.randn(
+            (self.number_tokens, self.num_head * self.head_size),
+            dtype=torch.bfloat16)
+        self.key = torch.randn(
+            (self.number_tokens, self.num_kvhead * self.head_size),
+            dtype=torch.bfloat16)
+
+        # Qwen2.5-VL mrope section case
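+        # Section lengths must sum to rotary_dim // 2 (16 + 24 + 24 == 64).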
+        self.mrope_section = [16, 24, 24]
+
+        self.layer = MRotaryEmbedding(self.head_size,
+                                      self.head_size,
+                                      self.max_position_embeddings,
+                                      base=self.rope_theta,
+                                      is_neox_style=self.is_neox_style,
+                                      dtype=torch.bfloat16,
+                                      mrope_section=self.mrope_section)
+
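+        # Stand-in ascend config with torchair graph mode disabled.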
+        self.mock_config = MagicMock()
+        self.mock_config.torchair_graph_config.enabled = False
+
+    def _create_vllm_config(self):
+        vllm_config = VllmConfig()
+        model_config = ModelConfig(MODEL_VL,
+                                   tokenizer=MODEL_VL,
+                                   max_model_len=MAX_NUM_BATCHED_TOKEND)
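+        # __post_init__ is patched out, so hf_config must be stubbed by hand.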
+        model_config.hf_config = PretrainedConfig()
+        vllm_config.model_config = model_config
+        return vllm_config
+
+    @patch('torch_npu.npu_mrope')
+    @patch('vllm.config.ModelConfig.__post_init__', MagicMock())
+    @patch('vllm.config.VllmConfig.__post_init__', MagicMock())
+    @patch('vllm.distributed.parallel_state._DP', MagicMock(world_size=1))
+    @patch('vllm.distributed.parallel_state._TP', MagicMock(world_size=1))
+    def test_forward_oot_1d_positions(self, mock_npu_mrope):
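+        # torch_npu.npu_mrope is mocked so the test runs off-NPU.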
+        mock_npu_mrope.return_value = (torch.zeros_like(self.query),
+                                       torch.zeros_like(self.key))
+
+        vllm_config = self._create_vllm_config()
+        with set_ascend_forward_context(None, vllm_config):
+            result_q, result_k = self.layer.forward_oot(
+                self.positions_1d, self.query, self.key)
+
+        mock_npu_mrope.assert_called_once()
+        self.assertFalse(torch.isnan(result_q).any().item())
+        self.assertFalse(torch.isnan(result_k).any().item())
+        self.assertEqual(result_q.shape, self.query.shape)
+
+    @patch('torch_npu.npu_mrope')
+    @patch('vllm.config.ModelConfig.__post_init__', MagicMock())
+    @patch('vllm.config.VllmConfig.__post_init__', MagicMock())
+    @patch('vllm.distributed.parallel_state._DP', MagicMock(world_size=1))
+    @patch('vllm.distributed.parallel_state._TP', MagicMock(world_size=1))
+    def test_forward_oot_2d_positions(self, mock_npu_mrope):
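+        # 2-D positions take the multimodal branch: one row per t/h/w axis.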
+        mock_npu_mrope.return_value = (torch.zeros_like(self.query),
+                                       torch.zeros_like(self.key))
+
+        vllm_config = self._create_vllm_config()
+        with set_ascend_forward_context(None, vllm_config):
+            result_q, result_k = self.layer.forward_oot(
+                self.positions_2d, self.query, self.key)
+
+        mock_npu_mrope.assert_called_once()
+        self.assertFalse(torch.isnan(result_q).any().item())
+        self.assertFalse(torch.isnan(result_k).any().item())
+        self.assertEqual(result_q.shape, self.query.shape)