@@ -10,7 +10,6 @@
 from vllm_ascend.distributed import parallel_state
 from vllm_ascend.ops.linear import (AscendColumnParallelLinear,
                                     AscendMergedColumnParallelLinear,
-                                    AscendQKVParallelLinear,
                                     AscendRowParallelLinear,
                                     AscendUnquantizedLinearMethod)
 
@@ -179,64 +178,5 @@ def test_merged_mlp_tp_init(self):
         self.assertEqual(linear.forward_type, "mlp_tp")
 
 
-class TestAscendQKVParallelLinear(unittest.TestCase):
-
-    def setUp(self):
-        os.environ["VLLM_ASCEND_ENABLE_MLP_OPTIMIZE"] = "1"
-        # Mock get_tensor_model_parallel_world_size function
-        self.get_tensor_model_parallel_world_size_patch = mock.patch(
-            'vllm_ascend.ops.linear.get_tensor_model_parallel_world_size',
-            return_value=2)
-        self.get_tensor_model_parallel_world_size_mock = \
-            self.get_tensor_model_parallel_world_size_patch.start()
-        # Mock divide function (assumed to be in your module)
-        self.divide_patch = mock.patch('vllm_ascend.ops.linear.divide')
-        self.divide_mock = self.divide_patch.start()
-        self.divide_mock.side_effect = lambda x, y: x // y  # Simulate division
-
-        # Mock AscendMlpColumnParallelLinear's __init__
-        self.linear_init_patch = mock.patch.object(
-            AscendMlpColumnParallelLinear,
-            "__init__",
-            side_effect=self.mock_linear_init)
-        self.linear_init_patch.start()
-
-        # Create mock objects
-        self.quant_method_mock = mock.MagicMock()
-
-    def mock_linear_init(self, instance, *args, **kwargs):
-        torch.nn.Module.__init__(instance)
-        # Set quant_method and other attributes
-        instance.quant_method = self.quant_method_mock
-        instance.bias = torch.nn.Parameter(torch.randn(8))  # Example bias
-        instance.input_size = 16
-        instance.output_size = 8
-        instance.gather_output = False
-        instance.skip_bias_add = False
-        instance.return_bias = True
-
-    def tearDown(self):
-        self.get_tensor_model_parallel_world_size_patch.stop()
-        self.divide_patch.stop()
-
-    def test_init_tpsize_larger_than_total_kv_head(self):
-        layer = AscendQKVParallelLinear(
-            hidden_size=16,
-            head_size=8,
-            total_num_heads=1,
-        )
-        self.assertEqual(layer.num_kv_heads, 1)
-        self.assertEqual(layer.num_kv_head_replicas, 2)
-
-    def test_init_tpsize_smaller_than_total_kv_head(self):
-        layer = AscendQKVParallelLinear(
-            hidden_size=16,
-            head_size=8,
-            total_num_heads=4,
-        )
-        self.assertEqual(layer.num_kv_heads, 2)
-        self.assertEqual(layer.num_kv_head_replicas, 1)
-
-
 if __name__ == '__main__':
     unittest.main()
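
The deleted tests exercised how AscendQKVParallelLinear partitions KV heads across tensor-parallel ranks. A minimal sketch of that arithmetic, assuming the layer follows vLLM's QKVParallelLinear convention; the `kv_head_partition` helper below is hypothetical, not part of vllm_ascend:

```python
# Illustrative sketch only: mirrors the KV-head partitioning the removed
# tests asserted (tp_size was mocked to 2). kv_head_partition is a
# hypothetical helper, not a real vllm_ascend function.
def kv_head_partition(total_num_kv_heads: int, tp_size: int) -> tuple[int, int]:
    if tp_size >= total_num_kv_heads:
        # Fewer KV heads than ranks: each rank keeps a single head,
        # and each head is replicated across tp_size // total ranks.
        return 1, tp_size // total_num_kv_heads
    # More KV heads than ranks: heads are split evenly, no replication.
    return total_num_kv_heads // tp_size, 1

# Matches the deleted assertions (tp_size=2):
assert kv_head_partition(1, 2) == (1, 2)  # tp size larger than total KV heads
assert kv_head_partition(4, 2) == (2, 1)  # tp size smaller than total KV heads
```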