We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 53ecd89, commit 846e596 (Copy full SHA for 846e596)
vllm_ascend/ops/linear_op.py
@@ -385,7 +385,9 @@ def apply(
385
bias_ = None if (self.tp_rank > 0 or self.skip_bias_add) else self.bias
386
387
if self.tp_size == 1 or not self.reduce_results:
388
- output = self.quant_method.apply(self, input_parallel, bias=bias_)
+ output = self.quant_method.apply(self.layer,
389
+ input_parallel,
390
+ bias=bias_)
391
else:
392
output_parallel = self.quant_method.apply(self.layer,
393
input_parallel,
0 commit comments