|
32 | 32 |
|
33 | 33 | class vLLMQuantizationModifierPyTorch(vLLMQuantizationModifier):
|
34 | 34 | """
|
35 |
| - Pytorch-specific implementation of quantization modifier |
36 |
| -
|
37 |
| - :param scheme: Default QuantizationScheme to use when enabling quantization |
38 |
| - in a module. May also be a dictionary to be loaded into the QuantizationScheme |
39 |
| - class. A string alias may also be used, supported aliases: |
40 |
| - ['default', 'deepsparse', 'tensorrt']. |
41 |
| - If None, the default scheme (`QuantizationScheme()`) will be used. |
42 |
| - Default is None |
43 |
| - :param scheme_overrides: optional mapping of module type names or submodule type |
44 |
| - names to quantization schemes to override them with. If a scheme is mapped to |
45 |
| - 'default', then it will use the scheme set in the modifier scheme property |
| 35 | + PyTorch-specific implementation of vLLMQuantizationModifier |
| 36 | +
|
| 37 | + Enables post training quantization (PTQ) and quantization aware training (QAT) for a |
| 38 | + given module or its submodules. After calibration (PTQ) or the start epoch (QAT), |
| 39 | + the specified module(s)' forward pass will emulate quantized execution and the |
| 40 | + modifier will be enabled until training is completed. |
| 41 | +
|
| 42 | + :param config_groups: dictionary specifying quantization schemes to apply to target |
| 43 | + modules. Modules not matching a scheme target will NOT be quantized. |
| 44 | + :param ignore: optional list of module class names or submodule names to not |
| 45 | + quantize even if they match a target in config_groups. Defaults to empty list. |
| 46 | + :param disable_quantization_observer_epoch: Epoch to disable updates to the module |
| 47 | + quantization observers. At this point, quantized weights and zero points will |
| 48 | + not be updated. Leave None to not disable observers during QAT. Default is None |
| 49 | + :param num_calibration_steps: Number of steps to run post training calibration for. |
| 50 | + When None, the entire calibration_dataloader is used |
46 | 51 | """
|
47 | 52 |
|
48 | 53 | calibration_dataloader_: Any = None
|
|
0 commit comments