From 1ddf8a3f15eccacd2e10fbeb3efd347e7ac97c43 Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Thu, 20 Feb 2025 22:15:36 -0600
Subject: [PATCH 1/2] remove old variables when moving scales

---
 hls4ml/model/optimizer/passes/move_scales.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py
index 8fba1ec405..03bb0f3b77 100644
--- a/hls4ml/model/optimizer/passes/move_scales.py
+++ b/hls4ml/model/optimizer/passes/move_scales.py
@@ -12,6 +12,9 @@
 from hls4ml.model.layers import ApplyAlpha, Constant, Conv, MatMul, Merge
 from hls4ml.model.optimizer import OptimizerPass
 
+# These attributes should not be copied. (Should add the output name to this)
+_attrs_not_to_copy = ['trace', 'precision', 'scale', 'bias', 'scale_data', 'bias_data']
+
 
 class ScaleDownMatMul(OptimizerPass):
     '''Shift an ApplyAlpha below a MatMul'''
@@ -62,7 +65,7 @@ def transform(self, model, node):
         output = node.get_output_variable()
 
         # to remove warning, since these get set again
-        new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')}
+        new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in _attrs_not_to_copy + apply_alpha.outputs}
 
         can_propagate = False
         if not bias.shape and bias == 0:
@@ -258,7 +261,7 @@ def transform(self, model, node):
             return False
 
         # to remove warning, since these get set again
-        new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')}
+        new_attrs = {k: v for k, v in in0.attributes.items() if k not in _attrs_not_to_copy + in0.outputs}
 
         new_name = in0.name
         model.remove_node(in0)
@@ -305,7 +308,7 @@ def transform(self, model, node):
             return False
 
         # to remove warning, since these get set again
-        new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')}
+        new_attrs = {k: v for k, v in in0.attributes.items() if k not in _attrs_not_to_copy + in0.outputs}
 
         new_name = in1.name
         model.remove_node(in1)
@@ -329,7 +332,7 @@ def transform(self, model, node):
             return False
 
         # to remove warning, since these get set again
-        new_attrs = {k: v for k, v in in2.attributes.items() if k not in ('trace', 'precision')}
+        new_attrs = {k: v for k, v in in2.attributes.items() if k not in _attrs_not_to_copy + in2.outputs}
 
         new_name = in2.name
         model.remove_node(in2)
@@ -391,7 +394,7 @@ def transform(self, model, node):
             return False
 
         # to remove warning, since these get set again
-        new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')}
+        new_attrs = {k: v for k, v in in0.attributes.items() if k not in _attrs_not_to_copy + in0.outputs}
         new_name = in1.name
         model.remove_node(in0)
         model.remove_node(in1)
@@ -415,7 +418,7 @@ def transform(self, model, node):
             return False
 
         # to remove warning, since these get set again
-        new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')}
+        new_attrs = {k: v for k, v in in0.attributes.items() if k not in _attrs_not_to_copy + in0.outputs}
         new_name = in0.name
         model.remove_node(in0)
         model.remove_node(in2)
@@ -442,7 +445,7 @@ def transform(self, model, node):
             return False
 
         # to remove warning, since these get set again
-        new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')}
+        new_attrs = {k: v for k, v in in1.attributes.items() if k not in _attrs_not_to_copy + in1.outputs}
         new_name = in1.name
         model.remove_node(in1)
         model.remove_node(in2)
@@ -478,7 +481,7 @@ def transform(self, model, node):
             return False
 
-        new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')}
+        new_attrs = {k: v for k, v in in0.attributes.items() if k not in _attrs_not_to_copy + in0.outputs}
 
         new_name = in0.name
         model.remove_node(in0)
         model.remove_node(in1)
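Patch 1 repeats one pattern at every call site: when an ApplyAlpha node is moved, its attributes are copied into a fresh dict, minus the stale scale/bias entries and the node's own output names, since all of those are set again when the node is re-created. A minimal, self-contained sketch of that filtering; DummyNode is a hypothetical stand-in for an hls4ml node object and is not part of the patch:

    # Sketch of the attribute filtering introduced above; DummyNode is a
    # hypothetical stand-in for an hls4ml ApplyAlpha node.
    _attrs_not_to_copy = ['trace', 'precision', 'scale', 'bias', 'scale_data', 'bias_data']

    class DummyNode:
        def __init__(self, attributes, outputs):
            self.attributes = attributes  # dict of node attributes
            self.outputs = outputs  # list of this node's output names

    node = DummyNode(
        attributes={'trace': True, 'scale_data': [2.0], 'result_t': 'fixed<16,6>', 'node_out': None},
        outputs=['node_out'],
    )

    # Keep only the attributes that will not be recomputed when the node is rebuilt
    new_attrs = {k: v for k, v in node.attributes.items() if k not in _attrs_not_to_copy + node.outputs}
    assert set(new_attrs) == {'result_t'}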
From 2a7efcb92c1c1de532ec0f83379d01f7da0eb68a Mon Sep 17 00:00:00 2001
From: Jan-Frederik Schulte
Date: Mon, 24 Feb 2025 17:08:11 -0500
Subject: [PATCH 2/2] 'fix' precision for non-power-of-2 models in
 brevitas-QONNX workflow and add pytests for this workflow

---
 hls4ml/model/optimizer/passes/quant_opt.py |  8 +++
 test/pytest/test_qonnx.py                  | 78 ++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py
index 04d5393748..a99cd87a8f 100644
--- a/hls4ml/model/optimizer/passes/quant_opt.py
+++ b/hls4ml/model/optimizer/passes/quant_opt.py
@@ -343,8 +343,16 @@ def transform(self, model, node):
         rescale = scale
         rebias = -bias * scale
+
+        # precision of the scale is important for overall model accuracy, so it is increased here
+        # This is somewhat stupid and needs a better solution
+        frac_bits = node.get_attr('bitwidth') * 2
+        scale_precision, scale_quantizer = _calculate_precision_quantizer(frac_bits, 0, signed, narrow, rounding_mode)
+
         attributes_rescale['scale_data'] = np.broadcast_to(rescale, inshape)
         attributes_rescale['bias_data'] = np.broadcast_to(rebias, inshape)
+        attributes_rescale['scale_quantizer'] = scale_quantizer
+        attributes_rescale['scale_precision'] = scale_precision
 
         rescale_node = model.make_node(
             ApplyAlpha, rescale_name, attributes_rescale, [x for x in node.inputs], [x for x in node.outputs]
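The quant_opt.py change targets brevitas models whose quantization scales are not powers of 2: if the rescale factor itself is stored with too few fractional bits, rounding of the scale dominates the model error, so the patch doubles the fractional bits relative to the quantized bitwidth. A rough, self-contained illustration of the effect; to_fixed is a hypothetical helper, not hls4ml's _calculate_precision_quantizer:

    import numpy as np

    def to_fixed(x, frac_bits):
        """Round x onto a fixed-point grid with frac_bits fractional bits."""
        return np.round(x * 2.0**frac_bits) / 2.0**frac_bits

    scale = 0.0374  # a typical non-power-of-2 scale learned by brevitas
    bitwidth = 8

    # With frac_bits = bitwidth the stored scale is off by a few percent;
    # with frac_bits = bitwidth * 2 (as in the patch) the error is negligible.
    for frac_bits in (bitwidth, bitwidth * 2):
        err = abs(to_fixed(scale, frac_bits) - scale) / scale
        print(f'frac_bits={frac_bits:2d}  relative scale error={err:.1e}')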
diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py
index f48f268626..7c8e81e0f5 100644
--- a/test/pytest/test_qonnx.py
+++ b/test/pytest/test_qonnx.py
@@ -2,16 +2,27 @@
 import urllib
 from pathlib import Path
 
+# To test workflow from brevitas
+import brevitas.nn as qnn
 import numpy as np
 import pytest
 import qonnx.core.onnx_exec as oxe
 import qonnx.util.cleanup
 import qonnx.util.to_channels_last
+import torch
+from brevitas.export import export_qonnx
+from brevitas.quant import (
+    Int8ActPerTensorFixedPoint,
+    Int8ActPerTensorFloat,
+    Int8WeightPerTensorFixedPoint,
+    Int8WeightPerTensorFloat,
+)
 
 # To conveniently run QONNX inference
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.transformation.channels_last import ConvertToChannelsLastAndClean
 from qonnx.transformation.gemm_to_matmul import GemmToMatMul
+from torch.nn import Module
 
 import hls4ml
 
@@ -432,3 +443,70 @@ def test_simple_model(model_name, io_type, backend, request):
     y_hls4ml = hls_model.predict(X)
 
     np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1)
+
+
+# Test brevitas -> QONNX -> hls4ml workflow
+quants = {
+    'Int8WeightPerTensorFloat': Int8WeightPerTensorFloat,
+    'Int8WeightPerTensorFixedPoint': Int8WeightPerTensorFixedPoint,
+    'Int8ActPerTensorFloat': Int8ActPerTensorFloat,
+    'Int8ActPerTensorFixedPoint': Int8ActPerTensorFixedPoint,
+}
+
+
+class QuantModelLinear(Module):
+    def __init__(self, weight_quant, act_quant):
+        super().__init__()
+        self.lin1 = qnn.QuantLinear(4, 4, bias=True, weight_quant=quants[weight_quant], input_quant=quants[act_quant])
+        self.relu1 = qnn.QuantReLU(act_quant=quants[act_quant])
+
+    def forward(self, x):
+        out = self.relu1(self.lin1(x))
+        return out
+
+
+backend = 'Vivado'
+io_type = 'io_parallel'
+
+
+# FixedPoint will give power-of-2 quantization scales, Float non-power-of-2
+@pytest.mark.parametrize('backend', ['Vitis'])
+@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream'])
+@pytest.mark.parametrize('quant_type', ['Float', 'FixedPoint'])
+def test_brevitas_workflow(backend, io_type, quant_type):
+
+    weight_quant = f'Int8WeightPerTensor{quant_type}'
+    act_quant = f'Int8ActPerTensor{quant_type}'
+
+    model = QuantModelLinear(weight_quant, act_quant)
+
+    x = torch.rand(1, 4)
+
+    output_path = 'brevitas_onnx.onnx'
+    _ = export_qonnx(model, input_t=x, export_path=output_path)
+
+    modelQONNX = ModelWrapper('brevitas_onnx.onnx')
+    modelQONNX = qonnx.util.cleanup.cleanup_model(modelQONNX)
+    modelQONNX = modelQONNX.transform(ConvertToChannelsLastAndClean())
+    modelQONNX = modelQONNX.transform(GemmToMatMul())
+    modelQONNX = qonnx.util.cleanup.cleanup_model(modelQONNX)
+
+    pytorch_prediction = model(x).detach().numpy()
+
+    configQONNX = hls4ml.utils.config.config_from_onnx_model(
+        modelQONNX, granularity='name', backend=backend, default_precision='fixed<16,6>'
+    )
+    # modify the config as desired
+    hls_modelQONNX = hls4ml.converters.convert_from_onnx_model(
+        modelQONNX,
+        output_dir=str(test_root_path / f'hls4mlprj_onnx_brevitas_{quant_type.lower()}_{io_type}_{backend}'),
+        io_type=io_type,
+        backend=backend,
+        hls_config=configQONNX,
+    )
+    print(hls_modelQONNX.output_vars)
+    hls_modelQONNX.compile()
+
+    hls_predictionQONNX = np.reshape(hls_modelQONNX.predict(x.detach().numpy()), pytorch_prediction.shape)
+
+    np.testing.assert_allclose(pytorch_prediction, hls_predictionQONNX, rtol=0.0, atol=0.05)
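With both patches applied, the new tests can be run on their own. A minimal invocation from the repository root, assuming brevitas is installed and hls_model.compile() can find a C++ compiler; the -k filter is only a convenience, not part of the patch:

    # Run only the new brevitas -> QONNX -> hls4ml workflow tests
    import pytest

    pytest.main(['-v', 'test/pytest/test_qonnx.py', '-k', 'test_brevitas_workflow'])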