Commit 46b7a88

calad0i, JanFSchulte, pre-commit-ci[bot], and HamzaEzzRa authored
Distributed Arithmetic strategy for Dense, Conv1/2D, and EinsumDense (#1191)
* skip oneapi test if icpx doesnt exist
* bit-exact-possible multidim softmax
* softmax fix softmax fix fix softmax parsing issue ckpt softmax fix fix table size after overriding inv_inp_t
* move softmax attr to fpga backend, post rebase fix
* add keras v3 object parser
* add keras v3 layer handlers
* einsumdense and einsum
* add einsum templates
* bit-exact-possible multidim softmax
* symbolic bitwidth infer util
* add qinterval test
* keras v2-v3 reshape fn compability patch
* hgq2 layer handlers
* add bit-exact enforcement pass
* fix softmax accum fractional bits derivation
* add qeinsum test
* env update
* remove hgq v1 rewire behavier (superseded)
* fix del method in config class
* distributed arithmetic impl
* distributed arithmetic impl w/ conv
* distributed arithmetic templates
* prevent pointwise override for DA strategy
* add test for DA
* update proj conf
* disable delay_constraint in da4ml
* add hgq2 mha test
* update ci template
* require da4ml version
* pre-commit fix
* proper skip qeinsum test when condition not met
* softmax and activation fix
* hgq2 api change, prevent zero bw activation crash syn
* qinterval and bn bit-exactness fix
* fix einsum ax expansion and 0d output handling
* fix merge templates
* converter and bit-exact pass for ops layers
* use pointwise 2d for conv2d due to unknown flow changing
* fix einsum dense da impl corner case
* qinterval type fix
* fix corner case in qkeras converted proxy
* support mha def in (q,v) format
* update da4ml binding syntax
* update da4ml binding syntax x2
* use fixedvararr obj for da codegen
* more general build_lib script
* bring back hgq proxy embedded properties excl. pecision
* fix streaming conv1/2d da regression
* streaming template support for DA fix
* allow non-po-2 avg pooling
* ignore batch dim in parse_data_format
* keras v3 native pooling layer parser
* globalpooling handler fix
* unary lut bw derivation update
* keras 3.10 api change
* namespace fix for pointwise conv
* use constexpr for dim def
* conv pf handling
* keras v3 api change
* quality-of-life changes
* kv3 parser update
* shut up!
* post-rebase import conflicts
* remaining post-rebase fix
* bit-exactness corner case
* quantizer shrink corner case fix (sign bit)
* allow 0 bit activation...
* template and test fix
* intel/ac_types/ac_int.hpp:156:30: error: unsigned _BitInt must have a bit size of at least 1
* static_cast<typename ExtractPipeType<res_pipe>::value_type::value_type>
* doc leftover
* comment
* style
* [pre-commit.ci] auto fixes from pre-commit hooks
* model opt pass fix and avg pool fix
* squashed cosmetic and minor changes
* multi graph dimname fix
* Add Cropping layers support (#1309)
  * added Cropping1D and Cropping2D keras layers support
  * removed .bak templates files
  * added cropping layers tests for vivado and vitis
  * [pre-commit.ci] auto fixes from pre-commit hooks
  ---------
  Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
* bump da4ml version
* bit-exact algorithm minor change

---------
Co-authored-by: Jan-Frederik Schulte <jschulte@cern.ch>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Hamza Ezzaoui Rahali <hamzaezzaouirahali@gmail.com>
1 parent 71bf4ae commit 46b7a88


76 files changed: +3254 -352 lines
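For orientation, a minimal usage sketch (not part of this commit) of how the new strategy would typically be selected through the standard hls4ml configuration flow. The strategy string matches the `distributed_arithmetic` attribute value checked in the templates below; the exact user-facing spelling accepted by the config parser, and whether it is set per model or per layer, are assumptions here.

```python
# Hypothetical sketch: enabling the Distributed Arithmetic strategy on a small Keras model.
import hls4ml
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(16, activation='relu', input_shape=(8,))])

config = hls4ml.utils.config_from_keras_model(model, granularity='model')
# Assumption: the strategy name mirrors the attribute value used in the backend templates.
config['Model']['Strategy'] = 'distributed_arithmetic'

hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    io_type='io_parallel',  # the DA implementation provides a dedicated io_parallel entry point
    output_dir='da_dense_prj',  # hypothetical output directory
)
hls_model.compile()
```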

.pre-commit-config.yaml

Lines changed: 4 additions & 1 deletion
@@ -47,7 +47,10 @@ repos:
         exclude: docs/conf.py
         additional_dependencies: [flake8-bugbear, flake8-print]
         args: ['--max-line-length=125',  # github viewer width
-               '--extend-ignore=E203,T201']  # E203 is not PEP8 compliant
+               '--extend-ignore=E203,T201',  # E203 is not PEP8 compliant
+               '--per-file-ignores=hls4ml/model/optimizer/passes/bit_exact.py:E741',
+               # i for #int w/o sign, I for #int w/ sign when massively processing bw conversions
+               ]

   - repo: https://github.com/mgedmin/check-manifest
     rev: "0.50"

Jenkinsfile

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ pipeline {
                 sh '''#!/bin/bash --login
                     conda activate hls4ml-py310
                     conda install -y jupyterhub pydot graphviz pytest pytest-cov
-                    pip install pytest-randomly jupyter onnx>=1.4.0 matplotlib pandas seaborn pydigitalwavetools==1.1 pyyaml tensorflow==2.14 qonnx torch git+https://github.com/jmitrevs/qkeras.git@qrecurrent_unstack pyparsing
+                    pip install pytest-randomly jupyter onnx>=1.4.0 matplotlib pandas seaborn pydigitalwavetools==1.1 pyyaml tensorflow==2.14 qonnx torch git+https://github.com/jmitrevs/qkeras.git@qrecurrent_unstack pyparsing quantizers da4ml
                     pip install -U ../ --user
                     ./convert-keras-models.sh -x -f keras-models.txt
                     pip uninstall hls4ml -y'''

README.md

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ config = hls4ml.utils.fetch_example_model('KERAS_3layer.json')
 print(config)

 # Convert it to a hls project
-hls_model = hls4ml.converters.keras_to_hls(config)
+hls_model = hls4ml.converters.keras_v2_to_hls(config)

 # Print full list of example models if you want to explore more
 hls4ml.utils.fetch_example_list()

docs/advanced/profiling.rst

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@ You will need to initialise these objects by using a trained model, loading a mo
 .. code-block:: python

     from hls4ml.model.profiling import numerical
-    from hls4ml.converters import keras_to_hls
+    from hls4ml.converters import keras_v2_to_hls
     import matplotlib.pyplot as plt
     import yaml

@@ -27,7 +27,7 @@ You will need to initialise these objects by using a trained model, loading a mo
     with open("keras-config.yml", 'r') as ymlfile:
         config = yaml.load(ymlfile)

-    hls_model = keras_to_hls(config)
+    hls_model = keras_v2_to_hls(config)

     # produce 4 plots
     plots = numerical(model=model, hls_model = hls_model, X=X)

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 19 additions & 14 deletions
@@ -7,7 +7,7 @@
 import numpy as np

 from hls4ml.backends.backend import Backend
-from hls4ml.model.attributes import ChoiceAttribute, ConfigurableAttribute, TypeAttribute
+from hls4ml.model.attributes import Attribute, ChoiceAttribute, ConfigurableAttribute, TypeAttribute
 from hls4ml.model.layers import (
     GRU,
     LSTM,
@@ -109,32 +109,37 @@ def __init__(self, name):
         act_attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8), description=descriptions.table_type))
         self.attribute_map[Activation] = act_attrs

-        softmax_attrs = self.attribute_map.get(Softmax, [])
-        softmax_attrs.append(
+        softmax_attrs = [
+            Attribute('n_in'),
+            Attribute('activation', value_type=str),
+            Attribute('n_outer', value_type=int, default=1),
+            Attribute('n_inner', value_type=int, default=1),
             ChoiceAttribute(
                 'implementation',
                 ['latency', 'stable', 'argmax', 'legacy'],
                 default='stable',
                 description=descriptions.softmax_implementation,
-            )
-        )
-        softmax_attrs.append(
-            ConfigurableAttribute('skip', value_type=bool, default=False, description=descriptions.softmax_skip)
-        )
-        softmax_attrs.append(
+            ),
+            ConfigurableAttribute('skip', value_type=bool, default=False, description=descriptions.softmax_skip),
             TypeAttribute(
                 'exp_table',
                 default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
                 description=descriptions.table_type,
-            )
-        )
-        softmax_attrs.append(
+            ),
             TypeAttribute(
                 'inv_table',
                 default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
                 description=descriptions.table_type,
-            )
-        )
+            ),
+            TypeAttribute(
+                'inv_inp',
+                default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
+            ),
+            TypeAttribute(
+                'accum',
+                default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
+            ),
+        ]
         self.attribute_map[Softmax] = softmax_attrs

     def create_layer_class(self, layer_class):
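A hedged sketch (not part of the diff) of how the Softmax type attributes registered above (`exp_table`, `inv_table`, and the new `inv_inp` and `accum`) could be pinned per layer through named precisions in a name-granularity config. The layer name, the precision strings, and the assumption that the config parser accepts these key names are all illustrative.

```python
# Hypothetical sketch: overriding the softmax precisions for a named layer.
import hls4ml
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(10, input_shape=(16,)),
    keras.layers.Activation('softmax', name='softmax'),  # assumed layer name 'softmax'
])

config = hls4ml.utils.config_from_keras_model(model, granularity='name')
# Assumption: precision keys mirror the TypeAttribute names (without the '_t' suffix).
config['LayerName']['softmax']['Precision'] = {
    'result': 'ap_fixed<16,6>',
    'exp_table': 'ap_fixed<18,8,AP_RND,AP_SAT>',
    'inv_table': 'ap_fixed<18,8,AP_RND,AP_SAT>',
    'inv_inp': 'ap_fixed<18,8,AP_RND,AP_SAT>',
    'accum': 'ap_fixed<24,14>',
}
```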

hls4ml/backends/fpga/passes/fix_softmax_table_size.py

Lines changed: 5 additions & 1 deletion
@@ -6,7 +6,11 @@

 class FixSoftmaxTableSize(OptimizerPass):
     def match(self, node):
-        return isinstance(node, Softmax)
+        if not isinstance(node, Softmax):
+            return False
+        if 'inv_table_size' in node.attributes:
+            return False  # handler generating inv_table_size sets it properly
+        return True

     def transform(self, model, node: Layer):
         inp_layer = node.get_input_node()  # type: ignore

hls4ml/backends/fpga/passes/hgq_proxy_model.py

Lines changed: 0 additions & 5 deletions
@@ -52,10 +52,6 @@ def match(self, node: Layer):
         return isinstance(node, FixedPointQuantizer)

     def transform(self, model, node: FixedPointQuantizer):
-        if node.fusible:
-            model.remove_node(node)
-            return True
-
         if model.config.config['IOType'] != 'io_parallel':
             raise NotImplementedError('Heterogenous quantization for activations is only supported with IOType=io_parallel')

@@ -96,7 +92,6 @@ def __init__(self):

     def format(self, node):
         params = self._default_function_params(node)
-        node.attributes['result_t'].precision = node.attributes['table_t'].precision
         params['config'] = f'unary_lut_config{node.index}'
         params['table'] = node.get_weights('table').name


hls4ml/backends/vivado/passes/convolution_templates.py

Lines changed: 36 additions & 0 deletions
@@ -154,11 +154,21 @@ def format(self, node):
             mult_params['dense_function'] = 'nnet::DenseResource_rf_gt_nin'
         elif node.get_attr('strategy').lower() == 'resource_unrolled':
             mult_params['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}'
+        elif node.get_attr('strategy').lower() == 'distributed_arithmetic':
+            mult_params['dense_function'] = f'{namespace}::dense_da_wrapper_{node.index}'

         mult_config = self.mult_template.format(**mult_params)

         return mult_config + '\n' + conv_config

+    def match(self, node):
+        if node.get_attr('strategy') == 'distributed_arithmetic':
+            io_type = node.model.config.get_config_value("IOType")
+            if io_type == 'io_parallel':
+                # DA impl use alternate entry point for io_parallel conv
+                return False
+        return super().match(node)
+

 class Conv1DFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
@@ -173,6 +183,14 @@ def format(self, node):

         return self.template.format(**params)

+    def match(self, node):
+        if node.get_attr('strategy') == 'distributed_arithmetic':
+            io_type = node.model.config.get_config_value("IOType")
+            if io_type == 'io_parallel':
+                # DA impl use alternate entry point for io_parallel conv
+                return False
+        return super().match(node)
+

 class DepthwiseConv1DFunctionTemplate(Conv1DFunctionTemplate):
     def __init__(self):
@@ -299,11 +317,21 @@ def format(self, node):
             mult_params['dense_function'] = 'nnet::DenseResource_rf_gt_nin'
         elif node.get_attr('strategy').lower() == 'resource_unrolled':
             mult_params['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}'
+        elif node.get_attr('strategy').lower() == 'distributed_arithmetic':
+            mult_params['dense_function'] = f'{namespace}::dense_da_wrapper_{node.index}'

         mult_config = self.mult_template.format(**mult_params)

         return mult_config + '\n' + conv_config

+    def match(self, node):
+        if node.get_attr('strategy') == 'distributed_arithmetic':
+            io_type = node.model.config.get_config_value("IOType")
+            if io_type == 'io_parallel':
+                # DA impl use alternate entry point for io_parallel conv
+                return False
+        return super().match(node)
+

 class Conv2DFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
@@ -318,6 +346,14 @@ def format(self, node):

         return self.template.format(**params)

+    def match(self, node):
+        if node.get_attr('strategy') == 'distributed_arithmetic':
+            io_type = node.model.config.get_config_value("IOType")
+            if io_type == 'io_parallel':
+                # DA impl use alternate entry point for io_parallel conv
+                return False
+        return super().match(node)
+

 class DepthwiseConv2DFunctionTemplate(Conv2DFunctionTemplate):
     def __init__(self):

hls4ml/backends/vivado/passes/core_templates.py

Lines changed: 82 additions & 2 deletions
@@ -1,3 +1,5 @@
+from math import ceil, log2
+
 from hls4ml.backends.backend import get_backend
 from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate
 from hls4ml.model.layers import Activation, BatchNormalization, Dense, HardActivation, ParametrizedActivation, PReLU, Softmax
@@ -55,9 +57,17 @@ def format(self, node):
         # The 3rd case is never used
         elif node.get_attr('strategy').lower() == 'resource_unrolled':
             params['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}'
+        elif node.get_attr('strategy').lower() == 'distributed_arithmetic':
+            # Only triggered in io_streaming mode
+            params['dense_function'] = f'{namespace}::dense_da_wrapper_{node.index}'

         return self.template.format(**params)

+    def match(self, node):
+        if node.get_attr('strategy') == 'distributed_arithmetic':
+            return False  # DA does not use common dense template
+        return super().match(node)
+

 class DenseFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
@@ -71,6 +81,11 @@ def format(self, node):

         return self.template.format(**params)

+    def match(self, node):
+        if node.get_attr('strategy') == 'distributed_arithmetic':
+            return False  # DA does not use common dense template
+        return super().match(node)
+

 # BatchNormalization templates

@@ -152,13 +167,22 @@ def format(self, node):

 softmax_config_template = """struct {type}_config{index} : nnet::activ_config {{
     static const unsigned n_in = {n_in};
-    static const unsigned table_size = {table_size};
+    static const unsigned n_slice = {n_slice};
+    static const unsigned n_outer = {n_outer};
+    static const unsigned n_inner = {n_inner};
+    static const unsigned parallelization_factor = {parallelization_factor};
+    static const unsigned exp_table_size = {exp_table_size};
+    static const unsigned inv_table_size = {inv_table_size};
     static const unsigned io_type = nnet::{iotype};
     static const unsigned reuse_factor = {reuse};
     static const unsigned axis = {axis};
     static const nnet::softmax_implementation implementation = nnet::softmax_implementation::{implementation};
+    static constexpr float exp_scale = {exp_scale};
     typedef {exp_table_t.name} exp_table_t;
     typedef {inv_table_t.name} inv_table_t;
+    typedef {accum_t.name} accum_t;
+    typedef {inv_inp_t.name} inv_inp_t;
+    typedef {inp_norm_t_str} inp_norm_t;
 }};\n"""

 activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});'
@@ -210,10 +234,66 @@ def __init__(self):
         super(ActivationConfigTemplate, self).__init__(Softmax)  # Skip ActivationConfigTemplate's __init__
         self.template = softmax_config_template

+    def format(self, node):
+        params = self._default_config_params(node)
+        params['type'] = node.get_attr('activation')
+        params.setdefault('exp_table_size', params['table_size'])
+        params.setdefault('inv_table_size', params['table_size'])
+        params.setdefault('n_inner', 1)
+        params.setdefault('n_outer', 1)
+        params.setdefault('exp_scale', 1.0)
+        params.setdefault('parallelization_factor', -1)
+
+        n_slice = params['n_in'] // params['n_inner'] // params['n_outer']  # type: ignore
+        params['n_slice'] = n_slice
+
+        if params['accum_t'].name == 'model_default_t':  # type: ignore
+            scale = ceil(log2(n_slice))
+            exp_table_t = node.attributes['exp_table_t'].precision
+            signed, width, integers = exp_table_t.signed, exp_table_t.width, exp_table_t.integer
+            params['accum_t_str'] = f'ap_{"" if signed else "u"}fixed<{width + scale}, {integers + scale}>'
+        else:
+            params['accum_t_str'] = params['accum_t'].name  # type: ignore
+        if params['inv_inp_t'].name == 'model_default_t':  # type: ignore
+            params['inv_inp_t'] = params['exp_table_t']
+
+        if params['implementation'] == 'stable':
+            if 'inp_norm_t' not in params:
+                # Only used in stable (max-normalized) implementation
+                input_t = node.get_input_variable().type.precision
+                width, iwidth, signed = input_t.width, input_t.integer, input_t.signed  # noqa: F841
+                width, iwidth = width - signed, iwidth - signed
+                if signed:
+                    # Fix table size if too large
+                    exp_table_size = params['inv_table_size']
+                    params['exp_table_size'] = str(min(int(exp_table_size), 2**width))
+                params['inp_norm_t_str'] = f'ap_ufixed<{width}, {iwidth}>'
+            else:
+                params['inp_norm_t_str'] = params['inp_norm_t'].name  # type: ignore
+        else:
+            params['inp_norm_t_str'] = 'ap_fixed<1,0>'
+
+        return self.template.format(**params)
+
+
+class SoftmaxFunctionTemplate(FunctionCallTemplate):
+    def __init__(self):
+        super().__init__(Softmax, include_header=activ_include_list)
+        self.template = activ_function_template
+
+    def format(self, node):
+        params = self._default_function_params(node)
+        use_multidim = node.get_attr('n_inner', 1) > 1 or node.get_attr('n_outer', 1) > 1
+        use_multidim = use_multidim and node.model.config.get_config_value('IOType') == 'io_parallel'
+        params['activation'] = 'softmax' if not use_multidim else 'softmax_multidim'
+        params['config'] = f'softmax_config{node.index}'
+
+        return self.template.format(**params)
+

 class ActivationFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
-        super().__init__((Activation, HardActivation, Softmax), include_header=activ_include_list)
+        super().__init__((Activation, HardActivation), include_header=activ_include_list)
         self.template = activ_function_template

     def format(self, node):