Skip to content

Commit a9bfc6a

Browse files
authored
Merge branch 'main' into softmaxfix_torch
2 parents aaab34a + 352c124 commit a9bfc6a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+1156
-312
lines changed

.pre-commit-config.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@ exclude: (^hls4ml\/templates\/(vivado|quartus)\/(ap_types|ac_types)\/|^test/pyte
22

33
repos:
44
- repo: https://github.com/psf/black
5-
rev: 24.8.0
5+
rev: 24.10.0
66
hooks:
77
- id: black
88
language_version: python3
99
args: ['--line-length=125',
1010
'--skip-string-normalization']
1111

1212
- repo: https://github.com/pre-commit/pre-commit-hooks
13-
rev: v4.6.0
13+
rev: v5.0.0
1414
hooks:
1515
- id: check-added-large-files
1616
- id: check-case-conflict
@@ -30,13 +30,13 @@ repos:
3030
args: ["--profile", "black", --line-length=125]
3131

3232
- repo: https://github.com/asottile/pyupgrade
33-
rev: v3.17.0
33+
rev: v3.18.0
3434
hooks:
3535
- id: pyupgrade
3636
args: ["--py36-plus"]
3737

3838
- repo: https://github.com/asottile/setup-cfg-fmt
39-
rev: v2.5.0
39+
rev: v2.7.0
4040
hooks:
4141
- id: setup-cfg-fmt
4242

@@ -50,7 +50,7 @@ repos:
5050
'--extend-ignore=E203,T201'] # E203 is not PEP8 compliant
5151

5252
- repo: https://github.com/mgedmin/check-manifest
53-
rev: "0.49"
53+
rev: "0.50"
5454
hooks:
5555
- id: check-manifest
5656
stages: [manual]

docs/advanced/model_optimization.rst

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ The code block below showcases three use cases of the hls4ml Optimization API -
1313
from tensorflow.keras.optimizers import Adam
1414
from tensorflow.keras.metrics import CategoricalAccuracy
1515
from tensorflow.keras.losses import CategoricalCrossentropy
16-
from hls4ml.optimization.keras import optimize_model
17-
from hls4ml.optimization.keras.utils import get_model_sparsity
18-
from hls4ml.optimization.attributes import get_attributes_from_keras_model
19-
from hls4ml.optimization.objectives import ParameterEstimator
20-
from hls4ml.optimization.scheduler import PolynomialScheduler
16+
from hls4ml.optimization.dsp_aware_pruning.keras import optimize_model
17+
from hls4ml.optimization.dsp_aware_pruning.keras.utils import get_model_sparsity
18+
from hls4ml.optimization.dsp_aware_pruning.attributes import get_attributes_from_keras_model
19+
from hls4ml.optimization.dsp_aware_pruning.objectives import ParameterEstimator
20+
from hls4ml.optimization.dsp_aware_pruning.scheduler import PolynomialScheduler
2121
# Define baseline model and load data
2222
# X_train, y_train = ...
2323
# X_val, y_val = ...
@@ -75,7 +75,7 @@ To optimize GPU FLOPs, the code is similar to above:
7575

7676
.. code-block:: Python
7777
78-
from hls4ml.optimization.objectives.gpu_objectives import GPUFLOPEstimator
78+
from hls4ml.optimization.dsp_aware_pruning.objectives.gpu_objectives import GPUFLOPEstimator
7979
8080
# Optimize model
8181
# Note the change from ParameterEstimator to GPUFLOPEstimator
@@ -98,7 +98,7 @@ Finally, optimizing Vivado DSPs is possible, given a hls4ml config:
9898
.. code-block:: Python
9999
100100
from hls4ml.utils.config import config_from_keras_model
101-
from hls4ml.optimization.objectives.vivado_objectives import VivadoDSPEstimator
101+
from hls4ml.optimization.dsp_aware_pruning.objectives.vivado_objectives import VivadoDSPEstimator
102102
103103
# Note the change from optimize_model to optimize_keras_model_for_hls4ml
104104
# The function optimize_keras_model_for_hls4ml acts as a wrapper for the function, parsing hls4ml config to model attributes
@@ -130,5 +130,5 @@ Note, to ensure DSPs are optimized, "unrolled" Dense multiplication must be used
130130
.. code-block:: Python
131131
132132
hls_config = config_from_keras_model(optimized_model)
133-
hls_config['Model']['DenseResourceImplementation'] = 'Unrolled'
134-
# Any addition hls4ml config, such as strategy, reuse factor etc...
133+
hls_config['Model']['Strategy'] = 'Unrolled'
134+
# Any additional hls4ml config, such as reuse factor, etc.

docs/api/configuration.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,10 @@ For Vivado backend the options are:
135135
* **IOType**\ : your options are ``io_parallel`` or ``io_stream`` which defines the type of data structure used for inputs, intermediate activations between layers, and outputs. For ``io_parallel``, arrays are used that, in principle, can be fully unrolled and are typically implemented in RAMs. For ``io_stream``, HLS streams are used, which are a more efficient/scalable mechanism to represent data that are produced and consumed in a sequential manner. Typically, HLS streams are implemented with FIFOs instead of RAMs. For more information see `here <https://docs.xilinx.com/r/en-US/ug1399-vitis-hls/pragma-HLS-stream>`__.
136136
* **HLSConfig**\: the detailed configuration of precision and parallelism, including:
137137
* **ReuseFactor**\ : in the case that you are pipelining, this defines the pipeline interval or initiation interval
138-
* **Strategy**\ : Optimization strategy on FPGA, either "Latency" or "Resource". If none is supplied then hl4ml uses "Latency" as default. Note that a reuse factor larger than 1 should be specified when using "resource" strategy. An example of using larger reuse factor can be found `here. <https://github.com/fastmachinelearning/models/tree/master/keras/KERAS_dense>`__
138+
* **ParallelizationFactor**\ : The number of output "pixels" to compute in parallel in convolutional layers. Increasing this parameter results in a significant increase in the resources required on the FPGA.
139+
* **Strategy**\ : Optimization strategy on FPGA, either "Latency", "Resource" or "Unrolled". If none is supplied then hls4ml uses "Latency" as default. Note that a reuse factor larger than 1 should be specified when using the "Resource" or "Unrolled" strategy. An example of using larger reuse factor can be found `here. <https://github.com/fastmachinelearning/models/tree/master/keras/KERAS_dense>`__
140+
* **PipelineStyle**\ : Set the top level pipeline style. Valid options are "auto", "pipeline" and "dataflow". If unspecified, it defaults to "auto".
141+
* **PipelineInterval**\ : Optionally override the desired initiation interval of the design. Only valid in combination with "pipeline" style. If unspecified, it is left to the compiler to decide, ideally matching the largest reuse factor of the network.
139142
* **Precision**\ : this defines the precision of your inputs, outputs, weights and biases. It is denoted by ``ap_fixed<X,Y>``\ , where ``Y`` is the number of bits representing the signed number above the binary point (i.e. the integer part), and ``X`` is the total number of bits.
140143
Additionally, integers in fixed precision data type (\ ``ap_int<N>``\ , where ``N`` is a bit-size from 1 to 1024) can also be used. You have a chance to further configure this more finely with per-layer configuration described below.
141144

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,10 +238,12 @@ def get_closest_reuse_factor(self, valid_rf, chosen_rf):
238238
else:
239239
return before
240240

241-
def set_closest_reuse_factor(self, layer, n_in, n_out, attribute='reuse_factor'):
241+
def set_closest_reuse_factor(self, layer, n_in, n_out, attribute='reuse_factor', include_max_rf=True):
242242
assert attribute is not None, 'Reuse factor attribute cannot be None'
243243

244244
valid_rf = self.get_valid_reuse_factors(n_in, n_out)
245+
if not include_max_rf:
246+
valid_rf.pop()
245247
chosen_rf = layer.get_attr(attribute)
246248
if chosen_rf not in valid_rf:
247249
closest_rf = self.get_closest_reuse_factor(valid_rf, chosen_rf)

hls4ml/backends/vitis/passes/feature_check.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def transform(self, model, node):
1414
node.set_attr('implementation', 'linebuffer')
1515

1616

17-
class ValidateStrategy(OptimizerPass):
17+
class ValidateResourceStrategy(OptimizerPass):
1818
_resource_layer_cls = ['Conv1D', 'Conv2D', 'Dense']
1919

2020
def match(self, node):
@@ -29,6 +29,23 @@ def transform(self, model, node):
2929
if rf > n_in and rf % n_in > 0:
3030
print(
3131
f'WARNING: "Resource" strategy in "{node.name}" ({node.class_name}) may have suboptimal QoR in Vitis '
32-
'backend due to use of "urem" cores.\n'
33-
'Consider using a different ReuseFactor or switching to "Latency" strategy.'
32+
'backend due to use of "urem" cores in Vitis HLS <= 2022.1.\n'
33+
'Consider using a different ReuseFactor or switching to "Latency" strategy if using older versions '
34+
'of Vitis HLS.'
3435
)
36+
37+
38+
class ValidateResourceUnrolledStrategy(OptimizerPass):
    """Validation pass that warns about the "ResourceUnrolled" strategy on the Vitis backend.

    The pass matches nodes whose class name contains one of the entries in
    ``_unrolled_layer_cls`` and whose ``strategy`` attribute equals
    ``resource_unrolled`` (case-insensitive). For every match it prints a
    warning; the node and the model are not modified.
    """

    # Layer class names (or fragments of names) this check applies to.
    _unrolled_layer_cls = ['Conv1D', 'Conv2D', 'Dense', 'GRU', 'LSTM']

    def match(self, node):
        """Return True if ``node`` is a covered layer type using the ``resource_unrolled`` strategy.

        Args:
            node: Model node exposing ``class_name`` and ``get_attr``.

        Returns:
            bool: True when both the layer-type and the strategy conditions hold.
        """
        # Containment test rather than equality: any class name that merely
        # contains one of the listed entries is matched as well
        # (assumes ``class_name`` is a string -- TODO confirm).
        is_unrolled_layer = any(layer_cls in node.class_name for layer_cls in self._unrolled_layer_cls)
        # A missing 'strategy' attribute falls back to 'latency', i.e. no match.
        is_unrolled_strategy = node.get_attr('strategy', 'latency').lower() == 'resource_unrolled'

        return is_unrolled_layer and is_unrolled_strategy

    def transform(self, model, node):
        """Print a warning that the matched node may end up with an unexpected II.

        Args:
            model: The model being validated (unused).
            node: The matched node; its ``name`` and ``class_name`` appear in the message.
        """
        # The first literal ends with a space: adjacent string literals are
        # concatenated verbatim, and without it the message read "II inVitis".
        print(
            f'WARNING: "ResourceUnrolled" strategy in "{node.name}" ({node.class_name}) may have unexpected II in '
            'Vitis backend.\nVerify that the final design satisfies the latency/II constraints.'
        )

hls4ml/backends/vitis/vitis_backend.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ def __init__(self):
1515
def _register_flows(self):
1616
validation_passes = [
1717
'vitis:validate_conv_implementation',
18-
'vitis:validate_strategy',
18+
'vitis:validate_resource_strategy',
19+
'vitis:validate_resource_unrolled_strategy',
1920
]
2021
validation_flow = register_flow('validation', validation_passes, requires=['vivado:init_layers'], backend=self.name)
2122

hls4ml/backends/vivado/passes/convolution_templates.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
typedef {accum_t.name} accum_t;
2323
typedef {bias_t.name} bias_t;
2424
typedef {weight_t.name} weight_t;
25+
template<class data_T, class res_T, class CONFIG_T>
26+
using kernel = nnet::{dense_function}<data_T, res_T, CONFIG_T>;
2527
template<class x_T, class y_T>
2628
using product = nnet::product::{product_type}<x_T, y_T>;
2729
}};\n"""
@@ -100,6 +102,18 @@ def format(self, node):
100102
mult_params['product_type'] = get_backend('vivado').product_type(
101103
node.get_input_variable().type.precision, node.get_weights('weight').type.precision
102104
)
105+
106+
if node.get_attr('strategy').lower() == 'latency':
107+
mult_params['dense_function'] = 'DenseLatency'
108+
elif node.get_attr('strategy').lower() == 'resource':
109+
if int(mult_params['reuse_factor']) <= int(mult_params['n_in']):
110+
mult_params['dense_function'] = 'DenseResource_rf_leq_nin'
111+
else:
112+
mult_params['dense_function'] = 'DenseResource_rf_gt_nin_rem0'
113+
# The 3rd case is never used
114+
elif node.get_attr('strategy').lower() == 'resource_unrolled':
115+
mult_params['dense_function'] = f'dense_resource_unrolled_{node.index}'
116+
103117
mult_config = self.mult_template.format(**mult_params)
104118

105119
return mult_config + '\n' + conv_config
@@ -213,6 +227,18 @@ def format(self, node):
213227
mult_params['product_type'] = get_backend('vivado').product_type(
214228
node.get_input_variable().type.precision, node.get_weights('weight').type.precision
215229
)
230+
231+
if node.get_attr('strategy').lower() == 'latency':
232+
mult_params['dense_function'] = 'DenseLatency'
233+
elif node.get_attr('strategy').lower() == 'resource':
234+
if int(mult_params['reuse_factor']) <= int(mult_params['n_in']):
235+
mult_params['dense_function'] = 'DenseResource_rf_leq_nin'
236+
else:
237+
mult_params['dense_function'] = 'DenseResource_rf_gt_nin_rem0'
238+
# The 3rd case is never used
239+
elif node.get_attr('strategy').lower() == 'resource_unrolled':
240+
mult_params['dense_function'] = f'dense_resource_unrolled_{node.index}'
241+
216242
mult_config = self.mult_template.format(**mult_params)
217243

218244
return mult_config + '\n' + conv_config
@@ -297,6 +323,8 @@ def format(self, node):
297323
params['scale_index_type'] = 'scale_index_regular'
298324

299325
params['config_t'] = f'config{node.index}_depthwise_mult'
326+
# TODO - Extend unrolled Dense Resource
327+
params['unrolled_function'] = 'DenseResourceUnrolled'
300328
depthwise_config = self.depthwise_template.format(**params)
301329

302330
# Depthwise mult config
@@ -309,6 +337,9 @@ def format(self, node):
309337
mult_params['product_type'] = get_backend('vivado').product_type(
310338
node.get_input_variable().type.precision, node.get_weights('depthwise').type.precision
311339
)
340+
# TODO - Extend unrolled Dense Resource to depthwise Conv1D
341+
mult_params['unrolled_function'] = 'DenseResourceUnrolled'
342+
312343
depthwise_mult_config = self.depthwise_mult_template.format(**mult_params)
313344

314345
# Pointwise config
@@ -338,6 +369,8 @@ def format(self, node):
338369
params['scale_index_type'] = 'scale_index_regular'
339370

340371
params['config_t'] = f'config{node.index}_pointwise_mult'
372+
# TODO - Extend unrolled Dense Resource
373+
params['unrolled_function'] = 'DenseResourceUnrolled'
341374
pointwise_config = self.pointwise_template.format(**params)
342375

343376
# Pointwise mult config
@@ -350,6 +383,9 @@ def format(self, node):
350383
mult_params['product_type'] = get_backend('vivado').product_type(
351384
node.get_input_variable().type.precision, node.get_weights('pointwise').type.precision
352385
)
386+
# TODO - Extend unrolled Dense Resource to separable Conv1D
387+
mult_params['unrolled_function'] = 'DenseResourceUnrolled'
388+
353389
pointwise_mult_config = self.pointwise_mult_template.format(**mult_params)
354390

355391
return (
@@ -425,6 +461,8 @@ def format(self, node):
425461
params['scale_index_width_type'] = 'scale_index_regular'
426462

427463
params['config_t'] = f'config{node.index}_depthwise_mult'
464+
# TODO - Extend unrolled Dense Resource
465+
params['unrolled_function'] = 'DenseResourceUnrolled'
428466
depthwise_config = self.depthwise_template.format(**params)
429467

430468
# Depthwise mult config
@@ -437,6 +475,8 @@ def format(self, node):
437475
mult_params['product_type'] = get_backend('vivado').product_type(
438476
node.get_input_variable().type.precision, node.get_weights('depthwise').type.precision
439477
)
478+
# TODO - Extend unrolled Dense Resource to depthwise Conv2D
479+
mult_params['unrolled_function'] = 'DenseResourceUnrolled'
440480
depthwise_mult_config = self.depthwise_mult_template.format(**mult_params)
441481

442482
# Pointwise config
@@ -474,6 +514,8 @@ def format(self, node):
474514
else:
475515
params['scale_index_width_type'] = 'scale_index_regular'
476516
params['config_t'] = f'config{node.index}_pointwise_mult'
517+
# TODO - Extend unrolled Dense Resource
518+
params['unrolled_function'] = 'DenseResourceUnrolled'
477519
pointwise_config = self.pointwise_template.format(**params)
478520

479521
# Pointwise mult config
@@ -486,6 +528,8 @@ def format(self, node):
486528
mult_params['product_type'] = get_backend('vivado').product_type(
487529
node.get_input_variable().type.precision, node.get_weights('pointwise').type.precision
488530
)
531+
# TODO - Extend unrolled Dense Resource to separable Conv2D
532+
mult_params['unrolled_function'] = 'DenseResourceUnrolled'
489533
pointwise_mult_config = self.pointwise_mult_template.format(**mult_params)
490534

491535
return (

hls4ml/backends/vivado/passes/core_templates.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
typedef {bias_t.name} bias_t;
2020
typedef {weight_t.name} weight_t;
2121
typedef {index_t.name} index_t;
22+
template<class data_T, class res_T, class CONFIG_T>
23+
using kernel = nnet::{dense_function}<data_T, res_T, CONFIG_T>;
2224
template<class x_T, class y_T>
2325
using product = nnet::product::{product_type}<x_T, y_T>;
2426
}};\n"""
@@ -41,6 +43,17 @@ def format(self, node):
4143
node.get_input_variable().type.precision, node.get_weights('weight').type.precision
4244
)
4345

46+
if node.get_attr('strategy').lower() == 'latency':
47+
params['dense_function'] = 'DenseLatency'
48+
elif node.get_attr('strategy').lower() == 'resource':
49+
if int(params['reuse_factor']) <= int(params['n_in']):
50+
params['dense_function'] = 'DenseResource_rf_leq_nin'
51+
else:
52+
params['dense_function'] = 'DenseResource_rf_gt_nin_rem0'
53+
# The 3rd case is never used
54+
elif node.get_attr('strategy').lower() == 'resource_unrolled':
55+
params['dense_function'] = f'dense_resource_unrolled_{node.index}'
56+
4457
return self.template.format(**params)
4558

4659

0 commit comments

Comments
 (0)