Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b92fd74
laying the foundation for oneapi accelerator
jmitrevs Aug 7, 2025
4c40eee
Merge remote-tracking branch 'upstream/main' into oneAPI-acc
jmitrevs Aug 7, 2025
ad6adfa
snapshot adding files
jmitrevs Aug 7, 2025
0ae77bb
some steps towards oneapi accelerator
jmitrevs Aug 8, 2025
f5fcc0a
update some of the oneapi accelerator backend setup
jmitrevs Aug 15, 2025
944e630
try using DMA also for bridge
jmitrevs Aug 15, 2025
ca6c315
setup predict for easier hardware acceleration
jmitrevs Aug 20, 2025
21812b4
Merge remote-tracking branch 'upstream/main' into oneAPI-acc
jmitrevs Aug 21, 2025
49acece
snapshot of trying to handle streaming in oneAPI accelerator
jmitrevs Aug 21, 2025
f2118b8
change some things that assume name == output[0], and reorder so that …
jmitrevs Aug 2, 2025
5442b94
add optimizers to insert sideband layers
jmitrevs Aug 22, 2025
b4e8da3
try to setup sideband templates
jmitrevs Aug 23, 2025
8e9cb05
another snapshot fixing some bugs
jmitrevs Aug 23, 2025
91bef7b
various bug fixes
jmitrevs Aug 25, 2025
7e14ddc
fix some HLS bugs
jmitrevs Aug 25, 2025
25dcd93
pre-commit fixes
jmitrevs Aug 26, 2025
7c8a313
Merge remote-tracking branch 'upstream/main' into oneAPI-acc
jmitrevs Dec 9, 2025
02eb81f
pre-commit fixes
jmitrevs Dec 9, 2025
677792d
Merge remote-tracking branch 'upstream/main' into oneAPI-acc
jmitrevs Dec 9, 2025
e52c217
Merge branch 'main' into oneAPI-acc
jmitrevs Jan 15, 2026
2a4540c
Merge remote-tracking branch 'upstream/main' into oneAPI-acc
jmitrevs Jan 28, 2026
b8792cc
fix typo in oneapi acc writer that was fixed in main for oneapi
jmitrevs Jan 28, 2026
2553929
fix nnet_printf to work outside of nnet_utils
jmitrevs Jan 28, 2026
eeac6f9
Merge remote-tracking branch 'upstream/main' into oneAPI-acc
jmitrevs Mar 22, 2026
8055e71
update nnet_stream_beat to use return values
jmitrevs Mar 23, 2026
e46c547
Potentially fix streaming accelerator
jmitrevs Mar 23, 2026
3f74680
Merge branch 'main' into oneAPI-acc
jmitrevs Apr 6, 2026
f02263d
make response capacity match invocation capacity
jmitrevs Apr 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions hls4ml/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from hls4ml.backends.fpga.fpga_backend import FPGABackend # noqa: F401
from hls4ml.backends.libero.libero_backend import LiberoBackend
from hls4ml.backends.oneapi.oneapi_backend import OneAPIBackend
from hls4ml.backends.oneapi_accelerator.oneapi_accelerator_backend import OneAPIAcceleratorBackend
from hls4ml.backends.plugin_loader import load_backend_plugins
from hls4ml.backends.quartus.quartus_backend import QuartusBackend
from hls4ml.backends.symbolic.symbolic_backend import SymbolicExpressionBackend
Expand All @@ -22,6 +23,7 @@ def _register_builtin_backends():
register_backend('Catapult', CatapultBackend)
register_backend('SymbolicExpression', SymbolicExpressionBackend)
register_backend('oneAPI', OneAPIBackend)
register_backend('oneAPIAccelerator', OneAPIAcceleratorBackend) # Can only be registered after oneAPI
register_backend('Libero', LiberoBackend)


Expand Down
15 changes: 12 additions & 3 deletions hls4ml/backends/oneapi/oneapi_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@


class OneAPIBackend(FPGABackend):
def __init__(self):
super().__init__('oneAPI')
def __init__(self, name='oneAPI'): # the default name should be used in most cases
super().__init__(name)
self._register_layer_attributes()
self._register_flows()

Expand Down Expand Up @@ -145,7 +145,14 @@ def get_writer_flow(self):
return self._writer_flow

def create_initial_config(
self, part='Agilex7', clock_period=5, hyperopt_handshake=False, io_type='io_parallel', write_tar=False, **_
self,
part='Agilex7',
clock_period=5,
hyperopt_handshake=False,
io_type='io_parallel',
max_parallel=10,
write_tar=False,
**_,
):
"""Create initial configuration of the oneAPI backend.

Expand All @@ -155,6 +162,7 @@ def create_initial_config(
hyperopt_handshake (bool, optional): Should hyper-optimized handshaking be used? Defaults to False
io_type (str, optional): Type of implementation used. One of
'io_parallel' or 'io_stream'. Defaults to 'io_parallel'.
max_parallel(int, optional): The maximum invocations (events) processed in parallel, io_stream only.
write_tar (bool, optional): If True, compresses the output directory into a .tar.gz file. Defaults to False.

Returns:
Expand All @@ -167,6 +175,7 @@ def create_initial_config(
config['ClockPeriod'] = clock_period
config['HyperoptHandshake'] = hyperopt_handshake
config['IOType'] = io_type
config['MaxParallelInvocations'] = max_parallel
config['HLSConfig'] = {}
config['WriterConfig'] = {
# TODO: add namespace
Expand Down
4 changes: 2 additions & 2 deletions hls4ml/backends/oneapi/oneapi_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ def _default_function_params(self, layer):
params = self._default_params(layer)
params['name'] = layer.name
params['config'] = f'config{layer.index}'
params['input_pipe'] = layer.get_input_variable().pipe_name
params['output_pipe'] = layer.get_output_variable().pipe_name
params['input_pipe'] = layer.get_input_variable(layer.inputs[0]).pipe_name
params['output_pipe'] = layer.get_output_variable(layer.outputs[0]).pipe_name

return params

Expand Down
3 changes: 2 additions & 1 deletion hls4ml/backends/oneapi/passes/bn_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
bn_include_list = ['nnet_utils/nnet_batchnorm.h', 'nnet_utils/nnet_batchnorm_stream.h']

batchnorm_quantized_tanh_task_sequence_template = (
'task_sequence<nnet::normalize_{quantize}_tanh_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
'task_sequence<nnet::normalize_{quantize}_tanh_stream<{input_pipe}, {output_pipe}, {config}>, '
'MAX_INVOC, MAX_INVOC> {name};'
)

batchnorm_quantized_tanh_stream_function_template = '{name}.async({threshold});'
Expand Down
4 changes: 3 additions & 1 deletion hls4ml/backends/oneapi/passes/clone_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ def format(self, node):

output_pipes = ', '.join([f'{{output{i + 1}_pipe}}' for i in range(len(node.outputs))])

template = f'task_sequence<nnet::clone_stream<{{input_pipe}}, {output_pipes}, {{size}}>> {{name}};'
template = (
f'task_sequence<nnet::clone_stream<{{input_pipe}}, {output_pipes}, {{size}}>, MAX_INVOC, MAX_INVOC> {{name}};'
)
return template.format(**params)


Expand Down
4 changes: 2 additions & 2 deletions hls4ml/backends/oneapi/passes/convolution_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
conv1d_function_template = 'nnet::conv_1d_{data_format}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'

conv1d_task_sequence_template = (
'task_sequence<nnet::conv_1d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
'task_sequence<nnet::conv_1d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)

conv_stream_function_template = '{name}.async({w}, {b});'
Expand Down Expand Up @@ -192,7 +192,7 @@ def __init__(self):
conv2d_function_template = 'nnet::conv_2d_{data_format}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'

conv2d_task_sequence_template = (
'task_sequence<nnet::conv_2d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
'task_sequence<nnet::conv_2d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)

conv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_conv2d_stream.h']
Expand Down
12 changes: 9 additions & 3 deletions hls4ml/backends/oneapi/passes/core_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
}};\n"""

dense_function_template = 'nnet::dense_{strategy}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'
dense_task_sequence_template = 'task_sequence<nnet::dense_{strategy}_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
dense_task_sequence_template = (
'task_sequence<nnet::dense_{strategy}_stream<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)
dense_stream_function_template = '{name}.async({w}, {b});'
dense_include_list = ['nnet_utils/nnet_dense.h', 'nnet_utils/nnet_dense_stream.h']

Expand Down Expand Up @@ -108,7 +110,9 @@ def format(self, node):
}};\n"""

batchnorm_function_template = 'nnet::normalize<{input_t}, {output_t}, {config}>({input}, {output}, {scale}, {bias});'
batchnorm_task_sequence_template = 'task_sequence<nnet::normalize_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
batchnorm_task_sequence_template = (
'task_sequence<nnet::normalize_stream<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)
batchnorm_stream_function_template = '{name}.async({scale}, {bias});'
batchnorm_include_list = ['nnet_utils/nnet_batchnorm.h', 'nnet_utils/nnet_batchnorm_stream.h']

Expand Down Expand Up @@ -205,7 +209,9 @@ def format(self, node):
activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});'
param_activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {param}, {output});'

activ_task_sequence_template = 'task_sequence<nnet::{activation}_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
activ_task_sequence_template = (
'task_sequence<nnet::{activation}_stream<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)
activ_stream_function_template = '{name}.async();'
param_activ_stream_function_template = '{name}.async({param});'

Expand Down
4 changes: 3 additions & 1 deletion hls4ml/backends/oneapi/passes/embedding_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from hls4ml.backends.oneapi.oneapi_template import StreamFunctionCallTemplate, TaskSequenceTemplate
from hls4ml.model.layers import Embedding

embed_task_sequence_template = 'task_sequence<nnet::embedding_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
embed_task_sequence_template = (
'task_sequence<nnet::embedding_stream<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)
embed_stream_function_template = '{name}.async({e});'


Expand Down
3 changes: 2 additions & 1 deletion hls4ml/backends/oneapi/passes/merge_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
merge_function_template = 'nnet::{merge}<{input1_t}, {input2_t}, {output_t}, {config}>({input1}, {input2}, {output});'

merge_task_sequence_template = (
'task_sequence<nnet::{merge}_stream<{input1_pipe}, {input2_pipe}, {output_pipe}, {config}>> {name};'
'task_sequence<nnet::{merge}_stream<{input1_pipe}, {input2_pipe}, {output_pipe}, {config}>, '
'MAX_INVOC, MAX_INVOC> {name};'
)

merge_stream_function_template = '{name}.async();'
Expand Down
6 changes: 4 additions & 2 deletions hls4ml/backends/oneapi/passes/pointwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@
)

pointwise_conv1d_task_sequence_template = (
'task_sequence<nnet::pintwise_conv_1d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
'task_sequence<nnet::pintwise_conv_1d_{data_format}_stream'
'<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)

pointwise_conv2d_task_sequence_template = (
'task_sequence<nnet::pintwise_conv_2d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
'task_sequence<nnet::pintwise_conv_2d_{data_format}_stream'
'<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)

pointwise_conv_stream_function_template = '{name}.async({w}, {b});'
Expand Down
14 changes: 9 additions & 5 deletions hls4ml/backends/oneapi/passes/pooling_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,22 @@
)

pooling1d_task_sequence_template = (
'task_sequence<nnet::pooling1d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>>({name});'
'task_sequence<nnet::pooling1d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>, '
'MAX_INVOC, MAX_INVOC>({name});'
)
pooling2d_task_sequence_template = (
'task_sequence<nnet::pooling2d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>>({name});'
'task_sequence<nnet::pooling2d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>, '
'MAX_INVOC, MAX_INVOC>({name});'
)

global_pooling1d_task_sequence_template = (
'task_sequence<nnet::global_pooling1d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>>({name});'
'task_sequence<nnet::global_pooling1d_{data_format}_stream'
'<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC>({name});'
)
global_pooling2d_task_sequence_template = (
'task_sequence<nnet::global_pooling2d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>>({name});'
'task_sequence<nnet::global_pooling2d_{data_format}_stream'
'<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC>({name});'
)

pooling_stream_function_template = '{name}.async();'

pooling_include_list = ['nnet_utils/nnet_pooling.h', 'nnet_utils/nnet_pooling_stream.h']
Expand Down
4 changes: 3 additions & 1 deletion hls4ml/backends/oneapi/passes/recurrent_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@
gru_function_initial_state_template = (
'nnet::gru_init_state<{input_t}, {h_t}, {output_t}, {config}>({input}, {init_state}, {output}, {w}, {wr}, {b}, {br});'
)
gru_task_sequence_template = 'task_sequence<nnet::gru_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
gru_task_sequence_template = (
'task_sequence<nnet::gru_stream<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)
gru_stream_function_template = '{name}.async({w}, {wr}, {b}, {br});'


Expand Down
16 changes: 11 additions & 5 deletions hls4ml/backends/oneapi/passes/reshaping_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@
zeropad2d_function_template = 'nnet::zeropad2d_{data_format}<{input_t}, {output_t}, {config}>({input}, {output});'

zeropad1d_task_sequence_template = (
'task_sequence<nnet::zeropad1d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
'task_sequence<nnet::zeropad1d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>, '
'MAX_INVOC, MAX_INVOC> {name};'
)
zeropad2d_task_sequence_template = (
'task_sequence<nnet::zeropad2d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
'task_sequence<nnet::zeropad2d_{data_format}_stream<{input_pipe}, {output_pipe}, {config}>, '
'MAX_INVOC, MAX_INVOC> {name};'
)

reshaping_stream_function_template = '{name}.async();'
Expand Down Expand Up @@ -116,7 +118,7 @@ def format(self, node):

resize_function_template = 'nnet::resize_{algorithm}<{input_t}, {output_t}, {config}>({input}, {output});'
resize_task_sequence_template = (
'task_sequence<nnet::resize_{algorithm}_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
'task_sequence<nnet::resize_{algorithm}_stream<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)
resize_include_list = ['nnet_utils/nnet_resize.h', 'nnet_utils/nnet_resize_stream.h']

Expand Down Expand Up @@ -172,7 +174,9 @@ def format(self, node):
}};\n"""

transpose_function_template = 'nnet::transpose<{input_t}, {output_t}, {config}>({input}, {output});'
transpose_task_sequence_template = 'task_sequence<nnet::transpose_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
transpose_task_sequence_template = (
'task_sequence<nnet::transpose_stream<{input_pipe}, {output_pipe}, {config}>, MAX_INVOC, MAX_INVOC> {name};'
)
transpose_include_list = ['nnet_utils/nnet_transpose.h', 'nnet_utils/nnet_transpose_stream.h']


Expand Down Expand Up @@ -215,7 +219,9 @@ def format(self, node):


# Reshape template (only used in streaming)
reshape_task_sequence_template = 'task_sequence<nnet::repack_stream<{input_pipe}, {output_pipe}, {size}>> {name};'
reshape_task_sequence_template = (
'task_sequence<nnet::repack_stream<{input_pipe}, {output_pipe}, {size}>, MAX_INVOC, MAX_INVOC> {name};'
)
reshape_include_list = ['nnet_utils/nnet_stream.h']


Expand Down
Empty file.
68 changes: 68 additions & 0 deletions hls4ml/backends/oneapi_accelerator/oneapi_accelerator_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from hls4ml.backends import OneAPIBackend
from hls4ml.model.flow import register_flow


class OneAPIAcceleratorBackend(OneAPIBackend):
    """Backend for running oneAPI-generated HLS code on an FPGA accelerator.

    Reuses the oneAPI backend's layer handling and optimizers, but registers
    its own writer, type-transformation, and streaming (sideband) flows, and
    enables the oneAPI Board Support Package (BSP) in the configuration.
    """

    def __init__(self):
        # Register under a distinct backend name; OneAPIBackend accepts the
        # name argument so subclasses can reuse its setup machinery.
        super().__init__(name='oneAPIAccelerator')

    def _register_flows(self):
        # Writer flow: stamp the project, then write accelerator-specific HLS.
        writer_passes = ['make_stamp', 'oneapiaccelerator:write_hls']
        self._writer_flow = register_flow('write', writer_passes, requires=['oneapi:ip'], backend=self.name)

        # Type handling: accelerator-specific type transform first, followed by
        # the stock oneAPI weight/strategy/winograd passes.
        oneapi_types = [
            'oneapiaccelerator:transform_types',
            'oneapi:register_bram_weights',
            'oneapi:apply_resource_strategy',
            'oneapi:apply_winograd_kernel_transformation',
        ]
        oneapi_types_flow = register_flow('specific_types', oneapi_types, requires=['oneapi:init_layers'], backend=self.name)

        # Streaming support: clone outputs, then insert the sideband
        # extraction/merging layers around the model.
        streaming_passes = [
            'oneapi:clone_output',
            'oneapiaccelerator:extract_sideband',
            'oneapiaccelerator:merge_sideband',
        ]
        streaming_flow = register_flow('streaming', streaming_passes, requires=['oneapi:init_layers'], backend=self.name)

        template_flow = register_flow(
            'apply_templates', self._get_layer_templates, requires=['oneapi:init_layers'], backend=self.name
        )

        # Order matters: streaming layers must be inserted before the type and
        # template flows run over the graph.
        accel_flow_requirements = [
            'optimize',
            'oneapi:init_layers',
            streaming_flow,
            'oneapi:quantization',
            'oneapi:optimize',
            oneapi_types_flow,
            template_flow,
        ]

        self._default_flow = register_flow('accel', None, requires=accel_flow_requirements, backend=self.name)

    def create_initial_config(
        self, part, clock_period=5, hyperopt_handshake=False, io_type='io_parallel', max_parallel=10, write_tar=False, **_
    ):
        """Create initial configuration of the oneAPI accelerator backend.

        Unlike the base oneAPI backend, ``part`` has no default here: the
        accelerator flow needs an explicit board support package path.

        Args:
            part (str): The path to the board support package to be used. Can add :<board-variant>
            clock_period (int, optional): The clock period in ns. Defaults to 5.
            hyperopt_handshake (bool, optional): Should hyper-optimized handshaking be used? Defaults to False
            io_type (str, optional): Type of implementation used. One of
                'io_parallel' or 'io_stream'. Defaults to 'io_parallel'.
            max_parallel (int, optional): The maximum invocations (events) processed in parallel,
                io_stream only. Defaults to 10.
            write_tar (bool, optional): If True, compresses the output directory into a .tar.gz file. Defaults to False.

        Returns:
            dict: initial configuration.
        """
        # Pass by keyword so this does not silently break if the parent's
        # positional parameter order ever changes.
        config = super().create_initial_config(
            part=part,
            clock_period=clock_period,
            hyperopt_handshake=hyperopt_handshake,
            io_type=io_type,
            max_parallel=max_parallel,
            write_tar=write_tar,
            **_,
        )
        # Accelerator targets always build against a oneAPI Board Support Package.
        config['UseOneAPIBSP'] = True
        return config
33 changes: 33 additions & 0 deletions hls4ml/backends/oneapi_accelerator/oneapi_accelerator_layers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from hls4ml.model.attributes import Attribute
from hls4ml.model.layers import Layer, register_layer

SIDEBAND_SHAPE = 2


class SidebandExtraction(Layer):
    """This layer extracts the sideband and sends it to a different stream.

    The payload output mirrors the input's shape and precision; the sideband
    routing itself is presumably handled by the backend's generated code —
    NOTE(review): confirm against the accelerator templates/writer.
    """

    # 'n_in' is derived from the input variable's size in initialize().
    _expected_attributes = [Attribute('n_in')]

    def initialize(self):
        # Pass the input shape and precision straight through to the output.
        inp = self.get_input_variable()
        self.set_attr('n_in', inp.size())
        self.add_output_variable(inp.shape, precision=inp.type.precision)


class SidebandMerging(Layer):
    """This layer gets the sideband from a different input and merges it.

    Counterpart to SidebandExtraction: the output mirrors the input's shape
    and precision — NOTE(review): the actual merge is presumably done by the
    backend's generated code; confirm against the accelerator templates.
    """

    # 'n_in' is derived from the input variable's size in initialize().
    _expected_attributes = [
        Attribute('n_in'),
    ]

    def initialize(self):
        # Pass the input shape and precision straight through to the output.
        inp = self.get_input_variable()
        self.set_attr('n_in', inp.size())
        self.add_output_variable(inp.shape, precision=inp.type.precision)


# Register the sideband layers with hls4ml's global layer registry so that
# the optimizer passes can insert them into the model graph by name.
register_layer('SidebandExtraction', SidebandExtraction)
register_layer('SidebandMerging', SidebandMerging)
Loading
Loading