From 00f82a37007f8060e5237a7547cf54b6871e29a6 Mon Sep 17 00:00:00 2001 From: "Wang, Harry" Date: Tue, 1 Apr 2025 13:32:28 -0700 Subject: [PATCH 1/2] oneAPI BSP support. --- hls4ml/backends/oneapi/oneapi_backend.py | 6 +++--- hls4ml/templates/oneapi/CMakeLists.txt | 6 +++++- hls4ml/templates/oneapi/firmware/myproject.h | 7 +++++-- hls4ml/writer/oneapi_writer.py | 4 ++++ 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/hls4ml/backends/oneapi/oneapi_backend.py b/hls4ml/backends/oneapi/oneapi_backend.py index a830767ef0..9fd8b4af27 100644 --- a/hls4ml/backends/oneapi/oneapi_backend.py +++ b/hls4ml/backends/oneapi/oneapi_backend.py @@ -153,9 +153,9 @@ def create_initial_config(self, part='Arria10', clock_period=5, io_type='io_para # TODO: add namespace 'WriteTar': write_tar, } - - if 'use_bsp' in _: - config['IS_BSP'] = True + # Target oneAPI Board Support Package (BSP). + if "use_oneapi_bsp" in _: + config['UseOneAPIBSP'] = _["use_oneapi_bsp"] return config diff --git a/hls4ml/templates/oneapi/CMakeLists.txt b/hls4ml/templates/oneapi/CMakeLists.txt index 7f85841110..fe15197210 100644 --- a/hls4ml/templates/oneapi/CMakeLists.txt +++ b/hls4ml/templates/oneapi/CMakeLists.txt @@ -39,15 +39,19 @@ set(LIBRARY_NAME myproject-${LIB_STAMP}) # specific part number (E.g. "10AS066N3F40E2SG") to generate a standalone IP. if(NOT DEFINED FPGA_DEVICE) set(FPGA_DEVICE "Agilex7") + set(BSP_FLAG "") endif() +# Set the target to a BSP if we target an actual accelerator board. +# hls-fpga-machine-learning insert oneapi_bsp_cmake_flag + # Use cmake -DUSER_FPGA_FLAGS= to set extra flags for FPGA backend # compilation. # -Xsoptimize=latency Turns off the hyper-optimized handshake set(USER_FPGA_FLAGS -Wno-unused-label;${USER_FPGA_FLAGS};-Xsoptimize=latency) # Use cmake -DUSER_FLAGS= to set extra flags for general compilation. 
-set(USER_FLAGS -Wno-unused-label -fconstexpr-steps=134217728 ${USER_FLAGS}) +set(USER_FLAGS -Wno-unused-label -fconstexpr-steps=134217728 ${USER_FLAGS} ${BSP_FLAG}) # Use cmake -DUSER_INCLUDE_PATHS= to set extra paths for general # compilation. diff --git a/hls4ml/templates/oneapi/firmware/myproject.h b/hls4ml/templates/oneapi/firmware/myproject.h index c9d634ea74..74a729efe3 100644 --- a/hls4ml/templates/oneapi/firmware/myproject.h +++ b/hls4ml/templates/oneapi/firmware/myproject.h @@ -11,7 +11,7 @@ using PipeProps = decltype(sycl::ext::oneapi::experimental::properties(sycl::ext // Pipe properties for host pipes. Host pipes connect to the data source DMA and sink DMA. // They are connected to the first and the last layer to stream data into and out from the kernel. using HostPipePropertiesT = decltype(sycl::ext::oneapi::experimental::properties( - sycl::ext::intel::experimental::ready_latency<0>, sycl::ext::intel::experimental::bits_per_symbol<8>, + sycl::ext::intel::experimental::ready_latency<0>, sycl::ext::intel::experimental::bits_per_symbol<16>, sycl::ext::intel::experimental::uses_valid, sycl::ext::intel::experimental::first_symbol_in_high_order_bits, sycl::ext::intel::experimental::protocol_avalon_streaming_uses_ready)); @@ -127,12 +127,15 @@ template struct DMA_convert_data_back { class MyProjectID; struct MyProject { - +#ifndef IS_BSP // kernel property method to config invocation interface auto get(sycl::ext::oneapi::experimental::properties_tag) { return sycl::ext::oneapi::experimental::properties{sycl::ext::intel::experimental::streaming_interface<>, sycl::ext::intel::experimental::pipelined<>}; } +#else + // kernel properties and pipelining are not supported in BSP. 
+#endif SYCL_EXTERNAL void operator()() const; }; diff --git a/hls4ml/writer/oneapi_writer.py b/hls4ml/writer/oneapi_writer.py index 1b0b9f4f4b..b56c3508e5 100644 --- a/hls4ml/writer/oneapi_writer.py +++ b/hls4ml/writer/oneapi_writer.py @@ -557,6 +557,10 @@ def write_build_script(self, model): if 'set(FPGA_DEVICE' in line: line = f' set(FPGA_DEVICE "{device}")\n' + if model.config.get_config_value('UseOneAPIBSP'): + if 'hls-fpga-machine-learning insert oneapi_bsp_cmake_flag' in line: + line = 'set(BSP_FLAG "-DIS_BSP")\n' + fout.write(line) def write_nnet_utils(self, model): From 496846df78e4e58bea8b0baf74fa5982b32619fd Mon Sep 17 00:00:00 2001 From: "Wang, Harry" Date: Tue, 1 Apr 2025 14:23:46 -0700 Subject: [PATCH 2/2] User API and documentation. --- docs/api/configuration.rst | 18 ++++++++++++++++++ hls4ml/backends/oneapi/oneapi_backend.py | 8 +++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/docs/api/configuration.rst b/docs/api/configuration.rst index 1bc8f0676c..eebe67eb42 100644 --- a/docs/api/configuration.rst +++ b/docs/api/configuration.rst @@ -101,6 +101,20 @@ Finally, one then uses the configuration to create an hls model: backend='Vitis' ) +To target a oneAPI Board Support Package (BSP) enabled FPGA for offload acceleration, you can specify the ``part`` argument to be the path to your BSP and the BSP variant. Then, set ``use_oneapi_bsp=True``. +

.. code-block:: python

    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        output_dir="my_project_dir",
        io_type="io_parallel",
        backend="oneAPI",
        part="/path/to/my/bsp:bsp_variant",
        use_oneapi_bsp=True
    )

See :py:class:`~hls4ml.converters.convert_from_keras_model` for more information on the various options. Similar functions exist for ONNX and PyTorch. ---- @@ -132,6 +146,9 @@ It looks like this: ClockPeriod: 5 IOType: io_parallel # options: io_parallel/io_stream + # oneAPI Offload Acceleration flag. 
+ UseOneAPIBSP: True + HLSConfig: Model: Precision: fixed<16,6> @@ -156,6 +173,7 @@ The backend-specific section of the configuration depends on the backend. You ca For Vivado backend the options are: * **Part**\ : the particular FPGA part number that you are considering, here it's a Xilinx Virtex UltraScale+ VU13P FPGA +* **UseOneAPIBSP**\ : boolean flag that targets a oneAPI Board Support Package (BSP) to enable offload acceleration with an Altera FPGA; the path to the BSP and the BSP variant are supplied through the **Part** option. This is only needed if you are using the oneAPI backend. * **ClockPeriod**\ : the clock period, in ns, at which your algorithm runs Then you have some optimization parameters for how your algorithm runs: * **IOType**\ : your options are ``io_parallel`` or ``io_stream`` which defines the type of data structure used for inputs, intermediate activations between layers, and outputs. For ``io_parallel``, arrays are used that, in principle, can be fully unrolled and are typically implemented in RAMs. For ``io_stream``, HLS streams are used, which are a more efficient/scalable mechanism to represent data that are produced and consumed in a sequential manner. Typically, HLS streams are implemented with FIFOs instead of RAMs. For more information see `here `__. diff --git a/hls4ml/backends/oneapi/oneapi_backend.py b/hls4ml/backends/oneapi/oneapi_backend.py index 9fd8b4af27..56a3966737 100644 --- a/hls4ml/backends/oneapi/oneapi_backend.py +++ b/hls4ml/backends/oneapi/oneapi_backend.py @@ -129,7 +129,8 @@ def get_default_flow(self): def get_writer_flow(self): return self._writer_flow - def create_initial_config(self, part='Arria10', clock_period=5, io_type='io_parallel', write_tar=False, **_): + def create_initial_config( self, part='Arria10', clock_period=5, io_type='io_parallel', write_tar=False, use_oneapi_bsp=False, **_): """Create initial configuration of the oneAPI backend. 
Args: @@ -153,10 +154,7 @@ def create_initial_config(self, part='Arria10', clock_period=5, io_type='io_para # TODO: add namespace 'WriteTar': write_tar, } - # Target oneAPI Board Support Package (BSP). - if "use_oneapi_bsp" in _: - config['UseOneAPIBSP'] = _["use_oneapi_bsp"] - + config['UseOneAPIBSP'] = use_oneapi_bsp return config def compile(self, model):