
Commit f3690c8

Merge pull request #156 from fastmachinelearning/feature/fp16_fixes
Fixes for fp16 tensors and datatypes in ONNX
2 parents (d9269a9 + a27b4a6), commit f3690c8

File tree (6 files changed: 64 additions, 27 deletions)

src/qonnx/core/datatype.py
src/qonnx/core/modelwrapper.py
src/qonnx/custom_op/general/quant.py
src/qonnx/transformation/infer_datatypes.py
src/qonnx/util/basic.py
tests/transformation/test_infer_datatypes.py
src/qonnx/core/datatype.py

Lines changed: 1 addition & 1 deletion

@@ -168,7 +168,7 @@ def is_fixed_point(self):
         return False

     def get_hls_datatype_str(self):
-        return "float"
+        return "half"

     def to_numpy_dt(self):
         return np.float16
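
With this change, requesting the HLS type string for FLOAT16 yields the half-precision C type. A minimal sketch of the expected behaviour, assuming DataType["FLOAT16"] resolves to the class patched above:

import numpy as np
from qonnx.core.datatype import DataType

fp16 = DataType["FLOAT16"]
# HLS code generation now sees "half" instead of "float" for fp16 tensors
assert fp16.get_hls_datatype_str() == "half"
# the numpy container type is unchanged
assert fp16.to_numpy_dt() == np.float16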

src/qonnx/core/modelwrapper.py

Lines changed: 24 additions & 2 deletions

@@ -183,8 +183,30 @@ def get_tensor_datatype(self, tensor_name):
             ret = util.get_by_name(ret.quant_parameter_tensor_names, "finn_datatype", "key")
             if ret is not None:
                 return DataType[ret.value]
-        # TODO maybe use native ONNX tensor type instead of assuming fp32?
-        return DataType["FLOAT32"]
+        onnx_dtype_to_qonnx_dtype = {
+            TensorProto.FLOAT: "FLOAT32",
+            TensorProto.FLOAT16: "FLOAT16",
+            # TODO: dtypes below need testing to ensure they do not break FINN,
+            # since it normally assumes float32 containers for these dtypes
+            # TensorProto.UINT8 : "UINT8",
+            # TensorProto.INT8 : "INT8",
+            # TensorProto.UINT16 : "UINT16",
+            # TensorProto.INT16 : "INT16",
+            # TensorProto.UINT32 : "UINT32",
+            # TensorProto.INT32 : "INT32",
+            # TensorProto.UINT64 : "UINT64",
+            # TensorProto.INT64 : "INT64",
+        }
+        tensor_vi = self.get_tensor_valueinfo(tensor_name)
+        if tensor_vi is None:
+            # some initialized tensors don't get ValueInfo even after shape inference
+            _, onnx_dtype = self.get_initializer(tensor_name, return_dtype=True)
+        else:
+            onnx_dtype = tensor_vi.type.tensor_type.elem_type
+        if onnx_dtype in onnx_dtype_to_qonnx_dtype.keys():
+            return DataType[onnx_dtype_to_qonnx_dtype[onnx_dtype]]
+        else:
+            return DataType["FLOAT32"]

     def set_tensor_datatype(self, tensor_name, datatype):
         """Sets the QONNX DataType of tensor with given name."""

src/qonnx/custom_op/general/quant.py

Lines changed: 17 additions & 6 deletions

@@ -172,12 +172,23 @@ def get_nodeattr_types(self):
     def make_shape_compatible_op(self, model):
         """Returns a standard ONNX op which is compatible with this CustomOp
         for performing shape inference."""
-        return helper.make_node(
-            "Cast",
-            inputs=[self.onnx_node.input[0]],
-            outputs=[self.onnx_node.output[0]],
-            to=int(TensorProto.FLOAT),
-        )
+        node_out = self.onnx_node.output[0]
+        # preserve existing ONNX tensor type if it exists
+        node_out_vi = model.get_tensor_valueinfo(node_out)
+        if node_out_vi is None:
+            return helper.make_node(
+                "Cast",
+                inputs=[self.onnx_node.input[0]],
+                outputs=[node_out],
+                to=int(TensorProto.FLOAT),
+            )
+        else:
+            return helper.make_node(
+                "Cast",
+                inputs=[self.onnx_node.input[0]],
+                outputs=[node_out],
+                to=int(node_out_vi.type.tensor_type.elem_type),
+            )
         # For Quant the output shape should be the same as the input shape.
         # Get the output shape from the input
         out_shape = model.get_tensor_shape(self.onnx_node.input[0])
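
The shape-inference stand-in for Quant now preserves an existing output element type rather than always casting to float32. For a Quant output that already carries a float16 ValueInfo, the emitted stand-in would look roughly like the node below (an illustrative sketch; the tensor names are assumptions):

from onnx import TensorProto, helper

# stand-in Cast node targeting the pre-existing fp16 element type
cast_node = helper.make_node(
    "Cast",
    inputs=["quant_in"],
    outputs=["quant_out"],
    to=int(TensorProto.FLOAT16),
)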

src/qonnx/transformation/infer_datatypes.py

Lines changed: 5 additions & 4 deletions

@@ -38,9 +38,9 @@ def is_scaled_int(x):
     return x.is_integer() or x.is_fixed_point() or isinstance(x, ScaledIntType)


-def infer_mac_result_dtype(idtypes, possible_negation):
-    # will default to float32 unless specific cases detected
-    ret = DataType["FLOAT32"]
+def infer_mac_result_dtype(idtypes, odtype_orig, possible_negation):
+    # will default to original output dtype unless specific cases detected
+    ret = odtype_orig
     # result may be signed if:
     # - any of the operands are signed
     # - the operator itself may induce negation (like subtraction)

@@ -97,7 +97,8 @@ def _infer_node_datatype(model, node):
         model.set_tensor_datatype(node.output[0], DataType["BIPOLAR"])
     elif node.op_type in mac_like_optypes:
         possible_negation = node.op_type in ["Sub"]
-        odtype = infer_mac_result_dtype(idtypes, possible_negation=possible_negation)
+        odtype_orig = model.get_tensor_datatype(node.output[0])
+        odtype = infer_mac_result_dtype(idtypes, odtype_orig, possible_negation=possible_negation)
         model.set_tensor_datatype(node.output[0], odtype)
     elif node.op_type in ["Resize", "Upsample"]:
         mode = get_by_name(node.attribute, "mode").s
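
infer_mac_result_dtype now takes the original output datatype as an explicit fallback, so float16 accumulation results are no longer silently promoted to float32. A short sketch of the new signature in use, assuming fp16 operands fall through to the default return the same way fp32 does:

from qonnx.core.datatype import DataType
from qonnx.transformation.infer_datatypes import infer_mac_result_dtype

iu4 = DataType["UINT4"]
f16 = DataType["FLOAT16"]

# integer-only inputs are unaffected by the new fallback argument
assert infer_mac_result_dtype([iu4, iu4], None, possible_negation=False) == DataType["UINT32"]
# with a float operand, the original output dtype (here fp16) is kept
assert infer_mac_result_dtype([f16, iu4], f16, possible_negation=False) == f16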

src/qonnx/util/basic.py

Lines changed: 5 additions & 2 deletions

@@ -233,12 +233,15 @@ def gen_finn_dt_tensor(finn_dt, tensor_shape):
         int_dt = DataType["INT" + str(finn_dt.bitwidth())]
         tensor_values = np.random.randint(int_dt.min(), high=int_dt.max() + 1, size=tensor_shape)
         tensor_values = tensor_values * finn_dt.scale_factor()
-    elif finn_dt == DataType["FLOAT32"]:
+    elif finn_dt in [DataType["FLOAT32"], DataType["FLOAT16"]]:
         tensor_values = np.random.randn(*tensor_shape)
     else:
         raise ValueError("Datatype {} is not supported, no tensor could be generated".format(finn_dt))
     # always use float type as container
-    return tensor_values.astype(np.float32)
+    if finn_dt == DataType["FLOAT16"]:
+        return tensor_values.astype(np.float16)
+    else:
+        return tensor_values.astype(np.float32)


 def calculate_signed_dot_prod_range(dt_a, dt_b, len):
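
gen_finn_dt_tensor now keeps a float16 container when FLOAT16 test data is requested, while every other datatype still uses a float32 container. A small sketch:

import numpy as np
from qonnx.core.datatype import DataType
from qonnx.util.basic import gen_finn_dt_tensor

x16 = gen_finn_dt_tensor(DataType["FLOAT16"], (2, 3))
x32 = gen_finn_dt_tensor(DataType["FLOAT32"], (2, 3))
assert x16.dtype == np.float16  # fp16 data now stays in an fp16 container
assert x32.dtype == np.float32  # float32 container as before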

tests/transformation/test_infer_datatypes.py

Lines changed: 12 additions & 12 deletions

@@ -47,19 +47,19 @@ def test_infer_mac_dtype_result():
     si4 = DataType["SCALEDINT<4>"]
     si32 = DataType["SCALEDINT<32>"]
     # test several 2-input (e.g. weights, inputs) cases
-    assert infer_mac_result_dtype([iu4, iu4], False) == iu32
-    assert infer_mac_result_dtype([iu4, is4], False) == is32
-    assert infer_mac_result_dtype([iu4, iu4], True) == is32
-    assert infer_mac_result_dtype([iu4, fx4], False) == si32
-    assert infer_mac_result_dtype([fx4, si4], False) == si32
-    assert infer_mac_result_dtype([is4, si4], False) == si32
-    assert infer_mac_result_dtype([f32, iu4], False) == f32
-    assert infer_mac_result_dtype([f32, si4], False) == f32
+    assert infer_mac_result_dtype([iu4, iu4], None, False) == iu32
+    assert infer_mac_result_dtype([iu4, is4], None, False) == is32
+    assert infer_mac_result_dtype([iu4, iu4], None, True) == is32
+    assert infer_mac_result_dtype([iu4, fx4], None, False) == si32
+    assert infer_mac_result_dtype([fx4, si4], None, False) == si32
+    assert infer_mac_result_dtype([is4, si4], None, False) == si32
+    assert infer_mac_result_dtype([f32, iu4], f32, False) == f32
+    assert infer_mac_result_dtype([f32, si4], f32, False) == f32
     # test several 3-input (e.g. weights, inputs, biases) cases
-    assert infer_mac_result_dtype([iu4, iu4, iu4], False) == iu32
-    assert infer_mac_result_dtype([iu4, iu4, is4], False) == is32
-    assert infer_mac_result_dtype([is4, iu4, fx4], False) == si32
-    assert infer_mac_result_dtype([is4, iu4, f32], False) == f32
+    assert infer_mac_result_dtype([iu4, iu4, iu4], None, False) == iu32
+    assert infer_mac_result_dtype([iu4, iu4, is4], None, False) == is32
+    assert infer_mac_result_dtype([is4, iu4, fx4], None, False) == si32
+    assert infer_mac_result_dtype([is4, iu4, f32], f32, False) == f32


 def test_infer_datatypes():
