Disable per-channel quantization for tflite

PINTO0309 · PINTO0309 · commit febe097169aa · 2022-07-18T14:58:21.000+09:00
diff --git a/README.md b/README.md
@@ -361,6 +361,7 @@ usage: tflite2tensorflow
   [--optimizing_for_edgetpu]
   [--replace_prelu_and_minmax]
   [--disable_experimental_new_quantizer]
+  [--disable_per_channel]
   [--optimizing_barracuda]
   [--locationids_of_the_terminating_output]
 
@@ -467,6 +468,8 @@ optional arguments:
   --disable_experimental_new_quantizer
           Disable MLIRs new quantization feature during INT8 quantization
           in TensorFlowLite.
+  --disable_per_channel
+          Disable per-channel quantization for tflite.
   --optimizing_barracuda
           Generates ONNX by replacing Barracuda unsupported layers
           with standard layers. For example, GatherND.
diff --git a/schema/schema.fbs b/schema/schema.fbs
@@ -220,6 +220,10 @@ table Tensor {
   // Encodes `shape` with unknown dimensions. Unknown dimensions are
   // represented with -1.
   shape_signature:[int]; // Optional.
+
+  // If false, the rank or the number of tensor dimensions is unknown.
+  // If false, "shape" must be [].
+  has_rank: bool = false;
 }
 
 // A list of builtin operators. Builtin operators are slightly faster than custom
@@ -386,6 +390,11 @@ enum BuiltinOperator : int32 {
   MULTINOMIAL = 149,
   GELU = 150,
   DYNAMIC_UPDATE_SLICE = 151,
+  RELU_0_TO_1 = 152,
+  UNSORTED_SEGMENT_PROD = 153,
+  UNSORTED_SEGMENT_MAX = 154,
+  UNSORTED_SEGMENT_SUM = 155,
+  ATAN2 = 156
 }
 // LINT.ThenChange(nnapi_linter/linter.proto)
 
@@ -508,10 +517,17 @@ union BuiltinOptions {
   BucketizeOptions,
   GeluOptions,
   DynamicUpdateSliceOptions,
+  UnsortedSegmentProdOptions,
+  UnsortedSegmentMaxOptions,
+  UnsortedSegmentSumOptions,
+  ATan2Options
 }
 
+// LINT.IfChange
 enum Padding : byte { SAME, VALID }
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
 
+// LINT.IfChange
 enum ActivationFunctionType : byte {
   NONE = 0,
   RELU = 1,
@@ -520,6 +536,7 @@ enum ActivationFunctionType : byte {
   TANH = 4,
   SIGN_BIT = 5,
 }
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
 
 table Conv2DOptions {
   padding:Padding;
@@ -611,10 +628,12 @@ table BidirectionalSequenceRNNOptions {
   asymmetric_quantize_inputs:bool;
 }
 
+// LINT.IfChange
 enum FullyConnectedOptionsWeightsFormat: byte {
   DEFAULT = 0,
   SHUFFLED4x16INT8 = 1,
 }
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
 
 // An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
 table FullyConnectedOptions {
@@ -667,12 +686,14 @@ table LocalResponseNormalizationOptions {
   beta:float;
 }
 
+// LINT.IfChange
 enum LSTMKernelType : byte {
   // Full LSTM kernel which supports peephole and projection.
   FULL = 0,
   // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
   BASIC = 1,
 }
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
 
 // An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
 table LSTMOptions {
@@ -972,12 +993,14 @@ table LeakyReluOptions {
 table SquaredDifferenceOptions {
 }
 
+// LINT.IfChange
 enum MirrorPadMode : byte {
   // Doesn't include borders.
   REFLECT = 0,
   // Includes borders.
   SYMMETRIC = 1,
 }
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
 
 table MirrorPadOptions {
   mode:MirrorPadMode;
@@ -1109,6 +1132,19 @@ table GeluOptions {
 table DynamicUpdateSliceOptions {
 }
 
+table UnsortedSegmentProdOptions {
+}
+
+table UnsortedSegmentMaxOptions {
+}
+
+table UnsortedSegmentSumOptions {
+}
+
+table ATan2Options {
+}
+
+
 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
 // builtin, or a string if the operator is custom.
 table OperatorCode {
diff --git a/setup.py b/setup.py
@@ -11,7 +11,7 @@
     setup(
         name="tflite2tensorflow",
         scripts=scripts,
-        version="1.21.2",
+        version="1.22.0",
         description="Generate saved_model, tfjs, tf-trt, EdgeTPU, CoreML, quantized tflite, ONNX, OpenVINO, Myriad Inference Engine blob and .pb from .tflite.",
         long_description=long_description,
         long_description_content_type="text/markdown",
diff --git a/tflite2tensorflow/tflite2tensorflow.py b/tflite2tensorflow/tflite2tensorflow.py
@@ -5684,6 +5684,7 @@ def main():
     parser.add_argument('--optimizing_for_edgetpu', action='store_true', help='Optimizing for edgetpu')
     parser.add_argument('--replace_prelu_and_minmax', action='store_true', help='Replace prelu and minimum/maximum with each other')
     parser.add_argument('--disable_experimental_new_quantizer', action='store_true', help='Disable MLIR\'s new quantization feature during INT8 quantization in TensorFlowLite.')
+    parser.add_argument('--disable_per_channel', action='store_true', help='Disable per-channel quantization for tflite')
     parser.add_argument('--optimizing_barracuda', action='store_true', help='Generates ONNX by replacing Barracuda\'s unsupported layers with standard layers.')
     parser.add_argument('--locationids_of_the_terminating_output', type=str, default='', help='A comma-separated list of location IDs to be used as output layers. Default: \'\'')
     args = parser.parse_args()
@@ -5734,6 +5735,7 @@ def main():
     optimizing_for_edgetpu = args.optimizing_for_edgetpu
     replace_prelu_and_minmax = args.replace_prelu_and_minmax
     use_experimental_new_quantizer = not args.disable_experimental_new_quantizer
+    use_per_channel = not args.disable_per_channel
     optimizing_barracuda = args.optimizing_barracuda
     locationids_of_the_terminating_output_tmp = args.locationids_of_the_terminating_output
     locationids_of_the_terminating_output = None
@@ -6057,6 +6059,7 @@ def main():
             try:
                 print(f'{Color.REVERCE}Dynamic Range Quantization started{Color.RESET}', '=' * 50)
                 converter = tf.lite.TFLiteConverter.from_saved_model(model_output_path)
+                converter._experimental_disable_per_channel = not use_per_channel
                 converter.optimizations = [tf.lite.Optimize.DEFAULT]
                 converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
                 tflite_model = converter.convert()
@@ -6073,6 +6076,7 @@ def main():
             try:
                 print(f'{Color.REVERCE}Weight Quantization started{Color.RESET}', '=' * 57)
                 converter = tf.lite.TFLiteConverter.from_saved_model(model_output_path)
+                converter._experimental_disable_per_channel = not use_per_channel
                 converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
                 converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
                 tflite_model = converter.convert()
@@ -6188,6 +6192,7 @@ def representative_dataset_gen():
                 print(f'{Color.REVERCE}Integer Quantization started{Color.RESET}', '=' * 56)
                 converter = tf.lite.TFLiteConverter.from_saved_model(model_output_path)
                 converter.experimental_new_quantizer = use_experimental_new_quantizer
+                converter._experimental_disable_per_channel = not use_per_channel
                 converter.optimizations = [tf.lite.Optimize.DEFAULT]
                 converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8, tf.lite.OpsSet.SELECT_TF_OPS]
                 tflite_model = None
@@ -6217,6 +6222,7 @@ def representative_dataset_gen():
                 print(f'{Color.REVERCE}Full Integer Quantization started{Color.RESET}', '=' * 51)
                 converter = tf.lite.TFLiteConverter.from_saved_model(model_output_path)
                 converter.experimental_new_quantizer = use_experimental_new_quantizer
+                converter._experimental_disable_per_channel = not use_per_channel
                 converter.optimizations = [tf.lite.Optimize.DEFAULT]
                 converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8, tf.lite.OpsSet.SELECT_TF_OPS]
                 inf_type = None