diff --git a/ARM.CMSIS-NN.pdsc b/ARM.CMSIS-NN.pdsc index 6f00e568..30e2010b 100644 --- a/ARM.CMSIS-NN.pdsc +++ b/ARM.CMSIS-NN.pdsc @@ -103,6 +103,7 @@ + @@ -132,6 +133,8 @@ + + diff --git a/Include/arm_nn_types.h b/Include/arm_nn_types.h index 02c22b67..7730cdf7 100644 --- a/Include/arm_nn_types.h +++ b/Include/arm_nn_types.h @@ -22,8 +22,8 @@ * Description: Public header file to contain the CMSIS-NN structs for the * TensorFlowLite micro compliant functions * - * $Date: 19 June 2024 - * $Revision: V.3.3.0 + * $Date: 19 Aug 2024 + * $Revision: V.3.4.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -111,6 +111,17 @@ typedef struct int32_t shift; /**< Shift value */ } cmsis_nn_per_tensor_quant_params; +/** CMSIS-NN object for quantization parameters. + * This struct supports both per-tensor and per-channels requantization + * and is recommended for new operators. + */ +typedef struct +{ + int32_t *multiplier; /**< Multiplier values */ + int32_t *shift; /**< Shift values */ + int32_t is_per_channel; /** Indicating if per channel or per tensor quantization */ +} cmsis_nn_quant_params; + /** CMSIS-NN object for the quantized Relu activation */ typedef struct { diff --git a/Include/arm_nnfunctions.h b/Include/arm_nnfunctions.h index d992e80c..ebe32e46 100644 --- a/Include/arm_nnfunctions.h +++ b/Include/arm_nnfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnfunctions.h * Description: Public header file for CMSIS NN Library * - * $Date: 19 June 2024 - * $Revision: V.16.2.0 + * $Date: 19 Aug 2024 + * $Revision: V.16.3.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -1499,7 +1499,7 @@ int32_t arm_depthwise_conv_s4_opt_get_buffer_size(const cmsis_nn_dims *input_dim * fc_params->filter_offset : 0 * Range of fc_params->output_offset : [-128, 127] * @param[in] quant_params Per-tensor quantization info. 
- * It contains the multiplier and shift values to be applied to the output tensor. + * It contains the multiplier and shift value to be applied to the output tensor. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] * Input dimension is taken as Nx(H * W * C_IN) * @param[in] input_data Input (activation) data pointer. Data type: int8 @@ -1547,7 +1547,7 @@ arm_cmsis_nn_status arm_fully_connected_s4(const cmsis_nn_context *ctx, * fc_params->filter_offset : 0 * Range of fc_params->output_offset : [-128, 127] * @param[in] quant_params Per-tensor quantization info. - * It contains the multiplier and shift values to be applied to the output tensor. + * It contains the multiplier and shift value to be applied to the output tensor. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] * Input dimension is taken as Nx(H * W * C_IN) * @param[in] input_data Input (activation) data pointer. Data type: int8 @@ -1584,6 +1584,106 @@ arm_cmsis_nn_status arm_fully_connected_s8(const cmsis_nn_context *ctx, const cmsis_nn_dims *output_dims, int8_t *output_data); +/** + * @brief Basic s8 Fully Connected function using per channel quantization. + * + * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function + * definition file to see if an additional buffer is required. + * Optional function {API}_get_buffer_size() provides the buffer + * size if an additional buffer is required. + * The caller is expected to clear the buffer, if applicable, for security reasons. + * @param[in] fc_params Fully Connected layer parameters. + * Range of fc_params->input_offset : [-127, 128] + * fc_params->filter_offset : 0 + * Range of fc_params->output_offset : [-128, 127] + * @param[in] quant_params Per-channel quantization info. + * It contains the multiplier and shift values to be applied to each output channel + * @param[in] input_dims Input (activation) tensor dimensions. 
Format: [N, H, W, C_IN] + * Input dimension is taken as Nx(H * W * C_IN) + * @param[in] input_data Input (activation) data pointer. Data type: int8 + * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C] + * N : accumulation depth and equals (H * W * C_IN) from input_dims + * C : output depth and equals C_OUT in output_dims + * H & W : Not used + * @param[in] filter_data Filter data pointer. Data type: int8 + * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] + * N, H, W : Not used + * @param[in] bias_data Bias data pointer. Data type: int32 + * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT] + * N : Batches + * C_OUT : Output depth + * H & W : Not used. + * @param[in, out] output_data Output data pointer. Data type: int8 + * + * @return The function returns either + * ARM_CMSIS_NN_ARG_ERROR if argument constraints fail. or, + * ARM_CMSIS_NN_SUCCESS on successful completion. + * + * @details + * - Supported framework: TensorFlow Lite + */ +arm_cmsis_nn_status arm_fully_connected_per_channel_s8(const cmsis_nn_context *ctx, + const cmsis_nn_fc_params *fc_params, + const cmsis_nn_per_channel_quant_params *quant_params, + const cmsis_nn_dims *input_dims, + const int8_t *input_data, + const cmsis_nn_dims *filter_dims, + const int8_t *filter_data, + const cmsis_nn_dims *bias_dims, + const int32_t *bias_data, + const cmsis_nn_dims *output_dims, + int8_t *output_data); + +/** + * @brief s8 Fully Connected layer wrapper function + * + * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function + * definition file to see if an additional buffer is required. + * Optional function {API}_get_buffer_size() provides the buffer + * size if an additional buffer is required. + * The caller is expected to clear the buffer, if applicable, for security reasons. + * @param[in] fc_params Fully Connected layer parameters. 
+ * Range of fc_params->input_offset : [-127, 128] + * fc_params->filter_offset : 0 + * Range of fc_params->output_offset : [-128, 127] + * @param[in] quant_params Per-channel or per-tensor quantization info. Check struct definition for details. + * It contains the multiplier and shift value(s) to be applied to each output channel + * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] + * Input dimension is taken as Nx(H * W * C_IN) + * @param[in] input_data Input (activation) data pointer. Data type: int8 + * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C] + * N : accumulation depth and equals (H * W * C_IN) from input_dims + * C : output depth and equals C_OUT in output_dims + * H & W : Not used + * @param[in] filter_data Filter data pointer. Data type: int8 + * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] + * N, H, W : Not used + * @param[in] bias_data Bias data pointer. Data type: int32 + * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT] + * N : Batches + * C_OUT : Output depth + * H & W : Not used. + * @param[in, out] output_data Output data pointer. Data type: int8 + * + * @return The function returns either + * ARM_CMSIS_NN_ARG_ERROR if argument constraints fail. or, + * ARM_CMSIS_NN_SUCCESS on successful completion. + * + * @details + * - Supported framework: TensorFlow Lite + */ +arm_cmsis_nn_status arm_fully_connected_wrapper_s8(const cmsis_nn_context *ctx, + const cmsis_nn_fc_params *fc_params, + const cmsis_nn_quant_params *quant_params, + const cmsis_nn_dims *input_dims, + const int8_t *input_data, + const cmsis_nn_dims *filter_dims, + const int8_t *filter_data, + const cmsis_nn_dims *bias_dims, + const int32_t *bias_data, + const cmsis_nn_dims *output_dims, + int8_t *output_data); + /** + * @brief Calculate the sum of each row in vector_data, multiply by lhs_offset and optionally add s32 bias_data. 
* @param[in, out] vector_sum_buf Buffer for vector sums @@ -1662,7 +1762,7 @@ int32_t arm_fully_connected_s8_get_buffer_size_mve(const cmsis_nn_dims *filter_d * fc_params->filter_offset : 0 * fc_params->output_offset : 0 * @param[in] quant_params Per-tensor quantization info. - * It contains the multiplier and shift values to be applied to the output tensor. + * It contains the multiplier and shift value to be applied to the output tensor. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] * Input dimension is taken as Nx(H * W * C_IN) * @param[in] input_data Input (activation) data pointer. Data type: int16 diff --git a/Include/arm_nnsupportfunctions.h b/Include/arm_nnsupportfunctions.h index 25bf4d34..12d3d20c 100644 --- a/Include/arm_nnsupportfunctions.h +++ b/Include/arm_nnsupportfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnsupportfunctions.h * Description: Public header file of support functions for CMSIS NN Library * - * $Date: 19 June 2024 - * $Revision: V.22.2.0 + * $Date: 12 Jul 2024 + * $Revision: V.22.3.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -703,6 +703,47 @@ arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s8(const int8_t *lhs, const int32_t address_offset, const int32_t rhs_offset); +/** + * @brief s8 Vector by Matrix (transposed) multiplication using per channel quantization for output + * + * @param[in] lhs Input left-hand side vector + * @param[in] rhs Input right-hand side matrix (transposed) + * @param[in] kernel_sum Kernel sums of the kernels (rhs). See arm_vector_sum_s8 for more info. + * @param[in] bias Input bias + * @param[out] dst Output vector + * @param[in] lhs_offset Offset to be added to the input values of the left-hand side vector. + * Range: -127 to 128 + * @param[in] dst_offset Offset to be added to the output values. 
Range: -127 to 128 + * @param[in] dst_multiplier Output multipliers + * @param[in] dst_shift Output shifts + * @param[in] rhs_cols Number of columns in the right-hand side input matrix + * @param[in] rhs_rows Number of rows in the right-hand side input matrix + * @param[in] activation_min Minimum value to clamp the output to. Range: int8 + * @param[in] activation_max Maximum value to clamp the output to. Range: int8 + * @param[in] address_offset Memory position offset for dst. First output is stored at 'dst', the + * second at 'dst + address_offset' and so on. Default value is typically 1. + * @param[in] rhs_offset Offset to be added to the input values of the right-hand side vector. + * Range: -127 to 128 + * + * @return The function returns ARM_CMSIS_NN_SUCCESS + * + */ +arm_cmsis_nn_status arm_nn_vec_mat_mult_t_per_ch_s8(const int8_t *lhs, + const int8_t *rhs, + const int32_t *kernel_sum, + const int32_t *bias, + int8_t *dst, + const int32_t lhs_offset, + const int32_t dst_offset, + const int32_t *dst_multiplier, + const int32_t *dst_shift, + const int32_t rhs_cols, + const int32_t rhs_rows, + const int32_t activation_min, + const int32_t activation_max, + const int32_t address_offset, + const int32_t rhs_offset); + /** * @brief s16 Vector by s8 Matrix (transposed) multiplication * diff --git a/Source/FullyConnectedFunctions/arm_fully_connected_per_channel_s8.c b/Source/FullyConnectedFunctions/arm_fully_connected_per_channel_s8.c new file mode 100644 index 00000000..f8184fa4 --- /dev/null +++ b/Source/FullyConnectedFunctions/arm_fully_connected_per_channel_s8.c @@ -0,0 +1,102 @@ +/* + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_fully_connected_per_channel_s8 + * Description: Fully connected function compatible with TF Lite. + * + * $Date: 15 Aug 2024 + * $Revision: V.1.0.0 + * + * Target : Arm(R) M-Profile Architecture + * + * -------------------------------------------------------------------- */ + +#include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" + +/** + * @ingroup Public + */ + +/** + * @addtogroup FC + * @{ + */ + +/* + * S8 basic fully-connected and matrix multiplication layer function using per-channel quantization for TensorFlow Lite + * + * Refer header file for details. 
+ * + */ +arm_cmsis_nn_status arm_fully_connected_per_channel_s8(const cmsis_nn_context *ctx, + const cmsis_nn_fc_params *fc_params, + const cmsis_nn_per_channel_quant_params *quant_params, + const cmsis_nn_dims *input_dims, + const int8_t *input_data, + const cmsis_nn_dims *filter_dims, + const int8_t *kernel, + const cmsis_nn_dims *bias_dims, + const int32_t *bias_data, + const cmsis_nn_dims *output_dims, + int8_t *output_data) +{ + (void)bias_dims; + + int32_t batch_cnt = input_dims->n; + +#if defined(ARM_MATH_MVEI) + if (ctx->buf == NULL) + { + return (ARM_CMSIS_NN_ARG_ERROR); + } +#endif + + const int32_t *kernel_sum = (const int32_t *)ctx->buf; + + while (batch_cnt) + { + + arm_nn_vec_mat_mult_t_per_ch_s8(input_data, + kernel, + kernel_sum, + bias_data, + output_data, + fc_params->input_offset, + fc_params->output_offset, + quant_params->multiplier, + quant_params->shift, + filter_dims->n, /* col_dim or accum_depth */ + output_dims->c, /* row_dim or output_depth */ + fc_params->activation.min, + fc_params->activation.max, + 1L, + fc_params->filter_offset); + + input_data += filter_dims->n; + output_data += output_dims->c; + batch_cnt--; + } + return (ARM_CMSIS_NN_SUCCESS); +} + +/** + * @} end of FC group + */ diff --git a/Source/FullyConnectedFunctions/arm_fully_connected_wrapper_s8.c b/Source/FullyConnectedFunctions/arm_fully_connected_wrapper_s8.c new file mode 100644 index 00000000..6d5b0d7e --- /dev/null +++ b/Source/FullyConnectedFunctions/arm_fully_connected_wrapper_s8.c @@ -0,0 +1,100 @@ +/* + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_fully_connected_s8 + * Description: Fully connected function compatible with TF Lite. + * + * $Date: 19 Aug 2024 + * $Revision: V.1.0.0 + * + * Target : Arm(R) M-Profile Architecture + * + * -------------------------------------------------------------------- */ + +#include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" + +/** + * @ingroup Public + */ + +/** + * @addtogroup FC + * @{ + */ + +/* + * S8 basic fully-connected and matrix multiplication layer function for TensorFlow Lite + * + * Refer header file for details. 
+ * + */ + +arm_cmsis_nn_status arm_fully_connected_wrapper_s8(const cmsis_nn_context *ctx, + const cmsis_nn_fc_params *fc_params, + const cmsis_nn_quant_params *quant_params, + const cmsis_nn_dims *input_dims, + const int8_t *input_data, + const cmsis_nn_dims *filter_dims, + const int8_t *filter_data, + const cmsis_nn_dims *bias_dims, + const int32_t *bias_data, + const cmsis_nn_dims *output_dims, + int8_t *output_data) +{ + + if (quant_params->is_per_channel) + { + const cmsis_nn_per_channel_quant_params per_channel_quant_params = {quant_params->multiplier, + quant_params->shift}; + + return arm_fully_connected_per_channel_s8(ctx, + fc_params, + &per_channel_quant_params, + input_dims, + input_data, + filter_dims, + filter_data, + bias_dims, + bias_data, + output_dims, + output_data); + } + else + { + const cmsis_nn_per_tensor_quant_params per_tensor_quant_params = {*quant_params->multiplier, + *quant_params->shift}; + return arm_fully_connected_s8(ctx, + fc_params, + &per_tensor_quant_params, + input_dims, + input_data, + filter_dims, + filter_data, + bias_dims, + bias_data, + output_dims, + output_data); + } +} + +/** + * @} end of FC group + */ diff --git a/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_per_ch_s8.c b/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_per_ch_s8.c new file mode 100644 index 00000000..e8aaf019 --- /dev/null +++ b/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_per_ch_s8.c @@ -0,0 +1,770 @@ +/* + * SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_nn_vec_mat_mult_t_per_ch_s8 + * Description: s8 vector by matrix (transposed) multiplication + * + * $Date: 19 Aug 2024 + * $Revision: V.1.0.0 + * + * Target : Arm(R) M-Profile Architecture + * + * -------------------------------------------------------------------- */ + +#include "arm_nnsupportfunctions.h" + +/** + * @ingroup groupSupport + */ + +/** + * @defgroup supportFC Fully Connected + * + * Support functions for Fully Connected + * + */ + +/** + * @addtogroup supportFC + * @{ + */ + +/* + * s8 vector(lhs) by matrix (transposed) multiplication and per channel quant output + * + * Refer header file for details. 
+ * + */ +#if !defined(ARM_MATH_MVEI) && defined(ARM_MATH_DSP) && !defined(__ARMCC_VERSION) && !defined(__ICCARM__) + #pragma GCC optimize("unroll-loops") +#endif +arm_cmsis_nn_status arm_nn_vec_mat_mult_t_per_ch_s8(const int8_t *lhs, + const int8_t *rhs, + const int32_t *kernel_sum, + const int32_t *bias, + int8_t *dst, + const int32_t lhs_offset, + const int32_t dst_offset, + const int32_t *dst_multiplier, + const int32_t *dst_shift, + const int32_t rhs_cols, + const int32_t rhs_rows, + const int32_t activation_min, + const int32_t activation_max, + const int32_t address_offset, + const int32_t rhs_offset) +{ + if (rhs_offset) + { +#if defined(ARM_MATH_MVEI) + const int32_t row_loop_cnt = rhs_rows / 4; + const uint32x4_t address_offset_array = {0, address_offset, address_offset * 2, address_offset * 3}; + + for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) + { + int32_t acc_0 = 0; + int32_t acc_1 = 0; + int32_t acc_2 = 0; + int32_t acc_3 = 0; + + const int32_t col_loop_cnt = (rhs_cols + 15) / 16; + + const int8_t *lhs_vec = lhs; + const int8_t *rhs_0_ptr = rhs; + const int8_t *rhs_1_ptr = rhs + rhs_cols; + const int8_t *rhs_2_ptr = rhs + 2 * rhs_cols; + const int8_t *rhs_3_ptr = rhs + 3 * rhs_cols; + + int32_t lhs_sum = 0; + + if (bias) + { + acc_0 = *bias++; + acc_1 = *bias++; + acc_2 = *bias++; + acc_3 = *bias++; + } + + uint32_t col_cnt = (uint32_t)rhs_cols; + + for (int32_t i = 0; i < col_loop_cnt; i++) + { + mve_pred16_t p = vctp8q(col_cnt); + col_cnt -= 16; + + const int8x16_t input = vldrbq_z_s8(lhs_vec, p); + lhs_sum = vaddvaq_s8(lhs_sum, input); + + const int8x16_t ker_0 = vldrbq_z_s8(rhs_0_ptr, p); + acc_0 = vmladavaq_s8(acc_0, ker_0, input); + + const int8x16_t ker_1 = vldrbq_z_s8(rhs_1_ptr, p); + acc_1 = vmladavaq_s8(acc_1, ker_1, input); + + const int8x16_t ker_2 = vldrbq_z_s8(rhs_2_ptr, p); + acc_2 = vmladavaq_s8(acc_2, ker_2, input); + + const int8x16_t ker_3 = vldrbq_z_s8(rhs_3_ptr, p); + acc_3 = vmladavaq_s8(acc_3, 
ker_3, input); + + lhs_vec += 16; + rhs_0_ptr += 16; + rhs_1_ptr += 16; + rhs_2_ptr += 16; + rhs_3_ptr += 16; + } + rhs += 4 * rhs_cols; + + int32x4_t acc = {acc_0, acc_1, acc_2, acc_3}; + + const int32x4_t rhs_sum = {kernel_sum[0], kernel_sum[1], kernel_sum[2], kernel_sum[3]}; + acc += vdupq_n_s32(lhs_offset) * rhs_sum; + kernel_sum += 4; + + acc += vdupq_n_s32(rhs_offset) * vdupq_n_s32(lhs_sum); + acc += vdupq_n_s32(rhs_offset * lhs_offset) * vdupq_n_s32(rhs_cols); + + acc = arm_requantize_mve(acc, *dst_multiplier++, *dst_shift++); + + acc = vaddq_s32(acc, vdupq_n_s32(dst_offset)); + acc = vmaxq_s32(acc, vdupq_n_s32(activation_min)); + acc = vminq_s32(acc, vdupq_n_s32(activation_max)); + + vstrbq_scatter_offset_s32(dst, address_offset_array, acc); + + dst += 4 * address_offset; + } + + const int loop_cnt = rhs_rows % 4; + for (int32_t i_row_loop_cnt = 0; i_row_loop_cnt < loop_cnt; i_row_loop_cnt++) + { + int32_t acc_0 = 0; + const int32_t col_loop_cnt = (rhs_cols + 15) / 16; + const int8_t *lhs_vec = lhs; + const int8_t *rhs_ptr = rhs; + int32_t lhs_sum = 0; + uint32_t col_cnt = (uint32_t)rhs_cols; + + for (int32_t i = 0; i < col_loop_cnt; i++) + { + mve_pred16_t p = vctp8q(col_cnt); + col_cnt -= 16; + const int8x16_t input = vldrbq_z_s8(lhs_vec, p); + lhs_sum = vaddvaq_s8(lhs_sum, input); + + const int8x16_t ker_0 = vldrbq_z_s8(rhs_ptr, p); + acc_0 = vmladavaq_s8(acc_0, ker_0, input); + + lhs_vec += 16; + rhs_ptr += 16; + } + rhs += rhs_cols; + + if (bias) + { + acc_0 += *bias; + bias++; + } + const int32_t rhs_sum = kernel_sum[i_row_loop_cnt]; + acc_0 += rhs_sum * lhs_offset; + acc_0 += lhs_sum * rhs_offset; + acc_0 += rhs_cols * lhs_offset * rhs_offset; + + acc_0 = arm_nn_requantize(acc_0, *dst_multiplier++, *dst_shift++); + acc_0 += dst_offset; + + // Clamp the result + acc_0 = MAX(acc_0, activation_min); + *dst = MIN(acc_0, activation_max); + dst += address_offset; + } + +#elif defined(ARM_MATH_DSP) + (void)kernel_sum; + + const int32_t row_loop_cnt = 
rhs_rows / 2; + const int16_t lhs_offset_s16 = (int16_t)lhs_offset; + const uint32_t lhs_offset_s16x2 = PKHBT(lhs_offset_s16, lhs_offset_s16, 16); + + const int16_t rhs_offset_s16 = (int16_t)rhs_offset; + const uint32_t rhs_offset_s16x2 = PKHBT(rhs_offset_s16, rhs_offset_s16, 16); + + for (int32_t i = 0; i < row_loop_cnt; i++) + { + int32_t acc_0 = 0; + int32_t acc_1 = 0; + if (bias) + { + acc_0 = *bias++; + acc_1 = *bias++; + } + + const int32_t col_loop_cnt = rhs_cols / 4; + + const int8_t *lhs_vec = lhs; + const int8_t *rhs_0_ptr = rhs; + const int8_t *rhs_1_ptr = rhs + rhs_cols; + rhs += 2 * rhs_cols; + + for (int32_t j = col_loop_cnt; j != 0; j--) + { + int32_t vec_0 = arm_nn_read_s8x4_ia(&lhs_vec); + int32_t vec_1 = SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8); + + vec_0 = SXTAB16(lhs_offset_s16x2, vec_0); + + int32_t ker_0 = arm_nn_read_s8x4_ia(&rhs_0_ptr); + int32_t ker_1 = SXTAB16_RORn(rhs_offset_s16x2, (uint32_t)ker_0, 8); + ker_0 = SXTAB16(rhs_offset_s16x2, ker_0); + + acc_0 = SMLAD(ker_1, vec_1, acc_0); + acc_0 = SMLAD(ker_0, vec_0, acc_0); + + ker_0 = arm_nn_read_s8x4_ia(&rhs_1_ptr); + ker_1 = SXTAB16_RORn(rhs_offset_s16x2, (uint32_t)ker_0, 8); + ker_0 = SXTAB16(rhs_offset_s16x2, ker_0); + + acc_1 = SMLAD(ker_1, vec_1, acc_1); + acc_1 = SMLAD(ker_0, vec_0, acc_1); + } + + for (int32_t k = col_loop_cnt * 4; k < rhs_cols; k++) + { + const int32_t lhs_temp = (*lhs_vec + lhs_offset); + lhs_vec++; + acc_0 += lhs_temp * (*rhs_0_ptr + rhs_offset); + rhs_0_ptr++; + acc_1 += lhs_temp * (*rhs_1_ptr + rhs_offset); + rhs_1_ptr++; + } + + acc_0 = arm_nn_requantize(acc_0, *dst_multiplier++, *dst_shift++); + acc_1 = arm_nn_requantize(acc_1, *dst_multiplier++, *dst_shift++); + + // Add offset + acc_0 += dst_offset; + acc_1 += dst_offset; + // Clamp the result + acc_0 = MAX(acc_0, activation_min); + acc_0 = MIN(acc_0, activation_max); + acc_1 = MAX(acc_1, activation_min); + acc_1 = MIN(acc_1, activation_max); + *dst = (int8_t)acc_0; + *(dst + address_offset) = 
(int8_t)acc_1; + dst += 2 * address_offset; + } + + if (rhs_rows & 0x1) + { + int32_t acc_0 = 0; + if (bias) + { + acc_0 = *bias++; + } + const int32_t col_loop_cnt = rhs_cols / 4; + + const int8_t *lhs_vec = lhs; + const int8_t *rhs_ptr = rhs; + + for (int32_t i = col_loop_cnt; i != 0; i--) + { + int32_t vec_0 = arm_nn_read_s8x4_ia(&lhs_vec); + int32_t vec_1 = SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8); + vec_0 = SXTAB16(lhs_offset_s16x2, vec_0); + + int32_t ker_0 = arm_nn_read_s8x4_ia(&rhs_ptr); + int32_t ker_1 = SXTAB16_RORn(rhs_offset_s16x2, (uint32_t)ker_0, 8); + ker_0 = SXTAB16(rhs_offset_s16x2, ker_0); + + acc_0 = SMLAD(ker_1, vec_1, acc_0); + acc_0 = SMLAD(ker_0, vec_0, acc_0); + } + + for (int32_t j = col_loop_cnt * 4; j < rhs_cols; j++) + { + const int32_t lhs_temp = (*lhs_vec + lhs_offset); + lhs_vec++; + acc_0 += lhs_temp * (*rhs_ptr + rhs_offset); + rhs_ptr++; + } + + acc_0 = arm_nn_requantize(acc_0, *dst_multiplier++, *dst_shift++); + + // Add offset + acc_0 += dst_offset; + + // Clamp the result + acc_0 = MAX(acc_0, activation_min); + acc_0 = MIN(acc_0, activation_max); + *dst = (int8_t)acc_0; + dst += address_offset; + } + +#else + (void)kernel_sum; + + const int32_t row_loop_cnt = rhs_rows / 3; + + for (int32_t i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) + { + const int8_t *lhs_ptr = lhs; + const int8_t *rhs_ptr_0 = &rhs[0]; + const int8_t *rhs_ptr_1 = &rhs[rhs_cols]; + const int8_t *rhs_ptr_2 = &rhs[rhs_cols * 2]; + + int32_t res00 = 0; + int32_t res01 = 0; + int32_t res02 = 0; + if (bias) + { + res00 = *bias++; + res01 = *bias++; + res02 = *bias++; + } + for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) + { + const int32_t rhs_value0 = (int8_t)*rhs_ptr_0 + rhs_offset; + const int32_t rhs_value1 = (int8_t)*rhs_ptr_1 + rhs_offset; + const int32_t rhs_value2 = (int8_t)*rhs_ptr_2 + rhs_offset; + const int32_t lhs_value = (int8_t)*lhs_ptr + lhs_offset; + + res00 += lhs_value * rhs_value0; + res01 
+= lhs_value * rhs_value1; + res02 += lhs_value * rhs_value2; + + ++rhs_ptr_0; + ++rhs_ptr_1; + ++rhs_ptr_2; + ++lhs_ptr; + } + + // Quantize down + res00 = arm_nn_requantize(res00, *dst_multiplier++, *dst_shift++); + res01 = arm_nn_requantize(res01, *dst_multiplier++, *dst_shift++); + res02 = arm_nn_requantize(res02, *dst_multiplier++, *dst_shift++); + + // Add offset + res00 += dst_offset; + res01 += dst_offset; + res02 += dst_offset; + + // Clamp the result + res00 = MAX(res00, activation_min); + res00 = MIN(res00, activation_max); + res01 = MAX(res01, activation_min); + res01 = MIN(res01, activation_max); + res02 = MAX(res02, activation_min); + res02 = MIN(res02, activation_max); + + *dst = (int8_t)res00; + *(dst + address_offset) = (int8_t)res01; + *(dst + 2 * address_offset) = (int8_t)res02; + dst += 3 * address_offset; + + rhs += 3 * rhs_cols; + } + + const int loop_cnt = rhs_rows % 3; + + for (int32_t i_loop_cnt = 0; i_loop_cnt < loop_cnt; i_loop_cnt++) + { + const int8_t *lhs_ptr = &lhs[0]; + const int8_t *rhs_ptr = &rhs[0]; + + int32_t res00 = 0; + if (bias) + { + res00 = *bias++; + } + + for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) + { + int32_t rhs_value0 = (int8_t)rhs_ptr[0] + rhs_offset; + int32_t lhs_value = (int8_t)lhs_ptr[0] + lhs_offset; + + res00 += lhs_value * rhs_value0; + + ++rhs_ptr; + ++lhs_ptr; + } + + // Quantize down + res00 = arm_nn_requantize(res00, *dst_multiplier++, *dst_shift++); + + // Add offset + res00 += dst_offset; + + // Clamp the result + res00 = MAX(res00, activation_min); + res00 = MIN(res00, activation_max); + + *dst = (int8_t)res00; + dst += address_offset; + rhs += rhs_cols; + } +#endif + } + + else + { +#if defined(ARM_MATH_MVEI) + const int32_t row_loop_cnt = rhs_rows / 4; + const uint32x4_t address_offset_array = {0, address_offset, address_offset * 2, address_offset * 3}; + + for (int32_t i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) + { + int32_t acc_0 = 0; + int32_t 
acc_1 = 0; + int32_t acc_2 = 0; + int32_t acc_3 = 0; + + const int32_t col_loop_cnt = (rhs_cols + 15) / 16; + + const int8_t *lhs_vec = lhs; + const int8_t *rhs_0_ptr = rhs; + const int8_t *rhs_1_ptr = rhs + rhs_cols; + const int8_t *rhs_2_ptr = rhs + 2 * rhs_cols; + const int8_t *rhs_3_ptr = rhs + 3 * rhs_cols; + + if (bias) + { + acc_0 = *bias++; + acc_1 = *bias++; + acc_2 = *bias++; + acc_3 = *bias++; + } + + uint32_t col_cnt = (uint32_t)rhs_cols; + + for (int32_t i = 0; i < col_loop_cnt; i++) + { + mve_pred16_t p = vctp8q(col_cnt); + col_cnt -= 16; + + const int8x16_t input = vldrbq_z_s8(lhs_vec, p); + + const int8x16_t ker_0 = vldrbq_z_s8(rhs_0_ptr, p); + acc_0 = vmladavaq_s8(acc_0, ker_0, input); + + const int8x16_t ker_1 = vldrbq_z_s8(rhs_1_ptr, p); + acc_1 = vmladavaq_s8(acc_1, ker_1, input); + + const int8x16_t ker_2 = vldrbq_z_s8(rhs_2_ptr, p); + acc_2 = vmladavaq_s8(acc_2, ker_2, input); + + const int8x16_t ker_3 = vldrbq_z_s8(rhs_3_ptr, p); + acc_3 = vmladavaq_s8(acc_3, ker_3, input); + + lhs_vec += 16; + rhs_0_ptr += 16; + rhs_1_ptr += 16; + rhs_2_ptr += 16; + rhs_3_ptr += 16; + } + rhs += 4 * rhs_cols; + + int32x4_t acc = {acc_0, acc_1, acc_2, acc_3}; + + const int32x4_t rhs_sum = {kernel_sum[0], kernel_sum[1], kernel_sum[2], kernel_sum[3]}; + acc += vdupq_n_s32(lhs_offset) * rhs_sum; + kernel_sum += 4; + + acc = arm_requantize_mve_32x4(acc, vldrwq_s32(dst_multiplier), vldrwq_s32(dst_shift)); + dst_multiplier += 4; + dst_shift += 4; + + acc = vaddq_s32(acc, vdupq_n_s32(dst_offset)); + acc = vmaxq_s32(acc, vdupq_n_s32(activation_min)); + acc = vminq_s32(acc, vdupq_n_s32(activation_max)); + + vstrbq_scatter_offset_s32(dst, address_offset_array, acc); + + dst += 4 * address_offset; + } + + const int loop_cnt = rhs_rows % 4; + for (int32_t i_row_loop_cnt = 0; i_row_loop_cnt < loop_cnt; i_row_loop_cnt++) + { + int32_t acc_0 = 0; + const int32_t col_loop_cnt = (rhs_cols + 15) / 16; + const int8_t *lhs_vec = lhs; + const int8_t *rhs_ptr = rhs; + uint32_t 
col_cnt = (uint32_t)rhs_cols; + + for (int32_t i = 0; i < col_loop_cnt; i++) + { + mve_pred16_t p = vctp8q(col_cnt); + col_cnt -= 16; + const int8x16_t input = vldrbq_z_s8(lhs_vec, p); + + const int8x16_t ker_0 = vldrbq_z_s8(rhs_ptr, p); + acc_0 = vmladavaq_s8(acc_0, ker_0, input); + + lhs_vec += 16; + rhs_ptr += 16; + } + rhs += rhs_cols; + + if (bias) + { + acc_0 += *bias; + bias++; + } + const int32_t rhs_sum = kernel_sum[i_row_loop_cnt]; + const int32_t offsets = rhs_sum * lhs_offset; + acc_0 += offsets; + acc_0 = arm_nn_requantize(acc_0, *dst_multiplier++, *dst_shift++); + + acc_0 += dst_offset; + + // Clamp the result + acc_0 = MAX(acc_0, activation_min); + *dst = MIN(acc_0, activation_max); + dst += address_offset; + } + +#elif defined(ARM_MATH_DSP) + (void)kernel_sum; + + const int32_t row_loop_cnt = rhs_rows / 2; + const int16_t lhs_offset_s16 = (int16_t)lhs_offset; + const uint32_t lhs_offset_s16x2 = PKHBT(lhs_offset_s16, lhs_offset_s16, 16); + + for (int32_t i = 0; i < row_loop_cnt; i++) + { + int32_t acc_0 = 0; + int32_t acc_1 = 0; + if (bias) + { + acc_0 = *bias++; + acc_1 = *bias++; + } + + const int32_t col_loop_cnt = rhs_cols / 4; + + const int8_t *lhs_vec = lhs; + const int8_t *rhs_0_ptr = rhs; + const int8_t *rhs_1_ptr = rhs + rhs_cols; + rhs += 2 * rhs_cols; + + for (int32_t j = col_loop_cnt; j != 0; j--) + { + int32_t vec_0 = arm_nn_read_s8x4_ia(&lhs_vec); + int32_t vec_1 = SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8); + + vec_0 = SXTAB16(lhs_offset_s16x2, vec_0); + + int32_t ker_0 = arm_nn_read_s8x4_ia(&rhs_0_ptr); + int32_t ker_1 = SXTB16_RORn((uint32_t)ker_0, 8); + ker_0 = SXTB16(ker_0); + + acc_0 = SMLAD(ker_1, vec_1, acc_0); + acc_0 = SMLAD(ker_0, vec_0, acc_0); + + ker_0 = arm_nn_read_s8x4_ia(&rhs_1_ptr); + ker_1 = SXTB16_RORn((uint32_t)ker_0, 8); + ker_0 = SXTB16(ker_0); + + acc_1 = SMLAD(ker_1, vec_1, acc_1); + acc_1 = SMLAD(ker_0, vec_0, acc_1); + } + + for (int32_t k = col_loop_cnt * 4; k < rhs_cols; k++) + { + const int32_t 
lhs_temp = (*lhs_vec + lhs_offset); + lhs_vec++; + acc_0 += lhs_temp * (*rhs_0_ptr); + rhs_0_ptr++; + acc_1 += lhs_temp * (*rhs_1_ptr); + rhs_1_ptr++; + } + + acc_0 = arm_nn_requantize(acc_0, *dst_multiplier++, *dst_shift++); + acc_1 = arm_nn_requantize(acc_1, *dst_multiplier++, *dst_shift++); + + // Add offset + acc_0 += dst_offset; + acc_1 += dst_offset; + + // Clamp the result + acc_0 = MAX(acc_0, activation_min); + acc_0 = MIN(acc_0, activation_max); + acc_1 = MAX(acc_1, activation_min); + acc_1 = MIN(acc_1, activation_max); + *dst = (int8_t)acc_0; + *(dst + address_offset) = (int8_t)acc_1; + dst += 2 * address_offset; + } + + if (rhs_rows & 0x1) + { + int32_t acc_0 = 0; + if (bias) + { + acc_0 = *bias++; + } + const int32_t col_loop_cnt = rhs_cols / 4; + + const int8_t *lhs_vec = lhs; + const int8_t *rhs_ptr = rhs; + + for (int32_t i = col_loop_cnt; i != 0; i--) + { + int32_t vec_0 = arm_nn_read_s8x4_ia(&lhs_vec); + int32_t vec_1 = SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8); + vec_0 = SXTAB16(lhs_offset_s16x2, vec_0); + + int32_t ker_0 = arm_nn_read_s8x4_ia(&rhs_ptr); + int32_t ker_1 = SXTB16_RORn((uint32_t)ker_0, 8); + ker_0 = SXTB16(ker_0); + + acc_0 = SMLAD(ker_1, vec_1, acc_0); + acc_0 = SMLAD(ker_0, vec_0, acc_0); + } + + for (int32_t j = col_loop_cnt * 4; j < rhs_cols; j++) + { + const int32_t lhs_temp = (*lhs_vec + lhs_offset); + lhs_vec++; + acc_0 += lhs_temp * (*rhs_ptr); + rhs_ptr++; + } + + acc_0 = arm_nn_requantize(acc_0, *dst_multiplier++, *dst_shift++); + + // Add offset + acc_0 += dst_offset; + + // Clamp the result + acc_0 = MAX(acc_0, activation_min); + acc_0 = MIN(acc_0, activation_max); + *dst = (int8_t)acc_0; + dst += address_offset; + } + +#else + (void)kernel_sum; + + const int32_t row_loop_cnt = rhs_rows / 3; + + for (int32_t i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) + { + const int8_t *lhs_ptr = lhs; + const int8_t *rhs_ptr_0 = &rhs[0]; + const int8_t *rhs_ptr_1 = &rhs[rhs_cols]; + const int8_t 
*rhs_ptr_2 = &rhs[rhs_cols * 2]; + + int32_t res00 = 0; + int32_t res01 = 0; + int32_t res02 = 0; + if (bias) + { + res00 = *bias++; + res01 = *bias++; + res02 = *bias++; + } + for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) + { + const int32_t rhs_value0 = (int8_t)*rhs_ptr_0; + const int32_t rhs_value1 = (int8_t)*rhs_ptr_1; + const int32_t rhs_value2 = (int8_t)*rhs_ptr_2; + const int32_t lhs_value = (int8_t)*lhs_ptr + lhs_offset; + + res00 += lhs_value * rhs_value0; + res01 += lhs_value * rhs_value1; + res02 += lhs_value * rhs_value2; + + ++rhs_ptr_0; + ++rhs_ptr_1; + ++rhs_ptr_2; + ++lhs_ptr; + } + // Quantize down + res00 = arm_nn_requantize(res00, *dst_multiplier++, *dst_shift++); + res01 = arm_nn_requantize(res01, *dst_multiplier++, *dst_shift++); + res02 = arm_nn_requantize(res02, *dst_multiplier++, *dst_shift++); + + // Add offset + res00 += dst_offset; + res01 += dst_offset; + res02 += dst_offset; + + // Clamp the result + res00 = MAX(res00, activation_min); + res00 = MIN(res00, activation_max); + res01 = MAX(res01, activation_min); + res01 = MIN(res01, activation_max); + res02 = MAX(res02, activation_min); + res02 = MIN(res02, activation_max); + + *dst = (int8_t)res00; + *(dst + address_offset) = (int8_t)res01; + *(dst + 2 * address_offset) = (int8_t)res02; + dst += 3 * address_offset; + + rhs += 3 * rhs_cols; + } + + const int loop_cnt = rhs_rows % 3; + + for (int32_t i_loop_cnt = 0; i_loop_cnt < loop_cnt; i_loop_cnt++) + { + const int8_t *lhs_ptr = &lhs[0]; + const int8_t *rhs_ptr = &rhs[0]; + + int32_t res00 = 0; + if (bias) + { + res00 = *bias++; + } + + for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) + { + int32_t rhs_value0 = (int8_t)rhs_ptr[0]; + int32_t lhs_value = (int8_t)lhs_ptr[0] + lhs_offset; + + res00 += lhs_value * rhs_value0; + + ++rhs_ptr; + ++lhs_ptr; + } + + // Quantize down + res00 = arm_nn_requantize(res00, *dst_multiplier++, *dst_shift++); + + // Add offset + res00 += dst_offset; + + // 
Clamp the result + res00 = MAX(res00, activation_min); + res00 = MIN(res00, activation_max); + + *dst = (int8_t)res00; + dst += address_offset; + rhs += rhs_cols; + } +#endif + } + return ARM_CMSIS_NN_SUCCESS; +} + +/** + * @} end of Doxygen group + */ diff --git a/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c b/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c index 9575cd16..e639a47b 100644 --- a/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c +++ b/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c @@ -21,8 +21,8 @@ * Title: arm_nn_vec_mat_mult_t_s8 * Description: s8 vector by matrix (transposed) multiplication * - * $Date: 14 Feb 2023 - * $Revision: V.6.0.0 + * $Date: 12 Jul 2024 + * $Revision: V.6.1.0 * * Target : Arm(R) M-Profile Architecture * @@ -52,7 +52,7 @@ * Refer header file for details. * */ -#if defined(ARM_MATH_DSP) && !defined(__ARMCC_VERSION) && !defined(__ICCARM__) +#if !defined(ARM_MATH_MVEI) && defined(ARM_MATH_DSP) && !defined(__ARMCC_VERSION) && !defined(__ICCARM__) #pragma GCC optimize("unroll-loops") #endif arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s8(const int8_t *lhs, @@ -422,7 +422,6 @@ arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s8(const int8_t *lhs, else { - #if defined(ARM_MATH_MVEI) const int32_t row_loop_cnt = rhs_rows / 4; const uint32x4_t address_offset_array = {0, address_offset, address_offset * 2, address_offset * 3}; diff --git a/Tests/UnitTest/README.md b/Tests/UnitTest/README.md index c85ff129..c54a09f9 100644 --- a/Tests/UnitTest/README.md +++ b/Tests/UnitTest/README.md @@ -167,7 +167,7 @@ Current progress: | --- | --- | --- | --- | convolution | x | x | New version only supports 16x8 and int4 packed weights | depthwise conv | x | | -| fully_connected | x | x | New version only supports int4 packed weights +| fully_connected | x | x | New version supports int4 packed weights. Only new version supports per-channel quantization for int8. 
| lstm | x | x | Only new version supporting 16x8 | svdf | x | | | softmax | x | | diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/op_conv.py b/Tests/UnitTest/RefactoredTestGen/Lib/op_conv.py index 3db679ce..381d0e6d 100644 --- a/Tests/UnitTest/RefactoredTestGen/Lib/op_conv.py +++ b/Tests/UnitTest/RefactoredTestGen/Lib/op_conv.py @@ -19,10 +19,13 @@ from tensorflow.lite.python.interpreter import Interpreter from tensorflow.lite.python.interpreter import OpResolverType -import tf_keras as keras +import keras import numpy as np -import tflite_micro +try: + import tflite_micro +except ModuleNotFoundError: + pass def generate_data(tflite_fname, params): @@ -119,28 +122,6 @@ def calculate_padding(x_output, y_output, params): pad_y_with_offset, pad_x_with_offset, pad_y, pad_x = calculate_padding(x_output, y_output, params) - def generate_quantize_per_channel_multiplier(params, scales): - def quantize_scale(scale): - significand, shift = math.frexp(scale) - significand_q31 = round(significand * (1 << 31)) - return significand_q31, shift - - num_channels = params["out_ch"] - per_channel_multiplier = [] - per_channel_shift = [] - - if len(scales["scaling_factors"]) != num_channels: - raise RuntimeError("Missing scaling factors") - - for i in range(num_channels): - effective_output_scale = scales["input_scale"] * scales["scaling_factors"][i] / scales["output_scale"] - (quantized_multiplier, shift) = quantize_scale(effective_output_scale) - - per_channel_multiplier.append(quantized_multiplier) - per_channel_shift.append(shift) - - return per_channel_multiplier, per_channel_shift - generated_params["input_batches"] = params["batch_size"] generated_params["pad_x"] = pad_x generated_params["pad_y"] = pad_y @@ -150,7 +131,7 @@ def quantize_scale(scale): generated_params["input_offset"] = -scales["input_zero_point"] generated_params["output_offset"] = scales["output_zero_point"] - per_channel_multiplier, per_channel_shift = generate_quantize_per_channel_multiplier(params, 
scales) + per_channel_multiplier, per_channel_shift = Lib.op_utils.generate_quantize_per_channel_multiplier(params, scales) tensors["output_multiplier"] = np.array(per_channel_multiplier) tensors["output_shift"] = np.array(per_channel_shift) diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/op_fully_connected.py b/Tests/UnitTest/RefactoredTestGen/Lib/op_fully_connected.py index aa91ab9f..492474de 100644 --- a/Tests/UnitTest/RefactoredTestGen/Lib/op_fully_connected.py +++ b/Tests/UnitTest/RefactoredTestGen/Lib/op_fully_connected.py @@ -14,10 +14,29 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import Lib.op_utils + import math +import Lib.op_utils + +from tensorflow.lite.python.interpreter import Interpreter +from tensorflow.lite.python.interpreter import OpResolverType + import numpy as np +import keras + + +def quantize_multiplier(input_scale, weights_scale, output_scale): + def quantize_scale(scale): + significand, shift = math.frexp(scale) + significand_q31 = round(significand * (1 << 31)) + return significand_q31, shift + + input_product_scale = input_scale * weights_scale + if input_product_scale < 0: + raise RuntimeError("negative input product scale") + real_multipler = input_product_scale / output_scale + return quantize_scale(real_multipler) class Op_fully_connected(Lib.op_utils.Op_type): @@ -59,14 +78,20 @@ def get_shapes(params): out_ch = params["out_ch"] shapes["input_tensor"] = (params["batch_size"], in_ch) - shapes["weight_shape"] = (in_ch, 1, 1, out_ch) + shapes["weight_shape"] = (in_ch, out_ch) if params["generate_bias"]: shapes["bias_shape"] = [out_ch] - params["json_template"] = "fully_connected.json" else: shapes["bias_shape"] = [] - params["json_template"] = "fully_connected_null_bias.json" + + if params["tflite_generator"] == "json": + if params["generate_bias"]: + params["json_template"] = "fully_connected.json" + else: + params["json_template"] = "fully_connected_null_bias.json" + 
else: + shapes["representational_dataset"] = (params["batch_size"], 1, 1, in_ch) return shapes @@ -92,7 +117,7 @@ def generate_data_json(shapes, params): weights = np.random.randint( params["weights_min"], params["weights_max"], size=shapes["weight_shape"]) -# Lib.op_utils.get_dtype_min("int4_t"), Lib.op_utils.get_dtype_max("int4_t"), size=shapes["weight_shape"]) + uneven = weights.size % 2 if uneven: weights = np.append(weights, 0) @@ -109,19 +134,90 @@ def generate_data_json(shapes, params): else: tensors["input_bias"] = None - def quantize_multiplier(input_scale, weights_scale, output_scale): - def quantize_scale(scale): - significand, shift = math.frexp(scale) - significand_q31 = round(significand * (1 << 31)) - return significand_q31, shift - - input_product_scale = input_scale * weights_scale - if input_product_scale < 0: - raise RuntimeError("negative input product scale") - real_multipler = input_product_scale / output_scale - return quantize_scale(real_multipler) - generated_params["output_multiplier"], generated_params["output_shift"] = quantize_multiplier( params["input_scale"], params["w_scale"], params["output_scale"]) return Lib.op_utils.Generated_data(generated_params, tensors, scales, effective_scales, aliases) + + def generate_data_tflite(tflite_fname, params): + tensors = {} + effective_scales = {} + scales = {} + generated_params = {} + aliases = {} + + # To be removed + aliases["output_multiplier"] = "output_mult" + aliases["bias"] = "biases" + aliases["output"] = "output_ref" + + interpreter = Interpreter(str(tflite_fname), experimental_op_resolver_type=OpResolverType.BUILTIN_REF) + interpreter.allocate_tensors() + tensor_details = interpreter.get_tensor_details() + + bias_index = 1 + if params["generate_bias"]: + filter_index = 2 + else: + filter_index = 1 + + filter_layer = tensor_details[filter_index] + scales["scaling_factors"] = filter_layer['quantization_parameters']['scales'] + + if params["generate_bias"]: + bias_layer = 
tensor_details[bias_index] + else: + bias_layer = None + + input_details = interpreter.get_input_details() + (scales["input_scale"], scales["input_zero_point"]) = input_details[0]['quantization'] + + output_details = interpreter.get_output_details() + (scales["output_scale"], scales["output_zero_point"]) = output_details[0]['quantization'] + + tensors["weights"] = interpreter.get_tensor(filter_layer['index']) + if params["generate_bias"]: + tensors["bias"] = interpreter.get_tensor(bias_layer['index']) + else: + tensors["bias"] = None + + generated_params["input_batches"] = params["batch_size"] + generated_params["input_w"] = 1 + generated_params["input_h"] = 1 + generated_params["dst_size"] = params["out_ch"] * params["batch_size"] + generated_params["accumulation_depth"] = params["in_ch"] + generated_params["input_offset"] = -scales["input_zero_point"] + generated_params["output_offset"] = scales["output_zero_point"] + + if params["per_channel_quant"]: + per_channel_multiplier, per_channel_shift = Lib.op_utils.generate_quantize_per_channel_multiplier( + params, scales) + tensors["output_multiplier"] = np.array(per_channel_multiplier) + tensors["output_shift"] = np.array(per_channel_shift) + else: + weights_scale = filter_layer['quantization_parameters']['scales'][0] + generated_params["output_multiplier"], generated_params["output_shift"] = quantize_multiplier( + scales["input_scale"], weights_scale, scales["output_scale"]) + + return Lib.op_utils.Generated_data(generated_params, tensors, scales, effective_scales, aliases) + + def generate_keras_model(shapes, params): + + model = keras.models.Sequential() + model.add( + keras.layers.InputLayer(input_shape=(params["in_ch"], ), batch_size=params["batch_size"])) + + fully_connected_layer = keras.layers.Dense(params["out_ch"], activation=None, use_bias=params["generate_bias"]) + model.add(fully_connected_layer) + + weights = Lib.op_utils.generate_tf_tensor( + shapes["weight_shape"], params["weights_min"], 
params["weights_max"], decimals=8) + + if params["generate_bias"]: + bias = Lib.op_utils.generate_tf_tensor( + shapes["bias_shape"], params["bias_min"], params["bias_max"]) + fully_connected_layer.set_weights([weights, bias]) + else: + fully_connected_layer.set_weights([weights]) + + return model diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/op_utils.py b/Tests/UnitTest/RefactoredTestGen/Lib/op_utils.py index d4c71c91..4e49b88e 100644 --- a/Tests/UnitTest/RefactoredTestGen/Lib/op_utils.py +++ b/Tests/UnitTest/RefactoredTestGen/Lib/op_utils.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +import math import numpy as np import tensorflow as tf @@ -152,3 +154,25 @@ def get_dtype_min(dtype): return -9223372036854775808 else: raise Exception(f"Unrecognized dtype '{dtype}'") + +def generate_quantize_per_channel_multiplier(params, scales): + def quantize_scale(scale): + significand, shift = math.frexp(scale) + significand_q31 = round(significand * (1 << 31)) + return significand_q31, shift + + num_channels = params["out_ch"] + per_channel_multiplier = [] + per_channel_shift = [] + + if len(scales["scaling_factors"]) != num_channels: + raise RuntimeError("Missing scaling factors") + + for i in range(num_channels): + effective_output_scale = scales["input_scale"] * scales["scaling_factors"][i] / scales["output_scale"] + (quantized_multiplier, shift) = quantize_scale(effective_output_scale) + + per_channel_multiplier.append(quantized_multiplier) + per_channel_shift.append(shift) + + return per_channel_multiplier, per_channel_shift diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/test.py b/Tests/UnitTest/RefactoredTestGen/Lib/test.py index cd6cdab2..d2d55150 100644 --- a/Tests/UnitTest/RefactoredTestGen/Lib/test.py +++ b/Tests/UnitTest/RefactoredTestGen/Lib/test.py @@ -27,7 +27,7 @@ import subprocess import sys import math -import tf_keras as keras +import keras # Optional runtime 
interpreters try: @@ -70,12 +70,21 @@ def generate(params, args, fpaths): # Generate reference data if params["tflite_generator"] == "keras": keras_model = op_type.generate_keras_model(shapes, params) + + per_tensor_quant_for_dense = False + try: + per_tensor_quant_for_dense = not params["per_channel_quant"] + except KeyError: + pass + convert_keras_to_tflite(fpaths["tflite"], keras_model, quantize=True, dtype=params["input_data_type"], bias_dtype=params["bias_data_type"], - shape=shapes) + shape=shapes, + per_tensor_quant_for_dense=per_tensor_quant_for_dense) + data = op_type.generate_data_tflite(fpaths["tflite"], params) elif params["tflite_generator"] == "json": @@ -167,10 +176,10 @@ def get_op_type(op_type_string): raise ValueError(f"Unknown op type '{op_type_string}'") -def convert_keras_to_tflite(output_fpath, keras_model, quantize, dtype, bias_dtype, shape): +def convert_keras_to_tflite( + output_fpath, keras_model, quantize, dtype, bias_dtype, shape, per_tensor_quant_for_dense=False): """ Convert a model generated with keras to tflite-format """ keras_model.compile(loss=keras.losses.categorical_crossentropy, - optimizer=keras.optimizers.Adam(), metrics=['accuracy']) n_inputs = len(keras_model.inputs) converter = tf.lite.TFLiteConverter.from_keras_model(keras_model) @@ -192,6 +201,7 @@ def representative_dataset(): converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.inference_input_type = Lib.op_utils.get_tf_dtype(dtype) converter.inference_output_type = Lib.op_utils.get_tf_dtype(dtype) + converter._experimental_disable_per_channel_quantization_for_dense_layers = per_tensor_quant_for_dense if dtype == "int8_t": converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] diff --git a/Tests/UnitTest/RefactoredTestGen/test_plan.json b/Tests/UnitTest/RefactoredTestGen/test_plan.json index f3a1be29..75157ec8 100644 --- a/Tests/UnitTest/RefactoredTestGen/test_plan.json +++ b/Tests/UnitTest/RefactoredTestGen/test_plan.json @@ -803,5 
+803,24 @@ } ] +}, +{ + "suite_name" : "test_arm_fully_connected_s8", + "op_type" : "fully_connected", + "input_data_type": "int8_t", + "weights_data_type": "int8_t", + "bias_data_type": "int32_t", + "interpreter": "tensorflow", + "tflite_generator": "keras", + "w_type": "INT8", + "shift_and_mult_data_type": "int32_t", + "tests" : [ + {"name" : "fc_per_ch", + "in_ch" : 89, + "out_ch" : 22, + "generate_bias": true, + "per_channel_quant": true + } + ] } ] diff --git a/Tests/UnitTest/TestCases/TestData/fc_per_ch/bias.h b/Tests/UnitTest/TestCases/TestData/fc_per_ch/bias.h new file mode 100644 index 00000000..1bddf361 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fc_per_ch/bias.h @@ -0,0 +1,9 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include + +const int32_t fc_per_ch_bias[22] = {-1720, 5761, -25657, -17236, 16662, -6141, -31875, 22601, -18163, -30729, -25864, + -30232, -9596, -15034, -23538, 8194, -32759, 27796, 13136, -16061, 590, 26513}; + +const int32_t *const fc_per_ch_biases = fc_per_ch_bias; diff --git a/Tests/UnitTest/TestCases/TestData/fc_per_ch/config_data.h b/Tests/UnitTest/TestCases/TestData/fc_per_ch/config_data.h new file mode 100644 index 00000000..42bb4bb8 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fc_per_ch/config_data.h @@ -0,0 +1,17 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#define FC_PER_CH_W_TYPE INT8 +#define FC_PER_CH_IN_CH 89 +#define FC_PER_CH_OUT_CH 22 +#define FC_PER_CH_PER_CHANNEL_QUANT true +#define FC_PER_CH_BATCH_SIZE 1 +#define FC_PER_CH_OUT_ACTIVATION_MIN -128 +#define FC_PER_CH_OUT_ACTIVATION_MAX 127 +#define FC_PER_CH_INPUT_BATCHES 1 +#define FC_PER_CH_INPUT_W 1 +#define FC_PER_CH_INPUT_H 1 +#define FC_PER_CH_DST_SIZE 22 +#define FC_PER_CH_ACCUMULATION_DEPTH 89 +#define FC_PER_CH_INPUT_OFFSET 128 +#define FC_PER_CH_OUTPUT_OFFSET 11 diff --git a/Tests/UnitTest/TestCases/TestData/fc_per_ch/input.h b/Tests/UnitTest/TestCases/TestData/fc_per_ch/input.h new file mode 100644 index 00000000..69156bbe --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fc_per_ch/input.h @@ -0,0 +1,11 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include + +const int8_t fc_per_ch_input[89] = {-100, 95, -71, -51, -51, 120, 23, -59, 48, -29, 28, -41, -72, 109, 29, + -75, 27, 77, 116, 0, 113, -125, 29, 104, -114, -87, 89, -112, 31, -93, + -50, 115, -20, 126, 75, -43, -44, 14, -54, -16, -110, -66, -62, -103, -98, + -56, 106, -78, -97, 23, 41, -119, -57, 85, 33, -60, 45, -52, -61, -24, + 18, -15, -52, 91, -55, -67, -4, 117, 22, -64, 16, -109, 31, 87, 85, + 19, 88, -77, 32, -1, 121, 2, 58, 73, 32, 46, -39, -117, 41}; diff --git a/Tests/UnitTest/TestCases/TestData/fc_per_ch/output.h b/Tests/UnitTest/TestCases/TestData/fc_per_ch/output.h new file mode 100644 index 00000000..a0a3d336 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fc_per_ch/output.h @@ -0,0 +1,9 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int8_t fc_per_ch_output[22] = {39, -61, 52, 27, 44, -86, -89, -104, -69, -100, -47, + -90, -13, -75, -31, -5, -48, 7, 59, 90, -63, -71}; + +const int8_t *const fc_per_ch_output_ref = fc_per_ch_output; diff --git a/Tests/UnitTest/TestCases/TestData/fc_per_ch/output_multiplier.h b/Tests/UnitTest/TestCases/TestData/fc_per_ch/output_multiplier.h new file mode 100644 index 00000000..55372187 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fc_per_ch/output_multiplier.h @@ -0,0 +1,11 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include + +const int32_t fc_per_ch_output_multiplier[22] = {1470742593, 1454348298, 1454354566, 1467664675, 1460704449, 1471555339, + 1460846871, 1438005713, 1465479244, 1462578222, 1471942734, 1471242986, + 1470425887, 1460755289, 1450718973, 1443519955, 1401276315, 1413333005, + 1460858884, 1457164877, 1453714972, 1465539835}; + +const int32_t *const fc_per_ch_output_mult = fc_per_ch_output_multiplier; diff --git a/Tests/UnitTest/TestCases/TestData/fc_per_ch/output_shift.h b/Tests/UnitTest/TestCases/TestData/fc_per_ch/output_shift.h new file mode 100644 index 00000000..2effda8c --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fc_per_ch/output_shift.h @@ -0,0 +1,7 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int32_t fc_per_ch_output_shift[22] = {-10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, + -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10}; diff --git a/Tests/UnitTest/TestCases/TestData/fc_per_ch/test_data.h b/Tests/UnitTest/TestCases/TestData/fc_per_ch/test_data.h new file mode 100644 index 00000000..33835748 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fc_per_ch/test_data.h @@ -0,0 +1,7 @@ +#include "bias.h" +#include "config_data.h" +#include "input.h" +#include "output.h" +#include "output_multiplier.h" +#include "output_shift.h" +#include "weights.h" diff --git a/Tests/UnitTest/TestCases/TestData/fc_per_ch/weights.h b/Tests/UnitTest/TestCases/TestData/fc_per_ch/weights.h new file mode 100644 index 00000000..254ea2f4 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fc_per_ch/weights.h @@ -0,0 +1,110 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int8_t fc_per_ch_weights[1958] = { + 80, 14, -115, 84, 66, -79, -36, -97, 24, 13, 40, 76, -4, 116, 93, -109, -102, 73, 38, + -103, 64, 125, -40, 109, -121, 57, 46, -45, -116, -31, 10, -127, 34, 97, -11, -65, 102, -78, + 119, -14, -26, -55, 106, 20, 24, -58, 68, 70, 66, 58, -5, -68, 108, -68, 117, -111, -53, + 127, -41, 7, -5, 76, -11, -12, 71, 16, 25, -45, 27, 113, -121, -62, -62, 32, 61, -110, + -65, 60, 14, -29, -55, 83, -33, -2, -118, 83, -95, 13, 49, -44, -127, -28, -114, -86, -21, + -105, -23, -68, -35, 74, 90, -52, -109, -69, -77, -20, -30, -58, 110, -57, -3, 73, 3, -47, + 35, -22, -72, 70, -91, -71, 127, 92, 105, 56, 56, 41, 17, -119, -107, -57, -24, -54, -29, + 27, 68, -11, -24, -78, -87, 84, 115, 8, -36, -52, -78, 22, 13, -70, 31, 35, 76, 65, + 104, 11, -7, -110, -18, -123, -11, 5, 68, -15, 60, -82, -103, -59, -97, 48, -50, -88, 58, + -17, -82, 72, -86, 116, -62, 126, -36, 40, 64, 57, -47, -114, -27, -82, -54, 113, -94, 84, + -58, -1, 5, -70, -27, 119, -13, -90, 48, 61, 45, -7, -6, -46, 125, 90, 32, 52, -101, + -71, 70, -12, 47, 97, 127, -117, 98, 88, -32, -29, -84, 60, 54, 85, 126, 76, 16, 125, + -45, 58, -37, -3, 3, 8, -55, -66, -37, -23, -101, -1, -26, 26, -54, -7, 112, 80, 109, + -4, -102, 107, 2, 70, 89, 50, 34, 21, -12, 122, -10, 76, -90, -96, -66, -11, 65, 101, + -92, -78, -71, 39, 76, 113, 47, 67, -48, -124, 33, -84, 26, -6, 23, -39, -69, -108, 78, + 73, -41, 100, 93, -88, -34, 45, 25, -88, -84, -43, -125, -88, -83, 91, 110, -112, -10, -51, + 24, -80, -64, -53, 84, -80, 75, 85, -101, 36, -71, 33, 79, 125, -19, 91, 120, -31, 67, + 14, 12, 28, -20, -118, -91, 101, 124, 77, -91, 84, 19, 77, -119, -7, -127, 118, 80, -21, + 38, 50, 124, -125, -116, 28, -44, 42, -25, -98, 19, 55, 120, -55, 56, 60, -2, 37, 115, + 26, 45, -14, 13, -15, 28, -7, 31, 81, 24, 53, -23, -100, -62, -9, -67, 29, -9, -42, + 112, 19, 105, -54, -38, 105, 81, -55, 93, 93, -88, -33, -108, -99, -11, 11, 55, 54, -28, + 99, -80, 38, 104, 87, -105, -86, 
-72, 27, 45, -58, -115, -65, 59, -6, 18, -95, 90, 11, + -14, -68, 94, -31, -61, 27, 34, -2, -67, -126, 111, 121, -1, 55, -96, -64, 104, -115, -39, + 81, 30, -10, 125, 127, -87, -20, -85, 5, -30, -112, 86, -59, -62, -123, -8, -97, -85, 29, + 107, 31, -28, 25, -93, -5, 35, -21, 86, -35, 100, 10, -36, -23, 127, 79, -54, -122, -46, + -2, -101, 60, 30, -54, 64, -118, 60, -72, -88, -103, -41, -29, -70, -4, 55, 8, 96, -122, + 20, -29, -14, -58, -69, 50, -99, -82, 109, -12, 21, -15, 42, -16, -51, -63, 61, -81, -35, + -47, 121, 73, 63, 81, 45, 34, 94, -89, -33, -120, -81, 83, -67, -30, 45, -14, -71, 55, + 9, 13, -116, 66, -35, -84, -101, 0, 81, -96, 59, 27, 113, 28, 56, 86, 95, 64, -40, + 3, -66, 116, 90, 63, 11, -127, -92, -45, 74, -110, 14, 86, 86, -72, -104, 37, -127, 9, + 47, 3, 87, 37, -35, 23, -56, 59, 39, 29, -52, -79, 95, -65, -17, 103, 9, -107, 35, + -121, 3, 27, 99, -86, 87, -105, -74, -82, -54, -61, -20, -24, -59, 124, -91, -37, 26, -51, + 112, 94, -20, 83, -78, 1, -115, -105, -96, -36, -75, 38, -76, 5, -80, -110, -74, -51, -59, + 107, -75, -33, -122, -126, -34, 25, -49, 107, -92, -68, 121, 9, 90, -106, 4, -37, -8, -23, + 3, -49, 81, -62, 83, 12, -30, -54, -114, 23, 96, -111, -79, 53, 70, 7, 49, 30, 55, + 42, -49, -3, -125, -77, -116, -116, -4, -62, 87, -49, 74, 122, -30, -18, -42, -75, -118, 21, + 23, -3, 92, 32, -57, -81, -68, 107, 84, 127, -37, -98, -70, -100, 52, -10, 78, 106, -70, + -99, 105, -70, -120, 53, 22, -6, 40, 36, -123, 123, 65, 63, 4, -67, -45, 70, 127, 5, + 82, 42, -109, 125, -5, -12, 39, -117, -25, -23, -92, 45, 39, -105, -111, 29, -9, -66, 0, + -83, -5, 61, 116, 74, -96, -89, -58, -113, -120, 19, -47, 14, -41, 11, -62, -71, -71, 20, + 78, -126, 120, 108, -70, 102, -127, -4, 105, -121, 53, -106, -18, 5, -7, -94, -121, -65, -74, + 72, 30, 14, 8, 119, 12, -93, -3, -115, -67, -110, -41, -84, -18, -30, 82, 31, 61, -110, + -1, 83, 87, 83, -79, -116, 22, 115, -43, 52, 41, -72, -71, -94, -118, -83, -123, 57, 100, + 100, 55, 5, -78, 23, 89, 56, 0, 72, 
71, -126, -89, 64, 20, 76, -15, 50, -26, -29, + 8, -96, -50, 80, -123, -21, -119, 35, -36, 11, -83, -60, -123, 115, 126, 43, 50, -16, -104, + -81, 34, -31, -33, 28, 93, 121, 68, 97, -37, -124, 127, 56, -109, -57, -114, 92, -4, 116, + -1, 89, 48, -23, -52, 60, -27, -66, -37, -110, -46, 14, 40, -31, -44, -106, 103, -114, 14, + -11, 17, 13, 73, 120, 8, 31, -10, -114, 126, -76, 44, -11, -62, 33, -89, -4, -36, -114, + 119, 12, 94, 29, -111, 57, 75, 38, -65, 89, -29, 61, -96, 25, -118, -106, -26, 16, -82, + -105, -79, -30, 47, -64, 127, 0, -101, 16, -42, 37, -30, -82, 16, -37, 95, -29, -85, 2, + -30, -14, -89, -29, -10, 94, 96, 106, 68, 86, -106, 92, -104, -92, 23, 71, 59, -116, 96, + -104, -96, 110, -112, 83, -94, 71, 36, -95, -60, 21, 109, -76, 102, 46, 88, -10, 54, 30, + -114, 23, -3, -24, -44, -36, 56, 107, -1, -92, -5, 77, -15, -86, -98, -94, 16, -54, 60, + -72, -111, 118, -111, 105, -30, 125, 10, -21, -54, 29, -49, 110, -102, 9, -16, 100, 19, -64, + 72, -105, -56, 109, 67, -64, 91, -100, -82, -87, -57, -58, -90, -54, -42, 126, -28, 92, -12, + 65, -20, 119, 114, 87, 85, 72, -69, -40, -62, 20, 98, -118, -45, 1, -46, -77, -62, -127, + -2, -48, 69, -65, -76, 25, -96, -120, -30, 62, 40, -120, 89, 69, 43, -24, -15, 67, 5, + -121, -23, 103, 118, -116, 107, 18, 127, -20, 56, -43, -122, 120, -125, 0, -57, 38, -11, -2, + -117, 8, -60, -72, 21, -8, 46, -119, 11, 24, -19, 1, 7, -86, 42, 49, -75, -126, -28, + -3, 8, 102, -104, -26, 38, -116, 9, -110, 79, 57, 126, 92, -86, -96, 51, 64, -7, -94, + 124, -79, 78, 85, -59, -77, -16, 70, -95, -49, 20, 13, -65, -95, 127, 13, 20, 124, 50, + 50, -60, -111, -3, -70, 113, -116, -124, -118, 28, -120, -93, 105, 34, 79, 98, 60, -33, 47, + -49, -118, -3, -63, -6, 68, -13, -14, 67, 79, -127, 39, -26, -21, 66, -108, 91, -52, 32, + 47, -68, -48, -127, 25, -60, -111, -68, 74, -57, -57, -68, 96, -113, -25, -25, 12, 86, 100, + 77, 11, 110, -78, -33, -25, 86, 76, 5, 39, -40, -28, -99, 106, -9, -90, 67, -103, 38, + 79, 49, -29, -75, -98, -89, -9, 
102, 23, -115, -23, 121, 48, -41, -48, -80, -83, 109, -58, + -66, -107, -30, 1, 93, 56, -110, -70, -27, 98, -52, 19, 80, -115, 9, 96, 77, 25, -102, + -106, 118, -30, -15, -35, -82, 46, -23, -77, 109, -47, 125, 99, 80, -94, -34, 85, 102, -67, + 69, -26, 26, -74, -105, -72, 84, 26, 23, 65, -94, -80, 30, -7, -121, 119, 68, 42, -13, + 122, 113, -67, 42, -47, 97, -119, -37, -64, -5, -91, 38, -118, -127, -65, 110, 82, -113, -21, + 80, -74, -59, -48, -45, -92, -108, -77, -27, 102, -116, 14, -79, -83, 55, -116, 55, -36, 53, + -82, 70, 83, 24, -95, 122, 101, 47, -6, 39, 114, -118, -67, 87, -101, 108, -64, 37, -83, + 73, -86, 21, 98, 62, 31, 80, -109, 101, 21, 100, -105, -120, -71, 84, 86, 102, 21, 16, + -124, -127, -110, 97, 36, 0, 87, 68, -51, -16, 4, -33, 121, 54, 49, -99, 17, 63, -58, + 100, 10, -70, -104, -100, -11, 64, 127, 63, 63, 32, 37, 92, 70, -63, 92, -35, 106, -10, + 108, -24, -67, -123, -103, 18, 127, 16, 56, -89, -103, -9, -32, -16, -17, -107, -59, 103, -15, + -88, -14, -11, -38, 98, -22, 21, 107, 23, -105, 5, -126, -63, -109, -26, -44, -83, 118, 115, + 124, 107, 79, 38, 111, 77, 20, 112, -68, -33, 7, 12, -44, -48, -49, 55, 105, -21, -25, + 37, -7, 9, 25, -49, -65, 5, -101, 4, 105, 54, -80, 108, 34, -119, 48, 2, 4, 46, + 40, 65, 114, 3, 49, -16, -122, -80, -89, 92, -110, -48, 10, -61, -63, -56, 43, -121, 36, + 14, -29, -118, 51, -30, -40, 97, 93, 14, 59, 121, 112, 86, 71, 102, -114, -91, 120, 8, + 54, -2, 119, 7, 68, 71, 44, 48, -119, -63, -37, -27, -36, -98, -2, -46, 79, 4, -28, + -39, 31, 113, -127, -46, -27, -57, 16, 31, -118, 95, -120, -72, 12, 86, -54, 6, 63, 61, + 19, 125, -63, -34, 16, -72, -85, -80, -45, -105, -123, -61, 66, 23, 111, -104, -79, -88, 8, + 24, -61, 33, 7, -15, 123, -27, 60, 72, 91, -92, 70, -36, 120, 66, 93, -68, -29, -98, + -109, -14, -90, 19, -81, 99, 37, 96, 109, -67, -126, -61, -25, 87, -49, -110, 89, 99, 73, + -30, 38, -28, 91, 54, -125, 41, -126, -102, -58, 17, -127, -69, 79, 25, 54, -124, -27, 78, + -80, 116, 111, -67, -77, -40, 
-108, -32, -108, -43, -14, -91, -21, 6, 122, 122, 120, 87, -63, + 31, 92, 49, -125, 110, -124, 32, 52, -73, -89, -55, 75, -75, 54, -2, 21, 89, 77, 116, + 50, -39, 121, 95, 58, 9, 92, -106, 73, 115, -72, 46, -59, 107, -34, 18, 27, -46, -28, + 7, -52, -40, -87, 14, -48, -17, -5, 71, 13, 100, -67, 71, -18, 53, 79, -102, 81, 51, + 89, 9, -46, 7, -51, -33, 114, 15, 6, 115, -23, 2, 119, -100, 80, 113, 111, -43, 64, + -32, -12, -127, -91, -96, 51, 80, -46, 100, 36, -35, -6, -59, -50, 14, -74, -115, 118, 29, + -123, -35, -19, 48, 5, -84, 126, 114, -45, 73, -21, 76, -1, -112, -113, 18, 7, 79, -55, + -65, 72, -48, -64, -81, -18, 59, 12, -91, -103, -30, -71, -67, -127, -57, 69, -44, 84, 34, + -127, 70, -79, -127, -123, -90, -86, 53, 83, 57, 18, -99, 101, 36, 127, 96, 70, 56, -122, + -17, -43, 19, 27, 46, -43, -68, 54, -12, 31, 82, 87, -29, 127, -123, 124, -97, -42, 9, + -47, 118, -90, 46, 108, 102, 3, -123, 100, 71, 1, -90, -66, -114, 19, -3, 68, 61, -22, + 54, -22, 63, -76, -108, 63, -26, -86, -105, 115, 110, 27, -51, 12, 112, 90, 18, 107, -79, + -69, -109, 17, -66, 127, -119, 58, -111, -82, 24, 3, -106, 41, -117, 120, -14, -124, -5, -35, + 22, 44, -88, -94, 46, 21, 106, -36, 79, 54, -15, 91, 69, 103, -63, 90, -75, 30, 18, + -99, -95, -44, -26, 58, 90, -75, -64, -77, -3, -28, 87, -19, -80, 81, -58, 51, -48, 32, + -114, -60, 92, 35, -110, 22, -66, 34, -41, -50, -35, -61, 74, 42, 7, -103, 28, 53, 100, + -94}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/bias.h b/Tests/UnitTest/TestCases/TestData/fully_connected/bias.h new file mode 100644 index 00000000..087772de --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/bias.h @@ -0,0 +1,8 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int32_t *const fully_connected_bias = NULL; + +const int32_t *const fully_connected_biases = fully_connected_bias; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/biases_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected/biases_data.h index d79c11d8..386f182c 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected/biases_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/biases_data.h @@ -1,6 +1,6 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. #pragma once #include -const int32_t fully_connected_biases[6] = {24443, 7521, 925, 17359, 32544, -3872}; +const int32_t fully_connected_biases[6] = {17061, 30620, -17956, 6604, -30838, -23496}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/config_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected/config_data.h index e8f78c2e..5e22375e 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected/config_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/config_data.h @@ -1,5 +1,5 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. 
#pragma once #define FULLY_CONNECTED_OUT_CH 6 #define FULLY_CONNECTED_IN_CH 10 @@ -10,8 +10,9 @@ #define FULLY_CONNECTED_OUT_ACTIVATION_MIN -128 #define FULLY_CONNECTED_OUT_ACTIVATION_MAX 127 #define FULLY_CONNECTED_INPUT_BATCHES 3 -#define FULLY_CONNECTED_OUTPUT_MULTIPLIER 1592720445 +#define FULLY_CONNECTED_OUTPUT_MULTIPLIER 1342580370 #define FULLY_CONNECTED_OUTPUT_SHIFT -9 #define FULLY_CONNECTED_ACCUMULATION_DEPTH 20 #define FULLY_CONNECTED_INPUT_OFFSET 128 -#define FULLY_CONNECTED_OUTPUT_OFFSET -109 +#define FULLY_CONNECTED_FILTER_OFFSET 0 +#define FULLY_CONNECTED_OUTPUT_OFFSET -102 diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/input.h b/Tests/UnitTest/TestCases/TestData/fully_connected/input.h new file mode 100644 index 00000000..18d3bf43 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/input.h @@ -0,0 +1,11 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include + +const int8_t fully_connected_input[89] = { + -3, -123, -79, 45, 127, 85, -79, -102, 29, 119, -101, -7, 77, -107, 82, -102, 1, 0, + -11, 50, -96, -72, 68, -30, -77, 13, 83, 23, -116, -38, 39, -33, -53, -61, 60, 5, + 39, -100, -96, 5, -13, -93, -109, 15, -67, 74, -50, -102, -120, 18, 81, 26, -3, -97, + -59, -106, 64, 34, 59, -6, 22, -121, -32, 39, 97, -23, -40, -101, -80, -52, -97, -101, + -118, -41, 46, -10, 118, 125, 41, 45, 116, -56, 74, -81, 31, 114, -96, 52, 42}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/input_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected/input_data.h index cde081cf..3702a3ed 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected/input_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/input_data.h @@ -1,9 +1,9 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). 
-// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. #pragma once #include -const int8_t fully_connected_input[60] = {27, 98, 67, -21, 63, -34, -45, 61, -124, -75, -23, -67, 18, -23, 9, - -49, 66, -11, 69, 87, -16, -6, 120, -33, -62, 32, -21, -117, -57, 6, - 121, 3, -78, -7, -17, -69, 79, 7, -89, -85, 122, -10, -75, 101, -65, - -26, -51, -45, 91, 78, 122, 15, -124, -8, 67, 10, 35, -62, -73, 59}; +const int8_t fully_connected_input[60] = { + -79, -90, 26, -66, -40, -13, -102, 67, -112, -92, 40, 113, 69, -63, -122, -24, 122, -88, 126, 123, + -69, 51, -83, 83, -127, -73, 93, 114, 52, -89, -109, -41, 84, 10, -80, 24, 65, -122, -65, 121, + -55, 59, -6, 113, -101, -40, 72, -11, 67, 83, -70, -111, -64, 53, -43, 77, 66, -54, 42, 5}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/output.h b/Tests/UnitTest/TestCases/TestData/fully_connected/output.h new file mode 100644 index 00000000..273b8cfa --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/output.h @@ -0,0 +1,9 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int8_t fully_connected_output[22] = {93, -13, 78, -30, 53, -51, 55, -128, 127, -7, 101, + -3, 127, 5, 53, 118, -10, 79, 25, 76, -20, 17}; + +const int8_t *const fully_connected_output_ref = fully_connected_output; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected/output_mult_data.h new file mode 100644 index 00000000..741fe9cc --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/output_mult_data.h @@ -0,0 +1,118 @@ +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. +#pragma once +#include + +const int32_t fully_connected_output_mult[1001] = { + 1696449162, 1681404615, 1692198053, 1675127591, 1686949941, 1678770123, 1684690429, 1678215530, 1676646375, + 1684491350, 1694946433, 1693930164, 1683141242, 1694809365, 1691557466, 1689950381, 1677848678, 1686181920, + 1654150998, 1685503805, 1683960939, 1683399423, 1695971331, 1694151921, 1689082218, 1685088989, 1672576886, + 1693382896, 1690199231, 1687776161, 1678667272, 1652034575, 1695423862, 1691081241, 1665296839, 1696600679, + 1657628969, 1683680181, 1686907998, 1674320236, 1684520650, 1687987281, 1687603973, 1662358511, 1679942424, + 1656862653, 1690341516, 1694335347, 1695803157, 1671587509, 1686916025, 1678746442, 1690348741, 1691440868, + 1691082445, 1690428814, 1656831246, 1696458393, 1677106646, 1675821359, 1695010853, 1690140831, 1694030507, + 1685726365, 1680749278, 1694652831, 1679000309, 1662881897, 1693784266, 1689966436, 1688111104, 1683872237, + 1694306448, 1690657996, 1693486249, 1686286075, 1695192673, 1694764412, 1680689675, 1689714777, 1682158889, + 1687167885, 1695418644, 1692904263, 1690389279, 1679174303, 1695036540, 1695290407, 1673351931, 1694921548, + 1696081105, 1668164727, 1662517252, 1688387046, 
1682016001, 1696152549, 1636618661, 1681923285, 1687872891, + 1665337478, 1689053922, 1665505050, 1666916467, 1675416076, 1682256823, 1684187914, 1690382657, 1683011800, + 1691775410, 1682166314, 1691567902, 1682756830, 1678342464, 1693264492, 1691147868, 1696156764, 1694291598, + 1696409627, 1684415491, 1689715781, 1693376474, 1695584611, 1668135126, 1676809732, 1678726876, 1694338959, + 1689945565, 1670993682, 1685514843, 1685233884, 1663667980, 1681810801, 1686906994, 1691829996, 1690945377, + 1693062804, 1689076800, 1682016904, 1694559311, 1690946180, 1693125618, 1694365651, 1691738283, 1675864506, + 1673715271, 1693708407, 1693137860, 1693092907, 1681891777, 1689022013, 1695623544, 1683363400, 1694640790, + 1666781707, 1677490556, 1687259798, 1692742711, 1683405443, 1693946620, 1680929393, 1688076385, 1678776043, + 1691871940, 1684218217, 1694524392, 1684073524, 1676578042, 1694777456, 1682063764, 1669690836, 1685044839, + 1681722199, 1681525226, 1683282825, 1685180502, 1692484029, 1689192796, 1693845475, 1657087220, 1696412035, + 1693309446, 1694855121, 1690468148, 1692236183, 1691422205, 1684015927, 1692013021, 1682912461, 1640809966, + 1665773466, 1676481311, 1667264956, 1667886678, 1675813532, 1687821716, 1690511296, 1693424438, 1689306584, + 1682545107, 1680089526, 1685632043, 1681324341, 1689301567, 1693398951, 1652389286, 1679704512, 1661679694, + 1672295325, 1670172480, 1695699603, 1687770542, 1689279090, 1684520449, 1639881998, 1680443033, 1680573177, + 1695571366, 1696255099, 1669372850, 1686530710, 1689456696, 1694296214, 1694468201, 1684194938, 1694891846, + 1685328407, 1663359728, 1694963290, 1638832416, 1659326262, 1685526683, 1693823601, 1694594231, 1695519388, + 1682350543, 1664653543, 1689456696, 1696032540, 1690311012, 1692678492, 1692227754, 1695873396, 1682741778, + 1675015308, 1692424224, 1692696153, 1688570672, 1682945173, 1684092388, 1695719270, 1695485272, 1665110904, + 1689317622, 1674299766, 1683017218, 1695046976, 1664572667, 
1648405088, 1680674222, 1683536691, 1691018828, + 1679729999, 1686734204, 1681044686, 1691881171, 1665627869, 1690224316, 1691757148, 1692765991, 1695950861, + 1691521142, 1680501231, 1689982089, 1688919262, 1641885838, 1689579315, 1686843778, 1673628876, 1668907562, + 1669574137, 1679156040, 1680365769, 1685579865, 1688007550, 1694207109, 1675409052, 1643484092, 1637103616, + 1631417008, 1695011655, 1695841889, 1696707242, 1695572971, 1675572209, 1689384450, 1695491894, 1694502919, + 1683538297, 1694007227, 1695799745, 1674960019, 1693432265, 1689926700, 1693828618, 1686948335, 1662157525, + 1694626140, 1691673663, 1681499238, 1694963491, 1691688915, 1681463917, 1695793925, 1693216729, 1670769919, + 1687402084, 1695983171, 1695433094, 1692144670, 1683897122, 1663531715, 1666684977, 1690198026, 1657237533, + 1665071972, 1683719716, 1666620256, 1687664178, 1683869427, 1667174648, 1675984917, 1695862961, 1692492457, + 1685318573, 1686071543, 1683564787, 1695308468, 1686921644, 1688521304, 1664261606, 1678225765, 1694386321, + 1683215294, 1683261753, 1680018785, 1669647889, 1662128526, 1692302007, 1689706348, 1691150879, 1694483453, + 1676245005, 1658482582, 1695892662, 1691460937, 1688179738, 1677682009, 1684223034, 1694889037, 1687079583, + 1688378818, 1692538615, 1694417227, 1654007609, 1675474576, 1681876525, 1682318835, 1690546616, 1692420211, + 1683444878, 1687419142, 1689181557, 1686073750, 1692126609, 1693005809, 1689497034, 1694934592, 1681854751, + 1688294530, 1676566101, 1685562606, 1691816149, 1696167601, 1665452671, 1695550294, 1669694950, 1688175724, + 1684207380, 1691612655, 1672026507, 1678801129, 1679232702, 1696678344, 1682781414, 1679873890, 1686467093, + 1689382443, 1694343776, 1695782085, 1691192220, 1691745909, 1691977500, 1683867220, 1652876950, 1694714642, + 1654289571, 1668025452, 1686307749, 1675326972, 1689457700, 1683436148, 1635015589, 1689881546, 1683537494, + 1684722539, 1675639639, 1685030389, 1685727770, 1673292328, 1682885067, 
1680616525, 1667320747, 1694097736, + 1684844957, 1688871298, 1696500136, 1685337437, 1689683069, 1693616092, 1693750752, 1672421255, 1694870172, + 1694191456, 1669012521, 1666068974, 1690759944, 1690833194, 1691960642, 1691308216, 1693533009, 1672557118, + 1688601377, 1692405761, 1672507248, 1671522487, 1674376327, 1696635397, 1687774154, 1670663656, 1693490062, + 1687052892, 1687415530, 1696522412, 1659710272, 1671095931, 1691592586, 1667633414, 1651788435, 1692017637, + 1688953579, 1673078297, 1688980471, 1686576065, 1670664960, 1665518997, 1688151040, 1693350787, 1665165792, + 1679894661, 1696501741, 1678529301, 1693087890, 1659865703, 1683549535, 1682719101, 1674657286, 1686167270, + 1680331051, 1633274949, 1672101764, 1691375044, 1671116100, 1683529266, 1693849690, 1658335681, 1693594619, + 1696064649, 1693008218, 1687234311, 1690697130, 1683904146, 1684923023, 1686351699, 1692449310, 1686548772, + 1693323895, 1694518974, 1695773054, 1635035056, 1675906951, 1648719561, 1695236623, 1687187351, 1690554042, + 1695686358, 1672834164, 1678267006, 1696502142, 1695887846, 1655175696, 1683175158, 1696014679, 1691443477, + 1679584703, 1696689582, 1633801345, 1672079187, 1690007576, 1672076578, 1647601746, 1684868838, 1696513180, + 1691304804, 1655093415, 1671475025, 1677318168, 1690812323, 1693214722, 1692396731, 1686050270, 1691884382, + 1694213531, 1693993179, 1676263368, 1696348217, 1694956266, 1666891783, 1688902405, 1683873441, 1693152711, + 1693140870, 1687493195, 1693805940, 1680308072, 1686058097, 1679742743, 1679015059, 1651339001, 1684634839, + 1691796482, 1687287292, 1694046561, 1692776226, 1692453926, 1688953780, 1681384747, 1684520248, 1685002494, + 1680737036, 1686795012, 1687056304, 1694839267, 1694362841, 1685760682, 1685705494, 1681818728, 1696036152, + 1674134100, 1691780227, 1674029644, 1670614689, 1668007992, 1674532962, 1689682066, 1693363229, 1691506091, + 1681653263, 1686728385, 1694723070, 1682159591, 1690069789, 1695284988, 1671796924, 
1662799516, 1691177570, + 1680125048, 1672021189, 1683801596, 1673143720, 1688239542, 1696188070, 1692127813, 1680476246, 1660366211, + 1692524166, 1674500049, 1694513355, 1678598939, 1683928228, 1692827200, 1684570821, 1689085028, 1695446339, + 1676358894, 1680816307, 1672625451, 1692317460, 1668202657, 1693037317, 1686249550, 1693936385, 1694016459, + 1676411473, 1694365048, 1678546962, 1691245000, 1683736975, 1696129872, 1688000727, 1696060234, 1689567675, + 1695823627, 1690013196, 1679714145, 1666942957, 1676344645, 1685675591, 1676139445, 1681775982, 1681920174, + 1688194388, 1694191456, 1691894416, 1690857879, 1668210684, 1695804361, 1653634134, 1681624866, 1690908250, + 1688910633, 1664539253, 1695085909, 1618043263, 1689598380, 1668636236, 1696688579, 1677698165, 1684516636, + 1619280586, 1681856356, 1687948148, 1680004436, 1696449162, 1690136818, 1657064944, 1677067914, 1670051667, + 1692661835, 1687350508, 1696330155, 1695845702, 1685842963, 1696007053, 1688301152, 1686237309, 1691738885, + 1695557719, 1693170572, 1675936552, 1685399048, 1672806469, 1695773054, 1693108961, 1696542681, 1695076075, + 1694327320, 1694333742, 1690816939, 1678403271, 1672518286, 1673311292, 1693908892, 1677657024, 1667997858, + 1675391492, 1689039874, 1683306004, 1690753522, 1694815586, 1687133166, 1685330213, 1684846963, 1693498892, + 1691441470, 1691885987, 1673818925, 1688151843, 1672525611, 1663746749, 1696703229, 1694739326, 1690475774, + 1693357008, 1691001569, 1694780868, 1695489085, 1693766004, 1653378161, 1693623518, 1696394375, 1686955560, + 1676968374, 1692502692, 1690260439, 1685744426, 1689812711, 1687314786, 1696441134, 1686983255, 1691189410, + 1695286594, 1692198855, 1693946620, 1687375995, 1687450850, 1679114699, 1678681119, 1696363469, 1696006852, + 1653023149, 1662090496, 1695890053, 1695675722, 1684819871, 1685122303, 1657126353, 1688722992, 1690312818, + 1689556437, 1694793109, 1681830970, 1644259037, 1675393699, 1692725252, 1684229456, 1692598820, 
1695230402, + 1687175912, 1693797511, 1690741281, 1692748732, 1686092815, 1684944697, 1689302972, 1694322905, 1675450393, + 1691505288, 1696281389, 1687240533, 1690120562, 1696516190, 1693729279, 1677993974, 1690973473, 1667000052, + 1661521755, 1695509956, 1695482462, 1685289273, 1693760987, 1688394070, 1688064144, 1679240629, 1688664392, + 1685922434, 1695818810, 1681172322, 1683516021, 1679820006, 1696416249, 1695849916, 1671487267, 1695555512, + 1686258381, 1682456806, 1694506531, 1685461260, 1696407018, 1690653983, 1664454966, 1692577146, 1691878361, + 1684064493, 1695357435, 1694634769, 1683859995, 1691568705, 1676351569, 1682546813, 1694568744, 1696486489, + 1680227698, 1696417855, 1689225708, 1695256290, 1692155307, 1694831440, 1690691511, 1694562522, 1680135182, + 1685206390, 1691518533, 1695839280, 1694857730, 1686166868, 1691315842, 1691335709, 1679610391, 1695507548, + 1674799471, 1694349796, 1691153889, 1688733227, 1687526308, 1695657861, 1682048312, 1692243006, 1694246042, + 1684889910, 1678347782, 1691343335, 1656113597, 1696697007, 1696658075, 1677596116, 1663066126, 1675298374, + 1694214535, 1690263450, 1695272546, 1691757750, 1679203302, 1683466753, 1682788438, 1679152027, 1693606660, + 1690027846, 1671691062, 1668330192, 1695321714, 1687921858, 1684974800, 1695997821, 1666296451, 1688351123, + 1696231619, 1689178748, 1676049839, 1692162531, 1681567972, 1695433495, 1691499870, 1696454179, 1688928092, + 1696663894, 1688425979, 1663908802, 1692672873, 1654226055, 1683238975, 1690689504, 1691402136, 1688448857, + 1666386659, 1672388443, 1687140190, 1688668607, 1693764398, 1693008820, 1696501139, 1644669037, 1662675392, + 1658216474, 1694213732, 1693154918, 1692730269, 1695009849, 1670183317, 1686746045, 1692950220, 1681550613, + 1675072202, 1690422192, 1684262368, 1693173783, 1689569682, 1689609016, 1696253694, 1694607275, 1689950582, + 1685775934, 1683617768, 1689766554, 1679274645, 1694324711, 1686905589, 1686978237, 1694835655, 1690462931, + 
1693181609, 1673378522, 1694248651, 1696564355, 1691902042, 1685717936, 1694717853, 1695709236, 1684715515, + 1686151415, 1678507326, 1677318067, 1690588158, 1692558884, 1696512177, 1683175158, 1693019456, 1691215298, + 1692315453, 1671834552, 1694963090, 1607070413, 1675144047, 1694097535, 1681001840, 1689937939, 1683159103, + 1691057359, 1682441955, 1694530413, 1673000833, 1689009570, 1684106035, 1689635908, 1689278087, 1689843015, + 1688468122, 1654044234, 1696508765, 1688746272, 1685636057, 1692117177, 1693746738, 1695787905, 1630477300, + 1692350774, 1685342254, 1673670217, 1667167925, 1682504167, 1691035685, 1694053385, 1637623591, 1651827167, + 1683818854, 1695375096, 1593976825, 1670487455, 1663338054, 1660024946, 1687771946, 1683336609, 1687058913, + 1695007842, 1646673377, 1695590631, 1695505541, 1691450300, 1674924197, 1684406059, 1696686973, 1696348618, + 1686017759, 1686697078, 1678591313, 1692230363, 1695621135, 1692507308, 1696293631, 1685849987, 1663051576, + 1689017397, 1659094169, 1677990161, 1689980083, 1665520302, 1681799061, 1691365411, 1671899373, 1665566760, + 1675292755, 1672280574}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/output_multiplier.h b/Tests/UnitTest/TestCases/TestData/fully_connected/output_multiplier.h new file mode 100644 index 00000000..5e05de0b --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/output_multiplier.h @@ -0,0 +1,11 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int32_t fully_connected_output_multiplier[22] = { + 1697233994, 1695742538, 1687983567, 1700511049, 1697208626, 1692791840, 1693768695, 1676536524, + 1661825614, 1681139038, 1667052755, 1698267624, 1699923365, 1700211268, 1647103429, 1685143701, + 1656503344, 1697505186, 1679616074, 1681887383, 1669183032, 1696823079}; + +const int32_t *const fully_connected_output_mult = fully_connected_output_multiplier; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected/output_ref_data.h index 0ba3de02..a6f1ec2a 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected/output_ref_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/output_ref_data.h @@ -1,7 +1,7 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. #pragma once #include const int8_t fully_connected_output_ref[18] = - {-23, -128, -19, 63, 22, -71, 0, -125, -36, -10, -10, -56, 28, -128, 8, -31, 43, 6}; + {-1, 8, -128, -88, -17, -128, 31, -23, -128, -121, -29, -52, 10, -16, -128, -128, -35, -19}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/output_shift.h b/Tests/UnitTest/TestCases/TestData/fully_connected/output_shift.h new file mode 100644 index 00000000..4ffc4fe7 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/output_shift.h @@ -0,0 +1,7 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int32_t fully_connected_output_shift[22] = {-10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, + -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected/output_shift_data.h new file mode 100644 index 00000000..02c440f9 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/output_shift_data.h @@ -0,0 +1,50 @@ +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. +#pragma once +#include + +const int32_t fully_connected_output_shift[1001] = { + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, 
-12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, 
-12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/test_data.h 
b/Tests/UnitTest/TestCases/TestData/fully_connected/test_data.h index dfcc1d8c..13234de0 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected/test_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/test_data.h @@ -1,5 +1,5 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. #include "biases_data.h" #include "config_data.h" #include "input_data.h" diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/weights.h b/Tests/UnitTest/TestCases/TestData/fully_connected/weights.h new file mode 100644 index 00000000..16a028b3 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/weights.h @@ -0,0 +1,110 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int8_t fully_connected_weights[1958] = { + -65, 96, 2, 20, -58, 96, -75, -112, -4, -15, 62, -103, -52, 0, 26, 78, 49, -79, 97, + -93, 13, -51, -9, -88, -41, 99, 105, -114, -14, -23, -70, -77, 65, 111, 39, -13, 88, -76, + -119, -59, -32, -111, 19, 28, 1, 127, 41, -65, -119, 84, 22, 78, 91, 87, -8, 25, -82, + -127, -126, -20, -52, 79, -7, 82, 106, -121, -53, 124, -17, 109, 9, -17, 100, 5, 69, 56, + 88, 97, -103, -64, 67, 124, 95, 48, 6, 62, -58, 109, 118, -60, -63, -43, -62, -105, 119, + -106, 80, -53, 121, -116, -57, -95, -56, 48, 20, -5, -66, -57, -10, -90, -16, 23, 63, 3, + -85, 79, 97, 126, 38, 98, -94, -49, 0, 41, -12, 60, 60, 44, 66, -59, -3, 106, 94, + -53, 76, -89, 74, 95, 67, -7, 34, -90, 98, -75, 48, -86, 72, 0, -6, -71, -110, 54, + 94, 23, -116, -100, -63, -23, -80, -56, 0, 58, 88, 25, -117, -16, -127, 44, -77, -53, 116, + 73, -71, -31, 31, 78, -26, 99, -113, -47, 13, 76, -50, -28, -53, 22, 1, 63, 23, -58, + 7, -50, 106, -105, -106, 37, -83, -85, -80, -67, 122, -16, 56, 24, 15, -75, 6, 114, 80, + -36, -57, 83, 127, 49, 115, 63, 76, 10, -2, 20, 70, 77, -43, 65, 41, 43, -1, -97, + -38, 78, 45, 26, 23, -82, -75, 85, 35, -56, 28, -3, 72, -7, -2, -6, -120, -12, 93, + -90, -19, -122, 122, -86, 52, -33, -120, 59, 52, -12, 75, -75, 90, -84, -90, -10, 43, 116, + 72, -55, -7, -92, -99, 104, 91, 68, -16, 115, -46, -91, 55, 29, 51, -99, 109, 31, -54, + -127, -104, -20, -14, 76, -67, -106, -27, -15, 5, 2, 5, 77, 73, -126, -72, -39, 45, -123, + 41, 123, -6, 6, -124, -90, 14, -17, -37, -86, -112, 46, 90, -9, -100, 67, -122, 3, -111, + 45, 38, -114, 42, 116, 94, 50, -32, 69, -98, -84, 58, -102, 105, -69, 76, -72, -15, 6, + -104, 62, 9, 40, -6, 38, -53, -85, -110, 120, -95, -127, 87, 68, -71, 76, 74, -108, -118, + 30, 50, 44, 68, -14, 104, 42, 110, -73, -120, -19, 27, 25, 1, -121, -52, 78, -16, 48, + -38, 56, 21, 58, -47, 127, 23, -123, 20, 8, -41, -47, 5, 16, 0, 122, 60, 30, 68, + 87, -51, -98, 42, 59, 37, -72, 21, 104, 68, 88, 87, -3, 36, 
26, -74, -97, 20, -26, + 78, 26, 81, -26, -43, -10, -26, 42, -100, 4, 95, 55, 83, -78, -60, 96, 118, 25, 4, + -120, 103, 33, -100, -59, -5, 119, 38, -49, -64, -21, 109, 127, -80, -59, -56, -57, -46, -114, + -57, 70, -75, -97, -21, 67, 71, 7, -85, 2, 111, 125, -57, -70, -39, -98, 32, 52, -111, + 61, -100, -10, -90, -50, -115, -125, 6, -88, 46, -116, -82, -23, -27, -97, 28, 63, -56, 118, + 35, 53, -60, -108, 127, 66, -5, -109, 112, -50, -93, -73, -81, 12, 102, -55, 109, 38, 29, + 115, -15, 106, 48, 123, -42, 21, 81, -65, 46, -90, 98, 121, 51, -4, -68, -39, 23, -52, + 40, -29, -101, -70, -116, 5, 85, 5, 96, -60, 116, 73, 75, -55, 45, 65, -107, -86, 122, + -47, 22, -82, -52, -64, 25, -47, 71, 77, 68, 100, 46, -63, 59, 13, -29, 47, 108, 3, + 105, -46, -50, 87, 34, -61, 100, 122, -122, -81, -123, 63, -1, 9, -75, 102, 64, 35, 17, + -10, 53, 50, -48, 43, -82, -112, -111, 64, -1, 56, 77, 114, -127, -61, 115, 99, -20, -89, + -12, 10, -39, -15, 103, 21, 30, 124, -5, -20, -121, -71, -115, -112, 17, 120, 48, -123, -13, + 2, -9, -124, 85, 114, -100, 15, 55, -24, -9, -74, 119, -113, 29, 24, -97, 72, -72, -119, + 59, 68, -11, 52, -71, -94, -100, 31, 116, 54, -34, -21, -3, -122, 101, -19, 117, 123, 75, + -97, 95, 118, -35, 120, -60, 29, -114, -89, 69, 42, -110, -111, -72, -7, -23, 12, -69, 73, + 72, 37, -126, 4, -112, 97, -123, 38, 107, -11, -24, 116, -63, 127, -114, -47, -68, 29, -65, + -110, -93, -88, 7, 7, -121, -5, 90, -121, 98, -60, -123, 94, 78, 16, 10, -20, -34, 91, + 1, 10, 118, -41, -51, -88, -1, 100, -22, 89, -36, -63, 4, 24, -78, 30, 126, 88, 94, + 43, 30, 1, 42, 123, -75, 121, -16, 69, 123, -8, 115, 2, 9, 112, -127, 111, 108, -118, + 70, -57, 54, -3, -29, 88, 43, -6, 64, -13, -87, 41, -65, -118, -32, 71, -21, -6, 115, + -58, -47, -29, -100, 54, 99, -91, -32, -11, 43, -126, -110, 110, 36, 99, -105, -21, 83, 54, + -123, 43, 9, -43, -39, 20, 37, 10, 111, -23, 36, -7, -73, -8, -44, 107, 28, -98, -77, + 117, 97, 106, -23, 23, -49, 38, -62, -20, 117, 104, -14, -70, 110, 93, 
-118, -121, -70, 1, + 126, -13, -6, -84, 117, 3, -72, 27, -121, 38, 28, 96, -35, -36, 109, 110, -47, -127, -23, + -58, 115, 45, -27, 46, -7, -45, 55, 19, -56, -82, -53, -94, 70, -84, 38, 41, -69, 46, + 3, 13, -73, 126, -26, -58, -53, -118, -51, -82, -95, -107, 13, 94, -1, -36, 32, -80, -42, + 19, -48, 39, 89, 106, 86, 31, -108, -108, 37, -40, -64, -104, -5, -97, -31, 120, 123, -15, + 85, 89, 65, -79, 59, 86, 103, 107, 1, 7, -41, 44, 83, -80, 21, -105, -94, 112, 0, + 9, -60, -57, -97, 98, 35, 31, -18, 125, -20, 29, 106, 92, -65, -100, -78, 19, 99, -95, + 87, -52, 114, -16, 24, -114, -18, 63, -35, 65, 117, 4, -21, 64, -44, 113, -110, 43, 127, + -67, -82, -83, 122, 22, 75, -51, 49, -13, 49, -57, 37, 57, -105, -30, 125, -9, -99, 74, + 15, 89, -47, -114, 102, 93, 34, 40, -38, 47, -107, 56, -124, 7, -91, -7, 61, 95, -98, + -85, -26, 62, 26, 65, -4, -69, 96, -36, -33, 100, 4, 4, -61, -18, -95, 66, -106, 124, + 20, 74, 115, 10, 35, 102, 14, -54, -59, -14, 84, -17, 58, -66, 17, -10, -98, -13, 13, + -37, 14, -73, -19, -35, -31, 70, -42, 13, -100, 66, 12, 106, 40, 11, 127, -46, 35, -62, + -98, 79, -1, 91, -32, -60, 95, 80, 59, 8, -118, -11, -91, 116, 73, 8, 33, -21, -78, + -94, -45, -101, 90, 10, -21, -70, 54, -97, -122, 97, 76, 110, -36, 79, 49, 106, 62, -26, + -98, -45, 19, 60, 25, -34, 69, 4, -101, 79, 1, 105, -28, 40, -65, 18, 113, -24, 61, + 106, 82, -25, 67, -89, -56, 22, 46, 15, 121, 89, 113, -123, 62, -17, 68, -78, -61, -110, + -62, 71, -56, 120, -8, 127, -113, -114, 15, -50, 70, 65, -103, 99, 28, -20, -27, -22, -56, + 74, -11, 31, 82, 99, 16, 32, 28, 23, 48, -63, -40, 66, 66, 69, 116, -43, 74, -40, + 101, -88, -85, -120, -62, -60, 35, -7, -54, -123, -9, -72, -87, 15, 1, -15, -127, 72, 65, + 61, -68, 35, 55, 122, -16, -116, 112, -126, -73, -47, -46, 7, 13, -98, 97, -87, 18, 21, + 126, 49, -80, -5, -57, -32, 121, 122, -32, 87, 94, 2, -22, -63, 47, 92, -117, 22, 14, + 98, 113, -57, -108, -58, -83, -39, -14, -108, 20, 5, 117, 106, 12, -127, 127, -94, -51, -69, + -20, 
114, -4, 60, 88, 39, 55, 48, 48, -21, 43, 42, -58, -110, 54, -89, 127, 82, 87, + 111, 6, -93, 36, 89, -54, 90, -118, -63, -36, -85, -94, -102, 56, -85, -68, -82, -39, -105, + 51, 93, -71, 13, -122, 83, -102, 45, -38, -103, -95, 68, 90, -81, -124, 20, -2, -35, 53, + 34, 37, 22, 49, 63, -41, -98, -19, -7, -3, -66, 78, 21, 46, 16, 78, 91, 71, 7, + -98, 42, 119, -105, 85, -51, 106, -110, -59, 83, 48, 56, -93, 52, -127, -121, 98, -71, 123, + 126, 74, -14, 110, -37, 101, 14, -46, 40, 81, -20, 92, 31, -118, -40, -89, 49, 46, 70, + -26, 96, -106, -65, -21, 65, -18, -44, 101, -106, -22, 28, 91, 0, 25, 7, -24, -16, 119, + -95, 8, -55, -62, -14, 90, 106, 113, 62, 32, 5, 106, 14, -51, -88, 74, -37, -50, 40, + -10, 23, -24, -48, 125, 96, 16, 18, 19, 70, -98, 72, 26, -94, -74, 8, 30, 57, 114, + 108, 43, -127, -14, -104, 118, 55, -11, 45, 62, 106, 10, 28, 86, 112, 83, 33, -10, 52, + 24, -44, 13, 103, -125, -51, -73, -122, -39, -1, 82, -125, -60, -4, -125, 3, -44, -66, -64, + 89, 103, -25, 112, 31, 84, 115, -93, 26, -104, -3, -33, 69, -58, 29, 7, -116, 10, -47, + 97, 84, -84, 103, -72, 63, -55, 49, 22, -17, -78, -4, -9, 85, -17, -111, 32, -81, -18, + 41, -117, 107, -8, 7, -83, 101, -23, 43, 78, -21, -33, 84, 108, 91, 11, -94, 113, -62, + -20, 114, -24, 117, 62, -19, 99, 100, 122, 123, -51, -73, -66, -6, -48, -45, 36, -21, 63, + 71, 107, -118, 104, -124, 110, 107, 94, 9, 77, 44, 69, -50, -29, -7, -53, 61, -86, 14, + 33, 57, -112, -95, 123, 98, -113, 15, -120, -80, -11, -109, 35, -98, 13, -28, 56, -100, 22, + -91, -50, 18, 35, -114, 119, -32, -42, -22, -13, -88, 74, 25, 127, -91, -58, 83, -1, 61, + 28, 92, 82, 103, 40, -107, 17, -95, 101, 48, -127, 119, 106, 124, 72, -127, -62, -122, -126, + -78, -30, 105, 41, 40, 66, -45, 114, -78, 58, 20, 87, -67, 60, -71, 100, -87, -120, -71, + -40, 46, 11, 85, -79, -119, 89, -100, -87, 71, 92, -5, 122, -26, 110, -3, -99, 120, 67, + 1, 29, -117, 60, -16, -12, -30, 65, -15, 100, 40, -6, 49, 49, -96, -28, -102, 25, 84, + -82, -59, 116, -58, -31, 
34, -113, 82, 58, 114, -17, 69, 4, 114, 1, 107, 77, 75, -28, + -96, 101, 103, 122, -119, -84, -126, 107, 30, 61, -81, 87, 17, -24, -42, 110, 104, -92, 10, + -46, 2, 10, 70, 51, 82, 69, -110, -22, 94, -59, 14, 33, -23, 108, 13, -110, -105, 92, + 90, -49, -54, 37, 38, -96, -9, 57, 49, 89, 10, 2, -20, -3, -1, -41, -121, 92, -25, + -78, 32, -45, 109, -42, 112, 105, 113, 109, -4, 112, 85, 58, 73, -87, 114, -90, -92, -68, + 51, 127, 2, 28, 112, 56, 103, -49, 66, 103, 95, -42, -83, 58, -23, -2, -57, -17, 15, + -10, -62, -55, 86, -33, -3, -73, -101, 50, 92, 30, 51, 70, -125, 70, 100, 114, -43, 90, + 6, -104, 1, -24, -22, -94, 45, 47, -62, -100, 109, -70, 96, -38, 102, -16, 61, -104, -5, + -42, -45, -96, 40, 72, -22, -101, -14, -24, 1, -65, -61, -55, 6, 126, 46, 64, -127, 107, + 36, 94, 123, 10, -112, -99, -122, -50, -94, 57, -14, 104, 114, -8, -14, -120, 14, 127, -29, + 24, -101, -63, -126, -28, -44, 34, -98, -51, 97, -38, -50, 127, -71, -18, -11, 110, -57, -49, + 80, -24, 35, -111, -1, -26, 127, -24, -77, -40, -35, -38, 96, 74, -44, -54, 66, 70, 126, + -30, 113, -76, 99, -86, 94, -40, -20, -21, -99, -79, 121, 85, -35, 57, -20, -10, -52, -26, + 79, -19, 25, -24, -10, -10, 78, -83, -40, -32, 103, 38, 86, -53, -36, -119, -25, -70, -33, + -58, -111, -118, -5, -57, -66, 42, 97, -65, -25, 114, -108, 27, 81, 122, -53, -123, 117, 24, + -2}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected/weights_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected/weights_data.h index a382e5f6..ae0e82b0 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected/weights_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected/weights_data.h @@ -1,12 +1,12 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). 
+// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. #pragma once #include const int8_t fully_connected_weights[120] = { - 86, 99, -57, -110, -101, 110, 27, -26, 77, -21, 61, 125, 68, 112, -61, 82, -53, -2, -6, 59, - -93, -56, 2, -41, 39, 38, -70, 54, 47, 11, 30, 24, 101, -69, -107, 67, -45, 58, -62, -6, - 64, 19, -88, 114, 113, 88, 125, 103, -45, 86, 74, -68, 66, -50, 54, 116, -102, 104, -29, -108, - -74, 103, -15, -17, 87, 101, -29, -13, -103, 0, -59, 27, 106, -27, 111, 76, 90, 83, -32, 110, - 8, -121, 7, 123, -27, -100, 27, 90, 121, -68, 124, -61, 102, -10, 47, 4, 4, 95, 127, 4, - 64, 14, -14, 60, -76, 97, 81, -69, 22, 44, -30, 116, -83, -9, -1, 55, 28, -113, 84, 88}; + -55, 18, 26, -100, -109, -70, 56, 66, 118, 56, 28, 39, 70, 67, 100, -11, 3, 112, 2, 127, + -92, 16, 35, -87, 51, 2, 45, -74, 99, 3, 89, -45, 78, 76, -47, -83, 60, 3, 92, 65, + 47, 14, -65, 7, 11, 70, -120, 33, -18, 125, -27, 6, 40, -65, 104, -85, -117, 14, -55, 69, + 114, -93, -37, 1, -72, 1, -63, -42, 27, -18, 93, 101, 86, -77, 46, 80, -7, 96, -81, -70, + 91, 29, 109, 46, 7, -12, 68, 39, 1, -103, 56, 94, 58, 126, 70, 12, -80, 119, 85, 42, + 22, 99, -102, 5, 3, 74, 95, 68, 91, 24, -119, -113, -106, 99, -62, 4, 101, 67, 51, -39}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/bias.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/bias.h new file mode 100644 index 00000000..7834e039 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/bias.h @@ -0,0 +1,8 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int32_t *const fully_connected_mve_0_bias = NULL; + +const int32_t *const fully_connected_mve_0_biases = fully_connected_mve_0_bias; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/biases_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/biases_data.h index 7545714a..c8ad3189 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/biases_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/biases_data.h @@ -1,6 +1,6 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. #pragma once #include -const int32_t fully_connected_mve_0_biases[9] = {11295, -30752, -3196, 10489, -5120, 18598, 27393, 29746, 22967}; +const int32_t fully_connected_mve_0_biases[9] = {10631, -6022, -23693, -2849, -24177, -8216, 6896, 4395, -31983}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/config_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/config_data.h index b23d1f4d..f2082ea5 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/config_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/config_data.h @@ -1,5 +1,5 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. 
#pragma once #define FULLY_CONNECTED_MVE_0_OUT_CH 9 #define FULLY_CONNECTED_MVE_0_IN_CH 16 @@ -10,8 +10,9 @@ #define FULLY_CONNECTED_MVE_0_OUT_ACTIVATION_MIN -128 #define FULLY_CONNECTED_MVE_0_OUT_ACTIVATION_MAX 127 #define FULLY_CONNECTED_MVE_0_INPUT_BATCHES 1 -#define FULLY_CONNECTED_MVE_0_OUTPUT_MULTIPLIER 1244038257 -#define FULLY_CONNECTED_MVE_0_OUTPUT_SHIFT -9 +#define FULLY_CONNECTED_MVE_0_OUTPUT_MULTIPLIER 2054731230 +#define FULLY_CONNECTED_MVE_0_OUTPUT_SHIFT -10 #define FULLY_CONNECTED_MVE_0_ACCUMULATION_DEPTH 16 #define FULLY_CONNECTED_MVE_0_INPUT_OFFSET 128 -#define FULLY_CONNECTED_MVE_0_OUTPUT_OFFSET -26 +#define FULLY_CONNECTED_MVE_0_FILTER_OFFSET 0 +#define FULLY_CONNECTED_MVE_0_OUTPUT_OFFSET -42 diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/input.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/input.h new file mode 100644 index 00000000..c59c2396 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/input.h @@ -0,0 +1,11 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int8_t fully_connected_mve_0_input[89] = { + 118, 16, 48, 23, 47, -35, 80, 82, 85, -38, 80, -107, 87, -111, 32, -17, 54, 8, + 14, -81, -100, -56, 57, -24, 98, 80, -74, 82, -33, -97, 111, 108, 25, 123, 12, 33, + -57, -20, 68, -43, 50, -64, -107, -90, 113, 102, -41, 0, -18, 113, -110, -54, 49, -44, + -115, 13, 114, 60, 86, -87, -37, -78, -128, 42, 96, 30, 92, -107, -65, -12, 9, -42, + 13, -10, 44, -76, 7, 31, -120, 116, -81, 77, -104, -35, 112, -31, -54, -110, 49}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/input_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/input_data.h index f28445b5..6173a978 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/input_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/input_data.h @@ -1,7 +1,7 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. #pragma once #include const int8_t fully_connected_mve_0_input[16] = - {-43, 68, 79, -12, -119, -56, -102, -46, 107, -65, -109, -7, 92, -99, -80, -29}; + {94, 82, 85, -84, -117, 91, -106, -86, 124, 1, 69, -46, -127, 68, -104, -50}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output.h new file mode 100644 index 00000000..fad9f206 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output.h @@ -0,0 +1,9 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int8_t fully_connected_mve_0_output[22] = {-81, -8, 38, 23, 70, -45, 17, -43, -107, 82, 10, + 24, -10, -72, 62, 50, 31, 86, 27, -48, 14, 53}; + +const int8_t *const fully_connected_mve_0_output_ref = fully_connected_mve_0_output; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_multiplier.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_multiplier.h new file mode 100644 index 00000000..9f0b6be9 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_multiplier.h @@ -0,0 +1,11 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include + +const int32_t fully_connected_mve_0_output_multiplier[22] = { + 1683401917, 1640698641, 1691959680, 1688380116, 1603499042, 1674474090, 1696520126, 1690076629, + 1641837447, 1666364601, 1682519927, 1658509317, 1683852199, 1680310786, 1692288582, 1684976247, + 1685622805, 1676760537, 1692152845, 1694786466, 1690655118, 1661919311}; + +const int32_t *const fully_connected_mve_0_output_mult = fully_connected_mve_0_output_multiplier; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_ref_data.h index 9b2f6f6c..91cba939 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_ref_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_ref_data.h @@ -1,6 +1,6 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). 
+// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. #pragma once #include -const int8_t fully_connected_mve_0_output_ref[9] = {-9, -3, 26, 8, 3, -88, 75, 34, 5}; +const int8_t fully_connected_mve_0_output_ref[9] = {-34, -28, -45, -39, -65, 24, -55, -15, -69}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_shift.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_shift.h new file mode 100644 index 00000000..bebbceb1 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/output_shift.h @@ -0,0 +1,7 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include + +const int32_t fully_connected_mve_0_output_shift[22] = {-10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, + -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/test_data.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/test_data.h index dfcc1d8c..13234de0 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/test_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/test_data.h @@ -1,5 +1,5 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. 
#include "biases_data.h" #include "config_data.h" #include "input_data.h" diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/weights.h b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/weights.h new file mode 100644 index 00000000..bb4aa7e2 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/weights.h @@ -0,0 +1,110 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.4.1). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include + +const int8_t fully_connected_mve_0_weights[1958] = { + -96, -58, -93, -75, 52, 120, 103, 64, 73, 21, 115, -107, 118, -111, 92, -49, -91, 40, 30, + -113, 119, 15, 89, -77, 0, 49, -101, 75, 70, 65, -89, -47, 98, -45, -95, -115, 73, -96, + -73, -97, 115, -57, 116, 70, 20, 60, 89, -106, -60, 2, 27, -60, -66, 17, -107, 72, 121, + 77, 119, 19, -92, 112, -45, -87, -21, 12, 62, -40, 52, -126, -124, 8, -57, -102, -67, 36, + 52, 15, -2, -114, 94, -123, 125, -70, -68, -95, -66, -126, -31, -18, -98, 121, -117, 84, -28, + -6, -11, -116, 80, -13, -33, 38, -115, 92, -79, 71, 35, 58, -109, -56, 18, -72, -10, -3, + -44, -125, -82, -16, -25, -76, 56, 115, 2, -66, 77, 32, -40, -43, 10, 101, -73, -14, 14, + 108, 23, 37, -53, 12, -89, -24, -10, 79, -9, 65, 42, -46, 72, 123, 64, -105, 96, -51, + -7, -82, -121, 30, -38, -23, 122, -39, -94, -12, 121, 17, -93, 76, 100, -118, 37, -115, 89, + -101, -115, 120, -51, -69, -106, -37, -41, -126, -21, 62, -101, 30, 76, -54, -26, 126, -126, -121, + 30, -21, -116, -55, -125, 84, 84, 43, -62, -57, 75, 82, -101, -17, 64, 127, -16, -25, 76, + 100, -17, -43, 65, -48, -38, 7, 126, -80, -26, 94, 29, -75, -2, 64, -11, 27, 75, 13, + -99, 108, 83, -20, -101, -90, 75, 102, 55, -51, 49, -60, -18, 13, 99, 2, -37, 40, 49, + 101, -2, -63, 51, -114, 82, 92, 31, -89, 51, -39, 67, 17, 51, 83, 23, -55, 15, 10, + -8, 20, -63, 120, 110, 121, -118, 115, 50, -104, 58, -71, -17, 45, 18, 7, -12, -70, 
-84, + 102, -54, -49, -77, -31, 14, 107, 69, -124, -98, 18, 116, -97, -50, 126, -20, -124, -77, 95, + -29, 81, 66, -44, 40, -22, -58, 47, -79, -96, 29, -41, 124, -40, -30, 23, -76, 72, -80, + 40, 54, 18, 2, 103, -35, 23, -88, 23, -112, 24, 123, 30, 125, 79, 88, 100, -73, 118, + 46, -16, 94, -15, -109, 6, -61, 57, 120, 31, 0, -3, -91, 47, 124, -99, 7, -110, 57, + 120, -81, -82, 81, 14, 89, 84, -37, 30, -44, 13, 92, -3, 60, 7, -109, -96, 84, -32, + -87, 11, -30, -71, 1, 14, 45, 38, -77, 64, -89, 57, -54, -81, 47, 108, 80, -45, 89, + 103, 62, -23, -105, -60, -77, 61, 12, 95, 82, 26, -27, 78, 58, -123, -106, -17, -70, -99, + -105, 41, -34, 97, -10, -49, 8, 53, -117, 122, 48, 115, 124, 126, 76, 81, 123, 69, -11, + -59, 15, 22, -70, 74, 60, 124, 104, 49, 53, -78, 111, 33, -107, -87, -5, -48, -104, -74, + 100, 118, 116, 121, -110, -63, -48, -74, -73, -49, 87, -44, -51, 96, -111, 31, -7, 68, -35, + -25, -37, 86, 60, 62, -123, -84, 34, 41, 17, -121, 109, -121, -67, 57, -103, -46, -13, 85, + -110, -5, -81, -6, -17, -118, 31, 102, -107, 77, 15, 11, 98, -72, 60, 20, 44, -122, -66, + 22, -30, 112, -123, -59, 0, 77, 29, 60, 45, -81, 104, 56, 82, 110, 66, -25, -69, -35, + 121, 4, -76, -30, -9, -57, -11, -106, 47, 78, 105, 111, -38, 56, 28, -4, -91, -9, 69, + 38, 32, 93, 66, -126, -27, 47, -54, 5, -51, -87, -50, 62, 1, -84, -43, -37, 36, -50, + 95, 73, -9, 108, -83, -70, 20, -23, 70, 125, -38, 58, 34, 31, -66, -83, 99, -115, -19, + -20, 99, 72, 120, 72, -2, -124, -124, -113, -43, -43, -61, 2, -12, -14, -48, -32, -9, -44, + 79, -28, 52, 69, -37, 38, 54, 61, 56, 6, 11, 85, -11, -66, 111, -70, -125, 83, -77, + -13, -3, -88, 60, 104, 5, 32, 101, -65, -116, -105, -47, 34, -34, 71, 3, 17, 125, -23, + 74, 68, 69, 45, 113, 66, -119, -102, 39, -21, 50, 3, -76, 118, -21, -68, 29, 6, 76, + 12, -42, 50, -20, 9, 115, 14, -102, 95, -99, -90, 18, -8, -117, 32, -56, -54, 82, 18, + -18, 19, 74, 52, -4, 16, 112, 5, -51, -52, -73, -17, 75, -12, 74, 21, -14, -110, -28, + 103, -23, 1, 24, 115, 91, 
-116, 89, -51, 117, -43, -12, -114, 43, -67, -112, -6, -108, -47, + -18, 17, 127, -85, 105, 30, 93, -55, 55, 75, -84, 55, 80, -125, -87, 58, 17, 25, -87, + 72, 84, -97, 88, 94, 71, 109, -94, 46, -75, -110, -3, 80, 77, 93, -15, -99, -99, 60, + 122, -36, -114, 70, -100, 91, -108, 121, -46, -37, -78, -126, 97, 55, 16, -100, -119, 119, 124, + -8, -17, 110, 10, 112, -81, -52, 19, 104, -34, -120, 127, -95, 80, -45, 24, -42, -118, -120, + 35, 126, -100, -94, 108, 121, -94, 55, 117, -118, 2, 65, -123, -85, -126, 51, -45, 68, 21, + -122, 2, 56, 117, 107, 19, 80, 39, 17, 41, -120, 42, 92, 52, 126, -85, 98, -80, -30, + -85, 19, -18, 87, 78, -121, 48, 81, -37, -83, 66, 98, 0, 82, -52, -6, 11, 108, 49, + 26, -46, -69, 103, 0, 101, 111, 0, -120, 92, 47, 108, 113, -11, 36, 77, 29, -44, 0, + 18, -24, 55, 42, -10, 3, 56, -30, -16, -99, -60, 11, -68, -25, 35, 86, 61, -4, 51, + 24, -52, -82, -21, -7, 79, -30, 26, -85, 52, -1, 23, -107, 49, -88, 8, -25, 31, 21, + 121, 15, -59, -73, 110, 28, -17, 29, -49, 0, 109, -78, 80, -23, -119, -92, -103, -85, 125, + -87, -16, -8, 125, 120, -94, -44, 30, 52, -49, -118, 83, -36, -104, -59, -16, 21, 17, 67, + 7, -9, 24, -63, -1, -43, -67, 105, 71, -115, 88, 6, -35, -101, 0, -108, -1, 105, -9, + 78, -34, 90, -14, -38, 85, -59, 12, -23, 27, 120, -91, -25, 35, -29, 38, 62, -92, 119, + 53, -15, -120, -24, 105, 87, -57, -88, -12, 79, 86, -99, 44, -40, 62, 97, 29, -74, -19, + 78, 43, -86, 8, 118, -86, -16, -9, 114, 12, -32, 21, -49, 63, -124, -1, 40, 11, 9, + 83, -114, -33, 10, -15, -11, 11, -30, -13, 47, 70, 106, -88, 12, -92, 55, 11, 2, -19, + -119, 96, 74, 80, 126, 71, 112, 82, -25, -40, -8, -33, -9, -31, 48, -19, 4, -76, 111, + -46, 51, -30, -108, -21, -103, 85, -51, 3, -60, 45, 82, 79, -74, 52, 5, 12, -33, -39, + -109, -119, -3, -36, -9, -107, 108, -23, 104, 12, -91, 100, 117, -72, 119, -106, -5, -51, -9, + 12, 120, 124, 2, 22, -36, 35, 27, 40, 62, 94, 75, -15, 109, -8, 74, 67, -115, -74, + -80, -7, -114, -73, -19, -89, -44, 118, -46, -57, 74, 32, 
88, 61, -79, 35, -52, 34, 96, + -23, 81, -114, -68, -68, 49, -77, 30, -93, 28, 71, 27, 10, -68, -28, 107, -23, 81, 59, + 17, 124, -120, -71, -11, -7, -80, -114, 54, 25, -49, -19, -68, 2, 77, -93, -104, 126, -9, + 46, 111, -37, -45, 57, -92, 16, -37, 75, -39, -122, 15, 107, -83, 44, -51, 81, 34, -100, + 126, -75, -99, -106, -83, -93, 112, -101, 36, 47, -73, 62, -44, 79, -105, 124, 79, 22, -3, + -25, 110, -70, 0, 57, -76, 107, 3, 123, -85, 65, 84, -94, 10, 3, 108, -48, -57, -7, + 114, -102, -16, -110, 65, -100, -62, -118, -70, -88, 99, 110, -60, 23, -30, 111, 117, 89, -88, + -2, -24, 92, 13, -43, 64, 118, 13, -53, -28, 49, 114, -64, 60, -71, -75, 85, 6, 70, + 7, 17, -28, -75, 10, -16, -92, 89, -113, -20, 78, 29, 84, 83, -27, -86, 51, 106, 104, + 66, 111, -118, -53, -60, 117, -105, 63, 28, 74, 88, 78, 17, 9, 90, 114, -76, 104, -24, + -102, -50, -69, 24, 64, 24, 33, -49, -104, -76, -47, 47, 108, 17, 21, -35, -24, 54, -86, + -24, -55, 7, 109, -29, 15, -50, 99, -120, 74, 6, 111, -81, -27, -52, 121, -86, -4, 84, + 102, 103, 11, -102, -113, 16, 53, -72, -72, -27, 8, -17, -58, -86, -12, -52, -24, 109, -51, + -76, -89, 94, -33, 37, 75, 31, 28, 127, 39, 92, -90, -74, 92, -48, -43, -17, -123, -10, + 68, -2, -69, 117, -123, 31, 118, -89, -26, -36, 114, 86, 114, 105, 19, 96, 20, 109, -16, + 70, -57, 2, -42, 41, 81, -77, 113, -12, 115, 10, 112, 64, 124, 114, -33, -112, 100, -9, + 124, 73, 70, 69, -42, 30, -22, 53, -83, -103, 16, -80, -126, 22, 64, 84, -14, -59, 91, + 3, 19, -126, 35, 1, -21, -78, 51, 33, -4, 112, 57, 17, -92, 66, 101, 53, -34, 36, + 111, -25, -111, 99, 11, -54, 20, 99, 32, 68, 12, 21, -101, 37, 96, -106, -54, -73, 13, + 78, -70, 14, -120, -62, 45, -91, -91, 88, 11, -108, 64, 81, -112, 37, 105, -110, 30, -82, + 1, 71, -96, -52, 65, -112, 69, 119, 125, -4, 92, 16, -62, 20, -3, -23, -81, 83, -10, + 108, 51, 28, 23, -93, 123, 10, -11, -68, 34, 45, -107, 44, 36, -11, 28, 4, -24, -1, + 19, 60, 79, 72, 63, -101, -22, 67, 85, -113, 107, -114, 127, 41, 30, 26, 107, 63, 
-114, + 27, -86, 38, -52, -124, 116, -100, -45, 109, 17, 81, 32, 61, -41, 99, 50, 14, 31, -74, + 83, 39, 30, -42, 92, -111, -6, 110, -15, -72, 3, 77, 85, 115, -79, 72, 77, 43, -76, + 70, 99, 8, 35, -75, 60, -44, 8, 123, 120, 125, 64, 60, 23, 47, -36, -47, 126, 48, + 87, -66, -43, 86, -101, 67, 114, 72, -91, -22, 10, -27, -45, -97, -54, 74, 14, 47, 123, + -20, -86, -35, -47, -124, -83, -41, -34, -30, -47, -19, 121, -47, 83, 13, -108, -115, 91, -11, + -73, -47, 82, 51, 35, -28, 62, 68, 67, 100, -63, 53, 25, 22, -60, 61, 2, -114, 123, + 64, 106, 31, 23, -81, -16, -116, 11, 22, 125, 119, -61, 46, 5, 22, -30, 39, 30, -62, + 16, 84, -37, -5, -86, 54, 79, 90, 61, 122, 21, -81, -75, 107, -124, -15, 45, 55, -107, + 65, 47, 7, 23, -13, 54, 54, 32, -77, 84, 87, -66, -18, -67, 94, -38, 122, 10, -106, + -49, -90, -31, -95, -21, -7, -65, -44, 126, -17, 42, 115, 17, -78, 75, 114, 121, -42, 32, + -2, 122, -107, -34, 89, 95, 116, -119, -93, -82, -37, -58, -95, -58, 75, -89, 108, -99, 34, + -88, -95, -78, -48, -15, -52, 15, -100, 113, 78, -11, 58, 88, 28, 119, 3, -50, -88, 91, + -87, -33, -122, 5, -84, 27, 90, 76, 74, -63, -6, -31, 46, 6, -65, 85, 73, -116, 41, + -81, -7, -108, 54, 22, -72, -48, 67, 104, -4, 17, 66, 84, 30, -76, 55, 38, -12, 30, + -93, 120, -111, 37, 8, 35, -62, 112, -18, 107, -83, 2, -73, -61, 48, 88, 73, -54, 123, + 20, -11, -82, 29, -89, 108, 102, 115, 77, 99, -34, -6, -83, -47, 11, -64, 36, -118, 101, + 57, -96, 78, -70, -32, -39, 124, 94, 40, 44, -51, 14, -37, 101, 90, 29, 71, 22, -3, + 84, 38, -94, 78, 36, -104, 9, 97, -123, 61, 64, 93, -63, -121, 81, -73, -56, 24, 80, + -13, -82, -42, -92, 29, 18, -118, 4, -42, 24, 96, -23, 82, -124, 12, 99, -23, 103, 105, + -66, 81, 44, 56, -76, -30, -11, 64, 36, 77, -24, -119, 65, 3, -100, 30, 93, 122, -20, + -121, 102, 73, 97, -50, 26, -32, 123, -14, 7, -116, -79, 98, -49, -1, -28, 98, -13, -124, + 103}; diff --git a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/weights_data.h 
b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/weights_data.h index a0fea94b..29c24f1e 100644 --- a/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/weights_data.h +++ b/Tests/UnitTest/TestCases/TestData/fully_connected_mve_0/weights_data.h @@ -1,13 +1,13 @@ -// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0). -// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f. +// Generated by test_settings.py using tensorflow version 2.18.0-dev20240711 (Keras version 3.4.1.dev2024071203). +// Interpreter from tensorflow version 2.18.0-dev20240711 and revision v1.12.1-112931-gbc6210b35b0. #pragma once #include const int8_t fully_connected_mve_0_weights[144] = { - 37, -46, 75, -33, -52, -82, -94, 64, 71, 65, 64, 16, -66, -5, -65, -44, 82, 42, 84, 105, 18, - 79, -103, -75, -95, 65, 87, 103, 43, -25, -66, 75, 125, 40, -34, 24, 9, -79, 4, 73, 98, -75, - 42, 81, 18, -58, -119, 92, 0, -72, 48, 23, -69, 11, -95, -103, 66, 117, 107, -96, 114, -29, 75, - -93, 118, 66, -19, 83, -14, 86, -110, 44, 37, -9, 17, -107, 50, -116, -116, -27, -84, -126, -108, -127, - -71, 8, 81, 108, -61, 126, 69, -45, 37, -78, -102, -55, 116, 112, -111, -89, -57, 82, -47, 22, 125, - -84, 97, -9, 88, 74, -15, 118, -95, 112, 89, 44, -17, -112, -71, -94, 1, -117, 112, -92, 52, 57, - -22, 80, -60, 95, -106, -1, -27, 105, 6, 123, 6, 96, 126, -65, -29, 103, 19, -45}; + 9, 50, 118, -35, 96, 51, 62, 73, -113, 75, -72, -108, 72, -35, 39, -92, -32, 39, -40, 35, -4, + 117, -8, -35, 113, -34, -114, -28, 92, 76, -64, -126, -18, 4, -5, -30, 8, 37, 104, -68, 21, -50, + 58, 36, 125, 44, -122, -12, 120, 3, -57, -114, -92, 38, 110, -68, 62, -53, -29, -65, -47, -36, 72, + -44, 59, 27, 20, -77, 114, -65, 15, -44, -40, 7, 75, -113, -26, 50, -94, -120, 76, 58, 26, 43, + 50, 96, 96, 119, -79, 60, 20, 78, 72, 90, -37, -23, -93, -50, -97, 17, -18, 20, -17, -118, 120, + 38, 26, -38, -113, -55, -118, 109, -100, 95, 95, 106, 46, -7, -53, -93, 
-64, 6, 29, 76, -2, 50, + -82, 48, -95, -107, 1, 23, 53, 67, -93, -106, 93, 116, -117, 4, 88, 45, 86, 127}; diff --git a/Tests/UnitTest/TestCases/test_arm_fully_connected_s8/Unity/unity_test_arm_fully_connected_s8.c b/Tests/UnitTest/TestCases/test_arm_fully_connected_s8/Unity/unity_test_arm_fully_connected_s8.c index 324c31d1..02bb6a54 100644 --- a/Tests/UnitTest/TestCases/test_arm_fully_connected_s8/Unity/unity_test_arm_fully_connected_s8.c +++ b/Tests/UnitTest/TestCases/test_arm_fully_connected_s8/Unity/unity_test_arm_fully_connected_s8.c @@ -61,3 +61,5 @@ void test_fully_connected_out_activation_arm_fully_connected_s8(void) { fully_connected_out_activation_arm_fully_connected_s8(); } + +void test_fc_per_fc_per_ch_arm_fully_connected_s8(void) { fc_per_ch_arm_fully_connected_s8(); } diff --git a/Tests/UnitTest/TestCases/test_arm_fully_connected_s8/test_arm_fully_connected_s8.c b/Tests/UnitTest/TestCases/test_arm_fully_connected_s8/test_arm_fully_connected_s8.c index d6488c9d..140e0d4f 100644 --- a/Tests/UnitTest/TestCases/test_arm_fully_connected_s8/test_arm_fully_connected_s8.c +++ b/Tests/UnitTest/TestCases/test_arm_fully_connected_s8/test_arm_fully_connected_s8.c @@ -20,6 +20,7 @@ #include #include +#include "../TestData/fc_per_ch/test_data.h" #include "../TestData/fully_connected/test_data.h" #include "../TestData/fully_connected_mve_0/test_data.h" #include "../TestData/fully_connected_mve_1/test_data.h" @@ -422,3 +423,91 @@ void fully_connected_out_activation_arm_fully_connected_s8(void) TEST_ASSERT_EQUAL(expected, result); TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size)); } + +void fc_per_ch_arm_fully_connected_s8(void) +{ + const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS; + int8_t output[FC_PER_CH_DST_SIZE] = {0}; + + cmsis_nn_context ctx; + cmsis_nn_fc_params fc_params; + cmsis_nn_per_channel_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims; + cmsis_nn_dims output_dims; + 
+ const int32_t *bias_data = fc_per_ch_biases; + const int8_t *kernel_data = fc_per_ch_weights; + const int8_t *input_data = fc_per_ch_input; + const int8_t *output_ref = fc_per_ch_output_ref; + const int32_t output_ref_size = FC_PER_CH_DST_SIZE; + + input_dims.n = FC_PER_CH_INPUT_BATCHES; + input_dims.w = FC_PER_CH_INPUT_W; + input_dims.h = FC_PER_CH_INPUT_H; + input_dims.c = FC_PER_CH_IN_CH; + filter_dims.n = FC_PER_CH_ACCUMULATION_DEPTH; + filter_dims.c = FC_PER_CH_OUT_CH; + output_dims.n = FC_PER_CH_INPUT_BATCHES; + output_dims.c = FC_PER_CH_OUT_CH; + + fc_params.input_offset = FC_PER_CH_INPUT_OFFSET; + fc_params.filter_offset = 0; + fc_params.output_offset = FC_PER_CH_OUTPUT_OFFSET; + fc_params.activation.min = FC_PER_CH_OUT_ACTIVATION_MIN; + fc_params.activation.max = FC_PER_CH_OUT_ACTIVATION_MAX; + + quant_params.multiplier = (int32_t *)fc_per_ch_output_mult; + quant_params.shift = (int32_t *)fc_per_ch_output_shift; + + const int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims); + ctx.buf = malloc(buf_size); + ctx.size = buf_size; + +#if defined(ARM_MATH_MVEI) + int32_t *buf = ctx.buf; + TEST_ASSERT_EQUAL(expected, arm_vector_sum_s8(buf, filter_dims.n, output_dims.c, kernel_data, 1, NULL)); +#endif + + arm_cmsis_nn_status result = arm_fully_connected_per_channel_s8(&ctx, + &fc_params, + &quant_params, + &input_dims, + input_data, + &filter_dims, + kernel_data, + &bias_dims, + bias_data, + &output_dims, + output); + + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size)); + + cmsis_nn_quant_params generic_quant_params; + generic_quant_params.multiplier = quant_params.multiplier; + generic_quant_params.shift = quant_params.shift; + generic_quant_params.is_per_channel = 1; + + result = arm_fully_connected_wrapper_s8(&ctx, + &fc_params, + &generic_quant_params, + &input_dims, + input_data, + &filter_dims, + kernel_data, + &bias_dims, + bias_data, + &output_dims, + output); + + if (ctx.buf) + { + 
// The caller is responsible to clear the scratch buffers for security reasons if applicable. + memset(ctx.buf, 0, buf_size); + free(ctx.buf); + } + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size)); +} diff --git a/Tests/UnitTest/add_mul_settings.py b/Tests/UnitTest/add_mul_settings.py index a2dbeb32..7b5bcfd9 100644 --- a/Tests/UnitTest/add_mul_settings.py +++ b/Tests/UnitTest/add_mul_settings.py @@ -18,7 +18,7 @@ import tensorflow as tf import numpy as np -import tf_keras as keras +import keras class AddMulSettings(TestSettings): diff --git a/Tests/UnitTest/conv_settings.py b/Tests/UnitTest/conv_settings.py index b536241d..48752067 100644 --- a/Tests/UnitTest/conv_settings.py +++ b/Tests/UnitTest/conv_settings.py @@ -19,7 +19,7 @@ import tensorflow as tf import numpy as np import math -import tf_keras as keras +import keras class ConvSettings(TestSettings): diff --git a/Tests/UnitTest/fully_connected_settings.py b/Tests/UnitTest/fully_connected_settings.py index 4b465e4b..fa877c37 100644 --- a/Tests/UnitTest/fully_connected_settings.py +++ b/Tests/UnitTest/fully_connected_settings.py @@ -18,7 +18,7 @@ import tensorflow as tf import numpy as np -import tf_keras as keras +import keras class FullyConnectedSettings(TestSettings): diff --git a/Tests/UnitTest/generate_test_data.py b/Tests/UnitTest/generate_test_data.py index dfb40d6a..df41e694 100755 --- a/Tests/UnitTest/generate_test_data.py +++ b/Tests/UnitTest/generate_test_data.py @@ -3281,5 +3281,6 @@ def main(): raise RuntimeError("Please select testdataset or use --run-all-testsets") return 0 + if __name__ == '__main__': sys.exit(main()) diff --git a/Tests/UnitTest/pooling_settings.py b/Tests/UnitTest/pooling_settings.py index 8dc842cc..136e53f2 100644 --- a/Tests/UnitTest/pooling_settings.py +++ b/Tests/UnitTest/pooling_settings.py @@ -18,7 +18,7 @@ import numpy as np import tensorflow as tf -import tf_keras as keras +import keras class 
PoolingSettings(TestSettings): @@ -92,13 +92,13 @@ def generate_data(self, input_data=None) -> None: model.add( keras.layers.AveragePooling2D(pool_size=(self.filter_y, self.filter_x), strides=(self.stride_y, self.stride_x), - padding=self.padding, + padding=self.padding.lower(), input_shape=input_shape[1:])) elif self.test_type == 'maxpool': model.add( keras.layers.MaxPooling2D(pool_size=(self.filter_y, self.filter_x), strides=(self.stride_y, self.stride_x), - padding=self.padding, + padding=self.padding.lower(), input_shape=input_shape[1:])) else: raise RuntimeError("Wrong test type") diff --git a/Tests/UnitTest/softmax_settings.py b/Tests/UnitTest/softmax_settings.py index 520b1058..de265e98 100644 --- a/Tests/UnitTest/softmax_settings.py +++ b/Tests/UnitTest/softmax_settings.py @@ -17,7 +17,7 @@ import math from test_settings import TestSettings import tensorflow as tf -import tf_keras as keras +import keras class SoftmaxSettings(TestSettings): softmax_input_integer_bits = 5 diff --git a/Tests/UnitTest/test_settings.py b/Tests/UnitTest/test_settings.py index 64fc120f..8b329a6a 100644 --- a/Tests/UnitTest/test_settings.py +++ b/Tests/UnitTest/test_settings.py @@ -26,7 +26,7 @@ import numpy as np import tensorflow as tf -import tf_keras as keras +import keras class TestSettings(ABC): @@ -454,7 +454,6 @@ def convert_and_interpret(self, model, inttype, input_data=None, dataset_shape=N def convert_model(self, model, inttype, dataset_shape=None, int16x8_int32bias=False): model.compile(loss=keras.losses.categorical_crossentropy, - optimizer=keras.optimizers.Adam(), metrics=['accuracy']) n_inputs = len(model.inputs) @@ -469,6 +468,8 @@ def convert_model(self, model, inttype, dataset_shape=None, int16x8_int32bias=Fa converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.representative_dataset = representative_dataset + converter._experimental_disable_per_channel_quantization_for_dense_layers = True + if self.is_int16xint8: if int16x8_int32bias: 
converter._experimental_full_integer_quantization_bias_type = tf.int32