This repository has been archived on 2023-07-17. You can view files and clone it, but cannot push or open issues or pull requests.
bl_mcu_sdk/examples/nn/nnTest/Ref_Implementations/ref_functions.h

1043 lines
61 KiB
C

/*
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
* Copyright (c) 2019 Nuclei Limited. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _REF_FUNCTIONS_H_
#define _REF_FUNCTIONS_H_
#include "riscv_math.h"
#include "riscv_nnfunctions.h"
#include "riscv_nnsupportfunctions.h"
#include "fully_connected_testing_weights.h"
#ifdef __cplusplus
extern "C"
{
#endif
/*
*
* Activation Functions
*
*/
/*
 * Reference (_ref) activation prototypes.
 * The two "direct" variants take a data buffer, a size, an int_width value and
 * an activation type selector; the relu/relu6 variants take only the buffer
 * and its size. None of them return a value.
 */
void riscv_nn_activations_direct_q7_ref(
    q7_t *data,
    uint16_t size,
    uint16_t int_width,
    riscv_nn_activation_type type);
void riscv_nn_activations_direct_q15_ref(
    q15_t *data,
    uint16_t size,
    uint16_t int_width,
    riscv_nn_activation_type type);
void riscv_relu_q7_ref(q7_t *data, uint16_t size);
void riscv_relu_q15_ref(q15_t *data, uint16_t size);
void riscv_relu6_s8_ref(q7_t *data, uint16_t size);
/*
*
* Basic math functions
*
*/
/*
 * Reference (_ref) s8 element-wise addition.
 * Parameters, in order: the two int8 input vectors; offset, multiplier and
 * shift for each input; a shared left_shift; the output buffer followed by
 * its offset, multiplier and shift; the output activation min/max; and the
 * block size. Returns a riscv_status.
 */
riscv_status riscv_elementwise_add_s8_ref(
    const int8_t *input_1_vect,
    const int8_t *input_2_vect,
    const int32_t input_1_offset,
    const int32_t input_1_mult,
    const int32_t input_1_shift,
    const int32_t input_2_offset,
    const int32_t input_2_mult,
    const int32_t input_2_shift,
    const int32_t left_shift,
    int8_t *output,
    const int32_t out_offset,
    const int32_t out_mult,
    const int32_t out_shift,
    const int32_t out_activation_min,
    const int32_t out_activation_max,
    const uint32_t block_size);

/*
 * Reference (_ref) s8 element-wise multiplication.
 * Same layout as the addition prototype above, except that each input only
 * carries an offset (no per-input multiplier/shift and no left_shift).
 * Returns a riscv_status.
 */
riscv_status riscv_elementwise_mul_s8_ref(
    const int8_t *input_1_vect,
    const int8_t *input_2_vect,
    const int32_t input_1_offset,
    const int32_t input_2_offset,
    int8_t *output,
    const int32_t out_offset,
    const int32_t out_mult,
    const int32_t out_shift,
    const int32_t out_activation_min,
    const int32_t out_activation_max,
    const uint32_t block_size);
/*
*
* Concatenation Functions
*
*/
/*
 * Reference (_ref) s8 concatenation prototypes, one per axis suffix
 * (w, x, y, z). Every variant takes the input pointer, the four input
 * dimensions (x, y, z, w), the output pointer and an offset for the named
 * axis. The x/y/z variants additionally take an output_<axis> value; the
 * w variant does not.
 */
void riscv_concatenation_s8_w_ref(
    const int8_t *input,
    const uint16_t input_x,
    const uint16_t input_y,
    const uint16_t input_z,
    const uint16_t input_w,
    int8_t *output,
    const uint32_t offset_w);

void riscv_concatenation_s8_x_ref(
    const int8_t *input,
    const uint16_t input_x,
    const uint16_t input_y,
    const uint16_t input_z,
    const uint16_t input_w,
    int8_t *output,
    const uint16_t output_x,
    const uint32_t offset_x);

void riscv_concatenation_s8_y_ref(
    const int8_t *input,
    const uint16_t input_x,
    const uint16_t input_y,
    const uint16_t input_z,
    const uint16_t input_w,
    int8_t *output,
    const uint16_t output_y,
    const uint32_t offset_y);

void riscv_concatenation_s8_z_ref(
    const int8_t *input,
    const uint16_t input_x,
    const uint16_t input_y,
    const uint16_t input_z,
    const uint16_t input_w,
    int8_t *output,
    const uint16_t output_z,
    const uint32_t offset_z);
/*
*
* Convolution Functions
*
*/
/*
 * Reference (_ref) "1 x n" s8 convolution.
 * Uses the structure-based argument layout: context, convolution params,
 * per-channel quantization params, then a dims/data pair for input, filter,
 * bias and output. Returns a riscv_status.
 */
riscv_status riscv_convolve_1_x_n_s8_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_conv_params *conv_params,
    const nmsis_nn_per_channel_quant_params *quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input_data,
    const nmsis_nn_dims *filter_dims,
    const q7_t *filter_data,
    const nmsis_nn_dims *bias_dims,
    const int32_t *bias_data,
    const nmsis_nn_dims *output_dims,
    q7_t *output_data);

/*
 * Buffer-size companion of the prototype above; takes the input and filter
 * dims and returns an int32_t.
 */
int32_t riscv_convolve_1_x_n_s8_get_buffer_size_ref(
    const nmsis_nn_dims *input_dims,
    const nmsis_nn_dims *filter_dims);
/*
 * Reference (_ref) 1x1 HWC q7 "fast" convolution, non-square form.
 * The argument list has separate x/y values for image size, kernel size,
 * padding, stride and output size, plus weights, bias, the two shift amounts
 * and two scratch buffers (bufferA, bufferB). Returns a riscv_status.
 */
riscv_status riscv_convolve_1x1_HWC_q7_fast_nonsquare_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in_x,
    const uint16_t dim_im_in_y,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel_x,
    const uint16_t dim_kernel_y,
    const uint16_t padding_x,
    const uint16_t padding_y,
    const uint16_t stride_x,
    const uint16_t stride_y,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out_x,
    const uint16_t dim_im_out_y,
    q15_t *bufferA,
    q7_t *bufferB);
/*
 * Reference (_ref) 1x1 s8 "fast" convolution.
 * Structure-based argument layout: context, convolution params, per-channel
 * quantization params, then dims/data pairs for input, filter, bias and
 * output. Returns a riscv_status.
 */
riscv_status riscv_convolve_1x1_s8_fast_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_conv_params *conv_params,
    const nmsis_nn_per_channel_quant_params *quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input_data,
    const nmsis_nn_dims *filter_dims,
    const q7_t *filter_data,
    const nmsis_nn_dims *bias_dims,
    const int32_t *bias_data,
    const nmsis_nn_dims *output_dims,
    q7_t *output_data);

/* Buffer-size companion; takes only the input dims and returns an int32_t. */
int32_t riscv_convolve_1x1_s8_fast_get_buffer_size_ref(
    const nmsis_nn_dims *input_dims);
/*
 * Reference (_ref) HWC q7 "basic" convolution, non-square form.
 * Separate x/y values are passed for image size, kernel size, padding,
 * stride and output size. Returns a riscv_status.
 */
riscv_status riscv_convolve_HWC_q7_basic_nonsquare_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in_x,
    const uint16_t dim_im_in_y,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel_x,
    const uint16_t dim_kernel_y,
    const uint16_t padding_x,
    const uint16_t padding_y,
    const uint16_t stride_x,
    const uint16_t stride_y,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out_x,
    const uint16_t dim_im_out_y,
    q15_t *bufferA,
    q7_t *bufferB);

/*
 * Reference (_ref) HWC q7 "basic" convolution with a single value for each
 * of image size, kernel size, padding, stride and output size.
 * Returns a riscv_status.
 */
riscv_status riscv_convolve_HWC_q7_basic_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out,
    q15_t *bufferA,
    q7_t *bufferB);
/*
 * Reference (_ref) HWC q7 "fast" convolution, non-square form.
 * Separate x/y values are passed for image size, kernel size, padding,
 * stride and output size. Returns a riscv_status.
 */
riscv_status riscv_convolve_HWC_q7_fast_nonsquare_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in_x,
    const uint16_t dim_im_in_y,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel_x,
    const uint16_t dim_kernel_y,
    const uint16_t padding_x,
    const uint16_t padding_y,
    const uint16_t stride_x,
    const uint16_t stride_y,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out_x,
    const uint16_t dim_im_out_y,
    q15_t *bufferA,
    q7_t *bufferB);

/*
 * Reference (_ref) HWC q7 "fast" convolution with a single value for each
 * of image size, kernel size, padding, stride and output size.
 * Returns a riscv_status.
 */
riscv_status riscv_convolve_HWC_q7_fast_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out,
    q15_t *bufferA,
    q7_t *bufferB);
/*
 * Reference (_ref) HWC q7 convolution, "RGB" variant.
 * The argument list matches the other single-dimension HWC q7 convolution
 * prototypes in this header (image, weights, kernel/padding/stride, bias,
 * shifts, output and two scratch buffers). Returns a riscv_status.
 */
riscv_status riscv_convolve_HWC_q7_RGB_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out,
    q15_t *bufferA,
    q7_t *bufferB);
/*
 * Reference (_ref) HWC q15 "basic" convolution.
 * Image, weights, bias and output are q15; bufferA is q15 and bufferB is q7.
 * Returns a riscv_status.
 */
riscv_status riscv_convolve_HWC_q15_basic_ref(
    const q15_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const q15_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const q15_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q15_t *Im_out,
    const uint16_t dim_im_out,
    q15_t *bufferA,
    q7_t *bufferB);

/*
 * Reference (_ref) HWC q15 "fast" convolution, non-square form, with
 * separate x/y values for image size, kernel size, padding, stride and
 * output size. Returns a riscv_status.
 */
riscv_status riscv_convolve_HWC_q15_fast_nonsquare_ref(
    const q15_t *Im_in,
    const uint16_t dim_im_in_x,
    const uint16_t dim_im_in_y,
    const uint16_t ch_im_in,
    const q15_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel_x,
    const uint16_t dim_kernel_y,
    const uint16_t padding_x,
    const uint16_t padding_y,
    const uint16_t stride_x,
    const uint16_t stride_y,
    const q15_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q15_t *Im_out,
    const uint16_t dim_im_out_x,
    const uint16_t dim_im_out_y,
    q15_t *bufferA,
    q7_t *bufferB);

/*
 * Reference (_ref) HWC q15 "fast" convolution; same argument list as the
 * q15 "basic" prototype above. Returns a riscv_status.
 */
riscv_status riscv_convolve_HWC_q15_fast_ref(
    const q15_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const q15_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const q15_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q15_t *Im_out,
    const uint16_t dim_im_out,
    q15_t *bufferA,
    q7_t *bufferB);
/*
 * Reference (_ref) s8 convolution.
 * Structure-based argument layout: context, convolution params, per-channel
 * quantization params, then dims/data pairs for input, filter, bias and
 * output. Returns a riscv_status.
 */
riscv_status riscv_convolve_s8_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_conv_params *conv_params,
    const nmsis_nn_per_channel_quant_params *quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input_data,
    const nmsis_nn_dims *filter_dims,
    const q7_t *filter_data,
    const nmsis_nn_dims *bias_dims,
    const int32_t *bias_data,
    const nmsis_nn_dims *output_dims,
    q7_t *output_data);

/* Buffer-size companion of riscv_convolve_s8_ref (input and filter dims). */
int32_t riscv_convolve_s8_get_buffer_size_ref(
    const nmsis_nn_dims *input_dims,
    const nmsis_nn_dims *filter_dims);

/*
 * Reference (_ref) s8 convolution "wrapper"; its argument list is identical
 * to riscv_convolve_s8_ref. Returns a riscv_status.
 */
riscv_status riscv_convolve_wrapper_s8_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_conv_params *conv_params,
    const nmsis_nn_per_channel_quant_params *quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input_data,
    const nmsis_nn_dims *filter_dims,
    const q7_t *filter_data,
    const nmsis_nn_dims *bias_dims,
    const int32_t *bias_data,
    const nmsis_nn_dims *output_dims,
    q7_t *output_data);

/*
 * Buffer-size companion of the wrapper; takes the convolution params plus
 * input, filter and output dims and returns an int32_t.
 */
int32_t riscv_convolve_wrapper_s8_get_buffer_size_ref(
    const nmsis_nn_conv_params *conv_params,
    const nmsis_nn_dims *input_dims,
    const nmsis_nn_dims *filter_dims,
    const nmsis_nn_dims *output_dims);
/*
 * Reference (_ref) s8 depthwise convolution, "3x3" variant.
 * Structure-based argument layout: context, depthwise-conv params,
 * per-channel quantization params, then dims/data pairs for input, filter
 * (data pointer named `kernel`), bias and output. Returns a riscv_status.
 */
riscv_status riscv_depthwise_conv_3x3_s8_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_dw_conv_params *dw_conv_params,
    const nmsis_nn_per_channel_quant_params *quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input,
    const nmsis_nn_dims *filter_dims,
    const q7_t *kernel,
    const nmsis_nn_dims *bias_dims,
    const int32_t *bias,
    const nmsis_nn_dims *output_dims,
    q7_t *output);

/*
 * Reference (_ref) s8 depthwise convolution, "opt" variant; same argument
 * list as the 3x3 prototype above. Returns a riscv_status.
 */
riscv_status riscv_depthwise_conv_s8_opt_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_dw_conv_params *dw_conv_params,
    const nmsis_nn_per_channel_quant_params *quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input,
    const nmsis_nn_dims *filter_dims,
    const q7_t *kernel,
    const nmsis_nn_dims *bias_dims,
    const int32_t *bias,
    const nmsis_nn_dims *output_dims,
    q7_t *output);

/* Buffer-size companion of the "opt" variant (input and filter dims). */
int32_t riscv_depthwise_conv_s8_opt_get_buffer_size_ref(
    const nmsis_nn_dims *input_dims,
    const nmsis_nn_dims *filter_dims);
/*
 * Internal-linkage helper prototypes for the s8 depthwise reference code.
 *
 * NOTE(review): both are declared `static` in a header and no definition is
 * visible in this file, so every translation unit that includes the header
 * receives its own internal-linkage declaration. Confirm that each includer
 * either defines these or never calls them.
 *
 * The "mult_4" variant uses int32_t for all scalar arguments; the "generic"
 * variant uses uint16_t for the size/stride/padding arguments. In both,
 * output_shift and output_mult are pointers.
 */
static void depthwise_conv_s8_mult_4_ref(
    const int8_t *input,
    const int32_t input_x,
    const int32_t input_y,
    const int32_t input_ch,
    const int8_t *kernel,
    const int32_t output_ch,
    const int32_t ch_mult,
    const int32_t kernel_x,
    const int32_t kernel_y,
    const int32_t pad_x,
    const int32_t pad_y,
    const int32_t stride_x,
    const int32_t stride_y,
    const int32_t *bias,
    int8_t *output,
    const int32_t *output_shift,
    const int32_t *output_mult,
    const int32_t output_x,
    const int32_t output_y,
    const int32_t output_offset,
    const int32_t input_offset,
    const int32_t output_activation_min,
    const int32_t output_activation_max);

static void depthwise_conv_s8_generic_ref(
    const q7_t *input,
    const uint16_t input_x,
    const uint16_t input_y,
    const uint16_t input_ch,
    const q7_t *kernel,
    const uint16_t output_ch,
    const uint16_t ch_mult,
    const uint16_t kernel_x,
    const uint16_t kernel_y,
    const uint16_t pad_x,
    const uint16_t pad_y,
    const uint16_t stride_x,
    const uint16_t stride_y,
    const int32_t *bias,
    q7_t *output,
    const int32_t *output_shift,
    const int32_t *output_mult,
    const uint16_t output_x,
    const uint16_t output_y,
    const int32_t output_offset,
    const int32_t input_offset,
    const int32_t output_activation_min,
    const int32_t output_activation_max);
/*
 * Reference (_ref) s8 depthwise convolution.
 * Structure-based argument layout: context, depthwise-conv params,
 * per-channel quantization params, then dims/data pairs for input, filter
 * (data pointer named `kernel`), bias and output. Returns a riscv_status.
 */
riscv_status riscv_depthwise_conv_s8_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_dw_conv_params *dw_conv_params,
    const nmsis_nn_per_channel_quant_params *quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input,
    const nmsis_nn_dims *filter_dims,
    const q7_t *kernel,
    const nmsis_nn_dims *bias_dims,
    const int32_t *bias,
    const nmsis_nn_dims *output_dims,
    q7_t *output);
/*
 * Internal-linkage helper prototypes for the u8 depthwise reference code.
 *
 * NOTE(review): both are declared `static` in a header and no definition is
 * visible in this file, so every translation unit that includes the header
 * receives its own internal-linkage declaration. Confirm that each includer
 * either defines these or never calls them.
 *
 * The two prototypes share one argument list. Unlike the s8 helpers,
 * output_shift and output_mult are passed by value and an additional
 * filter_offset is present.
 */
static void depthwise_conv_u8_mult_4_ref(
    const uint8_t *input,
    const int32_t input_x,
    const int32_t input_y,
    const int32_t input_ch,
    const uint8_t *kernel,
    const int32_t output_ch,
    const int32_t ch_mult,
    const int32_t kernel_x,
    const int32_t kernel_y,
    const int32_t pad_x,
    const int32_t pad_y,
    const int32_t stride_x,
    const int32_t stride_y,
    const int32_t *bias,
    uint8_t *output,
    const int32_t output_shift,
    const int32_t output_mult,
    const int32_t output_x,
    const int32_t output_y,
    const int32_t output_offset,
    const int32_t input_offset,
    const int32_t filter_offset,
    const int32_t output_activation_min,
    const int32_t output_activation_max);

static void depthwise_conv_u8_generic_ref(
    const uint8_t *input,
    const int32_t input_x,
    const int32_t input_y,
    const int32_t input_ch,
    const uint8_t *kernel,
    const int32_t output_ch,
    const int32_t ch_mult,
    const int32_t kernel_x,
    const int32_t kernel_y,
    const int32_t pad_x,
    const int32_t pad_y,
    const int32_t stride_x,
    const int32_t stride_y,
    const int32_t *bias,
    uint8_t *output,
    const int32_t output_shift,
    const int32_t output_mult,
    const int32_t output_x,
    const int32_t output_y,
    const int32_t output_offset,
    const int32_t input_offset,
    const int32_t filter_offset,
    const int32_t output_activation_min,
    const int32_t output_activation_max);
/*
 * Reference (_ref) u8 depthwise convolution, "basic_ver1".
 * Scalar argument layout: input and its x/y/channel sizes, kernel and its
 * x/y sizes, channel multiplier, x/y padding, stride and dilation, bias,
 * the input/filter/output offsets, output buffer and its x/y sizes, output
 * activation min/max, then output shift and multiplier.
 * Returns a riscv_status.
 */
riscv_status riscv_depthwise_conv_u8_basic_ver1_ref(
    const uint8_t *input,
    const uint16_t input_x,
    const uint16_t input_y,
    const uint16_t input_ch,
    const uint8_t *kernel,
    const uint16_t kernel_x,
    const uint16_t kernel_y,
    const int16_t ch_mult,
    const int16_t pad_x,
    const int16_t pad_y,
    const int16_t stride_x,
    const int16_t stride_y,
    const int16_t dilation_x,
    const int16_t dilation_y,
    const int32_t *bias,
    const int32_t input_offset,
    const int32_t filter_offset,
    const int32_t output_offset,
    uint8_t *output,
    const uint16_t output_x,
    const uint16_t output_y,
    const int32_t output_activation_min,
    const int32_t output_activation_max,
    const int32_t output_shift,
    const int32_t output_mult);
/*
 * Reference (_ref) s8 depthwise convolution "wrapper".
 * Structure-based argument layout: context, depthwise-conv params,
 * per-channel quantization params, then dims/data pairs for input, filter,
 * bias and output. Returns a riscv_status.
 */
riscv_status riscv_depthwise_conv_wrapper_s8_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_dw_conv_params *dw_conv_params,
    const nmsis_nn_per_channel_quant_params *quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input,
    const nmsis_nn_dims *filter_dims,
    const q7_t *filter,
    const nmsis_nn_dims *bias_dims,
    const int32_t *bias,
    const nmsis_nn_dims *output_dims,
    q7_t *output);

/*
 * Buffer-size companion of the wrapper; takes the depthwise-conv params plus
 * input, filter and output dims and returns an int32_t.
 */
int32_t riscv_depthwise_conv_wrapper_s8_get_buffer_size_ref(
    const nmsis_nn_dw_conv_params *dw_conv_params,
    const nmsis_nn_dims *input_dims,
    const nmsis_nn_dims *filter_dims,
    const nmsis_nn_dims *output_dims);
/*
 * Reference (_ref) HWC q7 depthwise-separable convolution, non-square form,
 * returning a riscv_status.
 * Note that this header also declares a void-returning
 * riscv_depthwise_separable_conv_HWC_q7_ref_nonsquare with the same
 * argument list; the two names differ only in where "_ref" sits.
 */
riscv_status riscv_depthwise_separable_conv_HWC_q7_nonsquare_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in_x,
    const uint16_t dim_im_in_y,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel_x,
    const uint16_t dim_kernel_y,
    const uint16_t padding_x,
    const uint16_t padding_y,
    const uint16_t stride_x,
    const uint16_t stride_y,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out_x,
    const uint16_t dim_im_out_y,
    q15_t *bufferA,
    q7_t *bufferB);
/*
 * Reference (_ref) s8 depthwise convolution core.
 * Takes a q7 `row` and a q15 `col` operand, the channel count, per-entry
 * shift/multiplier arrays, output offset, activation min/max, kernel size,
 * bias array and the output pointer. Returns a q7_t pointer.
 */
q7_t *riscv_nn_depthwise_conv_s8_core_ref(
    const q7_t *row,
    const q15_t *col,
    const uint16_t num_ch,
    const int32_t *out_shift,
    const int32_t *out_mult,
    const int32_t out_offset,
    const int32_t activation_min,
    const int32_t activation_max,
    const uint16_t kernel_size,
    const int32_t *const output_bias,
    q7_t *out);
/*
 * Reference (_ref) q7 matrix-multiplication kernels.
 * All three share one argument order (pA, pInBuffer, ch_im_out, numCol_A,
 * bias_shift, out_shift, bias, pOut) and return a q7_t pointer. The first
 * two take a q15 pInBuffer; the last ("q7_reordered") takes a q7 pInBuffer.
 */
q7_t *riscv_nn_mat_mult_kernel_q7_q15_ref(
    const q7_t *pA,
    const q15_t *pInBuffer,
    const uint16_t ch_im_out,
    const uint16_t numCol_A,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    const q7_t *bias,
    q7_t *pOut);

q7_t *riscv_nn_mat_mult_kernel_q7_q15_reordered_ref(
    const q7_t *pA,
    const q15_t *pInBuffer,
    const uint16_t ch_im_out,
    const uint16_t numCol_A,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    const q7_t *bias,
    q7_t *pOut);

q7_t *riscv_nn_mat_mult_kernel_q7_reordered_ref(
    const q7_t *pA,
    const q7_t *pInBuffer,
    const uint16_t ch_im_out,
    const uint16_t numCol_A,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    const q7_t *bias,
    q7_t *pOut);
/*
 * Reference (_ref) s8 x s16 matrix-multiplication kernel and its
 * "reordered" sibling; both share one argument list and return a q7_t
 * pointer. activation_min/max are int16_t here.
 */
q7_t *riscv_nn_mat_mult_kernel_s8_s16_ref(
    const q7_t *input_a,
    const q15_t *input_b,
    const uint16_t output_ch,
    const int32_t *out_shift,
    const int32_t *out_mult,
    const int32_t out_offset,
    const int16_t activation_min,
    const int16_t activation_max,
    const uint16_t num_col_a,
    const int32_t *const output_bias,
    q7_t *out_0);

q7_t *riscv_nn_mat_mult_kernel_s8_s16_reordered_ref(
    const q7_t *input_a,
    const q15_t *input_b,
    const uint16_t output_ch,
    const int32_t *out_shift,
    const int32_t *out_mult,
    const int32_t out_offset,
    const int16_t activation_min,
    const int16_t activation_max,
    const uint16_t num_col_a,
    const int32_t *const output_bias,
    q7_t *out_0);

/*
 * Reference (_ref) s8 matrix multiplication taking q7 row and column
 * operands, with separate output, column and row offsets.
 * Returns a q7_t pointer.
 */
q7_t *riscv_nn_mat_mult_s8_ref(
    const q7_t *input_row,
    const q7_t *input_col,
    const uint16_t output_ch,
    const uint16_t col_batches,
    const int32_t *output_shift,
    const int32_t *output_mult,
    const int32_t out_offset,
    const int32_t col_offset,
    const int32_t row_offset,
    const int16_t activation_min,
    const int16_t activation_max,
    const uint16_t row_len,
    const int32_t *const bias,
    q7_t *out);
/*
*
* Fully-connected Layer Functions
*
*/
/*
 * Reference (_ref) s8 fully-connected layer.
 * Structure-based argument layout: context, fully-connected params,
 * per-tensor quantization params, then dims/data pairs for input, filter
 * (data pointer named `kernel`), bias and output. Returns a riscv_status.
 */
riscv_status riscv_fully_connected_s8_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_fc_params *fc_params,
    const nmsis_nn_per_tensor_quant_params *quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input,
    const nmsis_nn_dims *filter_dims,
    const q7_t *kernel,
    const nmsis_nn_dims *bias_dims,
    const int32_t *bias,
    const nmsis_nn_dims *output_dims,
    q7_t *output);

/* Buffer-size companion; takes only the filter dims and returns an int32_t. */
int32_t riscv_fully_connected_s8_get_buffer_size_ref(
    const nmsis_nn_dims *filter_dims);
/*
*
* Pooling Functions
*
*/
/*
 * Reference (_ref) s8 average pooling.
 * Takes a context, pooling params, input dims and source data, filter dims,
 * output dims and the destination buffer. Returns a riscv_status.
 */
riscv_status riscv_avgpool_s8_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_pool_params *pool_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *src,
    const nmsis_nn_dims *filter_dims,
    const nmsis_nn_dims *output_dims,
    q7_t *dst);

/*
 * Buffer-size companion; takes dim_dst_width and ch_src as plain ints and
 * returns an int32_t.
 */
int32_t riscv_avgpool_s8_get_buffer_size_ref(
    const int dim_dst_width,
    const int ch_src);
/*
 * Internal-linkage helper prototypes declared in the pooling section.
 *
 * NOTE(review): both are declared `static` in a header and no definition is
 * visible in this file, so every translation unit that includes the header
 * receives its own internal-linkage declaration. Confirm that each includer
 * either defines these or never calls them.
 */
static void compare_and_replace_if_larger_q7_ref(
    q7_t *base,
    const q7_t *target,
    int32_t length);

static void clamp_output_ref(
    q7_t *source,
    int32_t length,
    const int32_t act_min,
    const int32_t act_max);
/*
 * Reference (_ref) s8 max pooling.
 * Takes a context, pooling params, input dims and source data, filter dims,
 * output dims and the destination buffer. Returns a riscv_status.
 */
riscv_status riscv_max_pool_s8_ref(
    const nmsis_nn_context *ctx,
    const nmsis_nn_pool_params *pool_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *src,
    const nmsis_nn_dims *filter_dims,
    const nmsis_nn_dims *output_dims,
    q7_t *dst);
/*
*
* Reshape Functions
*
*/
/*
 * Reference (_ref) s8 reshape: takes an input pointer, an output pointer
 * and a total_size value. Returns nothing.
 */
void riscv_reshape_s8_ref(
    const int8_t *input,
    int8_t *output,
    const uint32_t total_size);
/*
*
* Softmax Functions
*
*/
/*
 * Reference (_ref) softmax prototypes.
 * The q7/q15 forms take an input vector, its dim_vec and an output pointer;
 * the "with_batch" q7 form adds nb_batches before dim_vec. The s8/u8 forms
 * take num_rows, row_size, mult, shift and diff_min between the input and
 * output pointers. None of them return a value.
 */
void riscv_softmax_q7_ref(
    const q7_t *vec_in,
    const uint16_t dim_vec,
    q7_t *p_out);

void riscv_softmax_q15_ref(
    const q15_t *vec_in,
    const uint16_t dim_vec,
    q15_t *p_out);

void riscv_softmax_s8_ref(
    const int8_t *input,
    const int32_t num_rows,
    const int32_t row_size,
    const int32_t mult,
    const int32_t shift,
    const int32_t diff_min,
    int8_t *output);

void riscv_softmax_u8_ref(
    const uint8_t *input,
    const int32_t num_rows,
    const int32_t row_size,
    const int32_t mult,
    const int32_t shift,
    const int32_t diff_min,
    uint8_t *output);

void riscv_softmax_with_batch_q7_ref(
    const q7_t *vec_in,
    const uint16_t nb_batches,
    const uint16_t dim_vec,
    q7_t *p_out);
/*
*
* Neural Network Data Conversion Functions
*
*/
/*
 * Reference (_ref) helpers operating on q7 source data: the first writes
 * through a q15 destination pointer, the second through a q31 output
 * pointer. Each takes a 32-bit length/block size.
 */
void riscv_nn_accumulate_q7_to_q15_ref(
    q15_t *pDst,
    const q7_t *pSrc,
    uint32_t length);

void riscv_nn_add_q7_ref(
    const q7_t *input,
    q31_t *output,
    uint32_t block_size);
/*
 * Reference (_ref) s8 depthwise "nt_t" kernels, padded and non-padded.
 * Both share one argument list (lhs, rhs, input offset, channel count,
 * shift/multiplier arrays, output offset, activation min/max, row_x_col,
 * bias array, output pointer) and return a q7_t pointer.
 */
q7_t *riscv_nn_depthwise_conv_nt_t_padded_s8_ref(
    const q7_t *lhs,
    const q7_t *rhs,
    const int32_t input_offset,
    const uint16_t num_ch,
    const int32_t *out_shift,
    const int32_t *out_mult,
    const int32_t out_offset,
    const int32_t activation_min,
    const int32_t activation_max,
    const uint16_t row_x_col,
    const int32_t *const output_bias,
    q7_t *out);

q7_t *riscv_nn_depthwise_conv_nt_t_s8_ref(
    const q7_t *lhs,
    const q7_t *rhs,
    const int32_t input_offset,
    const uint16_t num_ch,
    const int32_t *out_shift,
    const int32_t *out_mult,
    const int32_t out_offset,
    const int32_t activation_min,
    const int32_t activation_max,
    const uint16_t row_x_col,
    const int32_t *const output_bias,
    q7_t *out);
/*
 * Reference (_ref) s8 matrix-multiplication cores ("1x" and "4x").
 * Both take a row element count, row/column base pointers and two int32
 * result pointers (sum_col, output); the 4x form adds an `offset` argument
 * after the element count. Both return a riscv_status.
 */
riscv_status riscv_nn_mat_mul_core_1x_s8_ref(
    int32_t row_elements,
    const int8_t *row_base,
    const int8_t *col_base,
    int32_t *const sum_col,
    int32_t *const output);

riscv_status riscv_nn_mat_mul_core_4x_s8_ref(
    const int32_t row_elements,
    const int32_t offset,
    const int8_t *row_base,
    const int8_t *col_base,
    int32_t *const sum_col,
    int32_t *const output);

/*
 * Reference (_ref) s8 "nt_t" matrix multiplication.
 * Takes lhs/rhs operands, a q31 bias, the destination, destination
 * multiplier and shift arrays, the lhs/rhs row counts and rhs column count,
 * lhs and destination offsets, and activation min/max.
 * Returns a riscv_status.
 */
riscv_status riscv_nn_mat_mult_nt_t_s8_ref(
    const q7_t *lhs,
    const q7_t *rhs,
    const q31_t *bias,
    q7_t *dst,
    const int32_t *dst_multipliers,
    const int32_t *dst_shifts,
    const int32_t lhs_rows,
    const int32_t rhs_rows,
    const int32_t rhs_cols,
    const int32_t lhs_offset,
    const int32_t dst_offset,
    const int32_t activation_min,
    const int32_t activation_max);
/*
 * Reference (_ref) q7 and q15 multiplication prototypes: two source
 * pointers, a destination pointer, an out_shift value and a block size.
 * The same two prototypes are repeated near the end of this header.
 */
void riscv_nn_mult_q7_ref(q7_t *pSrcA,
                          q7_t *pSrcB,
                          q7_t *pDst,
                          const uint16_t out_shift,
                          uint32_t blockSize);

void riscv_nn_mult_q15_ref(q15_t *pSrcA,
                           q15_t *pSrcB,
                           q15_t *pDst,
                           const uint16_t out_shift,
                           uint32_t blockSize);
/*
 * Reference (_ref) s8 vector-by-matrix multiplication ("t" variant).
 * Takes lhs/rhs operands, a q31 bias, the q7 destination, the lhs/rhs/dst
 * offsets, a single destination multiplier and shift, the rhs column and
 * row counts, and activation min/max. Returns a riscv_status.
 */
riscv_status riscv_nn_vec_mat_mult_t_s8_ref(
    const q7_t *lhs,
    const q7_t *rhs,
    const q31_t *bias,
    q7_t *dst,
    const int32_t lhs_offset,
    const int32_t rhs_offset,
    const int32_t dst_offset,
    const int32_t dst_multiplier,
    const int32_t dst_shift,
    const int32_t rhs_cols,
    const int32_t rhs_rows,
    const int32_t activation_min,
    const int32_t activation_max);
/*
 * Reference (_ref) q7 conversion helpers.
 * Each takes a q7 source, a q7 or q15 destination and a 32-bit block size;
 * the two "with_offset" forms additionally take a q15 offset.
 */
void riscv_q7_to_q7_no_shift_ref(
    const q7_t *pSrc,
    q7_t *pDst,
    uint32_t blockSize);

void riscv_q7_to_q7_reordered_no_shift_ref(
    const q7_t *pSrc,
    q7_t *pDst,
    uint32_t blockSize);

void riscv_q7_to_q15_no_shift_ref(
    const q7_t *pSrc,
    q15_t *pDst,
    uint32_t blockSize);

void riscv_q7_to_q15_reordered_no_shift_ref(
    const q7_t *pSrc,
    q15_t *pDst,
    uint32_t blockSize);

void riscv_q7_to_q15_reordered_with_offset_ref(
    const q7_t *src,
    q15_t *dst,
    uint32_t block_size,
    q15_t offset);

void riscv_q7_to_q15_with_offset_ref(
    const q7_t *src,
    q15_t *dst,
    uint32_t block_size,
    q15_t offset);
/*
 * Reference HWC q7 convolution (single value per spatial parameter).
 *
 *   Im_in       input image
 *   dim_im_in   input image dimension
 *   ch_im_in    number of input image channels
 *   wt          kernel weights
 *   ch_im_out   number of filters, i.e., output image channels
 *   dim_kernel  filter kernel size
 *   padding     padding sizes
 *   stride      stride
 *   bias        bias
 *   Im_out      output image
 *   dim_im_out  output image dimension
 *   bufferA     buffer space for input
 *   bufferB     buffer space for output
 */
void riscv_convolve_HWC_q7_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out,
    q15_t *bufferA,
    q7_t *bufferB);

/*
 * Reference HWC q7 convolution, non-square form (separate x/y values).
 *
 *   Im_in                        input image
 *   dim_im_in_x, dim_im_in_y     input image dimension x / y
 *   ch_im_in                     number of input image channels
 *   wt                           kernel weights
 *   ch_im_out                    number of filters, i.e., output image channels
 *   dim_kernel_x, dim_kernel_y   filter kernel size x / y
 *   padding_x, padding_y         padding sizes x / y
 *   stride_x, stride_y           stride x / y
 *   bias                         bias
 *   Im_out                       output image
 *   dim_im_out_x, dim_im_out_y   output image dimension x / y
 *   bufferA                      buffer space for input
 *   bufferB                      buffer space for output
 */
void riscv_convolve_HWC_q7_ref_nonsquare(
    const q7_t *Im_in,
    const uint16_t dim_im_in_x,
    const uint16_t dim_im_in_y,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel_x,
    const uint16_t dim_kernel_y,
    const uint16_t padding_x,
    const uint16_t padding_y,
    const uint16_t stride_x,
    const uint16_t stride_y,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out_x,
    const uint16_t dim_im_out_y,
    q15_t *bufferA,
    q7_t *bufferB);
/*
 * Reference HWC q15 convolution (single value per spatial parameter).
 *
 *   Im_in       input image
 *   dim_im_in   input image dimension
 *   ch_im_in    number of input image channels
 *   wt          kernel weights
 *   ch_im_out   number of filters, i.e., output image channels
 *   dim_kernel  filter kernel size
 *   padding     padding sizes
 *   stride      stride
 *   bias        bias
 *   Im_out      output image
 *   dim_im_out  output image dimension
 *   bufferA     buffer space for input
 *   bufferB     buffer space for output
 */
void riscv_convolve_HWC_q15_ref(
    const q15_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const q15_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const q15_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q15_t *Im_out,
    const uint16_t dim_im_out,
    q15_t *bufferA,
    q7_t *bufferB);

/*
 * Reference HWC q15 convolution, non-square form: the same arguments as
 * above but with separate x/y values for image size, kernel size, padding,
 * stride and output size.
 */
void riscv_convolve_HWC_q15_nonsquare_ref(
    const q15_t *Im_in,
    const uint16_t dim_im_in_x,
    const uint16_t dim_im_in_y,
    const uint16_t ch_im_in,
    const q15_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel_x,
    const uint16_t dim_kernel_y,
    const uint16_t padding_x,
    const uint16_t padding_y,
    const uint16_t stride_x,
    const uint16_t stride_y,
    const q15_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q15_t *Im_out,
    const uint16_t dim_im_out_x,
    const uint16_t dim_im_out_y,
    q15_t *bufferA,
    q7_t *bufferB);
/*
 * Reference HWC q7 depthwise-separable convolution (single value per
 * spatial parameter).
 *
 *   Im_in       input image
 *   dim_im_in   input image dimension
 *   ch_im_in    number of input image channels
 *   wt          kernel weights
 *   ch_im_out   number of filters, i.e., output image channels
 *   dim_kernel  filter kernel size
 *   padding     padding sizes
 *   stride      stride
 *   bias        bias
 *   bias_shift  amount of left-shift for bias
 *   out_shift   amount of right-shift for output
 *   Im_out      output image
 *   dim_im_out  output image dimension
 *   bufferA     buffer space for input
 *   bufferB     buffer space for output
 */
void riscv_depthwise_separable_conv_HWC_q7_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out,
    q15_t *bufferA,
    q7_t *bufferB);

/*
 * Reference HWC q7 depthwise-separable convolution, non-square form.
 *
 *   Im_in                        input image
 *   dim_im_in_x, dim_im_in_y     input image dimension x / y
 *   ch_im_in                     number of input image channels
 *   wt                           kernel weights
 *   ch_im_out                    number of filters, i.e., output image channels
 *   dim_kernel_x, dim_kernel_y   filter kernel size x / y
 *   padding_x, padding_y         padding sizes x / y
 *   stride_x, stride_y           stride x / y
 *   bias                         bias
 *   bias_shift                   amount of left-shift for bias
 *   out_shift                    amount of right-shift for output
 *   Im_out                       output image
 *   dim_im_out_x, dim_im_out_y   output image dimension x / y
 *   bufferA                      buffer space for input
 *   bufferB                      buffer space for output
 */
void riscv_depthwise_separable_conv_HWC_q7_ref_nonsquare(
    const q7_t *Im_in,
    const uint16_t dim_im_in_x,
    const uint16_t dim_im_in_y,
    const uint16_t ch_im_in,
    const q7_t *wt,
    const uint16_t ch_im_out,
    const uint16_t dim_kernel_x,
    const uint16_t dim_kernel_y,
    const uint16_t padding_x,
    const uint16_t padding_y,
    const uint16_t stride_x,
    const uint16_t stride_y,
    const q7_t *bias,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    q7_t *Im_out,
    const uint16_t dim_im_out_x,
    const uint16_t dim_im_out_y,
    q15_t *bufferA,
    q7_t *bufferB);
/*
*
* Fully-connected reference implementation
*
*/
/*
 * Reference fully-connected prototypes. All six share one argument order;
 * only the element types of pV, pM, bias and pOut differ.
 *
 *   pV           pointer to vector
 *   pM           pointer to matrix
 *   dim_vec      length of the vector
 *   num_of_rows  annotated "numCol of A" in the original comments
 *   bias_shift   amount of left-shift for bias
 *   out_shift    amount of right-shift for output
 *   bias         bias pointer (not annotated in the original)
 *   pOut         output operand
 *   vec_buffer   q15 buffer (not annotated in the original)
 */
void riscv_fully_connected_q7_ref(
    const q7_t *pV,
    const q7_t *pM,
    const uint16_t dim_vec,
    const uint16_t num_of_rows,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    const q7_t *bias,
    q7_t *pOut,
    q15_t *vec_buffer);

void riscv_fully_connected_q15_ref(
    const q15_t *pV,
    const q15_t *pM,
    const uint16_t dim_vec,
    const uint16_t num_of_rows,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    const q15_t *bias,
    q15_t *pOut,
    q15_t *vec_buffer);

void riscv_fully_connected_mat_q7_vec_q15_ref(
    const q15_t *pV,
    const q7_t *pM,
    const uint16_t dim_vec,
    const uint16_t num_of_rows,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    const q7_t *bias,
    q15_t *pOut,
    q15_t *vec_buffer);

void riscv_fully_connected_q7_opt_ref(
    const q7_t *pV,
    const q7_t *pM,
    const uint16_t dim_vec,
    const uint16_t num_of_rows,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    const q7_t *bias,
    q7_t *pOut,
    q15_t *vec_buffer);

void riscv_fully_connected_q15_opt_ref(
    const q15_t *pV,
    const q15_t *pM,
    const uint16_t dim_vec,
    const uint16_t num_of_rows,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    const q15_t *bias,
    q15_t *pOut,
    q15_t *vec_buffer);

void riscv_fully_connected_mat_q7_vec_q15_opt_ref(
    const q15_t *pV,
    const q7_t *pM,
    const uint16_t dim_vec,
    const uint16_t num_of_rows,
    const uint16_t bias_shift,
    const uint16_t out_shift,
    const q7_t *bias,
    q15_t *pOut,
    q15_t *vec_buffer);
/*
*
* Pooling reference implementation
*
*/
/*
 * Reference HWC q7 average- and max-pooling prototypes; both share one
 * argument list.
 *
 *   Im_in       input image
 *   dim_im_in   input image dimension
 *   ch_im_in    number of input image channels
 *   dim_kernel  window kernel size
 *   padding     padding sizes
 *   stride      stride
 *   dim_im_out  output image dimension
 *   bufferA     a buffer for local storage
 *   Im_out      output pointer (not annotated in the original)
 */
void riscv_avepool_q7_HWC_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const uint16_t dim_im_out,
    q7_t *bufferA,
    q7_t *Im_out);

void riscv_maxpool_q7_HWC_ref(
    const q7_t *Im_in,
    const uint16_t dim_im_in,
    const uint16_t ch_im_in,
    const uint16_t dim_kernel,
    const uint16_t padding,
    const uint16_t stride,
    const uint16_t dim_im_out,
    q7_t *bufferA,
    q7_t *Im_out);
/*
*
* SVDF reference implementation
*
*/
/*
 * Reference s8 vector-by-matrix multiplication used with SVDF.
 * Unlike the other reference prototypes in this header, the "ref" marker is
 * a prefix rather than a suffix. The destination is q15 and there is no
 * bias argument. Returns a riscv_status.
 */
riscv_status ref_nn_vec_mat_mult_t_svdf_s8(
    const q7_t *lhs,
    const q7_t *rhs,
    q15_t *dst,
    const int32_t lhs_offset,
    const int32_t rhs_offset,
    const int32_t dst_offset,
    const int32_t dst_multiplier,
    const int32_t dst_shift,
    const int32_t rhs_cols,
    const int32_t rhs_rows,
    const int32_t activation_min,
    const int32_t activation_max);

/*
 * Reference s8 SVDF layer.
 * Takes separate input/output contexts, SVDF params, separate input/output
 * per-tensor quantization params, then dims/data pairs for input, state
 * (q15, non-const), feature weights (q7), time weights (q15), bias (q31)
 * and output. Returns a riscv_status.
 */
riscv_status ref_svdf_s8(
    const nmsis_nn_context *input_ctx,
    const nmsis_nn_context *output_ctx,
    const nmsis_nn_svdf_params *svdf_params,
    const nmsis_nn_per_tensor_quant_params *input_quant_params,
    const nmsis_nn_per_tensor_quant_params *output_quant_params,
    const nmsis_nn_dims *input_dims,
    const q7_t *input_data,
    const nmsis_nn_dims *state_dims,
    q15_t *state_data,
    const nmsis_nn_dims *weights_feature_dims,
    const q7_t *weights_feature_data,
    const nmsis_nn_dims *weights_time_dims,
    const q15_t *weights_time_data,
    const nmsis_nn_dims *bias_dims,
    const q31_t *bias_data,
    const nmsis_nn_dims *output_dims,
    q7_t *output_data);
/*
*
* Other reference implementation
*
*/
/*
 * NOTE(review): these five prototypes repeat declarations that already
 * appear earlier in this header (the relu/relu6 entries in the activation
 * section and the riscv_nn_mult_q7/q15 pair). Repeated compatible
 * declarations are legal C, but the copies must be kept in sync by hand.
 */
void riscv_relu_q7_ref(
    q7_t *data,
    uint16_t size);

void riscv_relu_q15_ref(
    q15_t *data,
    uint16_t size);

void riscv_nn_mult_q7_ref(
    q7_t *pSrcA,
    q7_t *pSrcB,
    q7_t *pDst,
    const uint16_t out_shift,
    uint32_t blockSize);

void riscv_nn_mult_q15_ref(
    q15_t *pSrcA,
    q15_t *pSrcB,
    q15_t *pDst,
    const uint16_t out_shift,
    uint32_t blockSize);

void riscv_relu6_s8_ref(
    q7_t *data,
    uint16_t size);
#ifdef __cplusplus
}
#endif
#endif