[feat][nmsis] add nmsis component and nn,dsp demo

2021-09-26 13:38:51 +08:00 · 2021-09-26 13:38:51 +08:00 · 5d1126d0f0
commit 5d1126d0f0
parent b2aada479b
989 changed files with 286224 additions and 0 deletions
--- a/components/nmsis/CMakeLists.txt
+++ b/components/nmsis/CMakeLists.txt
@ -0,0 +1,46 @@
+################# Add global include #################
+# Public header directories exported by this component.
+list(APPEND ADD_INCLUDE
+    "${CMAKE_CURRENT_SOURCE_DIR}/core/inc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/dsp/inc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/nn/inc"
+)
+#######################################################
+
+################# Add private include #################
+# Header directories used only while building this component.
+list(APPEND ADD_PRIVATE_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/dsp/privateInc")
+#######################################################
+
+############## Add current dir source files ###########
+# Collect every C source below dsp/ and nn/. CONFIGURE_DEPENDS makes the build
+# re-check the glob, so added or removed files are picked up without a manual
+# re-configure.
+file(GLOB_RECURSE sources CONFIGURE_DEPENDS
+    "${CMAKE_CURRENT_SOURCE_DIR}/dsp/*.c"
+    "${CMAKE_CURRENT_SOURCE_DIR}/nn/*.c"
+)
+list(APPEND ADD_SRCS ${sources})
+# aux_source_directory(src ADD_SRCS)
+# list(REMOVE_ITEM ADD_SRCS "${CMAKE_CURRENT_SOURCE_DIR}")
+#######################################################
+
+########### Add required/dependent components #########
+# list(APPEND ADD_REQUIREMENTS common)
+#######################################################
+
+############ Add static libs ##########################
+#list(APPEND ADD_STATIC_LIB "libxxx.a")
+#######################################################
+
+############ Add dynamic libs #########################
+# list(APPEND ADD_DYNAMIC_LIB "libxxx.so")
+#######################################################
+
+############ Add global compile option ################
+# Compile options for this component that are presumably also applied to the
+# components depending on it (contrast with the private section below).
+# The list carries one preprocessor definition (__RISCV_FEATURE_MVE=0) and two
+# warning-suppression flags.
+list(APPEND ADD_DEFINITIONS -D__RISCV_FEATURE_MVE=0 -Wno-incompatible-pointer-types -Wno-parentheses)
+#######################################################
+
+############ Add private compile option ################
+# Compile options for this component only; they do not affect other modules.
+# list(APPEND ADD_PRIVATE_DEFINITIONS )
+#######################################################
+
+# Hand the ADD_* lists collected above to the SDK helper that creates the library.
+generate_library()
--- a/components/nmsis/core/inc/core_compatiable.h
+++ b/components/nmsis/core/inc/core_compatiable.h
@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __CORE_COMPATIABLE_H__
+#define __CORE_COMPATIABLE_H__
+/*!
+ * @file     core_compatiable.h
+ * @brief    ARM compatiable function definitions header file
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ===== ARM Compatiable Functions ===== */
+/**
+ * \defgroup NMSIS_Core_ARMCompatiable_Functions   ARM Compatiable Functions
+ * \ingroup  NMSIS_Core
+ * \brief    A few functions that are compatible with ARM CMSIS-Core.
+ * \details
+ *
+ * Here we provide a few functions that are compatible with ARM CMSIS-Core,
+ * mostly used in the DSP and NN library.
+ * @{
+ */
+/** \brief Instruction Synchronization Barrier, compatible with ARM; mapped to __RWMB() */
+#define __ISB() __RWMB()
+
+/** \brief Data Synchronization Barrier, compatible with ARM; mapped to __RWMB() */
+#define __DSB() __RWMB()
+
+/** \brief Data Memory Barrier, compatible with ARM; mapped to __RWMB() */
+#define __DMB() __RWMB()
+
+/** \brief LDRT Unprivileged (8 bit), ARM compatible; forwards to __LB */
+#define __LDRBT(ptr) __LB((ptr))
+/** \brief LDRT Unprivileged (16 bit), ARM compatible; forwards to __LH */
+#define __LDRHT(ptr) __LH((ptr))
+/** \brief LDRT Unprivileged (32 bit), ARM compatible; forwards to __LW */
+#define __LDRT(ptr) __LW((ptr))
+
+/* Note: the ARM-style store macros take (val, ptr) but forward the arguments
+ * in (ptr, val) order to __SB/__SH/__SW. */
+/** \brief STRT Unprivileged (8 bit), ARM compatible */
+#define __STRBT(val, ptr) __SB((ptr), (val))
+/** \brief STRT Unprivileged (16 bit), ARM compatible */
+#define __STRHT(val, ptr) __SH((ptr), (val))
+/** \brief STRT Unprivileged (32 bit), ARM compatible */
+#define __STRT(val, ptr) __SW((ptr), (val))
+
+/* ===== Saturation Operations ===== */
+/**
+ * \brief   Signed Saturate
+ * \details Saturates a signed value to the range of a \em sat-bit signed
+ *          integer, i.e. [-2^(sat-1), 2^(sat-1) - 1].
+ * \param [in]    val  Value to be saturated
+ * \param [in]    sat  Bit position to saturate to (1..32)
+ * \return             Saturated value
+ */
+#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)
+/* The sat argument is parenthesized before the decrement so that an expression
+ * argument (e.g. `n << 1` or `c ? 8 : 16`) is evaluated as a whole first. */
+#define __SSAT(val, sat) __RV_SCLIP32((val), ((sat) - 1))
+#else
+__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
+{
+    /* A sat outside 1..32 leaves the value untouched. */
+    if ((sat >= 1U) && (sat <= 32U)) {
+        const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
+        const int32_t min = -1 - max;
+        if (val > max) {
+            return max;
+        } else if (val < min) {
+            return min;
+        }
+    }
+    return val;
+}
+#endif
+
+/**
+ * \brief   Unsigned Saturate
+ * \details Saturates an unsigned value.
+ * \param [in]    val  Value to be saturated
+ * \param [in]    sat  Bit position to saturate to (0..31)
+ * \return             Saturated value
+ */
+#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)
+#define __USAT(val, sat) __RV_UCLIP32((val), (sat))
+#else
+__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
+{
+    /* A sat above 31 means no clipping: hand the value back reinterpreted. */
+    if (sat > 31U) {
+        return (uint32_t)val;
+    }
+
+    /* Largest value representable in sat unsigned bits. */
+    const uint32_t limit = ((1U << sat) - 1U);
+
+    if (val > (int32_t)limit) {
+        return limit;
+    }
+    if (val < 0) {
+        return 0U;
+    }
+    return (uint32_t)val;
+}
+#endif
+#if 0
+/* ===== Data Processing Operations ===== */
+/**
+ * \brief   Reverse byte order (32 bit)
+ * \details Reverses the byte order in unsigned integer value.
+ * For example, 0x12345678 becomes 0x78563412.
+ * \param [in]    value  Value to reverse
+ * \return               Reversed value
+ */
+__STATIC_FORCEINLINE uint32_t __REV(uint32_t value)
+{
+    /* Move every byte to its mirrored position, then merge the four parts. */
+    const uint32_t byte3_down = (value & 0xff000000) >> 24;
+    const uint32_t byte2_down = (value & 0x00ff0000) >> 8;
+    const uint32_t byte1_up = (value & 0x0000ff00) << 8;
+    const uint32_t byte0_up = (value & 0x000000ff) << 24;
+
+    return byte3_down | byte2_down | byte1_up | byte0_up;
+}
+
+/**
+ * \brief   Reverse byte order (16 bit)
+ * \details Reverses the byte order within each halfword of a word.
+ * For example, 0x12345678 becomes 0x34127856.
+ * \param [in]    value  Value to reverse
+ * \return               Reversed value
+ */
+__STATIC_FORCEINLINE uint32_t __REV16(uint32_t value)
+{
+    uint32_t result;
+
+    /* Swap the two bytes inside each halfword: byte3<->byte2 and byte1<->byte0.
+     * The second mask must select exactly byte 2 (0x00ff0000). */
+    result = ((value & 0xff000000) >> 8) | ((value & 0x00ff0000) << 8) | ((value & 0x0000ff00) >> 8) | ((value & 0x000000ff) << 8);
+
+    return result;
+}
+
+/**
+ * \brief   Reverse byte order (16 bit)
+ * \details Reverses the byte order in a 16-bit value
+ * and returns the signed 16-bit result.
+ * For example, 0x0080 becomes 0x8000.
+ * \param [in]    value  Value to reverse
+ * \return               Reversed value
+ */
+__STATIC_FORCEINLINE int16_t __REVSH(int16_t value)
+{
+    /* Exchange the high and low byte of the 16-bit input. */
+    const int high_to_low = (value & 0xff00) >> 8;
+    const int low_to_high = (value & 0x00ff) << 8;
+    int16_t swapped;
+
+    swapped = high_to_low | low_to_high;
+    return swapped;
+}
+#endif
+/**
+ * \brief   Rotate Right in unsigned value (32 bit)
+ * \details Rotate Right (immediate) provides the value of
+ * the contents of a register rotated by a variable number of bits.
+ * \param [in]    op1  Value to rotate
+ * \param [in]    op2  Number of Bits to rotate(0-31)
+ * \return               Rotated value
+ */
+__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
+{
+    /* Only the low five bits of the rotate count are used. */
+    const uint32_t shift = op2 & 0x1F;
+
+    /* A zero rotate is returned directly; this also avoids a 32-bit left shift. */
+    return (shift == 0U) ? op1 : ((op1 >> shift) | (op1 << (32U - shift)));
+}
+
+/**
+ * \brief   Reverse bit order of value
+ * \details Reverses the bit order of the given value.
+ * \param [in]    value  Value to reverse
+ * \return               Reversed value
+ */
+#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)
+#define __RBIT(value) __RV_BITREVI((value), 31)
+#else
+__STATIC_FORCEINLINE uint32_t __RBIT(uint32_t value)
+{
+    uint32_t reversed = value;  /* starts with the LSB of the input in place */
+    uint32_t pad = 31U;         /* remaining shift once the input runs out of set bits */
+
+    value >>= 1U;
+    while (value != 0U) {
+        /* Append the next input bit at the low end of the accumulator. */
+        reversed = (reversed << 1U) | (value & 1U);
+        pad--;
+        value >>= 1U;
+    }
+
+    /* Account for the high zero bits of the input that were never visited. */
+    reversed <<= pad;
+    return reversed;
+}
+#endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */
+
+/**
+ * \brief   Count leading zeros
+ * \details Counts the number of leading zeros of a data value.
+ * \param [in]  data  Value to count the leading zeros
+ * \return             number of leading zeros in value
+ */
+#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)
+#define __CLZ(data) __RV_CLZ32(data)
+#else
+__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
+{
+    uint8_t zeros = 0;
+    uint32_t inverted = ~data;  /* leading zeros of data become leading ones */
+
+    /* Count the leading ones; the shift feeds zeros in from the right, so the
+     * loop ends after at most 32 rounds. */
+    for (; (inverted & 0x80000000) != 0U; inverted <<= 1) {
+        zeros++;
+    }
+    return zeros;
+}
+#endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */
+
+/** @} */ /* End of Doxygen Group NMSIS_Core_ARMCompatiable_Functions */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __CORE_COMPATIABLE_H__ */
--- a/components/nmsis/core/inc/core_feature_base.h
+++ b/components/nmsis/core/inc/core_feature_base.h
--- a/components/nmsis/core/inc/core_feature_cache.h
+++ b/components/nmsis/core/inc/core_feature_cache.h
--- a/components/nmsis/core/inc/core_feature_dsp.h
+++ b/components/nmsis/core/inc/core_feature_dsp.h
--- a/components/nmsis/core/inc/core_feature_eclic.h
+++ b/components/nmsis/core/inc/core_feature_eclic.h
@ -0,0 +1,897 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __CORE_FEATURE_ECLIC__
+#define __CORE_FEATURE_ECLIC__
+/*!
+ * @file     core_feature_eclic.h
+ * @brief    ECLIC feature API header file for Nuclei N/NX Core
+ */
+/*
+ * ECLIC Feature Configuration Macro:
+ * 1. __ECLIC_PRESENT:  Define whether Enhanced Core Local Interrupt Controller (ECLIC) Unit is present or not
+ *   * 0: Not present
+ *   * 1: Present
+ * 2. __ECLIC_BASEADDR:  Base address of the ECLIC unit.
+ * 3. __ECLIC_INTCTLBITS:  Define the number of hardware bits that are actually implemented in the clicintctl registers.
+ *   Valid number is 1 - 8.
+ * 4. __ECLIC_INTNUM  : Define the external interrupt number of ECLIC Unit
+ *
+ */
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+#if defined(__ECLIC_PRESENT) && (__ECLIC_PRESENT == 1)
+/**
+ * \defgroup NMSIS_Core_ECLIC_Registers     Register Define and Type Definitions Of ECLIC
+ * \ingroup NMSIS_Core_Registers
+ * \brief   Type definitions and defines for eclic registers.
+ *
+ * @{
+ */
+
+/**
+ * \brief  Union type to access CLICFG configure register.
+ */
+typedef union
+{
+    struct {
+        uint8_t _reserved0:1;                   /*!< bit:     0   Reserved */
+        uint8_t nlbits:4;                       /*!< bit:  1..4   nlbits field (see CLIC_CLICCFG_NLBIT_Pos / CLIC_CLICCFG_NLBIT_Msk) */
+        uint8_t _reserved1:2;                   /*!< bit:  5..6   Reserved */
+        uint8_t _reserved2:1;                   /*!< bit:     7   Reserved */
+    } b;                                        /*!< Structure used for bit  access */
+    uint8_t w;                                  /*!< Type      used for byte access */
+} CLICCFG_Type;
+
+/**
+ * \brief  Union type to access CLICINFO information register.
+ */
+typedef union {
+    struct {
+        uint32_t numint:13;                     /*!< bit:  0..12   number of maximum interrupt inputs supported */
+        uint32_t version:8;                     /*!< bit:  13..20  bits 20:17 architecture version, bits 16:13 implementation version */
+        uint32_t intctlbits:4;                  /*!< bit:  21..24  number of hardware bits actually implemented in the clicintctl registers */
+        uint32_t _reserved0:7;                  /*!< bit:  25..31  Reserved */
+    } b;                                        /*!< Structure used for bit  access */
+    uint32_t w;                                 /*!< Type      used for word access */
+} CLICINFO_Type;
+
+/**
+ * \brief Access to the structure of a vector interrupt controller.
+ */
+/* Per-interrupt register group: four consecutive byte-wide registers. */
+typedef struct {
+    __IOM uint8_t  INTIP;                       /*!< Offset: 0x000 (R/W)  Interrupt pending register */
+    __IOM uint8_t  INTIE;                       /*!< Offset: 0x001 (R/W)  Interrupt enable register */
+    __IOM uint8_t  INTATTR;                     /*!< Offset: 0x002 (R/W)  Interrupt attributes register */
+    __IOM uint8_t  INTCTRL;                     /*!< Offset: 0x003 (R/W)  Interrupt input control register */
+} CLIC_CTRL_Type;
+
+/**
+ * \brief Register layout of the whole ECLIC unit: global registers followed by
+ *        one CLIC_CTRL_Type entry per interrupt starting at offset 0x1000.
+ */
+typedef struct {
+    __IOM uint8_t   CFG;                        /*!< Offset: 0x000 (R/W)  CLIC configuration register */
+    uint8_t RESERVED0[3];                       /*!< Offset: 0x001..0x003 padding */
+    __IM uint32_t  INFO;                        /*!< Offset: 0x004 (R/ )  CLIC information register */
+    uint8_t RESERVED1[3];                       /*!< Offset: 0x008..0x00A padding */
+    __IOM uint8_t  MTH;                         /*!< Offset: 0x00B (R/W)  CLIC machine mode threshold register */
+    uint32_t RESERVED2[0x3FD];                  /*!< Offset: 0x00C..0xFFF padding up to the CTRL array */
+    CLIC_CTRL_Type CTRL[4096];                  /*!< Offset: 0x1000 (R/W) CLIC register structure for INTIP, INTIE, INTATTR, INTCTL */
+} CLIC_Type;
+
+#define CLIC_CLICCFG_NLBIT_Pos                 1U                                       /*!< CLIC CLICCFG: NLBIT Position */
+#define CLIC_CLICCFG_NLBIT_Msk                 (0xFUL << CLIC_CLICCFG_NLBIT_Pos)        /*!< CLIC CLICCFG: NLBIT Mask */
+
+#define CLIC_CLICINFO_CTLBIT_Pos                21U                                     /*!< CLIC CLICINFO: CTLBIT Position */
+#define CLIC_CLICINFO_CTLBIT_Msk                (0xFUL << CLIC_CLICINFO_CTLBIT_Pos)     /*!< CLIC CLICINFO: CTLBIT Mask */
+
+#define CLIC_CLICINFO_VER_Pos                  13U                                      /*!< CLIC CLICINFO: VERSION Position */
+#define CLIC_CLICINFO_VER_Msk                  (0xFFUL << CLIC_CLICINFO_VER_Pos)        /*!< CLIC CLICINFO: VERSION Mask (bits 13..20) */
+
+#define CLIC_CLICINFO_NUM_Pos                  0U                                       /*!< CLIC CLICINFO: NUM Position */
+#define CLIC_CLICINFO_NUM_Msk                  (0x1FFFUL << CLIC_CLICINFO_NUM_Pos)      /*!< CLIC CLICINFO: NUM Mask (13 bits, 0..12, matching CLICINFO_Type.b.numint) */
+
+/* INTIP: bit 0 is the pending flag */
+#define CLIC_INTIP_IP_Pos                      0U                                       /*!< CLIC INTIP: IP Position */
+#define CLIC_INTIP_IP_Msk                      (0x1UL << CLIC_INTIP_IP_Pos)             /*!< CLIC INTIP: IP Mask */
+
+/* INTIE: bit 0 is the enable flag */
+#define CLIC_INTIE_IE_Pos                      0U                                       /*!< CLIC INTIE: IE Position */
+#define CLIC_INTIE_IE_Msk                      (0x1UL << CLIC_INTIE_IE_Pos)             /*!< CLIC INTIE: IE Mask */
+
+/* INTATTR: bits 2:1 select the trigger type, bit 0 selects vector mode */
+#define CLIC_INTATTR_TRIG_Pos                  1U                                       /*!< CLIC INTATTR: TRIG Position */
+#define CLIC_INTATTR_TRIG_Msk                  (0x3UL << CLIC_INTATTR_TRIG_Pos)         /*!< CLIC INTATTR: TRIG Mask */
+
+#define CLIC_INTATTR_SHV_Pos                   0U                                       /*!< CLIC INTATTR: SHV Position */
+#define CLIC_INTATTR_SHV_Msk                   (0x1UL << CLIC_INTATTR_SHV_Pos)          /*!< CLIC INTATTR: SHV Mask */
+
+#define ECLIC_MAX_NLBITS                       8U                                       /*!< Max nlbit of the CLICINTCTLBITS */
+/* NOTE(review): the name says MTVEC while the description says MTVT - confirm which CSR this mask applies to. */
+#define ECLIC_MODE_MTVEC_Msk                   3U                                       /*!< ECLIC Mode mask for MTVT CSR Register */
+
+#define ECLIC_NON_VECTOR_INTERRUPT             0x0                                      /*!< Non-Vector Interrupt Mode of ECLIC */
+#define ECLIC_VECTOR_INTERRUPT                 0x1                                      /*!< Vector Interrupt Mode of ECLIC */
+
+/**\brief ECLIC Trigger Enum for different Trigger Type */
+typedef enum ECLIC_TRIGGER {
+    ECLIC_LEVEL_TRIGGER = 0x0,          /*!< Level Triggered, trig[0] = 0 */
+    ECLIC_POSTIVE_EDGE_TRIGGER = 0x1,   /*!< Positive/Rising Edge Triggered, trig[0] = 1, trig[1] = 0 */
+    ECLIC_NEGTIVE_EDGE_TRIGGER = 0x3,   /*!< Negative/Falling Edge Triggered, trig[0] = 1, trig[1] = 1 */
+    ECLIC_MAX_TRIGGER = 0x3             /*!< MAX Supported Trigger Mode */
+} ECLIC_TRIGGER_Type;
+
+#ifndef __ECLIC_BASEADDR
+/* Base address of ECLIC(__ECLIC_BASEADDR) should be defined in <Device.h> */
+#error "__ECLIC_BASEADDR is not defined, please check!"
+#endif
+
+#ifndef __ECLIC_INTCTLBITS
+/* When the device header does not define __ECLIC_INTCTLBITS, fall back to
+ * reading it from ECLIC->INFO; note that this default expands to a function
+ * call, so the register is read at every use of the macro. */
+#define __ECLIC_INTCTLBITS                  (__ECLIC_GetInfoCtlbits())
+#endif
+
+/* ECLIC Memory mapping of Device */
+#define ECLIC_BASE                          __ECLIC_BASEADDR                            /*!< ECLIC Base Address */
+#define ECLIC                               ((CLIC_Type *) ECLIC_BASE)                  /*!< CLIC configuration struct */
+
+/** @} */ /* end of group NMSIS_Core_ECLIC_Registers */
+
+/* ##########################   ECLIC functions  #################################### */
+/**
+ * \defgroup   NMSIS_Core_IntExc        Interrupts and Exceptions
+ * \brief Functions that manage interrupts and exceptions via the ECLIC.
+ *
+ * @{
+ */
+
+/**
+ * \brief  Definition of IRQn numbers
+ * \details
+ * The core interrupt enumeration names for IRQn values are defined in the file <b><Device>.h</b>.
+ * - Interrupt ID(IRQn) from 0 to 18 are reserved for core internal interrupts.
+ * - Interrupt ID(IRQn) start from 19 represent device-specific external interrupts.
+ * - The first device-specific interrupt has the IRQn value 19.
+ *
+ * The table below describes the core interrupt names and their availability in various Nuclei Cores.
+ */
+/* The following enum IRQn definition in this file
+ * is only used for doxygen documentation generation,
+ * The <Device>.h is the real file to define it by vendor
+ */
+#if defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
+typedef enum IRQn {
+    /* ========= Nuclei N/NX Core Specific Interrupt Numbers  =========== */
+    /* Core Internal Interrupt IRQn definitions */
+    Reserved0_IRQn            =   0,              /*!<  Internal reserved */
+    Reserved1_IRQn            =   1,              /*!<  Internal reserved */
+    Reserved2_IRQn            =   2,              /*!<  Internal reserved */
+    SysTimerSW_IRQn           =   3,              /*!<  System Timer SW interrupt */
+    Reserved3_IRQn            =   4,              /*!<  Internal reserved */
+    Reserved4_IRQn            =   5,              /*!<  Internal reserved */
+    Reserved5_IRQn            =   6,              /*!<  Internal reserved */
+    SysTimer_IRQn             =   7,              /*!<  System Timer Interrupt */
+    Reserved6_IRQn            =   8,              /*!<  Internal reserved */
+    Reserved7_IRQn            =   9,              /*!<  Internal reserved */
+    Reserved8_IRQn            =  10,              /*!<  Internal reserved */
+    Reserved9_IRQn            =  11,              /*!<  Internal reserved */
+    Reserved10_IRQn           =  12,              /*!<  Internal reserved */
+    Reserved11_IRQn           =  13,              /*!<  Internal reserved */
+    Reserved12_IRQn           =  14,              /*!<  Internal reserved */
+    Reserved13_IRQn           =  15,              /*!<  Internal reserved */
+    Reserved14_IRQn           =  16,              /*!<  Internal reserved */
+    Reserved15_IRQn           =  17,              /*!<  Internal reserved */
+    Reserved16_IRQn           =  18,              /*!<  Internal reserved */
+
+    /* ========= Device Specific Interrupt Numbers  =================== */
+    /* TODO: add your device specific external interrupt numbers here.
+     * 19~max(NUM_INTERRUPT, 1023) is the number range reserved for the user.
+     * The maximum number of supported interrupts can be read from clicinfo.NUM_INTERRUPT.
+     * Name the entries according to the interrupt handlers defined in startup_Device.S,
+     * e.g.: Interrupt for Timer#1       eclic_tim1_handler   ->   TIM1_IRQn */
+    FirstDeviceSpecificInterrupt_IRQn    = 19,    /*!< First Device Specific Interrupt */
+    SOC_INT_MAX,                                  /*!< Number of total interrupts */
+} IRQn_Type;
+#endif /* __ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__ */
+
+/* ECLIC API selection: when NMSIS_ECLIC_VIRTUAL is defined, the ECLIC_* names
+ * come from a user-supplied header (default name "nmsis_eclic_virtual.h");
+ * otherwise every ECLIC_* name is an alias of the __ECLIC_* inline function
+ * defined in this file. */
+#ifdef NMSIS_ECLIC_VIRTUAL
+    #ifndef NMSIS_ECLIC_VIRTUAL_HEADER_FILE
+        #define NMSIS_ECLIC_VIRTUAL_HEADER_FILE "nmsis_eclic_virtual.h"
+    #endif
+    #include NMSIS_ECLIC_VIRTUAL_HEADER_FILE
+#else
+    #define ECLIC_SetCfgNlbits            __ECLIC_SetCfgNlbits
+    #define ECLIC_GetCfgNlbits            __ECLIC_GetCfgNlbits
+    #define ECLIC_GetInfoVer              __ECLIC_GetInfoVer
+    #define ECLIC_GetInfoCtlbits          __ECLIC_GetInfoCtlbits
+    #define ECLIC_GetInfoNum              __ECLIC_GetInfoNum
+    #define ECLIC_SetMth                  __ECLIC_SetMth
+    #define ECLIC_GetMth                  __ECLIC_GetMth
+    #define ECLIC_EnableIRQ               __ECLIC_EnableIRQ
+    #define ECLIC_GetEnableIRQ            __ECLIC_GetEnableIRQ
+    #define ECLIC_DisableIRQ              __ECLIC_DisableIRQ
+    #define ECLIC_SetPendingIRQ           __ECLIC_SetPendingIRQ
+    #define ECLIC_GetPendingIRQ           __ECLIC_GetPendingIRQ
+    #define ECLIC_ClearPendingIRQ         __ECLIC_ClearPendingIRQ
+    #define ECLIC_SetTrigIRQ              __ECLIC_SetTrigIRQ
+    #define ECLIC_GetTrigIRQ              __ECLIC_GetTrigIRQ
+    #define ECLIC_SetShvIRQ               __ECLIC_SetShvIRQ
+    #define ECLIC_GetShvIRQ               __ECLIC_GetShvIRQ
+    #define ECLIC_SetCtrlIRQ              __ECLIC_SetCtrlIRQ
+    #define ECLIC_GetCtrlIRQ              __ECLIC_GetCtrlIRQ
+    #define ECLIC_SetLevelIRQ             __ECLIC_SetLevelIRQ
+    #define ECLIC_GetLevelIRQ             __ECLIC_GetLevelIRQ
+    #define ECLIC_SetPriorityIRQ          __ECLIC_SetPriorityIRQ
+    #define ECLIC_GetPriorityIRQ          __ECLIC_GetPriorityIRQ
+
+#endif /* NMSIS_ECLIC_VIRTUAL */
+
+/* Vector table API selection, same scheme as above: a user header when
+ * NMSIS_VECTAB_VIRTUAL is defined, aliases to __ECLIC_SetVector /
+ * __ECLIC_GetVector otherwise. */
+#ifdef NMSIS_VECTAB_VIRTUAL
+    #ifndef NMSIS_VECTAB_VIRTUAL_HEADER_FILE
+        #define NMSIS_VECTAB_VIRTUAL_HEADER_FILE "nmsis_vectab_virtual.h"
+    #endif
+    #include NMSIS_VECTAB_VIRTUAL_HEADER_FILE
+#else
+    #define ECLIC_SetVector              __ECLIC_SetVector
+    #define ECLIC_GetVector              __ECLIC_GetVector
+#endif  /* (NMSIS_VECTAB_VIRTUAL) */
+
+/**
+ * \brief  Set nlbits value
+ * \details
+ * This function set the nlbits value of CLICCFG register.
+ * \param [in]    nlbits    nlbits value
+ * \remarks
+ * - nlbits is used to set the width of level in the CLICINTCTL[i].
+ * \sa
+ * - \ref ECLIC_GetCfgNlbits
+ */
+__STATIC_FORCEINLINE void __ECLIC_SetCfgNlbits(uint32_t nlbits)
+{
+    /* New field value, shifted into place and clipped to the NLBIT field. */
+    const uint8_t field = (uint8_t)((nlbits << CLIC_CLICCFG_NLBIT_Pos) & CLIC_CLICCFG_NLBIT_Msk);
+
+    ECLIC->CFG &= ~CLIC_CLICCFG_NLBIT_Msk;  /* drop the previous nlbits */
+    ECLIC->CFG |= field;                    /* install the new nlbits */
+}
+
+/**
+ * \brief  Get nlbits value
+ * \details
+ * This function get the nlbits value of CLICCFG register.
+ * \return   nlbits value of CLICCFG register
+ * \remarks
+ * - nlbits is used to set the width of level in the CLICINTCTL[i].
+ * \sa
+ * - \ref ECLIC_SetCfgNlbits
+ */
+__STATIC_FORCEINLINE uint32_t __ECLIC_GetCfgNlbits(void)
+{
+    /* Single read of CLICCFG, then extract the NLBIT field. */
+    const uint8_t cfg = ECLIC->CFG;
+
+    return (uint32_t)((cfg & CLIC_CLICCFG_NLBIT_Msk) >> CLIC_CLICCFG_NLBIT_Pos);
+}
+
+/**
+ * \brief  Get the ECLIC version number
+ * \details
+ * This function gets the hardware version information from CLICINFO register.
+ * \return   hardware version number in CLICINFO register.
+ * \remarks
+ * - This function gets hardware version information from CLICINFO register.
+ * - Bit 20:17 for architecture version, bit 16:13 for implementation version.
+ * \sa
+ * - \ref ECLIC_GetInfoNum
+*/
+__STATIC_FORCEINLINE uint32_t __ECLIC_GetInfoVer(void)
+{
+    /* Single read of CLICINFO, then extract the field selected by the VER mask. */
+    const uint32_t info = ECLIC->INFO;
+
+    return (uint32_t)((info & CLIC_CLICINFO_VER_Msk) >> CLIC_CLICINFO_VER_Pos);
+}
+
+/**
+ * \brief  Get CLICINTCTLBITS
+ * \details
+ * This function gets CLICINTCTLBITS from CLICINFO register.
+ * \return  CLICINTCTLBITS from CLICINFO register.
+ * \remarks
+ * - In the CLICINTCTL[i] registers, with 2 <= CLICINTCTLBITS <= 8.
+ * - The implemented bits are kept left-justified in the most-significant bits of each 8-bit
+ *   CLICINTCTL[I] register, with the lower unimplemented bits treated as hardwired to 1.
+ * \sa
+ * - \ref ECLIC_GetInfoNum
+ */
+__STATIC_FORCEINLINE uint32_t __ECLIC_GetInfoCtlbits(void)
+{
+    /* Single read of CLICINFO, then extract the CTLBIT field. */
+    const uint32_t info = ECLIC->INFO;
+
+    return (uint32_t)((info & CLIC_CLICINFO_CTLBIT_Msk) >> CLIC_CLICINFO_CTLBIT_Pos);
+}
+
+/**
+ * \brief  Get number of maximum interrupt inputs supported
+ * \details
+ * This function gets number of maximum interrupt inputs supported from CLICINFO register.
+ * \return  number of maximum interrupt inputs supported from CLICINFO register.
+ * \remarks
+ * - This function gets number of maximum interrupt inputs supported from CLICINFO register.
+ * - The num_interrupt field specifies the actual number of maximum interrupt inputs supported in this implementation.
+ * \sa
+ * - \ref ECLIC_GetInfoCtlbits
+ */
+__STATIC_FORCEINLINE uint32_t __ECLIC_GetInfoNum(void)
+{
+    /* Single read of CLICINFO, then extract the field selected by the NUM mask. */
+    const uint32_t info = ECLIC->INFO;
+
+    return (uint32_t)((info & CLIC_CLICINFO_NUM_Msk) >> CLIC_CLICINFO_NUM_Pos);
+}
+
+/**
+ * \brief  Set Machine Mode Interrupt Level Threshold
+ * \details
+ * This function sets machine mode interrupt level threshold.
+ * \param [in]  mth       Interrupt Level Threshold.
+ * \sa
+ * - \ref ECLIC_GetMth
+ */
+__STATIC_FORCEINLINE void __ECLIC_SetMth(uint8_t mth)
+{
+    /* Plain byte store: the whole MTH register is overwritten with mth. */
+    ECLIC->MTH = mth;
+}
+
+/**
+ * \brief  Get Machine Mode Interrupt Level Threshold
+ * \details
+ * This function gets machine mode interrupt level threshold.
+ * \return       Interrupt Level Threshold.
+ * \sa
+ * - \ref ECLIC_SetMth
+ */
+__STATIC_FORCEINLINE uint8_t __ECLIC_GetMth(void)
+{
+    /* Read the threshold register once and hand the raw byte back. */
+    const uint8_t threshold = ECLIC->MTH;
+
+    return threshold;
+}
+
+
+/**
+ * \brief  Enable a specific interrupt
+ * \details
+ * This function enables the specific interrupt \em IRQn.
+ * \param [in]  IRQn  Interrupt number
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_DisableIRQ
+ */
+__STATIC_FORCEINLINE void __ECLIC_EnableIRQ(IRQn_Type IRQn)
+{
+    /* Enable register of the selected interrupt. */
+    __IOM uint8_t *const intie = &ECLIC->CTRL[IRQn].INTIE;
+
+    /* Read-modify-write that sets the IE bit. */
+    *intie |= CLIC_INTIE_IE_Msk;
+}
+
+/**
+ * \brief  Get a specific interrupt enable status
+ * \details
+ * This function returns the interrupt enable status for the specific interrupt \em IRQn.
+ * \param [in]  IRQn  Interrupt number
+ * \returns
+ * - 0  Interrupt is not enabled
+ * - 1  Interrupt is enabled
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_EnableIRQ
+ * - \ref ECLIC_DisableIRQ
+ */
+__STATIC_FORCEINLINE uint32_t __ECLIC_GetEnableIRQ(IRQn_Type IRQn)
+{
+    /* Read the enable register once and keep only the IE bit. */
+    const uint32_t intie = (uint32_t)(ECLIC->CTRL[IRQn].INTIE);
+
+    return (intie & CLIC_INTIE_IE_Msk);
+}
+
+/**
+ * \brief  Disable a specific interrupt
+ * \details
+ * This function disables the specific interrupt \em IRQn.
+ * \param [in]  IRQn  Number of the external interrupt to disable
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_EnableIRQ
+ */
+__STATIC_FORCEINLINE void __ECLIC_DisableIRQ(IRQn_Type IRQn)
+{
+    /* Enable register of the selected interrupt. */
+    __IOM uint8_t *const intie = &ECLIC->CTRL[IRQn].INTIE;
+
+    /* Read-modify-write that clears the IE bit. */
+    *intie &= ~CLIC_INTIE_IE_Msk;
+}
+
+/**
+ * \brief  Get the pending specific interrupt
+ * \details
+ * This function returns the pending status of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \returns
+ * - 0  Interrupt is not pending
+ * - 1  Interrupt is pending
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_SetPendingIRQ
+ * - \ref ECLIC_ClearPendingIRQ
+ */
+__STATIC_FORCEINLINE int32_t __ECLIC_GetPendingIRQ(IRQn_Type IRQn)
+{
+    /* Read the pending register once and keep only the IP bit. */
+    const uint32_t intip = (uint32_t)(ECLIC->CTRL[IRQn].INTIP);
+
+    return (intip & CLIC_INTIP_IP_Msk);
+}
+
+/**
+ * \brief  Set a specific interrupt to pending
+ * \details
+ * This function sets the pending bit for the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_GetPendingIRQ
+ * - \ref ECLIC_ClearPendingIRQ
+ */
+__STATIC_FORCEINLINE void __ECLIC_SetPendingIRQ(IRQn_Type IRQn)
+{
+    /* Pending register of the selected interrupt. */
+    __IOM uint8_t *const intip = &ECLIC->CTRL[IRQn].INTIP;
+
+    /* Read-modify-write that sets the IP bit. */
+    *intip |= CLIC_INTIP_IP_Msk;
+}
+
+/**
+ * \brief  Clear a specific interrupt from pending
+ * \details
+ * This function removes the pending state of the specific interrupt \em IRQn.
+ * \em IRQn cannot be a negative number.
+ * \param [in]      IRQn  Interrupt number
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_SetPendingIRQ
+ * - \ref ECLIC_GetPendingIRQ
+ */
+__STATIC_FORCEINLINE void __ECLIC_ClearPendingIRQ(IRQn_Type IRQn)
+{
+    /* Pending register of the selected interrupt. */
+    __IOM uint8_t *const intip = &ECLIC->CTRL[IRQn].INTIP;
+
+    /* Read-modify-write that clears the IP bit. */
+    *intip &= ~CLIC_INTIP_IP_Msk;
+}
+
+/**
+ * \brief  Set trigger mode and polarity for a specific interrupt
+ * \details
+ * This function set trigger mode and polarity of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \param [in]      trig
+ *                   - 00  level trigger, \ref ECLIC_LEVEL_TRIGGER
+ *                   - 01  positive edge trigger, \ref ECLIC_POSTIVE_EDGE_TRIGGER
+ *                   - 02  level trigger, \ref ECLIC_LEVEL_TRIGGER
+ *                   - 03  negative edge trigger, \ref ECLIC_NEGTIVE_EDGE_TRIGGER
+ * \remarks
+ * - IRQn must not be negative.
+ *
+ * \sa
+ * - \ref ECLIC_GetTrigIRQ
+ */
+__STATIC_FORCEINLINE void __ECLIC_SetTrigIRQ(IRQn_Type IRQn, uint32_t trig)
+{
+    /* Clear the previous trigger selection first. */
+    ECLIC->CTRL[IRQn].INTATTR &= ~CLIC_INTATTR_TRIG_Msk;
+    /* Clip the shifted value to the TRIG field so that an out-of-range trig
+     * cannot set any other INTATTR bit; values 0..3 behave as before. */
+    ECLIC->CTRL[IRQn].INTATTR |= (uint8_t)((trig << CLIC_INTATTR_TRIG_Pos) & CLIC_INTATTR_TRIG_Msk);
+}
+
+/**
+ * \brief  Get trigger mode and polarity for a specific interrupt
+ * \details
+ * This function get trigger mode and polarity of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \return
+ *                 - 00  level trigger, \ref ECLIC_LEVEL_TRIGGER
+ *                 - 01  positive edge trigger, \ref ECLIC_POSTIVE_EDGE_TRIGGER
+ *                 - 02  level trigger, \ref ECLIC_LEVEL_TRIGGER
+ *                 - 03  negative edge trigger, \ref ECLIC_NEGTIVE_EDGE_TRIGGER
+ * \remarks
+ *     - IRQn must not be negative.
+ * \sa
+ *     - \ref ECLIC_SetTrigIRQ
+ */
+__STATIC_FORCEINLINE uint32_t __ECLIC_GetTrigIRQ(IRQn_Type IRQn)
+{
+    /* Read the attribute register once and extract the TRIG field. */
+    const uint8_t attr = ECLIC->CTRL[IRQn].INTATTR;
+
+    return (uint32_t)((attr & CLIC_INTATTR_TRIG_Msk) >> CLIC_INTATTR_TRIG_Pos);
+}
+
+/**
+ * \brief  Set interrupt working mode for a specific interrupt
+ * \details
+ * This function set selective hardware vector or non-vector working mode of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \param [in]      shv
+ *                        - 0  non-vector mode, \ref ECLIC_NON_VECTOR_INTERRUPT
+ *                        - 1  vector mode, \ref ECLIC_VECTOR_INTERRUPT
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_GetShvIRQ
+ */
+__STATIC_FORCEINLINE void __ECLIC_SetShvIRQ(IRQn_Type IRQn, uint32_t shv)
+{
+    /* Clear the previous vector-mode selection first. */
+    ECLIC->CTRL[IRQn].INTATTR &= ~CLIC_INTATTR_SHV_Msk;
+    /* Clip the shifted value to the SHV bit so that an out-of-range shv
+     * cannot set any other INTATTR bit; values 0 and 1 behave as before. */
+    ECLIC->CTRL[IRQn].INTATTR |= (uint8_t)((shv << CLIC_INTATTR_SHV_Pos) & CLIC_INTATTR_SHV_Msk);
+}
+
+/**
+ * \brief  Get interrupt working mode for a specific interrupt
+ * \details
+ * This function get selective hardware vector or non-vector working mode of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \return       shv
+ *                        - 0  non-vector mode, \ref ECLIC_NON_VECTOR_INTERRUPT
+ *                        - 1  vector mode, \ref ECLIC_VECTOR_INTERRUPT
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_SetShvIRQ
+ */
+__STATIC_FORCEINLINE uint32_t __ECLIC_GetShvIRQ(IRQn_Type IRQn)
+{
+    /* Isolate the SHV field of this interrupt's attribute register and
+     * shift it down to bit 0. The cast matches the declared (unsigned)
+     * return type. */
+    return ((uint32_t)(((ECLIC->CTRL[IRQn].INTATTR) & CLIC_INTATTR_SHV_Msk)>>CLIC_INTATTR_SHV_Pos));
+}
+
+/**
+ * \brief  Modify ECLIC Interrupt Input Control Register for a specific interrupt
+ * \details
+ * This function modify ECLIC Interrupt Input Control(CLICINTCTL[i]) register of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \param [in]      intctrl  Set value for CLICINTCTL[i] register
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_GetCtrlIRQ
+ */
+__STATIC_FORCEINLINE void __ECLIC_SetCtrlIRQ(IRQn_Type IRQn, uint8_t intctrl)
+{
+    /* Overwrites the whole INTCTRL register of this interrupt; callers that
+     * want to change only part of it must read it first and merge the bits
+     * themselves. */
+    ECLIC->CTRL[IRQn].INTCTRL = intctrl;
+}
+
+/**
+ * \brief  Get ECLIC Interrupt Input Control Register value for a specific interrupt
+ * \details
+ * This function gets the ECLIC Interrupt Input Control register value of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \return       value of ECLIC Interrupt Input Control register
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_SetCtrlIRQ
+ */
+__STATIC_FORCEINLINE uint8_t __ECLIC_GetCtrlIRQ(IRQn_Type IRQn)
+{
+    /* Single read of this interrupt's INTCTRL register. */
+    const uint8_t ctrl_value = ECLIC->CTRL[IRQn].INTCTRL;
+
+    return ctrl_value;
+}
+
+/**
+ * \brief  Set ECLIC Interrupt level of a specific interrupt
+ * \details
+ * This function set interrupt level of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \param [in]      lvl_abs   Interrupt level
+ * \remarks
+ * - IRQn must not be negative.
+ * - If lvl_abs to be set is larger than the max level allowed, it will be force to be max level.
+ * - When you set level value you need use clciinfo.nlbits to get the width of level.
+ *   Then we could know the maximum of level. CLICINTCTLBITS is how many total bits are
+ *   present in the CLICINTCTL register.
+ * \sa
+ * - \ref ECLIC_GetLevelIRQ
+ */
+__STATIC_FORCEINLINE void __ECLIC_SetLevelIRQ(IRQn_Type IRQn, uint8_t lvl_abs)
+{
+    uint8_t level_bits = __ECLIC_GetCfgNlbits();
+    const uint8_t ctl_bits = (uint8_t)__ECLIC_INTCTLBITS;
+
+    /* No level bits configured: there is no level field to program. */
+    if (level_bits == 0) {
+        return;
+    }
+
+    /* The level field cannot be wider than __ECLIC_INTCTLBITS. */
+    level_bits = (level_bits > ctl_bits) ? ctl_bits : level_bits;
+
+    /* Saturate the requested level to the largest value the field can hold. */
+    const uint8_t level_max = ((1 << level_bits) - 1);
+    const uint8_t level = (lvl_abs > level_max) ? level_max : lvl_abs;
+
+    /* The level occupies the top level_bits bits of the control value. */
+    const uint8_t level_field = level << (ECLIC_MAX_NLBITS - level_bits);
+
+    /* Shift left then right (truncating to 8 bits in between) to discard the
+     * current level field while keeping the remaining low bits. */
+    uint8_t low_bits = __ECLIC_GetCtrlIRQ(IRQn);
+    low_bits = low_bits << level_bits;
+    low_bits = low_bits >> level_bits;
+
+    __ECLIC_SetCtrlIRQ(IRQn, (low_bits | level_field));
+}
+
+/**
+ * \brief  Get ECLIC Interrupt level of a specific interrupt
+ * \details
+ * This function get interrupt level of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \return         Interrupt level
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_SetLevelIRQ
+ */
+__STATIC_FORCEINLINE uint8_t __ECLIC_GetLevelIRQ(IRQn_Type IRQn)
+{
+    uint8_t level_bits = __ECLIC_GetCfgNlbits();
+    const uint8_t ctl_bits = (uint8_t)__ECLIC_INTCTLBITS;
+
+    /* No level bits configured: report level 0 without touching INTCTRL. */
+    if (level_bits == 0) {
+        return 0;
+    }
+
+    /* The level field cannot be wider than __ECLIC_INTCTLBITS. */
+    if (level_bits > ctl_bits) {
+        level_bits = ctl_bits;
+    }
+
+    /* The level sits in the top level_bits bits; shift it down to bit 0. */
+    return (uint8_t)(__ECLIC_GetCtrlIRQ(IRQn) >> (ECLIC_MAX_NLBITS - level_bits));
+}
+
+/**
+ * \brief  Set ECLIC Interrupt priority of a specific interrupt
+ * \details
+ * This function sets the interrupt priority of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \param [in]      pri   Interrupt priority
+ * \remarks
+ * - IRQn must not be negative.
+ * - If pri to be set is larger than the max priority allowed, it will be force to be max priority.
+ * - Priority width is CLICINTCTLBITS minus clciinfo.nlbits if clciinfo.nlbits
+ *   is less than CLICINTCTLBITS. Otherwise priority width is 0.
+ * \sa
+ * - \ref ECLIC_GetPriorityIRQ
+ */
+__STATIC_FORCEINLINE void __ECLIC_SetPriorityIRQ(IRQn_Type IRQn, uint8_t pri)
+{
+    const uint8_t level_bits = __ECLIC_GetCfgNlbits();
+    const uint8_t ctl_bits = (uint8_t)__ECLIC_INTCTLBITS;
+
+    /* All control bits are used for the level: there is no priority field. */
+    if (level_bits >= ctl_bits) {
+        return;
+    }
+
+    /* Saturate to the largest priority that fits in (ctl_bits - level_bits) bits. */
+    const uint8_t pri_max = ((1 << (ctl_bits - level_bits)) - 1);
+    uint8_t pri_field = (pri > pri_max) ? pri_max : pri;
+
+    /* Place the priority directly below the top ctl_bits boundary and set
+     * every bit below the top ctl_bits bits to 1. */
+    pri_field = pri_field << (ECLIC_MAX_NLBITS - ctl_bits);
+    pri_field = pri_field | (uint8_t)(((uint8_t)(-1)) >> ctl_bits);
+
+    /* Shift right then left to keep only the current level field (top
+     * level_bits bits) of the existing control value. */
+    uint8_t level_field = __ECLIC_GetCtrlIRQ(IRQn);
+    level_field = level_field >> (ECLIC_MAX_NLBITS - level_bits);
+    level_field = level_field << (ECLIC_MAX_NLBITS - level_bits);
+
+    __ECLIC_SetCtrlIRQ(IRQn, (level_field | pri_field));
+}
+
+/**
+ * \brief  Get ECLIC Interrupt priority of a specific interrupt
+ * \details
+ * This function get interrupt priority of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \return   Interrupt priority
+ * \remarks
+ * - IRQn must not be negative.
+ * \sa
+ * - \ref ECLIC_SetPriorityIRQ
+ */
+__STATIC_FORCEINLINE uint8_t __ECLIC_GetPriorityIRQ(IRQn_Type IRQn)
+{
+    const uint8_t level_bits = __ECLIC_GetCfgNlbits();
+    const uint8_t ctl_bits = (uint8_t)__ECLIC_INTCTLBITS;
+
+    /* All control bits are used for the level: priority is always 0. */
+    if (level_bits >= ctl_bits) {
+        return 0;
+    }
+
+    /* Shift left then right (truncating to 8 bits in between) to strip the
+     * level field from the top of the control value. */
+    uint8_t pri_field = __ECLIC_GetCtrlIRQ(IRQn);
+    pri_field = pri_field << level_bits;
+    pri_field = pri_field >> level_bits;
+
+    /* Drop the bits below the top ctl_bits bits so the priority starts at bit 0. */
+    return (uint8_t)(pri_field >> (ECLIC_MAX_NLBITS - ctl_bits));
+}
+
+/**
+ * \brief  Set Interrupt Vector of a specific interrupt
+ * \details
+ * This function set interrupt handler address of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \param [in]      vector   Interrupt handler address
+ * \remarks
+ * - IRQn must not be negative.
+ * - You can set the \ref CSR_CSR_MTVT to set interrupt vector table entry address.
+ * - If your vector table is placed in readonly section, the vector for IRQn will not be modified.
+ *   For this case, you need to use the correct irq handler name defined in your vector table as
+ *   your irq handler function name.
+ * - This function will only work correctly when the vector table is placed in an read-write enabled section.
+ * \sa
+ * - \ref ECLIC_GetVector
+ */
+__STATIC_FORCEINLINE void __ECLIC_SetVector(IRQn_Type IRQn, rv_csr_t vector)
+{
+    /* The vector table base is read from CSR_MTVT; each entry is 4 bytes
+     * wide when XLEN is 32 and 8 bytes wide otherwise. */
+#if __RISCV_XLEN == 32
+    uint32_t table_addr = (uint32_t)__RV_CSR_READ(CSR_MTVT);
+    unsigned long *entry = (unsigned long *)(table_addr + ((int32_t)IRQn) * 4);
+#else // XLEN == 64; TODO Need cover for XLEN=128 case in future
+    uint64_t table_addr = (uint64_t)__RV_CSR_READ(CSR_MTVT);
+    unsigned long *entry = (unsigned long *)(table_addr + ((int32_t)IRQn) * 8);
+#endif
+
+    /* Store the handler address into the table slot of IRQn. */
+    *entry = vector;
+}
+
+/**
+ * \brief  Get Interrupt Vector of a specific interrupt
+ * \details
+ * This function get interrupt handler address of the specific interrupt \em IRQn.
+ * \param [in]      IRQn  Interrupt number
+ * \return        Interrupt handler address
+ * \remarks
+ * - IRQn must not be negative.
+ * - You can read \ref CSR_CSR_MTVT to get interrupt vector table entry address.
+ * \sa
+ *     - \ref ECLIC_SetVector
+ */
+__STATIC_FORCEINLINE rv_csr_t __ECLIC_GetVector(IRQn_Type IRQn)
+{
+    /* The vector table base is read from CSR_MTVT; each entry is 4 bytes
+     * wide when XLEN is 32 and 8 bytes wide otherwise. */
+#if __RISCV_XLEN == 32
+    uint32_t *entry = (uint32_t *)(__RV_CSR_READ(CSR_MTVT)+IRQn*4);
+#else // XLEN == 64; TODO Need cover for XLEN=128 case in future
+    uint64_t *entry = (uint64_t *)(__RV_CSR_READ(CSR_MTVT)+IRQn*8);
+#endif
+
+    return (*entry);
+}
+
+/**
+ * \brief  Set Exception entry address
+ * \details
+ * This function set exception handler address to 'CSR_MTVEC'.
+ * \param [in]      addr  Exception handler address
+ * \remarks
+ * - This function is used to set the exception handler address in 'CSR_MTVEC'. The low 6 bits of addr are cleared (64-byte alignment) before ECLIC_MODE_MTVEC_Msk is ORed in.
+ * \sa
+ * - \ref __get_exc_entry
+ */
+__STATIC_FORCEINLINE void __set_exc_entry(rv_csr_t addr)
+{
+    /* Clear the low 6 bits of the entry address, then OR in the ECLIC mode
+     * bits so the value can be written to mtvec in one go. */
+    rv_csr_t mtvec_value = addr & (rv_csr_t)(~0x3F);
+
+    mtvec_value |= ECLIC_MODE_MTVEC_Msk;
+    __RV_CSR_WRITE(CSR_MTVEC, mtvec_value);
+}
+
+/**
+ * \brief  Get Exception entry address
+ * \details
+ * This function get exception handler address from 'CSR_MTVEC'.
+ * \return       Exception handler address
+ * \remarks
+ * - This function use to get exception handler address from 'CSR_MTVEC'. Address is 4 bytes align
+ * \sa
+ * - \ref __set_exc_entry
+ */
+__STATIC_FORCEINLINE rv_csr_t __get_exc_entry(void)
+{
+    unsigned long mtvec_value = __RV_CSR_READ(CSR_MTVEC);
+
+    /* Strip the ECLIC mode bits; what is left is the entry address. */
+    return (mtvec_value & ~ECLIC_MODE_MTVEC_Msk);
+}
+
+/**
+ * \brief  Set Non-vector interrupt entry address
+ * \details
+ * This function set Non-vector interrupt address.
+ * \param [in]      addr  Non-vector interrupt entry address
+ * \remarks
+ * - This function use to set non-vector interrupt entry address to 'CSR_MTVT2' if
+ * - CSR_MTVT2 bit0 is 1. If 'CSR_MTVT2' bit0 is 0 then set address to 'CSR_MTVEC'
+ * \sa
+ * - \ref __get_nonvec_entry
+ */
+__STATIC_FORCEINLINE void __set_nonvec_entry(rv_csr_t addr)
+{
+    /* Bit 0 of MTVT2 clear: the non-vector entry is taken from mtvec, so
+     * program mtvec (low 6 bits cleared, ECLIC mode bits set). */
+    if ((__RV_CSR_READ(CSR_MTVT2) & 0x1) == 0) {
+        addr &= (rv_csr_t)(~0x3F);
+        addr |= ECLIC_MODE_MTVEC_Msk;
+        __RV_CSR_WRITE(CSR_MTVEC, addr);
+        return;
+    }
+
+    /* Bit 0 of MTVT2 set: write the entry to MTVT2 and keep bit 0 set. */
+    __RV_CSR_WRITE(CSR_MTVT2, addr | 0x01);
+}
+
+/**
+ * \brief  Get Non-vector interrupt entry address
+ * \details
+ * This function get Non-vector interrupt address.
+ * \return      Non-vector interrupt handler address
+ * \remarks
+ * - This function use to get non-vector interrupt entry address from 'CSR_MTVT2' if
+ * - CSR_MTVT2 bit0 is 1. If 'CSR_MTVT2' bit0 is 0 then get address from 'CSR_MTVEC'.
+ * \sa
+ * - \ref __set_nonvec_entry
+ */
+__STATIC_FORCEINLINE rv_csr_t __get_nonvec_entry(void)
+{
+    /* Bit 0 of MTVT2 clear: the non-vector entry is the mtvec address with
+     * the ECLIC mode bits stripped. */
+    if ((__RV_CSR_READ(CSR_MTVT2) & 0x1) == 0) {
+        rv_csr_t mtvec_value = __RV_CSR_READ(CSR_MTVEC);
+        return (mtvec_value & ~ECLIC_MODE_MTVEC_Msk);
+    }
+
+    /* Bit 0 of MTVT2 set: the entry is MTVT2 with bit 0 masked off. */
+    return __RV_CSR_READ(CSR_MTVT2) & (~(rv_csr_t)(0x1));
+}
+
+/**
+ * \brief  Get NMI interrupt entry from 'CSR_MNVEC'
+ * \details
+ * This function get NMI interrupt address from 'CSR_MNVEC'.
+ * \return      NMI interrupt handler address
+ * \remarks
+ * - This function use to get NMI interrupt handler address from 'CSR_MNVEC'. If CSR_MMISC_CTL[9] = 1 'CSR_MNVEC'
+ * - will be equal as mtvec. If CSR_MMISC_CTL[9] = 0 'CSR_MNVEC' will be equal as reset vector.
+ * - NMI entry is defined via \ref CSR_MMISC_CTL, writing to \ref CSR_MNVEC will be ignored.
+ */
+__STATIC_FORCEINLINE rv_csr_t __get_nmi_entry(void)
+{
+    /* The NMI entry is simply the current content of CSR_MNVEC. */
+    rv_csr_t nmi_entry = __RV_CSR_READ(CSR_MNVEC);
+
+    return nmi_entry;
+}
+
+/**
+ * \brief   Save necessary CSRs into variables for vector interrupt nesting
+ * \details
+ * This macro is used to declare variables which are used for saving
+ * CSRs(MCAUSE, MEPC, MSUBM), and it will read these CSR content into
+ * these variables, it need to be used in a vector-interrupt if nesting
+ * is required.
+ * \remarks
+ * - Interrupt will be enabled after this macro is called
+ * - It need to be used together with \ref RESTORE_IRQ_CSR_CONTEXT
+ * - Don't use variable names __mcause, __mepc, __msubm in your ISR code
+ * - If you want to enable interrupt nesting feature for vector interrupt,
+ * you can do it like this:
+ * \code
+ * // __INTERRUPT attribute will generates function entry and exit sequences suitable
+ * // for use in an interrupt handler when this attribute is present
+ * __INTERRUPT void eclic_mtip_handler(void)
+ * {
+ *     // Must call this to save CSRs
+ *     SAVE_IRQ_CSR_CONTEXT();
+ *     // !!!Interrupt is enabled here!!!
+ *     // !!!Higher priority interrupt could nest it!!!
+ *
+ *     // put your own interrupt handling code here
+ *
+ *     // Must call this to restore CSRs
+ *     RESTORE_IRQ_CSR_CONTEXT();
+ * }
+ * \endcode
+ */
+/* Expansion notes:
+ * - Expands to three local variable declarations (__mcause, __mepc, __msubm)
+ *   followed by a call to __enable_irq(), so it can only appear where
+ *   declarations are allowed and only once per scope.
+ * - The CSRs are read before __enable_irq() is called.
+ * - The expansion already ends with ';'; a trailing ';' at the call site
+ *   only adds an empty statement.
+ */
+#define SAVE_IRQ_CSR_CONTEXT()                                              \
+        rv_csr_t __mcause = __RV_CSR_READ(CSR_MCAUSE);                      \
+        rv_csr_t __mepc = __RV_CSR_READ(CSR_MEPC);                          \
+        rv_csr_t __msubm = __RV_CSR_READ(CSR_MSUBM);                        \
+        __enable_irq();
+
+/**
+ * \brief   Restore necessary CSRs from variables for vector interrupt nesting
+ * \details
+ * This macro is used to restore CSRs(MCAUSE, MEPC, MSUBM) from pre-defined variables
+ * in \ref SAVE_IRQ_CSR_CONTEXT macro.
+ * \remarks
+ * - Interrupt will be disabled after this macro is called
+ * - It need to be used together with \ref SAVE_IRQ_CSR_CONTEXT
+ */
+/* Expansion notes:
+ * - Calls __disable_irq() first, then writes MSUBM, MEPC and MCAUSE back from
+ *   the local variables __msubm, __mepc and __mcause, which must be in scope
+ *   at the point of use.
+ * - The expansion is several statements and already ends with ';', so do not
+ *   use it as the unbraced body of an if/else/loop.
+ */
+#define RESTORE_IRQ_CSR_CONTEXT()                                           \
+        __disable_irq();                                                    \
+        __RV_CSR_WRITE(CSR_MSUBM, __msubm);                                 \
+        __RV_CSR_WRITE(CSR_MEPC, __mepc);                                   \
+        __RV_CSR_WRITE(CSR_MCAUSE, __mcause);
+
+/** @} */ /* End of Doxygen Group NMSIS_Core_IntExc */
+
+#endif /* defined(__ECLIC_PRESENT) && (__ECLIC_PRESENT == 1) */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /** __CORE_FEATURE_ECLIC__ */
--- a/components/nmsis/core/inc/core_feature_fpu.h
+++ b/components/nmsis/core/inc/core_feature_fpu.h
@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __CORE_FEATURE_FPU_H__
+#define __CORE_FEATURE_FPU_H__
+/*!
+ * @file     core_feature_fpu.h
+ * @brief    FPU feature API header file for Nuclei N/NX Core
+ */
+/*
+ * FPU Feature Configuration Macro:
+ * 1. __FPU_PRESENT:  Define whether Floating Point Unit(FPU) is present or not
+ *   * 0: Not present
+ *   * 1: Single precision FPU present, __RISCV_FLEN == 32
+ *   * 2: Double precision FPU present, __RISCV_FLEN == 64
+ */
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/* ===== FPU Operations ===== */
+/**
+ * \defgroup NMSIS_Core_FPU_Functions   FPU Functions
+ * \ingroup  NMSIS_Core
+ * \brief    Functions that related to the RISC-V FPU (F and D extension).
+ * \details
+ *
+ * Nuclei provided floating point unit by RISC-V F and D extension.
+ * * `F extension` adds single-precision floating-point computational
+ * instructions compliant with the IEEE 754-2008 arithmetic standard, __RISCV_FLEN = 32.
+ *   The F extension adds 32 floating-point registers, f0-f31, each 32 bits wide,
+ *   and a floating-point control and status register fcsr, which contains the
+ *   operating mode and exception status of the floating-point unit.
+ * * `D extension` adds double-precision floating-point computational instructions
+ * compliant with the IEEE 754-2008 arithmetic standard.
+ *   The D extension widens the 32 floating-point registers, f0-f31, to 64 bits, __RISCV_FLEN = 64
+ *   @{
+ */
+#if defined(__FPU_PRESENT) && (__FPU_PRESENT > 0)
+
+#if __FPU_PRESENT == 1
+  /** \brief Refer to the width of the floating point register in bits(either 32 or 64) */
+  #define __RISCV_FLEN          32
+#elif __FPU_PRESENT == 2
+  /* Double precision FPU: floating point registers are 64 bits wide */
+  #define __RISCV_FLEN          64
+#else
+  /* Any other non-zero __FPU_PRESENT value: take the width from __riscv_flen
+   * (presumably the compiler-predefined macro - confirm for your toolchain) */
+  #define __RISCV_FLEN          __riscv_flen
+#endif /* __FPU_PRESENT == 1 */
+
+/** \brief Get FCSR CSR Register (expands to a read of CSR_FCSR) */
+#define __get_FCSR()            __RV_CSR_READ(CSR_FCSR)
+/** \brief Set FCSR CSR Register with val (expands to a write of CSR_FCSR) */
+#define __set_FCSR(val)         __RV_CSR_WRITE(CSR_FCSR, (val))
+/** \brief Get FRM CSR Register (expands to a read of CSR_FRM) */
+#define __get_FRM()             __RV_CSR_READ(CSR_FRM)
+/** \brief Set FRM CSR Register with val (expands to a write of CSR_FRM) */
+#define __set_FRM(val)          __RV_CSR_WRITE(CSR_FRM, (val))
+/** \brief Get FFLAGS CSR Register (expands to a read of CSR_FFLAGS) */
+#define __get_FFLAGS()          __RV_CSR_READ(CSR_FFLAGS)
+/** \brief Set FFLAGS CSR Register with val (expands to a write of CSR_FFLAGS) */
+#define __set_FFLAGS(val)       __RV_CSR_WRITE(CSR_FFLAGS, (val))
+
+/** \brief Enable FPU Unit by setting the MSTATUS_FS bits in CSR_MSTATUS */
+#define __enable_FPU()          __RV_CSR_SET(CSR_MSTATUS, MSTATUS_FS)
+/**
+ * \brief Disable FPU Unit by clearing the MSTATUS_FS bits in CSR_MSTATUS
+ * \details
+ * * We can save power by disabling the FPU Unit.
+ * * When FPU Unit is disabled, any access to FPU related CSR registers
+ * and FPU instructions will cause an illegal Instruction Exception.
+ * */
+#define __disable_FPU()         __RV_CSR_CLEAR(CSR_MSTATUS, MSTATUS_FS)
+
+
+/**
+ * \brief   Load a single-precision value from memory into float point register freg using flw instruction
+ * \details The FLW instruction loads a single-precision floating point value from memory
+ * address (addr + ofs) into floating point register freg(f0-f31)
+ * \param [in]    freg   The floating point register, eg. FREG(0), f0
+ * \param [in]    addr   The memory base address, 4 byte aligned required
+ * \param [in]    ofs    a 12-bit immediate signed byte offset value, should be an const value
+ * \remarks
+ * * FLW and FSW operations need to make sure the address is 4 bytes aligned,
+ *   otherwise it will cause exception code 4(Load address misaligned) or 6 (Store/AMO address misaligned)
+ * * FLW and FSW do not modify the bits being transferred; in particular, the payloads of non-canonical
+ * NaNs are preserved
+ *
+ */
+/* Implementation notes:
+ * - freg is pasted into the instruction text via STRINGIFY, so it must be a
+ *   register name token, not a C expression.
+ * - ofs is passed with the "I" (immediate) constraint and addr is copied into
+ *   a general purpose register; the asm has a "memory" clobber.
+ * - The asm declares no outputs: the compiler is not told that freg is
+ *   overwritten by the load.
+ */
+#define __RV_FLW(freg, addr, ofs)                              \
+    ({                                                         \
+        register rv_csr_t __addr = (rv_csr_t)(addr);           \
+        __ASM volatile("flw " STRINGIFY(freg) ", %0(%1)  "     \
+                     : : "I"(ofs), "r"(__addr)                 \
+                     : "memory");                              \
+    })
+
+/**
+ * \brief   Store a single-precision value from float point freg into memory using fsw instruction
+ * \details The FSW instruction stores a single-precision value from floating point register to memory
+ * \param [in]    freg   The floating point register(f0-f31), eg. FREG(0), f0
+ * \param [in]    addr   The memory base address, 4 byte aligned required
+ * \param [in]    ofs    a 12-bit immediate signed byte offset value, should be an const value
+ * \remarks
+ * * FLW and FSW operations need to make sure the address is 4 bytes aligned,
+ *   otherwise it will cause exception code 4(Load address misaligned) or 6 (Store/AMO address misaligned)
+ * * FLW and FSW do not modify the bits being transferred; in particular, the payloads of non-canonical
+ * NaNs are preserved
+ *
+ */
+/* Implementation notes:
+ * - freg is pasted into the instruction text via STRINGIFY, so it must be a
+ *   register name token, not a C expression.
+ * - ofs is passed with the "I" (immediate) constraint and addr is copied into
+ *   a general purpose register; the "memory" clobber tells the compiler that
+ *   memory may be modified by the store.
+ */
+#define __RV_FSW(freg, addr, ofs)                              \
+    ({                                                         \
+        register rv_csr_t __addr = (rv_csr_t)(addr);           \
+        __ASM volatile("fsw " STRINGIFY(freg) ", %0(%1)  "     \
+                     : : "I"(ofs), "r"(__addr)                 \
+                     : "memory");                              \
+    })
+
+/**
+ * \brief   Load a double-precision value from memory into float point register freg using fld instruction
+ * \details The FLD instruction loads a double-precision floating point value from memory
+ * address (addr + ofs) into floating point register freg(f0-f31)
+ * \param [in]    freg   The floating point register, eg. FREG(0), f0
+ * \param [in]    addr   The memory base address, 8 byte aligned required
+ * \param [in]    ofs    a 12-bit immediate signed byte offset value, should be an const value
+ * \attention
+ * * Function only available for double precision floating point unit, FLEN = 64
+ * \remarks
+ * * FLD and FSD operations need to make sure the address is 8 bytes aligned,
+ *   otherwise it will cause exception code 4(Load address misaligned) or 6 (Store/AMO address misaligned)
+ * * FLD and FSD do not modify the bits being transferred; in particular, the payloads of non-canonical
+ * NaNs are preserved.
+ */
+/* Implementation notes:
+ * - freg is pasted into the instruction text via STRINGIFY, so it must be a
+ *   register name token, not a C expression.
+ * - ofs is passed with the "I" (immediate) constraint and addr is copied into
+ *   a general purpose register; the asm has a "memory" clobber.
+ * - The asm declares no outputs: the compiler is not told that freg is
+ *   overwritten by the load.
+ */
+#define __RV_FLD(freg, addr, ofs)                              \
+    ({                                                         \
+        register rv_csr_t __addr = (rv_csr_t)(addr);           \
+        __ASM volatile("fld " STRINGIFY(freg) ", %0(%1)  "     \
+                     : : "I"(ofs), "r"(__addr)                 \
+                     : "memory");                              \
+    })
+
+/**
+ * \brief   Store a double-precision value from float point freg into memory using fsd instruction
+ * \details The FSD instruction stores double-precision value from floating point register to memory
+ * \param [in]    freg   The floating point register(f0-f31), eg. FREG(0), f0
+ * \param [in]    addr   The memory base address, 8 byte aligned required
+ * \param [in]    ofs    a 12-bit immediate signed byte offset value, should be an const value
+ * \attention
+ * * Function only available for double precision floating point unit, FLEN = 64
+ * \remarks
+ * * FLD and FSD operations need to make sure the address is 8 bytes aligned,
+ *   otherwise it will cause exception code 4(Load address misaligned) or 6 (Store/AMO address misaligned)
+ * * FLD and FSD do not modify the bits being transferred; in particular, the payloads of non-canonical
+ * NaNs are preserved.
+ *
+ */
+/* Implementation notes:
+ * - freg is pasted into the instruction text via STRINGIFY, so it must be a
+ *   register name token, not a C expression.
+ * - ofs is passed with the "I" (immediate) constraint and addr is copied into
+ *   a general purpose register; the "memory" clobber tells the compiler that
+ *   memory may be modified by the store.
+ */
+#define __RV_FSD(freg, addr, ofs)                              \
+    ({                                                         \
+        register rv_csr_t __addr = (rv_csr_t)(addr);           \
+        __ASM volatile("fsd " STRINGIFY(freg) ", %0(%1)  "     \
+                     : : "I"(ofs), "r"(__addr)                 \
+                     : "memory");                              \
+    })
+
+/**
+ * \def __RV_FLOAD
+ * \brief   Load a float point value from memory into float point register freg using flw/fld instruction
+ * \details
+ * * For Single-Precision Floating-Point Mode(__FPU_PRESENT == 1, __RISCV_FLEN == 32):
+ *   It will call \ref __RV_FLW to load a single-precision floating point value from memory to floating point register
+ * * For Double-Precision Floating-Point Mode(__FPU_PRESENT == 2, __RISCV_FLEN == 64):
+ *   It will call \ref __RV_FLD to load a double-precision floating point value from memory to floating point register
+ *
+ * \attention
+ * Function behaviour is different for __FPU_PRESENT = 1 or 2, please see the real function this macro represent
+ */
+/**
+ * \def __RV_FSTORE
+ * \brief   Store a float value from float point freg into memory using fsw/fsd instruction
+ * \details
+ * * For Single-Precision Floating-Point Mode(__FPU_PRESENT == 1, __RISCV_FLEN == 32):
+ *   It will call \ref __RV_FSW to store floating point register into memory
+ * * For Double-Precision Floating-Point Mode(__FPU_PRESENT == 2, __RISCV_FLEN == 64):
+ *   It will call \ref __RV_FSD to store floating point register into memory
+ *
+ * \attention
+ * Function behaviour is different for __FPU_PRESENT = 1 or 2, please see the real function this macro represent
+ */
+/* Select the load/store macros and the register storage type by FPU width.
+ * There is no #else branch: for any other __FPU_PRESENT value __RV_FLOAD,
+ * __RV_FSTORE and rv_fpu_t are left undefined. */
+#if __FPU_PRESENT == 1
+#define __RV_FLOAD              __RV_FLW
+#define __RV_FSTORE             __RV_FSW
+/** \brief Type of FPU register, depends on the FLEN defined in RISC-V */
+typedef uint32_t rv_fpu_t;
+#elif __FPU_PRESENT == 2
+#define __RV_FLOAD              __RV_FLD
+#define __RV_FSTORE             __RV_FSD
+/** \brief Type of FPU register, depends on the FLEN defined in RISC-V */
+typedef uint64_t rv_fpu_t;
+#endif /* __FPU_PRESENT == 2 */
+
+/**
+ * \brief   Save FPU context into variables for interrupt nesting
+ * \details
+ * This macro is used to declare variables which are used for saving
+ * FPU context, and it will store the necessary fpu registers into
+ * these variables, it need to be used in a interrupt when in this
+ * interrupt fpu registers are used.
+ * \remarks
+ * - It need to be used together with \ref RESTORE_FPU_CONTEXT
+ * - Don't use variable names __fpu_context in your ISR code
+ * - If your ISR code will use fpu registers, and this interrupt is nested,
+ * then you can do it like this:
+ * \code
+ * void eclic_mtip_handler(void)
+ * {
+ *     // !!!Interrupt is enabled here!!!
+ *     // !!!Higher priority interrupt could nest it!!!
+ *
+ *     // Necessary only when you need to use fpu registers
+ *     // in this isr handler functions
+ *     SAVE_FPU_CONTEXT();
+ *
+ *     // put your own interrupt handling code here
+ *
+ *     // pair of SAVE_FPU_CONTEXT()
+ *     RESTORE_FPU_CONTEXT();
+ * }
+ * \endcode
+ */
+/* Expansion notes:
+ * - Declares a local array rv_fpu_t __fpu_context[20] and stores 20 FPU
+ *   registers into it: FREG(0)-FREG(7) in slots 0-7, FREG(10)-FREG(17) in
+ *   slots 8-15 and FREG(28)-FREG(31) in slots 16-19. Slot i is addressed with
+ *   the byte offset (i << LOG_FPREGBYTES).
+ * - FREG(8), FREG(9) and FREG(18)-FREG(27) are not saved.
+ *   NOTE(review): the saved set looks like the caller-saved FP registers
+ *   (ft0-ft11, fa0-fa7) of the standard RISC-V calling convention - confirm.
+ * - Because it declares a variable, the macro can only appear where
+ *   declarations are allowed and only once per scope.
+ * - FREG and LOG_FPREGBYTES are defined outside this section.
+ */
+#define SAVE_FPU_CONTEXT()                                                  \
+        rv_fpu_t __fpu_context[20];                                         \
+        __RV_FSTORE(FREG(0),  __fpu_context, 0  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(1),  __fpu_context, 1  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(2),  __fpu_context, 2  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(3),  __fpu_context, 3  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(4),  __fpu_context, 4  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(5),  __fpu_context, 5  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(6),  __fpu_context, 6  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(7),  __fpu_context, 7  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(10), __fpu_context, 8  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(11), __fpu_context, 9  << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(12), __fpu_context, 10 << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(13), __fpu_context, 11 << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(14), __fpu_context, 12 << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(15), __fpu_context, 13 << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(16), __fpu_context, 14 << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(17), __fpu_context, 15 << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(28), __fpu_context, 16 << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(29), __fpu_context, 17 << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(30), __fpu_context, 18 << LOG_FPREGBYTES);         \
+        __RV_FSTORE(FREG(31), __fpu_context, 19 << LOG_FPREGBYTES);
+
+/**
+ * \brief   Restore necessary fpu registers from variables for interrupt nesting
+ * \details
+ * This macro is used restore necessary fpu registers from pre-defined variables
+ * in \ref SAVE_FPU_CONTEXT macro.
+ * \remarks
+ * - It need to be used together with \ref SAVE_FPU_CONTEXT
+ */
+/* Expansion notes:
+ * - Reloads the same 20 registers, from the same slots, that
+ *   SAVE_FPU_CONTEXT() stored: FREG(0)-FREG(7) from slots 0-7,
+ *   FREG(10)-FREG(17) from slots 8-15 and FREG(28)-FREG(31) from slots 16-19.
+ * - Reads the local array __fpu_context, which must be in scope at the point
+ *   of use.
+ * - The expansion is several statements and already ends with ';', so do not
+ *   use it as the unbraced body of an if/else/loop.
+ */
+#define RESTORE_FPU_CONTEXT()                                               \
+        __RV_FLOAD(FREG(0),  __fpu_context, 0  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(1),  __fpu_context, 1  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(2),  __fpu_context, 2  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(3),  __fpu_context, 3  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(4),  __fpu_context, 4  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(5),  __fpu_context, 5  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(6),  __fpu_context, 6  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(7),  __fpu_context, 7  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(10), __fpu_context, 8  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(11), __fpu_context, 9  << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(12), __fpu_context, 10 << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(13), __fpu_context, 11 << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(14), __fpu_context, 12 << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(15), __fpu_context, 13 << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(16), __fpu_context, 14 << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(17), __fpu_context, 15 << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(28), __fpu_context, 16 << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(29), __fpu_context, 17 << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(30), __fpu_context, 18 << LOG_FPREGBYTES);          \
+        __RV_FLOAD(FREG(31), __fpu_context, 19 << LOG_FPREGBYTES);
+#else
+/* __FPU_PRESENT is undefined or 0: both context macros expand to nothing. */
+#define SAVE_FPU_CONTEXT()
+#define RESTORE_FPU_CONTEXT()
+#endif /* __FPU_PRESENT > 0 */
+/** @} */ /* End of Doxygen Group NMSIS_Core_FPU_Functions */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /** __CORE_FEATURE_FPU_H__  */
--- a/components/nmsis/core/inc/core_feature_pmp.h
+++ b/components/nmsis/core/inc/core_feature_pmp.h
@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __CORE_FEATURE_PMP_H__
+#define __CORE_FEATURE_PMP_H__
+/*!
+ * @file     core_feature_pmp.h
+ * @brief    PMP feature API header file for Nuclei N/NX Core
+ */
+/*
+ * PMP Feature Configuration Macro:
+ * 1. __PMP_PRESENT:  Define whether Physical Memory Protection(PMP) is present or not
+ *   * 0: Not present
+ *   * 1: Present
+ * 2. __PMP_ENTRY_NUM:  Define the number of PMP entries, only 8 or 16 is configurable.
+ */
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+#if defined(__PMP_PRESENT) && (__PMP_PRESENT == 1)
+/* ===== PMP Operations ===== */
+/**
+ * \defgroup NMSIS_Core_PMP_Functions   PMP Functions
+ * \ingroup  NMSIS_Core
+ * \brief    Functions related to the RISC-V Physical Memory Protection.
+ * \details
+ * Optional physical memory protection (PMP) unit provides per-hart machine-mode
+ * control registers to allow physical memory access privileges (read, write, execute)
+ * to be specified for each physical memory region.
+ *
+ * The PMP can support region access control settings as small as four bytes.
+ *
+ *   @{
+ */
+#ifndef __PMP_ENTRY_NUM
+/* numbers of PMP entries(__PMP_ENTRY_NUM) should be defined in <Device.h> */
+#error "__PMP_ENTRY_NUM is not defined, please check!"
+#endif
+
+/**
+ * \brief   Read the 8-bit PMPxCFG field of a single PMP entry
+ * \details Picks the PMPCFG CSR that contains entry \c idx, reads it and
+ *          returns the byte that belongs to this entry.
+ * \param [in]    idx    PMP region index(0-15)
+ * \return               PMPxCFG value of the entry, or 0 when \c idx is not
+ *                       below __PMP_ENTRY_NUM or XLEN is neither 32 nor 64
+ */
+__STATIC_INLINE uint8_t __get_PMPxCFG(uint32_t idx)
+{
+    rv_csr_t cfg_word = 0;
+
+    if (idx >= __PMP_ENTRY_NUM) {
+        return 0;
+    }
+#if __RISCV_XLEN == 32
+    /* RV32: four 8-bit entries per PMPCFG CSR; index 12 and above map to PMPCFG3 */
+    uint32_t bank = (idx < 12) ? (idx >> 2) : 3;
+    uint32_t shift = (idx - (bank << 2)) << 3;
+
+    switch (bank) {
+        case 0:
+            cfg_word = __RV_CSR_READ(CSR_PMPCFG0);
+            break;
+        case 1:
+            cfg_word = __RV_CSR_READ(CSR_PMPCFG1);
+            break;
+        case 2:
+            cfg_word = __RV_CSR_READ(CSR_PMPCFG2);
+            break;
+        default:
+            cfg_word = __RV_CSR_READ(CSR_PMPCFG3);
+            break;
+    }
+    return (uint8_t)((cfg_word >> shift) & 0xFF);
+#elif __RISCV_XLEN == 64
+    /* RV64: eight 8-bit entries per CSR, held in PMPCFG0 and PMPCFG2 */
+    uint32_t shift;
+
+    if (idx >= 8) {
+        cfg_word = __RV_CSR_READ(CSR_PMPCFG2);
+        shift = (idx - 8) << 3;
+    } else {
+        cfg_word = __RV_CSR_READ(CSR_PMPCFG0);
+        shift = idx << 3;
+    }
+    return (uint8_t)((cfg_word >> shift) & 0xFF);
+#else
+    // TODO Add RV128 Handling
+    return 0;
+#endif
+}
+
+/**
+ * \brief   Write the 8-bit PMPxCFG field of a single PMP entry
+ * \details Read-modify-write of the PMPCFG CSR that contains entry \c idx:
+ *          only the byte of this entry is replaced, the other entries sharing
+ *          the CSR are written back unchanged. Nothing is written when \c idx
+ *          is not below __PMP_ENTRY_NUM.
+ * \param [in]    idx      PMPx region index(0-15)
+ * \param [in]    pmpxcfg  PMPxCFG register value to set
+ */
+__STATIC_INLINE void __set_PMPxCFG(uint32_t idx, uint8_t pmpxcfg)
+{
+    rv_csr_t cfg_word = 0;
+
+    if (idx >= __PMP_ENTRY_NUM) {
+        return;
+    }
+
+#if __RISCV_XLEN == 32
+    /* RV32: four 8-bit entries per PMPCFG CSR; index 12 and above map to PMPCFG3 */
+    uint32_t bank = (idx < 12) ? (idx >> 2) : 3;
+    uint32_t shift = (idx - (bank << 2)) << 3;
+
+    switch (bank) {
+        case 0:
+            cfg_word = __RV_CSR_READ(CSR_PMPCFG0);
+            break;
+        case 1:
+            cfg_word = __RV_CSR_READ(CSR_PMPCFG1);
+            break;
+        case 2:
+            cfg_word = __RV_CSR_READ(CSR_PMPCFG2);
+            break;
+        default:
+            cfg_word = __RV_CSR_READ(CSR_PMPCFG3);
+            break;
+    }
+
+    /* Replace only the byte of the selected entry */
+    cfg_word = (cfg_word & ~(0xFFUL << shift)) | ((rv_csr_t)pmpxcfg << shift);
+
+    switch (bank) {
+        case 0:
+            __RV_CSR_WRITE(CSR_PMPCFG0, cfg_word);
+            break;
+        case 1:
+            __RV_CSR_WRITE(CSR_PMPCFG1, cfg_word);
+            break;
+        case 2:
+            __RV_CSR_WRITE(CSR_PMPCFG2, cfg_word);
+            break;
+        default:
+            __RV_CSR_WRITE(CSR_PMPCFG3, cfg_word);
+            break;
+    }
+#elif __RISCV_XLEN == 64
+    /* RV64: eight 8-bit entries per CSR, held in PMPCFG0 and PMPCFG2 */
+    if (idx >= 8) {
+        uint32_t shift = (idx - 8) << 3;
+
+        cfg_word = __RV_CSR_READ(CSR_PMPCFG2);
+        cfg_word = (cfg_word & ~(0xFFULL << shift)) | ((rv_csr_t)pmpxcfg << shift);
+        __RV_CSR_WRITE(CSR_PMPCFG2, cfg_word);
+    } else {
+        uint32_t shift = idx << 3;
+
+        cfg_word = __RV_CSR_READ(CSR_PMPCFG0);
+        cfg_word = (cfg_word & ~(0xFFULL << shift)) | ((rv_csr_t)pmpxcfg << shift);
+        __RV_CSR_WRITE(CSR_PMPCFG0, cfg_word);
+    }
+#else
+    // TODO Add RV128 Handling
+#endif
+}
+
+/**
+ * \brief   Read a whole PMPCFGx CSR selected by index
+ * \details Returns the raw content of PMPCFG0..PMPCFG3; any other index yields 0.
+ * \param [in]    idx    PMPCFG CSR index(0-3)
+ * \return               PMPCFGx Register value
+ * \remark
+ * - For RV64, only idx = 0 and idx = 2 is allowed.
+ *   pmpcfg0 and pmpcfg2 hold the configurations
+ *   for the 16 PMP entries, pmpcfg1 and pmpcfg3 are illegal
+ * - For RV32, pmpcfg0-pmpcfg3 hold the configurations
+ *   pmp0cfg-pmp15cfg for the 16 PMP entries
+ */
+__STATIC_INLINE rv_csr_t __get_PMPCFGx(uint32_t idx)
+{
+    /* The CSR number must be a compile-time constant, hence one branch per CSR */
+    if (idx == 0) {
+        return __RV_CSR_READ(CSR_PMPCFG0);
+    } else if (idx == 1) {
+        return __RV_CSR_READ(CSR_PMPCFG1);
+    } else if (idx == 2) {
+        return __RV_CSR_READ(CSR_PMPCFG2);
+    } else if (idx == 3) {
+        return __RV_CSR_READ(CSR_PMPCFG3);
+    }
+    return 0;
+}
+
+/**
+ * \brief   Write a whole PMPCFGx CSR selected by index
+ * \details Stores \c pmpcfg into PMPCFG0..PMPCFG3; any other index is ignored.
+ * \param [in]    idx      PMPCFG CSR index(0-3)
+ * \param [in]    pmpcfg   PMPCFGx Register value to set
+ * \remark
+ * - For RV64, only idx = 0 and idx = 2 is allowed.
+ *   pmpcfg0 and pmpcfg2 hold the configurations
+ *   for the 16 PMP entries, pmpcfg1 and pmpcfg3 are illegal
+ * - For RV32, pmpcfg0-pmpcfg3 hold the configurations
+ *   pmp0cfg-pmp15cfg for the 16 PMP entries
+ */
+__STATIC_INLINE void __set_PMPCFGx(uint32_t idx, rv_csr_t pmpcfg)
+{
+    /* The CSR number must be a compile-time constant, hence one branch per CSR */
+    if (idx == 0) {
+        __RV_CSR_WRITE(CSR_PMPCFG0, pmpcfg);
+    } else if (idx == 1) {
+        __RV_CSR_WRITE(CSR_PMPCFG1, pmpcfg);
+    } else if (idx == 2) {
+        __RV_CSR_WRITE(CSR_PMPCFG2, pmpcfg);
+    } else if (idx == 3) {
+        __RV_CSR_WRITE(CSR_PMPCFG3, pmpcfg);
+    }
+}
+
+/**
+ * \brief   Read the PMPADDRx CSR of one PMP entry
+ * \details Returns the raw content of PMPADDR0..PMPADDR15; any other index yields 0.
+ * \param [in]    idx    PMP region index(0-15)
+ * \return               PMPADDRx Register value
+ */
+__STATIC_INLINE rv_csr_t __get_PMPADDRx(uint32_t idx)
+{
+    rv_csr_t addr = 0;
+
+    /* The CSR number must be a compile-time constant, hence one case per entry */
+    switch (idx) {
+        case 0:
+            addr = __RV_CSR_READ(CSR_PMPADDR0);
+            break;
+        case 1:
+            addr = __RV_CSR_READ(CSR_PMPADDR1);
+            break;
+        case 2:
+            addr = __RV_CSR_READ(CSR_PMPADDR2);
+            break;
+        case 3:
+            addr = __RV_CSR_READ(CSR_PMPADDR3);
+            break;
+        case 4:
+            addr = __RV_CSR_READ(CSR_PMPADDR4);
+            break;
+        case 5:
+            addr = __RV_CSR_READ(CSR_PMPADDR5);
+            break;
+        case 6:
+            addr = __RV_CSR_READ(CSR_PMPADDR6);
+            break;
+        case 7:
+            addr = __RV_CSR_READ(CSR_PMPADDR7);
+            break;
+        case 8:
+            addr = __RV_CSR_READ(CSR_PMPADDR8);
+            break;
+        case 9:
+            addr = __RV_CSR_READ(CSR_PMPADDR9);
+            break;
+        case 10:
+            addr = __RV_CSR_READ(CSR_PMPADDR10);
+            break;
+        case 11:
+            addr = __RV_CSR_READ(CSR_PMPADDR11);
+            break;
+        case 12:
+            addr = __RV_CSR_READ(CSR_PMPADDR12);
+            break;
+        case 13:
+            addr = __RV_CSR_READ(CSR_PMPADDR13);
+            break;
+        case 14:
+            addr = __RV_CSR_READ(CSR_PMPADDR14);
+            break;
+        case 15:
+            addr = __RV_CSR_READ(CSR_PMPADDR15);
+            break;
+        default:
+            break;
+    }
+    return addr;
+}
+
+/**
+ * \brief   Write the PMPADDRx CSR of one PMP entry
+ * \details Stores \c pmpaddr into PMPADDR0..PMPADDR15; any other index is ignored.
+ * \param [in]    idx      PMP region index(0-15)
+ * \param [in]    pmpaddr  PMPADDRx Register value to set
+ */
+__STATIC_INLINE void __set_PMPADDRx(uint32_t idx, rv_csr_t pmpaddr)
+{
+    /* The CSR number must be a compile-time constant, hence one case per entry */
+    switch (idx) {
+        case 0:
+            __RV_CSR_WRITE(CSR_PMPADDR0, pmpaddr);
+            break;
+        case 1:
+            __RV_CSR_WRITE(CSR_PMPADDR1, pmpaddr);
+            break;
+        case 2:
+            __RV_CSR_WRITE(CSR_PMPADDR2, pmpaddr);
+            break;
+        case 3:
+            __RV_CSR_WRITE(CSR_PMPADDR3, pmpaddr);
+            break;
+        case 4:
+            __RV_CSR_WRITE(CSR_PMPADDR4, pmpaddr);
+            break;
+        case 5:
+            __RV_CSR_WRITE(CSR_PMPADDR5, pmpaddr);
+            break;
+        case 6:
+            __RV_CSR_WRITE(CSR_PMPADDR6, pmpaddr);
+            break;
+        case 7:
+            __RV_CSR_WRITE(CSR_PMPADDR7, pmpaddr);
+            break;
+        case 8:
+            __RV_CSR_WRITE(CSR_PMPADDR8, pmpaddr);
+            break;
+        case 9:
+            __RV_CSR_WRITE(CSR_PMPADDR9, pmpaddr);
+            break;
+        case 10:
+            __RV_CSR_WRITE(CSR_PMPADDR10, pmpaddr);
+            break;
+        case 11:
+            __RV_CSR_WRITE(CSR_PMPADDR11, pmpaddr);
+            break;
+        case 12:
+            __RV_CSR_WRITE(CSR_PMPADDR12, pmpaddr);
+            break;
+        case 13:
+            __RV_CSR_WRITE(CSR_PMPADDR13, pmpaddr);
+            break;
+        case 14:
+            __RV_CSR_WRITE(CSR_PMPADDR14, pmpaddr);
+            break;
+        case 15:
+            __RV_CSR_WRITE(CSR_PMPADDR15, pmpaddr);
+            break;
+        default:
+            break;
+    }
+}
+/** @} */ /* End of Doxygen Group NMSIS_Core_PMP_Functions */
+#endif /* defined(__PMP_PRESENT) && (__PMP_PRESENT == 1) */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /** __CORE_FEATURE_PMP_H__  */
--- a/components/nmsis/core/inc/core_feature_timer.h
+++ b/components/nmsis/core/inc/core_feature_timer.h
@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __CORE_FEATURE_TIMER_H__
+#define __CORE_FEATURE_TIMER_H__
+/*!
+ * @file     core_feature_timer.h
+ * @brief    System Timer feature API header file for Nuclei N/NX Core
+ */
+/*
+ * System Timer Feature Configuration Macro:
+ * 1. __SYSTIMER_PRESENT:  Define whether Private System Timer is present or not.
+ *   * 0: Not present
+ *   * 1: Present
+ * 2. __SYSTIMER_BASEADDR:  Define the base address of the System Timer.
+ */
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+#if defined(__SYSTIMER_PRESENT) && (__SYSTIMER_PRESENT == 1)
+/**
+ * \defgroup NMSIS_Core_SysTimer_Registers     Register Define and Type Definitions Of System Timer
+ * \ingroup NMSIS_Core_Registers
+ * \brief   Type definitions and defines for system timer registers.
+ *
+ * @{
+ */
+/**
+ * \brief  Structure type to access the System Timer (SysTimer).
+ * \details
+ * Register layout of the system timer(SysTimer). The member order and sizes
+ * define the register offsets listed next to each member (two 64-bit counters
+ * at 0x000/0x008, a reserved gap, then four 32-bit registers ending at 0xFFC),
+ * so members must not be reordered or resized.
+ * \remarks
+ * - MSFTRST register is introduced in Nuclei N Core version 1.3(\ref __NUCLEI_N_REV >= 0x0103)
+ * - MSTOP register is renamed to MTIMECTL register in Nuclei N Core version 1.4(\ref __NUCLEI_N_REV >= 0x0104)
+ * - CMPCLREN and CLKSRC bit in MTIMECTL register is introduced in Nuclei N Core version 1.4(\ref __NUCLEI_N_REV >= 0x0104)
+ */
+typedef struct {
+    __IOM uint64_t MTIMER;                  /*!< Offset: 0x000 (R/W)  System Timer current value 64bits Register */
+    __IOM uint64_t MTIMERCMP;               /*!< Offset: 0x008 (R/W)  System Timer compare Value 64bits Register */
+    __IOM uint32_t RESERVED0[0x3F8];        /*!< Offset: 0x010 - 0xFEC Reserved (0x3F8 words pad the layout up to 0xFF0) */
+    __IOM uint32_t MSFTRST;                 /*!< Offset: 0xFF0 (R/W)  System Timer Software Core Reset Register */
+    __IOM uint32_t RESERVED1;               /*!< Offset: 0xFF4 Reserved */
+    __IOM uint32_t MTIMECTL;                /*!< Offset: 0xFF8 (R/W)  System Timer Control Register, previously MSTOP register */
+    __IOM uint32_t MSIP;                    /*!< Offset: 0xFFC (R/W)  System Timer SW interrupt Register */
+} SysTimer_Type;
+
+/* Timer Control / Status Register Definitions */
+/* MTIMECTL bit fields: each *_Pos is the bit number, each *_Msk the matching single-bit mask */
+#define SysTimer_MTIMECTL_TIMESTOP_Pos      0U                                          /*!< SysTick Timer MTIMECTL: TIMESTOP bit Position */
+#define SysTimer_MTIMECTL_TIMESTOP_Msk      (1UL << SysTimer_MTIMECTL_TIMESTOP_Pos)     /*!< SysTick Timer MTIMECTL: TIMESTOP Mask */
+#define SysTimer_MTIMECTL_CMPCLREN_Pos      1U                                          /*!< SysTick Timer MTIMECTL: CMPCLREN bit Position */
+#define SysTimer_MTIMECTL_CMPCLREN_Msk      (1UL << SysTimer_MTIMECTL_CMPCLREN_Pos)     /*!< SysTick Timer MTIMECTL: CMPCLREN Mask */
+#define SysTimer_MTIMECTL_CLKSRC_Pos        2U                                          /*!< SysTick Timer MTIMECTL: CLKSRC bit Position */
+#define SysTimer_MTIMECTL_CLKSRC_Msk        (1UL << SysTimer_MTIMECTL_CLKSRC_Pos)       /*!< SysTick Timer MTIMECTL: CLKSRC Mask */
+
+/* MSIP bit field: bit 0 is the software interrupt pending bit */
+#define SysTimer_MSIP_MSIP_Pos              0U                                          /*!< SysTick Timer MSIP: MSIP bit Position */
+#define SysTimer_MSIP_MSIP_Msk              (1UL << SysTimer_MSIP_MSIP_Pos)             /*!< SysTick Timer MSIP: MSIP Mask */
+
+/* Whole-register value masks: all bits set for the 64-bit and the 32-bit registers */
+#define SysTimer_MTIMER_Msk                 (0xFFFFFFFFFFFFFFFFULL)                     /*!< SysTick Timer MTIMER value Mask */
+#define SysTimer_MTIMERCMP_Msk              (0xFFFFFFFFFFFFFFFFULL)                     /*!< SysTick Timer MTIMERCMP value Mask */
+#define SysTimer_MTIMECTL_Msk               (0xFFFFFFFFUL)                              /*!< SysTick Timer MTIMECTL/MSTOP value Mask */
+#define SysTimer_MSIP_Msk                   (0xFFFFFFFFUL)                              /*!< SysTick Timer MSIP   value Mask */
+#define SysTimer_MSFTRST_Msk                (0xFFFFFFFFUL)                              /*!< SysTick Timer MSFTRST value Mask */
+
+/* Value written to the MSFTRST register by SysTimer_SoftwareReset() to request a reset */
+#define SysTimer_MSFRST_KEY                 (0x80000A5FUL)                              /*!< SysTick Timer Software Reset Request Key */
+
+/* Fail the build early when the device header did not provide the timer base address */
+#ifndef __SYSTIMER_BASEADDR
+/* Base address of SYSTIMER(__SYSTIMER_BASEADDR) should be defined in <Device.h> */
+#error "__SYSTIMER_BASEADDR is not defined, please check!"
+#endif
+/* System Timer Memory mapping of Device  */
+/* SysTimer casts the base address to a SysTimer_Type pointer, so registers are accessed as SysTimer->REG */
+#define SysTimer_BASE                       __SYSTIMER_BASEADDR                         /*!< SysTick Base Address */
+#define SysTimer                            ((SysTimer_Type *) SysTimer_BASE)           /*!< SysTick configuration struct */
+/** @} */ /* end of group NMSIS_Core_SysTimer_Registers */
+
+/* ##################################    SysTimer function  ############################################ */
+/**
+ * \defgroup NMSIS_Core_SysTimer SysTimer Functions
+ * \brief    Functions that configure the Core System Timer.
+ * @{
+ */
+/**
+ * \brief  Set system timer load value
+ * \details
+ * This function set the system timer load value in MTIMER register.
+ * \param [in]  value   value to set system timer MTIMER register.
+ * \remarks
+ * - Load value is 64bits wide.
+ * - \ref SysTimer_GetLoadValue
+ */
+__STATIC_FORCEINLINE void SysTimer_SetLoadValue(uint64_t value)
+{
+    SysTimer->MTIMER = value;
+}
+
+/**
+ * \brief  Get system timer load value
+ * \details
+ * This function get the system timer current value in MTIMER register.
+ * \return  current value(64bit) of system timer MTIMER register.
+ * \remarks
+ * - Load value is 64bits wide.
+ * - \ref SysTimer_SetLoadValue
+ */
+__STATIC_FORCEINLINE uint64_t SysTimer_GetLoadValue(void)
+{
+    return SysTimer->MTIMER;
+}
+
+/**
+ * \brief  Set system timer compare value
+ * \details
+ * This function set the system Timer compare value in MTIMERCMP register.
+ * \param [in]  value   compare value to set system timer MTIMERCMP register.
+ * \remarks
+ * - Compare value is 64bits wide.
+ * - If compare value is larger than current value timer interrupt generate.
+ * - Modify the load value or compare value less to clear the interrupt.
+ * - \ref SysTimer_GetCompareValue
+ */
+__STATIC_FORCEINLINE void SysTimer_SetCompareValue(uint64_t value)
+{
+    SysTimer->MTIMERCMP = value;
+}
+
+/**
+ * \brief  Get system timer compare value
+ * \details
+ * Reads back the content of the MTIMERCMP register.
+ * \return  compare value of system timer MTIMERCMP register.
+ * \remarks
+ * - Compare value is 64bits wide.
+ * - \ref SysTimer_SetCompareValue
+ */
+__STATIC_FORCEINLINE uint64_t SysTimer_GetCompareValue(void)
+{
+    uint64_t compare = SysTimer->MTIMERCMP;
+
+    return compare;
+}
+
+/**
+ * \brief  Enable system timer counter running
+ * \details
+ * Lets the system timer counter run by clearing the
+ * TIMESTOP bit in MTIMECTL register (read-modify-write, other bits are kept).
+ */
+__STATIC_FORCEINLINE void SysTimer_Start(void)
+{
+    uint32_t ctl = SysTimer->MTIMECTL;
+
+    ctl &= ~(SysTimer_MTIMECTL_TIMESTOP_Msk);
+    SysTimer->MTIMECTL = ctl;
+}
+
+/**
+ * \brief  Stop system timer counter running
+ * \details
+ * Halts the system timer counter by setting the
+ * TIMESTOP bit in MTIMECTL register (read-modify-write, other bits are kept).
+ */
+__STATIC_FORCEINLINE void SysTimer_Stop(void)
+{
+    uint32_t ctl = SysTimer->MTIMECTL;
+
+    ctl |= SysTimer_MTIMECTL_TIMESTOP_Msk;
+    SysTimer->MTIMECTL = ctl;
+}
+
+/**
+ * \brief  Set system timer control value
+ * \details
+ * Writes the given value, masked with \ref SysTimer_MTIMECTL_Msk, to the MTIMECTL register.
+ * \param [in]  mctl    value to set MTIMECTL register
+ * \remarks
+ * - Bit TIMESTOP is used to start and stop timer.
+ *   Clear TIMESTOP bit to 0 to start timer, otherwise to stop timer.
+ * - Bit CMPCLREN is used to enable auto MTIMER clear to zero when MTIMER >= MTIMERCMP.
+ *   Clear CMPCLREN bit to 0 to stop auto clear MTIMER feature, otherwise to enable it.
+ * - Bit CLKSRC is used to select timer clock source.
+ *   Clear CLKSRC bit to 0 to use *mtime_toggle_a*, otherwise use *core_clk_aon*
+ * - \ref SysTimer_GetControlValue
+ */
+__STATIC_FORCEINLINE void SysTimer_SetControlValue(uint32_t mctl)
+{
+    uint32_t masked = (mctl & SysTimer_MTIMECTL_Msk);
+
+    SysTimer->MTIMECTL = masked;
+}
+
+/**
+ * \brief  Get system timer control value
+ * \details
+ * Reads the MTIMECTL register and returns it masked with \ref SysTimer_MTIMECTL_Msk.
+ * \return  MTIMECTL register value
+ * \remarks
+ * - \ref SysTimer_SetControlValue
+ */
+__STATIC_FORCEINLINE uint32_t SysTimer_GetControlValue(void)
+{
+    uint32_t ctl = SysTimer->MTIMECTL;
+
+    return (ctl & SysTimer_MTIMECTL_Msk);
+}
+
+/**
+ * \brief  Trigger or set software interrupt via system timer
+ * \details
+ * Sets the MSIP bit in the MSIP register (read-modify-write, other bits are kept).
+ * \remarks
+ * - Set system timer MSIP bit and generate a SW interrupt.
+ * - \ref SysTimer_ClearSWIRQ
+ * - \ref SysTimer_GetMsipValue
+ */
+__STATIC_FORCEINLINE void SysTimer_SetSWIRQ(void)
+{
+    uint32_t msip = SysTimer->MSIP;
+
+    msip |= SysTimer_MSIP_MSIP_Msk;
+    SysTimer->MSIP = msip;
+}
+
+/**
+ * \brief  Clear system timer software interrupt pending request
+ * \details
+ * Clears the MSIP bit in the MSIP register (read-modify-write, other bits are kept).
+ * \remarks
+ * - Clear system timer MSIP bit in MSIP register to clear the software interrupt pending.
+ * - \ref SysTimer_SetSWIRQ
+ * - \ref SysTimer_GetMsipValue
+ */
+__STATIC_FORCEINLINE void SysTimer_ClearSWIRQ(void)
+{
+    uint32_t msip = SysTimer->MSIP;
+
+    msip &= ~SysTimer_MSIP_MSIP_Msk;
+    SysTimer->MSIP = msip;
+}
+
+/**
+ * \brief  Get system timer MSIP register value
+ * \details
+ * Reads the MSIP register and returns it masked with \ref SysTimer_MSIP_Msk.
+ * \return    Value of Timer MSIP register.
+ * \remarks
+ * - Bit0 is SW interrupt flag.
+ *   Bit0 is 1 then SW interrupt set. Bit0 is 0 then SW interrupt clear.
+ * - \ref SysTimer_SetSWIRQ
+ * - \ref SysTimer_ClearSWIRQ
+ */
+__STATIC_FORCEINLINE uint32_t SysTimer_GetMsipValue(void)
+{
+    uint32_t msip = SysTimer->MSIP;
+
+    return (uint32_t)(msip & SysTimer_MSIP_Msk);
+}
+
+/**
+ * \brief  Set system timer MSIP register value
+ * \details
+ * Writes the given value, masked with \ref SysTimer_MSIP_Msk, to the MSIP register.
+ * \param [in]  msip   value to set MSIP register
+ */
+__STATIC_FORCEINLINE void SysTimer_SetMsipValue(uint32_t msip)
+{
+    uint32_t masked = (msip & SysTimer_MSIP_Msk);
+
+    SysTimer->MSIP = masked;
+}
+
+/**
+ * \brief  Do software reset request
+ * \details
+ * Requests a software reset through the system timer:
+ * - \ref SysTimer_MSFRST_KEY is written to the MSFTRST register to generate the software reset request
+ * - The software request flag can be cleared by reset operation to clear
+ * \remarks
+ * - The software reset is sent to SoC, SoC need to generate reset signal and send back to Core
+ * - This function will not return, it spins in an endless loop to wait for the Core reset to happen
+ */
+__STATIC_FORCEINLINE void SysTimer_SoftwareReset(void)
+{
+    SysTimer->MSFTRST = SysTimer_MSFRST_KEY;
+    for (;;) {
+        /* wait here until the reset takes effect */
+    }
+}
+
+#if defined (__Vendor_SysTickConfig) && (__Vendor_SysTickConfig == 0U) && defined(__ECLIC_PRESENT) && (__ECLIC_PRESENT == 1)
+/**
+ * \brief   System Tick Configuration
+ * \details Initializes the System Timer and its non-vector interrupt, and starts the System Tick Timer.
+ *
+ *  In our default implementation, the timer counter is set to zero and the compare value is set to
+ *  the ticks given by the user, so a timer compare non-vector interrupt is raised when the counter
+ *  matches it. During the timer interrupt the user should reload the system tick using the
+ *  \ref SysTick_Reload function or a similar function written by the user, so it can produce a
+ *  periodic timer interrupt.
+ * \param [in]  ticks  Number of ticks between two interrupts.
+ * \return          0  Function succeeded.
+ * \return          1  Function failed (this default implementation always returns 0).
+ * \remarks
+ * - For \ref __NUCLEI_N_REV >= 0x0104, the CMPCLREN bit in MTIMECTL is introduced,
+ *   but we assume that the CMPCLREN bit is set to 0, so MTIMER register will not be
+ *   auto cleared to 0 when MTIMER >= MTIMERCMP.
+ * - When the variable \ref __Vendor_SysTickConfig is set to 1, then the
+ *   function \ref SysTick_Config is not included.
+ * - In this case, the file <b><Device>.h</b> must contain a vendor-specific implementation
+ *   of this function.
+ * - If user need this function to start a period timer interrupt, then in timer interrupt handler
+ *   routine code, user should call \ref SysTick_Reload with ticks to reload the timer.
+ * - This function only available when __SYSTIMER_PRESENT == 1 and __ECLIC_PRESENT == 1 and __Vendor_SysTickConfig == 0
+ * \sa
+ * - \ref SysTimer_SetCompareValue; SysTimer_SetLoadValue
+ */
+__STATIC_INLINE uint32_t SysTick_Config(uint64_t ticks)
+{
+    /* Restart the counter from zero, then arm the compare value */
+    SysTimer_SetLoadValue(0);
+    SysTimer_SetCompareValue(ticks);
+    /* Route the timer interrupt as a non-vector interrupt at level 0 and enable it last */
+    ECLIC_SetShvIRQ(SysTimer_IRQn, ECLIC_NON_VECTOR_INTERRUPT);
+    ECLIC_SetLevelIRQ(SysTimer_IRQn, 0);
+    ECLIC_EnableIRQ(SysTimer_IRQn);
+    return (0UL);
+}
+
+/**
+ * \brief   System Tick Reload
+ * \details Re-arms the System Timer: the new MTIMERCMP value is the current MTIMER value plus \c ticks.
+ *
+ * \param [in]  ticks  Number of ticks between two interrupts.
+ * \return          0  Function succeeded.
+ * \return          1  Function failed.
+ * \remarks
+ * - For \ref __NUCLEI_N_REV >= 0x0104, the CMPCLREN bit in MTIMECTL is introduced,
+ *   but for this \ref SysTick_Config function, we assume this CMPCLREN bit is set to 0,
+ *   so in interrupt handler function, user still need to set the MTIMERCMP or MTIMER to reload
+ *   the system tick, if vendor want to use this timer's auto clear feature, they can define
+ *   \ref __Vendor_SysTickConfig to 1, and implement \ref SysTick_Config and \ref SysTick_Reload functions.
+ * - When the variable \ref __Vendor_SysTickConfig is set to 1, then the
+ *   function \ref SysTick_Reload is not included.
+ * - In this case, the file <b><Device>.h</b> must contain a vendor-specific implementation
+ *   of this function.
+ * - This function only available when __SYSTIMER_PRESENT == 1 and __ECLIC_PRESENT == 1 and __Vendor_SysTickConfig == 0
+ * - If the sum is not greater than the current MTIMER value (the 64-bit addition wrapped),
+ *   MTIMER is set to 0 and MTIMERCMP is set to ticks
+ * \sa
+ * - \ref SysTimer_SetCompareValue
+ * - \ref SysTimer_SetLoadValue
+ */
+__STATIC_FORCEINLINE uint32_t SysTick_Reload(uint64_t ticks)
+{
+    const uint64_t now = SysTimer->MTIMER;
+    const uint64_t deadline = now + ticks;
+
+    if (!__USUALLY(deadline > now)) {
+        /* The new compare value did not move past the counter, i.e. the
+         * addition overflowed: restart the counter from zero instead */
+        SysTimer->MTIMER = 0;
+        SysTimer->MTIMERCMP = ticks;
+        return (0UL);
+    }
+
+    SysTimer->MTIMERCMP = deadline;
+    return (0UL);
+}
+
+#endif /* defined(__Vendor_SysTickConfig) && (__Vendor_SysTickConfig == 0U) */
+/** @} */ /* End of Doxygen Group NMSIS_Core_SysTimer */
+
+#endif /* defined(__SYSTIMER_PRESENT) && (__SYSTIMER_PRESENT == 1) */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /** __CORE_FEATURE_TIMER_H__  */
+
--- a/components/nmsis/core/inc/nmsis_compiler.h
+++ b/components/nmsis/core/inc/nmsis_compiler.h
@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NMSIS_COMPILER_H
+#define __NMSIS_COMPILER_H
+
+#include <stdint.h>
+
+/*!
+ * @file     nmsis_compiler.h
+ * @brief    NMSIS compiler generic header file
+ */
+#if defined ( __GNUC__ )
+  /** GNU GCC Compiler */
+  #include "nmsis_gcc.h"
+#else
+  #error Unknown compiler.
+#endif
+
+
+#endif /* __NMSIS_COMPILER_H */
+
--- a/components/nmsis/core/inc/nmsis_core.h
+++ b/components/nmsis/core/inc/nmsis_core.h
@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2009-2019 Arm Limited. All rights reserved.
+ * -- Adaptable modifications made for Nuclei Processors. --
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __NMSIS_CORE_H__
+#define __NMSIS_CORE_H__
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "nmsis_version.h"
+
+/**
+ * \ingroup NMSIS_Core_VersionControl
+ * @{
+ */
+/* The following enum __NUCLEI_N_REV/__NUCLEI_NX_REV definition in this file
+ * is only used for doxygen documentation generation,
+ * The <device>.h is the real file to define it by vendor
+ */
+#if defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
+/**
+ * \brief Nuclei N class core revision number
+ * \details
+ * Revision number format: [15:8] revision number, [7:0] patch number
+ * \attention
+ * This define is exclusive with \ref __NUCLEI_NX_REV
+ */
+#define __NUCLEI_N_REV (0x0104)
+/**
+ * \brief Nuclei NX class core revision number
+ * \details
+ * Revision number format: [15:8] revision number, [7:0] patch number
+ * \attention
+ * This define is exclusive with \ref __NUCLEI_N_REV
+ */
+#define __NUCLEI_NX_REV (0x0100)
+#endif /* __ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__ */
+/** @} */ /* End of Group NMSIS_Core_VersionControl */
+
+#include "nmsis_compiler.h" /* NMSIS compiler specific defines */
+
+/* === Include Nuclei Core Related Headers === */
+/* Include core base feature header file */
+#include "core_feature_base.h"
+
+#ifndef __NMSIS_GENERIC
+/* Include core eclic feature header file */
+#include "core_feature_eclic.h"
+/* Include core systimer feature header file */
+#include "core_feature_timer.h"
+#endif
+
+/* Include core fpu feature header file */
+#include "core_feature_fpu.h"
+/* Include core dsp feature header file */
+#include "core_feature_dsp.h"
+/* Include core pmp feature header file */
+#include "core_feature_pmp.h"
+/* Include core cache feature header file */
+#include "core_feature_cache.h"
+
+/* Include compatible functions header file */
+#include "core_compatiable.h"
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __NMSIS_CORE_H__ */
--- a/components/nmsis/core/inc/nmsis_gcc.h
+++ b/components/nmsis/core/inc/nmsis_gcc.h
@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NMSIS_GCC_H__
+#define __NMSIS_GCC_H__
+/*!
+ * @file     nmsis_gcc.h
+ * @brief    NMSIS compiler GCC header file
+ */
+#include <stdint.h>
+#include "riscv_encoding.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* #########################  Startup and Lowlevel Init  ######################## */
+/**
+ * \defgroup NMSIS_Core_CompilerControl    Compiler Control
+ * \ingroup  NMSIS_Core
+ * \brief    Compiler agnostic \#define symbols for generic c/c++ source code
+ * \details
+ *
+ * The NMSIS-Core provides the header file <b>nmsis_compiler.h</b> with consistent \#define symbols for generating C or C++ source files that should be compiler agnostic.
+ * Each NMSIS compliant compiler should support the functionality described in this section.
+ *
+ * The header file <b>nmsis_compiler.h</b> is also included by each Device Header File <device.h> so that these definitions are available.
+ *   @{
+ */
+/* ignore some GCC warnings; note that this push has no matching pop in this header */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wsign-conversion"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+
+/* Fallback for __has_builtin */
+#ifndef __has_builtin
+#define __has_builtin(x) (0)
+#endif
+
+/* NMSIS compiler specific defines */
+/** \brief Pass information from the compiler to the assembler. */
+#ifndef __ASM
+#define __ASM __asm
+#endif
+
+/** \brief Recommend that function should be inlined by the compiler. */
+#ifndef __INLINE
+#define __INLINE inline
+#endif
+
+/** \brief Define a static function that may be inlined by the compiler. */
+#ifndef __STATIC_INLINE
+#define __STATIC_INLINE static inline
+#endif
+
+/** \brief Define a static function that should be always inlined by the compiler. */
+#ifndef __STATIC_FORCEINLINE
+#define __STATIC_FORCEINLINE __attribute__((always_inline)) static inline
+#endif
+
+/** \brief Inform the compiler that a function does not return. */
+#ifndef __NO_RETURN
+#define __NO_RETURN __attribute__((__noreturn__))
+#endif
+
+/** \brief Inform that a variable shall be retained in executable image. */
+#ifndef __USED
+#define __USED __attribute__((used))
+#endif
+
+/** \brief Mark a symbol as weak so that another definition can override it. */
+#ifndef __WEAK
+#define __WEAK __attribute__((weak))
+#endif
+
+/** \brief specified the vector size of the variable, measured in bytes */
+#ifndef __VECTOR_SIZE
+#define __VECTOR_SIZE(x) __attribute__((vector_size(x)))
+#endif
+
+/** \brief Request smallest possible alignment. */
+#ifndef __PACKED
+#define __PACKED __attribute__((packed, aligned(1)))
+#endif
+
+/** \brief Request smallest possible alignment for a structure. */
+#ifndef __PACKED_STRUCT
+#define __PACKED_STRUCT struct __attribute__((packed, aligned(1)))
+#endif
+
+/** \brief Request smallest possible alignment for a union. */
+#ifndef __PACKED_UNION
+#define __PACKED_UNION union __attribute__((packed, aligned(1)))
+#endif
+
+#ifndef __UNALIGNED_UINT16_WRITE
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+/** \brief Single-member struct declared with __PACKED_STRUCT, used to store a uint16_t at a possibly unaligned address */
+__PACKED_STRUCT T_UINT16_WRITE
+{
+    uint16_t v; /* the 16-bit value being written */
+};
+#pragma GCC diagnostic pop
+/** \brief Store \c val as a uint16_t at \c addr through struct T_UINT16_WRITE; the result of the assignment is discarded. */
+#define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val))
+#endif /* __UNALIGNED_UINT16_WRITE */
+
+#ifndef __UNALIGNED_UINT16_READ
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+/** \brief Single-member struct declared with __PACKED_STRUCT, used to load a uint16_t from a possibly unaligned address */
+__PACKED_STRUCT T_UINT16_READ
+{
+    uint16_t v; /* the 16-bit value being read */
+};
+#pragma GCC diagnostic pop
+/** \brief Load a uint16_t from \c addr through a const pointer to struct T_UINT16_READ; evaluates to the loaded value. */
+#define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v)
+#endif /* __UNALIGNED_UINT16_READ */
+
+#ifndef __UNALIGNED_UINT32_WRITE
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+/** \brief Single-member struct declared with __PACKED_STRUCT, used to store a uint32_t at a possibly unaligned address */
+__PACKED_STRUCT T_UINT32_WRITE
+{
+    uint32_t v; /* the 32-bit value being written */
+};
+#pragma GCC diagnostic pop
+/** \brief Store \c val as a uint32_t at \c addr through struct T_UINT32_WRITE; the result of the assignment is discarded. */
+#define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val))
+#endif /* __UNALIGNED_UINT32_WRITE */
+
+#ifndef __UNALIGNED_UINT32_READ
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+/** \brief Single-member struct declared with __PACKED_STRUCT, used to load a uint32_t from a possibly unaligned address */
+__PACKED_STRUCT T_UINT32_READ
+{
+    uint32_t v; /* the 32-bit value being read */
+};
+#pragma GCC diagnostic pop
+/** \brief Load a uint32_t from \c addr through a const pointer to struct T_UINT32_READ; evaluates to the loaded value. */
+#define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v)
+#endif /* __UNALIGNED_UINT32_READ */
+
+/** \brief Minimum `x` bytes alignment for a variable. */
+#ifndef __ALIGNED
+#define __ALIGNED(x) __attribute__((aligned(x)))
+#endif
+
+/** \brief restrict pointer qualifier to enable additional optimizations. */
+#ifndef __RESTRICT
+#define __RESTRICT __restrict
+#endif
+
+/** \brief Barrier to prevent compiler from reordering instructions. */
+#ifndef __COMPILER_BARRIER
+#define __COMPILER_BARRIER() __ASM volatile("" :: \
+                                                : "memory")
+#endif
+
+/** \brief provide the compiler with branch prediction information, the branch is usually true */
+#ifndef __USUALLY
+#define __USUALLY(exp) __builtin_expect((exp), 1)
+#endif
+
+/** \brief provide the compiler with branch prediction information, the branch is rarely true */
+#ifndef __RARELY
+#define __RARELY(exp) __builtin_expect((exp), 0)
+#endif
+
+/** \brief Use this attribute to indicate that the specified function is an interrupt handler. */
+#ifndef __INTERRUPT
+#define __INTERRUPT __attribute__((interrupt))
+#endif
+
+/** @} */ /* End of Doxygen Group NMSIS_Core_CompilerControl */
+
+/* IO definitions (access restrictions to peripheral registers) */
+/**
+ * \defgroup NMSIS_Core_PeriphAccess     Peripheral Access
+ * \brief  Naming conventions and optional features for accessing peripherals.
+ *
+ * The section below describes the naming conventions, requirements, and optional features
+ * for accessing device specific peripherals.
+ * Most of the rules also apply to the core peripherals.
+ *
+ * The **Device Header File <device.h>** contains typically these definition
+ * and also includes the core specific header files.
+ *
+ * @{
+ */
+/** \brief Defines 'read only' permissions */
+#ifdef __cplusplus
+#define __I volatile
+#else
+#define __I volatile const
+#endif
+/** \brief Defines 'write only' permissions */
+#define __O volatile
+/** \brief Defines 'read / write' permissions */
+#define __IO volatile
+
+/* following defines should be used for structure members */
+/** \brief Defines 'read only' structure member permissions */
+#define __IM volatile const
+/** \brief Defines 'write only' structure member permissions */
+#define __OM volatile
+/** \brief Defines 'read/write' structure member permissions */
+#define __IOM volatile
+
+/**
+ * \brief   Mask and shift a bit field value for use in a register bit range.
+ * \details The macro \ref _VAL2FLD uses the #define's _Pos and _Msk of the related bit
+ * field to shift bit-field values for assigning to a register.
+ *
+ * **Example**:
+ * \code
+ * ECLIC->CFG = _VAL2FLD(CLIC_CLICCFG_NLBIT, 3);
+ * \endcode
+ * \param[in] field  Name of the register bit field.
+ * \param[in] value  Value of the bit field. This parameter is interpreted as an uint32_t type.
+ * \return           Masked and shifted value.
+ */
+#define _VAL2FLD(field, value) (((uint32_t)(value) << field##_Pos) & field##_Msk)
+
+/**
+ * \brief   Extract a bit field value from a register value.
+ * \details The macro \ref _FLD2VAL masks the register value with the field's _Msk
+ * #define and then shifts the result right by the field's _Pos #define.
+ *
+ * **Example**:
+ * \code
+ * nlbits = _FLD2VAL(CLIC_CLICCFG_NLBIT, ECLIC->CFG);
+ * \endcode
+ * \param[in] field  Name of the register bit field.
+ * \param[in] value  Value of register. This parameter is interpreted as an uint32_t type.
+ * \return           Masked and shifted bit field value.
+ */
+#define _FLD2VAL(field, value) (((uint32_t)(value) & field##_Msk) >> field##_Pos)
+
+    /** @} */ /* end of group NMSIS_Core_PeriphAccess */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __NMSIS_GCC_H__ */
--- a/components/nmsis/core/inc/nmsis_version.h
+++ b/components/nmsis/core/inc/nmsis_version.h
@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __NMSIS_VERSION_H
+#define __NMSIS_VERSION_H
+
+/**
+ * \defgroup NMSIS_Core_VersionControl    Version Control
+ * \ingroup  NMSIS_Core
+ * \brief    Version \#define symbols for NMSIS release specific C/C++ source code
+ * \details
+ *
+ * We followed the [semantic versioning 2.0.0](https://semver.org/) to control NMSIS version.
+ * The version format is **MAJOR.MINOR.PATCH**, increment the:
+ * 1. MAJOR version when you make incompatible API changes,
+ * 2. MINOR version when you add functionality in a backwards compatible manner, and
+ * 3. PATCH version when you make backwards compatible bug fixes.
+ *
+ * The header file `nmsis_version.h` is included by each core header so that these definitions are available.
+ *
+ * **Example Usage for NMSIS Version Check**:
+ * \code
+ *   #if defined(__NMSIS_VERSION) && (__NMSIS_VERSION >= 0x00010105)
+ *      #warning "Yes, we have NMSIS 1.1.5 or later"
+ *   #else
+ *      #error "We need NMSIS 1.1.5 or later!"
+ *   #endif
+ * \endcode
+ *
+ * @{
+ */
+
+/*!
+ * \file     nmsis_version.h
+ * \brief    NMSIS Version definitions
+ **/
+
+/**
+ * \brief   NMSIS major version number
+ * \details
+ * Use the major version to tell
+ * NMSIS major releases apart.
+ * */
+#define __NMSIS_VERSION_MAJOR            (1U)
+
+/**
+ * \brief   NMSIS minor version number
+ * \details
+ * Use the minor version to
+ * detect a NMSIS release update that includes new features.
+ *
+ **/
+#define __NMSIS_VERSION_MINOR            (0U)
+
+/**
+ * \brief   NMSIS patch version number
+ * \details
+ * Use the patch version to
+ * identify bug fixes in this package.
+ **/
+#define __NMSIS_VERSION_PATCH            (1U)
+/**
+ * \brief   Combined NMSIS version value
+ * \details
+ * NMSIS Version format: **MAJOR.MINOR.PATCH**
+ * * MAJOR: \ref __NMSIS_VERSION_MAJOR, placed in `bits [31:16]` of \ref __NMSIS_VERSION
+ * * MINOR: \ref __NMSIS_VERSION_MINOR, placed in `bits [15:8]` of \ref __NMSIS_VERSION
+ * * PATCH: \ref __NMSIS_VERSION_PATCH, placed in `bits [7:0]` of \ref __NMSIS_VERSION
+ **/
+#define __NMSIS_VERSION                  (((__NMSIS_VERSION_MAJOR) << 16U) | ((__NMSIS_VERSION_MINOR) << 8U) | (__NMSIS_VERSION_PATCH))
+
+/** @} */ /* End of Doxygen Group NMSIS_Core_VersionControl */
+#endif
--- a/components/nmsis/core/inc/riscv_bits.h
+++ b/components/nmsis/core/inc/riscv_bits.h
@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __RISCV_BITS_H__
+#define __RISCV_BITS_H__
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+#if __riscv_xlen == 64
+# define SLL32                  sllw
+# define STORE                  sd
+# define LOAD                   ld
+# define LWU                    lwu
+# define LOG_REGBYTES           3
+#else
+# define SLL32                  sll
+# define STORE                  sw
+# define LOAD                   lw
+# define LWU                    lw
+# define LOG_REGBYTES           2
+#endif /* __riscv_xlen */
+
+#define REGBYTES (1 << LOG_REGBYTES)
+
+#if defined(__riscv_flen)
+#if __riscv_flen == 64
+# define FPSTORE                fsd
+# define FPLOAD                 fld
+# define LOG_FPREGBYTES         3
+#else
+# define FPSTORE                fsw
+# define FPLOAD                 flw
+# define LOG_FPREGBYTES         2
+#endif /* __riscv_flen == 64 */
+#define FPREGBYTES              (1 << LOG_FPREGBYTES)
+#endif /* __riscv_flen */
+
+#define __rv_likely(x)          __builtin_expect((x), 1)
+#define __rv_unlikely(x)        __builtin_expect((x), 0)
+
+#define __RV_ROUNDUP(a, b)      ((((a)-1)/(b)+1)*(b))
+#define __RV_ROUNDDOWN(a, b)    ((a)/(b)*(b))
+
+#define __RV_MAX(a, b)          ((a) > (b) ? (a) : (b)) /* larger of a and b; each argument may be evaluated twice */
+#define __RV_MIN(a, b)          ((a) < (b) ? (a) : (b)) /* smaller of a and b; each argument may be evaluated twice */
+#define __RV_CLAMP(a, lo, hi)   __RV_MIN(__RV_MAX(a, lo), hi) /* limit a to the range [lo, hi] using the __RV_ helpers above */
+
+#define __RV_EXTRACT_FIELD(val, which)                  (((val) & (which)) / ((which) & ~((which)-1)))
+#define __RV_INSERT_FIELD(val, which, fieldval)         (((val) & ~(which)) | ((fieldval) * ((which) & ~((which)-1))))
+
+#ifdef __ASSEMBLY__
+#define _AC(X,Y)                X
+#define _AT(T,X)                X
+#else
+#define __AC(X,Y)               (X##Y)
+#define _AC(X,Y)                __AC(X,Y)
+#define _AT(T,X)                ((T)(X))
+#endif /* __ASSEMBLY__ */
+
+#define _UL(x)                  (_AC(x, UL))
+#define _ULL(x)                 (_AC(x, ULL))
+
+#define _BITUL(x)               (_UL(1) << (x))
+#define _BITULL(x)              (_ULL(1) << (x))
+
+#define UL(x)                   (_UL(x))
+#define ULL(x)                  (_ULL(x))
+
+#define STR(x)                  XSTR(x) /* macro-expands x first, then stringifies it via XSTR */
+#define XSTR(x)                 #x /* stringifies the argument exactly as written */
+#define __STR(s)                #s /* stringifies the argument exactly as written */
+#define STRINGIFY(s)            __STR(s) /* macro-expands s first, then stringifies it via __STR */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /** __RISCV_BITS_H__  */
--- a/components/nmsis/core/inc/riscv_encoding.h
+++ b/components/nmsis/core/inc/riscv_encoding.h
@ -0,0 +1,690 @@
+/*
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __RISCV_ENCODING_H__
+#define __RISCV_ENCODING_H__
+
+#include "riscv_bits.h"
+#ifdef __cplusplus
+ extern "C" {
+#endif
+/**
+ * \defgroup NMSIS_Core_CSR_Encoding    Core CSR Encodings
+ * \ingroup  NMSIS_Core
+ * \brief    NMSIS Core CSR Encodings
+ * \details
+ *
+ * The following macros are used for CSR encodings
+ *   @{
+ */
+/* === Standard CSR bit mask === */
+#define MSTATUS_UIE         0x00000001
+#define MSTATUS_SIE         0x00000002
+#define MSTATUS_HIE         0x00000004
+#define MSTATUS_MIE         0x00000008
+#define MSTATUS_UPIE        0x00000010
+#define MSTATUS_SPIE        0x00000020
+#define MSTATUS_HPIE        0x00000040
+#define MSTATUS_MPIE        0x00000080
+#define MSTATUS_SPP         0x00000100
+#define MSTATUS_MPP         0x00001800
+#define MSTATUS_FS          0x00006000
+#define MSTATUS_XS          0x00018000
+#define MSTATUS_MPRV        0x00020000
+#define MSTATUS_PUM         0x00040000
+#define MSTATUS_MXR         0x00080000
+#define MSTATUS_VM          0x1F000000
+#define MSTATUS32_SD        0x80000000
+#define MSTATUS64_SD        0x8000000000000000
+
+#define MSTATUS_FS_INITIAL  0x00002000
+#define MSTATUS_FS_CLEAN    0x00004000
+#define MSTATUS_FS_DIRTY    0x00006000
+
+#define SSTATUS_UIE         0x00000001
+#define SSTATUS_SIE         0x00000002
+#define SSTATUS_UPIE        0x00000010
+#define SSTATUS_SPIE        0x00000020
+#define SSTATUS_SPP         0x00000100
+#define SSTATUS_FS          0x00006000
+#define SSTATUS_XS          0x00018000
+#define SSTATUS_PUM         0x00040000
+#define SSTATUS32_SD        0x80000000
+#define SSTATUS64_SD        0x8000000000000000
+
+#define CSR_MCACHE_CTL_IE   0x00000001
+#define CSR_MCACHE_CTL_DE   0x00010000
+
+#define DCSR_XDEBUGVER      (3U<<30)
+#define DCSR_NDRESET        (1<<29)
+#define DCSR_FULLRESET      (1<<28)
+#define DCSR_EBREAKM        (1<<15)
+#define DCSR_EBREAKH        (1<<14)
+#define DCSR_EBREAKS        (1<<13)
+#define DCSR_EBREAKU        (1<<12)
+#define DCSR_STOPCYCLE      (1<<10)
+#define DCSR_STOPTIME       (1<<9)
+#define DCSR_CAUSE          (7<<6)
+#define DCSR_DEBUGINT       (1<<5)
+#define DCSR_HALT           (1<<3)
+#define DCSR_STEP           (1<<2)
+#define DCSR_PRV            (3<<0)
+
+#define DCSR_CAUSE_NONE     0
+#define DCSR_CAUSE_SWBP     1
+#define DCSR_CAUSE_HWBP     2
+#define DCSR_CAUSE_DEBUGINT 3
+#define DCSR_CAUSE_STEP     4
+#define DCSR_CAUSE_HALT     5
+
+#define MCONTROL_TYPE(xlen)    (0xfULL<<((xlen)-4))
+#define MCONTROL_DMODE(xlen)   (1ULL<<((xlen)-5))
+#define MCONTROL_MASKMAX(xlen) (0x3fULL<<((xlen)-11))
+
+#define MCONTROL_SELECT     (1<<19)
+#define MCONTROL_TIMING     (1<<18)
+#define MCONTROL_ACTION     (0x3f<<12)
+#define MCONTROL_CHAIN      (1<<11)
+#define MCONTROL_MATCH      (0xf<<7)
+#define MCONTROL_M          (1<<6)
+#define MCONTROL_H          (1<<5)
+#define MCONTROL_S          (1<<4)
+#define MCONTROL_U          (1<<3)
+#define MCONTROL_EXECUTE    (1<<2)
+#define MCONTROL_STORE      (1<<1)
+#define MCONTROL_LOAD       (1<<0)
+
+#define MCONTROL_TYPE_NONE      0
+#define MCONTROL_TYPE_MATCH     2
+
+#define MCONTROL_ACTION_DEBUG_EXCEPTION   0
+#define MCONTROL_ACTION_DEBUG_MODE        1
+#define MCONTROL_ACTION_TRACE_START       2
+#define MCONTROL_ACTION_TRACE_STOP        3
+#define MCONTROL_ACTION_TRACE_EMIT        4
+
+#define MCONTROL_MATCH_EQUAL     0
+#define MCONTROL_MATCH_NAPOT     1
+#define MCONTROL_MATCH_GE        2
+#define MCONTROL_MATCH_LT        3
+#define MCONTROL_MATCH_MASK_LOW  4
+#define MCONTROL_MATCH_MASK_HIGH 5
+
+#define MIP_SSIP            (1 << IRQ_S_SOFT)
+#define MIP_HSIP            (1 << IRQ_H_SOFT)
+#define MIP_MSIP            (1 << IRQ_M_SOFT)
+#define MIP_STIP            (1 << IRQ_S_TIMER)
+#define MIP_HTIP            (1 << IRQ_H_TIMER)
+#define MIP_MTIP            (1 << IRQ_M_TIMER)
+#define MIP_SEIP            (1 << IRQ_S_EXT)
+#define MIP_HEIP            (1 << IRQ_H_EXT)
+#define MIP_MEIP            (1 << IRQ_M_EXT)
+
+#define MIE_SSIE            MIP_SSIP
+#define MIE_HSIE            MIP_HSIP
+#define MIE_MSIE            MIP_MSIP
+#define MIE_STIE            MIP_STIP
+#define MIE_HTIE            MIP_HTIP
+#define MIE_MTIE            MIP_MTIP
+#define MIE_SEIE            MIP_SEIP
+#define MIE_HEIE            MIP_HEIP
+#define MIE_MEIE            MIP_MEIP
+
+/* === P-ext CSR bit mask === */
+
+#define UCODE_OV            (0x1)
+
+/* === Nuclei custom CSR bit mask === */
+
+#define WFE_WFE                     (0x1)
+#define TXEVT_TXEVT                 (0x1)
+#define SLEEPVALUE_SLEEPVALUE       (0x1)
+
+#define MCOUNTINHIBIT_IR            (1<<2)
+#define MCOUNTINHIBIT_CY            (1<<0)
+
+#define MILM_CTL_ILM_BPA            (((1ULL<<((__riscv_xlen)-10))-1)<<10)
+#define MILM_CTL_ILM_RWECC          (1<<3)
+#define MILM_CTL_ILM_ECC_EXCP_EN    (1<<2)
+#define MILM_CTL_ILM_ECC_EN         (1<<1)
+#define MILM_CTL_ILM_EN             (1<<0)
+
+#define MDLM_CTL_DLM_BPA            (((1ULL<<((__riscv_xlen)-10))-1)<<10)
+#define MDLM_CTL_DLM_RWECC          (1<<3)
+#define MDLM_CTL_DLM_ECC_EXCP_EN    (1<<2)
+#define MDLM_CTL_DLM_ECC_EN         (1<<1)
+#define MDLM_CTL_DLM_EN             (1<<0)
+
+#define MSUBM_PTYP                  (0x3<<8)
+#define MSUBM_TYP                   (0x3<<6)
+
+#define MDCAUSE_MDCAUSE             (0x3)
+
+#define MMISC_CTL_NMI_CAUSE_FFF     (1<<9)
+#define MMISC_CTL_MISALIGN          (1<<6)
+#define MMISC_CTL_BPU               (1<<3)
+
+#define MCACHE_CTL_IC_EN            (1<<0)
+#define MCACHE_CTL_IC_SCPD_MOD      (1<<1)
+#define MCACHE_CTL_IC_ECC_EN        (1<<2)
+#define MCACHE_CTL_IC_ECC_EXCP_EN   (1<<3)
+#define MCACHE_CTL_IC_RWTECC        (1<<4)
+#define MCACHE_CTL_IC_RWDECC        (1<<5)
+#define MCACHE_CTL_DC_EN            (1<<16)
+#define MCACHE_CTL_DC_ECC_EN        (1<<17)
+#define MCACHE_CTL_DC_ECC_EXCP_EN   (1<<18)
+#define MCACHE_CTL_DC_RWTECC        (1<<19)
+#define MCACHE_CTL_DC_RWDECC        (1<<20)
+
+#define MTVT2_MTVT2EN               (1<<0)
+#define MTVT2_COMMON_CODE_ENTRY     (((1ULL<<((__riscv_xlen)-2))-1)<<2)
+
+#define MCFG_INFO_TEE               (1<<0)
+#define MCFG_INFO_ECC               (1<<1)
+#define MCFG_INFO_CLIC              (1<<2)
+#define MCFG_INFO_PLIC              (1<<3)
+#define MCFG_INFO_FIO               (1<<4)
+#define MCFG_INFO_PPI               (1<<5)
+#define MCFG_INFO_NICE              (1<<6)
+#define MCFG_INFO_ILM               (1<<7)
+#define MCFG_INFO_DLM               (1<<8)
+#define MCFG_INFO_ICACHE            (1<<9)
+#define MCFG_INFO_DCACHE            (1<<10)
+
+#define MICFG_IC_SET                (0xF<<0)
+#define MICFG_IC_WAY                (0x7<<4)
+#define MICFG_IC_LSIZE              (0x7<<7)
+#define MICFG_IC_ECC                (0x1<<10)
+#define MICFG_ILM_SIZE              (0x1F<<16)
+#define MICFG_ILM_XONLY             (0x1<<21)
+#define MICFG_ILM_ECC               (0x1<<22)
+
+#define MDCFG_DC_SET                (0xF<<0)
+#define MDCFG_DC_WAY                (0x7<<4)
+#define MDCFG_DC_LSIZE              (0x7<<7)
+#define MDCFG_DC_ECC                (0x1<<10)
+#define MDCFG_DLM_SIZE              (0x1F<<16)
+#define MDCFG_DLM_ECC               (0x1<<21)
+
+#define MPPICFG_INFO_PPI_SIZE       (0x1F<<1)
+#define MPPICFG_INFO_PPI_BPA        (((1ULL<<((__riscv_xlen)-10))-1)<<10)
+
+#define MFIOCFG_INFO_FIO_SIZE       (0x1F<<1)
+#define MFIOCFG_INFO_FIO_BPA        (((1ULL<<((__riscv_xlen)-10))-1)<<10)
+
+#define MECC_LOCK_ECC_LOCK          (0x1)
+
+#define MECC_CODE_CODE              (0x1FF)
+#define MECC_CODE_RAMID             (0x1F<<16)
+#define MECC_CODE_SRAMID            (0x1F<<24)
+
+#define CCM_SUEN_SUEN               (0x1<<0)
+#define CCM_DATA_DATA               (0x7<<0)
+#define CCM_COMMAND_COMMAND         (0x1F<<0)
+
+#define SIP_SSIP MIP_SSIP
+#define SIP_STIP MIP_STIP
+
+#define PRV_U 0
+#define PRV_S 1
+#define PRV_H 2
+#define PRV_M 3
+
+#define VM_MBARE 0
+#define VM_MBB   1
+#define VM_MBBID 2
+#define VM_SV32  8
+#define VM_SV39  9
+#define VM_SV48  10
+
+#define IRQ_S_SOFT   1
+#define IRQ_H_SOFT   2
+#define IRQ_M_SOFT   3
+#define IRQ_S_TIMER  5
+#define IRQ_H_TIMER  6
+#define IRQ_M_TIMER  7
+#define IRQ_S_EXT    9
+#define IRQ_H_EXT    10
+#define IRQ_M_EXT    11
+#define IRQ_COP      12
+#define IRQ_HOST     13
+
+
+/* === FPU FRM Rounding Mode === */
+/** FPU Round to Nearest, ties to Even*/
+#define FRM_RNDMODE_RNE     0x0
+/** FPU Round Towards Zero */
+#define FRM_RNDMODE_RTZ     0x1
+/** FPU Round Down (towards -inf) */
+#define FRM_RNDMODE_RDN     0x2
+/** FPU Round Up (towards +inf) */
+#define FRM_RNDMODE_RUP     0x3
+/** FPU Round to nearest, ties to Max Magnitude */
+#define FRM_RNDMODE_RMM     0x4
+/**
+ * In instruction's rm, selects dynamic rounding mode.
+ * In Rounding Mode register, Invalid */
+#define FRM_RNDMODE_DYN     0x7
+
+/* === FPU FFLAGS Accrued Exceptions === */
+/** FPU Inexact */
+#define FFLAGS_AE_NX        (1<<0)
+/** FPU Underflow */
+#define FFLAGS_AE_UF        (1<<1)
+/** FPU Overflow */
+#define FFLAGS_AE_OF        (1<<2)
+/** FPU Divide by Zero */
+#define FFLAGS_AE_DZ        (1<<3)
+/** FPU Invalid Operation */
+#define FFLAGS_AE_NV        (1<<4)
+
+/** Floating Point Register f0-f31, eg. f0 -> FREG(0) */
+#define FREG(idx)           f##idx
+
+
+/* === PMP CFG Bits === */
+#define PMP_R                0x01
+#define PMP_W                0x02
+#define PMP_X                0x04
+#define PMP_A                0x18
+#define PMP_A_TOR            0x08
+#define PMP_A_NA4            0x10
+#define PMP_A_NAPOT          0x18
+#define PMP_L                0x80
+
+#define PMP_SHIFT            2
+#define PMP_COUNT            16
+
+/* page table entry (PTE) fields */
+#define PTE_V     0x001 /* Valid */
+#define PTE_R     0x002 /* Read */
+#define PTE_W     0x004 /* Write */
+#define PTE_X     0x008 /* Execute */
+#define PTE_U     0x010 /* User */
+#define PTE_G     0x020 /* Global */
+#define PTE_A     0x040 /* Accessed */
+#define PTE_D     0x080 /* Dirty */
+#define PTE_SOFT  0x300 /* Reserved for Software */
+
+#define PTE_PPN_SHIFT 10
+
+#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V)
+
+#ifdef __riscv /* XLEN-dependent aliases, only provided when compiling for RISC-V */
+
+#if defined(__riscv64) || (__riscv_xlen == 64) /* 64-bit target: legacy __riscv64 macro or XLEN of 64 */
+# define MSTATUS_SD MSTATUS64_SD
+# define SSTATUS_SD SSTATUS64_SD
+# define RISCV_PGLEVEL_BITS 9
+#else
+# define MSTATUS_SD MSTATUS32_SD
+# define SSTATUS_SD SSTATUS32_SD
+# define RISCV_PGLEVEL_BITS 10
+#endif /* __riscv64 || __riscv_xlen == 64 */
+
+#define RISCV_PGSHIFT 12
+#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) /* 1 << 12 = 4096 */
+
+#endif /* __riscv */
+
+/**
+ * \defgroup NMSIS_Core_CSR_Registers    Core CSR Registers
+ * \ingroup  NMSIS_Core
+ * \brief    NMSIS Core CSR Register Definitions
+ * \details
+ *
+ * The following macros are used for CSR Register Definitions.
+ *   @{
+ */
+/* === Standard RISC-V CSR Registers === */
+#define CSR_USTATUS 0x0
+#define CSR_FFLAGS 0x1
+#define CSR_FRM 0x2
+#define CSR_FCSR 0x3
+#define CSR_CYCLE 0xc00
+#define CSR_TIME 0xc01
+#define CSR_INSTRET 0xc02
+#define CSR_HPMCOUNTER3 0xc03
+#define CSR_HPMCOUNTER4 0xc04
+#define CSR_HPMCOUNTER5 0xc05
+#define CSR_HPMCOUNTER6 0xc06
+#define CSR_HPMCOUNTER7 0xc07
+#define CSR_HPMCOUNTER8 0xc08
+#define CSR_HPMCOUNTER9 0xc09
+#define CSR_HPMCOUNTER10 0xc0a
+#define CSR_HPMCOUNTER11 0xc0b
+#define CSR_HPMCOUNTER12 0xc0c
+#define CSR_HPMCOUNTER13 0xc0d
+#define CSR_HPMCOUNTER14 0xc0e
+#define CSR_HPMCOUNTER15 0xc0f
+#define CSR_HPMCOUNTER16 0xc10
+#define CSR_HPMCOUNTER17 0xc11
+#define CSR_HPMCOUNTER18 0xc12
+#define CSR_HPMCOUNTER19 0xc13
+#define CSR_HPMCOUNTER20 0xc14
+#define CSR_HPMCOUNTER21 0xc15
+#define CSR_HPMCOUNTER22 0xc16
+#define CSR_HPMCOUNTER23 0xc17
+#define CSR_HPMCOUNTER24 0xc18
+#define CSR_HPMCOUNTER25 0xc19
+#define CSR_HPMCOUNTER26 0xc1a
+#define CSR_HPMCOUNTER27 0xc1b
+#define CSR_HPMCOUNTER28 0xc1c
+#define CSR_HPMCOUNTER29 0xc1d
+#define CSR_HPMCOUNTER30 0xc1e
+#define CSR_HPMCOUNTER31 0xc1f
+#define CSR_SSTATUS 0x100
+#define CSR_SIE 0x104
+#define CSR_STVEC 0x105
+#define CSR_SSCRATCH 0x140
+#define CSR_SEPC 0x141
+#define CSR_SCAUSE 0x142
+#define CSR_SBADADDR 0x143
+#define CSR_SIP 0x144
+#define CSR_SPTBR 0x180
+#define CSR_MSTATUS 0x300
+#define CSR_MISA 0x301
+#define CSR_MEDELEG 0x302
+#define CSR_MIDELEG 0x303
+#define CSR_MIE 0x304
+#define CSR_MTVEC 0x305
+#define CSR_MCOUNTEREN 0x306
+#define CSR_MSCRATCH 0x340
+#define CSR_MEPC 0x341
+#define CSR_MCAUSE 0x342
+#define CSR_MBADADDR 0x343 /* same CSR address as CSR_MTVAL */
+#define CSR_MTVAL 0x343 /* same CSR address as CSR_MBADADDR */
+#define CSR_MIP 0x344
+#define CSR_PMPCFG0 0x3a0
+#define CSR_PMPCFG1 0x3a1
+#define CSR_PMPCFG2 0x3a2
+#define CSR_PMPCFG3 0x3a3
+#define CSR_PMPADDR0 0x3b0
+#define CSR_PMPADDR1 0x3b1
+#define CSR_PMPADDR2 0x3b2
+#define CSR_PMPADDR3 0x3b3
+#define CSR_PMPADDR4 0x3b4
+#define CSR_PMPADDR5 0x3b5
+#define CSR_PMPADDR6 0x3b6
+#define CSR_PMPADDR7 0x3b7
+#define CSR_PMPADDR8 0x3b8
+#define CSR_PMPADDR9 0x3b9
+#define CSR_PMPADDR10 0x3ba
+#define CSR_PMPADDR11 0x3bb
+#define CSR_PMPADDR12 0x3bc
+#define CSR_PMPADDR13 0x3bd
+#define CSR_PMPADDR14 0x3be
+#define CSR_PMPADDR15 0x3bf
+#define CSR_TSELECT 0x7a0
+#define CSR_TDATA1 0x7a1
+#define CSR_TDATA2 0x7a2
+#define CSR_TDATA3 0x7a3
+#define CSR_DCSR 0x7b0
+#define CSR_DPC 0x7b1
+#define CSR_DSCRATCH 0x7b2
+#define CSR_MCYCLE 0xb00
+#define CSR_MINSTRET 0xb02
+#define CSR_MHPMCOUNTER3 0xb03
+#define CSR_MHPMCOUNTER4 0xb04
+#define CSR_MHPMCOUNTER5 0xb05
+#define CSR_MHPMCOUNTER6 0xb06
+#define CSR_MHPMCOUNTER7 0xb07
+#define CSR_MHPMCOUNTER8 0xb08
+#define CSR_MHPMCOUNTER9 0xb09
+#define CSR_MHPMCOUNTER10 0xb0a
+#define CSR_MHPMCOUNTER11 0xb0b
+#define CSR_MHPMCOUNTER12 0xb0c
+#define CSR_MHPMCOUNTER13 0xb0d
+#define CSR_MHPMCOUNTER14 0xb0e
+#define CSR_MHPMCOUNTER15 0xb0f
+#define CSR_MHPMCOUNTER16 0xb10
+#define CSR_MHPMCOUNTER17 0xb11
+#define CSR_MHPMCOUNTER18 0xb12
+#define CSR_MHPMCOUNTER19 0xb13
+#define CSR_MHPMCOUNTER20 0xb14
+#define CSR_MHPMCOUNTER21 0xb15
+#define CSR_MHPMCOUNTER22 0xb16
+#define CSR_MHPMCOUNTER23 0xb17
+#define CSR_MHPMCOUNTER24 0xb18
+#define CSR_MHPMCOUNTER25 0xb19
+#define CSR_MHPMCOUNTER26 0xb1a
+#define CSR_MHPMCOUNTER27 0xb1b
+#define CSR_MHPMCOUNTER28 0xb1c
+#define CSR_MHPMCOUNTER29 0xb1d
+#define CSR_MHPMCOUNTER30 0xb1e
+#define CSR_MHPMCOUNTER31 0xb1f
+#define CSR_MUCOUNTEREN 0x320
+#define CSR_MSCOUNTEREN 0x321
+#define CSR_MHPMEVENT3 0x323
+#define CSR_MHPMEVENT4 0x324
+#define CSR_MHPMEVENT5 0x325
+#define CSR_MHPMEVENT6 0x326
+#define CSR_MHPMEVENT7 0x327
+#define CSR_MHPMEVENT8 0x328
+#define CSR_MHPMEVENT9 0x329
+#define CSR_MHPMEVENT10 0x32a
+#define CSR_MHPMEVENT11 0x32b
+#define CSR_MHPMEVENT12 0x32c
+#define CSR_MHPMEVENT13 0x32d
+#define CSR_MHPMEVENT14 0x32e
+#define CSR_MHPMEVENT15 0x32f
+#define CSR_MHPMEVENT16 0x330
+#define CSR_MHPMEVENT17 0x331
+#define CSR_MHPMEVENT18 0x332
+#define CSR_MHPMEVENT19 0x333
+#define CSR_MHPMEVENT20 0x334
+#define CSR_MHPMEVENT21 0x335
+#define CSR_MHPMEVENT22 0x336
+#define CSR_MHPMEVENT23 0x337
+#define CSR_MHPMEVENT24 0x338
+#define CSR_MHPMEVENT25 0x339
+#define CSR_MHPMEVENT26 0x33a
+#define CSR_MHPMEVENT27 0x33b
+#define CSR_MHPMEVENT28 0x33c
+#define CSR_MHPMEVENT29 0x33d
+#define CSR_MHPMEVENT30 0x33e
+#define CSR_MHPMEVENT31 0x33f
+#define CSR_MVENDORID 0xf11
+#define CSR_MARCHID 0xf12
+#define CSR_MIMPID 0xf13
+#define CSR_MHARTID 0xf14
+#define CSR_CYCLEH 0xc80
+#define CSR_TIMEH 0xc81
+#define CSR_INSTRETH 0xc82
+#define CSR_HPMCOUNTER3H 0xc83
+#define CSR_HPMCOUNTER4H 0xc84
+#define CSR_HPMCOUNTER5H 0xc85
+#define CSR_HPMCOUNTER6H 0xc86
+#define CSR_HPMCOUNTER7H 0xc87
+#define CSR_HPMCOUNTER8H 0xc88
+#define CSR_HPMCOUNTER9H 0xc89
+#define CSR_HPMCOUNTER10H 0xc8a
+#define CSR_HPMCOUNTER11H 0xc8b
+#define CSR_HPMCOUNTER12H 0xc8c
+#define CSR_HPMCOUNTER13H 0xc8d
+#define CSR_HPMCOUNTER14H 0xc8e
+#define CSR_HPMCOUNTER15H 0xc8f
+#define CSR_HPMCOUNTER16H 0xc90
+#define CSR_HPMCOUNTER17H 0xc91
+#define CSR_HPMCOUNTER18H 0xc92
+#define CSR_HPMCOUNTER19H 0xc93
+#define CSR_HPMCOUNTER20H 0xc94
+#define CSR_HPMCOUNTER21H 0xc95
+#define CSR_HPMCOUNTER22H 0xc96
+#define CSR_HPMCOUNTER23H 0xc97
+#define CSR_HPMCOUNTER24H 0xc98
+#define CSR_HPMCOUNTER25H 0xc99
+#define CSR_HPMCOUNTER26H 0xc9a
+#define CSR_HPMCOUNTER27H 0xc9b
+#define CSR_HPMCOUNTER28H 0xc9c
+#define CSR_HPMCOUNTER29H 0xc9d
+#define CSR_HPMCOUNTER30H 0xc9e
+#define CSR_HPMCOUNTER31H 0xc9f
+#define CSR_MCYCLEH 0xb80
+#define CSR_MINSTRETH 0xb82
+#define CSR_MHPMCOUNTER3H 0xb83
+#define CSR_MHPMCOUNTER4H 0xb84
+#define CSR_MHPMCOUNTER5H 0xb85
+#define CSR_MHPMCOUNTER6H 0xb86
+#define CSR_MHPMCOUNTER7H 0xb87
+#define CSR_MHPMCOUNTER8H 0xb88
+#define CSR_MHPMCOUNTER9H 0xb89
+#define CSR_MHPMCOUNTER10H 0xb8a
+#define CSR_MHPMCOUNTER11H 0xb8b
+#define CSR_MHPMCOUNTER12H 0xb8c
+#define CSR_MHPMCOUNTER13H 0xb8d
+#define CSR_MHPMCOUNTER14H 0xb8e
+#define CSR_MHPMCOUNTER15H 0xb8f
+#define CSR_MHPMCOUNTER16H 0xb90
+#define CSR_MHPMCOUNTER17H 0xb91
+#define CSR_MHPMCOUNTER18H 0xb92
+#define CSR_MHPMCOUNTER19H 0xb93
+#define CSR_MHPMCOUNTER20H 0xb94
+#define CSR_MHPMCOUNTER21H 0xb95
+#define CSR_MHPMCOUNTER22H 0xb96
+#define CSR_MHPMCOUNTER23H 0xb97
+#define CSR_MHPMCOUNTER24H 0xb98
+#define CSR_MHPMCOUNTER25H 0xb99
+#define CSR_MHPMCOUNTER26H 0xb9a
+#define CSR_MHPMCOUNTER27H 0xb9b
+#define CSR_MHPMCOUNTER28H 0xb9c
+#define CSR_MHPMCOUNTER29H 0xb9d
+#define CSR_MHPMCOUNTER30H 0xb9e
+#define CSR_MHPMCOUNTER31H 0xb9f
+
+/* === TEE CSR Registers === */
+#define CSR_SPMPCFG0            0x1A0
+#define CSR_SPMPCFG1            0x1A1
+#define CSR_SPMPCFG2            0x1A2
+#define CSR_SPMPCFG3            0x1A3
+#define CSR_SPMPADDR0           0x1B0
+#define CSR_SPMPADDR1           0x1B1
+#define CSR_SPMPADDR2           0x1B2
+#define CSR_SPMPADDR3           0x1B3
+#define CSR_SPMPADDR4           0x1B4
+#define CSR_SPMPADDR5           0x1B5
+#define CSR_SPMPADDR6           0x1B6
+#define CSR_SPMPADDR7           0x1B7
+#define CSR_SPMPADDR8           0x1B8
+#define CSR_SPMPADDR9           0x1B9
+#define CSR_SPMPADDR10          0x1BA
+#define CSR_SPMPADDR11          0x1BB
+#define CSR_SPMPADDR12          0x1BC
+#define CSR_SPMPADDR13          0x1BD
+#define CSR_SPMPADDR14          0x1BE
+#define CSR_SPMPADDR15          0x1BF
+/* NOTE(review): names mirror JALMNXTI/MTVT2/PUSHMCAUSE/PUSHMEPC in the Nuclei custom group below; presumably the S-mode variants -- confirm */
+#define CSR_JALSNXTI            0x947
+#define CSR_STVT2               0x948
+#define CSR_PUSHSCAUSE          0x949
+#define CSR_PUSHSEPC            0x94A
+
+
+/* === CLIC CSR Registers === */
+#define CSR_MTVT                0x307
+#define CSR_MNXTI               0x345
+#define CSR_MINTSTATUS          0x346
+#define CSR_MSCRATCHCSW         0x348
+#define CSR_MSCRATCHCSWL        0x349
+#define CSR_MCLICBASE           0x350
+
+/* === P-Extension Registers === */
+#define CSR_UCODE               0x801
+
+/* === Nuclei custom CSR Registers === */
+#define CSR_MCOUNTINHIBIT       0x320 /* NOTE(review): same number as CSR_MUCOUNTEREN defined earlier in this file -- confirm which name is current */
+#define CSR_MILM_CTL            0x7C0
+#define CSR_MDLM_CTL            0x7C1
+#define CSR_MECC_CODE           0x7C2
+#define CSR_MNVEC               0x7C3
+#define CSR_MSUBM               0x7C4
+#define CSR_MDCAUSE             0x7C9
+#define CSR_MCACHE_CTL          0x7CA
+#define CSR_MMISC_CTL           0x7D0
+#define CSR_MSAVESTATUS         0x7D6
+#define CSR_MSAVEEPC1           0x7D7
+#define CSR_MSAVECAUSE1         0x7D8
+#define CSR_MSAVEEPC2           0x7D9
+#define CSR_MSAVECAUSE2         0x7DA
+#define CSR_MSAVEDCAUSE1        0x7DB
+#define CSR_MSAVEDCAUSE2        0x7DC
+#define CSR_MTLB_CTL            0x7DD
+#define CSR_MECC_LOCK           0x7DE
+#define CSR_MFP16MODE           0x7E2
+#define CSR_LSTEPFORC           0x7E9
+#define CSR_PUSHMSUBM           0x7EB
+#define CSR_MTVT2               0x7EC
+#define CSR_JALMNXTI            0x7ED
+#define CSR_PUSHMCAUSE          0x7EE
+#define CSR_PUSHMEPC            0x7EF
+#define CSR_MPPICFG_INFO        0x7F0
+#define CSR_MFIOCFG_INFO        0x7F1
+#define CSR_MSMPCFG_INFO        0x7F7
+#define CSR_SLEEPVALUE          0x811
+#define CSR_TXEVT               0x812
+#define CSR_WFE                 0x810
+#define CSR_MICFG_INFO          0xFC0
+#define CSR_MDCFG_INFO          0xFC1
+#define CSR_MCFG_INFO           0xFC2
+#define CSR_MTLBCFG_INFO        0xFC3
+
+/* === Nuclei CCM Registers (M-, S- and U-prefixed BEGINADDR/COMMAND/DATA sets, plus SUEN and FPIPE) === */
+#define CSR_CCM_MBEGINADDR      0x7CB
+#define CSR_CCM_MCOMMAND        0x7CC
+#define CSR_CCM_MDATA           0x7CD
+#define CSR_CCM_SUEN            0x7CE
+#define CSR_CCM_SBEGINADDR      0x5CB
+#define CSR_CCM_SCOMMAND        0x5CC
+#define CSR_CCM_SDATA           0x5CD
+#define CSR_CCM_UBEGINADDR      0x4CB
+#define CSR_CCM_UCOMMAND        0x4CC
+#define CSR_CCM_UDATA           0x4CD
+#define CSR_CCM_FPIPE           0x4CF
+
+/** @} */ /** End of Doxygen Group NMSIS_Core_CSR_Registers **/
+
+/* Exception codes held in the MCAUSE CSR; the values below run contiguously from 0x0 to 0xb */
+#define CAUSE_MISALIGNED_FETCH 0x0
+#define CAUSE_FAULT_FETCH 0x1
+#define CAUSE_ILLEGAL_INSTRUCTION 0x2
+#define CAUSE_BREAKPOINT 0x3
+#define CAUSE_MISALIGNED_LOAD 0x4
+#define CAUSE_FAULT_LOAD 0x5
+#define CAUSE_MISALIGNED_STORE 0x6
+#define CAUSE_FAULT_STORE 0x7
+#define CAUSE_USER_ECALL 0x8
+#define CAUSE_SUPERVISOR_ECALL 0x9
+#define CAUSE_HYPERVISOR_ECALL 0xa
+#define CAUSE_MACHINE_ECALL 0xb
+
+/* Exception subcodes in the MDCAUSE CSR. Values repeat across the fetch/load/store groups, so presumably each is read together with the matching CAUSE_FAULT_* code -- confirm */
+#define DCAUSE_FAULT_FETCH_PMP      0x1
+#define DCAUSE_FAULT_FETCH_INST     0x2
+
+#define DCAUSE_FAULT_LOAD_PMP       0x1
+#define DCAUSE_FAULT_LOAD_INST      0x2
+#define DCAUSE_FAULT_LOAD_NICE      0x3
+
+#define DCAUSE_FAULT_STORE_PMP      0x1
+#define DCAUSE_FAULT_STORE_INST     0x2
+
+/** @} */ /** End of Doxygen Group NMSIS_Core_CSR_Encoding **/
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __RISCV_ENCODING_H__ */
--- a/components/nmsis/dsp/inc/dsp/basic_math_functions.h
+++ b/components/nmsis/dsp/inc/dsp/basic_math_functions.h
@ -0,0 +1,765 @@
+/******************************************************************************
+ * @file     basic_math_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _BASIC_MATH_FUNCTIONS_H_
+#define _BASIC_MATH_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @defgroup groupMath Basic Math Functions
+ */
+
+ /**
+   * @brief Q7 vector multiplication.
+   * @param[in]  pSrcA      points to the first input vector (q7_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q7_t samples)
+   * @param[out] pDst       points to the output vector (q7_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_mult_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Q15 vector multiplication.
+   * @param[in]  pSrcA      points to the first input vector (q15_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q15_t samples)
+   * @param[out] pDst       points to the output vector (q15_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_mult_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Q31 vector multiplication.
+   * @param[in]  pSrcA      points to the first input vector (q31_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q31_t samples)
+   * @param[out] pDst       points to the output vector (q31_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_mult_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Floating-point (float32_t) vector multiplication.
+   * @param[in]  pSrcA      points to the first input vector (float32_t samples)
+   * @param[in]  pSrcB      points to the second input vector (float32_t samples)
+   * @param[out] pDst       points to the output vector (float32_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_mult_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+
+ /**
+   * @brief Floating-point (float32_t) vector addition.
+   * @param[in]  pSrcA      points to the first input vector (float32_t samples)
+   * @param[in]  pSrcB      points to the second input vector (float32_t samples)
+   * @param[out] pDst       points to the output vector (float32_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_add_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+
+  /**
+   * @brief Q7 vector addition.
+   * @param[in]  pSrcA      points to the first input vector (q7_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q7_t samples)
+   * @param[out] pDst       points to the output vector (q7_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_add_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Q15 vector addition.
+   * @param[in]  pSrcA      points to the first input vector (q15_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q15_t samples)
+   * @param[out] pDst       points to the output vector (q15_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_add_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Q31 vector addition.
+   * @param[in]  pSrcA      points to the first input vector (q31_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q31_t samples)
+   * @param[out] pDst       points to the output vector (q31_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_add_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Floating-point (float32_t) vector subtraction.
+   * @param[in]  pSrcA      points to the first input vector (float32_t samples)
+   * @param[in]  pSrcB      points to the second input vector (float32_t samples)
+   * @param[out] pDst       points to the output vector (float32_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_sub_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+
+  /**
+   * @brief Q7 vector subtraction.
+   * @param[in]  pSrcA      points to the first input vector (q7_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q7_t samples)
+   * @param[out] pDst       points to the output vector (q7_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_sub_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Q15 vector subtraction.
+   * @param[in]  pSrcA      points to the first input vector (q15_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q15_t samples)
+   * @param[out] pDst       points to the output vector (q15_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_sub_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Q31 vector subtraction.
+   * @param[in]  pSrcA      points to the first input vector (q31_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q31_t samples)
+   * @param[out] pDst       points to the output vector (q31_t samples)
+   * @param[in]  blockSize  number of samples in each of the three vectors
+   */
+  void riscv_sub_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Multiplies a floating-point (float32_t) vector by a scalar.
+   * @param[in]  pSrc       points to the input vector (float32_t samples)
+   * @param[in]  scale      scale factor to be applied (float32_t)
+   * @param[out] pDst       points to the output vector (float32_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_scale_f32(
+  const float32_t * pSrc,
+        float32_t scale,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+
+  /**
+   * @brief Multiplies a Q7 vector by a scalar.
+   * @param[in]  pSrc        points to the input vector (q7_t samples)
+   * @param[in]  scaleFract  fractional portion of the scale value (q7_t)
+   * @param[in]  shift       number of bits to shift the result by (sign convention not stated here; presumably scale = scaleFract * 2^shift -- TODO confirm)
+   * @param[out] pDst        points to the output vector (q7_t samples)
+   * @param[in]  blockSize   number of samples in the input and output vectors
+   */
+  void riscv_scale_q7(
+  const q7_t * pSrc,
+        q7_t scaleFract,
+        int8_t shift,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Multiplies a Q15 vector by a scalar.
+   * @param[in]  pSrc        points to the input vector (q15_t samples)
+   * @param[in]  scaleFract  fractional portion of the scale value (q15_t)
+   * @param[in]  shift       number of bits to shift the result by (sign convention not stated here; presumably scale = scaleFract * 2^shift -- TODO confirm)
+   * @param[out] pDst        points to the output vector (q15_t samples)
+   * @param[in]  blockSize   number of samples in the input and output vectors
+   */
+  void riscv_scale_q15(
+  const q15_t * pSrc,
+        q15_t scaleFract,
+        int8_t shift,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Multiplies a Q31 vector by a scalar.
+   * @param[in]  pSrc        points to the input vector (q31_t samples)
+   * @param[in]  scaleFract  fractional portion of the scale value (q31_t)
+   * @param[in]  shift       number of bits to shift the result by (sign convention not stated here; presumably scale = scaleFract * 2^shift -- TODO confirm)
+   * @param[out] pDst        points to the output vector (q31_t samples)
+   * @param[in]  blockSize   number of samples in the input and output vectors
+   */
+  void riscv_scale_q31(
+  const q31_t * pSrc,
+        q31_t scaleFract,
+        int8_t shift,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Q7 vector absolute value.
+   * @param[in]  pSrc       points to the input vector (q7_t samples)
+   * @param[out] pDst       points to the output vector (q7_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_abs_q7(
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Floating-point (float32_t) vector absolute value.
+   * @param[in]  pSrc       points to the input vector (float32_t samples)
+   * @param[out] pDst       points to the output vector (float32_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_abs_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+
+
+  /**
+   * @brief Q15 vector absolute value.
+   * @param[in]  pSrc       points to the input vector (q15_t samples)
+   * @param[out] pDst       points to the output vector (q15_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_abs_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Q31 vector absolute value.
+   * @param[in]  pSrc       points to the input vector (q31_t samples)
+   * @param[out] pDst       points to the output vector (q31_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_abs_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Dot product of floating-point (float32_t) vectors.
+   * @param[in]  pSrcA      points to the first input vector (float32_t samples)
+   * @param[in]  pSrcB      points to the second input vector (float32_t samples)
+   * @param[in]  blockSize  number of samples in each input vector
+   * @param[out] result     points to the float32_t location that receives the dot product
+   */
+  void riscv_dot_prod_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t blockSize,
+        float32_t * result);
+
+
+
+  /**
+   * @brief Dot product of Q7 vectors.
+   * @param[in]  pSrcA      points to the first input vector (q7_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q7_t samples)
+   * @param[in]  blockSize  number of samples in each input vector
+   * @param[out] result     points to the q31_t location that receives the dot product (wider than the q7_t inputs)
+   */
+  void riscv_dot_prod_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * result);
+
+
+  /**
+   * @brief Dot product of Q15 vectors.
+   * @param[in]  pSrcA      points to the first input vector (q15_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q15_t samples)
+   * @param[in]  blockSize  number of samples in each input vector
+   * @param[out] result     points to the q63_t location that receives the dot product (wider than the q15_t inputs)
+   */
+  void riscv_dot_prod_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q63_t * result);
+
+
+  /**
+   * @brief Dot product of Q31 vectors.
+   * @param[in]  pSrcA      points to the first input vector (q31_t samples)
+   * @param[in]  pSrcB      points to the second input vector (q31_t samples)
+   * @param[in]  blockSize  number of samples in each input vector
+   * @param[out] result     points to the q63_t location that receives the dot product (wider than the q31_t inputs)
+   */
+  void riscv_dot_prod_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q63_t * result);
+
+
+  /**
+   * @brief  Shifts the elements of a Q7 vector by a specified number of bits.
+   * @param[in]  pSrc       points to the input vector (q7_t samples)
+   * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
+   * @param[out] pDst       points to the output vector (q7_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_shift_q7(
+  const q7_t * pSrc,
+        int8_t shiftBits,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Shifts the elements of a Q15 vector by a specified number of bits.
+   * @param[in]  pSrc       points to the input vector (q15_t samples)
+   * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
+   * @param[out] pDst       points to the output vector (q15_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_shift_q15(
+  const q15_t * pSrc,
+        int8_t shiftBits,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Shifts the elements of a Q31 vector by a specified number of bits.
+   * @param[in]  pSrc       points to the input vector (q31_t samples)
+   * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
+   * @param[out] pDst       points to the output vector (q31_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_shift_q31(
+  const q31_t * pSrc,
+        int8_t shiftBits,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Adds a constant offset to a floating-point (float32_t) vector.
+   * @param[in]  pSrc       points to the input vector (float32_t samples)
+   * @param[in]  offset     the constant offset to be added (float32_t)
+   * @param[out] pDst       points to the output vector (float32_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_offset_f32(
+  const float32_t * pSrc,
+        float32_t offset,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+
+  /**
+   * @brief  Adds a constant offset to a Q7 vector.
+   * @param[in]  pSrc       points to the input vector (q7_t samples)
+   * @param[in]  offset     the constant offset to be added (q7_t)
+   * @param[out] pDst       points to the output vector (q7_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_offset_q7(
+  const q7_t * pSrc,
+        q7_t offset,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Adds a constant offset to a Q15 vector.
+   * @param[in]  pSrc       points to the input vector (q15_t samples)
+   * @param[in]  offset     the constant offset to be added (q15_t)
+   * @param[out] pDst       points to the output vector (q15_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_offset_q15(
+  const q15_t * pSrc,
+        q15_t offset,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Adds a constant offset to a Q31 vector.
+   * @param[in]  pSrc       points to the input vector (q31_t samples)
+   * @param[in]  offset     the constant offset to be added (q31_t)
+   * @param[out] pDst       points to the output vector (q31_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_offset_q31(
+  const q31_t * pSrc,
+        q31_t offset,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Negates the elements of a floating-point (float32_t) vector.
+   * @param[in]  pSrc       points to the input vector (float32_t samples)
+   * @param[out] pDst       points to the output vector (float32_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_negate_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Negates the elements of a Q7 vector.
+   * @param[in]  pSrc       points to the input vector (q7_t samples)
+   * @param[out] pDst       points to the output vector (q7_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_negate_q7(
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Negates the elements of a Q15 vector.
+   * @param[in]  pSrc       points to the input vector (q15_t samples)
+   * @param[out] pDst       points to the output vector (q15_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_negate_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Negates the elements of a Q31 vector.
+   * @param[in]  pSrc       points to the input vector (q31_t samples)
+   * @param[out] pDst       points to the output vector (q31_t samples)
+   * @param[in]  blockSize  number of samples in the input and output vectors
+   */
+  void riscv_negate_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+/**
+   * @brief         Compute the bitwise AND of two uint16_t vectors.
+   * @param[in]     pSrcA      points to input vector A (uint16_t samples)
+   * @param[in]     pSrcB      points to input vector B (uint16_t samples)
+   * @param[out]    pDst       points to output vector (uint16_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_and_u16(
+    const uint16_t * pSrcA,
+    const uint16_t * pSrcB,
+          uint16_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise AND of two uint32_t vectors.
+   * @param[in]     pSrcA      points to input vector A (uint32_t samples)
+   * @param[in]     pSrcB      points to input vector B (uint32_t samples)
+   * @param[out]    pDst       points to output vector (uint32_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_and_u32(
+    const uint32_t * pSrcA,
+    const uint32_t * pSrcB,
+          uint32_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise AND of two uint8_t vectors.
+   * @param[in]     pSrcA      points to input vector A (uint8_t samples)
+   * @param[in]     pSrcB      points to input vector B (uint8_t samples)
+   * @param[out]    pDst       points to output vector (uint8_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_and_u8(
+    const uint8_t * pSrcA,
+    const uint8_t * pSrcB,
+          uint8_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise OR of two uint16_t vectors.
+   * @param[in]     pSrcA      points to input vector A (uint16_t samples)
+   * @param[in]     pSrcB      points to input vector B (uint16_t samples)
+   * @param[out]    pDst       points to output vector (uint16_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_or_u16(
+    const uint16_t * pSrcA,
+    const uint16_t * pSrcB,
+          uint16_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise OR of two uint32_t vectors.
+   * @param[in]     pSrcA      points to input vector A (uint32_t samples)
+   * @param[in]     pSrcB      points to input vector B (uint32_t samples)
+   * @param[out]    pDst       points to output vector (uint32_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_or_u32(
+    const uint32_t * pSrcA,
+    const uint32_t * pSrcB,
+          uint32_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise OR of two uint8_t vectors.
+   * @param[in]     pSrcA      points to input vector A (uint8_t samples)
+   * @param[in]     pSrcB      points to input vector B (uint8_t samples)
+   * @param[out]    pDst       points to output vector (uint8_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_or_u8(
+    const uint8_t * pSrcA,
+    const uint8_t * pSrcB,
+          uint8_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise NOT of a uint16_t vector.
+   * @param[in]     pSrc       points to input vector (uint16_t samples)
+   * @param[out]    pDst       points to output vector (uint16_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_not_u16(
+    const uint16_t * pSrc,
+          uint16_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise NOT of a uint32_t vector.
+   * @param[in]     pSrc       points to input vector (uint32_t samples)
+   * @param[out]    pDst       points to output vector (uint32_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_not_u32(
+    const uint32_t * pSrc,
+          uint32_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise NOT of a uint8_t vector.
+   * @param[in]     pSrc       points to input vector (uint8_t samples)
+   * @param[out]    pDst       points to output vector (uint8_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_not_u8(
+    const uint8_t * pSrc,
+          uint8_t * pDst,
+          uint32_t blockSize);
+
+/**
+   * @brief         Compute the bitwise XOR of two uint16_t vectors.
+   * @param[in]     pSrcA      points to input vector A (uint16_t samples)
+   * @param[in]     pSrcB      points to input vector B (uint16_t samples)
+   * @param[out]    pDst       points to output vector (uint16_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_xor_u16(
+    const uint16_t * pSrcA,
+    const uint16_t * pSrcB,
+          uint16_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise XOR of two uint32_t vectors.
+   * @param[in]     pSrcA      points to input vector A (uint32_t samples)
+   * @param[in]     pSrcB      points to input vector B (uint32_t samples)
+   * @param[out]    pDst       points to output vector (uint32_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_xor_u32(
+    const uint32_t * pSrcA,
+    const uint32_t * pSrcB,
+          uint32_t * pDst,
+          uint32_t blockSize);
+
+  /**
+   * @brief         Compute the bitwise XOR of two uint8_t vectors.
+   * @param[in]     pSrcA      points to input vector A (uint8_t samples)
+   * @param[in]     pSrcB      points to input vector B (uint8_t samples)
+   * @param[out]    pDst       points to output vector (uint8_t samples)
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void riscv_xor_u8(
+    const uint8_t * pSrcA,
+    const uint8_t * pSrcB,
+          uint8_t * pDst,
+    uint32_t blockSize);
+
+  /**
+  @brief         Elementwise floating-point (float32_t) clipping
+  @param[in]     pSrc          points to input values (float32_t)
+  @param[out]    pDst          points to output clipped values (float32_t)
+  @param[in]     low           lower bound
+  @param[in]     high          upper bound
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+
+void riscv_clip_f32(const float32_t * pSrc, 
+  float32_t * pDst, 
+  float32_t low, 
+  float32_t high, 
+  uint32_t numSamples);
+
+  /**
+  @brief         Elementwise fixed-point (q31_t) clipping
+  @param[in]     pSrc          points to input values (q31_t)
+  @param[out]    pDst          points to output clipped values (q31_t)
+  @param[in]     low           lower bound
+  @param[in]     high          upper bound
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+
+void riscv_clip_q31(const q31_t * pSrc, 
+  q31_t * pDst, 
+  q31_t low, 
+  q31_t high, 
+  uint32_t numSamples);
+
+  /**
+  @brief         Elementwise fixed-point (q15_t) clipping
+  @param[in]     pSrc          points to input values (q15_t)
+  @param[out]    pDst          points to output clipped values (q15_t)
+  @param[in]     low           lower bound
+  @param[in]     high          upper bound
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+
+void riscv_clip_q15(const q15_t * pSrc, 
+  q15_t * pDst, 
+  q15_t low, 
+  q15_t high, 
+  uint32_t numSamples);
+
+  /**
+  @brief         Elementwise fixed-point (q7_t) clipping
+  @param[in]     pSrc          points to input values (q7_t)
+  @param[out]    pDst          points to output clipped values (q7_t)
+  @param[in]     low           lower bound
+  @param[in]     high          upper bound
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+
+void riscv_clip_q7(const q7_t * pSrc, 
+  q7_t * pDst, 
+  q7_t low, 
+  q7_t high, 
+  uint32_t numSamples);
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _BASIC_MATH_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/basic_math_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/basic_math_functions_f16.h
@ -0,0 +1,169 @@
+/******************************************************************************
+ * @file     basic_math_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _BASIC_MATH_FUNCTIONS_F16_H_
+#define _BASIC_MATH_FUNCTIONS_F16_H_
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+
+  /**
+   * @brief Floating-point (float16_t) vector addition.
+   * @note  Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void riscv_add_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief Floating-point (float16_t) vector subtraction.
+   * @note  Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void riscv_sub_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+    /**
+   * @brief Multiplies a floating-point (float16_t) vector by a scalar.
+   * @note  Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  scale      scale factor to be applied (float16_t)
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
+  void riscv_scale_f16(
+  const float16_t * pSrc,
+        float16_t scale,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+    /**
+   * @brief Floating-point (float16_t) vector absolute value.
+   * @note  Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrc       points to the input buffer
+   * @param[out] pDst       points to the output buffer
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void riscv_abs_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Adds a constant offset to a floating-point (float16_t) vector.
+   * @note   Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  offset     is the offset to be added (float16_t)
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
+  void riscv_offset_f16(
+  const float16_t * pSrc,
+        float16_t offset,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief Dot product of floating-point (float16_t) vectors.
+   * @note  Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[in]  blockSize  number of samples in each vector
+   * @param[out] result     points to the float16_t location where the result is returned
+   */
+  void riscv_dot_prod_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t blockSize,
+        float16_t * result);
+
+  /**
+   * @brief Floating-point (float16_t) vector multiplication.
+   * @note  Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void riscv_mult_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief  Negates the elements of a floating-point (float16_t) vector.
+   * @note   Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrc       points to the input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
+  void riscv_negate_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+  @brief         Elementwise clipping of floating-point (float16_t) values
+  @note          Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+  @param[in]     pSrc          points to input values
+  @param[out]    pDst          points to output clipped values
+  @param[in]     low           lower bound of the clipping range
+  @param[in]     high          upper bound of the clipping range
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+
+void riscv_clip_f16(const float16_t * pSrc, 
+  float16_t * pDst, 
+  float16_t low, 
+  float16_t high, 
+  uint32_t numSamples);
+
+#endif /* defined(RISCV_FLOAT16_SUPPORTED)*/
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _BASIC_MATH_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/bayes_functions.h
+++ b/components/nmsis/dsp/inc/dsp/bayes_functions.h
@ -0,0 +1,90 @@
+/******************************************************************************
+ * @file     bayes_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _BAYES_FUNCTIONS_H_
+#define _BAYES_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/statistics_functions.h"
+
+/**
+ * @defgroup groupBayes Bayesian estimators
+ *
+ * Implements the naive Gaussian Bayes estimator.
+ * The training must be done with scikit-learn.
+ *
+ * The parameters can be easily
+ * generated from the scikit-learn object. Some examples are given in
+ * DSP/Testing/PatternGeneration/Bayes.py
+ */
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @brief Instance structure for the Naive Gaussian Bayesian estimator (float32_t version).
+ *
+ * Holds the model parameters read by riscv_gaussian_naive_bayes_predict_f32().
+ * The pointed-to parameter arrays are const and are not owned by this structure.
+ */
+typedef struct
+{
+  uint32_t vectorDimension;  /**< Dimension of the vector space */
+  uint32_t numberOfClasses;  /**< Number of different classes */
+  const float32_t *theta;          /**< Mean values for the Gaussians */
+  const float32_t *sigma;          /**< Variances for the Gaussians */
+  const float32_t *classPriors;    /**< Class prior probabilities */
+  float32_t epsilon;         /**< Additive value applied to the variances */
+} riscv_gaussian_naive_bayes_instance_f32;
+
+/**
+ * @brief Naive Gaussian Bayesian Estimator (float32_t version)
+ *
+ * @param[in]  S                       points to a naive bayes instance structure
+ * @param[in]  in                      points to the elements of the input vector.
+ * @param[out] pOutputProbabilities    points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out] pBufferB                points to a temporary buffer of length numberOfClasses
+ * @return The predicted class
+ */
+
+
+uint32_t riscv_gaussian_naive_bayes_predict_f32(const riscv_gaussian_naive_bayes_instance_f32 *S, 
+   const float32_t * in, 
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB);
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _BAYES_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/bayes_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/bayes_functions_f16.h
@ -0,0 +1,81 @@
+/******************************************************************************
+ * @file     bayes_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _BAYES_FUNCTIONS_F16_H_
+#define _BAYES_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/statistics_functions_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+/**
+ * @brief Instance structure for the Naive Gaussian Bayesian estimator (float16_t version).
+ *
+ * Holds the model parameters read by riscv_gaussian_naive_bayes_predict_f16().
+ * The pointed-to parameter arrays are const and are not owned by this structure.
+ * Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+ */
+typedef struct
+{
+  uint32_t vectorDimension;  /**< Dimension of the vector space */
+  uint32_t numberOfClasses;  /**< Number of different classes */
+  const float16_t *theta;          /**< Mean values for the Gaussians */
+  const float16_t *sigma;          /**< Variances for the Gaussians */
+  const float16_t *classPriors;    /**< Class prior probabilities */
+  float16_t epsilon;         /**< Additive value applied to the variances */
+} riscv_gaussian_naive_bayes_instance_f16;
+
+/**
+ * @brief Naive Gaussian Bayesian Estimator (float16_t version)
+ *
+ * @note  Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+ *
+ * @param[in]  S                       points to a naive bayes instance structure
+ * @param[in]  in                      points to the elements of the input vector.
+ * @param[out] pOutputProbabilities    points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out] pBufferB                points to a temporary buffer of length numberOfClasses
+ * @return The predicted class
+ */
+
+
+uint32_t riscv_gaussian_naive_bayes_predict_f16(const riscv_gaussian_naive_bayes_instance_f16 *S, 
+   const float16_t * in, 
+   float16_t *pOutputProbabilities,
+   float16_t *pBufferB);
+
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _BAYES_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/complex_math_functions.h
+++ b/components/nmsis/dsp/inc/dsp/complex_math_functions.h
@ -0,0 +1,296 @@
+/******************************************************************************
+ * @file     complex_math_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _COMPLEX_MATH_FUNCTIONS_H_
+#define _COMPLEX_MATH_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+#include "dsp/fast_math_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @defgroup groupCmplxMath Complex Math Functions
+ * This set of functions operates on complex data vectors.
+ * The data in the complex arrays is stored in an interleaved fashion
+ * (real, imag, real, imag, ...).
+ * In the API functions, the number of samples in a complex array refers
+ * to the number of complex values; the array contains twice this number of
+ * real values.
+ */
+
+ /**
+   * @brief  Floating-point (float32_t) complex conjugate.
+   * @param[in]  pSrc        points to the input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector;
+   *                         each vector holds twice this number of float32_t values
+   */
+  void riscv_cmplx_conj_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t numSamples);
+
+  /**
+   * @brief  Q31 complex conjugate.
+   * @param[in]  pSrc        points to the input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector;
+   *                         each vector holds twice this number of q31_t values
+   */
+  void riscv_cmplx_conj_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Q15 complex conjugate.
+   * @param[in]  pSrc        points to the input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector;
+   *                         each vector holds twice this number of q15_t values
+   */
+  void riscv_cmplx_conj_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Floating-point (float32_t) complex magnitude squared
+   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector;
+   *                         the input holds twice this number of float32_t values
+   */
+  void riscv_cmplx_mag_squared_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Q31 complex magnitude squared
+   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector;
+   *                         the input holds twice this number of q31_t values
+   */
+  void riscv_cmplx_mag_squared_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Q15 complex magnitude squared
+   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector;
+   *                         the input holds twice this number of q15_t values
+   */
+  void riscv_cmplx_mag_squared_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
+
+
+/**
+   * @brief  Floating-point (float32_t) complex magnitude
+   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector;
+   *                         the input holds twice this number of float32_t values
+   */
+  void riscv_cmplx_mag_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Q31 complex magnitude
+   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector;
+   *                         the input holds twice this number of q31_t values
+   */
+  void riscv_cmplx_mag_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Q15 complex magnitude
+   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector;
+   *                         the input holds twice this number of q15_t values
+   */
+  void riscv_cmplx_mag_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Q15 complex dot product
+   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag values)
+   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector
+   * @param[out] realResult  real part of the result returned here (stored as q31_t)
+   * @param[out] imagResult  imaginary part of the result returned here (stored as q31_t)
+   */
+  void riscv_cmplx_dot_prod_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t numSamples,
+        q31_t * realResult,
+        q31_t * imagResult);
+
+
+  /**
+   * @brief  Q31 complex dot product
+   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag values)
+   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector
+   * @param[out] realResult  real part of the result returned here (stored as q63_t)
+   * @param[out] imagResult  imaginary part of the result returned here (stored as q63_t)
+   */
+  void riscv_cmplx_dot_prod_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t numSamples,
+        q63_t * realResult,
+        q63_t * imagResult);
+
+
+  /**
+   * @brief  Floating-point (float32_t) complex dot product
+   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag values)
+   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector
+   * @param[out] realResult  real part of the result returned here (stored as float32_t)
+   * @param[out] imagResult  imaginary part of the result returned here (stored as float32_t)
+   */
+  void riscv_cmplx_dot_prod_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t numSamples,
+        float32_t * realResult,
+        float32_t * imagResult);
+
+
+  /**
+   * @brief  Q15 complex-by-real multiplication
+   * @param[in]  pSrcCmplx   points to the complex input vector (interleaved real, imag values)
+   * @param[in]  pSrcReal    points to the real input vector
+   * @param[out] pCmplxDst   points to the complex output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of samples in each vector
+   */
+  void riscv_cmplx_mult_real_q15(
+  const q15_t * pSrcCmplx,
+  const q15_t * pSrcReal,
+        q15_t * pCmplxDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Q31 complex-by-real multiplication
+   * @param[in]  pSrcCmplx   points to the complex input vector (interleaved real, imag values)
+   * @param[in]  pSrcReal    points to the real input vector
+   * @param[out] pCmplxDst   points to the complex output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of samples in each vector
+   */
+  void riscv_cmplx_mult_real_q31(
+  const q31_t * pSrcCmplx,
+  const q31_t * pSrcReal,
+        q31_t * pCmplxDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Floating-point (float32_t) complex-by-real multiplication
+   * @param[in]  pSrcCmplx   points to the complex input vector (interleaved real, imag values)
+   * @param[in]  pSrcReal    points to the real input vector
+   * @param[out] pCmplxDst   points to the complex output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of samples in each vector
+   */
+  void riscv_cmplx_mult_real_f32(
+  const float32_t * pSrcCmplx,
+  const float32_t * pSrcReal,
+        float32_t * pCmplxDst,
+        uint32_t numSamples);
+
+  /**
+   * @brief  Q15 complex-by-complex multiplication
+   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag values)
+   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector;
+   *                         each vector holds twice this number of q15_t values
+   */
+  void riscv_cmplx_mult_cmplx_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Q31 complex-by-complex multiplication
+   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag values)
+   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector;
+   *                         each vector holds twice this number of q31_t values
+   */
+  void riscv_cmplx_mult_cmplx_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t numSamples);
+
+
+  /**
+   * @brief  Floating-point (float32_t) complex-by-complex multiplication
+   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag values)
+   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector;
+   *                         each vector holds twice this number of float32_t values
+   */
+  void riscv_cmplx_mult_cmplx_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t numSamples);
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _COMPLEX_MATH_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/complex_math_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/complex_math_functions_f16.h
@ -0,0 +1,124 @@
+/******************************************************************************
+ * @file     complex_math_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _COMPLEX_MATH_FUNCTIONS_F16_H_
+#define _COMPLEX_MATH_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+#include "dsp/fast_math_functions_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+ /**
+   * @brief  Floating-point (float16_t) complex conjugate.
+   * @note   Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrc        points to the input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector
+   */
+  void riscv_cmplx_conj_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples);
+
+ /**
+   * @brief  Floating-point (float16_t) complex magnitude squared
+   * @note   Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
+  void riscv_cmplx_mag_squared_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples);
+
+  /**
+   * @brief  Floating-point (float16_t) complex magnitude
+   * @note   Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
+  void riscv_cmplx_mag_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples);
+
+  /**
+   * @brief  Floating-point (float16_t) complex dot product
+   * @note   Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag values)
+   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector
+   * @param[out] realResult  real part of the result returned here (stored as float16_t)
+   * @param[out] imagResult  imaginary part of the result returned here (stored as float16_t)
+   */
+  void riscv_cmplx_dot_prod_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t numSamples,
+        float16_t * realResult,
+        float16_t * imagResult);
+
+   /**
+   * @brief  Floating-point (float16_t) complex-by-real multiplication
+   * @note   Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrcCmplx   points to the complex input vector (interleaved real, imag values)
+   * @param[in]  pSrcReal    points to the real input vector
+   * @param[out] pCmplxDst   points to the complex output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of samples in each vector
+   */
+  void riscv_cmplx_mult_real_f16(
+  const float16_t * pSrcCmplx,
+  const float16_t * pSrcReal,
+        float16_t * pCmplxDst,
+        uint32_t numSamples);
+
+  /**
+   * @brief  Floating-point (float16_t) complex-by-complex multiplication
+   * @note   Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag values)
+   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag values)
+   * @param[out] pDst        points to the output vector (interleaved real, imag values)
+   * @param[in]  numSamples  number of complex samples in each vector
+   */
+  void riscv_cmplx_mult_cmplx_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t numSamples);
+
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _COMPLEX_MATH_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/controller_functions.h
+++ b/components/nmsis/dsp/inc/dsp/controller_functions.h
@ -0,0 +1,792 @@
+/******************************************************************************
+ * @file     controller_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _CONTROLLER_FUNCTIONS_H_
+#define _CONTROLLER_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+  /**
+   * @brief Macros required for SINE and COSINE Controller functions
+   */
+
+  /* Shift count of 32 - 9 = 23 bits. */
+  /* NOTE(review): presumably used by the Q31 sin/cos code to reduce a 32-bit angle to a 9-bit table index - confirm against the implementation. */
+#define CONTROLLER_Q31_SHIFT  (32 - 9)
+  /* 1.31 (q31) fixed-point value of 2/360: 0xB60B61 / 2^31 ~= 0.0055556 */
+  /* -1 to +1 is divided into 360 values, so the spacing between two adjacent values is 2/360 */
+#define INPUT_SPACING         0xB60B61
+  
+/**
+ * @defgroup groupController Controller Functions
+ */
+
+
+ /**
+   * @ingroup groupController
+   */
+
+  /**
+   * @addtogroup SinCos
+   * @{
+   */
+
+/**
+   * @brief  Floating-point sin_cos function.
+   * @param[in]  theta   input value in degrees
+   * @param[out] pSinVal  points to the processed sine output.
+   * @param[out] pCosVal  points to the processed cos output.
+   */
+  void riscv_sin_cos_f32(
+        float32_t theta,
+        float32_t * pSinVal,
+        float32_t * pCosVal);
+
+
+  /**
+   * @brief  Q31 sin_cos function.
+   * @param[in]  theta    scaled input value in degrees
+   * @param[out] pSinVal  points to the processed sine output.
+   * @param[out] pCosVal  points to the processed cosine output.
+   */
+  void riscv_sin_cos_q31(
+        q31_t theta,
+        q31_t * pSinVal,
+        q31_t * pCosVal);
+
+  /**
+   * @} end of SinCos group
+   */
+
+ /**
+   * @ingroup groupController
+   */
+
+/**
+   * @defgroup PID PID Motor Control
+   *
+   * A Proportional Integral Derivative (PID) controller is a generic feedback control
+   * loop mechanism widely used in industrial control systems.
+   * A PID controller is the most commonly used type of feedback controller.
+   *
+   * This set of functions implements (PID) controllers
+   * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
+   * of data and each call to the function returns a single processed value.
+   * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
+   * is the input sample value. The functions return the output value.
+   *
+   * \par Algorithm:
+   * <pre>
+   *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
+   *    A0 = Kp + Ki + Kd
+   *    A1 = (-Kp ) - (2 * Kd )
+   *    A2 = Kd
+   * </pre>
+   *
+   * \par
+   * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
+   *
+   * \par
+   * \image html PID.png "Proportional Integral Derivative Controller"
+   *
+   * \par
+   * The PID controller calculates an "error" value as the difference between
+   * the measured output and the reference input.
+   * The controller attempts to minimize the error by adjusting the process control inputs.
+   * The proportional value determines the reaction to the current error,
+   * the integral value determines the reaction based on the sum of recent errors,
+   * and the derivative value determines the reaction based on the rate at which the error has been changing.
+   *
+   * \par Instance Structure
+   * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
+   * A separate instance structure must be defined for each PID Controller.
+   * There are separate instance structure declarations for each of the 3 supported data types.
+   *
+   * \par Reset Functions
+   * There is also an associated reset function for each data type which clears the state array.
+   *
+   * \par Initialization Functions
+   * There is also an associated initialization function for each data type.
+   * The initialization function performs the following operations:
+   * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
+   * - Zeros out the values in the state buffer.
+   *
+   * \par
+   * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
+   *
+   * \par Fixed-Point Behavior
+   * Care must be taken when using the fixed-point versions of the PID Controller functions.
+   * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
+   * Refer to the function specific documentation below for usage guidelines.
+   */
+
+
+  /**
+   * @brief Instance structure for the Q15 PID Control.
+   *
+   * The member layout depends on RISCV_MATH_DSP: without it the derived gains
+   * A1 and A2 are two separate q15_t members; with it they are replaced by a
+   * single q31_t member named A1 that carries both values.
+   */
+  typedef struct
+  {
+          q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd . */
+#if !defined (RISCV_MATH_DSP)
+          q15_t A1;           /**< The derived gain, A1 = -Kp - 2Kd. */
+          q15_t A2;           /**< The derived gain, A2 = Kd. */
+#else
+          q31_t A1;           /**< Both derived gains in one 32-bit word: (-Kp - 2Kd) | Kd. */
+#endif
+          q15_t state[3];     /**< The state array of length 3: state[0] = x[n-1], state[1] = x[n-2], state[2] = y[n-1]. */
+          q15_t Kp;           /**< The proportional gain. */
+          q15_t Ki;           /**< The integral gain. */
+          q15_t Kd;           /**< The derivative gain. */
+  } riscv_pid_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 PID Control.
+   *
+   * Holds the derived gains A0..A2 used by the process function, the
+   * three-element history it updates on every call, and the user gains
+   * Kp, Ki and Kd.
+   */
+  typedef struct
+  {
+          q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
+          q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
+          q31_t A2;            /**< The derived gain, A2 = Kd . */
+          q31_t state[3];      /**< The state array of length 3: state[0] = x[n-1], state[1] = x[n-2], state[2] = y[n-1]. */
+          q31_t Kp;            /**< The proportional gain. */
+          q31_t Ki;            /**< The integral gain. */
+          q31_t Kd;            /**< The derivative gain. */
+  } riscv_pid_instance_q31;
+
+  /**
+   * @brief Instance structure for the floating-point PID Control.
+   *
+   * Holds the derived gains A0..A2 used by the process function, the
+   * three-element history it updates on every call, and the user gains
+   * Kp, Ki and Kd.
+   */
+  typedef struct
+  {
+          float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
+          float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
+          float32_t A2;          /**< The derived gain, A2 = Kd . */
+          float32_t state[3];    /**< The state array of length 3: state[0] = x[n-1], state[1] = x[n-2], state[2] = y[n-1]. */
+          float32_t Kp;          /**< The proportional gain. */
+          float32_t Ki;          /**< The integral gain. */
+          float32_t Kd;          /**< The derivative gain. */
+  } riscv_pid_instance_f32;
+
+
+
+  /**
+   * @brief  Initialization function for the floating-point PID Control.
+   * @param[in,out] S               points to an instance of the PID structure.
+   * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
+   */
+  void riscv_pid_init_f32(
+        riscv_pid_instance_f32 * S,
+        int32_t resetStateFlag);
+
+
+  /**
+   * @brief  Reset function for the floating-point PID Control.
+   * @param[in,out] S  is an instance of the floating-point PID Control structure
+   */
+  void riscv_pid_reset_f32(
+        riscv_pid_instance_f32 * S);
+
+
+  /**
+   * @brief  Initialization function for the Q31 PID Control.
+   * @param[in,out] S               points to an instance of the Q31 PID structure.
+   * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
+   */
+  void riscv_pid_init_q31(
+        riscv_pid_instance_q31 * S,
+        int32_t resetStateFlag);
+
+
+  /**
+   * @brief  Reset function for the Q31 PID Control.
+   * @param[in,out] S   points to an instance of the Q31 PID Control structure
+   */
+
+  void riscv_pid_reset_q31(
+        riscv_pid_instance_q31 * S);
+
+
+  /**
+   * @brief  Initialization function for the Q15 PID Control.
+   * @param[in,out] S               points to an instance of the Q15 PID structure.
+   * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
+   */
+  void riscv_pid_init_q15(
+        riscv_pid_instance_q15 * S,
+        int32_t resetStateFlag);
+
+
+  /**
+   * @brief  Reset function for the Q15 PID Control.
+   * @param[in,out] S  points to an instance of the q15 PID Control structure
+   */
+  void riscv_pid_reset_q15(
+        riscv_pid_instance_q15 * S);
+
+
+
+  /**
+   * @addtogroup PID
+   * @{
+   */
+
+  /**
+   * @brief         Run one step of the floating-point PID controller.
+   * @param[in,out] S   PID instance; the gains A0..A2 are read and the state array is updated
+   * @param[in]     in  current input sample x[n]
+   * @return        the new controller output y[n]
+   *
+   * @par
+   * On entry state[0] holds x[n-1], state[1] holds x[n-2] and state[2] holds y[n-1].
+   * On exit the history has been advanced by one sample.
+   */
+  __STATIC_FORCEINLINE float32_t riscv_pid_f32(
+  riscv_pid_instance_f32 * S,
+  float32_t in)
+  {
+    /* Snapshot the history before any of it is overwritten */
+    const float32_t prevIn1 = S->state[0];   /* x[n-1] */
+    const float32_t prevIn2 = S->state[1];   /* x[n-2] */
+    const float32_t prevOut = S->state[2];   /* y[n-1] */
+
+    /* y[n] = A0 * x[n] + A1 * x[n-1] + A2 * x[n-2] + y[n-1], summed left to right */
+    const float32_t result =
+      (S->A0 * in) + (S->A1 * prevIn1) + (S->A2 * prevIn2) + prevOut;
+
+    /* Advance the input history and remember the new output */
+    S->state[1] = prevIn1;
+    S->state[0] = in;
+    S->state[2] = result;
+
+    /* Hand the processed sample back to the caller */
+    return result;
+  }
+
+/**
+  @brief         Process function for the Q31 PID Control.
+  @param[in,out] S  points to an instance of the Q31 PID Control structure
+  @param[in]     in  input sample to process
+  @return        processed output sample.
+
+  \par Scaling and Overflow Behavior
+         The function is implemented using an internal 64-bit accumulator.
+         The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
+         Thus, if the accumulator result overflows it wraps around rather than clips.
+         In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
+         After all multiply-accumulates are performed, the 2.62 accumulator is shifted right by 31 bits and truncated (not saturated) to 32 bits.
+         The previous output y[n-1] is then added with saturation, so the final sum is clamped to the 1.31 range instead of overflowing.
+ */
+__STATIC_FORCEINLINE q31_t riscv_pid_q31(
+  riscv_pid_instance_q31 * S,
+  q31_t in)
+  {
+    q63_t acc;
+    q31_t out;
+
+    /* acc = A0 * x[n]  */
+    acc = (q63_t) S->A0 * in;
+
+    /* acc += A1 * x[n-1] */
+    acc += (q63_t) S->A1 * S->state[0];
+
+    /* acc += A2 * x[n-2]  */
+    acc += (q63_t) S->A2 * S->state[1];
+
+    /* convert output to 1.31 format to add y[n-1] */
+    out = (q31_t) (acc >> 31U);
+
+    /* out += y[n-1]; a plain signed add would be undefined behaviour on
+       overflow, so use the saturating add as the documentation promises */
+    out = __QADD(out, S->state[2]);
+
+    /* Update state */
+    S->state[1] = S->state[0];
+    S->state[0] = in;
+    S->state[2] = out;
+
+    /* return to application */
+    return (out);
+  }
+
+
+/**
+  @brief         Process function for the Q15 PID Control.
+  @param[in,out] S   points to an instance of the Q15 PID Control structure
+  @param[in]     in  input sample to process
+  @return        processed output sample.
+
+  \par Scaling and Overflow Behavior
+         The function is implemented using a 64-bit internal accumulator.
+         Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
+         The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
+         There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
+         After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
+         Lastly, the accumulator is saturated to yield a result in 1.15 format.
+
+  \par Implementation notes
+         When RISCV_MATH_DSP is defined the products are formed with the dual 16-bit multiply-accumulate intrinsics,
+         and S->A1 is a 32-bit word holding both remaining gains. Otherwise plain 32-bit multiplications are used.
+ */
+__STATIC_FORCEINLINE q15_t riscv_pid_q15(
+  riscv_pid_instance_q15 * S,
+  q15_t in)
+  {
+    q63_t acc;
+    q15_t out;
+
+#if defined (RISCV_MATH_DSP)
+    /* Implementation of PID controller */
+
+    /* acc = A0 * x[n]
+       Both operands are zero-extended through uint16_t so that the upper
+       halfwords seen by the dual multiply are 0. A direct cast to uint32_t
+       sign-extends, which makes both upper halfwords -1 for negative
+       operands and adds a spurious (-1) * (-1) = 1 to the product. */
+    acc = (q31_t) __SMUAD((uint32_t)(uint16_t)S->A0, (uint32_t)(uint16_t)in);
+
+    /* acc += A1 * x[n-1] + A2 * x[n-2]  */
+    acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)read_q15x2 (S->state), (uint64_t)acc);
+#else
+    /* acc = A0 * x[n]  */
+    acc = ((q31_t) S->A0) * in;
+
+    /* acc += A1 * x[n-1] + A2 * x[n-2]  */
+    acc += (q31_t) S->A1 * S->state[0];
+    acc += (q31_t) S->A2 * S->state[1];
+#endif
+
+    /* acc += y[n-1], aligned to the 2.30 products.
+       Multiplying by 2^15 gives the same value as a 15-bit left shift but is
+       well defined when y[n-1] is negative. */
+    acc += (q31_t) S->state[2] * 32768;
+
+    /* saturate the output */
+    out = (q15_t) (__SSAT((q31_t)(acc >> 15), 16));
+
+    /* Update state */
+    S->state[1] = S->state[0];
+    S->state[0] = in;
+    S->state[2] = out;
+
+    /* return to application */
+    return (out);
+  }
+
+  /**
+   * @} end of PID group
+   */
+
+  /**
+   * @ingroup groupController
+   */
+
+  /**
+   * @defgroup park Vector Park Transform
+   *
+   * Forward Park transform converts the input two-coordinate vector to flux and torque components.
+   * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
+   * from the stationary to the moving reference frame and control the spatial relationship between
+   * the stator vector current and rotor flux vector.
+   * If we consider the d axis aligned with the rotor flux, the diagram below shows the
+   * current vector and the relationship from the two reference frames:
+   * \image html park.png "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
+   *
+   * The function operates on a single sample of data and each call to the function returns the processed output.
+   * The library provides separate functions for Q31 and floating-point data types.
+   * \par Algorithm
+   * \image html parkFormula.png
+   * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
+   * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
+   * cosine and sine values of theta (rotor flux position).
+   * \par Fixed-Point Behavior
+   * Care must be taken when using the Q31 version of the Park transform.
+   * In particular, the overflow and saturation behavior of the accumulator used must be considered.
+   * Refer to the function specific documentation below for usage guidelines.
+   */
+
+  /**
+   * @addtogroup park
+   * @{
+   */
+
+  /**
+   * @brief Forward Park transform, floating-point version
+   * @param[in]  Ialpha  alpha coordinate of the two-phase input vector
+   * @param[in]  Ibeta   beta coordinate of the two-phase input vector
+   * @param[out] pId     receives the d component in the rotor reference frame
+   * @param[out] pIq     receives the q component in the rotor reference frame
+   * @param[in]  sinVal  sine of the rotation angle theta
+   * @param[in]  cosVal  cosine of the rotation angle theta
+   * @return     none
+   *
+   * Computes Id =  Ialpha * cos(theta) + Ibeta * sin(theta)
+   * and      Iq = -Ialpha * sin(theta) + Ibeta * cos(theta).
+   */
+  __STATIC_FORCEINLINE void riscv_park_f32(
+  float32_t Ialpha,
+  float32_t Ibeta,
+  float32_t * pId,
+  float32_t * pIq,
+  float32_t sinVal,
+  float32_t cosVal)
+  {
+    /* d axis: projection onto the rotated alpha direction */
+    const float32_t dAxis = Ialpha * cosVal + Ibeta * sinVal;
+
+    /* q axis: projection onto the rotated beta direction */
+    const float32_t qAxis = -Ialpha * sinVal + Ibeta * cosVal;
+
+    /* Publish both components, d first and q second */
+    *pId = dAxis;
+    *pIq = qAxis;
+  }
+
+
+/**
+  @brief  Forward Park transform, Q31 version
+  @param[in]  Ialpha  alpha coordinate of the two-phase input vector
+  @param[in]  Ibeta   beta coordinate of the two-phase input vector
+  @param[out] pId     receives the d component in the rotor reference frame
+  @param[out] pIq     receives the q component in the rotor reference frame
+  @param[in]  sinVal  sine of the rotation angle theta
+  @param[in]  cosVal  cosine of the rotation angle theta
+  @return     none
+
+  \par Scaling and Overflow Behavior
+         Each of the four products is formed in 64 bits (2.62 format) and brought back to 1.31 format
+         by discarding its lower 31 bits.
+         The two results are then formed with the saturating add and subtract, hence there is no risk of overflow.
+ */
+__STATIC_FORCEINLINE void riscv_park_q31(
+  q31_t Ialpha,
+  q31_t Ibeta,
+  q31_t * pId,
+  q31_t * pIq,
+  q31_t sinVal,
+  q31_t cosVal)
+  {
+    /* Ialpha * cosVal, reduced to 1.31 */
+    const q31_t alphaCos = (q31_t) (((q63_t) Ialpha * cosVal) >> 31);
+
+    /* Ibeta * sinVal, reduced to 1.31 */
+    const q31_t betaSin = (q31_t) (((q63_t) Ibeta * sinVal) >> 31);
+
+    /* Ialpha * sinVal, reduced to 1.31 */
+    const q31_t alphaSin = (q31_t) (((q63_t) Ialpha * sinVal) >> 31);
+
+    /* Ibeta * cosVal, reduced to 1.31 */
+    const q31_t betaCos = (q31_t) (((q63_t) Ibeta * cosVal) >> 31);
+
+    /* Id = Ialpha * cosVal + Ibeta * sinVal */
+    *pId = __QADD(alphaCos, betaSin);
+
+    /* Iq = Ibeta * cosVal - Ialpha * sinVal */
+    *pIq = __QSUB(betaCos, alphaSin);
+  }
+
+  /**
+   * @} end of park group
+   */
+
+
+  /**
+   * @ingroup groupController
+   */
+
+  /**
+   * @defgroup inv_park Vector Inverse Park transform
+   * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
+   *
+   * The function operates on a single sample of data and each call to the function returns the processed output.
+   * The library provides separate functions for Q31 and floating-point data types.
+   * \par Algorithm
+   * \image html parkInvFormula.png
+   * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
+   * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
+   * cosine and sine values of theta (rotor flux position).
+   * \par Fixed-Point Behavior
+   * Care must be taken when using the Q31 version of the Inverse Park transform.
+   * In particular, the overflow and saturation behavior of the accumulator used must be considered.
+   * Refer to the function specific documentation below for usage guidelines.
+   */
+
+  /**
+   * @addtogroup inv_park
+   * @{
+   */
+
+   /**
+   * @brief  Inverse Park transform, floating-point version
+   * @param[in]  Id       d coordinate in the rotor reference frame
+   * @param[in]  Iq       q coordinate in the rotor reference frame
+   * @param[out] pIalpha  receives the alpha axis of the two-phase orthogonal vector
+   * @param[out] pIbeta   receives the beta axis of the two-phase orthogonal vector
+   * @param[in]  sinVal   sine of the rotation angle theta
+   * @param[in]  cosVal   cosine of the rotation angle theta
+   * @return     none
+   *
+   * Computes Ialpha = Id * cos(theta) - Iq * sin(theta)
+   * and      Ibeta  = Id * sin(theta) + Iq * cos(theta).
+   */
+  __STATIC_FORCEINLINE void riscv_inv_park_f32(
+  float32_t Id,
+  float32_t Iq,
+  float32_t * pIalpha,
+  float32_t * pIbeta,
+  float32_t sinVal,
+  float32_t cosVal)
+  {
+    /* alpha axis of the stationary frame */
+    const float32_t alphaAxis = Id * cosVal - Iq * sinVal;
+
+    /* beta axis of the stationary frame */
+    const float32_t betaAxis = Id * sinVal + Iq * cosVal;
+
+    /* Publish both components, alpha first and beta second */
+    *pIalpha = alphaAxis;
+    *pIbeta = betaAxis;
+  }
+
+
+/**
+  @brief  Inverse Park transform, Q31 version
+  @param[in]  Id       d coordinate in the rotor reference frame
+  @param[in]  Iq       q coordinate in the rotor reference frame
+  @param[out] pIalpha  receives the alpha axis of the two-phase orthogonal vector
+  @param[out] pIbeta   receives the beta axis of the two-phase orthogonal vector
+  @param[in]  sinVal   sine of the rotation angle theta
+  @param[in]  cosVal   cosine of the rotation angle theta
+  @return     none
+
+  @par Scaling and Overflow Behavior
+         Each of the four products is formed in 64 bits (2.62 format) and brought back to 1.31 format
+         by discarding its lower 31 bits.
+         The two results are then formed with the saturating subtract and add, hence there is no risk of overflow.
+ */
+__STATIC_FORCEINLINE void riscv_inv_park_q31(
+  q31_t Id,
+  q31_t Iq,
+  q31_t * pIalpha,
+  q31_t * pIbeta,
+  q31_t sinVal,
+  q31_t cosVal)
+  {
+    /* Id * cosVal, reduced to 1.31 */
+    const q31_t dCos = (q31_t) (((q63_t) Id * cosVal) >> 31);
+
+    /* Iq * sinVal, reduced to 1.31 */
+    const q31_t qSin = (q31_t) (((q63_t) Iq * sinVal) >> 31);
+
+    /* Id * sinVal, reduced to 1.31 */
+    const q31_t dSin = (q31_t) (((q63_t) Id * sinVal) >> 31);
+
+    /* Iq * cosVal, reduced to 1.31 */
+    const q31_t qCos = (q31_t) (((q63_t) Iq * cosVal) >> 31);
+
+    /* Ialpha = Id * cosVal - Iq * sinVal */
+    *pIalpha = __QSUB(dCos, qSin);
+
+    /* Ibeta = Iq * cosVal + Id * sinVal */
+    *pIbeta = __QADD(qCos, dSin);
+  }
+
+  /**
+   * @} end of Inverse park group
+   */
+
+/**
+   * @ingroup groupController
+   */
+
+  /**
+   * @defgroup clarke Vector Clarke Transform
+   * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
+   * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
+   * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
+   * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
+   * \image html clarke.png "Stator current space vector and its components in (a,b)."
+   * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
+   * can be calculated using only <code>Ia</code> and <code>Ib</code>.
+   *
+   * The function operates on a single sample of data and each call to the function returns the processed output.
+   * The library provides separate functions for Q31 and floating-point data types.
+   * \par Algorithm
+   * \image html clarkeFormula.png
+   * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
+   * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
+   * \par Fixed-Point Behavior
+   * Care must be taken when using the Q31 version of the Clarke transform.
+   * In particular, the overflow and saturation behavior of the accumulator used must be considered.
+   * Refer to the function specific documentation below for usage guidelines.
+   */
+
+  /**
+   * @addtogroup clarke
+   * @{
+   */
+
+  /**
+   * @brief  Forward Clarke transform, floating-point version
+   * @param[in]  Ia       three-phase input coordinate <code>a</code>
+   * @param[in]  Ib       three-phase input coordinate <code>b</code>
+   * @param[out] pIalpha  receives the alpha axis of the two-phase orthogonal vector
+   * @param[out] pIbeta   receives the beta axis of the two-phase orthogonal vector
+   * @return        none
+   *
+   * Computes Ialpha = Ia and Ibeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib.
+   */
+  __STATIC_FORCEINLINE void riscv_clarke_f32(
+  float32_t Ia,
+  float32_t Ib,
+  float32_t * pIalpha,
+  float32_t * pIbeta)
+  {
+    const float32_t invSqrt3 = 0.57735026919f;      /* 1/sqrt(3) */
+    const float32_t twoInvSqrt3 = 1.15470053838f;   /* 2/sqrt(3) */
+
+    /* The alpha axis coincides with phase a */
+    *pIalpha = Ia;
+
+    /* The beta axis is a weighted sum of phases a and b */
+    *pIbeta = invSqrt3 * Ia + twoInvSqrt3 * Ib;
+  }
+
+
+/**
+  @brief  Forward Clarke transform, Q31 version
+  @param[in]  Ia       three-phase input coordinate <code>a</code>
+  @param[in]  Ib       three-phase input coordinate <code>b</code>
+  @param[out] pIalpha  receives the alpha axis of the two-phase orthogonal vector
+  @param[out] pIbeta   receives the beta axis of the two-phase orthogonal vector
+  @return     none
+
+  \par Scaling and Overflow Behavior
+         The coefficients 1/sqrt(3) and 2/sqrt(3) are stored in 2.30 format, so each 64-bit product is
+         shifted right by 30 bits to return to 1.31 format before being narrowed to 32 bits.
+         The two products are combined with the saturating add.
+         NOTE(review): 2/sqrt(3) is greater than 1, and the narrowing cast of the second product is a plain
+         truncation, not a saturation - confirm that callers keep Ib small enough for that product to fit.
+ */
+__STATIC_FORCEINLINE void riscv_clarke_q31(
+  q31_t Ia,
+  q31_t Ib,
+  q31_t * pIalpha,
+  q31_t * pIbeta)
+  {
+    const q31_t invSqrt3Q30 = 0x24F34E8B;      /* 1/sqrt(3) in 2.30 format */
+    const q31_t twoInvSqrt3Q30 = 0x49E69D16;   /* 2/sqrt(3) in 2.30 format */
+    q31_t scaledA, scaledB;                    /* The two weighted phase currents */
+
+    /* The alpha axis coincides with phase a */
+    *pIalpha = Ia;
+
+    /* (1/sqrt(3)) * Ia */
+    scaledA = (q31_t) (((q63_t) Ia * invSqrt3Q30) >> 30);
+
+    /* (2/sqrt(3)) * Ib */
+    scaledB = (q31_t) (((q63_t) Ib * twoInvSqrt3Q30) >> 30);
+
+    /* The beta axis is the saturated sum of both weighted currents */
+    *pIbeta = __QADD(scaledA, scaledB);
+  }
+
+  /**
+   * @} end of clarke group
+   */
+
+
+  /**
+   * @ingroup groupController
+   */
+
+  /**
+   * @defgroup inv_clarke Vector Inverse Clarke Transform
+   * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
+   *
+   * The function operates on a single sample of data and each call to the function returns the processed output.
+   * The library provides separate functions for Q31 and floating-point data types.
+   * \par Algorithm
+   * \image html clarkeInvFormula.png
+   * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
+   * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
+   * \par Fixed-Point Behavior
+   * Care must be taken when using the Q31 version of the Inverse Clarke transform.
+   * In particular, the overflow and saturation behavior of the accumulator used must be considered.
+   * Refer to the function specific documentation below for usage guidelines.
+   */
+
+  /**
+   * @addtogroup inv_clarke
+   * @{
+   */
+
+   /**
+   * @brief  Inverse Clarke transform, floating-point version
+   * @param[in]  Ialpha  alpha axis of the two-phase orthogonal input vector
+   * @param[in]  Ibeta   beta axis of the two-phase orthogonal input vector
+   * @param[out] pIa     receives the three-phase coordinate <code>a</code>
+   * @param[out] pIb     receives the three-phase coordinate <code>b</code>
+   * @return     none
+   *
+   * Computes Ia = Ialpha and Ib = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta.
+   */
+  __STATIC_FORCEINLINE void riscv_inv_clarke_f32(
+  float32_t Ialpha,
+  float32_t Ibeta,
+  float32_t * pIa,
+  float32_t * pIb)
+  {
+    const float32_t minusHalf = -0.5f;              /* -(1/2) */
+    const float32_t halfSqrt3 = 0.8660254039f;      /* sqrt(3)/2 */
+
+    /* Phase a coincides with the alpha axis */
+    *pIa = Ialpha;
+
+    /* Phase b is a weighted sum of the alpha and beta axes */
+    *pIb = minusHalf * Ialpha + halfSqrt3 * Ibeta;
+  }
+
+
+/**
+  @brief  Inverse Clarke transform, Q31 version
+  @param[in]  Ialpha  alpha axis of the two-phase orthogonal input vector
+  @param[in]  Ibeta   beta axis of the two-phase orthogonal input vector
+  @param[out] pIa     receives the three-phase coordinate <code>a</code>
+  @param[out] pIb     receives the three-phase coordinate <code>b</code>
+  @return     none
+
+  \par Scaling and Overflow Behavior
+         The coefficients 1/2 and sqrt(3)/2 are stored in 1.31 format. Each 64-bit product (2.62 format) is
+         brought back to 1.31 format by discarding its lower 31 bits.
+         The two products are combined with the saturating subtract, hence there is no risk of overflow.
+ */
+__STATIC_FORCEINLINE void riscv_inv_clarke_q31(
+  q31_t Ialpha,
+  q31_t Ibeta,
+  q31_t * pIa,
+  q31_t * pIb)
+  {
+    const q31_t halfQ31 = 0x40000000;        /* 1/2 in 1.31 format */
+    const q31_t halfSqrt3Q31 = 0x6ED9EBA1;   /* sqrt(3)/2 in 1.31 format */
+    q31_t halfAlpha, scaledBeta;             /* The two weighted axis values */
+
+    /* Phase a coincides with the alpha axis */
+    *pIa = Ialpha;
+
+    /* (1/2) * Ialpha */
+    halfAlpha = (q31_t) (((q63_t) Ialpha * halfQ31) >> 31);
+
+    /* (sqrt(3)/2) * Ibeta */
+    scaledBeta = (q31_t) (((q63_t) Ibeta * halfSqrt3Q31) >> 31);
+
+    /* Ib = (sqrt(3)/2) * Ibeta - (1/2) * Ialpha, saturated */
+    *pIb = __QSUB(scaledBeta, halfAlpha);
+  }
+
+  /**
+   * @} end of inv_clarke group
+   */
+
+
+
+  
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _CONTROLLER_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/controller_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/controller_functions_f16.h
@ -0,0 +1,42 @@
+/******************************************************************************
+ * @file     controller_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _CONTROLLER_FUNCTIONS_F16_H_
+#define _CONTROLLER_FUNCTIONS_F16_H_
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _CONTROLLER_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/distance_functions.h
+++ b/components/nmsis/dsp/inc/dsp/distance_functions.h
@ -0,0 +1,293 @@
+/******************************************************************************
+ * @file     distance_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _DISTANCE_FUNCTIONS_H_
+#define _DISTANCE_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/statistics_functions.h"
+#include "dsp/basic_math_functions.h"
+#include "dsp/fast_math_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+/**
+ * @defgroup groupDistance Distance functions
+ *
+ * Distance functions for use with clustering algorithms.
+ * There are distance functions for float vectors and boolean vectors.
+ *
+ */
+
+/* 6.14 bug */
+
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float32_t riscv_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Bray-Curtis distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float32_t riscv_braycurtis_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Canberra distance between two vectors
+ *
+ * This function may divide by zero when samples pA[i] and pB[i] are both zero.
+ * The result of the computation will still be correct, so the division by zero
+ * may be ignored.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float32_t riscv_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
+
+
+/**
+ * @brief        Chebyshev distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float32_t riscv_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
+
+
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float32_t riscv_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Correlation distance between two vectors
+ *
+ * The input vectors are modified in place !
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float32_t riscv_correlation_distance_f32(float32_t *pA,float32_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float32_t riscv_cosine_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Jensen-Shannon distance between two vectors
+ *
+ * This function is assuming that elements of second vector are > 0
+ * and 0 only when the corresponding element of first vector is 0.
+ * Otherwise the result of the computation does not make sense
+ * and for speed reasons, the cases returning NaN or Infinity are not
+ * managed.
+ *
+ * When the function is computing x log (x / y) with x = 0 and y = 0,
+ * it will compute the right value (0) but a division by zero will occur
+ * and should be ignored in client code.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float32_t riscv_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB,uint32_t blockSize);
+
+/**
+ * @brief        Minkowski distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    order      Norm order (>= 2)
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+
+
+float32_t riscv_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize);
+
+/**
+ * @brief        Dice distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+
+float32_t riscv_dice_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
+
+/**
+ * @brief        Hamming distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t riscv_hamming_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
+
+/**
+ * @brief        Jaccard distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t riscv_jaccard_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
+
+/**
+ * @brief        Kulsinski distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t riscv_kulsinski_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
+
+/**
+ * @brief        Rogers-Tanimoto distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t riscv_rogerstanimoto_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
+
+/**
+ * @brief        Russell-Rao distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t riscv_russellrao_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
+
+/**
+ * @brief        Sokal-Michener distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t riscv_sokalmichener_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
+
+/**
+ * @brief        Sokal-Sneath distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t riscv_sokalsneath_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
+
+/**
+ * @brief        Yule distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t riscv_yule_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _DISTANCE_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/distance_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/distance_functions_f16.h
@ -0,0 +1,177 @@
+/******************************************************************************
+ * @file     distance_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _DISTANCE_FUNCTIONS_F16_H_
+#define _DISTANCE_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+/* 6.14 bug */
+
+#include "dsp/statistics_functions_f16.h"
+#include "dsp/basic_math_functions_f16.h"
+
+#include "dsp/fast_math_functions_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float16_t riscv_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Bray-Curtis distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t riscv_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Canberra distance between two vectors
+ *
+ * This function may divide by zero when samples pA[i] and pB[i] are both zero.
+ * The result of the computation will still be correct, so the division by zero
+ * may be ignored.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t riscv_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+
+/**
+ * @brief        Chebyshev distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t riscv_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t riscv_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Correlation distance between two vectors
+ *
+ * The input vectors are modified in place !
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t riscv_correlation_distance_f16(float16_t *pA,float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float16_t riscv_cosine_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Jensen-Shannon distance between two vectors
+ *
+ * This function is assuming that elements of second vector are > 0
+ * and 0 only when the corresponding element of first vector is 0.
+ * Otherwise the result of the computation does not make sense
+ * and for speed reasons, the cases returning NaN or Infinity are not
+ * managed.
+ *
+ * When the function is computing x log (x / y) with x = 0 and y = 0,
+ * it will compute the right value (0) but a division by zero will occur
+ * and should be ignored in client code.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float16_t riscv_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB,uint32_t blockSize);
+
+/**
+ * @brief        Minkowski distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    order      Norm order (>= 2)
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+
+
+float16_t riscv_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize);
+
+
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _DISTANCE_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/fast_math_functions.h
+++ b/components/nmsis/dsp/inc/dsp/fast_math_functions.h
@ -0,0 +1,296 @@
+/******************************************************************************
+ * @file     fast_math_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef _FAST_MATH_FUNCTIONS_H_
+#define _FAST_MATH_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+  /**
+   * @brief Macros required for SINE and COSINE Fast math approximations
+   */
+
+#define FAST_MATH_TABLE_SIZE  512
+#define FAST_MATH_Q31_SHIFT   (32 - 10)
+#define FAST_MATH_Q15_SHIFT   (16 - 10)
+
+#ifndef PI
+  #define PI               3.14159265358979f
+#endif
+
+
+/**
+ * @defgroup groupFastMath Fast Math Functions
+ * This set of functions provides a fast approximation to sine, cosine, and square root.
+ * As compared to most of the other functions in the NMSIS math library, the fast math functions
+ * operate on individual values and not arrays.
+ * There are separate functions for Q15, Q31, and floating-point data.
+ *
+ */
+
+  /**
+   * @ingroup groupFastMath
+   */
+
+
+/**
+  @addtogroup sin
+  @{
+ */
+
+/**
+   * @brief  Fast approximation to the trigonometric sine function for floating-point data.
+   * @param[in] x  input value in radians.
+   * @return  sin(x).
+   */
+  float32_t riscv_sin_f32(
+  float32_t x);
+
+
+  /**
+   * @brief  Fast approximation to the trigonometric sine function for Q31 data.
+   * @param[in] x  Scaled input value in radians.
+   * @return  sin(x).
+   */
+  q31_t riscv_sin_q31(
+  q31_t x);
+
+
+  /**
+   * @brief  Fast approximation to the trigonometric sine function for Q15 data.
+   * @param[in] x  Scaled input value in radians.
+   * @return  sin(x).
+   */
+  q15_t riscv_sin_q15(
+  q15_t x);
+
+/**
+  @} end of sin group
+ */
+
+/**
+  @addtogroup cos
+  @{
+ */
+
+  /**
+   * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
+   * @param[in] x  input value in radians.
+   * @return  cos(x).
+   */
+  float32_t riscv_cos_f32(
+  float32_t x);
+
+
+  /**
+   * @brief Fast approximation to the trigonometric cosine function for Q31 data.
+   * @param[in] x  Scaled input value in radians.
+   * @return  cos(x).
+   */
+  q31_t riscv_cos_q31(
+  q31_t x);
+
+
+  /**
+   * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
+   * @param[in] x  Scaled input value in radians.
+   * @return  cos(x).
+   */
+  q15_t riscv_cos_q15(
+  q15_t x);
+
+/**
+  @} end of cos group
+ */
+
+
+/**
+  @brief         Floating-point vector of log values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void riscv_vlog_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+/**
+  @brief         Floating-point vector of exp values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void riscv_vexp_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @defgroup SQRT Square Root
+   *
+   * Computes the square root of a number.
+   * There are separate functions for Q15, Q31, and floating-point data types.
+   * The square root function is computed using the Newton-Raphson algorithm.
+   * This is an iterative algorithm of the form:
+   * <pre>
+   *      x1 = x0 - f(x0)/f'(x0)
+   * </pre>
+   * where <code>x1</code> is the current estimate,
+   * <code>x0</code> is the previous estimate, and
+   * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
+   * For the square root function, the algorithm reduces to:
+   * <pre>
+   *     x0 = in/2                         [initial guess]
+   *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
+   * </pre>
+   */
+
+
+  /**
+   * @addtogroup SQRT
+   * @{
+   */
+
+/**
+  @brief         Single-precision floating-point square root.
+  @param[in]     in    value whose square root is requested
+  @param[out]    pOut  receives the square root of the input value
+  @return        execution status
+                   - \ref RISCV_MATH_SUCCESS        : input compared >= 0; *pOut holds the root
+                   - \ref RISCV_MATH_ARGUMENT_ERROR : input failed the >= 0 test; *pOut is set to 0
+ */
+__STATIC_FORCEINLINE riscv_status riscv_sqrt_f32(
+  float32_t in,
+  float32_t * pOut)
+{
+  /* Reject every input that fails the ">= 0" comparison up front
+     (a NaN also lands here because the comparison evaluates to false). */
+  if (!(in >= 0.0f))
+  {
+    *pOut = 0.0f;
+    return (RISCV_MATH_ARGUMENT_ERROR);
+  }
+
+#if defined ( __RISCV_FLEN )
+  /* __RISCV_FLEN is defined: emit the fsqrt.s instruction directly. */
+  __ASM volatile("fsqrt.s %0, %1" : "=f"(*pOut) : "f"(in));
+#else
+  /* Otherwise use the C library sqrtf(). */
+  *pOut = sqrtf(in);
+#endif
+
+  return (RISCV_MATH_SUCCESS);
+}
+
+
+/**
+  @brief         Q31 square root function.
+  @param[in]     in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF
+  @param[out]    pOut  points to square root of input value
+  @return        execution status
+                   - \ref RISCV_MATH_SUCCESS        : input value is positive
+                   - \ref RISCV_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
+ */
+riscv_status riscv_sqrt_q31(
+  q31_t in,
+  q31_t * pOut);
+
+
+/**
+  @brief         Q15 square root function.
+  @param[in]     in    input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF
+  @param[out]    pOut  points to square root of input value
+  @return        execution status
+                   - \ref RISCV_MATH_SUCCESS        : input value is positive
+                   - \ref RISCV_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
+ */
+riscv_status riscv_sqrt_q15(
+  q15_t in,
+  q15_t * pOut);
+
+  /**
+   * @brief  Vector Floating-point square root function.
+   * @param[in]  pIn   input vector.
+   * @param[out] pOut  vector of square roots of input elements.
+   * @param[in]  len   length of input vector.
+   * @return none (the function is declared void, so no status code is reported).
+   * Negative input values produce a zero output element.
+   */
+  void riscv_vsqrt_f32(
+  float32_t * pIn,
+  float32_t * pOut,
+  uint16_t len);
+
+  /**
+   * @brief  Vector Q31 square root function.
+   *         NOTE(review): presumably the element-wise counterpart of riscv_sqrt_q31 -
+   *         confirm against the implementation, which is not part of this header.
+   * @param[in]  pIn   input vector.
+   * @param[out] pOut  output vector (presumably the square roots of the input elements).
+   * @param[in]  len   length of input vector.
+   */
+  void riscv_vsqrt_q31(
+  q31_t * pIn,
+  q31_t * pOut,
+  uint16_t len);
+
+  /**
+   * @brief  Vector Q15 square root function.
+   *         NOTE(review): presumably the element-wise counterpart of riscv_sqrt_q15 -
+   *         confirm against the implementation, which is not part of this header.
+   * @param[in]  pIn   input vector.
+   * @param[out] pOut  output vector (presumably the square roots of the input elements).
+   * @param[in]  len   length of input vector.
+   */
+  void riscv_vsqrt_q15(
+  q15_t * pIn,
+  q15_t * pOut,
+  uint16_t len);
+
+  /**
+   * @} end of SQRT group
+   */
+
+  /**
+  @brief         Fixed point division
+  @param[in]     numerator    Numerator
+  @param[in]     denominator  Denominator
+  @param[out]    quotient     Quotient value normalized between -1.0 and 1.0
+  @param[out]    shift        Shift left value to get the unnormalized quotient
+  @return        error status
+
+  When dividing by 0, an error RISCV_MATH_NANINF is returned. And the quotient is forced
+  to the saturated negative or positive value.
+ */
+
+riscv_status riscv_divide_q15(q15_t numerator,
+  q15_t denominator,
+  q15_t *quotient,
+  int16_t *shift);
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _FAST_MATH_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/fast_math_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/fast_math_functions_f16.h
@ -0,0 +1,117 @@
+/******************************************************************************
+ * @file     fast_math_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _FAST_MATH_FUNCTIONS_F16_H_
+#define _FAST_MATH_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+/* For sqrt_f32 */
+#include "dsp/fast_math_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+ /**
+   * @addtogroup SQRT
+   * @{
+   */
+
+/**
+  @brief         Half-precision floating-point square root.
+  @param[in]     in    input value
+  @param[out]    pOut  receives the square root, converted back to float16_t
+  @return        execution status, forwarded unchanged from riscv_sqrt_f32
+                   - \ref RISCV_MATH_SUCCESS        : input value is positive
+                   - \ref RISCV_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
+ */
+__STATIC_FORCEINLINE riscv_status riscv_sqrt_f16(
+  float16_t in,
+  float16_t * pOut)
+{
+  float32_t root32;
+
+  /* Widen the operand to float32_t and let riscv_sqrt_f32 do the work. */
+  const riscv_status result = riscv_sqrt_f32((float32_t)in, &root32);
+
+  /* Narrow the single-precision result back to half precision. */
+  *pOut = (float16_t)root32;
+
+  return (result);
+}
+
+
+/**
+  @} end of SQRT group
+ */
+  
+/**
+  @brief         Floating-point vector of log values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void riscv_vlog_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+/**
+  @brief         Floating-point vector of exp values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void riscv_vexp_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+  @brief         Floating-point vector of inverse values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void riscv_vinverse_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _FAST_MATH_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/filtering_functions.h
+++ b/components/nmsis/dsp/inc/dsp/filtering_functions.h
--- a/components/nmsis/dsp/inc/dsp/filtering_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/filtering_functions_f16.h
@ -0,0 +1,221 @@
+/******************************************************************************
+ * @file     filtering_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _FILTERING_FUNCTIONS_F16_H_
+#define _FILTERING_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+ /**
+   * @brief Instance structure for the floating-point FIR filter.
+   *
+   * Half-precision (float16_t) variant. The structure is passed to
+   * riscv_fir_init_f16() for set-up and to riscv_fir_f16() for processing.
+   */
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of filter coefficients in the filter. */
+          float16_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const float16_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+  } riscv_fir_instance_f16;
+
+  /**
+   * @brief  Initialization function for the floating-point FIR filter.
+   * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
+   * @param[in]     numTaps    Number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of samples that are processed at a time.
+   */
+  void riscv_fir_init_f16(
+        riscv_fir_instance_f16 * S,
+        uint16_t numTaps,
+  const float16_t * pCoeffs,
+        float16_t * pState,
+        uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the floating-point FIR filter.
+   * @param[in]  S          points to an instance of the floating-point FIR structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_fir_f16(
+  const riscv_fir_instance_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Instance structure for the floating-point Biquad cascade filter.
+   *
+   * Half-precision (float16_t) variant. The structure is passed to
+   * riscv_biquad_cascade_df1_init_f16() and riscv_biquad_cascade_df1_f16().
+   *
+   * NOTE(review): numStages is uint32_t here while riscv_biquad_cascade_df1_init_f16()
+   * takes a uint8_t numStages - confirm the width difference is intentional.
+   */
+  typedef struct
+  {
+          uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float16_t *pState;       /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
+    const float16_t *pCoeffs;      /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+  } riscv_biquad_casd_df1_inst_f16;
+
+
+  /**
+   * @brief Processing function for the floating-point Biquad cascade filter.
+   * @param[in]  S          points to an instance of the floating-point Biquad cascade structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_biquad_cascade_df1_f16(
+  const riscv_biquad_casd_df1_inst_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Initialization function for the floating-point Biquad cascade filter.
+   * @param[in,out] S          points to an instance of the floating-point Biquad cascade structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   */
+  void riscv_biquad_cascade_df1_init_f16(
+        riscv_biquad_casd_df1_inst_f16 * S,
+        uint8_t numStages,
+  const float16_t * pCoeffs,
+        float16_t * pState);
+
+  /**
+   * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
+   *
+   * Half-precision (float16_t), single-channel variant. The structure is passed to
+   * riscv_biquad_cascade_df2T_init_f16() and riscv_biquad_cascade_df2T_f16().
+   */
+  typedef struct
+  {
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float16_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
+    const float16_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+  } riscv_biquad_cascade_df2T_instance_f16;
+
+  /**
+   * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter, stereo (2 channels) variant.
+   *
+   * The structure is passed to riscv_biquad_cascade_stereo_df2T_init_f16() and
+   * riscv_biquad_cascade_stereo_df2T_f16(). Unlike the single-channel instance,
+   * the state array is documented as 4*numStages long.
+   */
+  typedef struct
+  {
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float16_t *pState;         /**< points to the array of state coefficients.  The array is of length 4*numStages. */
+    const float16_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+  } riscv_biquad_cascade_stereo_df2T_instance_f16;
+
+  /**
+   * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
+   * @param[in]  S          points to an instance of the filter data structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_biquad_cascade_df2T_f16(
+  const riscv_biquad_cascade_df2T_instance_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. 2 channels
+   * @param[in]  S          points to an instance of the filter data structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_biquad_cascade_stereo_df2T_f16(
+  const riscv_biquad_cascade_stereo_df2T_instance_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
+   * @param[in,out] S          points to an instance of the filter data structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   */
+  void riscv_biquad_cascade_df2T_init_f16(
+        riscv_biquad_cascade_df2T_instance_f16 * S,
+        uint8_t numStages,
+  const float16_t * pCoeffs,
+        float16_t * pState);
+
+  /**
+   * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter, stereo (2 channels) variant.
+   * @param[in,out] S          points to an instance of the filter data structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   */
+  void riscv_biquad_cascade_stereo_df2T_init_f16(
+        riscv_biquad_cascade_stereo_df2T_instance_f16 * S,
+        uint8_t numStages,
+  const float16_t * pCoeffs,
+        float16_t * pState);
+
+  /**
+   * @brief Correlation of floating-point sequences.
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
+   */
+  void riscv_correlate_f16(
+  const float16_t * pSrcA,
+        uint32_t srcALen,
+  const float16_t * pSrcB,
+        uint32_t srcBLen,
+        float16_t * pDst);
+
+
+/**
+  @brief         Levinson Durbin
+  @param[in]     phi      autocovariance vector starting with lag 0 (length is nbCoefs + 1)
+  @param[out]    a        autoregressive coefficients
+  @param[out]    err      prediction error (variance)
+  @param[in]     nbCoefs  number of autoregressive coefficients
+  @return        none
+ */
+void riscv_levinson_durbin_f16(const float16_t *phi,
+  float16_t *a, 
+  float16_t *err,
+  int nbCoefs);
+
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _FILTERING_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/interpolation_functions.h
+++ b/components/nmsis/dsp/inc/dsp/interpolation_functions.h
@ -0,0 +1,320 @@
+/******************************************************************************
+ * @file     interpolation_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _INTERPOLATION_FUNCTIONS_H_
+#define _INTERPOLATION_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+/**
+ * @defgroup groupInterpolation Interpolation Functions
+ * These functions perform 1- and 2-dimensional interpolation of data.
+ * Linear interpolation is used for 1-dimensional data and
+ * bilinear interpolation is used for 2-dimensional data.
+ */
+
+
+  /**
+   * @brief Instance structure for the floating-point Linear Interpolate function.
+   */
+  typedef struct
+  {
+          uint32_t nValues;           /**< nValues */
+          float32_t x1;               /**< x1 */
+          float32_t xSpacing;         /**< xSpacing */
+          float32_t *pYData;          /**< pointer to the table of Y values */
+  } riscv_linear_interp_instance_f32;
+
+  /**
+   * @brief Instance structure for the floating-point bilinear interpolation function.
+   */
+  typedef struct
+  {
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          float32_t *pData;   /**< points to the data table. */
+  } riscv_bilinear_interp_instance_f32;
+
+   /**
+   * @brief Instance structure for the Q31 bilinear interpolation function.
+   */
+  typedef struct
+  {
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          q31_t *pData;       /**< points to the data table. */
+  } riscv_bilinear_interp_instance_q31;
+
+   /**
+   * @brief Instance structure for the Q15 bilinear interpolation function.
+   */
+  typedef struct
+  {
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          q15_t *pData;       /**< points to the data table. */
+  } riscv_bilinear_interp_instance_q15;
+
+   /**
+   * @brief Instance structure for the Q7 bilinear interpolation function.
+   */
+  typedef struct
+  {
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          q7_t *pData;        /**< points to the data table. */
+  } riscv_bilinear_interp_instance_q7;
+
+
+  /**
+   * @brief Enumeration for specifying the cubic spline type
+   */
+  typedef enum
+  {
+    RISCV_SPLINE_NATURAL = 0,           /**< Natural spline */
+    RISCV_SPLINE_PARABOLIC_RUNOUT = 1   /**< Parabolic runout spline */
+  } riscv_spline_type;
+
+  /**
+   * @brief Instance structure for the floating-point cubic spline interpolation.
+   */
+  typedef struct
+  {
+    riscv_spline_type type;      /**< Type (boundary conditions) */
+    const float32_t * x;       /**< x values */
+    const float32_t * y;       /**< y values */
+    uint32_t n_x;              /**< Number of known data points */
+    float32_t * coeffs;        /**< Coefficients buffer (b,c, and d) */
+  } riscv_spline_instance_f32;
+
+
+
+
+  /**
+   * @ingroup groupInterpolation
+   */
+
+  /**
+   * @addtogroup SplineInterpolate
+   * @{
+   */
+
+  
+  /**
+   * @brief Processing function for the floating-point cubic spline interpolation.
+   * @param[in]  S          points to an instance of the floating-point spline structure.
+   * @param[in]  xq         points to the x values of the interpolated data points.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples of output data.
+   */
+  void riscv_spline_f32(
+        riscv_spline_instance_f32 * S, 
+  const float32_t * xq,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief Initialization function for the floating-point cubic spline interpolation.
+   * @param[in,out] S        points to an instance of the floating-point spline structure.
+   * @param[in]     type     type of cubic spline interpolation (boundary conditions)
+   * @param[in]     x        points to the x values of the known data points.
+   * @param[in]     y        points to the y values of the known data points.
+   * @param[in]     n        number of known data points.
+   * @param[in]     coeffs   coefficients array for b, c, and d
+   * @param[in]     tempBuffer   buffer array for internal computations
+   *
+   * NOTE(review): coeffs and tempBuffer are non-const pointers, so they are
+   * presumably written by the implementation even though they are tagged
+   * [in] here; their required sizes are not visible in this header. Confirm
+   * both against the implementation.
+   */
+  void riscv_spline_init_f32(
+          riscv_spline_instance_f32 * S,
+          riscv_spline_type type,
+    const float32_t * x,
+    const float32_t * y,
+          uint32_t n, 
+          float32_t * coeffs,
+          float32_t * tempBuffer);
+
+
+  /**
+   * @} end of SplineInterpolate group
+   */
+
+
+  
+  /**
+   * @addtogroup LinearInterpolate
+   * @{
+   */
+
+    /**
+   * @brief  Process function for the floating-point Linear Interpolation Function.
+   * @param[in,out] S  is an instance of the floating-point Linear Interpolation structure
+   * @param[in]     x  input sample to process
+   * @return y processed output sample.
+   *
+   */
+  float32_t riscv_linear_interp_f32(
+  riscv_linear_interp_instance_f32 * S,
+  float32_t x);
+
+   /**
+   *
+   * @brief  Process function for the Q31 Linear Interpolation Function.
+   * @param[in] pYData   pointer to Q31 Linear Interpolation table
+   * @param[in] x        input sample to process
+   * @param[in] nValues  number of table values
+   * @return y processed output sample.
+   *
+   * \par
+   * Input sample <code>x</code> is in 12.20 format, which contains 12 bits for the table index and 20 bits for the fractional part.
+   * This function supports a maximum table size of 2^12.
+   *
+   */
+  q31_t riscv_linear_interp_q31(
+  q31_t * pYData,
+  q31_t x,
+  uint32_t nValues);
+
+  /**
+   *
+   * @brief  Process function for the Q15 Linear Interpolation Function.
+   * @param[in] pYData   pointer to Q15 Linear Interpolation table
+   * @param[in] x        input sample to process
+   * @param[in] nValues  number of table values
+   * @return y processed output sample.
+   *
+   * \par
+   * Input sample <code>x</code> is in 12.20 format, which contains 12 bits for the table index and 20 bits for the fractional part.
+   * This function supports a maximum table size of 2^12.
+   *
+   */
+  q15_t riscv_linear_interp_q15(
+  q15_t * pYData,
+  q31_t x,
+  uint32_t nValues);
+
+  /**
+   *
+   * @brief  Process function for the Q7 Linear Interpolation Function.
+   * @param[in] pYData   pointer to Q7 Linear Interpolation table
+   * @param[in] x        input sample to process
+   * @param[in] nValues  number of table values
+   * @return y processed output sample.
+   *
+   * \par
+   * Input sample <code>x</code> is in 12.20 format, which contains 12 bits for the table index and 20 bits for the fractional part.
+   * This function supports a maximum table size of 2^12.
+   */
+q7_t riscv_linear_interp_q7(
+  q7_t * pYData,
+  q31_t x,
+  uint32_t nValues);
+
+  /**
+   * @} end of LinearInterpolate group
+   */
+
+  
+
+
+  /**
+   * @ingroup groupInterpolation
+   */
+
+
+  /**
+   * @addtogroup BilinearInterpolate
+   * @{
+   */
+
+  /**
+  * @brief  Floating-point bilinear interpolation.
+  * @param[in]     S  points to an instance of the interpolation structure (const: not modified through this pointer).
+  * @param[in]     X  interpolation coordinate.
+  * @param[in]     Y  interpolation coordinate.
+  * @return out interpolated value.
+  */
+  float32_t riscv_bilinear_interp_f32(
+  const riscv_bilinear_interp_instance_f32 * S,
+  float32_t X,
+  float32_t Y);
+
+  /**
+  * @brief  Q31 bilinear interpolation.
+  * @param[in,out] S  points to an instance of the interpolation structure.
+  * @param[in]     X  interpolation coordinate in 12.20 format.
+  * @param[in]     Y  interpolation coordinate in 12.20 format.
+  * @return out interpolated value.
+  */
+  q31_t riscv_bilinear_interp_q31(
+  riscv_bilinear_interp_instance_q31 * S,
+  q31_t X,
+  q31_t Y);
+
+
+  /**
+  * @brief  Q15 bilinear interpolation.
+  * @param[in,out] S  points to an instance of the interpolation structure.
+  * @param[in]     X  interpolation coordinate in 12.20 format.
+  * @param[in]     Y  interpolation coordinate in 12.20 format.
+  * @return out interpolated value.
+  */
+  q15_t riscv_bilinear_interp_q15(
+  riscv_bilinear_interp_instance_q15 * S,
+  q31_t X,
+  q31_t Y);
+
+  /**
+  * @brief  Q7 bilinear interpolation.
+  * @param[in,out] S  points to an instance of the interpolation structure.
+  * @param[in]     X  interpolation coordinate in 12.20 format.
+  * @param[in]     Y  interpolation coordinate in 12.20 format.
+  * @return out interpolated value.
+  */
+  q7_t riscv_bilinear_interp_q7(
+  riscv_bilinear_interp_instance_q7 * S,
+  q31_t X,
+  q31_t Y);
+  /**
+   * @} end of BilinearInterpolate group
+   */
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _INTERPOLATION_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/interpolation_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/interpolation_functions_f16.h
@ -0,0 +1,108 @@
+/******************************************************************************
+ * @file     interpolation_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _INTERPOLATION_FUNCTIONS_F16_H_
+#define _INTERPOLATION_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+/**
+ * @brief Instance structure for the half-precision (f16) floating-point Linear Interpolate function.
+ */
+typedef struct
+{
+    uint32_t  nValues;        /**< nValues */
+    float16_t x1;             /**< x1 */
+    float16_t xSpacing;       /**< xSpacing */
+    float16_t *pYData;        /**< pointer to the table of Y values */
+} riscv_linear_interp_instance_f16;
+
+/**
+ * @brief Instance structure for the half-precision (f16) floating-point bilinear interpolation function.
+ */
+typedef struct
+{
+    uint16_t  numRows;/**< number of rows in the data table. */
+    uint16_t  numCols;/**< number of columns in the data table. */
+    float16_t *pData; /**< points to the data table. */
+} riscv_bilinear_interp_instance_f16;
+
+  /**
+   * @addtogroup LinearInterpolate
+   * @{
+   */
+
+    /**
+   * @brief  Process function for the half-precision (f16) floating-point Linear Interpolation Function.
+   * @param[in,out] S  is an instance of the half-precision floating-point Linear Interpolation structure
+   * @param[in]     x  input sample to process
+   * @return y processed output sample.
+   *
+   */
+  float16_t riscv_linear_interp_f16(
+  riscv_linear_interp_instance_f16 * S,
+  float16_t x);
+
+    /**
+   * @} end of LinearInterpolate group
+   */
+
+/**
+   * @addtogroup BilinearInterpolate
+   * @{
+   */
+
+  /**
+  * @brief  Half-precision (f16) floating-point bilinear interpolation.
+  * @param[in]     S  points to an instance of the interpolation structure (const: not modified through this pointer).
+  * @param[in]     X  interpolation coordinate.
+  * @param[in]     Y  interpolation coordinate.
+  * @return out interpolated value.
+  */
+  float16_t riscv_bilinear_interp_f16(
+  const riscv_bilinear_interp_instance_f16 * S,
+  float16_t X,
+  float16_t Y);
+
+
+  /**
+   * @} end of BilinearInterpolate group
+   */
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _INTERPOLATION_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/matrix_functions.h
+++ b/components/nmsis/dsp/inc/dsp/matrix_functions.h
@ -0,0 +1,743 @@
+/******************************************************************************
+ * @file     matrix_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _MATRIX_FUNCTIONS_H_
+#define _MATRIX_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @defgroup groupMatrix Matrix Functions
+ *
+ * This set of functions provides basic matrix math operations.
+ * The functions operate on matrix data structures.  For example,
+ * the type
+ * definition for the floating-point matrix structure is shown
+ * below:
+ * <pre>
+ *     typedef struct
+ *     {
+ *       uint16_t numRows;     // number of rows of the matrix.
+ *       uint16_t numCols;     // number of columns of the matrix.
+ *       float32_t *pData;     // points to the data of the matrix.
+ *     } riscv_matrix_instance_f32;
+ * </pre>
+ * There are similar definitions for the f64, Q7, Q15 and Q31 data types.
+ *
+ * The structure specifies the size of the matrix and then points to
+ * an array of data.  The array is of size <code>numRows X numCols</code>
+ * and the values are arranged in row order.  That is, the
+ * matrix element (i, j) is stored at:
+ * <pre>
+ *     pData[i*numCols + j]
+ * </pre>
+ *
+ * \par Init Functions
+ * There is an associated initialization function for each type of matrix
+ * data structure.
+ * The initialization function sets the values of the internal structure fields.
+ * Refer to \ref riscv_mat_init_f32(), \ref riscv_mat_init_q31() and \ref riscv_mat_init_q15()
+ * for floating-point, Q31 and Q15 types,  respectively.
+ *
+ * \par
+ * Use of the initialization function is optional. However, if initialization function is used
+ * then the instance structure cannot be placed into a const data section.
+ * To place the instance structure in a const data
+ * section, manually initialize the data structure.  For example:
+ * <pre>
+ * <code>riscv_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
+ * <code>riscv_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
+ * <code>riscv_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
+ * </pre>
+ * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
+ * specifies the number of columns, and <code>pData</code> points to the
+ * data array.
+ *
+ * \par Size Checking
+ * By default all of the matrix functions perform size checking on the input and
+ * output matrices. For example, the matrix addition function verifies that the
+ * two input matrices and the output matrix all have the same number of rows and
+ * columns. If the size check fails the functions return:
+ * <pre>
+ *     RISCV_MATH_SIZE_MISMATCH
+ * </pre>
+ * Otherwise the functions return
+ * <pre>
+ *     RISCV_MATH_SUCCESS
+ * </pre>
+ * There is some overhead associated with this matrix size checking.
+ * The matrix size checking is enabled via the \#define
+ * <pre>
+ *     RISCV_MATH_MATRIX_CHECK
+ * </pre>
+ * within the library project settings.  By default this macro is defined
+ * and size checking is enabled. By changing the project settings and
+ * undefining this macro size checking is eliminated and the functions
+ * run a bit faster. With size checking disabled the functions always
+ * return <code>RISCV_MATH_SUCCESS</code>.
+ */
+
+  /**
+   * @brief Instance structure for the floating-point matrix structure.
+   */
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    float32_t *pData;     /**< points to the data of the matrix. */
+  } riscv_matrix_instance_f32;
+ 
+ /**
+   * @brief Instance structure for the double-precision (f64) floating-point matrix structure.
+   */
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    float64_t *pData;     /**< points to the data of the matrix. */
+  } riscv_matrix_instance_f64;
+
+ /**
+   * @brief Instance structure for the Q7 matrix structure.
+   */
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    q7_t *pData;         /**< points to the data of the matrix. */
+  } riscv_matrix_instance_q7;
+
+  /**
+   * @brief Instance structure for the Q15 matrix structure.
+   */
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    q15_t *pData;         /**< points to the data of the matrix. */
+  } riscv_matrix_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 matrix structure.
+   */
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    q31_t *pData;         /**< points to the data of the matrix. */
+  } riscv_matrix_instance_q31;
+
+  /**
+   * @brief Floating-point matrix addition.
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_add_f32(
+  const riscv_matrix_instance_f32 * pSrcA,
+  const riscv_matrix_instance_f32 * pSrcB,
+        riscv_matrix_instance_f32 * pDst);
+
+  /**
+   * @brief Q15 matrix addition.
+   * @param[in]   pSrcA  points to the first input matrix structure
+   * @param[in]   pSrcB  points to the second input matrix structure
+   * @param[out]  pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_add_q15(
+  const riscv_matrix_instance_q15 * pSrcA,
+  const riscv_matrix_instance_q15 * pSrcB,
+        riscv_matrix_instance_q15 * pDst);
+
+  /**
+   * @brief Q31 matrix addition.
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_add_q31(
+  const riscv_matrix_instance_q31 * pSrcA,
+  const riscv_matrix_instance_q31 * pSrcB,
+        riscv_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief Floating-point, complex, matrix multiplication.
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_cmplx_mult_f32(
+  const riscv_matrix_instance_f32 * pSrcA,
+  const riscv_matrix_instance_f32 * pSrcB,
+        riscv_matrix_instance_f32 * pDst);
+
+  /**
+   * @brief Q15, complex,  matrix multiplication.
+   * @param[in]  pSrcA     points to the first input matrix structure
+   * @param[in]  pSrcB     points to the second input matrix structure
+   * @param[out] pDst      points to output matrix structure
+   * @param[in]  pScratch  points to a caller-supplied q15_t scratch buffer.
+   *                       NOTE(review): presumably holds intermediate results; the
+   *                       required size is not visible in this header - confirm
+   *                       against the implementation.
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_cmplx_mult_q15(
+  const riscv_matrix_instance_q15 * pSrcA,
+  const riscv_matrix_instance_q15 * pSrcB,
+        riscv_matrix_instance_q15 * pDst,
+        q15_t * pScratch);
+
+  /**
+   * @brief Q31, complex, matrix multiplication.
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_cmplx_mult_q31(
+  const riscv_matrix_instance_q31 * pSrcA,
+  const riscv_matrix_instance_q31 * pSrcB,
+        riscv_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief Floating-point matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_trans_f32(
+  const riscv_matrix_instance_f32 * pSrc,
+        riscv_matrix_instance_f32 * pDst);
+
+/**
+   * @brief Double-precision (f64) floating-point matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_trans_f64(
+  const riscv_matrix_instance_f64 * pSrc,
+        riscv_matrix_instance_f64 * pDst);
+
+  /**
+   * @brief Floating-point complex matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_cmplx_trans_f32(
+  const riscv_matrix_instance_f32 * pSrc,
+  riscv_matrix_instance_f32 * pDst);
+
+
+  /**
+   * @brief Q15 matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_trans_q15(
+  const riscv_matrix_instance_q15 * pSrc,
+        riscv_matrix_instance_q15 * pDst);
+
+  /**
+   * @brief Q15 complex matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_cmplx_trans_q15(
+  const riscv_matrix_instance_q15 * pSrc,
+  riscv_matrix_instance_q15 * pDst);
+
+  /**
+   * @brief Q7 matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_trans_q7(
+  const riscv_matrix_instance_q7 * pSrc,
+        riscv_matrix_instance_q7 * pDst);
+
+  /**
+   * @brief Q31 matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_trans_q31(
+  const riscv_matrix_instance_q31 * pSrc,
+        riscv_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief Q31 complex matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_cmplx_trans_q31(
+  const riscv_matrix_instance_q31 * pSrc,
+  riscv_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief Floating-point matrix multiplication
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_mult_f32(
+  const riscv_matrix_instance_f32 * pSrcA,
+  const riscv_matrix_instance_f32 * pSrcB,
+        riscv_matrix_instance_f32 * pDst);
+
+  /**
+   * @brief Double-precision (f64) floating-point matrix multiplication
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_mult_f64(
+  const riscv_matrix_instance_f64 * pSrcA,
+  const riscv_matrix_instance_f64 * pSrcB,
+        riscv_matrix_instance_f64 * pDst);
+
+  /**
+   * @brief Floating-point matrix and vector multiplication
+   * @param[in]  pSrcMat  points to the input matrix structure
+   * @param[in]  pVec     points to vector
+   * @param[out] pDst     points to output vector
+   */
+void riscv_mat_vec_mult_f32(
+  const riscv_matrix_instance_f32 *pSrcMat, 
+  const float32_t *pVec, 
+  float32_t *pDst);
+
+  /**
+   * @brief Q7 matrix multiplication
+   * @param[in]  pSrcA   points to the first input matrix structure
+   * @param[in]  pSrcB   points to the second input matrix structure
+   * @param[out] pDst    points to output matrix structure
+   * @param[in]  pState  points to the array for storing intermediate results
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_mult_q7(
+  const riscv_matrix_instance_q7 * pSrcA,
+  const riscv_matrix_instance_q7 * pSrcB,
+        riscv_matrix_instance_q7 * pDst,
+        q7_t * pState);
+
+  /**
+   * @brief Q7 matrix and vector multiplication
+   * @param[in]  pSrcMat  points to the input matrix structure
+   * @param[in]  pVec     points to vector
+   * @param[out] pDst     points to output vector
+   */
+void riscv_mat_vec_mult_q7(
+  const riscv_matrix_instance_q7 *pSrcMat, 
+  const q7_t *pVec, 
+  q7_t *pDst);
+
+  /**
+   * @brief Q15 matrix multiplication
+   * @param[in]  pSrcA   points to the first input matrix structure
+   * @param[in]  pSrcB   points to the second input matrix structure
+   * @param[out] pDst    points to output matrix structure
+   * @param[in]  pState  points to the array for storing intermediate results
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_mult_q15(
+  const riscv_matrix_instance_q15 * pSrcA,
+  const riscv_matrix_instance_q15 * pSrcB,
+        riscv_matrix_instance_q15 * pDst,
+        q15_t * pState);
+
+  /**
+   * @brief Q15 matrix and vector multiplication
+   * @param[in]  pSrcMat  points to the input matrix structure
+   * @param[in]  pVec     points to vector
+   * @param[out] pDst     points to output vector
+   */
+void riscv_mat_vec_mult_q15(
+  const riscv_matrix_instance_q15 *pSrcMat, 
+  const q15_t *pVec, 
+  q15_t *pDst);
+
+  /**
+   * @brief Q15 matrix multiplication (fast variant) for RISC-V Core with DSP enabled
+   * @param[in]  pSrcA   points to the first input matrix structure
+   * @param[in]  pSrcB   points to the second input matrix structure
+   * @param[out] pDst    points to output matrix structure
+   * @param[in]  pState  points to the array for storing intermediate results
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_mult_fast_q15(
+  const riscv_matrix_instance_q15 * pSrcA,
+  const riscv_matrix_instance_q15 * pSrcB,
+        riscv_matrix_instance_q15 * pDst,
+        q15_t * pState);
+
+  /**
+   * @brief Q31 matrix multiplication
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_mult_q31(
+  const riscv_matrix_instance_q31 * pSrcA,
+  const riscv_matrix_instance_q31 * pSrcB,
+        riscv_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief Q31 matrix and vector multiplication
+   * @param[in]  pSrcMat  points to the input matrix structure
+   * @param[in]  pVec     points to vector
+   * @param[out] pDst     points to output vector
+   */
+void riscv_mat_vec_mult_q31(
+  const riscv_matrix_instance_q31 *pSrcMat, 
+  const q31_t *pVec, 
+  q31_t *pDst);
+
+  /**
+   * @brief Q31 matrix multiplication (fast variant) for RISC-V Core with DSP enabled
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_mult_fast_q31(
+  const riscv_matrix_instance_q31 * pSrcA,
+  const riscv_matrix_instance_q31 * pSrcB,
+        riscv_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief Floating-point matrix subtraction
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_sub_f32(
+  const riscv_matrix_instance_f32 * pSrcA,
+  const riscv_matrix_instance_f32 * pSrcB,
+        riscv_matrix_instance_f32 * pDst);
+
+  /**
+   * @brief Floating-point matrix subtraction
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_sub_f64(
+  const riscv_matrix_instance_f64 * pSrcA,
+  const riscv_matrix_instance_f64 * pSrcB,
+        riscv_matrix_instance_f64 * pDst);
+
+  /**
+   * @brief Q15 matrix subtraction
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_sub_q15(
+  const riscv_matrix_instance_q15 * pSrcA,
+  const riscv_matrix_instance_q15 * pSrcB,
+        riscv_matrix_instance_q15 * pDst);
+
+  /**
+   * @brief Q31 matrix subtraction
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_sub_q31(
+  const riscv_matrix_instance_q31 * pSrcA,
+  const riscv_matrix_instance_q31 * pSrcB,
+        riscv_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief Floating-point matrix scaling.
+   * @param[in]  pSrc   points to the input matrix
+   * @param[in]  scale  scale factor
+   * @param[out] pDst   points to the output matrix
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_scale_f32(
+  const riscv_matrix_instance_f32 * pSrc,
+        float32_t scale,
+        riscv_matrix_instance_f32 * pDst);
+
+  /**
+   * @brief Q15 matrix scaling.
+   * @param[in]  pSrc        points to input matrix
+   * @param[in]  scaleFract  fractional portion of the scale factor
+   * @param[in]  shift       number of bits to shift the result by
+   * @param[out] pDst        points to output matrix
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_scale_q15(
+  const riscv_matrix_instance_q15 * pSrc,
+        q15_t scaleFract,
+        int32_t shift,
+        riscv_matrix_instance_q15 * pDst);
+
+  /**
+   * @brief Q31 matrix scaling.
+   * @param[in]  pSrc        points to input matrix
+   * @param[in]  scaleFract  fractional portion of the scale factor
+   * @param[in]  shift       number of bits to shift the result by
+   * @param[out] pDst        points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_scale_q31(
+  const riscv_matrix_instance_q31 * pSrc,
+        q31_t scaleFract,
+        int32_t shift,
+        riscv_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief  Q31 matrix initialization.
+   * @param[in,out] S         points to an instance of the Q31 matrix structure.
+   * @param[in]     nRows     number of rows in the matrix.
+   * @param[in]     nColumns  number of columns in the matrix.
+   * @param[in]     pData     points to the matrix data array.
+   */
+void riscv_mat_init_q31(
+        riscv_matrix_instance_q31 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        q31_t * pData);
+
+  /**
+   * @brief  Q15 matrix initialization.
+   * @param[in,out] S         points to an instance of the Q15 matrix structure.
+   * @param[in]     nRows     number of rows in the matrix.
+   * @param[in]     nColumns  number of columns in the matrix.
+   * @param[in]     pData     points to the matrix data array.
+   */
+void riscv_mat_init_q15(
+        riscv_matrix_instance_q15 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        q15_t * pData);
+
+  /**
+   * @brief  Floating-point matrix initialization.
+   * @param[in,out] S         points to an instance of the floating-point matrix structure.
+   * @param[in]     nRows     number of rows in the matrix.
+   * @param[in]     nColumns  number of columns in the matrix.
+   * @param[in]     pData     points to the matrix data array.
+   */
+void riscv_mat_init_f32(
+        riscv_matrix_instance_f32 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        float32_t * pData);
+
+
+
+  /**
+   * @brief Floating-point matrix inverse.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] dst   points to the instance of the output floating-point matrix structure.
+   * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
+   * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status RISCV_MATH_SINGULAR.
+   */
+  riscv_status riscv_mat_inverse_f32(
+  const riscv_matrix_instance_f32 * src,
+  riscv_matrix_instance_f32 * dst);
+
+
+  /**
+   * @brief Floating-point matrix inverse.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] dst   points to the instance of the output floating-point matrix structure.
+   * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
+   * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status RISCV_MATH_SINGULAR.
+   */
+  riscv_status riscv_mat_inverse_f64(
+  const riscv_matrix_instance_f64 * src,
+  riscv_matrix_instance_f64 * dst);
+
+ /**
+   * @brief Floating-point Cholesky decomposition of Symmetric Positive Definite Matrix.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] dst   points to the instance of the output floating-point matrix structure.
+   * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
+   * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status RISCV_MATH_DECOMPOSITION_FAILURE.
+   * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
+   * The decomposition is returning a lower triangular matrix.
+   */
+  riscv_status riscv_mat_cholesky_f64(
+  const riscv_matrix_instance_f64 * src,
+  riscv_matrix_instance_f64 * dst);
+
+ /**
+   * @brief Floating-point Cholesky decomposition of Symmetric Positive Definite Matrix.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] dst   points to the instance of the output floating-point matrix structure.
+   * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
+   * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status RISCV_MATH_DECOMPOSITION_FAILURE.
+   * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
+   * The decomposition is returning a lower triangular matrix.
+   */
+  riscv_status riscv_mat_cholesky_f32(
+  const riscv_matrix_instance_f32 * src,
+  riscv_matrix_instance_f32 * dst);
+
+  /**
+   * @brief Solve UT . X = A where UT is an upper triangular matrix
+   * @param[in]  ut  The upper triangular matrix
+   * @param[in]  a  The matrix a
+   * @param[out] dst The solution X of UT . X = A
+   * @return The function returns RISCV_MATH_SINGULAR, if the system can't be solved.
+  */
+  riscv_status riscv_mat_solve_upper_triangular_f32(
+  const riscv_matrix_instance_f32 * ut,
+  const riscv_matrix_instance_f32 * a,
+  riscv_matrix_instance_f32 * dst);
+
+ /**
+   * @brief Solve LT . X = A where LT is a lower triangular matrix
+   * @param[in]  lt  The lower triangular matrix
+   * @param[in]  a  The matrix a
+   * @param[out] dst The solution X of LT . X = A
+   * @return The function returns RISCV_MATH_SINGULAR, if the system can't be solved.
+   */
+  riscv_status riscv_mat_solve_lower_triangular_f32(
+  const riscv_matrix_instance_f32 * lt,
+  const riscv_matrix_instance_f32 * a,
+  riscv_matrix_instance_f32 * dst);
+
+
+  /**
+   * @brief Solve UT . X = A where UT is an upper triangular matrix
+   * @param[in]  ut  The upper triangular matrix
+   * @param[in]  a  The matrix a
+   * @param[out] dst The solution X of UT . X = A
+   * @return The function returns RISCV_MATH_SINGULAR, if the system can't be solved.
+  */
+  riscv_status riscv_mat_solve_upper_triangular_f64(
+  const riscv_matrix_instance_f64 * ut,
+  const riscv_matrix_instance_f64 * a,
+  riscv_matrix_instance_f64 * dst);
+
+ /**
+   * @brief Solve LT . X = A where LT is a lower triangular matrix
+   * @param[in]  lt  The lower triangular matrix
+   * @param[in]  a  The matrix a
+   * @param[out] dst The solution X of LT . X = A
+   * @return The function returns RISCV_MATH_SINGULAR, if the system can't be solved.
+   */
+  riscv_status riscv_mat_solve_lower_triangular_f64(
+  const riscv_matrix_instance_f64 * lt,
+  const riscv_matrix_instance_f64 * a,
+  riscv_matrix_instance_f64 * dst);
+
+
+  /**
+   * @brief Floating-point LDL decomposition of Symmetric Positive Semi-Definite Matrix.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] l   points to the instance of the output floating-point triangular matrix structure.
+   * @param[out] d   points to the instance of the output floating-point diagonal matrix structure.
+   * @param[out] pp  points to the output permutation vector (array of uint16_t).
+   * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
+   * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status RISCV_MATH_DECOMPOSITION_FAILURE.
+   * The decomposition is returning a lower triangular matrix.
+   */
+  riscv_status riscv_mat_ldlt_f32(
+  const riscv_matrix_instance_f32 * src,
+  riscv_matrix_instance_f32 * l,
+  riscv_matrix_instance_f32 * d,
+  uint16_t * pp);
+
+ /**
+   * @brief Floating-point LDL decomposition of Symmetric Positive Semi-Definite Matrix.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] l   points to the instance of the output floating-point triangular matrix structure.
+   * @param[out] d   points to the instance of the output floating-point diagonal matrix structure.
+   * @param[out] pp  points to the output permutation vector (array of uint16_t).
+   * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
+   * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status RISCV_MATH_DECOMPOSITION_FAILURE.
+   * The decomposition is returning a lower triangular matrix.
+   */
+  riscv_status riscv_mat_ldlt_f64(
+  const riscv_matrix_instance_f64 * src,
+  riscv_matrix_instance_f64 * l,
+  riscv_matrix_instance_f64 * d,
+  uint16_t * pp);
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _MATRIX_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/matrix_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/matrix_functions_f16.h
@ -0,0 +1,222 @@
+/******************************************************************************
+ * @file     matrix_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _MATRIX_FUNCTIONS_F16_H_
+#define _MATRIX_FUNCTIONS_F16_H_
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+    
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+ /**
+   * @brief Instance structure for the float16_t (f16) matrix.
+   *
+   * Bundles the matrix dimensions with a pointer to the element storage.
+   * The riscv_mat_*_f16 functions declared in this header take pointers to
+   * this type. Only declared when RISCV_FLOAT16_SUPPORTED is defined.
+   */
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    float16_t *pData;     /**< points to the data of the matrix. */
+  } riscv_matrix_instance_f16;
+
+ /**
+   * @brief Floating-point matrix addition.
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_add_f16(
+  const riscv_matrix_instance_f16 * pSrcA,
+  const riscv_matrix_instance_f16 * pSrcB,
+        riscv_matrix_instance_f16 * pDst);
+
+  /**
+   * @brief Floating-point, complex, matrix multiplication.
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_cmplx_mult_f16(
+  const riscv_matrix_instance_f16 * pSrcA,
+  const riscv_matrix_instance_f16 * pSrcB,
+        riscv_matrix_instance_f16 * pDst);
+
+  /**
+   * @brief Floating-point matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_trans_f16(
+  const riscv_matrix_instance_f16 * pSrc,
+        riscv_matrix_instance_f16 * pDst);
+
+  /**
+   * @brief Floating-point complex matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
+   * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_cmplx_trans_f16(
+  const riscv_matrix_instance_f16 * pSrc,
+  riscv_matrix_instance_f16 * pDst);
+
+  /**
+   * @brief Floating-point matrix multiplication
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_mult_f16(
+  const riscv_matrix_instance_f16 * pSrcA,
+  const riscv_matrix_instance_f16 * pSrcB,
+        riscv_matrix_instance_f16 * pDst);
+  /**
+   * @brief Floating-point matrix and vector multiplication
+   * @param[in]  pSrcMat  points to the input matrix structure
+   * @param[in]  pVec     points to vector
+   * @param[out] pDst     points to output vector
+   */
+void riscv_mat_vec_mult_f16(
+  const riscv_matrix_instance_f16 *pSrcMat, 
+  const float16_t *pVec, 
+  float16_t *pDst);
+
+  /**
+   * @brief Floating-point matrix subtraction
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_sub_f16(
+  const riscv_matrix_instance_f16 * pSrcA,
+  const riscv_matrix_instance_f16 * pSrcB,
+        riscv_matrix_instance_f16 * pDst);
+
+  /**
+   * @brief Floating-point matrix scaling.
+   * @param[in]  pSrc   points to the input matrix
+   * @param[in]  scale  scale factor
+   * @param[out] pDst   points to the output matrix
+   * @return     The function returns either
+   * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+riscv_status riscv_mat_scale_f16(
+  const riscv_matrix_instance_f16 * pSrc,
+        float16_t scale,
+        riscv_matrix_instance_f16 * pDst);
+
+  /**
+   * @brief  Floating-point matrix initialization.
+   * @param[in,out] S         points to an instance of the floating-point matrix structure.
+   * @param[in]     nRows     number of rows in the matrix.
+   * @param[in]     nColumns  number of columns in the matrix.
+   * @param[in]     pData     points to the matrix data array.
+   */
+void riscv_mat_init_f16(
+        riscv_matrix_instance_f16 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        float16_t * pData);
+
+
+  /**
+   * @brief Floating-point matrix inverse.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] dst   points to the instance of the output floating-point matrix structure.
+   * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
+   * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status RISCV_MATH_SINGULAR.
+   */
+  riscv_status riscv_mat_inverse_f16(
+  const riscv_matrix_instance_f16 * src,
+  riscv_matrix_instance_f16 * dst);
+
+
+ /**
+   * @brief Floating-point Cholesky decomposition of Symmetric Positive Definite Matrix.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] dst   points to the instance of the output floating-point matrix structure.
+   * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
+   * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status RISCV_MATH_DECOMPOSITION_FAILURE.
+   * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
+   * The decomposition is returning a lower triangular matrix.
+   */
+  riscv_status riscv_mat_cholesky_f16(
+  const riscv_matrix_instance_f16 * src,
+  riscv_matrix_instance_f16 * dst);
+
+ /**
+   * @brief Solve UT . X = A where UT is an upper triangular matrix
+   * @param[in]  ut  The upper triangular matrix
+   * @param[in]  a  The matrix a
+   * @param[out] dst The solution X of UT . X = A
+   * @return The function returns RISCV_MATH_SINGULAR, if the system can't be solved.
+  */
+  riscv_status riscv_mat_solve_upper_triangular_f16(
+  const riscv_matrix_instance_f16 * ut,
+  const riscv_matrix_instance_f16 * a,
+  riscv_matrix_instance_f16 * dst);
+
+ /**
+   * @brief Solve LT . X = A where LT is a lower triangular matrix
+   * @param[in]  lt  The lower triangular matrix
+   * @param[in]  a  The matrix a
+   * @param[out] dst The solution X of LT . X = A
+   * @return The function returns RISCV_MATH_SINGULAR, if the system can't be solved.
+   */
+  riscv_status riscv_mat_solve_lower_triangular_f16(
+  const riscv_matrix_instance_f16 * lt,
+  const riscv_matrix_instance_f16 * a,
+  riscv_matrix_instance_f16 * dst);
+
+
+
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _MATRIX_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/none.h
+++ b/components/nmsis/dsp/inc/dsp/none.h
@ -0,0 +1,570 @@
+/******************************************************************************
+ * @file     none.h
+ * @brief    Intrinsics when no DSP extension is available
+ * @version  V1.9.0
+ * @date     20. July 2020
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+
+The definitions in this file allow some versions of NMSIS-DSP
+to be built for a core (M0 for instance) or a host where the
+DSP extension is not available.
+
+Ideally a pure C version should have been used instead.
+But those are not always available or use a restricted set
+of intrinsics.
+
+*/
+ 
+#ifndef _NONE_H_
+#define _NONE_H_
+
+#include "riscv_math_types.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+ 
+
+/*
+
+Normally these kinds of definitions live in a compiler file
+in Core or Core_A.
+
+The MSVC compiler is a special case, though. The goal here is specific
+to NMSIS-DSP: only to allow this library to be used from other
+systems like Python or Matlab.
+
+MSVC is not going to be used to cross-compile to ARM. So, having a MSVC
+compiler file in Core or Core_A would not make sense.
+
+*/
+#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__)
+    /**
+     * @brief Count leading zeros of a 32-bit value.
+     * @param[in] data  value to examine
+     * @return 32 when data is 0, otherwise the number of zero bits above the
+     *         most significant set bit.
+     */
+    __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
+    {
+      uint32_t probe;
+      uint32_t zeros = 0U;
+
+      if (data == 0U)
+      {
+        return 32U;
+      }
+
+      /* Walk a single-bit mask down from bit 31 until it hits a set bit. */
+      for (probe = 0x80000000U; (data & probe) == 0U; probe >>= 1U)
+      {
+        zeros += 1U;
+      }
+      return zeros;
+    }
+
+  /**
+   * @brief Signed saturate.
+   * @param[in] val  value to saturate
+   * @param[in] sat  width in bits of the signed target range (1..32)
+   * @return val limited to [-2^(sat-1), 2^(sat-1) - 1]; when sat is outside
+   *         1..32 the value is returned unchanged.
+   */
+  __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
+  {
+    int32_t upper;
+    int32_t lower;
+
+    /* Unsupported widths: pass the input through untouched. */
+    if ((sat < 1U) || (sat > 32U))
+    {
+      return val;
+    }
+
+    upper = (int32_t)((1U << (sat - 1U)) - 1U);
+    lower = -1 - upper;
+
+    if (val > upper)
+    {
+      return upper;
+    }
+    if (val < lower)
+    {
+      return lower;
+    }
+    return val;
+  }
+
+  /**
+   * @brief Unsigned saturate.
+   * @param[in] val  signed value to saturate
+   * @param[in] sat  width in bits of the unsigned target range (0..31)
+   * @return val limited to [0, 2^sat - 1]; when sat is greater than 31 the
+   *         value is returned converted to uint32_t without clamping.
+   */
+  __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
+  {
+    uint32_t ceiling;
+
+    /* Unsupported widths: pass the input through untouched. */
+    if (sat > 31U)
+    {
+      return (uint32_t)val;
+    }
+
+    ceiling = (1U << sat) - 1U;
+
+    if (val > (int32_t)ceiling)
+    {
+      return ceiling;
+    }
+    if (val < 0)
+    {
+      return 0U;
+    }
+    return (uint32_t)val;
+  }
+
+ /**
+  \brief   Rotate Right in unsigned value (32 bit)
+  \details Rotates op1 right by op2 bits. The rotate count is reduced modulo 32,
+           and a resulting count of 0 returns op1 unchanged.
+  \param [in]    op1  Value to rotate
+  \param [in]    op2  Number of Bits to rotate
+  \return               Rotated value
+ */
+__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
+{
+  const uint32_t shift = op2 % 32U;
+
+  /* A zero count is handled separately so the left shift is never by 32 bits. */
+  return (shift == 0U) ? op1 : ((op1 >> shift) | (op1 << (32U - shift)));
+}
+
+
+#endif
+
+/**
+   * @brief Clips Q63 to Q31 values.
+   * @param[in] x  64-bit input value
+   * @return x when its upper 32 bits are the sign extension of its lower
+   *         32 bits; otherwise 0x7FFFFFFF for non-negative x and 0x80000000
+   *         for negative x.
+   */
+  __STATIC_FORCEINLINE q31_t clip_q63_to_q31(
+  q63_t x)
+  {
+    const q31_t upperWord = (q31_t) (x >> 32);
+    const q31_t lowerSign = ((q31_t) x) >> 31;
+
+    if (upperWord != lowerSign)
+    {
+      /* XOR with the sign word (0 or all ones) selects the max or min value. */
+      return (0x7FFFFFFF ^ ((q31_t) (x >> 63)));
+    }
+    return (q31_t) x;
+  }
+
+  /**
+   * @brief Clips Q63 to Q15 values.
+   * @param[in] x  64-bit input value
+   * @return 0x7FFF or its complement (by sign of x) when x does not fit in
+   *         32 bits; otherwise x >> 15 converted to q15_t.
+   *
+   * NOTE(review): the range test is against the 32-bit range, while the
+   * in-range result is (x >> 15) truncated to q15_t - confirm this matches
+   * the intended accumulator format of the callers.
+   */
+  __STATIC_FORCEINLINE q15_t clip_q63_to_q15(
+  q63_t x)
+  {
+    const q31_t upperWord = (q31_t) (x >> 32);
+    const q31_t lowerSign = ((q31_t) x) >> 31;
+
+    if (upperWord != lowerSign)
+    {
+      /* XOR with the sign word (0 or all ones) selects the max or min value. */
+      return (0x7FFF ^ ((q15_t) (x >> 63)));
+    }
+    return (q15_t) (x >> 15);
+  }
+
+  /**
+   * @brief Clips Q31 to Q7 values.
+   * @param[in] x  32-bit input value
+   * @return x when it fits in a signed 8-bit value; otherwise 0x7F for
+   *         positive x and the complement of 0x7F for negative x.
+   *
+   * The range test compares x >> 8 with x >> 7: the two are equal only when
+   * bits 7..31 are all copies of the sign bit, i.e. when x fits in 8 bits.
+   * This mirrors clip_q31_to_q15, which compares x >> 16 with x >> 15.
+   * (The previous 24/23 shift pair only detected values outside a 24-bit
+   * range, so e.g. 200 was truncated instead of saturated.)
+   */
+  __STATIC_FORCEINLINE q7_t clip_q31_to_q7(
+  q31_t x)
+  {
+    return ((q31_t) (x >> 8) != ((q31_t) x >> 7)) ?
+      ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
+  }
+
+  /**
+   * @brief Clips Q31 to Q15 values.
+   * @param[in] x  32-bit input value
+   * @return x converted to q15_t when x >> 16 equals x >> 15 (bits 15..31
+   *         all equal); otherwise 0x7FFF for non-negative x and its
+   *         complement for negative x.
+   */
+  __STATIC_FORCEINLINE q15_t clip_q31_to_q15(
+  q31_t x)
+  {
+    const q31_t aboveHalfword = (q31_t) (x >> 16);
+    const q31_t fromSignBit   = ((q31_t) x) >> 15;
+
+    if (aboveHalfword != fromSignBit)
+    {
+      /* XOR with the sign word (0 or all ones) selects the max or min value. */
+      return (0x7FFF ^ ((q15_t) (x >> 31)));
+    }
+    return (q15_t) x;
+  }
+
+  /**
+   * @brief Multiplies a 64-bit value by a 32-bit value.
+   * @param[in] x  64-bit operand
+   * @param[in] y  32-bit operand
+   * @return (low 32 bits of x times y, shifted right by 32) plus
+   *         (high 32 bits of x times y), as a q63_t.
+   *
+   * NOTE(review): the original description states "returns 32 bit result in
+   * 2.30 format", but the return type is q63_t - confirm the intended format.
+   */
+  __STATIC_FORCEINLINE q63_t mult32x64(
+  q63_t x,
+  q31_t y)
+  {
+    const q63_t lowProduct  = (q63_t) (x & 0x00000000FFFFFFFF) * y;
+    const q63_t highProduct = (q63_t) (x >> 32) * y;
+
+    return ((lowProduct >> 32) + highProduct);
+  }
+
+/*
+ * 32x32-bit multiply helpers that keep the upper 32 bits of the 64-bit
+ * product. The _R variants add 0x80000000 before the shift (rounding).
+ * `a` must be a modifiable q31_t lvalue; it is assigned by every macro.
+ * All macro parameters are parenthesized so that expression arguments
+ * (e.g. `p + q`) are multiplied as a whole rather than being split by
+ * operator precedence.
+ */
+
+/* SMMLAR */
+#define multAcc_32x32_keep32_R(a, x, y) \
+    (a) = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)
+
+/* SMMLSR */
+#define multSub_32x32_keep32_R(a, x, y) \
+    (a) = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)
+
+/* SMMULR */
+#define mult_32x32_keep32_R(a, x, y) \
+    (a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32)
+
+/* SMMLA */
+#define multAcc_32x32_keep32(a, x, y) \
+    (a) += (q31_t) (((q63_t) (x) * (y)) >> 32)
+
+/* SMMLS */
+#define multSub_32x32_keep32(a, x, y) \
+    (a) -= (q31_t) (((q63_t) (x) * (y)) >> 32)
+
+/* SMMUL */
+#define mult_32x32_keep32(a, x, y) \
+    (a) = (q31_t) (((q63_t) (x) * (y) ) >> 32)
+
+#ifndef RISCV_MATH_DSP
+  /**
+   * @brief definition to pack two 16 bit values.
+   *
+   * __PKHBT: bits 15..0 come from ARG1, bits 31..16 from ARG2 shifted left by ARG3.
+   * __PKHTB: bits 31..16 come from ARG1, bits 15..0 from ARG2 shifted right by ARG3.
+   * Only defined when RISCV_MATH_DSP is not defined.
+   * ARG3 is parenthesized so that an expression shift count (e.g. `n & 15`)
+   * is evaluated as a whole before the shift.
+   */
+  #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<      0) & (int32_t)0x0000FFFF) | \
+                                      (((int32_t)(ARG2) << (ARG3)) & (int32_t)0xFFFF0000)  )
+  #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<      0) & (int32_t)0xFFFF0000) | \
+                                      (((int32_t)(ARG2) >> (ARG3)) & (int32_t)0x0000FFFF)  )
+#endif
+
+   /**
+   * @brief definition to pack four 8 bit values.
+   *
+   * Each argument is cast to int32_t, shifted to its byte position and
+   * masked to that byte: v0 -> bits 7..0, v1 -> bits 15..8,
+   * v2 -> bits 23..16, v3 -> bits 31..24. The four bytes are OR-ed together.
+   */
+  #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
+                                  (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
+                                  (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
+                                  (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
+
+
+ 
+
+/*
+ * @brief C custom defined intrinsic functions
+ */
+#if !defined (RISCV_MATH_DSP)
+
+
+  /*
+   * @brief C custom defined QADD8
+   *
+   * Adds the four signed 8-bit lanes of x and y independently, saturates
+   * each sum to 8 bits with __SSAT, and returns the results packed in the
+   * same byte positions.
+   *
+   * The left shifts used to isolate a lane and to pack the result are done
+   * on unsigned values, so no signed left shift can overflow. Sign extension
+   * still relies on >> of a negative q31_t being an arithmetic shift.
+   */
+  __STATIC_FORCEINLINE uint32_t __QADD8(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s, t, u;
+
+    r = __SSAT(((((q31_t)(x << 24)) >> 24) + (((q31_t)(y << 24)) >> 24)), 8) & (int32_t)0x000000FF;
+    s = __SSAT(((((q31_t)(x << 16)) >> 24) + (((q31_t)(y << 16)) >> 24)), 8) & (int32_t)0x000000FF;
+    t = __SSAT(((((q31_t)(x <<  8)) >> 24) + (((q31_t)(y <<  8)) >> 24)), 8) & (int32_t)0x000000FF;
+    u = __SSAT(((((q31_t)x        ) >> 24) + (((q31_t)y        ) >> 24)), 8) & (int32_t)0x000000FF;
+
+    /* r..u are in 0..255 after masking; pack them as unsigned bytes. */
+    return (((uint32_t)u << 24) | ((uint32_t)t << 16) | ((uint32_t)s <<  8) | ((uint32_t)r      ));
+  }
+
+
+  /*
+   * @brief C custom defined QSUB8
+   *
+   * Subtracts the four signed 8-bit lanes of y from those of x
+   * independently, saturates each difference to 8 bits with __SSAT, and
+   * returns the results packed in the same byte positions.
+   *
+   * The left shifts used to isolate a lane and to pack the result are done
+   * on unsigned values, so no signed left shift can overflow. Sign extension
+   * still relies on >> of a negative q31_t being an arithmetic shift.
+   */
+  __STATIC_FORCEINLINE uint32_t __QSUB8(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s, t, u;
+
+    r = __SSAT(((((q31_t)(x << 24)) >> 24) - (((q31_t)(y << 24)) >> 24)), 8) & (int32_t)0x000000FF;
+    s = __SSAT(((((q31_t)(x << 16)) >> 24) - (((q31_t)(y << 16)) >> 24)), 8) & (int32_t)0x000000FF;
+    t = __SSAT(((((q31_t)(x <<  8)) >> 24) - (((q31_t)(y <<  8)) >> 24)), 8) & (int32_t)0x000000FF;
+    u = __SSAT(((((q31_t)x        ) >> 24) - (((q31_t)y        ) >> 24)), 8) & (int32_t)0x000000FF;
+
+    /* r..u are in 0..255 after masking; pack them as unsigned bytes. */
+    return (((uint32_t)u << 24) | ((uint32_t)t << 16) | ((uint32_t)s <<  8) | ((uint32_t)r      ));
+  }
+
+
+  /*
+   * @brief C custom defined QADD16
+   *
+   * Adds the two signed 16-bit lanes of x and y independently, saturates
+   * each sum to 16 bits with __SSAT, and returns the results packed in the
+   * same halfword positions.
+   *
+   * The left shifts used to isolate the low lane and to pack the result are
+   * done on unsigned values, so no signed left shift can overflow. Sign
+   * extension still relies on >> of a negative q31_t being arithmetic.
+   */
+  __STATIC_FORCEINLINE uint32_t __QADD16(
+  uint32_t x,
+  uint32_t y)
+  {
+    const q31_t xLo = ((q31_t)(x << 16)) >> 16;
+    const q31_t xHi = ((q31_t)x) >> 16;
+    const q31_t yLo = ((q31_t)(y << 16)) >> 16;
+    const q31_t yHi = ((q31_t)y) >> 16;
+/*  q31_t r,     s;  without initialisation 'riscv_offset_q15 test' fails  but 'intrinsic' tests pass! for armCC */
+    q31_t r = 0, s = 0;
+
+    r = __SSAT((xLo + yLo), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT((xHi + yHi), 16) & (int32_t)0x0000FFFF;
+
+    /* r and s are in 0..0xFFFF after masking; pack them as unsigned halfwords. */
+    return (((uint32_t)s << 16) | ((uint32_t)r      ));
+  }
+
+
+  /*
+   * @brief C custom defined SHADD16
+   *
+   * Adds the two signed 16-bit lanes of x and y independently, halves each
+   * sum with a right shift by 1, and returns the low 16 bits of each result
+   * packed in the same halfword positions.
+   *
+   * The left shifts used to isolate the low lane and to pack the result are
+   * done on unsigned values, so no signed left shift can overflow. Sign
+   * extension still relies on >> of a negative q31_t being arithmetic.
+   */
+  __STATIC_FORCEINLINE uint32_t __SHADD16(
+  uint32_t x,
+  uint32_t y)
+  {
+    const q31_t xLo = ((q31_t)(x << 16)) >> 16;
+    const q31_t xHi = ((q31_t)x) >> 16;
+    const q31_t yLo = ((q31_t)(y << 16)) >> 16;
+    const q31_t yHi = ((q31_t)y) >> 16;
+    q31_t r, s;
+
+    r = ((xLo + yLo) >> 1) & (int32_t)0x0000FFFF;
+    s = ((xHi + yHi) >> 1) & (int32_t)0x0000FFFF;
+
+    /* r and s are in 0..0xFFFF after masking; pack them as unsigned halfwords. */
+    return (((uint32_t)s << 16) | ((uint32_t)r      ));
+  }
+
+
+  /*
+   * @brief C custom defined QSUB16
+   *
+   * Subtracts the two signed 16-bit lanes of y from those of x
+   * independently, saturates each difference to 16 bits with __SSAT, and
+   * returns the results packed in the same halfword positions.
+   *
+   * The left shifts used to isolate the low lane and to pack the result are
+   * done on unsigned values, so no signed left shift can overflow. Sign
+   * extension still relies on >> of a negative q31_t being arithmetic.
+   */
+  __STATIC_FORCEINLINE uint32_t __QSUB16(
+  uint32_t x,
+  uint32_t y)
+  {
+    const q31_t xLo = ((q31_t)(x << 16)) >> 16;
+    const q31_t xHi = ((q31_t)x) >> 16;
+    const q31_t yLo = ((q31_t)(y << 16)) >> 16;
+    const q31_t yHi = ((q31_t)y) >> 16;
+    q31_t r, s;
+
+    r = __SSAT((xLo - yLo), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT((xHi - yHi), 16) & (int32_t)0x0000FFFF;
+
+    /* r and s are in 0..0xFFFF after masking; pack them as unsigned halfwords. */
+    return (((uint32_t)s << 16) | ((uint32_t)r      ));
+  }
+
+
+  /*
+   * @brief C custom defined SHSUB16
+   *
+   * Subtracts the two signed 16-bit lanes of y from those of x
+   * independently, halves each difference with a right shift by 1, and
+   * returns the low 16 bits of each result packed in the same halfword
+   * positions.
+   *
+   * The left shifts used to isolate the low lane and to pack the result are
+   * done on unsigned values, so no signed left shift can overflow. Sign
+   * extension still relies on >> of a negative q31_t being arithmetic.
+   */
+  __STATIC_FORCEINLINE uint32_t __SHSUB16(
+  uint32_t x,
+  uint32_t y)
+  {
+    const q31_t xLo = ((q31_t)(x << 16)) >> 16;
+    const q31_t xHi = ((q31_t)x) >> 16;
+    const q31_t yLo = ((q31_t)(y << 16)) >> 16;
+    const q31_t yHi = ((q31_t)y) >> 16;
+    q31_t r, s;
+
+    r = ((xLo - yLo) >> 1) & (int32_t)0x0000FFFF;
+    s = ((xHi - yHi) >> 1) & (int32_t)0x0000FFFF;
+
+    /* r and s are in 0..0xFFFF after masking; pack them as unsigned halfwords. */
+    return (((uint32_t)s << 16) | ((uint32_t)r      ));
+  }
+
+
+  /*
+   * @brief C custom defined QASX
+   *
+   * Exchanges the halfwords of y, then:
+   *   low  result = saturate16(low(x)  - high(y))
+   *   high result = saturate16(high(x) + low(y))
+   * All lanes are treated as signed 16-bit values.
+   *
+   * The left shifts used to isolate the low lane and to pack the result are
+   * done on unsigned values, so no signed left shift can overflow. Sign
+   * extension still relies on >> of a negative q31_t being arithmetic.
+   */
+  __STATIC_FORCEINLINE uint32_t __QASX(
+  uint32_t x,
+  uint32_t y)
+  {
+    const q31_t xLo = ((q31_t)(x << 16)) >> 16;
+    const q31_t xHi = ((q31_t)x) >> 16;
+    const q31_t yLo = ((q31_t)(y << 16)) >> 16;
+    const q31_t yHi = ((q31_t)y) >> 16;
+    q31_t r, s;
+
+    r = __SSAT((xLo - yHi), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT((xHi + yLo), 16) & (int32_t)0x0000FFFF;
+
+    /* r and s are in 0..0xFFFF after masking; pack them as unsigned halfwords. */
+    return (((uint32_t)s << 16) | ((uint32_t)r      ));
+  }
+
+
+  /*
+   * @brief C custom defined SHASX
+   *
+   * Exchanges the halfwords of y, then:
+   *   low  result = (low(x)  - high(y)) >> 1
+   *   high result = (high(x) + low(y))  >> 1
+   * All lanes are treated as signed 16-bit values; the low 16 bits of each
+   * result are packed into the return value.
+   *
+   * The left shifts used to isolate the low lane and to pack the result are
+   * done on unsigned values, so no signed left shift can overflow. Sign
+   * extension still relies on >> of a negative q31_t being arithmetic.
+   */
+  __STATIC_FORCEINLINE uint32_t __SHASX(
+  uint32_t x,
+  uint32_t y)
+  {
+    const q31_t xLo = ((q31_t)(x << 16)) >> 16;
+    const q31_t xHi = ((q31_t)x) >> 16;
+    const q31_t yLo = ((q31_t)(y << 16)) >> 16;
+    const q31_t yHi = ((q31_t)y) >> 16;
+    q31_t r, s;
+
+    r = ((xLo - yHi) >> 1) & (int32_t)0x0000FFFF;
+    s = ((xHi + yLo) >> 1) & (int32_t)0x0000FFFF;
+
+    /* r and s are in 0..0xFFFF after masking; pack them as unsigned halfwords. */
+    return (((uint32_t)s << 16) | ((uint32_t)r      ));
+  }
+
+
+  /*
+   * @brief C custom defined QSAX
+   *
+   * Plain-C version of the saturating subtract/add with exchange:
+   *   low  result = __SSAT(low half of x  + high half of y, 16)
+   *   high result = __SSAT(high half of x - low half of y, 16)
+   * Only the low 16 bits of each saturated value are packed.
+   *
+   * @param[in] x  first operand, two packed 16-bit values
+   * @param[in] y  second operand, two packed 16-bit values
+   * @return low result in bits [15:0], high result in bits [31:16]
+   */
+  __STATIC_FORCEINLINE uint32_t __QSAX(
+  uint32_t x,
+  uint32_t y)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+    q31_t crossSum  = xLo + yHi;
+    q31_t crossDiff = xHi - yLo;
+    q31_t lowHalf, highHalf;
+
+    lowHalf  = __SSAT(crossSum, 16) & (int32_t)0x0000FFFF;
+    highHalf = __SSAT(crossDiff, 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((highHalf << 16) | lowHalf));
+  }
+
+
+  /*
+   * @brief C custom defined SHSAX
+   *
+   * Plain-C version of the halving subtract/add with exchange:
+   *   low  result = (low half of x  + high half of y) >> 1
+   *   high result = (high half of x - low half of y) >> 1
+   * Only the low 16 bits of each value are packed.
+   *
+   * @param[in] x  first operand, two packed 16-bit values
+   * @param[in] y  second operand, two packed 16-bit values
+   * @return low result in bits [15:0], high result in bits [31:16]
+   */
+  __STATIC_FORCEINLINE uint32_t __SHSAX(
+  uint32_t x,
+  uint32_t y)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+    q31_t lowHalf, highHalf;
+
+    lowHalf  = ((xLo + yHi) >> 1) & (int32_t)0x0000FFFF;
+    highHalf = ((xHi - yLo) >> 1) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((highHalf << 16) | lowHalf));
+  }
+
+
+  /*
+   * @brief C custom defined SMUSDX
+   *
+   * Plain-C version of the dual 16-bit multiply with exchange and subtract:
+   *   (low half of x * high half of y) - (high half of x * low half of y)
+   * All arithmetic is done in q31_t.
+   *
+   * @param[in] x  first operand, two packed 16-bit values
+   * @param[in] y  second operand, two packed 16-bit values
+   * @return difference of the two crossed products, as uint32_t
+   */
+  __STATIC_FORCEINLINE uint32_t __SMUSDX(
+  uint32_t x,
+  uint32_t y)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+
+    return ((uint32_t)((xLo * yHi) - (xHi * yLo)));
+  }
+
+  /*
+   * @brief C custom defined SMUADX
+   *
+   * Plain-C version of the dual 16-bit multiply with exchange and add:
+   *   (low half of x * high half of y) + (high half of x * low half of y)
+   * All arithmetic is done in q31_t.
+   *
+   * @param[in] x  first operand, two packed 16-bit values
+   * @param[in] y  second operand, two packed 16-bit values
+   * @return sum of the two crossed products, as uint32_t
+   */
+  __STATIC_FORCEINLINE uint32_t __SMUADX(
+  uint32_t x,
+  uint32_t y)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+
+    return ((uint32_t)((xLo * yHi) + (xHi * yLo)));
+  }
+
+
+  /*
+   * @brief C custom defined QADD
+   *
+   * Adds x and y in a q63_t so the sum cannot overflow, then narrows the
+   * result with clip_q63_to_q31().
+   *
+   * @param[in] x  first addend
+   * @param[in] y  second addend
+   * @return value produced by clip_q63_to_q31() for x + y
+   */
+  __STATIC_FORCEINLINE int32_t __QADD(
+  int32_t x,
+  int32_t y)
+  {
+    q63_t wideSum = (q63_t)x + (q31_t)y;
+
+    return ((int32_t)(clip_q63_to_q31(wideSum)));
+  }
+
+
+  /*
+   * @brief C custom defined QSUB
+   *
+   * Subtracts y from x in a q63_t so the difference cannot overflow, then
+   * narrows the result with clip_q63_to_q31().
+   *
+   * @param[in] x  minuend
+   * @param[in] y  subtrahend
+   * @return value produced by clip_q63_to_q31() for x - y
+   */
+  __STATIC_FORCEINLINE int32_t __QSUB(
+  int32_t x,
+  int32_t y)
+  {
+    q63_t wideDiff = (q63_t)x - (q31_t)y;
+
+    return ((int32_t)(clip_q63_to_q31(wideDiff)));
+  }
+
+
+  /*
+   * @brief C custom defined SMLAD
+   *
+   * Plain-C version of the dual 16-bit multiply-accumulate:
+   *   (low half of x * low half of y) + (high half of x * high half of y) + sum
+   * All arithmetic is done in q31_t.
+   *
+   * @param[in] x    first operand, two packed 16-bit values
+   * @param[in] y    second operand, two packed 16-bit values
+   * @param[in] sum  accumulator added to the two products
+   * @return accumulated result, as uint32_t
+   */
+  __STATIC_FORCEINLINE uint32_t __SMLAD(
+  uint32_t x,
+  uint32_t y,
+  uint32_t sum)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+    q31_t acc = (q31_t)sum;
+
+    return ((uint32_t)((xLo * yLo) + (xHi * yHi) + acc));
+  }
+
+
+  /*
+   * @brief C custom defined SMLADX
+   *
+   * Plain-C version of the dual 16-bit multiply-accumulate with exchange:
+   *   (low half of x * high half of y) + (high half of x * low half of y) + sum
+   * All arithmetic is done in q31_t.
+   *
+   * @param[in] x    first operand, two packed 16-bit values
+   * @param[in] y    second operand, two packed 16-bit values
+   * @param[in] sum  accumulator added to the two products
+   * @return accumulated result, as uint32_t
+   */
+  __STATIC_FORCEINLINE uint32_t __SMLADX(
+  uint32_t x,
+  uint32_t y,
+  uint32_t sum)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+    q31_t acc = (q31_t)sum;
+
+    return ((uint32_t)((xLo * yHi) + (xHi * yLo) + acc));
+  }
+
+
+  /*
+   * @brief C custom defined SMLSDX
+   *
+   * Plain-C version of the dual 16-bit multiply-subtract with exchange and
+   * accumulate:
+   *   (low half of x * high half of y) - (high half of x * low half of y) + sum
+   * All arithmetic is done in q31_t.
+   *
+   * @param[in] x    first operand, two packed 16-bit values
+   * @param[in] y    second operand, two packed 16-bit values
+   * @param[in] sum  accumulator added to the product difference
+   * @return accumulated result, as uint32_t
+   */
+  __STATIC_FORCEINLINE uint32_t __SMLSDX(
+  uint32_t x,
+  uint32_t y,
+  uint32_t sum)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+    q31_t acc = (q31_t)sum;
+
+    return ((uint32_t)((xLo * yHi) - (xHi * yLo) + acc));
+  }
+
+
+  /*
+   * @brief C custom defined SMLALD
+   *
+   * Plain-C version of the dual 16-bit multiply with 64-bit accumulate:
+   *   sum + (low half of x * low half of y) + (high half of x * high half of y)
+   *
+   * Each product is widened to 64 bits before it is accumulated. Adding the
+   * two products in 32 bits first overflows when all four halfwords are
+   * -32768 (2^30 + 2^30 does not fit in a q31_t), which would corrupt the
+   * 64-bit result. The accumulation is done on uint64_t so that wrap-around
+   * of the accumulator is well defined.
+   *
+   * @param[in] x    first operand, two packed 16-bit values
+   * @param[in] y    second operand, two packed 16-bit values
+   * @param[in] sum  64-bit accumulator
+   * @return sum plus both products, modulo 2^64
+   */
+  __STATIC_FORCEINLINE uint64_t __SMLALD(
+  uint32_t x,
+  uint32_t y,
+  uint64_t sum)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+    /* a 16x16 product always fits in 32 bits; widen before summing */
+    q63_t lowProduct  = (q63_t)xLo * yLo;
+    q63_t highProduct = (q63_t)xHi * yHi;
+
+    return (sum + (uint64_t)lowProduct + (uint64_t)highProduct);
+  }
+
+
+  /*
+   * @brief C custom defined SMLALDX
+   *
+   * Plain-C version of the dual 16-bit multiply with exchange and 64-bit
+   * accumulate:
+   *   sum + (low half of x * high half of y) + (high half of x * low half of y)
+   *
+   * Each product is widened to 64 bits before it is accumulated. Adding the
+   * two products in 32 bits first overflows when all four halfwords are
+   * -32768 (2^30 + 2^30 does not fit in a q31_t), which would corrupt the
+   * 64-bit result. The accumulation is done on uint64_t so that wrap-around
+   * of the accumulator is well defined.
+   *
+   * @param[in] x    first operand, two packed 16-bit values
+   * @param[in] y    second operand, two packed 16-bit values
+   * @param[in] sum  64-bit accumulator
+   * @return sum plus both crossed products, modulo 2^64
+   */
+  __STATIC_FORCEINLINE uint64_t __SMLALDX(
+  uint32_t x,
+  uint32_t y,
+  uint64_t sum)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+    /* a 16x16 product always fits in 32 bits; widen before summing */
+    q63_t lowHighProduct = (q63_t)xLo * yHi;
+    q63_t highLowProduct = (q63_t)xHi * yLo;
+
+    return (sum + (uint64_t)lowHighProduct + (uint64_t)highLowProduct);
+  }
+
+
+  /*
+   * @brief C custom defined SMUAD
+   *
+   * Plain-C version of the dual 16-bit multiply and add:
+   *   (low half of x * low half of y) + (high half of x * high half of y)
+   * All arithmetic is done in q31_t.
+   *
+   * @param[in] x  first operand, two packed 16-bit values
+   * @param[in] y  second operand, two packed 16-bit values
+   * @return sum of the two products, as uint32_t
+   */
+  __STATIC_FORCEINLINE uint32_t __SMUAD(
+  uint32_t x,
+  uint32_t y)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+
+    return ((uint32_t)((xLo * yLo) + (xHi * yHi)));
+  }
+
+
+  /*
+   * @brief C custom defined SMUSD
+   *
+   * Plain-C version of the dual 16-bit multiply and subtract:
+   *   (low half of x * low half of y) - (high half of x * high half of y)
+   * All arithmetic is done in q31_t.
+   *
+   * @param[in] x  first operand, two packed 16-bit values
+   * @param[in] y  second operand, two packed 16-bit values
+   * @return difference of the two products, as uint32_t
+   */
+  __STATIC_FORCEINLINE uint32_t __SMUSD(
+  uint32_t x,
+  uint32_t y)
+  {
+    /* halfwords of both operands, widened to q31_t */
+    q31_t xLo = ((q31_t)x << 16) >> 16;
+    q31_t yLo = ((q31_t)y << 16) >> 16;
+    q31_t xHi = ((q31_t)x) >> 16;
+    q31_t yHi = ((q31_t)y) >> 16;
+
+    return ((uint32_t)((xLo * yLo) - (xHi * yHi)));
+  }
+
+
+  /*
+   * @brief C custom defined SXTB16
+   *
+   * Widens two bytes of x into the two 16-bit halves of the result: bits
+   * [7:0] of x go to the low halfword and bits [23:16] go to the high
+   * halfword, each extended by the shift-left / shift-right pair on q31_t.
+   *
+   * @param[in] x  source word
+   * @return byte 0 widened in bits [15:0], byte 2 widened in bits [31:16]
+   */
+  __STATIC_FORCEINLINE uint32_t __SXTB16(
+  uint32_t x)
+  {
+    q31_t byte0Wide = ((q31_t)x << 24) >> 24;   /* bits [7:0] moved to the top, then shifted back */
+    q31_t byte2Wide = ((q31_t)x <<  8) >>  8;   /* bits [23:0] moved to the top, then shifted back */
+    q31_t lowHalf   = byte0Wide & (q31_t)0x0000FFFF;
+    q31_t highHalf  = byte2Wide & (q31_t)0xFFFF0000;
+
+    return ((uint32_t)(lowHalf | highHalf));
+  }
+
+  /*
+   * @brief C custom defined SMMLA
+   *
+   * Multiplies x by y in 64 bits, takes the upper 32 bits of the product and
+   * adds them to sum.
+   *
+   * @param[in] x    first multiplicand
+   * @param[in] y    second multiplicand
+   * @param[in] sum  accumulator
+   * @return sum plus the high 32 bits of x * y
+   */
+  __STATIC_FORCEINLINE int32_t __SMMLA(
+  int32_t x,
+  int32_t y,
+  int32_t sum)
+  {
+    int64_t wideProduct = (int64_t) x * y;
+    int32_t highWord = (int32_t) (wideProduct >> 32);
+
+    return (sum + highWord);
+  }
+
+#endif /* !defined (RISCV_MATH_DSP) */
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _TRANSFORM_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/quaternion_math_functions.h
+++ b/components/nmsis/dsp/inc/dsp/quaternion_math_functions.h
@ -0,0 +1,160 @@
+/******************************************************************************
+ * @file     quaternion_math_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ *
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _QUATERNION_MATH_FUNCTIONS_H_
+#define _QUATERNION_MATH_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @defgroup groupQuaternionMath Quaternion Math Functions
+ * Functions to operate on quaternions and convert between a
+ * rotation and quaternion representation.
+ */
+
+
+/**
+  @brief         Floating-point quaternion Norm.
+  @param[in]     pInputQuaternions       points to the input vector of quaternions
+  @param[out]    pNorms                  points to the output vector of norms
+  @param[in]     nbQuaternions           number of quaternions in each vector
+  @return        none
+ */
+
+
+
+void riscv_quaternion_norm_f32(const float32_t *pInputQuaternions, 
+    float32_t *pNorms,
+    uint32_t nbQuaternions);
+
+
+/**
+  @brief         Floating-point quaternion inverse.
+  @param[in]     pInputQuaternions            points to the input vector of quaternions
+  @param[out]    pInverseQuaternions          points to the output vector of inverse quaternions
+  @param[in]     nbQuaternions                number of quaternions in each vector
+  @return        none
+ */
+
+void riscv_quaternion_inverse_f32(const float32_t *pInputQuaternions, 
+    float32_t *pInverseQuaternions, 
+    uint32_t nbQuaternions);
+
+/**
+  @brief         Floating-point quaternion conjugates.
+  @param[in]     inputQuaternions             points to the input vector of quaternions
+  @param[out]    pConjugateQuaternions        points to the output vector of conjugate quaternions
+  @param[in]     nbQuaternions                number of quaternions in each vector
+  @return        none
+ */
+void riscv_quaternion_conjugate_f32(const float32_t *inputQuaternions, 
+    float32_t *pConjugateQuaternions, 
+    uint32_t nbQuaternions);
+
+/**
+  @brief         Floating-point normalization of quaternions.
+  @param[in]     inputQuaternions             points to the input vector of quaternions
+  @param[out]    pNormalizedQuaternions       points to the output vector of normalized quaternions
+  @param[in]     nbQuaternions                number of quaternions in each vector
+  @return        none
+ */
+void riscv_quaternion_normalize_f32(const float32_t *inputQuaternions, 
+    float32_t *pNormalizedQuaternions, 
+    uint32_t nbQuaternions);
+
+
+/**
+  @brief         Floating-point product of two quaternions.
+  @param[in]     qa       First quaternion
+  @param[in]     qb       Second quaternion
+  @param[out]    r        Product of two quaternions
+  @return        none
+ */
+void riscv_quaternion_product_single_f32(const float32_t *qa, 
+    const float32_t *qb, 
+    float32_t *r);
+
+/**
+  @brief         Floating-point elementwise product of two arrays of quaternions.
+  @param[in]     qa                  First array of quaternions
+  @param[in]     qb                  Second array of quaternions
+  @param[out]    r                   Elementwise product of quaternions
+  @param[in]     nbQuaternions       Number of quaternions in the array
+  @return        none
+ */
+void riscv_quaternion_product_f32(const float32_t *qa, 
+    const float32_t *qb, 
+    float32_t *r,
+    uint32_t nbQuaternions);
+
+/**
+ * @brief Conversion of quaternion to equivalent rotation matrix.
+ * @param[in]       pInputQuaternions points to an array of normalized quaternions
+ * @param[out]      pOutputRotations points to an array of 3x3 rotations (in row order)
+ * @param[in]       nbQuaternions number of quaternions in the array
+ * @return none.
+ *
+ * <b>Format of rotation matrix</b>
+ * \par
+ * The quaternion a + ib + jc + kd is converted into rotation matrix:
+ *   a^2 + b^2 - c^2 - d^2                 2bc - 2ad                 2bd + 2ac
+ *               2bc + 2ad     a^2 - b^2 + c^2 - d^2                 2cd - 2ab
+ *               2bd - 2ac                 2cd + 2ab     a^2 - b^2 - c^2 + d^2
+ *
+ * Rotation matrix is saved in row order : R00 R01 R02 R10 R11 R12 R20 R21 R22
+ */
+void riscv_quaternion2rotation_f32(const float32_t *pInputQuaternions, 
+    float32_t *pOutputRotations, 
+    uint32_t nbQuaternions);
+
+/**
+ * @brief Conversion of a rotation matrix to equivalent quaternion.
+ * @param[in]       pInputRotations points to an array of 3x3 rotation matrices (in row order)
+ * @param[out]      pOutputQuaternions points to an array of quaternions
+ * @param[in]       nbQuaternions number of quaternions in the array
+ * @return none.
+*/
+void riscv_rotation2quaternion_f32(const float32_t *pInputRotations, 
+    float32_t *pOutputQuaternions,  
+    uint32_t nbQuaternions);
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _QUATERNION_MATH_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/statistics_functions.h
+++ b/components/nmsis/dsp/inc/dsp/statistics_functions.h
@ -0,0 +1,587 @@
+/******************************************************************************
+ * @file     statistics_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _STATISTICS_FUNCTIONS_H_
+#define _STATISTICS_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/basic_math_functions.h"
+#include "dsp/fast_math_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+/**
+ * @defgroup groupStats Statistics Functions
+ */
+
+/**
+ * @brief Computation of the LogSumExp
+ *
+ * In probabilistic computations, the dynamic range of the probability values can be
+ * very wide because they come from Gaussian functions.
+ * To avoid underflow and overflow issues, the values are represented by their log.
+ * In this representation, multiplying the original exp values is easy: their logs are added.
+ * But adding the original exp values requires some special handling, and that is the
+ * goal of the LogSumExp function.
+ *
+ * If the values are x1...xn, the function is computing:
+ *
+ * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
+ * rounding issues are minimised.
+ *
+ * The max xm of the values is extracted and the function is computing:
+ * xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
+ *
+ * @param[in]  *in         Pointer to an array of input values.
+ * @param[in]  blockSize   Number of samples in the input array.
+ * @return LogSumExp
+ *
+ */
+
+
+float32_t riscv_logsumexp_f32(const float32_t *in, uint32_t blockSize);
+
+/**
+ * @brief Dot product with log arithmetic
+ *
+ * The vectors contain the logs of the samples.
+ *
+ * @param[in]       pSrcA points to the first input vector
+ * @param[in]       pSrcB points to the second input vector
+ * @param[in]       blockSize number of samples in each vector
+ * @param[in]       pTmpBuffer temporary buffer of length blockSize
+ * @return The log of the dot product.
+ *
+ */
+
+
+float32_t riscv_logsumexp_dot_prod_f32(const float32_t * pSrcA,
+  const float32_t * pSrcB,
+  uint32_t blockSize,
+  float32_t *pTmpBuffer);
+
+/**
+ * @brief Entropy
+ *
+ * @param[in]  pSrcA        Array of input values.
+ * @param[in]  blockSize    Number of samples in the input array.
+ * @return     Entropy      -Sum(p ln p)
+ *
+ */
+
+
+float32_t riscv_entropy_f32(const float32_t * pSrcA,uint32_t blockSize);
+
+
+/**
+ * @brief Entropy
+ *
+ * @param[in]  pSrcA        Array of input values.
+ * @param[in]  blockSize    Number of samples in the input array.
+ * @return     Entropy      -Sum(p ln p)
+ *
+ */
+
+
+float64_t riscv_entropy_f64(const float64_t * pSrcA, uint32_t blockSize);
+
+
+/**
+ * @brief Kullback-Leibler
+ *
+ * @param[in]  pSrcA         Pointer to an array of input values for probability distribution A.
+ * @param[in]  pSrcB         Pointer to an array of input values for probability distribution B.
+ * @param[in]  blockSize     Number of samples in the input array.
+ * @return Kullback-Leibler  Divergence D(A || B)
+ *
+ */
+float32_t riscv_kullback_leibler_f32(const float32_t * pSrcA
+  ,const float32_t * pSrcB
+  ,uint32_t blockSize);
+
+
+/**
+ * @brief Kullback-Leibler
+ *
+ * @param[in]  pSrcA         Pointer to an array of input values for probability distribution A.
+ * @param[in]  pSrcB         Pointer to an array of input values for probability distribution B.
+ * @param[in]  blockSize     Number of samples in the input array.
+ * @return Kullback-Leibler  Divergence D(A || B)
+ *
+ */
+float64_t riscv_kullback_leibler_f64(const float64_t * pSrcA, 
+                const float64_t * pSrcB, 
+                uint32_t blockSize);
+
+
+ /**
+   * @brief  Sum of the squares of the elements of a Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_power_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q63_t * pResult);
+
+
+  /**
+   * @brief  Sum of the squares of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_power_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
+
+
+  /**
+   * @brief  Sum of the squares of the elements of a Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_power_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q63_t * pResult);
+
+
+  /**
+   * @brief  Sum of the squares of the elements of a Q7 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_power_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
+
+
+  /**
+   * @brief  Mean value of a Q7 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_mean_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult);
+
+
+  /**
+   * @brief  Mean value of a Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_mean_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
+
+
+  /**
+   * @brief  Mean value of a Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_mean_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
+
+
+  /**
+   * @brief  Mean value of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_mean_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
+
+
+  /**
+   * @brief  Variance of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_var_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
+
+
+  /**
+   * @brief  Variance of the elements of a Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_var_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
+
+
+  /**
+   * @brief  Variance of the elements of a Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_var_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
+
+
+  /**
+   * @brief  Root Mean Square of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_rms_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
+
+
+  /**
+   * @brief  Root Mean Square of the elements of a Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_rms_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
+
+
+  /**
+   * @brief  Root Mean Square of the elements of a Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_rms_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
+
+
+  /**
+   * @brief  Standard deviation of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_std_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
+
+
+  /**
+   * @brief  Standard deviation of the elements of a Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_std_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
+
+
+  /**
+   * @brief  Standard deviation of the elements of a Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_std_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
+
+
+  
+  /**
+   * @brief  Minimum value of a Q7 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] result     is output pointer
+   * @param[out] index      is the array index of the minimum value in the input buffer.
+   */
+  void riscv_min_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * result,
+        uint32_t * index);
+
+  /**
+   * @brief  Minimum value of absolute values of a Q7 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] result     is output pointer
+   * @param[out] index      is the array index of the minimum value in the input buffer.
+   */
+  void riscv_absmin_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * result,
+        uint32_t * index);
+
+
+  /**
+   * @brief  Minimum value of a Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void riscv_min_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
+
+/**
+   * @brief  Minimum value of absolute values of a Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void riscv_absmin_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
+
+
+  /**
+   * @brief  Minimum value of a Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void riscv_min_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
+
+  /**
+   * @brief  Minimum value of absolute values of a Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void riscv_absmin_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
+
+
+  /**
+   * @brief  Minimum value of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void riscv_min_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
+
+  /**
+   * @brief  Minimum value of absolute values of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void riscv_absmin_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
+
+
+/**
+ * @brief Maximum value of a Q7 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_max_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of absolute values of a Q7 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_absmax_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex);
+
+
+/**
+ * @brief Maximum value of a Q15 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_max_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of absolute values of a Q15 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_absmax_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of a Q31 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_max_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of absolute values of a Q31 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_absmax_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_max_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of absolute values of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_absmax_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
+
+  /**
+    @brief         Maximum value of a floating-point vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    maximum value returned here
+    @return        none
+   */
+  void riscv_max_no_idx_f32(
+      const float32_t *pSrc,
+      uint32_t   blockSize,
+      float32_t *pResult);
+
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _STATISTICS_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/statistics_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/statistics_functions_f16.h
@ -0,0 +1,219 @@
+/******************************************************************************
+ * @file     statistics_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _STATISTICS_FUNCTIONS_F16_H_
+#define _STATISTICS_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/basic_math_functions_f16.h"
+#include "dsp/fast_math_functions_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+ /**
+   * @brief  Sum of the squares of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_power_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+ /**
+   * @brief  Mean value of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_mean_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+  /**
+   * @brief  Variance of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_var_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+ /**
+   * @brief  Root Mean Square of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_rms_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+ /**
+   * @brief  Standard deviation of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void riscv_std_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+ /**
+   * @brief  Minimum value of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void riscv_min_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
+
+ /**
+   * @brief  Minimum value of absolute values of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void riscv_absmin_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_max_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of absolute values of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void riscv_absmax_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Entropy
+ *
+ * @param[in]  pSrcA        Array of input values.
+ * @param[in]  blockSize    Number of samples in the input array.
+ * @return     Entropy      -Sum(p ln p)
+ *
+ */
+
+
+float16_t riscv_entropy_f16(const float16_t * pSrcA,uint32_t blockSize);
+
+float16_t riscv_logsumexp_f16(const float16_t *in, uint32_t blockSize);
+
+/**
+ * @brief Dot product with log arithmetic
+ *
+ * Vectors are containing the log of the samples
+ *
+ * @param[in]       pSrcA points to the first input vector
+ * @param[in]       pSrcB points to the second input vector
+ * @param[in]       blockSize number of samples in each vector
+ * @param[in]       pTmpBuffer temporary buffer of length blockSize
+ * @return The log of the dot product .
+ *
+ */
+
+
+float16_t riscv_logsumexp_dot_prod_f16(const float16_t * pSrcA,
+  const float16_t * pSrcB,
+  uint32_t blockSize,
+  float16_t *pTmpBuffer);
+
+/**
+ * @brief Kullback-Leibler
+ *
+ * @param[in]  pSrcA         Pointer to an array of input values for probability distribution A.
+ * @param[in]  pSrcB         Pointer to an array of input values for probability distribution B.
+ * @param[in]  blockSize     Number of samples in the input array.
+ * @return Kullback-Leibler  Divergence D(A || B)
+ *
+ */
+float16_t riscv_kullback_leibler_f16(const float16_t * pSrcA
+  ,const float16_t * pSrcB
+  ,uint32_t blockSize);
+
+/**
+    @brief         Maximum value of a floating-point vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    maximum value returned here
+    @return        none
+   */
+  void riscv_max_no_idx_f16(
+      const float16_t *pSrc,
+      uint32_t   blockSize,
+      float16_t *pResult);
+
+
+
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _STATISTICS_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/support_functions.h
+++ b/components/nmsis/dsp/inc/dsp/support_functions.h
@ -0,0 +1,428 @@
+/******************************************************************************
+ * @file     support_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _SUPPORT_FUNCTIONS_H_
+#define _SUPPORT_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @defgroup groupSupport Support Functions
+ */
+
+
+/**
+   * @brief Converts the elements of the floating-point vector to Q31 vector.
+   * @param[in]  pSrc       points to the floating-point input vector
+   * @param[out] pDst       points to the Q31 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+  void riscv_float_to_q31(
+  const float32_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Converts the elements of the floating-point vector to Q15 vector.
+   * @param[in]  pSrc       points to the floating-point input vector
+   * @param[out] pDst       points to the Q15 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+  void riscv_float_to_q15(
+  const float32_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief Converts the elements of the floating-point vector to Q7 vector.
+   * @param[in]  pSrc       points to the floating-point input vector
+   * @param[out] pDst       points to the Q7 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+  void riscv_float_to_q7(
+  const float32_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q31 vector to floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
+   */
+  void riscv_q31_to_float(
+  const q31_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q31 vector to Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
+   */
+  void riscv_q31_to_q15(
+  const q31_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q31 vector to Q7 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
+   */
+  void riscv_q31_to_q7(
+  const q31_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q15 vector to floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
+   */
+  void riscv_q15_to_float(
+  const q15_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q15 vector to Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
+   */
+  void riscv_q15_to_q31(
+  const q15_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q15 vector to Q7 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
+   */
+  void riscv_q15_to_q7(
+  const q15_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q7 vector to floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
+   */
+  void riscv_q7_to_float(
+  const q7_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q7 vector to Q31 vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_q7_to_q31(
+  const q7_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q7 vector to Q15 vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_q7_to_q15(
+  const q7_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+
+
+  
+  /**
+   * @brief Enumeration of the available sorting algorithms
+   */
+  typedef enum
+  {
+    RISCV_SORT_BITONIC   = 0,
+             /**< Bitonic sort   */
+    RISCV_SORT_BUBBLE    = 1,
+             /**< Bubble sort    */
+    RISCV_SORT_HEAP      = 2,
+             /**< Heap sort      */
+    RISCV_SORT_INSERTION = 3,
+             /**< Insertion sort */
+    RISCV_SORT_QUICK     = 4,
+             /**< Quick sort     */
+    RISCV_SORT_SELECTION = 5
+             /**< Selection sort */
+  } riscv_sort_alg;
+
+  /**
+   * @brief Enumeration of the sorting order (direction)
+   */
+  typedef enum
+  {
+    RISCV_SORT_DESCENDING = 0,
+             /**< Descending order (9 to 0) */
+    RISCV_SORT_ASCENDING = 1
+             /**< Ascending order (0 to 9) */
+  } riscv_sort_dir;
+
+  /**
+   * @brief Instance structure for the sorting algorithms.
+   */
+  typedef struct            
+  {
+    riscv_sort_alg alg;        /**< Sorting algorithm selected */
+    riscv_sort_dir dir;        /**< Sorting order (direction)  */
+  } riscv_sort_instance_f32;  
+
+  /**
+   * @param[in]  S          points to an instance of the sorting structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_sort_f32(
+    const riscv_sort_instance_f32 * S, 
+          float32_t * pSrc, 
+          float32_t * pDst, 
+          uint32_t blockSize);
+
+  /**
+   * @param[in,out]  S            points to an instance of the sorting structure.
+   * @param[in]      alg          Selected algorithm.
+   * @param[in]      dir          Sorting order.
+   */
+  void riscv_sort_init_f32(
+    riscv_sort_instance_f32 * S, 
+    riscv_sort_alg alg, 
+    riscv_sort_dir dir); 
+
+  /**
+   * @brief Instance structure for the merge sort algorithm.
+   */
+  typedef struct            
+  {
+    riscv_sort_dir dir;        /**< Sorting order (direction)  */
+    float32_t * buffer;      /**< Working buffer */
+  } riscv_merge_sort_instance_f32;  
+
+  /**
+   * @param[in]      S          points to an instance of the sorting structure.
+   * @param[in,out]  pSrc       points to the block of input data.
+   * @param[out]     pDst       points to the block of output data
+   * @param[in]      blockSize  number of samples to process.
+   */
+  void riscv_merge_sort_f32(
+    const riscv_merge_sort_instance_f32 * S,
+          float32_t *pSrc,
+          float32_t *pDst,
+          uint32_t blockSize);
+
+  /**
+   * @param[in,out]  S            points to an instance of the sorting structure.
+   * @param[in]      dir          Sorting order.
+   * @param[in]      buffer       Working buffer.
+   */
+  void riscv_merge_sort_init_f32(
+    riscv_merge_sort_instance_f32 * S,
+    riscv_sort_dir dir,
+    float32_t * buffer);
+
+ 
+ 
+  /**
+   * @brief  Copies the elements of a floating-point vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_copy_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Copies the elements of a Q7 vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_copy_q7(
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Copies the elements of a Q15 vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_copy_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Copies the elements of a Q31 vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_copy_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Fills a constant value into a floating-point vector.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_fill_f32(
+        float32_t value,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Fills a constant value into a Q7 vector.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_fill_q7(
+        q7_t value,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Fills a constant value into a Q15 vector.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_fill_q15(
+        q15_t value,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Fills a constant value into a Q31 vector.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void riscv_fill_q31(
+        q31_t value,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+
+
+
+
+
+/**
+ * @brief Weighted sum
+ *
+ *
+ * @param[in]    in            Array of input values.
+ * @param[in]    weigths       Weights
+ * @param[in]    blockSize     Number of samples in the input array.
+ * @return Weighted sum
+ *
+ */
+float32_t riscv_weighted_sum_f32(const float32_t *in
+  , const float32_t *weigths
+  , uint32_t blockSize);
+
+
+/**
+ * @brief Barycenter
+ *
+ *
+ * @param[in]    in         List of vectors
+ * @param[in]    weights    Weights of the vectors
+ * @param[out]   out        Barycenter
+ * @param[in]    nbVectors  Number of vectors
+ * @param[in]    vecDim     Dimension of space (vector dimension)
+ * @return       None
+ *
+ */
+void riscv_barycenter_f32(const float32_t *in
+  , const float32_t *weights
+  , float32_t *out
+  , uint32_t nbVectors
+  , uint32_t vecDim);
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _SUPPORT_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/support_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/support_functions_f16.h
@ -0,0 +1,130 @@
+/******************************************************************************
+ * @file     support_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _SUPPORT_FUNCTIONS_F16_H_
+#define _SUPPORT_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+  /**
+   * @brief  Copies the elements of a floating-point vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+void riscv_copy_f16(const float16_t * pSrc, float16_t * pDst, uint32_t blockSize);
+
+  /**
+   * @brief  Fills a constant value into a floating-point vector.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+void riscv_fill_f16(float16_t value, float16_t * pDst, uint32_t blockSize);
+
+/**
+   * @brief Converts the elements of the f16 vector to Q15 vector.
+   * @param[in]  pSrc       points to the f16 input vector
+   * @param[out] pDst       points to the q15 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+void riscv_f16_to_q15(const float16_t * pSrc, q15_t * pDst, uint32_t blockSize);
+
+/**
+   * @brief Converts the elements of the Q15 vector to f16 vector.
+   * @param[in]  pSrc       points to the q15 input vector
+   * @param[out] pDst       points to the f16 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+void riscv_q15_to_f16(const q15_t * pSrc, float16_t * pDst, uint32_t blockSize);
+
+
+/**
+   * @brief Converts the elements of the f32 vector to f16 vector.
+   * @param[in]  pSrc       points to the f32 input vector
+   * @param[out] pDst       points to the f16 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+void riscv_float_to_f16(const float32_t * pSrc, float16_t * pDst, uint32_t blockSize);
+
+/**
+   * @brief Converts the elements of the f16 vector to f32 vector.
+   * @param[in]  pSrc       points to the f16 input vector
+   * @param[out] pDst       points to the f32 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+void riscv_f16_to_float(const float16_t * pSrc, float32_t * pDst, uint32_t blockSize);
+
+/**
+ * @brief Weighted sum
+ *
+ *
+ * @param[in]    in            Array of input values.
+ * @param[in]    weigths       Weights
+ * @param[in]    blockSize     Number of samples in the input array.
+ * @return Weighted sum
+ *
+ */
+float16_t riscv_weighted_sum_f16(const float16_t *in
+  , const float16_t *weigths
+  , uint32_t blockSize);
+
+/**
+ * @brief Barycenter
+ *
+ *
+ * @param[in]    in         List of vectors
+ * @param[in]    weights    Weights of the vectors
+ * @param[out]   out        Barycenter
+ * @param[in]    nbVectors  Number of vectors
+ * @param[in]    vecDim     Dimension of space (vector dimension)
+ * @return       None
+ *
+ */
+void riscv_barycenter_f16(const float16_t *in
+  , const float16_t *weights
+  , float16_t *out
+  , uint32_t nbVectors
+  , uint32_t vecDim);
+
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _SUPPORT_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/svm_defines.h
+++ b/components/nmsis/dsp/inc/dsp/svm_defines.h
@ -0,0 +1,47 @@
+/******************************************************************************
+ * @file     svm_defines.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ *
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _SVM_DEFINES_H_
+#define _SVM_DEFINES_H_
+
+/**
+ * @brief Enumeration of the supported SVM kernel types
+ */
+typedef enum
+{
+    RISCV_ML_KERNEL_LINEAR = 0,
+             /**< Linear kernel */
+    RISCV_ML_KERNEL_POLYNOMIAL = 1,
+             /**< Polynomial kernel */
+    RISCV_ML_KERNEL_RBF = 2,
+             /**< Radial Basis Function kernel */
+    RISCV_ML_KERNEL_SIGMOID = 3
+             /**< Sigmoid kernel */
+} riscv_ml_kernel_type;
+
+#endif
--- a/components/nmsis/dsp/inc/dsp/svm_functions.h
+++ b/components/nmsis/dsp/inc/dsp/svm_functions.h
@ -0,0 +1,300 @@
+/******************************************************************************
+ * @file     svm_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _SVM_FUNCTIONS_H_
+#define _SVM_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+#include "dsp/svm_defines.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#define STEP(x) (((x) <= 0) ? 0 : 1) /* 0 when x <= 0, else 1; fully parenthesized so it is safe inside larger expressions */
+
+/**
+ * @defgroup groupSVM SVM Functions
+ * This set of functions is implementing SVM classification on 2 classes.
+ * The training must be done from scikit-learn. The parameters can be easily
+ * generated from the scikit-learn object. Some examples are given in
+ * DSP/Testing/PatternGeneration/SVM.py
+ *
+ * If more than 2 classes are needed, the functions in this folder 
+ * will have to be used, as building blocks, to do multi-class classification.
+ *
+ * No multi-class classification is provided in this SVM folder.
+ * 
+ */
+
+/**
+ * @brief  Raise a value to a positive integer power by repeated multiplication.
+ * @param[in]    x     base value
+ * @param[in]    nb    integer exponent, expected to be >= 1
+ * @return x multiplied by itself (nb - 1) times, i.e. x^nb; x itself when nb <= 1
+ */
+__STATIC_INLINE float32_t riscv_exponent_f32(float32_t x, int32_t nb)
+{
+    float32_t acc = x;
+    int32_t remaining;
+
+    /* acc already holds x^1, so only nb - 1 further multiplications are needed. */
+    for (remaining = nb - 1; remaining > 0; remaining--)
+    {
+        acc = acc * x;
+    }
+    return acc;
+}
+
+  
+
+
+
+/**
+ * @brief Instance structure for linear SVM prediction function.
+ */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float32_t       intercept;              /**< Intercept */
+  const float32_t *dualCoefficients;      /**< Dual coefficients */
+  const float32_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+} riscv_svm_linear_instance_f32;
+
+
+/**
+ * @brief Instance structure for polynomial SVM prediction function.
+ */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float32_t       intercept;              /**< Intercept */
+  const float32_t *dualCoefficients;      /**< Dual coefficients */
+  const float32_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  int32_t         degree;                 /**< Polynomial degree */
+  float32_t       coef0;                  /**< Polynomial constant */
+  float32_t       gamma;                  /**< Gamma factor */
+} riscv_svm_polynomial_instance_f32;
+
+/**
+ * @brief Instance structure for rbf SVM prediction function.
+ */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float32_t       intercept;              /**< Intercept */
+  const float32_t *dualCoefficients;      /**< Dual coefficients */
+  const float32_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  float32_t       gamma;                  /**< Gamma factor */
+} riscv_svm_rbf_instance_f32;
+
+/**
+ * @brief Instance structure for sigmoid SVM prediction function.
+ */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float32_t       intercept;              /**< Intercept */
+  const float32_t *dualCoefficients;      /**< Dual coefficients */
+  const float32_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  float32_t       coef0;                  /**< Independent constant */
+  float32_t       gamma;                  /**< Gamma factor */
+} riscv_svm_sigmoid_instance_f32;
+
+/**
+ * @brief        SVM linear instance init function
+ * @param[in]    S                      Parameters for SVM functions
+ * @param[in]    nbOfSupportVectors     Number of support vectors
+ * @param[in]    vectorDimension        Dimension of vector space
+ * @param[in]    intercept              Intercept
+ * @param[in]    dualCoefficients       Array of dual coefficients
+ * @param[in]    supportVectors         Array of support vectors
+ * @param[in]    classes                Array of 2 classes ID
+ * @return none.
+ *
+ */
+
+
+void riscv_svm_linear_init_f32(riscv_svm_linear_instance_f32 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float32_t intercept,
+  const float32_t *dualCoefficients,
+  const float32_t *supportVectors,
+  const int32_t  *classes);
+
+/**
+ * @brief SVM linear prediction
+ * @param[in]    S          Pointer to an instance of the linear SVM structure.
+ * @param[in]    in         Pointer to input vector
+ * @param[out]   pResult    Decision value
+ * @return none.
+ *
+ */
+  
+void riscv_svm_linear_predict_f32(const riscv_svm_linear_instance_f32 *S, 
+   const float32_t * in, 
+   int32_t * pResult);
+
+
+/**
+ * @brief        SVM polynomial instance init function
+ * @param[in]    S                      points to an instance of the polynomial SVM structure.
+ * @param[in]    nbOfSupportVectors     Number of support vectors
+ * @param[in]    vectorDimension        Dimension of vector space
+ * @param[in]    intercept              Intercept
+ * @param[in]    dualCoefficients       Array of dual coefficients
+ * @param[in]    supportVectors         Array of support vectors
+ * @param[in]    classes                Array of 2 classes ID
+ * @param[in]    degree                 Polynomial degree
+ * @param[in]    coef0                  coeff0 (scikit-learn terminology)
+ * @param[in]    gamma                  gamma (scikit-learn terminology)
+ * @return none.
+ *
+ */
+
+
+void riscv_svm_polynomial_init_f32(riscv_svm_polynomial_instance_f32 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float32_t intercept,
+  const float32_t *dualCoefficients,
+  const float32_t *supportVectors,
+  const int32_t   *classes,
+  int32_t      degree,
+  float32_t coef0,
+  float32_t gamma
+  );
+
+/**
+ * @brief SVM polynomial prediction
+ * @param[in]    S          Pointer to an instance of the polynomial SVM structure.
+ * @param[in]    in         Pointer to input vector
+ * @param[out]   pResult    Decision value
+ * @return none.
+ *
+ */
+void riscv_svm_polynomial_predict_f32(const riscv_svm_polynomial_instance_f32 *S, 
+   const float32_t * in, 
+   int32_t * pResult);
+
+
+/**
+ * @brief        SVM radial basis function instance init function
+ * @param[in]    S                      points to an instance of the rbf SVM structure.
+ * @param[in]    nbOfSupportVectors     Number of support vectors
+ * @param[in]    vectorDimension        Dimension of vector space
+ * @param[in]    intercept              Intercept
+ * @param[in]    dualCoefficients       Array of dual coefficients
+ * @param[in]    supportVectors         Array of support vectors
+ * @param[in]    classes                Array of 2 classes ID
+ * @param[in]    gamma                  gamma (scikit-learn terminology)
+ * @return none.
+ *
+ */
+
+void riscv_svm_rbf_init_f32(riscv_svm_rbf_instance_f32 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float32_t intercept,
+  const float32_t *dualCoefficients,
+  const float32_t *supportVectors,
+  const int32_t   *classes,
+  float32_t gamma
+  );
+
+/**
+ * @brief SVM rbf prediction
+ * @param[in]    S         Pointer to an instance of the rbf SVM structure.
+ * @param[in]    in        Pointer to input vector
+ * @param[out]   pResult   decision value
+ * @return none.
+ *
+ */
+void riscv_svm_rbf_predict_f32(const riscv_svm_rbf_instance_f32 *S, 
+   const float32_t * in, 
+   int32_t * pResult);
+
+/**
+ * @brief        SVM sigmoid instance init function
+ * @param[in]    S                      points to an instance of the sigmoid SVM structure.
+ * @param[in]    nbOfSupportVectors     Number of support vectors
+ * @param[in]    vectorDimension        Dimension of vector space
+ * @param[in]    intercept              Intercept
+ * @param[in]    dualCoefficients       Array of dual coefficients
+ * @param[in]    supportVectors         Array of support vectors
+ * @param[in]    classes                Array of 2 classes ID
+ * @param[in]    coef0                  coeff0 (scikit-learn terminology)
+ * @param[in]    gamma                  gamma (scikit-learn terminology)
+ * @return none.
+ *
+ */
+
+void riscv_svm_sigmoid_init_f32(riscv_svm_sigmoid_instance_f32 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float32_t intercept,
+  const float32_t *dualCoefficients,
+  const float32_t *supportVectors,
+  const int32_t   *classes,
+  float32_t coef0,
+  float32_t gamma
+  );
+
+/**
+ * @brief SVM sigmoid prediction
+ * @param[in]    S        Pointer to an instance of the sigmoid SVM structure.
+ * @param[in]    in       Pointer to input vector
+ * @param[out]   pResult  Decision value
+ * @return none.
+ *
+ */
+void riscv_svm_sigmoid_predict_f32(const riscv_svm_sigmoid_instance_f32 *S, 
+   const float32_t * in, 
+   int32_t * pResult);
+
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _SVM_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/svm_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/svm_functions_f16.h
@ -0,0 +1,299 @@
+/******************************************************************************
+ * @file     svm_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _SVM_FUNCTIONS_F16_H_
+#define _SVM_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+#include "dsp/svm_defines.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+/*
+ * STEP(x): evaluates to 0 when x <= 0 and to 1 otherwise.
+ * Both the argument and the whole expansion are parenthesised so the macro
+ * keeps its meaning inside a larger expression (e.g. 2 * STEP(v) or
+ * STEP(v) + 1). x is expanded exactly once.
+ * The #ifndef guard keeps an earlier definition of STEP instead of
+ * redefining it.
+ * NOTE(review): if another header defines STEP with a different replacement
+ * list, align both definitions to avoid a redefinition diagnostic.
+ */
+#ifndef STEP
+#define STEP(x) (((x) <= 0) ? 0 : 1)
+#endif
+
+/**
+ * @defgroup groupSVM SVM Functions
+ * This set of functions implements SVM classification on 2 classes.
+ * The training must be done from scikit-learn. The parameters can be easily
+ * generated from the scikit-learn object. Some examples are given in
+ * DSP/Testing/PatternGeneration/SVM.py
+ *
+ * If more than 2 classes are needed, the functions in this folder 
+ * will have to be used, as building blocks, to do multi-class classification.
+ *
+ * No multi-class classification is provided in this SVM folder.
+ * 
+ */
+
+/**
+ * @brief Raise a half-precision value to a positive integer power
+ * @param[in]    x           base value
+ * @param[in]    nb          integer exponent, expected to be >= 1
+ * @return x^nb, computed by repeated multiplication (x itself when nb <= 1)
+ *
+ */
+__STATIC_INLINE float16_t riscv_exponent_f16(float16_t x, int32_t nb)
+{
+    float16_t acc = x;
+    int32_t remaining;
+
+    /* acc already holds x^1, so nb - 1 further multiplications are needed. */
+    for (remaining = nb - 1; remaining > 0; remaining--)
+    {
+        acc = acc * x;
+    }
+    return acc;
+}
+
+
+/**
+ * @brief Instance structure for linear SVM prediction function.
+ *
+ * The fields mirror, in order, the arguments of riscv_svm_linear_init_f16().
+ * The three array pointers are const-qualified, so the arrays cannot be
+ * modified through this structure.
+ */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float16_t       intercept;              /**< Intercept */
+  const float16_t *dualCoefficients;      /**< Dual coefficients */
+  const float16_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+} riscv_svm_linear_instance_f16;
+
+
+/**
+ * @brief Instance structure for polynomial SVM prediction function.
+ *
+ * The fields mirror, in order, the arguments of riscv_svm_polynomial_init_f16().
+ * The three array pointers are const-qualified, so the arrays cannot be
+ * modified through this structure.
+ */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float16_t       intercept;              /**< Intercept */
+  const float16_t *dualCoefficients;      /**< Dual coefficients */
+  const float16_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  int32_t         degree;                 /**< Polynomial degree */
+  float16_t       coef0;                  /**< Polynomial constant */
+  float16_t       gamma;                  /**< Gamma factor */
+} riscv_svm_polynomial_instance_f16;
+
+/**
+ * @brief Instance structure for rbf (radial basis function) SVM prediction function.
+ *
+ * The fields mirror, in order, the arguments of riscv_svm_rbf_init_f16().
+ * The three array pointers are const-qualified, so the arrays cannot be
+ * modified through this structure.
+ */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float16_t       intercept;              /**< Intercept */
+  const float16_t *dualCoefficients;      /**< Dual coefficients */
+  const float16_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  float16_t       gamma;                  /**< Gamma factor */
+} riscv_svm_rbf_instance_f16;
+
+/**
+ * @brief Instance structure for sigmoid SVM prediction function.
+ *
+ * The fields mirror, in order, the arguments of riscv_svm_sigmoid_init_f16().
+ * The three array pointers are const-qualified, so the arrays cannot be
+ * modified through this structure.
+ */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float16_t       intercept;              /**< Intercept */
+  const float16_t *dualCoefficients;      /**< Dual coefficients */
+  const float16_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  float16_t       coef0;                  /**< Independent constant */
+  float16_t       gamma;                  /**< Gamma factor */
+} riscv_svm_sigmoid_instance_f16;
+
+/**
+ * @brief        SVM linear instance init function
+ * @param[in]    S                      points to an instance of the linear SVM structure.
+ * @param[in]    nbOfSupportVectors     Number of support vectors
+ * @param[in]    vectorDimension        Dimension of vector space
+ * @param[in]    intercept              Intercept
+ * @param[in]    dualCoefficients       Array of dual coefficients
+ * @param[in]    supportVectors         Array of support vectors
+ * @param[in]    classes                Array of 2 classes ID
+ * @return none.
+ *
+ */
+
+
+void riscv_svm_linear_init_f16(riscv_svm_linear_instance_f16 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float16_t intercept,
+  const float16_t *dualCoefficients,
+  const float16_t *supportVectors,
+  const int32_t  *classes);
+
+/**
+ * @brief SVM linear prediction
+ * @param[in]    S          Pointer to an instance of the linear SVM structure (not modified: const).
+ * @param[in]    in         Pointer to input vector (not modified: const)
+ * @param[out]   pResult    Decision value, written through this pointer
+ * @return none.
+ *
+ */
+  
+void riscv_svm_linear_predict_f16(const riscv_svm_linear_instance_f16 *S, 
+   const float16_t * in, 
+   int32_t * pResult);
+
+
+/**
+ * @brief        SVM polynomial instance init function
+ * @param[in]    S                      points to an instance of the polynomial SVM structure.
+ * @param[in]    nbOfSupportVectors     Number of support vectors
+ * @param[in]    vectorDimension        Dimension of vector space
+ * @param[in]    intercept              Intercept
+ * @param[in]    dualCoefficients       Array of dual coefficients
+ * @param[in]    supportVectors         Array of support vectors
+ * @param[in]    classes                Array of 2 classes ID
+ * @param[in]    degree                 Polynomial degree (signed 32-bit integer)
+ * @param[in]    coef0                  coeff0 (scikit-learn terminology)
+ * @param[in]    gamma                  gamma (scikit-learn terminology)
+ * @return none.
+ *
+ */
+
+
+void riscv_svm_polynomial_init_f16(riscv_svm_polynomial_instance_f16 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float16_t intercept,
+  const float16_t *dualCoefficients,
+  const float16_t *supportVectors,
+  const int32_t   *classes,
+  int32_t      degree,
+  float16_t coef0,
+  float16_t gamma
+  );
+
+/**
+ * @brief SVM polynomial prediction
+ * @param[in]    S          Pointer to an instance of the polynomial SVM structure (not modified: const).
+ * @param[in]    in         Pointer to input vector (not modified: const)
+ * @param[out]   pResult    Decision value, written through this pointer
+ * @return none.
+ *
+ */
+void riscv_svm_polynomial_predict_f16(const riscv_svm_polynomial_instance_f16 *S, 
+   const float16_t * in, 
+   int32_t * pResult);
+
+
+/**
+ * @brief        SVM radial basis function instance init function
+ * @param[in]    S                      points to an instance of the rbf SVM structure.
+ * @param[in]    nbOfSupportVectors     Number of support vectors
+ * @param[in]    vectorDimension        Dimension of vector space
+ * @param[in]    intercept              Intercept
+ * @param[in]    dualCoefficients       Array of dual coefficients
+ * @param[in]    supportVectors         Array of support vectors
+ * @param[in]    classes                Array of 2 classes ID
+ * @param[in]    gamma                  gamma (scikit-learn terminology)
+ * @return none.
+ *
+ */
+
+void riscv_svm_rbf_init_f16(riscv_svm_rbf_instance_f16 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float16_t intercept,
+  const float16_t *dualCoefficients,
+  const float16_t *supportVectors,
+  const int32_t   *classes,
+  float16_t gamma
+  );
+
+/**
+ * @brief SVM rbf (radial basis function) prediction
+ * @param[in]    S         Pointer to an instance of the rbf SVM structure (not modified: const).
+ * @param[in]    in        Pointer to input vector (not modified: const)
+ * @param[out]   pResult   Decision value, written through this pointer
+ * @return none.
+ *
+ */
+void riscv_svm_rbf_predict_f16(const riscv_svm_rbf_instance_f16 *S, 
+   const float16_t * in, 
+   int32_t * pResult);
+
+/**
+ * @brief        SVM sigmoid instance init function
+ * @param[in]    S                      points to an instance of the sigmoid SVM structure.
+ * @param[in]    nbOfSupportVectors     Number of support vectors
+ * @param[in]    vectorDimension        Dimension of vector space
+ * @param[in]    intercept              Intercept
+ * @param[in]    dualCoefficients       Array of dual coefficients
+ * @param[in]    supportVectors         Array of support vectors
+ * @param[in]    classes                Array of 2 classes ID
+ * @param[in]    coef0                  coeff0 (scikit-learn terminology)
+ * @param[in]    gamma                  gamma (scikit-learn terminology)
+ * @return none.
+ *
+ */
+
+void riscv_svm_sigmoid_init_f16(riscv_svm_sigmoid_instance_f16 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float16_t intercept,
+  const float16_t *dualCoefficients,
+  const float16_t *supportVectors,
+  const int32_t   *classes,
+  float16_t coef0,
+  float16_t gamma
+  );
+
+/**
+ * @brief SVM sigmoid prediction
+ * @param[in]    S        Pointer to an instance of the sigmoid SVM structure.
+ * @param[in]    in       Pointer to input vector
+ * @param[out]   pResult  Decision value
+ * @return none.
+ *
+ */
+void riscv_svm_sigmoid_predict_f16(const riscv_svm_sigmoid_instance_f16 *S, 
+   const float16_t * in, 
+   int32_t * pResult);
+
+
+
+#endif /*defined(RISCV_FLOAT16_SUPPORTED)*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _SVM_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/transform_functions.h
+++ b/components/nmsis/dsp/inc/dsp/transform_functions.h
@ -0,0 +1,561 @@
+/******************************************************************************
+ * @file     transform_functions.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _TRANSFORM_FUNCTIONS_H_
+#define _TRANSFORM_FUNCTIONS_H_
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/basic_math_functions.h"
+#include "dsp/complex_math_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+/**
+ * @defgroup groupTransforms Transform Functions
+ */
+
+
+  /**
+   * @brief Instance structure for the Radix-2 Q15 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q15_t *pTwiddle;                 /**< points to the Sin twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } riscv_cfft_radix2_instance_q15;
+
+/**
+ * @brief Deprecated. Initialization function for the Radix-2 Q15 CFFT/CIFFT.
+ * @param[in,out] S               points to an instance of the Radix-2 Q15 CFFT/CIFFT structure.
+ * @param[in]     fftLen          length of the FFT.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ * @return        status value of type riscv_status.
+ */
+  riscv_status riscv_cfft_radix2_init_q15(
+        riscv_cfft_radix2_instance_q15 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/**
+ * @brief Deprecated. Processing function for the Radix-2 Q15 CFFT/CIFFT.
+ * @param[in]     S     points to an instance of the Radix-2 Q15 CFFT/CIFFT structure (const).
+ * @param[in,out] pSrc  points to the Q15 data buffer. It is the only data
+ *                      argument, so the result is presumably written back
+ *                      in place - TODO confirm against the implementation.
+ */
+  void riscv_cfft_radix2_q15(
+  const riscv_cfft_radix2_instance_q15 * S,
+        q15_t * pSrc);
+
+
+  /**
+   * @brief Instance structure for the Radix-4 Q15 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } riscv_cfft_radix4_instance_q15;
+
+/**
+ * @brief Deprecated. Initialization function for the Radix-4 Q15 CFFT/CIFFT.
+ * @param[in,out] S               points to an instance of the Radix-4 Q15 CFFT/CIFFT structure.
+ * @param[in]     fftLen          length of the FFT.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ * @return        status value of type riscv_status.
+ */
+  riscv_status riscv_cfft_radix4_init_q15(
+        riscv_cfft_radix4_instance_q15 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/**
+ * @brief Deprecated. Processing function for the Radix-4 Q15 CFFT/CIFFT.
+ * @param[in]     S     points to an instance of the Radix-4 Q15 CFFT/CIFFT structure (const).
+ * @param[in,out] pSrc  points to the Q15 data buffer. It is the only data
+ *                      argument, so the result is presumably written back
+ *                      in place - TODO confirm against the implementation.
+ */
+  void riscv_cfft_radix4_q15(
+  const riscv_cfft_radix4_instance_q15 * S,
+        q15_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q31_t *pTwiddle;                 /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } riscv_cfft_radix2_instance_q31;
+
+/**
+ * @brief Deprecated. Initialization function for the Radix-2 Q31 CFFT/CIFFT.
+ * @param[in,out] S               points to an instance of the Radix-2 Q31 CFFT/CIFFT structure.
+ * @param[in]     fftLen          length of the FFT.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ * @return        status value of type riscv_status.
+ */
+  riscv_status riscv_cfft_radix2_init_q31(
+        riscv_cfft_radix2_instance_q31 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/**
+ * @brief Deprecated. Processing function for the Radix-2 Q31 CFFT/CIFFT.
+ * @param[in]     S     points to an instance of the Radix-2 Q31 CFFT/CIFFT structure (const).
+ * @param[in,out] pSrc  points to the Q31 data buffer. It is the only data
+ *                      argument, so the result is presumably written back
+ *                      in place - TODO confirm against the implementation.
+ */
+  void riscv_cfft_radix2_q31(
+  const riscv_cfft_radix2_instance_q31 * S,
+        q31_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Radix-4 Q31 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } riscv_cfft_radix4_instance_q31;
+
+/**
+ * @brief Deprecated. Processing function for the Radix-4 Q31 CFFT/CIFFT.
+ * @param[in]     S     points to an instance of the Radix-4 Q31 CFFT/CIFFT structure (const).
+ * @param[in,out] pSrc  points to the Q31 data buffer. It is the only data
+ *                      argument, so the result is presumably written back
+ *                      in place - TODO confirm against the implementation.
+ */
+  void riscv_cfft_radix4_q31(
+  const riscv_cfft_radix4_instance_q31 * S,
+        q31_t * pSrc);
+
+/**
+ * @brief Deprecated. Initialization function for the Radix-4 Q31 CFFT/CIFFT.
+ * @param[in,out] S               points to an instance of the Radix-4 Q31 CFFT/CIFFT structure.
+ * @param[in]     fftLen          length of the FFT.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ * @return        status value of type riscv_status.
+ */
+  riscv_status riscv_cfft_radix4_init_q31(
+        riscv_cfft_radix4_instance_q31 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the Radix-2 floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float32_t onebyfftLen;             /**< value of 1/fftLen. */
+  } riscv_cfft_radix2_instance_f32;
+
+
+/**
+ * @brief Deprecated. Initialization function for the Radix-2 floating-point CFFT/CIFFT.
+ * @param[in,out] S               points to an instance of the Radix-2 floating-point CFFT/CIFFT structure.
+ * @param[in]     fftLen          length of the FFT.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ * @return        status value of type riscv_status.
+ */
+  riscv_status riscv_cfft_radix2_init_f32(
+        riscv_cfft_radix2_instance_f32 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/**
+ * @brief Deprecated. Processing function for the Radix-2 floating-point CFFT/CIFFT.
+ * @param[in]     S     points to an instance of the Radix-2 floating-point CFFT/CIFFT structure (const).
+ * @param[in,out] pSrc  points to the float32_t data buffer. It is the only
+ *                      data argument, so the result is presumably written
+ *                      back in place - TODO confirm against the implementation.
+ */
+  void riscv_cfft_radix2_f32(
+  const riscv_cfft_radix2_instance_f32 * S,
+        float32_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Radix-4 floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float32_t onebyfftLen;             /**< value of 1/fftLen. */
+  } riscv_cfft_radix4_instance_f32;
+
+
+
+/**
+ * @brief Deprecated. Initialization function for the Radix-4 floating-point CFFT/CIFFT.
+ * @param[in,out] S               points to an instance of the Radix-4 floating-point CFFT/CIFFT structure.
+ * @param[in]     fftLen          length of the FFT.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ * @return        status value of type riscv_status.
+ */
+  riscv_status riscv_cfft_radix4_init_f32(
+        riscv_cfft_radix4_instance_f32 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/**
+ * @brief Deprecated. Processing function for the Radix-4 floating-point CFFT/CIFFT.
+ * @param[in]     S     points to an instance of the Radix-4 floating-point CFFT/CIFFT structure (const).
+ * @param[in,out] pSrc  points to the float32_t data buffer. It is the only
+ *                      data argument, so the result is presumably written
+ *                      back in place - TODO confirm against the implementation.
+ */
+  void riscv_cfft_radix4_f32(
+  const riscv_cfft_radix4_instance_f32 * S,
+        float32_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Q15 fixed-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const q15_t *pTwiddle;             /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+  } riscv_cfft_instance_q15;
+
+/**
+ * @brief Initialization function for the Q15 CFFT/CIFFT.
+ * @param[in,out] S       points to an instance of the Q15 CFFT/CIFFT structure.
+ * @param[in]     fftLen  length of the FFT.
+ * @return        status value of type riscv_status.
+ */
+riscv_status riscv_cfft_init_q15(
+  riscv_cfft_instance_q15 * S,
+  uint16_t fftLen);
+
+/**
+ * @brief Processing function for the Q15 CFFT/CIFFT.
+ * @param[in]     S               points to an instance of the Q15 CFFT/CIFFT structure (const).
+ * @param[in,out] p1              points to the Q15 data buffer. It is the only data
+ *                                argument, so the result is presumably written back
+ *                                in place - TODO confirm against the implementation.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ */
+void riscv_cfft_q15(
+    const riscv_cfft_instance_q15 * S,
+          q15_t * p1,
+          uint8_t ifftFlag,
+          uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the Q31 fixed-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const q31_t *pTwiddle;             /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+  } riscv_cfft_instance_q31;
+
+/**
+ * @brief Initialization function for the Q31 CFFT/CIFFT.
+ * @param[in,out] S       points to an instance of the Q31 CFFT/CIFFT structure.
+ * @param[in]     fftLen  length of the FFT.
+ * @return        status value of type riscv_status.
+ */
+riscv_status riscv_cfft_init_q31(
+  riscv_cfft_instance_q31 * S,
+  uint16_t fftLen);
+
+/**
+ * @brief Processing function for the Q31 CFFT/CIFFT.
+ * @param[in]     S               points to an instance of the Q31 CFFT/CIFFT structure (const).
+ * @param[in,out] p1              points to the Q31 data buffer. It is the only data
+ *                                argument, so the result is presumably written back
+ *                                in place - TODO confirm against the implementation.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ */
+void riscv_cfft_q31(
+    const riscv_cfft_instance_q31 * S,
+          q31_t * p1,
+          uint8_t ifftFlag,
+          uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const float32_t *pTwiddle;         /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+  } riscv_cfft_instance_f32;
+
+
+
+/**
+ * @brief Initialization function for the floating-point CFFT/CIFFT.
+ * @param[in,out] S       points to an instance of the floating-point CFFT/CIFFT structure.
+ * @param[in]     fftLen  length of the FFT.
+ * @return        status value of type riscv_status.
+ */
+  riscv_status riscv_cfft_init_f32(
+  riscv_cfft_instance_f32 * S,
+  uint16_t fftLen);
+
+/**
+ * @brief Processing function for the floating-point CFFT/CIFFT.
+ * @param[in]     S               points to an instance of the floating-point CFFT/CIFFT structure (const).
+ * @param[in,out] p1              points to the float32_t data buffer. It is the only
+ *                                data argument, so the result is presumably written
+ *                                back in place - TODO confirm against the implementation.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ */
+  void riscv_cfft_f32(
+  const riscv_cfft_instance_f32 * S,
+        float32_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+
+  /**
+   * @brief Instance structure for the Double Precision Floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const float64_t *pTwiddle;         /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+  } riscv_cfft_instance_f64;
+
+/**
+ * @brief Initialization function for the double-precision floating-point CFFT/CIFFT.
+ * @param[in,out] S       points to an instance of the double-precision CFFT/CIFFT structure.
+ * @param[in]     fftLen  length of the FFT.
+ * @return        status value of type riscv_status.
+ */
+  riscv_status riscv_cfft_init_f64(
+  riscv_cfft_instance_f64 * S,
+  uint16_t fftLen);
+  
+/**
+ * @brief Processing function for the double-precision floating-point CFFT/CIFFT.
+ * @param[in]     S               points to an instance of the double-precision CFFT/CIFFT structure (const).
+ * @param[in,out] p1              points to the float64_t data buffer. It is the only
+ *                                data argument, so the result is presumably written
+ *                                back in place - TODO confirm against the implementation.
+ * @param[in]     ifftFlag        selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ */
+  void riscv_cfft_f64(
+  const riscv_cfft_instance_f64 * S,
+        float64_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the Q15 RFFT/RIFFT function.
+   */
+  typedef struct
+  {
+          uint32_t fftLenReal;                      /**< length of the real FFT. */
+          uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+          uint8_t bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+          uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    const q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
+    const q15_t *pTwiddleBReal;                     /**< points to the imag twiddle factor table. */
+    const riscv_cfft_instance_q15 *pCfft;       /**< points to the complex FFT instance. */
+  } riscv_rfft_instance_q15;
+
+/**
+ * @brief Initialization function for the Q15 RFFT/RIFFT.
+ * @param[in,out] S               points to an instance of the Q15 RFFT/RIFFT structure.
+ * @param[in]     fftLenReal      length of the real FFT.
+ * @param[in]     ifftFlagR       selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ * @return        status value of type riscv_status.
+ *
+ * NOTE(review): both flags are passed as uint32_t although the matching
+ * structure fields (ifftFlagR, bitReverseFlagR) are uint8_t.
+ */
+  riscv_status riscv_rfft_init_q15(
+        riscv_rfft_instance_q15 * S,
+        uint32_t fftLenReal,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+/**
+ * @brief Processing function for the Q15 RFFT/RIFFT.
+ * @param[in]  S     points to an instance of the Q15 RFFT/RIFFT structure (const).
+ * @param[in]  pSrc  points to the input buffer. NOTE(review): the pointer is
+ *                   non-const, so the input may be modified - confirm.
+ * @param[out] pDst  points to the output buffer.
+ */
+  void riscv_rfft_q15(
+  const riscv_rfft_instance_q15 * S,
+        q15_t * pSrc,
+        q15_t * pDst);
+
+  /**
+   * @brief Instance structure for the Q31 RFFT/RIFFT function.
+   */
+  typedef struct
+  {
+          uint32_t fftLenReal;                        /**< length of the real FFT. */
+          uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+          uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+          uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    const q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
+    const q31_t *pTwiddleBReal;                       /**< points to the imag twiddle factor table. */
+    const riscv_cfft_instance_q31 *pCfft;         /**< points to the complex FFT instance. */
+  } riscv_rfft_instance_q31;
+
+/**
+ * @brief Initialization function for the Q31 RFFT/RIFFT.
+ * @param[in,out] S               points to an instance of the Q31 RFFT/RIFFT structure.
+ * @param[in]     fftLenReal      length of the real FFT.
+ * @param[in]     ifftFlagR       selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ * @return        status value of type riscv_status.
+ *
+ * NOTE(review): both flags are passed as uint32_t although the matching
+ * structure fields (ifftFlagR, bitReverseFlagR) are uint8_t.
+ */
+  riscv_status riscv_rfft_init_q31(
+        riscv_rfft_instance_q31 * S,
+        uint32_t fftLenReal,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+/**
+ * @brief Processing function for the Q31 RFFT/RIFFT.
+ * @param[in]  S     points to an instance of the Q31 RFFT/RIFFT structure (const).
+ * @param[in]  pSrc  points to the input buffer. NOTE(review): the pointer is
+ *                   non-const, so the input may be modified - confirm.
+ * @param[out] pDst  points to the output buffer.
+ */
+  void riscv_rfft_q31(
+  const riscv_rfft_instance_q31 * S,
+        q31_t * pSrc,
+        q31_t * pDst);
+
+  /**
+   * @brief Instance structure for the floating-point RFFT/RIFFT function.
+   *
+   * Unlike the Q15/Q31 RFFT structures, pCfft here is a non-const pointer to
+   * a Radix-4 CFFT instance.
+   */
+  typedef struct
+  {
+          uint32_t fftLenReal;                        /**< length of the real FFT. */
+          uint16_t fftLenBy2;                         /**< length of the complex FFT. */
+          uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+          uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+          uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    const float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
+    const float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
+          riscv_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
+  } riscv_rfft_instance_f32;
+
+/**
+ * @brief Initialization function for the floating-point RFFT/RIFFT.
+ * @param[in,out] S               points to an instance of the floating-point RFFT/RIFFT structure.
+ * @param[in,out] S_CFFT          points to an instance of the Radix-4 floating-point CFFT/CIFFT structure.
+ * @param[in]     fftLenReal      length of the real FFT.
+ * @param[in]     ifftFlagR       selects forward (0) or inverse (1) transform.
+ * @param[in]     bitReverseFlag  enables (1) or disables (0) bit reversal of output.
+ * @return        status value of type riscv_status.
+ *
+ * NOTE(review): both flags are passed as uint32_t although the matching
+ * structure fields (ifftFlagR, bitReverseFlagR) are uint8_t.
+ */
+  riscv_status riscv_rfft_init_f32(
+        riscv_rfft_instance_f32 * S,
+        riscv_cfft_radix4_instance_f32 * S_CFFT,
+        uint32_t fftLenReal,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+/**
+ * @brief Processing function for the floating-point RFFT/RIFFT.
+ * @param[in]  S     points to an instance of the floating-point RFFT/RIFFT structure (const).
+ * @param[in]  pSrc  points to the input buffer. NOTE(review): the pointer is
+ *                   non-const, so the input may be modified - confirm.
+ * @param[out] pDst  points to the output buffer.
+ */
+  void riscv_rfft_f32(
+  const riscv_rfft_instance_f32 * S,
+        float32_t * pSrc,
+        float32_t * pDst);
+
+  /**
+   * @brief Instance structure for the Double Precision Floating-point fast RFFT/RIFFT function.
+   *
+   * The complex FFT instance is embedded by value (Sint), not referenced
+   * through a pointer.
+   */
+typedef struct
+  {
+          riscv_cfft_instance_f64 Sint;      /**< Internal CFFT structure. */
+          uint16_t fftLenRFFT;             /**< length of the real sequence */
+    const float64_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
+  } riscv_rfft_fast_instance_f64 ;
+
+/**
+ * @brief Initialization function for the double-precision fast RFFT/RIFFT.
+ * @param[in,out] S       points to an instance of the double-precision fast RFFT structure.
+ * @param[in]     fftLen  transform length. Presumably the length of the real
+ *                        sequence (fftLenRFFT) - TODO confirm.
+ * @return        status value of type riscv_status.
+ */
+riscv_status riscv_rfft_fast_init_f64 (
+         riscv_rfft_fast_instance_f64 * S,
+         uint16_t fftLen);
+
+
+/**
+ * @brief Processing function for the double-precision fast RFFT/RIFFT.
+ * @param[in]  S         points to an instance of the double-precision fast RFFT
+ *                       structure. NOTE(review): non-const here, whereas
+ *                       riscv_rfft_fast_f32() takes a const instance.
+ * @param[in]  p         points to the input buffer. NOTE(review): the pointer is
+ *                       non-const, so the input may be modified - confirm.
+ * @param[out] pOut      points to the output buffer.
+ * @param[in]  ifftFlag  presumably selects forward (0) or inverse (1) transform,
+ *                       as for the other ifftFlag arguments - TODO confirm.
+ */
+void riscv_rfft_fast_f64(
+    riscv_rfft_fast_instance_f64 * S,
+    float64_t * p, float64_t * pOut,
+    uint8_t ifftFlag);
+
+
+  /**
+   * @brief Instance structure for the floating-point fast RFFT/RIFFT function.
+   *
+   * The complex FFT instance is embedded by value (Sint), not referenced
+   * through a pointer.
+   */
+typedef struct
+  {
+          riscv_cfft_instance_f32 Sint;      /**< Internal CFFT structure. */
+          uint16_t fftLenRFFT;             /**< length of the real sequence */
+    const float32_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
+  } riscv_rfft_fast_instance_f32 ;
+
+/**
+ * @brief Initialization function for the floating-point fast RFFT/RIFFT.
+ * @param[in,out] S       points to an instance of the floating-point fast RFFT structure.
+ * @param[in]     fftLen  transform length. Presumably the length of the real
+ *                        sequence (fftLenRFFT) - TODO confirm.
+ * @return        status value of type riscv_status.
+ */
+riscv_status riscv_rfft_fast_init_f32 (
+         riscv_rfft_fast_instance_f32 * S,
+         uint16_t fftLen);
+
+
+/**
+ * @brief Processing function for the floating-point fast RFFT/RIFFT.
+ * @param[in]  S         points to an instance of the floating-point fast RFFT structure (const).
+ * @param[in]  p         points to the input buffer. NOTE(review): the pointer is
+ *                       non-const, so the input may be modified - confirm.
+ * @param[out] pOut      points to the output buffer.
+ * @param[in]  ifftFlag  presumably selects forward (0) or inverse (1) transform,
+ *                       as for the other ifftFlag arguments - TODO confirm.
+ */
+  void riscv_rfft_fast_f32(
+        const riscv_rfft_fast_instance_f32 * S,
+        float32_t * p, float32_t * pOut,
+        uint8_t ifftFlag);
+
+  /**
+   * @brief Instance structure for the floating-point DCT4/IDCT4 function.
+   */
+  typedef struct
+  {
+          uint16_t N;                          /**< length of the DCT4. */
+          uint16_t Nby2;                       /**< half of the length of the DCT4. */
+          float32_t normalize;                 /**< normalizing factor. */
+    const float32_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const float32_t *pCosFactor;               /**< points to the cosFactor table. */
+          riscv_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
+          riscv_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
+  } riscv_dct4_instance_f32;
+
+
+  /**
+   * @brief  Initialization function for the floating-point DCT4/IDCT4.
+   * @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure.
+   * @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure.
+   * @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure.
+   * @param[in]     N          length of the DCT4.
+   * @param[in]     Nby2       half of the length of the DCT4.
+   * @param[in]     normalize  normalizing factor.
+   * @return      riscv_status function returns RISCV_MATH_SUCCESS if initialization is successful or RISCV_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
+  riscv_status riscv_dct4_init_f32(
+        riscv_dct4_instance_f32 * S,
+        riscv_rfft_instance_f32 * S_RFFT,
+        riscv_cfft_radix4_instance_f32 * S_CFFT,
+        uint16_t N,
+        uint16_t Nby2,
+        float32_t normalize);
+
+
+  /**
+   * @brief Processing function for the floating-point DCT4/IDCT4.
+   * @param[in]     S              points to an instance of the floating-point DCT4/IDCT4 structure.
+   * @param[in]     pState         points to state buffer. NOTE(review): the required
+   *                               size is not stated here - confirm against the implementation.
+   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
+   */
+  void riscv_dct4_f32(
+  const riscv_dct4_instance_f32 * S,
+        float32_t * pState,
+        float32_t * pInlineBuffer);
+
+
+  /**
+   * @brief Instance structure for the Q31 DCT4/IDCT4 function.
+   */
+  typedef struct
+  {
+          uint16_t N;                          /**< length of the DCT4. */
+          uint16_t Nby2;                       /**< half of the length of the DCT4. */
+          q31_t normalize;                     /**< normalizing factor. */
+    const q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
+    const q31_t *pCosFactor;                   /**< points to the cosFactor table. */
+          riscv_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
+          riscv_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
+  } riscv_dct4_instance_q31;
+
+
+  /**
+   * @brief  Initialization function for the Q31 DCT4/IDCT4.
+   * @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
+   * @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure.
+   * @param[in]     S_CFFT     points to an instance of Q31 CFFT/CIFFT structure.
+   * @param[in]     N          length of the DCT4.
+   * @param[in]     Nby2       half of the length of the DCT4.
+   * @param[in]     normalize  normalizing factor.
+   * @return      riscv_status function returns RISCV_MATH_SUCCESS if initialization is successful or RISCV_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
+  riscv_status riscv_dct4_init_q31(
+        riscv_dct4_instance_q31 * S,
+        riscv_rfft_instance_q31 * S_RFFT,
+        riscv_cfft_radix4_instance_q31 * S_CFFT,
+        uint16_t N,
+        uint16_t Nby2,
+        q31_t normalize);
+
+
+  /**
+   * @brief Processing function for the Q31 DCT4/IDCT4.
+   * @param[in]     S              points to an instance of the Q31 DCT4 structure.
+   * @param[in]     pState         points to state buffer. NOTE(review): the required
+   *                               size is not stated here - confirm against the implementation.
+   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
+   */
+  void riscv_dct4_q31(
+  const riscv_dct4_instance_q31 * S,
+        q31_t * pState,
+        q31_t * pInlineBuffer);
+
+
+  /**
+   * @brief Instance structure for the Q15 DCT4/IDCT4 function.
+   */
+  typedef struct
+  {
+          uint16_t N;                          /**< length of the DCT4. */
+          uint16_t Nby2;                       /**< half of the length of the DCT4. */
+          q15_t normalize;                     /**< normalizing factor. */
+    const q15_t *pTwiddle;                     /**< points to the twiddle factor table. */
+    const q15_t *pCosFactor;                   /**< points to the cosFactor table. */
+          riscv_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
+          riscv_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
+  } riscv_dct4_instance_q15;
+
+
+  /**
+   * @brief  Initialization function for the Q15 DCT4/IDCT4.
+   * @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure.
+   * @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure.
+   * @param[in]     S_CFFT     points to an instance of Q15 CFFT/CIFFT structure.
+   * @param[in]     N          length of the DCT4.
+   * @param[in]     Nby2       half of the length of the DCT4.
+   * @param[in]     normalize  normalizing factor.
+   * @return      riscv_status function returns RISCV_MATH_SUCCESS if initialization is successful or RISCV_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
+  riscv_status riscv_dct4_init_q15(
+        riscv_dct4_instance_q15 * S,
+        riscv_rfft_instance_q15 * S_RFFT,
+        riscv_cfft_radix4_instance_q15 * S_CFFT,
+        uint16_t N,
+        uint16_t Nby2,
+        q15_t normalize);
+
+
+  /**
+   * @brief Processing function for the Q15 DCT4/IDCT4.
+   * @param[in]     S              points to an instance of the Q15 DCT4 structure.
+   * @param[in]     pState         points to state buffer. NOTE(review): the required
+   *                               size is not stated here - confirm against the implementation.
+   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
+   */
+  void riscv_dct4_q15(
+  const riscv_dct4_instance_q15 * S,
+        q15_t * pState,
+        q15_t * pInlineBuffer);
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _TRANSFORM_FUNCTIONS_H_ */
--- a/components/nmsis/dsp/inc/dsp/transform_functions_f16.h
+++ b/components/nmsis/dsp/inc/dsp/transform_functions_f16.h
@ -0,0 +1,150 @@
+/******************************************************************************
+ * @file     transform_functions_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _TRANSFORM_FUNCTIONS_F16_H_
+#define _TRANSFORM_FUNCTIONS_F16_H_
+
+#include "riscv_math_types_f16.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+
+  /**
+   * @brief Instance structure for the half-precision floating-point (f16) radix-2 CFFT/CIFFT function.
+   *
+   * Passed to riscv_cfft_radix2_init_f16() and riscv_cfft_radix2_f16(), both of
+   * which are marked as deprecated in this header.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float16_t onebyfftLen;             /**< value of 1/fftLen. */
+  } riscv_cfft_radix2_instance_f16;
+
+  /**
+   * @brief Instance structure for the half-precision floating-point (f16) radix-4 CFFT/CIFFT function.
+   *
+   * Passed to riscv_cfft_radix4_init_f16() and riscv_cfft_radix4_f16(), both of
+   * which are marked as deprecated in this header. The member layout is the same
+   * as that of riscv_cfft_radix2_instance_f16.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float16_t onebyfftLen;             /**< value of 1/fftLen. */
+  } riscv_cfft_radix4_instance_f16;
+
+  /**
+   * @brief Instance structure for the half-precision floating-point (f16) CFFT/CIFFT function.
+   *
+   * Unlike the radix-2 / radix-4 instance structures, no transform direction or
+   * bit-reversal flag is stored here: riscv_cfft_f16() receives ifftFlag and
+   * bitReverseFlag as call arguments instead.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const float16_t *pTwiddle;         /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+  } riscv_cfft_instance_f16;
+
+
+  /**
+   * @brief Initialization function for the f16 CFFT/CIFFT instance.
+   * @param[in,out] S       points to the riscv_cfft_instance_f16 structure to set up.
+   * @param[in]     fftLen  length of the FFT.
+   * @return        a riscv_status value.
+   *                NOTE(review): presumably RISCV_MATH_SUCCESS on success and
+   *                RISCV_MATH_ARGUMENT_ERROR for an unsupported fftLen -- the
+   *                supported lengths are not visible in this header; confirm
+   *                against the implementation.
+   */
+  riscv_status riscv_cfft_init_f16(
+  riscv_cfft_instance_f16 * S,
+  uint16_t fftLen);
+
+  /**
+   * @brief Processing function for the f16 CFFT/CIFFT.
+   * @param[in]     S               points to an instance of the f16 CFFT structure (not modified).
+   * @param[in,out] p1              points to the float16_t data buffer.
+   *                                NOTE(review): presumably interleaved complex samples
+   *                                (2*fftLen values) that are transformed in place -- confirm.
+   * @param[in]     ifftFlag        selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
+   * @param[in]     bitReverseFlag  enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+   */
+  void riscv_cfft_f16(
+  const riscv_cfft_instance_f16 * S,
+        float16_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the half-precision floating-point (f16) RFFT/RIFFT function.
+   *
+   * Embeds (by value) the CFFT instance used internally, so no separate CFFT
+   * instance has to be supplied by the caller.
+   */
+typedef struct
+  {
+          riscv_cfft_instance_f16 Sint;      /**< Internal CFFT structure. */
+          uint16_t fftLenRFFT;             /**< length of the real sequence. */
+    const float16_t * pTwiddleRFFT;        /**< points to the twiddle factors of the real stage. */
+  } riscv_rfft_fast_instance_f16 ;
+
+/**
+ * @brief Initialization function for the f16 RFFT/RIFFT instance.
+ * @param[in,out] S       points to the riscv_rfft_fast_instance_f16 structure to set up.
+ * @param[in]     fftLen  transform length.
+ *                        NOTE(review): presumably the length of the real sequence
+ *                        (stored in fftLenRFFT) -- confirm against the implementation.
+ * @return        a riscv_status value.
+ *                NOTE(review): presumably RISCV_MATH_SUCCESS on success and
+ *                RISCV_MATH_ARGUMENT_ERROR for an unsupported fftLen -- confirm.
+ */
+riscv_status riscv_rfft_fast_init_f16 (
+         riscv_rfft_fast_instance_f16 * S,
+         uint16_t fftLen);
+
+
+  /**
+   * @brief Processing function for the f16 RFFT/RIFFT.
+   * @param[in]  S         points to an instance of the f16 RFFT structure (not modified).
+   * @param[in]  p         points to the input buffer.
+   *                       NOTE(review): the pointer is non-const, so the input
+   *                       buffer may be modified by the call -- confirm.
+   * @param[out] pOut      points to the output buffer.
+   * @param[in]  ifftFlag  NOTE(review): presumably 0 selects the forward (RFFT) and
+   *                       1 the inverse (RIFFT) transform, matching the ifftFlag
+   *                       convention documented for the CFFT structures -- confirm.
+   */
+  void riscv_rfft_fast_f16(
+        const riscv_rfft_fast_instance_f16 * S,
+        float16_t * p, float16_t * pOut,
+        uint8_t ifftFlag);
+
+/**
+ * @brief Initialization function for the f16 radix-4 CFFT/CIFFT (deprecated).
+ * @param[in,out] S               points to the riscv_cfft_radix4_instance_f16 structure to set up.
+ * @param[in]     fftLen          length of the FFT.
+ * @param[in]     ifftFlag        selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
+ * @param[in]     bitReverseFlag  enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+ * @return        a riscv_status value.
+ *
+ * NOTE(review): presumably superseded by riscv_cfft_init_f16() / riscv_cfft_f16() -- confirm.
+ */
+  riscv_status riscv_cfft_radix4_init_f16(
+        riscv_cfft_radix4_instance_f16 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/**
+ * @brief Processing function for the f16 radix-4 CFFT/CIFFT (deprecated).
+ * @param[in]     S     points to an instance of the f16 radix-4 CFFT/CIFFT structure (not modified).
+ * @param[in,out] pSrc  points to the float16_t data buffer.
+ *                      NOTE(review): presumably complex data processed in place -- confirm.
+ */
+  void riscv_cfft_radix4_f16(
+  const riscv_cfft_radix4_instance_f16 * S,
+        float16_t * pSrc);
+
+
+/**
+ * @brief Initialization function for the f16 radix-2 CFFT/CIFFT (deprecated).
+ * @param[in,out] S               points to the riscv_cfft_radix2_instance_f16 structure to set up.
+ * @param[in]     fftLen          length of the FFT.
+ * @param[in]     ifftFlag        selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
+ * @param[in]     bitReverseFlag  enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+ * @return        a riscv_status value.
+ *
+ * NOTE(review): presumably superseded by riscv_cfft_init_f16() / riscv_cfft_f16() -- confirm.
+ */
+  riscv_status riscv_cfft_radix2_init_f16(
+        riscv_cfft_radix2_instance_f16 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/**
+ * @brief Processing function for the f16 radix-2 CFFT/CIFFT (deprecated).
+ * @param[in]     S     points to an instance of the f16 radix-2 CFFT/CIFFT structure (not modified).
+ * @param[in,out] pSrc  points to the float16_t data buffer.
+ *                      NOTE(review): presumably complex data processed in place -- confirm.
+ */
+  void riscv_cfft_radix2_f16(
+  const riscv_cfft_radix2_instance_f16 * S,
+        float16_t * pSrc);
+  
+#endif /* defined(RISCV_FLOAT16_SUPPORTED)*/
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _TRANSFORM_FUNCTIONS_F16_H_ */
--- a/components/nmsis/dsp/inc/dsp/utils.h
+++ b/components/nmsis/dsp/inc/dsp/utils.h
@ -0,0 +1,241 @@
+/******************************************************************************
+ * @file     utils.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     20. July 2020
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_MATH_UTILS_H_
+
+#define _RISCV_MATH_UTILS_H_
+
+#include "riscv_math_types.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+  /**
+   * @brief Macros required for reciprocal calculation in Normalized LMS
+   */
+
+/* Keeps the low 6 bits of a value; the reciprocal helpers in this header use it
+ * to form the index (0..63) into the reciprocal lookup table. */
+#define INDEX_MASK         0x0000003F
+
+
+/* Square of x. The argument is expanded twice, so it must not have side effects. */
+#define SQ(x) ((x) * (x))
+
+/* Rounds N up to the nearest multiple of S using integer division.
+ * Intended for non-negative integer N and positive S; both arguments are
+ * expanded more than once, so they must not have side effects. */
+#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))
+
+
+  /**
+   * @brief Function to calculate the 1/in (reciprocal) value of Q31 data type.
+   * @param[in]  in           value whose reciprocal is computed.
+   * @param[out] dst          receives the reciprocal mantissa ("1.31 with exp 1" per the
+   *                          comments in the body).
+   * @param[in]  pRecipTable  table of initial reciprocal estimates; it is read with a
+   *                          6-bit index, so it needs at least 64 entries.
+   * @return     signBits + 1, where signBits is the left shift that was applied to
+   *             normalize the input.
+   *
+   * NOTE(review): the else branch also handles in == 0, and -in presumably overflows
+   * for the most negative q31_t value; neither case is special-cased here -- confirm
+   * that callers never pass them.
+   */
+  __STATIC_FORCEINLINE uint32_t riscv_recip_q31(
+        q31_t in,
+        q31_t * dst,
+  const q31_t * pRecipTable)
+  {
+    q31_t out;
+    uint32_t tempVal;
+    uint32_t index, i;
+    uint32_t signBits;
+
+    /* Leading-zero count of the magnitude minus one (the sign bit) gives the
+       number of positions the value can be shifted left
+       (assumes __CLZ counts leading zeros of a 32-bit word -- defined elsewhere). */
+    if (in > 0)
+    {
+      signBits = ((uint32_t) (__CLZ( in) - 1));
+    }
+    else
+    {
+      signBits = ((uint32_t) (__CLZ(-in) - 1));
+    }
+
+    /* Convert input sample to 1.31 format */
+    in = (in << signBits);
+
+    /* calculation of index for initial approximated Val:
+       bits 24..29 of the normalized input select the table entry */
+    index = (uint32_t)(in >> 24);
+    index = (index & INDEX_MASK);
+
+    /* 1.31 with exp 1 */
+    out = pRecipTable[index];
+
+    /* calculation of reciprocal value */
+    /* running approximation for two iterations:
+       each pass computes out = out * (1 - in * out) with fixed-point rescaling,
+       i.e. a Newton-Raphson style refinement of the table estimate */
+    for (i = 0U; i < 2U; i++)
+    {
+      tempVal = (uint32_t) (((q63_t) in * out) >> 31);
+      tempVal = 0x7FFFFFFFu - tempVal;
+      /*      1.31 with exp 1 */
+      /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */
+      /* the product is passed through clip_q63_to_q31 rather than a plain cast */
+      out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30);
+    }
+
+    /* write output */
+    *dst = out;
+
+    /* return num of signbits of out = 1/in value */
+    return (signBits + 1U);
+  }
+
+
+  /**
+   * @brief Function to calculate the 1/in (reciprocal) value of Q15 data type.
+   * @param[in]  in           value whose reciprocal is computed.
+   * @param[out] dst          receives the reciprocal mantissa ("1.15 with exp 1" per the
+   *                          comments in the body).
+   * @param[in]  pRecipTable  table of initial reciprocal estimates; it is read with a
+   *                          6-bit index, so it needs at least 64 entries.
+   * @return     signBits + 1, where signBits is the left shift that was applied to
+   *             normalize the input.
+   *
+   * NOTE(review): the else branch also handles in == 0, and for the most negative
+   * q15_t value __CLZ(-in) - 17 presumably becomes negative before the cast to
+   * uint32_t; neither case is special-cased here -- confirm that callers never
+   * pass them.
+   * NOTE(review): unlike riscv_recip_q31, the iteration result is narrowed with a
+   * plain cast; the clipping variant is left commented out in the body.
+   */
+  __STATIC_FORCEINLINE uint32_t riscv_recip_q15(
+        q15_t in,
+        q15_t * dst,
+  const q15_t * pRecipTable)
+  {
+    q15_t out = 0;
+    uint32_t tempVal = 0;
+    uint32_t index = 0, i = 0;
+    uint32_t signBits = 0;
+
+    /* 17 = 16 bits gained by widening the 16-bit input to the __CLZ operand
+       plus 1 sign bit (assumes __CLZ counts leading zeros of a 32-bit word --
+       defined elsewhere) */
+    if (in > 0)
+    {
+      signBits = ((uint32_t)(__CLZ( in) - 17));
+    }
+    else
+    {
+      signBits = ((uint32_t)(__CLZ(-in) - 17));
+    }
+
+    /* Convert input sample to 1.15 format */
+    in = (in << signBits);
+
+    /* calculation of index for initial approximated Val:
+       bits 8..13 of the normalized input select the table entry */
+    index = (uint32_t)(in >>  8);
+    index = (index & INDEX_MASK);
+
+    /*      1.15 with exp 1  */
+    out = pRecipTable[index];
+
+    /* calculation of reciprocal value */
+    /* running approximation for two iterations:
+       each pass computes out = out * (1 - in * out) with fixed-point rescaling,
+       i.e. a Newton-Raphson style refinement of the table estimate */
+    for (i = 0U; i < 2U; i++)
+    {
+      tempVal = (uint32_t) (((q31_t) in * out) >> 15);
+      tempVal = 0x7FFFu - tempVal;
+      /*      1.15 with exp 1 */
+      out = (q15_t) (((q31_t) out * tempVal) >> 14);
+      /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */
+    }
+
+    /* write output */
+    *dst = out;
+
+    /* return num of signbits of out = 1/in value */
+    return (signBits + 1);
+  }
+
+
+/**
+ * @brief  64-bit to 32-bit unsigned normalization
+ * @param[in]  in           is input unsigned long long value
+ * @param[out] normalized   is the 32-bit normalized value
+ * @param[out] norm         is norm scale
+ *
+ * The input is scaled so that its most significant set bit ends up in bit 30
+ * of the result: normalized = in << norm when norm >= 0 and
+ * normalized = in >> -norm when norm < 0. A zero input yields
+ * normalized = 0 and norm = 0.
+ */
+__STATIC_INLINE  void riscv_norm_64_to_32u(uint64_t in, int32_t * normalized, int32_t *norm)
+{
+    int32_t     n1;
+    int32_t     hi = (int32_t) (in >> 32);
+    int32_t     lo = (int32_t) ((in << 32) >> 32);
+
+    /* n1 is 0 only when the upper word is zero (relies on __CLZ(0) == 32) */
+    n1 = __CLZ(hi) - 32;
+    if (!n1)
+    {
+        /*
+         * input fits in 32-bit
+         */
+        n1 = __CLZ(lo);
+        if (!n1)
+        {
+            /*
+             * MSB set, need to scale down by 1
+             */
+            *norm = -1;
+            *normalized = (((uint32_t) lo) >> 1);
+        } else
+        {
+            if (n1 == 32)
+            {
+                /*
+                 * input is zero
+                 */
+                *norm = 0;
+                *normalized = 0;
+            } else
+            {
+                /*
+                 * 32-bit normalization
+                 */
+                *norm = n1 - 1;
+                *normalized = lo << *norm;
+            }
+        }
+    } else
+    {
+        /*
+         * input needs more than 32 bits: n1 becomes the right shift (1..33)
+         * that brings the top set bit down to bit 30
+         */
+        n1 = 1 - n1;
+        *norm = -n1;
+        /*
+         * 64 bit normalization
+         *
+         * Shift the 64-bit input directly. Combining the two words as
+         * (lo >> n1) | (hi << (32 - n1)) is undefined for n1 == 32 and
+         * n1 == 33 (shift count >= word width, or negative), which happens
+         * when bit 30 or bit 31 of the upper word is set; shifters that only
+         * use the low bits of the count would then leak lo into the result.
+         * The shifted value is always below 2^31, so the cast is lossless.
+         */
+        *normalized = (int32_t) (in >> n1);
+    }
+}
+
+/**
+ * @brief  Divide a 64-bit (q63_t) value by a 32-bit (q31_t) value.
+ * @param[in]  num  numerator
+ * @param[in]  den  denominator; must be non-zero (not checked here)
+ * @return     num / den converted to q31_t (plain cast, no saturation)
+ *
+ * A 32-bit division is used when the magnitude of the numerator is small
+ * enough for riscv_norm_64_to_32u() to report a positive norm; otherwise the
+ * division is carried out in 64 bits.
+ */
+__STATIC_INLINE q31_t riscv_div_q63_to_q31(q63_t num, q31_t den)
+{
+    q31_t   result;
+    uint64_t   absNum;
+    int32_t   normalized;
+    int32_t   norm;
+
+    /*
+     * if sum fits in 32bits
+     * avoid costly 64-bit division
+     *
+     * The magnitude is formed in unsigned arithmetic: negating the most
+     * negative q63_t value as a signed quantity would overflow.
+     */
+    absNum = (num > 0) ? (uint64_t) num : ((uint64_t) 0 - (uint64_t) num);
+    riscv_norm_64_to_32u(absNum, &normalized, &norm);
+    if (norm > 0)
+        /*
+         * 32-bit division
+         */
+        result = (q31_t) num / den;
+    else
+        /*
+         * 64-bit division
+         */
+        result = (q31_t) (num / den);
+
+    return result;
+}
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*ifndef _RISCV_MATH_UTILS_H_ */
--- a/components/nmsis/dsp/inc/riscv_common_tables.h
+++ b/components/nmsis/dsp/inc/riscv_common_tables.h
@ -0,0 +1,513 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_common_tables.h
+ * Description:  Extern declaration for common tables
+ *
+ * @version  V1.9.0
+ * @date     23 April 2021
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_COMMON_TABLES_H
+#define _RISCV_COMMON_TABLES_H
+
+#include "riscv_math_types.h"
+#include "dsp/fast_math_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_FFT_ALLOW_TABLES)
+  /* Double Precision Float CFFT twiddles */
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREV_1024)
+    extern const uint16_t riscvBitRevTable[1024];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F64_16)
+    extern const uint64_t twiddleCoefF64_16[32];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F64_32)
+    extern const uint64_t twiddleCoefF64_32[64];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F64_64)
+    extern const uint64_t twiddleCoefF64_64[128];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F64_128)
+    extern const uint64_t twiddleCoefF64_128[256];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F64_256)
+    extern const uint64_t twiddleCoefF64_256[512];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F64_512)
+    extern const uint64_t twiddleCoefF64_512[1024];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F64_1024)
+    extern const uint64_t twiddleCoefF64_1024[2048];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F64_2048)
+    extern const uint64_t twiddleCoefF64_2048[4096];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F64_4096)
+    extern const uint64_t twiddleCoefF64_4096[8192];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F32_16)
+    extern const float32_t twiddleCoef_16[32];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F32_32)
+    extern const float32_t twiddleCoef_32[64];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F32_64)
+    extern const float32_t twiddleCoef_64[128];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F32_128)
+    extern const float32_t twiddleCoef_128[256];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F32_256)
+    extern const float32_t twiddleCoef_256[512];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F32_512)
+    extern const float32_t twiddleCoef_512[1024];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F32_1024)
+    extern const float32_t twiddleCoef_1024[2048];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F32_2048)
+    extern const float32_t twiddleCoef_2048[4096];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F32_4096)
+    extern const float32_t twiddleCoef_4096[8192];
+    #define twiddleCoef twiddleCoef_4096
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  /* Q31 */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q31_16)
+    extern const q31_t twiddleCoef_16_q31[24];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q31_32)
+    extern const q31_t twiddleCoef_32_q31[48];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q31_64)
+    extern const q31_t twiddleCoef_64_q31[96];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q31_128)
+    extern const q31_t twiddleCoef_128_q31[192];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q31_256)
+    extern const q31_t twiddleCoef_256_q31[384];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q31_512)
+    extern const q31_t twiddleCoef_512_q31[768];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q31_1024)
+    extern const q31_t twiddleCoef_1024_q31[1536];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q31_2048)
+    extern const q31_t twiddleCoef_2048_q31[3072];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q31_4096)
+    extern const q31_t twiddleCoef_4096_q31[6144];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q15_16)
+    extern const q15_t twiddleCoef_16_q15[24];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q15_32)
+    extern const q15_t twiddleCoef_32_q15[48];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q15_64)
+    extern const q15_t twiddleCoef_64_q15[96];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q15_128)
+    extern const q15_t twiddleCoef_128_q15[192];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q15_256)
+    extern const q15_t twiddleCoef_256_q15[384];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q15_512)
+    extern const q15_t twiddleCoef_512_q15[768];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q15_1024)
+    extern const q15_t twiddleCoef_1024_q15[1536];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q15_2048)
+    extern const q15_t twiddleCoef_2048_q15[3072];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_Q15_4096)
+    extern const q15_t twiddleCoef_4096_q15[6144];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  /* Double Precision Float RFFT twiddles */
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F64_32)
+    extern const uint64_t twiddleCoefF64_rfft_32[32];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F64_64)
+    extern const uint64_t twiddleCoefF64_rfft_64[64];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F64_128)
+    extern const uint64_t twiddleCoefF64_rfft_128[128];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F64_256)
+    extern const uint64_t twiddleCoefF64_rfft_256[256];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F64_512)
+    extern const uint64_t twiddleCoefF64_rfft_512[512];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F64_1024)
+    extern const uint64_t twiddleCoefF64_rfft_1024[1024];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F64_2048)
+    extern const uint64_t twiddleCoefF64_rfft_2048[2048];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F64_4096)
+    extern const uint64_t twiddleCoefF64_rfft_4096[4096];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F32_32)
+    extern const float32_t twiddleCoef_rfft_32[32];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F32_64)
+    extern const float32_t twiddleCoef_rfft_64[64];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F32_128)
+    extern const float32_t twiddleCoef_rfft_128[128];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F32_256)
+    extern const float32_t twiddleCoef_rfft_256[256];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F32_512)
+    extern const float32_t twiddleCoef_rfft_512[512];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F32_1024)
+    extern const float32_t twiddleCoef_rfft_1024[1024];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F32_2048)
+    extern const float32_t twiddleCoef_rfft_2048[2048];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F32_4096)
+    extern const float32_t twiddleCoef_rfft_4096[4096];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+
+  /* Double precision floating-point bit reversal tables */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT64_16)
+    #define RISCVBITREVINDEXTABLEF64_16_TABLE_LENGTH ((uint16_t)12)
+    extern const uint16_t riscvBitRevIndexTableF64_16[RISCVBITREVINDEXTABLEF64_16_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT64_32)
+    #define RISCVBITREVINDEXTABLEF64_32_TABLE_LENGTH ((uint16_t)24)
+    extern const uint16_t riscvBitRevIndexTableF64_32[RISCVBITREVINDEXTABLEF64_32_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT64_64)
+    #define RISCVBITREVINDEXTABLEF64_64_TABLE_LENGTH ((uint16_t)56)
+    extern const uint16_t riscvBitRevIndexTableF64_64[RISCVBITREVINDEXTABLEF64_64_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT64_128)
+    #define RISCVBITREVINDEXTABLEF64_128_TABLE_LENGTH ((uint16_t)112)
+    extern const uint16_t riscvBitRevIndexTableF64_128[RISCVBITREVINDEXTABLEF64_128_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT64_256)
+    #define RISCVBITREVINDEXTABLEF64_256_TABLE_LENGTH ((uint16_t)240)
+    extern const uint16_t riscvBitRevIndexTableF64_256[RISCVBITREVINDEXTABLEF64_256_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT64_512)
+    #define RISCVBITREVINDEXTABLEF64_512_TABLE_LENGTH ((uint16_t)480)
+    extern const uint16_t riscvBitRevIndexTableF64_512[RISCVBITREVINDEXTABLEF64_512_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT64_1024)
+    #define RISCVBITREVINDEXTABLEF64_1024_TABLE_LENGTH ((uint16_t)992)
+    extern const uint16_t riscvBitRevIndexTableF64_1024[RISCVBITREVINDEXTABLEF64_1024_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT64_2048)
+    #define RISCVBITREVINDEXTABLEF64_2048_TABLE_LENGTH ((uint16_t)1984)
+    extern const uint16_t riscvBitRevIndexTableF64_2048[RISCVBITREVINDEXTABLEF64_2048_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT64_4096)
+    #define RISCVBITREVINDEXTABLEF64_4096_TABLE_LENGTH ((uint16_t)4032)
+    extern const uint16_t riscvBitRevIndexTableF64_4096[RISCVBITREVINDEXTABLEF64_4096_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+  /* floating-point bit reversal tables */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT_16)
+    #define RISCVBITREVINDEXTABLE_16_TABLE_LENGTH ((uint16_t)20)
+    extern const uint16_t riscvBitRevIndexTable16[RISCVBITREVINDEXTABLE_16_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT_32)
+    #define RISCVBITREVINDEXTABLE_32_TABLE_LENGTH ((uint16_t)48)
+    extern const uint16_t riscvBitRevIndexTable32[RISCVBITREVINDEXTABLE_32_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT_64)
+    #define RISCVBITREVINDEXTABLE_64_TABLE_LENGTH ((uint16_t)56)
+    extern const uint16_t riscvBitRevIndexTable64[RISCVBITREVINDEXTABLE_64_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT_128)
+    #define RISCVBITREVINDEXTABLE_128_TABLE_LENGTH ((uint16_t)208)
+    extern const uint16_t riscvBitRevIndexTable128[RISCVBITREVINDEXTABLE_128_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT_256)
+    #define RISCVBITREVINDEXTABLE_256_TABLE_LENGTH ((uint16_t)440)
+    extern const uint16_t riscvBitRevIndexTable256[RISCVBITREVINDEXTABLE_256_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT_512)
+    #define RISCVBITREVINDEXTABLE_512_TABLE_LENGTH ((uint16_t)448)
+    extern const uint16_t riscvBitRevIndexTable512[RISCVBITREVINDEXTABLE_512_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT_1024)
+    #define RISCVBITREVINDEXTABLE_1024_TABLE_LENGTH ((uint16_t)1800)
+    extern const uint16_t riscvBitRevIndexTable1024[RISCVBITREVINDEXTABLE_1024_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT_2048)
+    #define RISCVBITREVINDEXTABLE_2048_TABLE_LENGTH ((uint16_t)3808)
+    extern const uint16_t riscvBitRevIndexTable2048[RISCVBITREVINDEXTABLE_2048_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FLT_4096)
+    #define RISCVBITREVINDEXTABLE_4096_TABLE_LENGTH ((uint16_t)4032)
+    extern const uint16_t riscvBitRevIndexTable4096[RISCVBITREVINDEXTABLE_4096_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+
+  /* fixed-point bit reversal tables */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FXT_16)
+    #define RISCVBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH ((uint16_t)12)
+    extern const uint16_t riscvBitRevIndexTable_fixed_16[RISCVBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FXT_32)
+    #define RISCVBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH ((uint16_t)24)
+    extern const uint16_t riscvBitRevIndexTable_fixed_32[RISCVBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FXT_64)
+    #define RISCVBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH ((uint16_t)56)
+    extern const uint16_t riscvBitRevIndexTable_fixed_64[RISCVBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FXT_128)
+    #define RISCVBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH ((uint16_t)112)
+    extern const uint16_t riscvBitRevIndexTable_fixed_128[RISCVBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FXT_256)
+    #define RISCVBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH ((uint16_t)240)
+    extern const uint16_t riscvBitRevIndexTable_fixed_256[RISCVBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FXT_512)
+    #define RISCVBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH ((uint16_t)480)
+    extern const uint16_t riscvBitRevIndexTable_fixed_512[RISCVBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FXT_1024)
+    #define RISCVBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH ((uint16_t)992)
+    extern const uint16_t riscvBitRevIndexTable_fixed_1024[RISCVBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FXT_2048)
+    #define RISCVBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH ((uint16_t)1984)
+    extern const uint16_t riscvBitRevIndexTable_fixed_2048[RISCVBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_BITREVIDX_FXT_4096)
+    #define RISCVBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH ((uint16_t)4032)
+    extern const uint16_t riscvBitRevIndexTable_fixed_4096[RISCVBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_REALCOEF_F32)
+    extern const float32_t realCoefA[8192];
+    extern const float32_t realCoefB[8192];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_REALCOEF_Q31)
+    extern const q31_t realCoefAQ31[8192];
+    extern const q31_t realCoefBQ31[8192];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_REALCOEF_Q15)
+    extern const q15_t realCoefAQ15[8192];
+    extern const q15_t realCoefBQ15[8192];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_F32_128)
+    extern const float32_t Weights_128[256];
+    extern const float32_t cos_factors_128[128];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_F32_512)
+    extern const float32_t Weights_512[1024];
+    extern const float32_t cos_factors_512[512];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_F32_2048)
+    extern const float32_t Weights_2048[4096];
+    extern const float32_t cos_factors_2048[2048];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_F32_8192)
+    extern const float32_t Weights_8192[16384];
+    extern const float32_t cos_factors_8192[8192];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_Q15_128)
+    extern const q15_t WeightsQ15_128[256];
+    extern const q15_t cos_factorsQ15_128[128];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_Q15_512)
+    extern const q15_t WeightsQ15_512[1024];
+    extern const q15_t cos_factorsQ15_512[512];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_Q15_2048)
+    extern const q15_t WeightsQ15_2048[4096];
+    extern const q15_t cos_factorsQ15_2048[2048];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_Q15_8192)
+    extern const q15_t WeightsQ15_8192[16384];
+    extern const q15_t cos_factorsQ15_8192[8192];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_Q31_128)
+    extern const q31_t WeightsQ31_128[256];
+    extern const q31_t cos_factorsQ31_128[128];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_Q31_512)
+    extern const q31_t WeightsQ31_512[1024];
+    extern const q31_t cos_factorsQ31_512[512];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_Q31_2048)
+    extern const q31_t WeightsQ31_2048[4096];
+    extern const q31_t cos_factorsQ31_2048[2048];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_DCT4_Q31_8192)
+    extern const q31_t WeightsQ31_8192[16384];
+    extern const q31_t cos_factorsQ31_8192[8192];
+  #endif
+
+#endif /* if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_FFT_TABLES) */
+
+#if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_FAST_ALLOW_TABLES)
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_RECIP_Q15)
+    extern const q15_t riscvRecipTableQ15[64];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_RECIP_Q15) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_RECIP_Q31)
+    extern const q31_t riscvRecipTableQ31[64];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_RECIP_Q31) */
+
+  /* Tables for Fast Math Sine and Cosine */
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_SIN_F32)
+    extern const float32_t sinTable_f32[FAST_MATH_TABLE_SIZE + 1];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_SIN_F32) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_SIN_Q31)
+    extern const q31_t sinTable_q31[FAST_MATH_TABLE_SIZE + 1];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_SIN_Q31) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_SIN_Q15)
+    extern const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FAST_TABLES) || defined(RISCV_TABLE_SIN_Q15) */
+
+
+
+#endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_FAST_ALLOW_TABLES) */
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*  RISCV_COMMON_TABLES_H */
+
--- a/components/nmsis/dsp/inc/riscv_common_tables_f16.h
+++ b/components/nmsis/dsp/inc/riscv_common_tables_f16.h
@ -0,0 +1,129 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_common_tables_f16.h
+ * Description:  Extern declaration for common tables
+ *
+ * @version  V1.9.0
+ * @date     23 April 2021
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_COMMON_TABLES_F16_H
+#define _RISCV_COMMON_TABLES_F16_H
+
+#include "riscv_math_types_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_FFT_ALLOW_TABLES)
+
+  /* F16 */
+  #if defined(RISCV_FLOAT16_SUPPORTED)
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F16_16)
+    extern const float16_t twiddleCoefF16_16[32];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F16_32)
+    extern const float16_t twiddleCoefF16_32[64];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F16_64)
+    extern const float16_t twiddleCoefF16_64[128];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F16_128)
+    extern const float16_t twiddleCoefF16_128[256];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F16_256)
+    extern const float16_t twiddleCoefF16_256[512];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F16_512)
+    extern const float16_t twiddleCoefF16_512[1024];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F16_1024)
+    extern const float16_t twiddleCoefF16_1024[2048];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F16_2048)
+    extern const float16_t twiddleCoefF16_2048[4096];
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_F16_4096)
+    extern const float16_t twiddleCoefF16_4096[8192];
+    #define twiddleCoefF16 twiddleCoefF16_4096
+  #endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) */
+  
+ 
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F16_32)
+  extern const float16_t twiddleCoefF16_rfft_32[32];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F16_64)
+  extern const float16_t twiddleCoefF16_rfft_64[64];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F16_128)
+  extern const float16_t twiddleCoefF16_rfft_128[128];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F16_256)
+  extern const float16_t twiddleCoefF16_rfft_256[256];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F16_512)
+  extern const float16_t twiddleCoefF16_rfft_512[512];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F16_1024)
+  extern const float16_t twiddleCoefF16_rfft_1024[1024];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F16_2048)
+  extern const float16_t twiddleCoefF16_rfft_2048[2048];
+  #endif
+
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || defined(RISCV_TABLE_TWIDDLECOEF_RFFT_F16_4096)
+  extern const float16_t twiddleCoefF16_rfft_4096[4096];
+  #endif
+
+  #endif /* defined(RISCV_FLOAT16_SUPPORTED) */
+    
+#endif /* !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_FFT_ALLOW_TABLES) */
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+#endif 
+       
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*  _RISCV_COMMON_TABLES_F16_H */
+
+  
--- a/components/nmsis/dsp/inc/riscv_const_structs.h
+++ b/components/nmsis/dsp/inc/riscv_const_structs.h
@ -0,0 +1,87 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_const_structs.h
+ * Description:  Constant structs that are initialized for user convenience.
+ *               For example, some can be given as arguments to the riscv_cfft_f32() function.
+ *
+ * @version  V1.9.0
+ * @date     23 April 2021
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_CONST_STRUCTS_H
+#define _RISCV_CONST_STRUCTS_H
+
+#include "riscv_math_types.h"
+#include "riscv_common_tables.h"
+#include "dsp/transform_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+   extern const riscv_cfft_instance_f64 riscv_cfft_sR_f64_len16;
+   extern const riscv_cfft_instance_f64 riscv_cfft_sR_f64_len32;
+   extern const riscv_cfft_instance_f64 riscv_cfft_sR_f64_len64;
+   extern const riscv_cfft_instance_f64 riscv_cfft_sR_f64_len128;
+   extern const riscv_cfft_instance_f64 riscv_cfft_sR_f64_len256;
+   extern const riscv_cfft_instance_f64 riscv_cfft_sR_f64_len512;
+   extern const riscv_cfft_instance_f64 riscv_cfft_sR_f64_len1024;
+   extern const riscv_cfft_instance_f64 riscv_cfft_sR_f64_len2048;
+   extern const riscv_cfft_instance_f64 riscv_cfft_sR_f64_len4096;
+
+   extern const riscv_cfft_instance_f32 riscv_cfft_sR_f32_len16;
+   extern const riscv_cfft_instance_f32 riscv_cfft_sR_f32_len32;
+   extern const riscv_cfft_instance_f32 riscv_cfft_sR_f32_len64;
+   extern const riscv_cfft_instance_f32 riscv_cfft_sR_f32_len128;
+   extern const riscv_cfft_instance_f32 riscv_cfft_sR_f32_len256;
+   extern const riscv_cfft_instance_f32 riscv_cfft_sR_f32_len512;
+   extern const riscv_cfft_instance_f32 riscv_cfft_sR_f32_len1024;
+   extern const riscv_cfft_instance_f32 riscv_cfft_sR_f32_len2048;
+   extern const riscv_cfft_instance_f32 riscv_cfft_sR_f32_len4096;
+
+   extern const riscv_cfft_instance_q31 riscv_cfft_sR_q31_len16;
+   extern const riscv_cfft_instance_q31 riscv_cfft_sR_q31_len32;
+   extern const riscv_cfft_instance_q31 riscv_cfft_sR_q31_len64;
+   extern const riscv_cfft_instance_q31 riscv_cfft_sR_q31_len128;
+   extern const riscv_cfft_instance_q31 riscv_cfft_sR_q31_len256;
+   extern const riscv_cfft_instance_q31 riscv_cfft_sR_q31_len512;
+   extern const riscv_cfft_instance_q31 riscv_cfft_sR_q31_len1024;
+   extern const riscv_cfft_instance_q31 riscv_cfft_sR_q31_len2048;
+   extern const riscv_cfft_instance_q31 riscv_cfft_sR_q31_len4096;
+
+   extern const riscv_cfft_instance_q15 riscv_cfft_sR_q15_len16;
+   extern const riscv_cfft_instance_q15 riscv_cfft_sR_q15_len32;
+   extern const riscv_cfft_instance_q15 riscv_cfft_sR_q15_len64;
+   extern const riscv_cfft_instance_q15 riscv_cfft_sR_q15_len128;
+   extern const riscv_cfft_instance_q15 riscv_cfft_sR_q15_len256;
+   extern const riscv_cfft_instance_q15 riscv_cfft_sR_q15_len512;
+   extern const riscv_cfft_instance_q15 riscv_cfft_sR_q15_len1024;
+   extern const riscv_cfft_instance_q15 riscv_cfft_sR_q15_len2048;
+   extern const riscv_cfft_instance_q15 riscv_cfft_sR_q15_len4096;
+
+#ifdef   __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _RISCV_CONST_STRUCTS_H */
+
--- a/components/nmsis/dsp/inc/riscv_const_structs_f16.h
+++ b/components/nmsis/dsp/inc/riscv_const_structs_f16.h
@ -0,0 +1,78 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_const_structs_f16.h
+ * Description:  Constant structs that are initialized for user convenience.
+ *               For example, some can be given as arguments to the riscv_cfft_f16() function.
+ *
+ * @version  V1.9.0
+ * @date     23 April 2021
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_CONST_STRUCTS_F16_H
+#define _RISCV_CONST_STRUCTS_F16_H
+
+#include "riscv_math_types_f16.h"
+#include "riscv_common_tables.h"
+#include "riscv_common_tables_f16.h"
+#include "dsp/transform_functions_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || (defined(RISCV_TABLE_TWIDDLECOEF_F16_16) && defined(RISCV_TABLE_BITREVIDX_FLT_16))
+   extern const riscv_cfft_instance_f16 riscv_cfft_sR_f16_len16;
+   #endif
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || (defined(RISCV_TABLE_TWIDDLECOEF_F16_32) && defined(RISCV_TABLE_BITREVIDX_FLT_32))
+   extern const riscv_cfft_instance_f16 riscv_cfft_sR_f16_len32;
+    #endif
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || (defined(RISCV_TABLE_TWIDDLECOEF_F16_64) && defined(RISCV_TABLE_BITREVIDX_FLT_64))
+   extern const riscv_cfft_instance_f16 riscv_cfft_sR_f16_len64;
+    #endif
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || (defined(RISCV_TABLE_TWIDDLECOEF_F16_128) && defined(RISCV_TABLE_BITREVIDX_FLT_128))
+   extern const riscv_cfft_instance_f16 riscv_cfft_sR_f16_len128;
+    #endif
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || (defined(RISCV_TABLE_TWIDDLECOEF_F16_256) && defined(RISCV_TABLE_BITREVIDX_FLT_256))
+   extern const riscv_cfft_instance_f16 riscv_cfft_sR_f16_len256;
+    #endif
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || (defined(RISCV_TABLE_TWIDDLECOEF_F16_512) && defined(RISCV_TABLE_BITREVIDX_FLT_512))
+   extern const riscv_cfft_instance_f16 riscv_cfft_sR_f16_len512;
+    #endif
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || (defined(RISCV_TABLE_TWIDDLECOEF_F16_1024) && defined(RISCV_TABLE_BITREVIDX_FLT_1024))
+   extern const riscv_cfft_instance_f16 riscv_cfft_sR_f16_len1024;
+    #endif
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || (defined(RISCV_TABLE_TWIDDLECOEF_F16_2048) && defined(RISCV_TABLE_BITREVIDX_FLT_2048))
+   extern const riscv_cfft_instance_f16 riscv_cfft_sR_f16_len2048;
+    #endif
+  #if !defined(RISCV_DSP_CONFIG_TABLES) || defined(RISCV_ALL_FFT_TABLES) || (defined(RISCV_TABLE_TWIDDLECOEF_F16_4096) && defined(RISCV_TABLE_BITREVIDX_FLT_4096))
+   extern const riscv_cfft_instance_f16 riscv_cfft_sR_f16_len4096;
+  #endif
+#endif /* defined(RISCV_FLOAT16_SUPPORTED) */
+
+#ifdef   __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _RISCV_CONST_STRUCTS_F16_H */
--- a/components/nmsis/dsp/inc/riscv_helium_utils.h
+++ b/components/nmsis/dsp/inc/riscv_helium_utils.h
@ -0,0 +1,75 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_helium_utils.h
+ * Description:  Utility functions for Helium development
+ *
+ * @version  V1.9.0
+ * @date     23 April 2021
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_UTILS_HELIUM_H_
+#define _RISCV_UTILS_HELIUM_H_
+
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+/***************************************
+
+Definitions available for MVEF and MVEI
+
+***************************************/
+
+/***************************************
+
+Definitions available for MVEF only
+
+***************************************/
+
+
+/***************************************
+
+Definitions available for f16 datatype with HW acceleration only
+
+***************************************/
+#if defined(RISCV_FLOAT16_SUPPORTED)
+#endif 
+
+/***************************************
+
+Definitions available for MVEI and MVEF only
+
+***************************************/
+
+/***************************************
+
+Definitions available for MVEI only
+
+***************************************/
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif
--- a/components/nmsis/dsp/inc/riscv_math.h
+++ b/components/nmsis/dsp/inc/riscv_math.h
@ -0,0 +1,155 @@
+/******************************************************************************
+ * @file     riscv_math.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+   \mainpage NMSIS DSP Software Library
+   *
+   * \section intro Introduction
+   *
+   * This user manual describes the NMSIS DSP software library,
+   * a suite of common signal processing functions for use on Nuclei N/NX processor based devices.
+   *
+   * The library is divided into a number of functions each covering a specific category:
+   * - Basic math functions
+   * - Fast math functions
+   * - Complex math functions
+   * - Filtering functions
+   * - Matrix functions
+   * - Transform functions
+   * - Motor control functions
+   * - Statistical functions
+   * - Support functions
+   * - Interpolation functions
+   * - Support Vector Machine functions (SVM)
+   * - Bayes classifier functions
+   * - Distance functions
+   * - Quaternion functions
+   *
+   * The library generally has separate functions for operating on 8-bit integers, 16-bit integers,
+   * 32-bit integers and 32-bit floating-point values.
+   *
+   * The library functions are declared in the public file <code>riscv_math.h</code> which is placed in the <code>Include</code> folder.
+   * Simply include this file and link the appropriate library in the application and begin calling the library functions.
+   * The library provides a single public header file, <code>riscv_math.h</code>, for little-endian Nuclei N cores.
+   * The same header file is used for floating-point unit (FPU) variants.
+   *
+   * \note Please refer to [NMSIS-DSP](../../../dsp/index.html)
+   *
+   * \section example Examples
+   *
+   * The library ships with a number of examples which demonstrate how to use the library functions.
+   *
+   * Toolchain Support
+   * -----------------
+   *
+   * The library has been developed and tested with nuclei riscv gcc toolchain.
+   *
+   * Building the Library
+   * --------------------
+   *
+   * The NMSIS repo contains a Makefile in the <code>NMSIS/</code> folder for rebuilding the libraries with the nuclei riscv gcc toolchain.
+   * * In the *NMSIS* folder, you can run `make gen_dsp_lib` to build and install the DSP library into the **NMSIS/Library/DSP/GCC** folder.
+   *
+   * Preprocessor Macros
+   * -------------------
+   *
+   * Each library project has different preprocessor macros.
+   *
+   * - RISCV_MATH_MATRIX_CHECK:
+   *
+   * Define macro RISCV_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
+   *
+   * - RISCV_MATH_ROUNDING:
+   *
+   * Define macro RISCV_MATH_ROUNDING for rounding on support functions
+   *
+   * - RISCV_MATH_LOOPUNROLL:
+   *
+   * Define macro RISCV_MATH_LOOPUNROLL to enable manual loop unrolling in DSP functions
+   *
+   */
+
+
+/**
+ * @defgroup groupExamples Examples
+ */
+
+
+
+
+#ifndef _RISCV_MATH_H
+#define _RISCV_MATH_H
+
+
+#include "riscv_math_types.h"
+#include "riscv_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/basic_math_functions.h"
+#include "dsp/interpolation_functions.h"
+#include "dsp/bayes_functions.h"
+#include "dsp/matrix_functions.h"
+#include "dsp/complex_math_functions.h"
+#include "dsp/statistics_functions.h"
+#include "dsp/controller_functions.h"
+#include "dsp/support_functions.h"
+#include "dsp/distance_functions.h"
+#include "dsp/svm_functions.h"
+#include "dsp/fast_math_functions.h"
+#include "dsp/transform_functions.h"
+#include "dsp/filtering_functions.h"
+#include "dsp/quaternion_math_functions.h"
+
+
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+
+
+//#define TABLE_SPACING_Q31     0x400000
+//#define TABLE_SPACING_Q15     0x80
+
+
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+
+#endif /* _RISCV_MATH_H */
+
+/**
+ *
+ * End of file.
+ */
--- a/components/nmsis/dsp/inc/riscv_math_f16.h
+++ b/components/nmsis/dsp/inc/riscv_math_f16.h
@ -0,0 +1,60 @@
+/******************************************************************************
+ * @file     riscv_math_f16.h
+ * @brief    Public header file for f16 function of the NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_MATH_F16_H
+#define _RISCV_MATH_F16_H
+
+#include "riscv_math.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#include "riscv_math_types_f16.h"
+#include "dsp/none.h"
+#include "dsp/utils.h"
+#include "dsp/basic_math_functions_f16.h"
+#include "dsp/interpolation_functions_f16.h"
+#include "dsp/bayes_functions_f16.h"
+#include "dsp/matrix_functions_f16.h"
+#include "dsp/complex_math_functions_f16.h"
+#include "dsp/statistics_functions_f16.h"
+#include "dsp/controller_functions_f16.h"
+#include "dsp/support_functions_f16.h"
+#include "dsp/distance_functions_f16.h"
+#include "dsp/svm_functions_f16.h"
+#include "dsp/fast_math_functions_f16.h"
+#include "dsp/transform_functions_f16.h"
+#include "dsp/filtering_functions_f16.h"
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* _RISCV_MATH_F16_H */
+
+
--- a/components/nmsis/dsp/inc/riscv_math_memory.h
+++ b/components/nmsis/dsp/inc/riscv_math_memory.h
@ -0,0 +1,550 @@
+/******************************************************************************
+ * @file     riscv_math_memory.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_MATH_MEMORY_H_
+
+#define _RISCV_MATH_MEMORY_H_
+
+#include "riscv_math_types.h"
+
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/**
+  @brief Element type used by the legacy __SIMD32 access macros to read/write two 16 bit values.
+  @deprecated
+ */
+#if defined(__GNUC__) || defined(__TI_RISCV__) || defined(__CSMC__)
+  #define __SIMD32_TYPE int32_t
+#elif defined(__TASKING__)
+  #define __SIMD32_TYPE __un(aligned) int32_t
+#elif defined(_MSC_VER)
+  #define __SIMD32_TYPE int32_t
+#else
+  #error Unknown compiler
+#endif
+
+/* __SIMD32:        view the pointer variable `addr` as a __SIMD32_TYPE pointer (lvalue).   */
+/* __SIMD32_CONST:  cast the value of `addr` to a __SIMD32_TYPE pointer.                    */
+/* _SIMD32_OFFSET:  dereference `addr` as a __SIMD32_TYPE object.                           */
+/* __SIMD64:        view the pointer variable `addr` as an int64_t pointer (lvalue).        */
+#define __SIMD32(addr)        (*(__SIMD32_TYPE **) &(addr))
+#define __SIMD32_CONST(addr)  ((__SIMD32_TYPE *) (addr))
+#define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE *) (addr))
+#define __SIMD64(addr)        (*(int64_t **) &(addr))
+
+
+/* SIMD replacement */
+
+/**
+  @brief         Load two Q31 values as one 64-bit word and advance the pointer by two elements.
+  @param[in,out] pQ31      address of the read pointer; incremented by 2 after the load
+  @return        Q63 value holding the 8 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q63_t read_q31x2_ia (
+  q31_t ** pQ31)
+{
+  q31_t *src = *pQ31;
+  q63_t packed;
+
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(&packed), (void *)(src), 8);
+#elif __RISCV_XLEN == 64
+  packed = __LD(src);
+#else
+  packed = *((q63_t *)src);
+#endif
+
+  *pQ31 = src + 2;
+  return packed;
+}
+
+/**
+  @brief         Load two Q31 values as one 64-bit word and move the pointer back by two elements.
+  @param[in,out] pQ31      address of the read pointer; decremented by 2 after the load
+  @return        Q63 value holding the 8 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q63_t read_q31x2_da (
+  q31_t ** pQ31)
+{
+  q31_t *src = *pQ31;
+  q63_t packed;
+
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(&packed), (void *)(src), 8);
+#elif __RISCV_XLEN == 64
+  packed = __LD(src);
+#else
+  packed = *((q63_t *)src);
+#endif
+
+  *pQ31 = src - 2;
+  return packed;
+}
+
+/**
+  @brief         Load two Q31 values as one 64-bit word; the pointer is not modified.
+  @param[in]     pQ31      points to the first of the two values to read
+  @return        Q63 value holding the 8 bytes read from pQ31
+ */
+__STATIC_FORCEINLINE q63_t read_q31x2 (
+  q31_t * pQ31)
+{
+  q63_t packed;
+
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(&packed), (void *)(pQ31), 8);
+#elif __RISCV_XLEN == 64
+  packed = __LD(pQ31);
+#else
+  packed = *((q63_t *)pQ31);
+#endif
+
+  return packed;
+}
+
+/**
+  @brief         Store one 64-bit word (two Q31 values) and advance the pointer by two elements.
+  @param[in,out] pQ31      address of the write pointer; incremented by 2 after the store
+  @param[in]     value     Q63 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q31x2_ia (
+  q31_t ** pQ31,
+  q63_t    value)
+{
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(*pQ31), (void *)(&value), 8);
+#elif __RISCV_XLEN == 64
+  __SD(*pQ31, value);
+#else
+  *((q63_t *)*pQ31) = value;
+#endif
+
+  *pQ31 = *pQ31 + 2;
+}
+
+/**
+  @brief         Store one 64-bit word (two Q31 values); the pointer is not modified.
+  @param[in]     pQ31      points to the destination of the two values
+  @param[in]     value     Q63 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q31x2 (
+  q31_t * pQ31,
+  q63_t   value)
+{
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(pQ31), (void *)(&value), 8);
+#elif __RISCV_XLEN == 64
+  __SD(pQ31, value);
+#else
+  *((q63_t *)pQ31) = value;
+#endif
+}
+
+/**
+  @brief         Load two Q15 values as one 32-bit word; the pointer is not modified.
+  @param[in]     pQ15      points to the first of the two values to read
+  @return        Q31 value holding the 4 bytes read from pQ15
+ */
+__STATIC_FORCEINLINE q31_t read_q15x2 (
+  q15_t * pQ15)
+{
+  q31_t word;
+
+#ifndef __RISCV_FEATURE_UNALIGNED
+  word = __LW(pQ15);
+#else
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy (&word, pQ15, 4);
+#endif
+
+  return word;
+}
+
+/**
+  @brief         Load two Q15 values as one 32-bit word and advance the pointer by two elements.
+  @param[in,out] pQ15      address of the read pointer; incremented by 2 after the load
+  @return        Q31 value holding the 4 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q31_t read_q15x2_ia (
+  q15_t ** pQ15)
+{
+  q15_t *src = *pQ15;
+  q31_t word;
+
+#ifndef __RISCV_FEATURE_UNALIGNED
+  word = __LW(src);
+#else
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy (&word, src, 4);
+#endif
+
+  *pQ15 = src + 2;
+  return word;
+}
+
+/**
+  @brief         Load two Q15 values as one 32-bit word and move the pointer back by two elements.
+  @param[in,out] pQ15      address of the read pointer; decremented by 2 after the load
+  @return        Q31 value holding the 4 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q31_t read_q15x2_da (
+  q15_t ** pQ15)
+{
+  q15_t *src = *pQ15;
+  q31_t word;
+
+#ifndef __RISCV_FEATURE_UNALIGNED
+  word = __LW(src);
+#else
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy (&word, src, 4);
+#endif
+
+  *pQ15 = src - 2;
+  return word;
+}
+
+/**
+  @brief         Store one 32-bit word (two Q15 values) and advance the pointer by two elements.
+  @param[in,out] pQ15      address of the write pointer; incremented by 2 after the store
+  @param[in]     value     Q31 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q15x2_ia (
+  q15_t ** pQ15,
+  q31_t    value)
+{
+#ifndef __RISCV_FEATURE_UNALIGNED
+  __SW(*pQ15, value);
+#else
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy (*pQ15, &value, 4);
+#endif
+
+  *pQ15 = *pQ15 + 2;
+}
+
+/**
+  @brief         Store one 32-bit word (two Q15 values); the pointer is not modified.
+  @param[in]     pQ15      points to the destination of the two values
+  @param[in]     value     Q31 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q15x2 (
+  q15_t * pQ15,
+  q31_t   value)
+{
+#ifndef __RISCV_FEATURE_UNALIGNED
+  __SW(pQ15, value);
+#else
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy (pQ15, &value, 4);
+#endif
+}
+
+/**
+  @brief         Store one 64-bit word (four Q15 values) and advance the pointer by four elements.
+  @param[in,out] pQ15      address of the write pointer; incremented by 4 after the store
+  @param[in]     value     Q63 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q15x4_ia (
+  q15_t ** pQ15,
+  q63_t    value)
+{
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(*pQ15), (void *)(&value), 8);
+#elif __RISCV_XLEN == 64
+  __SD(*pQ15, value);
+#else
+  *((q63_t *)*pQ15) = value;
+#endif
+
+  *pQ15 = *pQ15 + 4;
+}
+
+/**
+  @brief         Store one 64-bit word (four Q15 values) and move the pointer back by four elements.
+  @param[in,out] pQ15      address of the write pointer; decremented by 4 after the store
+  @param[in]     value     Q63 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q15x4_da (
+  q15_t ** pQ15,
+  q63_t    value)
+{
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(*pQ15), (void *)(&value), 8);
+#elif __RISCV_XLEN == 64
+  __SD(*pQ15, value);
+#else
+  *((q63_t *)*pQ15) = value;
+#endif
+
+  *pQ15 = *pQ15 - 4;
+}
+
+/**
+  @brief         Store one 64-bit word (four Q15 values); the pointer is not modified.
+  @param[in]     pQ15      points to the destination of the four values
+  @param[in]     value     Q63 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q15x4 (
+  q15_t * pQ15,
+  q63_t   value)
+{
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(pQ15), (void *)(&value), 8);
+#elif __RISCV_XLEN == 64
+  __SD(pQ15, value);
+#else
+  *((q63_t *)pQ15) = value;
+#endif
+}
+
+/**
+  @brief         Load four Q15 values as one 64-bit word and advance the pointer by four elements.
+  @param[in,out] pQ15      address of the read pointer; incremented by 4 after the load
+  @return        Q63 value holding the 8 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q63_t read_q15x4_ia (
+  q15_t ** pQ15)
+{
+  q15_t *src = *pQ15;
+  q63_t packed;
+
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(&packed), (void *)(src), 8);
+#elif __RISCV_XLEN == 64
+  packed = __LD(src);
+#else
+  packed = *((q63_t *)src);
+#endif
+
+  *pQ15 = src + 4;
+  return packed;
+}
+
+/**
+  @brief         Load four Q15 values as one 64-bit word and move the pointer back by four elements.
+  @param[in,out] pQ15      address of the read pointer; decremented by 4 after the load
+  @return        Q63 value holding the 8 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q63_t read_q15x4_da (
+  q15_t ** pQ15)
+{
+  q15_t *src = *pQ15;
+  q63_t packed;
+
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(&packed), (void *)(src), 8);
+#elif __RISCV_XLEN == 64
+  packed = __LD(src);
+#else
+  packed = *((q63_t *)src);
+#endif
+
+  *pQ15 = src - 4;
+  return packed;
+}
+
+/**
+  @brief         Load four Q15 values as one 64-bit word; the pointer is not modified.
+  @param[in]     pQ15      points to the first of the four values to read
+  @return        Q63 value holding the 8 bytes read from pQ15
+ */
+__STATIC_FORCEINLINE q63_t read_q15x4 (
+  q15_t * pQ15)
+{
+  q63_t packed;
+
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(&packed), (void *)(pQ15), 8);
+#elif __RISCV_XLEN == 64
+  packed = __LD(pQ15);
+#else
+  packed = *((q63_t *)pQ15);
+#endif
+
+  return packed;
+}
+
+/**
+  @brief         Load four Q7 values as one 32-bit word and advance the pointer by four elements.
+  @param[in,out] pQ7       address of the read pointer; incremented by 4 after the load
+  @return        Q31 value holding the 4 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q31_t read_q7x4_ia (
+  q7_t ** pQ7)
+{
+  q7_t *src = *pQ7;
+  q31_t word;
+
+#ifndef __RISCV_FEATURE_UNALIGNED
+  word = __LW(src);
+#else
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy (&word, src, 4);
+#endif
+
+  *pQ7 = src + 4;
+  return word;
+}
+
+/**
+  @brief         Load four Q7 values as one 32-bit word and move the pointer back by four elements.
+  @param[in,out] pQ7       address of the read pointer; decremented by 4 after the load
+  @return        Q31 value holding the 4 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q31_t read_q7x4_da (
+  q7_t ** pQ7)
+{
+  q7_t *src = *pQ7;
+  q31_t word;
+
+#ifndef __RISCV_FEATURE_UNALIGNED
+  word = __LW(src);
+#else
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy (&word, src, 4);
+#endif
+
+  *pQ7 = src - 4;
+  return word;
+}
+
+/**
+  @brief         Store one 32-bit word (four Q7 values) and advance the pointer by four elements.
+  @param[in,out] pQ7       address of the write pointer; incremented by 4 after the store
+  @param[in]     value     Q31 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q7x4_ia (
+  q7_t ** pQ7,
+  q31_t   value)
+{
+#ifndef __RISCV_FEATURE_UNALIGNED
+  __SW(*pQ7, value);
+#else
+  /* memcpy path; `value` is already a local copy, so it is used as the source directly */
+  memcpy (*pQ7, &value, 4);
+#endif
+
+  *pQ7 = *pQ7 + 4;
+}
+
+/**
+  @brief         Store one 32-bit word (four Q7 values); the pointer is not modified.
+  @param[in]     pQ7       points to the destination of the four values
+  @param[in]     value     Q31 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q7x4 (
+  q7_t * pQ7,
+  q31_t   value)
+{
+#ifndef __RISCV_FEATURE_UNALIGNED
+  __SW(pQ7, value);
+#else
+  /* memcpy path; `value` is already a local copy, so it is used as the source directly */
+  memcpy (pQ7, &value, 4);
+#endif
+}
+
+/**
+  @brief         Load eight Q7 values as one 64-bit word and advance the pointer by eight elements.
+  @param[in,out] pQ7       address of the read pointer; incremented by 8 after the load
+  @return        Q63 value holding the 8 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q63_t read_q7x8_ia (
+  q7_t ** pQ7)
+{
+  q7_t *src = *pQ7;
+  q63_t packed;
+
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(&packed), (void *)(src), 8);
+#elif __RISCV_XLEN == 64
+  packed = __LD(src);
+#else
+  packed = *((q63_t *)src);
+#endif
+
+  *pQ7 = src + 8;
+  return packed;
+}
+
+/**
+  @brief         Load eight Q7 values as one 64-bit word and move the pointer back by eight elements.
+  @param[in,out] pQ7       address of the read pointer; decremented by 8 after the load
+  @return        Q63 value holding the 8 bytes read from the original pointer position
+ */
+__STATIC_FORCEINLINE q63_t read_q7x8_da (
+  q7_t ** pQ7)
+{
+  q7_t *src = *pQ7;
+  q63_t packed;
+
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(&packed), (void *)(src), 8);
+#elif __RISCV_XLEN == 64
+  packed = __LD(src);
+#else
+  packed = *((q63_t *)src);
+#endif
+
+  *pQ7 = src - 8;
+  return packed;
+}
+
+/**
+  @brief         Store one 64-bit word (eight Q7 values) and advance the pointer by eight elements.
+  @param[in,out] pQ7       address of the write pointer; incremented by 8 after the store
+  @param[in]     value     Q63 value to store
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q7x8_ia (
+  q7_t ** pQ7,
+  q63_t   value)
+{
+#if defined(__RISCV_FEATURE_UNALIGNED)
+  /* memcpy path, selected when __RISCV_FEATURE_UNALIGNED is defined */
+  memcpy((void *)(*pQ7), (void *)(&value), 8);
+#elif __RISCV_XLEN == 64
+  __SD(*pQ7, value);
+#else
+  *((q63_t *)*pQ7) = value;
+#endif
+
+  *pQ7 = *pQ7 + 8;
+}
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*ifndef _RISCV_MATH_MEMORY_H_ */
--- a/components/nmsis/dsp/inc/riscv_math_types.h
+++ b/components/nmsis/dsp/inc/riscv_math_types.h
@ -0,0 +1,263 @@
+/******************************************************************************
+ * @file     riscv_math_types.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_MATH_TYPES_H_
+
+#define _RISCV_MATH_TYPES_H_
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/* Compiler specific diagnostic adjustment */
+#if defined(__GNUC__)
+  /* Save the diagnostic state and mute three warning groups; restored by the pop further down. */
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wsign-conversion"
+  #pragma GCC diagnostic ignored "-Wconversion"
+  #pragma GCC diagnostic ignored "-Wunused-parameter"
+
+#elif defined(__TI_RISCV__) || defined(__CSMC__) || defined(__TASKING__) || defined(_MSC_VER)
+  /* Recognised toolchain, no diagnostic adjustment applied. */
+
+#else
+  #error Unknown compiler
+#endif
+
+
+/* Included for intrinsics definitions */
+#if defined(_MSC_VER)
+  /* MSVC: provide the inline/alignment helper macros locally. */
+  #include <stdint.h>
+  #define __ALIGNED(x) __declspec(align(x))
+  #define __STATIC_INLINE static __inline
+  #define __STATIC_FORCEINLINE static __forceinline
+
+#elif defined(__GNUC_PYTHON__)
+  /* __GNUC_PYTHON__ build: provide the same helper macros with GNU attributes. */
+  #include <stdint.h>
+  #define __STATIC_INLINE static inline
+  #define __STATIC_FORCEINLINE static inline __attribute__((always_inline))
+  #define __ALIGNED(x) __attribute__((aligned(x)))
+
+#else
+  /* All other builds: include nmsis_core.h with __NMSIS_GENERIC defined for the duration of the include. */
+  #define __NMSIS_GENERIC
+  #if defined(__RISCV_FEATURE_DSP) && (__RISCV_FEATURE_DSP == 1)
+    #define __DSP_PRESENT   1
+  #endif
+  #include "nmsis_core.h"
+  #undef __NMSIS_GENERIC
+#endif
+
+/* evaluate RISCV vector feature: define RISCV_VECTOR and include the vector header */
+#if defined(__RISCV_FEATURE_VECTOR) && (__RISCV_FEATURE_VECTOR == 1)
+  #define RISCV_VECTOR 1
+  #include <riscv_vector.h>
+#endif
+
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+
+/* evaluate RISCV DSP feature */
+#if defined(__RISCV_FEATURE_DSP) && (__RISCV_FEATURE_DSP == 1)
+  #define RISCV_MATH_DSP 1
+#endif
+
+/* bit 1 of __RISCV_FEATURE_MVE enables both MVE float macros */
+#if (__RISCV_FEATURE_MVE & 2)
+  #define RISCV_MATH_MVEF
+  #define RISCV_MATH_MVE_FLOAT16
+#endif
+
+
+
+
+
+
+/*
+ * Optimization-control markers.
+ * With __GNUC__, LOW_OPTIMIZATION_ENTER expands to an optimize("-O1") function
+ * attribute; every other marker, and all markers on the remaining listed
+ * toolchains, expand to nothing.
+ */
+#if defined(__GNUC__)
+  #define LOW_OPTIMIZATION_ENTER __attribute__(( optimize("-O1") ))
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined(__TI_RISCV__) || defined(__CSMC__) || defined(__TASKING__) \
+   || defined(_MSC_VER) || defined(__GNUC_PYTHON__)
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+#endif
+
+
+
+/* Compiler specific diagnostic adjustment */
+#if defined(__GNUC__)
+  /* Restore the diagnostic state saved by the push near the top of this header. */
+  #pragma GCC diagnostic pop
+
+#elif defined(__TI_RISCV__) || defined(__CSMC__) || defined(__TASKING__) || defined(_MSC_VER)
+  /* Recognised toolchain, nothing to restore. */
+
+#else
+  #error Unknown compiler
+#endif
+
+#ifdef   __cplusplus
+}
+#endif
+
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+  /** @brief 8-bit fractional data type in 1.7 format. */
+  typedef int8_t q7_t;
+
+  /** @brief 16-bit fractional data type in 1.15 format. */
+  typedef int16_t q15_t;
+
+  /** @brief 32-bit fractional data type in 1.31 format. */
+  typedef int32_t q31_t;
+
+  /** @brief 64-bit fractional data type in 1.63 format. */
+  typedef int64_t q63_t;
+
+  /** @brief 32-bit floating-point type definition. */
+  typedef float float32_t;
+
+  /** @brief 64-bit floating-point type definition. */
+  typedef double float64_t;
+
+  /**
+   * @brief vector types
+   */
+
+
+
+
+
+
+
+/* Floating-point limits: MAX/MIN are the largest finite value and its negation. */
+#define F64_MAX      ((float64_t)DBL_MAX)
+#define F64_MIN      (-DBL_MAX)
+#define F32_MAX      ((float32_t)FLT_MAX)
+#define F32_MIN      (-FLT_MAX)
+
+/* Floating-point limits on absolute values. */
+#define F64_ABSMAX   ((float64_t)DBL_MAX)
+#define F64_ABSMIN   ((float64_t)0.0)
+#define F32_ABSMAX   ((float32_t)FLT_MAX)
+#define F32_ABSMIN   ((float32_t)0.0)
+
+/* Fixed-point limits. */
+#define Q31_MAX      ((q31_t)(0x7FFFFFFFL))
+#define Q31_MIN      ((q31_t)(0x80000000L))
+#define Q15_MAX      ((q15_t)(0x7FFF))
+#define Q15_MIN      ((q15_t)(0x8000))
+#define Q7_MAX       ((q7_t)(0x7F))
+#define Q7_MIN       ((q7_t)(0x80))
+
+/* Fixed-point limits on absolute values. */
+#define Q31_ABSMAX   ((q31_t)(0x7FFFFFFFL))
+#define Q31_ABSMIN   ((q31_t)0)
+#define Q15_ABSMAX   ((q15_t)(0x7FFF))
+#define Q15_ABSMIN   ((q15_t)0)
+#define Q7_ABSMAX    ((q7_t)(0x7F))
+#define Q7_ABSMIN    ((q7_t)0)
+
+  /* Dimension C vector space */
+  #define CMPLX_DIM 2
+
+  /**
+   * @brief Error status returned by some functions in the library.
+   *        Success is 0; every failure code is a distinct negative value.
+   */
+  typedef enum
+  {
+    RISCV_MATH_SUCCESS               =  0,  /**< No error */
+    RISCV_MATH_ARGUMENT_ERROR        = -1,  /**< One or more arguments are incorrect */
+    RISCV_MATH_LENGTH_ERROR          = -2,  /**< Length of data buffer is incorrect */
+    RISCV_MATH_SIZE_MISMATCH         = -3,  /**< Size of matrices is not compatible with the operation */
+    RISCV_MATH_NANINF                = -4,  /**< Not-a-number (NaN) or infinity is generated */
+    RISCV_MATH_SINGULAR              = -5,  /**< Input matrix is singular and cannot be inverted */
+    RISCV_MATH_TEST_FAILURE          = -6,  /**< Test Failed */
+    RISCV_MATH_DECOMPOSITION_FAILURE = -7   /**< Decomposition Failed */
+  } riscv_status;
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*ifndef _RISCV_MATH_TYPES_H_ */
--- a/components/nmsis/dsp/inc/riscv_math_types_f16.h
+++ b/components/nmsis/dsp/inc/riscv_math_types_f16.h
@ -0,0 +1,79 @@
+/******************************************************************************
+ * @file     riscv_math_types_f16.h
+ * @brief    Public header file for f16 function of the NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_MATH_TYPES_F16_H
+#define _RISCV_MATH_TYPES_F16_H
+
+#include "riscv_math_types.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+/**
+ * @brief Availability switch for the 16-bit floating-point definitions.
+ *
+ * RISCV_FLOAT16_SUPPORTED is defined only when bit 1 of
+ * __RISCV_FEATURE_MVE is set. When it is left undefined, the sections of the
+ * library headers guarded by it are compiled out.
+ */
+#if (__RISCV_FEATURE_MVE & 2)
+  /* When Vector float16, this flag is always defined and can't be disabled */
+  #define RISCV_FLOAT16_SUPPORTED
+#endif
+
+
+
+
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+/* float16 limits: MAX/MIN are the largest finite value and its negation */
+#define F16_MAX      ((float16_t)__FLT16_MAX__)
+#define F16_MIN      (-(float16_t)__FLT16_MAX__)
+
+/* float16 limits on absolute values */
+#define F16_ABSMAX   ((float16_t)__FLT16_MAX__)
+#define F16_ABSMIN   ((float16_t)0.0f16)
+
+/* infinity converted to float16 */
+#define F16INFINITY  ((float16_t)__builtin_inf())
+
+#endif /* RISCV_FLOAT16_SUPPORTED */
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* _RISCV_MATH_TYPES_F16_H */
+
+
--- a/components/nmsis/dsp/inc/riscv_mve_tables.h
+++ b/components/nmsis/dsp/inc/riscv_mve_tables.h
@ -0,0 +1,58 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_mve_tables.h
+ * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
+ *               used for MVE implementation only
+ *
+ * @version  V1.9.0
+ * @date     23 April 2021
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ #ifndef _RISCV_MVE_TABLES_H
+ #define _RISCV_MVE_TABLES_H
+
+#include "riscv_math_types.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+ 
+
+
+
+
+
+
+
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*_RISCV_MVE_TABLES_H*/
+
--- a/components/nmsis/dsp/inc/riscv_mve_tables_f16.h
+++ b/components/nmsis/dsp/inc/riscv_mve_tables_f16.h
@ -0,0 +1,52 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_mve_tables_f16.h
+ * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
+ *               used for MVE implementation only
+ *
+ * @version  V1.9.0
+ * @date     23 April 2021
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ #ifndef _RISCV_MVE_TABLES_F16_H
+ #define _RISCV_MVE_TABLES_F16_H
+
+#include "riscv_math_types_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+ 
+
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*_RISCV_MVE_TABLES_F16_H*/
+
--- a/components/nmsis/dsp/inc/riscv_vec_math.h
+++ b/components/nmsis/dsp/inc/riscv_vec_math.h
@ -0,0 +1,52 @@
+/******************************************************************************
+ * @file     riscv_vec_math.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_VEC_MATH_H
+#define _RISCV_VEC_MATH_H
+
+#include "riscv_math_types.h"
+#include "riscv_common_tables.h"
+#include "riscv_helium_utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+
+#endif /* _RISCV_VEC_MATH_H */
+
+/**
+ *
+ * End of file.
+ */
--- a/components/nmsis/dsp/inc/riscv_vec_math_f16.h
+++ b/components/nmsis/dsp/inc/riscv_vec_math_f16.h
@ -0,0 +1,56 @@
+/******************************************************************************
+ * @file     riscv_vec_math_f16.h
+ * @brief    Public header file for NMSIS DSP Library
+ * @version  V1.9.0
+ * @date     23 April 2021
+ * Target Processor: RISC-V Cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_VEC_MATH_F16_H
+#define _RISCV_VEC_MATH_F16_H
+
+#include "riscv_math_types_f16.h"
+#include "riscv_common_tables_f16.h"
+#include "riscv_helium_utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+/* float16 vector helpers belong here; none are declared at present */
+
+#endif /* defined(RISCV_FLOAT16_SUPPORTED) */
+
+/*
+ * The linkage block is closed outside the float16 guard so that the brace
+ * opened above is always matched. Closing it inside the guard left C++
+ * translation units with an unterminated extern "C" block whenever
+ * RISCV_FLOAT16_SUPPORTED was not defined.
+ */
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* _RISCV_VEC_MATH_F16_H */
+
+/**
+ *
+ * End of file.
+ */
--- a/components/nmsis/dsp/privateInc/riscv_sorting.h
+++ b/components/nmsis/dsp/privateInc/riscv_sorting.h
@ -0,0 +1,113 @@
+/******************************************************************************
+ * @file     riscv_sorting.h
+ * @brief    Private header file for NMSIS DSP Library
+ * @version  V1.7.0
+ * @date     2019
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_SORTING_H_
+#define _RISCV_SORTING_H_
+
+#include "riscv_math.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+  /**
+   * @brief      Declaration of riscv_bubble_sort_f32; the body is not part of this header.
+   * @param[in]  S          points to an instance of the sorting structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_bubble_sort_f32(const riscv_sort_instance_f32 * S,
+                             float32_t * pSrc,
+                             float32_t * pDst,
+                             uint32_t blockSize);
+
+  /**
+   * @brief      Declaration of riscv_heap_sort_f32; the body is not part of this header.
+   * @param[in]  S          points to an instance of the sorting structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_heap_sort_f32(const riscv_sort_instance_f32 * S,
+                           float32_t * pSrc,
+                           float32_t * pDst,
+                           uint32_t blockSize);
+
+  /**
+   * @brief      Declaration of riscv_insertion_sort_f32; the body is not part of this header.
+   * @param[in]  S          points to an instance of the sorting structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_insertion_sort_f32(const riscv_sort_instance_f32 * S,
+                                float32_t * pSrc,
+                                float32_t * pDst,
+                                uint32_t blockSize);
+
+  /**
+   * @brief      Declaration of riscv_quick_sort_f32; the body is not part of this header.
+   * @param[in]  S          points to an instance of the sorting structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_quick_sort_f32(const riscv_sort_instance_f32 * S,
+                            float32_t * pSrc,
+                            float32_t * pDst,
+                            uint32_t blockSize);
+
+  /**
+   * @brief      Declaration of riscv_selection_sort_f32; the body is not part of this header.
+   * @param[in]  S          points to an instance of the sorting structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_selection_sort_f32(const riscv_sort_instance_f32 * S,
+                                float32_t * pSrc,
+                                float32_t * pDst,
+                                uint32_t blockSize);
+ 
+  /**
+   * @brief      Declaration of riscv_bitonic_sort_f32; the body is not part of this header.
+   * @param[in]  S          points to an instance of the sorting structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void riscv_bitonic_sort_f32(const riscv_sort_instance_f32 * S,
+                              float32_t * pSrc,
+                              float32_t * pDst,
+                              uint32_t blockSize);
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* _RISCV_SORTING_H_ */
--- a/components/nmsis/dsp/privateInc/riscv_vec_fft.h
+++ b/components/nmsis/dsp/privateInc/riscv_vec_fft.h
@ -0,0 +1,44 @@
+/******************************************************************************
+ * @file     riscv_vec_fft.h
+ * @brief    Private header file for NMSIS DSP Library
+ * @version  V1.7.0
+ * @date     07. January 2020
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_VEC_FFT_H_
+#define _RISCV_VEC_FFT_H_
+
+#include "riscv_math.h"
+#include "riscv_helium_utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+
+#endif /* _RISCV_VEC_FFT_H_ */
--- a/components/nmsis/dsp/privateInc/riscv_vec_filtering.h
+++ b/components/nmsis/dsp/privateInc/riscv_vec_filtering.h
@ -0,0 +1,44 @@
+/******************************************************************************
+ * @file     riscv_vec_filtering.h
+ * @brief    Private header file for NMSIS DSP Library
+ * @version  V1.7.0
+ * @date     30. October 2019
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _RISCV_VEC_FILTERING_H_
+#define _RISCV_VEC_FILTERING_H_
+
+#include "riscv_math.h"
+#include "riscv_helium_utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+
+#endif /* _RISCV_VEC_FILTERING_H_ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/BasicMathFunctions.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/BasicMathFunctions.c
@ -0,0 +1,80 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        BasicMathFunctions.c
+ * Description:  Combination of all basic math function source files.
+ *
+ * $Date:        16. March 2020
+ * $Revision:    V1.1.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "riscv_abs_f32.c"
+#include "riscv_abs_q15.c"
+#include "riscv_abs_q31.c"
+#include "riscv_abs_q7.c"
+#include "riscv_add_f32.c"
+#include "riscv_add_q15.c"
+#include "riscv_add_q31.c"
+#include "riscv_add_q7.c"
+#include "riscv_and_u16.c"
+#include "riscv_and_u32.c"
+#include "riscv_and_u8.c"
+#include "riscv_dot_prod_f32.c"
+#include "riscv_dot_prod_q15.c"
+#include "riscv_dot_prod_q31.c"
+#include "riscv_dot_prod_q7.c"
+#include "riscv_mult_f32.c"
+#include "riscv_mult_q15.c"
+#include "riscv_mult_q31.c"
+#include "riscv_mult_q7.c"
+#include "riscv_negate_f32.c"
+#include "riscv_negate_q15.c"
+#include "riscv_negate_q31.c"
+#include "riscv_negate_q7.c"
+#include "riscv_not_u16.c"
+#include "riscv_not_u32.c"
+#include "riscv_not_u8.c"
+#include "riscv_offset_f32.c"
+#include "riscv_offset_q15.c"
+#include "riscv_offset_q31.c"
+#include "riscv_offset_q7.c"
+#include "riscv_or_u16.c"
+#include "riscv_or_u32.c"
+#include "riscv_or_u8.c"
+#include "riscv_scale_f32.c"
+#include "riscv_scale_q15.c"
+#include "riscv_scale_q31.c"
+#include "riscv_scale_q7.c"
+#include "riscv_shift_q15.c"
+#include "riscv_shift_q31.c"
+#include "riscv_shift_q7.c"
+#include "riscv_sub_f32.c"
+#include "riscv_sub_q15.c"
+#include "riscv_sub_q31.c"
+#include "riscv_sub_q7.c"
+#include "riscv_xor_u16.c"
+#include "riscv_xor_u32.c"
+#include "riscv_xor_u8.c"
+#include "riscv_clip_f32.c"
+#include "riscv_clip_q31.c"
+#include "riscv_clip_q15.c"
+#include "riscv_clip_q7.c"
--- a/components/nmsis/dsp/src/BasicMathFunctions/BasicMathFunctionsF16.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/BasicMathFunctionsF16.c
@ -0,0 +1,38 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        BasicMathFunctionsF16.c
+ * Description:  Combination of all basic math function f16 source files.
+ *
+ * $Date:        20. April 2020
+ * $Revision:    V1.1.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "riscv_abs_f16.c"
+#include "riscv_add_f16.c"
+#include "riscv_dot_prod_f16.c"
+#include "riscv_mult_f16.c"
+#include "riscv_negate_f16.c"
+#include "riscv_offset_f16.c"
+#include "riscv_scale_f16.c"
+#include "riscv_sub_f16.c"
+#include "riscv_clip_f16.c"
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_f16.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_f16.c
@ -0,0 +1,120 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_abs_f16.c
+ * Description:  Floating-point vector absolute value
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions_f16.h"
+#include <math.h>
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicAbs Vector Absolute Value
+
+  Computes the absolute value of a vector on an element-by-element basis.
+
+  <pre>
+      pDst[n] = abs(pSrc[n]),   0 <= n < blockSize.
+  </pre>
+
+  The functions support in-place computation allowing the source and
+  destination pointers to reference the same memory buffer.
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicAbs
+  @{
+ */
+
+/**
+  @brief         Floating-point vector absolute value.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+void riscv_abs_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize)
+{
+  /*
+   * Writes |pSrc[n]| to pDst[n] for 0 <= n < blockSize.
+   * fabsf() takes and returns float, so every half-precision sample is
+   * widened for the call and narrowed again when it is stored.
+   */
+  uint32_t remaining;                 /* Samples not yet processed */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+  uint32_t quads;                     /* Number of full groups of four */
+
+  /* Unrolled section: four outputs per pass */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    pDst[0] = fabsf(pSrc[0]);
+    pDst[1] = fabsf(pSrc[1]);
+    pDst[2] = fabsf(pSrc[2]);
+    pDst[3] = fabsf(pSrc[3]);
+
+    pSrc += 4;
+    pDst += 4;
+  }
+
+  /* Whatever did not fit into a group of four */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the generic loop handles every sample */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Generic loop: one output per pass */
+  for (; remaining > 0U; remaining--)
+  {
+    *pDst++ = fabsf(*pSrc++);
+  }
+
+}
+#endif /* defined(RISCV_FLOAT16_SUPPORTED) */
+/**
+  @} end of BasicAbs group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_f32.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_f32.c
@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_abs_f32.c
+ * Description:  Floating-point vector absolute value
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+#include <math.h>
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicAbs Vector Absolute Value
+
+  Computes the absolute value of a vector on an element-by-element basis.
+
+  <pre>
+      pDst[n] = abs(pSrc[n]),   0 <= n < blockSize.
+  </pre>
+
+  The functions support in-place computation allowing the source and
+  destination pointers to reference the same memory buffer.
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicAbs
+  @{
+ */
+
+/**
+  @brief         Floating-point vector absolute value.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+
+void riscv_abs_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize)
+{
+  /* Writes |pSrc[n]| to pDst[n] for 0 <= n < blockSize. */
+#if defined(RISCV_VECTOR)
+  uint32_t remaining = blockSize;     /* Samples not yet processed */
+  size_t vl;                          /* Element count granted by vsetvl for this pass */
+  vfloat32m8_t vin;                   /* Current strip of input samples */
+
+  /* Strip-mining loop: vsetvl returns 0 once nothing is left to process */
+  while ((vl = vsetvl_e32m8(remaining)) > 0)
+  {
+    vin = vle32_v_f32m8(pSrc, vl);
+    /* Sign-inject-XOR of a value with itself: the two sign bits cancel,
+       leaving the magnitude (per the RVV vfsgnjx definition). */
+    vse32_v_f32m8 (pDst, vfsgnjx_vv_f32m8(vin, vin, vl), vl);
+    pSrc += vl;
+    pDst += vl;
+    remaining -= vl;
+  }
+#else
+  uint32_t remaining;                 /* Samples not yet processed */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+  uint32_t quads;                     /* Number of full groups of four */
+
+  /* Unrolled section: four outputs per pass */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    pDst[0] = fabsf(pSrc[0]);
+    pDst[1] = fabsf(pSrc[1]);
+    pDst[2] = fabsf(pSrc[2]);
+    pDst[3] = fabsf(pSrc[3]);
+
+    pSrc += 4;
+    pDst += 4;
+  }
+
+  /* Whatever did not fit into a group of four */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the generic loop handles every sample */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Generic loop: one output per pass */
+  for (; remaining > 0U; remaining--)
+  {
+    *pDst++ = fabsf(*pSrc++);
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+/**
+  @} end of BasicAbs group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_q15.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_q15.c
@ -0,0 +1,126 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_abs_q15.c
+ * Description:  Q15 vector absolute value
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicAbs
+  @{
+ */
+
+/**
+  @brief         Q15 vector absolute value.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
+ */
+
+void riscv_abs_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize)
+{
+        uint32_t blkCnt;                               /* Loop counter */
+        q15_t in;                                      /* Temporary input variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = |A| */
+
+    /* Calculate absolute of input (if -1 then saturated to 0x7fff) and store result in destination buffer.
+       Every branch below must consume exactly four samples, because blkCnt counts groups of four. */
+#if defined (RISCV_MATH_DSP)
+#if __RISCV_XLEN == 64
+    /* RV64: one 64-bit register carries four q15 lanes */
+    write_q15x4_ia (&pDst, __RV_KABS16(read_q15x4_ia ((q15_t **) &pSrc)));
+#elif defined (RISCV_DSP64)
+    /* RV32 with 64-bit DSP operations: four q15 lanes in a register pair */
+    write_q15x4_ia (&pDst, __RV_DKABS16(read_q15x4_ia ((q15_t **) &pSrc)));
+#else
+    /* Plain RV32 DSP: two q15 lanes per register, issued twice */
+    write_q15x2_ia (&pDst, __RV_KABS16(read_q15x2_ia ((q15_t **) &pSrc)));
+    write_q15x2_ia (&pDst, __RV_KABS16(read_q15x2_ia ((q15_t **) &pSrc)));
+#endif /* __RISCV_XLEN == 64 */
+#else
+    /* No DSP extension: four scalar saturating absolute values */
+    in = *pSrc++;
+    *pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
+
+    in = *pSrc++;
+    *pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
+
+    in = *pSrc++;
+    *pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
+
+    in = *pSrc++;
+    *pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
+#endif /* defined (RISCV_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = |A| */
+
+    /* Calculate absolute of input (if -1 then saturated to 0x7fff) and store result in destination buffer. */
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    /* Use the 16-bit saturating form: the 32-bit KABSW would turn the
+       sign-extended 0x8000 into +32768, which wraps back to 0x8000 when
+       narrowed to q15_t instead of saturating to 0x7fff. Only the low
+       16-bit lane of the result is kept by the cast. */
+    *pDst++ = (q15_t) __RV_KABS16((unsigned long) in);
+#else
+    *pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+}
+
+/**
+  @} end of BasicAbs group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_q31.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_q31.c
@ -0,0 +1,137 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_abs_q31.c
+ * Description:  Q31 vector absolute value
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicAbs
+  @{
+ */
+
+/**
+  @brief         Q31 vector absolute value.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
+ */
+
+void riscv_abs_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize)
+{
+        uint32_t blkCnt;                               /* Loop counter */
+        q31_t in;                                      /* Temporary variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = |A| */
+
+    /* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and store result in destination buffer. */
+#if defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64)
+    /* RV64 with the DSP extension: two q31 lanes per register, issued twice.
+       The packed intrinsic is only used when RISCV_MATH_DSP is defined, so an
+       RV64 build without the extension falls through to the scalar code. */
+    write_q31x2_ia (&pDst, __RV_KABS32(read_q31x2_ia ((q31_t **) &pSrc)));
+    write_q31x2_ia (&pDst, __RV_KABS32(read_q31x2_ia ((q31_t **) &pSrc)));
+#else
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __KABSW(in);
+#else
+    *pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
+#endif
+
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __KABSW(in);
+#else
+    *pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
+#endif
+
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __KABSW(in);
+#else
+    *pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
+#endif
+
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __KABSW(in);
+#else
+    *pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
+#endif
+#endif /* defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = |A| */
+
+    /* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and store result in destination buffer. */
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __KABSW(in);
+#else
+    *pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+}
+/**
+  @} end of BasicAbs group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_q7.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_abs_q7.c
@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_abs_q7.c
+ * Description:  Q7 vector absolute value
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicAbs
+  @{
+ */
+
+/**
+  @brief         Q7 vector absolute value.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Conditions for optimum performance
+                   Input and output buffers should be aligned by 32-bit
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
+ */
+
+void riscv_abs_q7(
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize)
+{
+        uint32_t blkCnt;                               /* Loop counter */
+        q7_t in;                                       /* Temporary input variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+  /* The 8-wide step only exists on the DSP paths; without RISCV_MATH_DSP the
+     scalar code below always handles four samples per iteration. */
+#if defined (RISCV_MATH_DSP) && (defined (RISCV_DSP64) || (__RISCV_XLEN == 64))
+  /* Loop unrolling: Compute 8 outputs at a time */
+  blkCnt = blockSize >> 3U;
+#else
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+#endif
+
+  while (blkCnt > 0U)
+  {
+    /* C = |A| */
+
+    /* Calculate absolute of input (if -1 then saturated to 0x7f) and store result in destination buffer. */
+#if defined (RISCV_MATH_DSP)
+#if __RISCV_XLEN == 64
+    /* RV64: eight q7 lanes in one 64-bit register */
+    write_q7x8_ia (&pDst, __RV_KABS8(read_q7x8_ia ((q7_t **) &pSrc)));
+#elif defined (RISCV_DSP64)
+    /* RV32 with 64-bit DSP operations: eight q7 lanes in a register pair */
+    write_q7x8_ia (&pDst, __RV_DKABS8(read_q7x8_ia ((q7_t **) &pSrc)));
+#else
+    /* Plain RV32 DSP: four q7 lanes in one register */
+    write_q7x4_ia (&pDst, __RV_KABS8(read_q7x4_ia ((q7_t **) &pSrc)));
+#endif /* __RISCV_XLEN == 64 */
+#else
+    /* No DSP extension: four scalar saturating absolute values, each from
+       its own input sample */
+    in = *pSrc++;
+    *pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
+
+    in = *pSrc++;
+    *pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
+
+    in = *pSrc++;
+    *pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
+
+    in = *pSrc++;
+    *pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
+#endif /* defined (RISCV_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs (must mirror the step chosen above) */
+#if defined (RISCV_MATH_DSP) && (defined (RISCV_DSP64) || (__RISCV_XLEN == 64))
+  blkCnt = blockSize % 0x8U;
+#else
+  blkCnt = blockSize % 0x4U;
+#endif
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = |A| */
+
+    /* Calculate absolute of input (if -1 then saturated to 0x7f) and store result in destination buffer. */
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    /* Use the 8-bit saturating form: the 32-bit KABSW would turn the
+       sign-extended 0x80 into +128, which wraps back to 0x80 when narrowed
+       to q7_t instead of saturating to 0x7f. Only the low 8-bit lane of the
+       result is kept by the cast. */
+    *pDst++ = (q7_t) __RV_KABS8((unsigned long) in);
+#else
+    *pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+}
+
+/**
+  @} end of BasicAbs group
+ */
+
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_f16.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_f16.c
@ -0,0 +1,116 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_add_f16.c
+ * Description:  Floating-point vector addition
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions_f16.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicAdd Vector Addition
+
+  Element-by-element addition of two vectors.
+
+  <pre>
+      pDst[n] = pSrcA[n] + pSrcB[n],   0 <= n < blockSize.
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicAdd
+  @{
+ */
+
+/**
+  @brief         Floating-point vector addition.
+  @param[in]     pSrcA      points to first input vector
+  @param[in]     pSrcB      points to second input vector
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+void riscv_add_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize)
+{
+  /* Writes pSrcA[n] + pSrcB[n] to pDst[n] for 0 <= n < blockSize. */
+  uint32_t remaining;                 /* Samples not yet processed */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+  uint32_t quads;                     /* Number of full groups of four */
+
+  /* Unrolled section: four sums per pass */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    pDst[0] = pSrcA[0] + pSrcB[0];
+    pDst[1] = pSrcA[1] + pSrcB[1];
+    pDst[2] = pSrcA[2] + pSrcB[2];
+    pDst[3] = pSrcA[3] + pSrcB[3];
+
+    pSrcA += 4;
+    pSrcB += 4;
+    pDst += 4;
+  }
+
+  /* Whatever did not fit into a group of four */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the generic loop handles every sample */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Generic loop: one sum per pass */
+  for (; remaining > 0U; remaining--)
+  {
+    *pDst++ = (*pSrcA++) + (*pSrcB++);
+  }
+
+}
+#endif /* defined(RISCV_FLOAT16_SUPPORTED) */
+
+/**
+  @} end of BasicAdd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_f32.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_f32.c
@ -0,0 +1,129 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_add_f32.c
+ * Description:  Floating-point vector addition
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicAdd Vector Addition
+
+  Element-by-element addition of two vectors.
+
+  <pre>
+      pDst[n] = pSrcA[n] + pSrcB[n],   0 <= n < blockSize.
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicAdd
+  @{
+ */
+
+/**
+  @brief         Floating-point vector addition.
+  @param[in]     pSrcA      points to first input vector
+  @param[in]     pSrcB      points to second input vector
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+void riscv_add_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize)
+{
+  /* Writes pSrcA[n] + pSrcB[n] to pDst[n] for 0 <= n < blockSize. */
+#if defined(RISCV_VECTOR)
+  uint32_t remaining = blockSize;     /* Samples not yet processed */
+  size_t vl;                          /* Element count granted by vsetvl for this pass */
+  vfloat32m8_t va, vb;                /* Current strips of the two operands */
+
+  /* Strip-mining loop: vsetvl returns 0 once nothing is left to process */
+  while ((vl = vsetvl_e32m8(remaining)) > 0)
+  {
+    va = vle32_v_f32m8(pSrcA, vl);
+    pSrcA += vl;
+    vb = vle32_v_f32m8(pSrcB, vl);
+    pSrcB += vl;
+    vse32_v_f32m8 (pDst, vfadd_vv_f32m8(vb, va, vl), vl);
+    pDst += vl;
+    remaining -= vl;
+  }
+
+#else
+  uint32_t remaining;                 /* Samples not yet processed */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+  uint32_t quads;                     /* Number of full groups of four */
+
+  /* Unrolled section: four sums per pass */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    pDst[0] = pSrcA[0] + pSrcB[0];
+    pDst[1] = pSrcA[1] + pSrcB[1];
+    pDst[2] = pSrcA[2] + pSrcB[2];
+    pDst[3] = pSrcA[3] + pSrcB[3];
+
+    pSrcA += 4;
+    pSrcB += 4;
+    pDst += 4;
+  }
+
+  /* Whatever did not fit into a group of four */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the generic loop handles every sample */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Generic loop: one sum per pass */
+  for (; remaining > 0U; remaining--)
+  {
+    *pDst++ = (*pSrcA++) + (*pSrcB++);
+  }
+#endif /*defined(RISCV_VECTOR)*/
+}
+
+/**
+  @} end of BasicAdd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_q15.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_q15.c
@ -0,0 +1,147 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_add_q15.c
+ * Description:  Q15 vector addition
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicAdd
+  @{
+ */
+
+/**
+  @brief         Q15 vector addition.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ */
+
+void riscv_add_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Loop counter */
+  size_t l;
+  vint16m8_t vx, vy;
+
+  /* Each pass processes the l elements returned by vsetvl_e16m8 */
+  for (; (l = vsetvl_e16m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle16_v_i16m8(pSrcA, l);
+    pSrcA += l;
+    vy = vle16_v_i16m8(pSrcB, l);
+    vse16_v_i16m8 (pDst, vsadd_vv_i16m8(vy, vx, l), l);
+    pSrcB += l;
+    pDst += l;
+  }
+#else
+  uint32_t blkCnt;                                           /* Loop counter */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A + B */
+
+#if defined (RISCV_MATH_DSP)
+#if __RISCV_XLEN == 64
+    /* One x4 read/add/write covers all four outputs of this pass */
+    write_q15x4_ia(&pDst, __RV_KADD16(read_q15x4_ia((q15_t **)&pSrcA), read_q15x4_ia((q15_t **)&pSrcB)));
+#else
+#ifdef RISCV_DSP64
+    /* One x4 read/add/write covers all four outputs of this pass */
+    write_q15x4_ia(&pDst, __RV_DKADD16(read_q15x4_ia((q15_t **)&pSrcA), read_q15x4_ia((q15_t **)&pSrcB)));
+#else
+    /* Two x2 read/add/write steps cover the four outputs of this pass */
+    write_q15x2_ia(&pDst, __RV_KADD16(read_q15x2_ia((q15_t **)&pSrcA), read_q15x2_ia((q15_t **)&pSrcB)));
+    write_q15x2_ia(&pDst, __RV_KADD16(read_q15x2_ia((q15_t **)&pSrcA), read_q15x2_ia((q15_t **)&pSrcB)));
+#endif /* RISCV_DSP64 */
+#endif /* __RISCV_XLEN == 64 */
+
+#else
+    /* Widen to q31_t before adding so the sum cannot wrap, then saturate to 16 bits */
+    *pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
+    *pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
+    *pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
+    *pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
+#endif /* defined (RISCV_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A + B */
+
+    /* Add and store result in destination buffer. */
+    *pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+/**
+  @} end of BasicAdd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_q31.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_q31.c
@ -0,0 +1,123 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_add_q31.c
+ * Description:  Q31 vector addition
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicAdd
+  @{
+ */
+
+/**
+  @brief         Q31 vector addition.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ */
+
+void riscv_add_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Loop counter */
+  size_t l;
+  vint32m8_t vx, vy;
+
+  /* Each pass processes the l elements returned by vsetvl_e32m8 */
+  for (; (l = vsetvl_e32m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle32_v_i32m8(pSrcA, l);
+    pSrcA += l;
+    vy = vle32_v_i32m8(pSrcB, l);
+    vse32_v_i32m8 (pDst, vsadd_vv_i32m8(vy, vx, l), l);
+    pSrcB += l;
+    pDst += l;
+  }
+#else
+  uint32_t blkCnt;                                           /* Loop counter */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A + B */
+
+#if defined (RISCV_MATH_DSP)
+    /* The __RV_* intrinsics are only used when the DSP build option is set,
+       matching the guard used by the Q15 and Q7 variants of this function. */
+#if __RISCV_XLEN == 64
+    /* Two x2 read/add/write steps cover the four outputs of this pass */
+    write_q31x2_ia (&pDst, __RV_KADD32(read_q31x2_ia ((q31_t **) &pSrcA),read_q31x2_ia ((q31_t **) &pSrcB)));
+    write_q31x2_ia (&pDst, __RV_KADD32(read_q31x2_ia ((q31_t **) &pSrcA),read_q31x2_ia ((q31_t **) &pSrcB)));
+#else
+    /* Add and store result in destination buffer. */
+    *pDst++ = __RV_KADDW(*pSrcA++, *pSrcB++);
+    *pDst++ = __RV_KADDW(*pSrcA++, *pSrcB++);
+    *pDst++ = __RV_KADDW(*pSrcA++, *pSrcB++);
+    *pDst++ = __RV_KADDW(*pSrcA++, *pSrcB++);
+#endif /* __RISCV_XLEN == 64 */
+#else
+    /* Without the DSP option, use the same saturating add as the tail loop below */
+    *pDst++ = __QADD(*pSrcA++, *pSrcB++);
+    *pDst++ = __QADD(*pSrcA++, *pSrcB++);
+    *pDst++ = __QADD(*pSrcA++, *pSrcB++);
+    *pDst++ = __QADD(*pSrcA++, *pSrcB++);
+#endif /* defined (RISCV_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A + B */
+
+    /* Add and store result in destination buffer. */
+    *pDst++ = __QADD(*pSrcA++, *pSrcB++);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicAdd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_q7.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_add_q7.c
@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_add_q7.c
+ * Description:  Q7 vector addition
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicAdd
+  @{
+ */
+
+/**
+  @brief         Q7 vector addition.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ */
+
+void riscv_add_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                              /* Loop counter */
+  size_t l;
+  vint8m8_t vx, vy;
+
+  /* The element width passed to vsetvl must match the 8-bit vectors used
+     below; an e32 setting would grant only a quarter of the lanes per pass. */
+  for (; (l = vsetvl_e8m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle8_v_i8m8(pSrcA, l);
+    pSrcA += l;
+    vy = vle8_v_i8m8(pSrcB, l);
+    vse8_v_i8m8 (pDst, vsadd_vv_i8m8(vy, vx, l), l);
+    pSrcB += l;
+    pDst += l;
+  }
+#else
+  uint32_t blkCnt;                                          /* Loop counter */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* The 8-samples-per-pass count is valid only when the DSP branch below is
+     compiled; the plain C branch always produces 4 outputs per pass, so the
+     count and the remainder must follow the same condition. */
+#if defined (RISCV_MATH_DSP) && (defined (RISCV_DSP64) || (__RISCV_XLEN == 64))
+  /* Loop unrolling: Compute 8 outputs at a time */
+  blkCnt = blockSize >> 3U;
+#else
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+#endif
+
+  while (blkCnt > 0U)
+  {
+    /* C = A + B */
+
+#if defined (RISCV_MATH_DSP)
+#if __RISCV_XLEN == 64
+    /* Add and store result in destination buffer (8 samples at a time). */
+    write_q7x8_ia (&pDst, __RV_KADD8 (read_q7x8_ia ((q7_t **) &pSrcA), read_q7x8_ia ((q7_t **) &pSrcB)));
+#else
+#ifdef RISCV_DSP64
+    /* Add and store result in destination buffer (8 samples at a time). */
+    write_q7x8_ia (&pDst, __RV_DKADD8 (read_q7x8_ia ((q7_t **) &pSrcA), read_q7x8_ia ((q7_t **) &pSrcB)));
+#else
+    /* Add and store result in destination buffer (4 samples at a time). */
+    write_q7x4_ia (&pDst, __RV_KADD8 (read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
+#endif /* RISCV_DSP64 */
+#endif /* __RISCV_XLEN == 64 */
+#else
+    /* Widen to q15_t before adding so the sum cannot wrap, then saturate to 8 bits */
+    *pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ + *pSrcB++, 8);
+    *pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ + *pSrcB++, 8);
+    *pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ + *pSrcB++, 8);
+    *pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ + *pSrcB++, 8);
+#endif /* defined (RISCV_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+#if defined (RISCV_MATH_DSP) && (defined (RISCV_DSP64) || (__RISCV_XLEN == 64))
+  blkCnt = blockSize % 0x8U;
+#else
+  blkCnt = blockSize % 0x4U;
+#endif
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A + B */
+
+    /* Add and store result in destination buffer. */
+    *pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ + *pSrcB++, 8);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+/**
+  @} end of BasicAdd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_and_u16.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_and_u16.c
@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_and_u16.c
+ * Description:  uint16_t bitwise AND
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup And Vector bitwise AND
+
+  Compute the logical bitwise AND.
+
+  There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ */
+
+/**
+  @addtogroup And
+  @{
+ */
+
+/**
+  @brief         Compute the logical bitwise AND of two fixed-point vectors.
+  @param[in]     pSrcA      points to input vector A
+  @param[in]     pSrcB      points to input vector B
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+void riscv_and_u16(
+    const uint16_t * pSrcA,
+    const uint16_t * pSrcB,
+          uint16_t * pDst,
+          uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Loop counter */
+  size_t l;
+  vuint16m8_t vx, vy;
+
+  /* Each pass processes the l elements returned by vsetvl_e16m8 */
+  for (; (l = vsetvl_e16m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle16_v_u16m8(pSrcA, l);
+    pSrcA += l;
+    vy = vle16_v_u16m8(pSrcB, l);
+    vse16_v_u16m8 (pDst, vand_vv_u16m8(vx, vy, l), l);
+    pSrcB += l;
+    pDst += l;
+  }
+#else
+    uint32_t blkCnt = blockSize;      /* Samples left for the element-wise loop */
+
+    /* Word type used to AND several samples with one operation */
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+    typedef uint64_t and_word_t;
+#else
+    typedef uint32_t and_word_t;
+#endif /* defined (RISCV_DSP64) || (__RISCV_XLEN == 64) */
+
+    /* uint16_t samples per word */
+    const uint32_t perWord = (uint32_t) (sizeof(and_word_t) / sizeof(uint16_t));
+    const uintptr_t addrBits = (uintptr_t) pSrcA | (uintptr_t) pSrcB | (uintptr_t) pDst;
+
+    /* Use whole-word accesses only when all three buffers are word aligned;
+       otherwise every sample goes through the element-wise loop below.
+       NOTE(review): the word accesses rely on the toolchain tolerating this
+       pointer-cast type punning, as the previous implementation did. */
+    if ((addrBits & (uintptr_t) (sizeof(and_word_t) - 1U)) == 0U)
+    {
+        const and_word_t * pWordA = (const and_word_t *) pSrcA;
+        const and_word_t * pWordB = (const and_word_t *) pSrcB;
+        and_word_t * pWordDst = (and_word_t *) pDst;
+        uint32_t wordCnt = blockSize / perWord;
+
+        while (wordCnt > 0U)
+        {
+            *pWordDst++ = (*pWordA++) & (*pWordB++);
+
+            /* Decrement the loop counter */
+            wordCnt--;
+        }
+
+        /* Resume right after the last full word. All three pointers, the
+           destination included, must move forward together or the tail
+           results would land on top of already written outputs. */
+        pSrcA = (const uint16_t *) pWordA;
+        pSrcB = (const uint16_t *) pWordB;
+        pDst = (uint16_t *) pWordDst;
+        blkCnt = blockSize % perWord;
+    }
+
+    while (blkCnt > 0U)
+    {
+        *pDst++ = (*pSrcA++)&(*pSrcB++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of And group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_and_u32.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_and_u32.c
@ -0,0 +1,109 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_and_u32.c
+ * Description:  uint32_t bitwise AND
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup And
+  @{
+ */
+
+/**
+  @brief         Compute the logical bitwise AND of two fixed-point vectors.
+  @param[in]     pSrcA      points to input vector A
+  @param[in]     pSrcB      points to input vector B
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+void riscv_and_u32(
+    const uint32_t * pSrcA,
+    const uint32_t * pSrcB,
+          uint32_t * pDst,
+          uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Loop counter */
+  size_t l;
+  vuint32m8_t vx, vy;
+
+  /* Each pass processes the l elements returned by vsetvl_e32m8 */
+  for (; (l = vsetvl_e32m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle32_v_u32m8(pSrcA, l);
+    pSrcA += l;
+    vy = vle32_v_u32m8(pSrcB, l);
+    vse32_v_u32m8 (pDst, vand_vv_u32m8(vx, vy, l), l);
+    pSrcB += l;
+    pDst += l;
+  }
+#else
+    /* Samples left for the element-wise loop; the whole block unless the
+       64-bit section below consumes part of it. */
+    uint32_t blkCnt = blockSize;
+
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+    const uintptr_t addrBits = (uintptr_t) pSrcA | (uintptr_t) pSrcB | (uintptr_t) pDst;
+
+    /* AND two samples per 64-bit access, but only when all three buffers are
+       8-byte aligned; otherwise fall through to the element-wise loop.
+       NOTE(review): the 64-bit accesses rely on the toolchain tolerating this
+       pointer-cast type punning, as the previous implementation did. */
+    if ((addrBits & (uintptr_t) (sizeof(uint64_t) - 1U)) == 0U)
+    {
+        const uint64_t * pSrcA_temp = (const uint64_t *) pSrcA;
+        const uint64_t * pSrcB_temp = (const uint64_t *) pSrcB;
+        uint64_t * pDst_temp = (uint64_t *) pDst;
+        uint32_t pairCnt = blockSize >> 1;
+
+        while (pairCnt > 0U)
+        {
+            *pDst_temp++ = (*pSrcA_temp++)&(*pSrcB_temp++);
+
+            /* Decrement the loop counter */
+            pairCnt--;
+        }
+
+        /* Resume right after the last full pair. All three pointers, the
+           destination included, must move forward together or the tail
+           result would land on top of an already written output. */
+        pSrcA = (const uint32_t *) pSrcA_temp;
+        pSrcB = (const uint32_t *) pSrcB_temp;
+        pDst = (uint32_t *) pDst_temp;
+        blkCnt = blockSize % 2U;
+    }
+#endif /* defined (RISCV_DSP64) || (__RISCV_XLEN == 64) */
+
+    while (blkCnt > 0U)
+    {
+        *pDst++ = (*pSrcA++)&(*pSrcB++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of And group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_and_u8.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_and_u8.c
@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_and_u8.c
+ * Description:  uint8_t bitwise AND
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+
+/**
+  @addtogroup And
+  @{
+ */
+
+/**
+  @brief         Compute the logical bitwise AND of two fixed-point vectors.
+  @param[in]     pSrcA      points to input vector A
+  @param[in]     pSrcB      points to input vector B
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+void riscv_and_u8(
+    const uint8_t * pSrcA,
+    const uint8_t * pSrcB,
+          uint8_t * pDst,
+          uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Loop counter */
+  size_t l;
+  vuint8m8_t vx, vy;
+
+  /* Each pass processes the l elements returned by vsetvl_e8m8 */
+  for (; (l = vsetvl_e8m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle8_v_u8m8(pSrcA, l);
+    pSrcA += l;
+    vy = vle8_v_u8m8(pSrcB, l);
+    vse8_v_u8m8 (pDst, vand_vv_u8m8(vx, vy, l), l);
+    pSrcB += l;
+    pDst += l;
+  }
+#else
+    uint32_t blkCnt = blockSize;      /* Samples left for the byte-wise loop */
+
+    /* Word type used to AND several samples with one operation */
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+    typedef uint64_t and_word_t;
+#else
+    typedef uint32_t and_word_t;
+#endif /*defined (RISCV_DSP64) || (__RISCV_XLEN == 64)*/
+
+    /* uint8_t samples per word */
+    const uint32_t perWord = (uint32_t) sizeof(and_word_t);
+    const uintptr_t addrBits = (uintptr_t) pSrcA | (uintptr_t) pSrcB | (uintptr_t) pDst;
+
+    /* Use whole-word accesses only when all three buffers are word aligned;
+       otherwise every sample goes through the byte-wise loop below.
+       NOTE(review): the word accesses rely on the toolchain tolerating this
+       pointer-cast type punning, as the previous implementation did. */
+    if ((addrBits & (uintptr_t) (sizeof(and_word_t) - 1U)) == 0U)
+    {
+        const and_word_t * pWordA = (const and_word_t *) pSrcA;
+        const and_word_t * pWordB = (const and_word_t *) pSrcB;
+        and_word_t * pWordDst = (and_word_t *) pDst;
+        uint32_t wordCnt = blockSize / perWord;
+
+        while (wordCnt > 0U)
+        {
+            *pWordDst++ = (*pWordA++) & (*pWordB++);
+
+            /* Decrement the loop counter */
+            wordCnt--;
+        }
+
+        /* Resume right after the last full word. All three pointers, the
+           destination included, must move forward together or the tail
+           results would land on top of already written outputs. */
+        pSrcA = (const uint8_t *) pWordA;
+        pSrcB = (const uint8_t *) pWordB;
+        pDst = (uint8_t *) pWordDst;
+        blkCnt = blockSize % perWord;
+    }
+
+    while (blkCnt > 0U)
+    {
+        *pDst++ = (*pSrcA++)&(*pSrcB++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of And group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_f16.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_f16.c
@ -0,0 +1,77 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_clip_f16.c
+ * Description:  Float16 elementwise clipping
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions_f16.h"
+
+/**
+  @ingroup groupMath
+ */
+
+
+/**
+  @addtogroup BasicClip
+  @{
+ */
+
+/**
+  @brief         Elementwise floating-point clipping
+  @param[in]     pSrc          points to input values
+  @param[out]    pDst          points to output clipped values
+  @param[in]     low           lower bound
+  @param[in]     high          higher bound
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+
+void riscv_clip_f16(const float16_t * pSrc,
+  float16_t * pDst,
+  float16_t low,
+  float16_t high,
+  uint32_t numSamples)
+{
+    uint32_t idx = 0U;
+
+    /* Copy each sample, replacing it by a bound when it lies outside;
+       the upper bound is tested before the lower one. */
+    while (idx < numSamples)
+    {
+        const float16_t sample = pSrc[idx];
+        float16_t clipped = sample;
+
+        if (sample > high)
+        {
+            clipped = high;
+        }
+        else if (sample < low)
+        {
+            clipped = low;
+        }
+
+        pDst[idx] = clipped;
+        idx++;
+    }
+}
+#endif /* defined(RISCV_FLOAT16_SUPPORTED) */
+
+
+
+/**
+  @} end of BasicClip group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_f32.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_f32.c
@ -0,0 +1,79 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_clip_f32.c
+ * Description:  Floating-point elementwise clipping
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicClip Elementwise clipping
+
+  Element-by-element clipping of a value.
+
+  The value is constrained between 2 bounds.
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicClip
+  @{
+ */
+
+/**
+  @brief         Elementwise floating-point clipping
+  @param[in]     pSrc          points to input values
+  @param[out]    pDst          points to output clipped values
+  @param[in]     low           lower bound
+  @param[in]     high          higher bound
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+void riscv_clip_f32(const float32_t * pSrc,
+  float32_t * pDst,
+  float32_t low,
+  float32_t high,
+  uint32_t numSamples)
+{
+    uint32_t left = numSamples;      /* Samples still to clip */
+
+    while (left > 0U)
+    {
+        float32_t value = *pSrc++;
+
+        /* The upper bound is tested first; the lower bound only when it passes */
+        if (value > high)
+        {
+            value = high;
+        }
+        else if (value < low)
+        {
+            value = low;
+        }
+
+        *pDst++ = value;
+        left--;
+    }
+}
+
+/**
+  @} end of BasicClip group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_q15.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_q15.c
@ -0,0 +1,70 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_clip_q15.c
+ * Description:  Q15 elementwise clipping
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+
+/**
+  @addtogroup BasicClip
+  @{
+ */
+
+/**
+  @brief         Elementwise fixed-point clipping
+  @param[in]     pSrc          points to input values
+  @param[out]    pDst          points to output clipped values
+  @param[in]     low           lower bound
+  @param[in]     high          higher bound
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+void riscv_clip_q15(const q15_t * pSrc,
+  q15_t * pDst,
+  q15_t low,
+  q15_t high,
+  uint32_t numSamples)
+{
+    uint32_t n;
+
+    for (n = 0U; n != numSamples; ++n)
+    {
+        const q15_t in = pSrc[n];
+
+        /* Upper bound wins when exceeded, then the lower bound, else the sample itself */
+        pDst[n] = (in > high) ? high : ((in < low) ? low : in);
+    }
+}
+
+/**
+  @} end of BasicClip group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_q31.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_q31.c
@ -0,0 +1,70 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_clip_q31.c
+ * Description:  Q31 elementwise clipping
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+
+/**
+  @addtogroup BasicClip
+  @{
+ */
+
+/**
+  @brief         Elementwise fixed-point clipping
+  @param[in]     pSrc          points to input values
+  @param[out]    pDst          points to output clipped values
+  @param[in]     low           lower bound
+  @param[in]     high          higher bound
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+void riscv_clip_q31(const q31_t * pSrc,
+  q31_t * pDst,
+  q31_t low,
+  q31_t high,
+  uint32_t numSamples)
+{
+    uint32_t pos = 0U;
+
+    /* Walk the block once; each output is the input or the bound it crossed,
+       with the upper bound checked before the lower one. */
+    while (pos < numSamples)
+    {
+        q31_t out = pSrc[pos];
+
+        if (out > high)
+        {
+            out = high;
+        }
+        else if (out < low)
+        {
+            out = low;
+        }
+
+        pDst[pos++] = out;
+    }
+}
+
+/**
+  @} end of BasicClip group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_q7.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_clip_q7.c
@ -0,0 +1,70 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_clip_q7.c
+ * Description:  Elementwise Q7 fixed-point clipping
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+
+/**
+  @addtogroup BasicClip
+  @{
+ */
+
+/**
+  @brief         Elementwise clipping of a Q7 vector to the range [low, high]
+  @param[in]     pSrc          points to input values
+  @param[out]    pDst          points to output clipped values
+  @param[in]     low           lower bound
+  @param[in]     high          higher bound
+  @param[in]     numSamples    number of samples to clip
+  @return        none
+ */
+void riscv_clip_q7(const q7_t * pSrc,
+  q7_t * pDst,
+  q7_t low,
+  q7_t high,
+  uint32_t numSamples)
+{
+    const q7_t * in = pSrc;
+    q7_t * out = pDst;
+    uint32_t remaining;
+
+    for (remaining = numSamples; remaining > 0U; remaining--)
+    {
+        const q7_t value = *in++;
+
+        /* Upper bound is checked first, then the lower bound; otherwise pass through. */
+        *out++ = (value > high) ? high : ((value < low) ? low : value);
+    }
+}
+
+/**
+  @} end of BasicClip group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_f16.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_f16.c
@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_dot_prod_f16.c
+ * Description:  Floating-point dot product
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions_f16.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicDotProd Vector Dot Product
+
+  Computes the dot product of two vectors.
+  The vectors are multiplied element-by-element and then summed.
+
+  <pre>
+      sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicDotProd
+  @{
+ */
+
+/**
+  @brief         Dot product of floating-point vectors.
+  @param[in]     pSrcA      points to the first input vector.
+  @param[in]     pSrcB      points to the second input vector.
+  @param[in]     blockSize  number of samples in each vector.
+  @param[out]    result     output result returned here.
+  @return        none
+ */
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+void riscv_dot_prod_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t blockSize,
+        float16_t * result)
+{
+  /* Running sum of products, accumulated in half precision. */
+  _Float16 acc = 0.0f;
+  /* Samples left for the one-at-a-time loop. */
+  uint32_t remaining;
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  uint32_t quads;
+
+  /* Unrolled part: four multiply-accumulates per iteration, in input order. */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    acc += (_Float16)(pSrcA[0]) * (_Float16)(pSrcB[0]);
+    acc += (_Float16)(pSrcA[1]) * (_Float16)(pSrcB[1]);
+    acc += (_Float16)(pSrcA[2]) * (_Float16)(pSrcB[2]);
+    acc += (_Float16)(pSrcA[3]) * (_Float16)(pSrcB[3]);
+
+    pSrcA += 4;
+    pSrcB += 4;
+  }
+
+  /* The 0 to 3 samples that did not fill a group of four. */
+  remaining = blockSize % 0x4U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below. */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  for (; remaining > 0U; remaining--)
+  {
+    /* acc = A[0]*B[0] + A[1]*B[1] + ... + A[blockSize-1]*B[blockSize-1] */
+    acc += (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
+  }
+
+  /* Hand the dot product back to the caller. */
+  *result = acc;
+}
+#endif
+/**
+  @} end of BasicDotProd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_f32.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_f32.c
@ -0,0 +1,144 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_dot_prod_f32.c
+ * Description:  Floating-point dot product
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicDotProd Vector Dot Product
+
+  Computes the dot product of two vectors.
+  The vectors are multiplied element-by-element and then summed.
+
+  <pre>
+      sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicDotProd
+  @{
+ */
+
+/**
+  @brief         Dot product of floating-point vectors.
+  @param[in]     pSrcA      points to the first input vector.
+  @param[in]     pSrcB      points to the second input vector.
+  @param[in]     blockSize  number of samples in each vector.
+  @param[out]    result     output result returned here.
+  @return        none
+ */
+
+
+void riscv_dot_prod_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t blockSize,
+        float32_t * result)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples still to process */
+  size_t l;                                                  /* Vector length granted by vsetvl */
+  const float32_t * inputA = pSrcA;
+  const float32_t * inputB = pSrcB;
+  vfloat32m8_t v_A, v_B;
+  vfloat32m1_t v_sum;
+
+  /* Seed the accumulator with 0. A splat is used instead of
+   * vfmv_s_f_f32m1(v_sum, ...) so that v_sum is never read before it has
+   * been initialised. */
+  l = vsetvl_e32m1(1);
+  v_sum = vfmv_v_f_f32m1(0.0f, l);
+
+  for (; (l = vsetvl_e32m8(blkCnt)) > 0; blkCnt -= l)
+  {
+    v_A = vle32_v_f32m8(inputA, l);
+    v_B = vle32_v_f32m8(inputB, l);
+    inputA += l;                  /* Advance both inputs past the l elements just loaded */
+    inputB += l;
+    /* Multiply element-wise and fold the products into the running sum */
+    v_sum = vfredsum_vs_f32m8_f32m1(v_sum, vfmul_vv_f32m8(v_A, v_B, l), v_sum, l);
+  }
+
+  /* Store the single accumulated value to the caller's result */
+  l = vsetvl_e32m1(1);
+  vse32_v_f32m1(result, v_sum, l);
+#else
+        uint32_t blkCnt;                               /* Loop counter */
+        float32_t sum = 0.0f;                          /* Temporary return variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+    /* Calculate dot product and store result in a temporary buffer. */
+    sum += (*pSrcA++) * (*pSrcB++);
+
+    sum += (*pSrcA++) * (*pSrcB++);
+
+    sum += (*pSrcA++) * (*pSrcB++);
+
+    sum += (*pSrcA++) * (*pSrcB++);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+    /* Calculate dot product and store result in a temporary buffer. */
+    sum += (*pSrcA++) * (*pSrcB++);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store result in destination buffer */
+  *result = sum;
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicDotProd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_q15.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_q15.c
@ -0,0 +1,152 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_dot_prod_q15.c
+ * Description:  Q15 dot product
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicDotProd
+  @{
+ */
+
+/**
+  @brief         Dot product of Q15 vectors.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[in]     blockSize  number of samples in each vector
+  @param[out]    result     output result returned here
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these
+                   results are added to a 64-bit accumulator in 34.30 format.
+                   Nonsaturating additions are used and given that there are 33 guard bits in the accumulator
+                   there is no risk of overflow.
+                   The return result is in 34.30 format.
+ */
+void riscv_dot_prod_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q63_t * result)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples still to process */
+  size_t l;                                                  /* Vector length granted by vsetvl */
+  const q15_t * inputA = pSrcA;
+  const q15_t * inputB = pSrcB;
+  q63_t * output = result;
+  vint16m4_t v_inA;
+  vint16m4_t v_inB;
+  vint64m1_t v_sum;
+
+  /* Seed the 64-bit accumulator with 0. A splat is used instead of
+   * vmv_s_x_i64m1(v_sum, ...) so that v_sum is never read before it has
+   * been initialised. */
+  l = vsetvl_e64m1(1);
+  v_sum = vmv_v_x_i64m1(0, l);
+
+  for (; (l = vsetvl_e16m4(blkCnt)) > 0; blkCnt -= l)
+  {
+    v_inA = vle16_v_i16m4(inputA, l);
+    v_inB = vle16_v_i16m4(inputB, l);
+    inputA += l;
+    inputB += l;
+    /* Widening multiply to 32 bits, then widening reduction into the 64-bit sum */
+    v_sum = vwredsum_vs_i32m8_i64m1(v_sum, vwmul_vv_i32m8(v_inA, v_inB, l), v_sum, l);
+  }
+
+  /* Store the single accumulated value to the caller's result */
+  l = vsetvl_e64m1(1);
+  vse64_v_i64m1(output, v_sum, l);
+#else
+        uint32_t blkCnt;                               /* Loop counter */
+        /* NOTE(review): volatile is kept from the original; presumably a
+         * toolchain workaround - confirm before removing. */
+        volatile q63_t sum = 0;                        /* Temporary return variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* The unrolled loop consumes 8 samples per iteration only when the DSP
+   * extension is used on RV64 (two 4-sample reads); every other
+   * configuration consumes 4. The iteration count and the remainder below
+   * must use the same factor, otherwise samples are skipped. */
+#if defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64)
+  /* Loop unrolling: Compute 8 outputs at a time */
+  blkCnt = blockSize >> 3U;
+#else
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+#endif /* defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+#if defined (RISCV_MATH_DSP)
+#if __RISCV_XLEN == 64
+    /* Two packed multiply-accumulates of 4 samples each */
+    sum = __RV_SMALDA(sum, read_q15x4_ia ((q15_t **) &pSrcA), read_q15x4_ia ((q15_t **) &pSrcB));
+    sum = __RV_SMALDA(sum, read_q15x4_ia ((q15_t **) &pSrcA), read_q15x4_ia ((q15_t **) &pSrcB));
+#else
+    /* Two packed multiply-accumulates of 2 samples each */
+    sum = __RV_SMALDA(sum, read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB));
+    sum = __RV_SMALDA(sum, read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB));
+#endif /* __RISCV_XLEN == 64 */
+#else
+    sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
+    sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
+    sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
+    sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs (same factor as above) */
+#if defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64)
+  blkCnt = blockSize % 0x8U;
+#else
+  blkCnt = blockSize % 0x4U;
+#endif /* defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64) */
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+    /* Calculate dot product and store result in a temporary buffer. */
+#if defined (RISCV_MATH_DSP)
+    /* Masking to 16 bits leaves the upper halfword zero, so only the
+     * single low-halfword product is added to sum. */
+    sum  = __SMLALD((*pSrcA++) & 0xffff, (*pSrcB++) & 0xffff, sum);
+#else
+    sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store result in destination buffer in 34.30 format */
+  *result = sum;
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicDotProd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_q31.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_q31.c
@ -0,0 +1,158 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_dot_prod_q31.c
+ * Description:  Q31 dot product
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicDotProd
+  @{
+ */
+
+/**
+  @brief         Dot product of Q31 vectors.
+  @param[in]     pSrcA      points to the first input vector.
+  @param[in]     pSrcB      points to the second input vector.
+  @param[in]     blockSize  number of samples in each vector.
+  @param[out]    result     output result returned here.
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these
+                   are truncated to 2.48 format by discarding the lower 14 bits.
+                   The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
+                   There are 15 guard bits in the accumulator and there is no risk of overflow as long as
+                   the length of the vectors is less than 2^16 elements.
+                   The return result is in 16.48 format.
+ */
+
+void riscv_dot_prod_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q63_t * result)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples still to process */
+  size_t l;                                                  /* Vector length granted by vsetvl */
+  const q31_t * inputA = pSrcA;
+  const q31_t * inputB = pSrcB;
+  q63_t * output = result;
+  vint32m4_t v_inA;
+  vint32m4_t v_inB;
+  vint64m1_t v_sum;
+
+  /* Seed the 64-bit accumulator with 0. A splat is used instead of
+   * vmv_s_x_i64m1(v_sum, ...) so that v_sum is never read before it has
+   * been initialised. */
+  l = vsetvl_e64m1(1);
+  v_sum = vmv_v_x_i64m1(0, l);
+
+  for (; (l = vsetvl_e32m4(blkCnt)) > 0; blkCnt -= l)
+  {
+    v_inA = vle32_v_i32m4(inputA, l);
+    v_inB = vle32_v_i32m4(inputB, l);
+    inputA += l;
+    inputB += l;
+    /* Widening multiply to 64 bits, discard the low 14 bits of each product,
+     * then reduce into the running sum */
+    v_sum = vredsum_vs_i64m8_i64m1(v_sum, vsra_vx_i64m8(vwmul_vv_i64m8(v_inA, v_inB, l), 14, l), v_sum, l);
+  }
+
+  /* Store the single accumulated value to the caller's result */
+  l = vsetvl_e64m1(1);
+  vse64_v_i64m1(output, v_sum, l);
+#else
+        uint32_t blkCnt;                               /* Loop counter */
+        q63_t sum = 0;                                 /* Temporary return variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+    /* Calculate dot product and store result in a temporary buffer.
+     * Each 64-bit product is shifted right by 14 before accumulation. */
+#if defined(RISCV_MATH_DSP)
+    /* The same sequence is used for RV32 and RV64 */
+    sum += (__RV_MULSR64(*pSrcA++, *pSrcB++) >> 14);
+    sum += (__RV_MULSR64(*pSrcA++, *pSrcB++) >> 14);
+    sum += (__RV_MULSR64(*pSrcA++, *pSrcB++) >> 14);
+    sum += (__RV_MULSR64(*pSrcA++, *pSrcB++) >> 14);
+#else
+    sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
+    sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
+    sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
+    sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+    /* Calculate dot product and store result in a temporary buffer. */
+#if defined(RISCV_MATH_DSP)
+    /* Same intrinsic name as the unrolled loop above, for consistency */
+    sum += (__RV_MULSR64(*pSrcA++, *pSrcB++) >> 14);
+#else
+    sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store result in destination buffer in 16.48 format */
+  *result = sum;
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicDotProd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_q7.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_dot_prod_q7.c
@ -0,0 +1,182 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_dot_prod_q7.c
+ * Description:  Q7 dot product
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicDotProd
+  @{
+ */
+
+/**
+  @brief         Dot product of Q7 vectors.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[in]     blockSize  number of samples in each vector
+  @param[out]    result     output result returned here
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
+                   results are added to an accumulator in 18.14 format.
+                   Nonsaturating additions are used and there is no danger of wrap around as long as
+                   the vectors are less than 2^18 elements long.
+                   The return result is in 18.14 format.
+ */
+
+void riscv_dot_prod_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * result)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples still to process */
+  size_t l;                                                  /* Vector length granted by vsetvl */
+  const q7_t * inputA = pSrcA;
+  const q7_t * inputB = pSrcB;
+  q31_t * output = result;
+  vint8m4_t v_inA;
+  vint8m4_t v_inB;
+  vint32m1_t v_sum;
+
+  /* Seed the 32-bit accumulator with 0. A splat is used instead of
+   * vmv_s_x_i32m1(v_sum, ...) so that v_sum is never read before it has
+   * been initialised. */
+  l = vsetvl_e32m1(1);
+  v_sum = vmv_v_x_i32m1(0, l);
+
+  for (; (l = vsetvl_e8m4(blkCnt)) > 0; blkCnt -= l)
+  {
+    v_inA = vle8_v_i8m4(inputA, l);
+    v_inB = vle8_v_i8m4(inputB, l);
+    inputA += l;
+    inputB += l;
+    /* Widening multiply to 16 bits, then widening reduction into the 32-bit sum */
+    v_sum = vwredsum_vs_i16m8_i32m1(v_sum, vwmul_vv_i16m8(v_inA, v_inB, l), v_sum, l);
+  }
+
+  /* Store the single accumulated value to the caller's result */
+  l = vsetvl_e32m1(1);
+  vse32_v_i32m1(output, v_sum, l);
+#else
+        uint32_t blkCnt;                               /* Loop counter */
+        /* NOTE(review): volatile is kept from the original; presumably a
+         * toolchain workaround - confirm before removing. */
+        volatile q31_t sum = 0;                        /* Temporary return variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* The unrolled loop consumes 8 samples per iteration only when the DSP
+   * extension is used on RV64; every other configuration consumes 4.
+   * blkCnt is set in every configuration so the loop below never runs on an
+   * uninitialised counter, and the remainder uses the same factor. */
+#if defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64)
+  q63_t input1, input2;                          /* Packed groups of 8 samples */
+  q63_t sum64 = 0;                               /* Packed accumulator updated by __RV_SMAQA */
+  /* Loop unrolling: Compute 8 outputs at a time */
+  blkCnt = blockSize >> 3U;
+#else
+#if defined (RISCV_MATH_DSP)
+  q31_t input1, input2;                          /* Packed groups of 4 samples */
+#endif
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+#endif /* defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+#if defined (RISCV_MATH_DSP)
+#if __RISCV_XLEN == 64
+    /* read 8 samples at a time from sourceA */
+    input1 = read_q7x8_ia ((q7_t **) &pSrcA);
+    /* read 8 samples at a time from sourceB */
+    input2 = read_q7x8_ia ((q7_t **) &pSrcB);
+    /* multiply and accumulate 8 samples at a time */
+    sum64 = __RV_SMAQA(sum64, input1, input2);
+#else
+    /* read 4 samples at a time from sourceA */
+    input1 = read_q7x4_ia ((q7_t **) &pSrcA);
+    /* read 4 samples at a time from sourceB */
+    input2 = read_q7x4_ia ((q7_t **) &pSrcB);
+    /* multiply and accumulate 4 samples at a time */
+    sum = __RV_SMAQA(sum, input1, input2);
+#endif /* __RISCV_XLEN == 64 */
+#else
+    sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
+    sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
+    sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
+    sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+#if defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64)
+  /* Add the upper and lower 32-bit words of sum64 into sum */
+  sum +=((sum64 + (sum64<<32u))>>32u);
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x8U;
+#else
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+#endif /* defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64) */
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+    /* Calculate dot product and store result in a temporary buffer. */
+#if defined (RISCV_MATH_DSP)
+    /* Masking to 16 bits leaves the upper halfword zero, so only the
+     * single low-halfword product is added to sum. */
+    sum  = __SMLAD((*pSrcA++) & 0xffff, (*pSrcB++) & 0xffff, sum);
+#else
+    sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store result in destination buffer in 18.14 format */
+  *result = sum;
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicDotProd group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_f16.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_f16.c
@ -0,0 +1,120 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_mult_f16.c
+ * Description:  Floating-point vector multiplication
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions_f16.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicMult Vector Multiplication
+
+  Element-by-element multiplication of two vectors.
+
+  <pre>
+      pDst[n] = pSrcA[n] * pSrcB[n],   0 <= n < blockSize.
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicMult
+  @{
+ */
+
+/**
+  @brief         Floating-point vector multiplication.
+  @param[in]     pSrcA      points to the first input vector.
+  @param[in]     pSrcB      points to the second input vector.
+  @param[out]    pDst       points to the output vector.
+  @param[in]     blockSize  number of samples in each vector.
+  @return        none
+ */
+
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+void riscv_mult_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize)
+{
+  /* Samples left for the one-at-a-time loop. */
+  uint32_t remaining;
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  uint32_t quads;
+
+  /* Unrolled part: four products per iteration, written in input order. */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    pDst[0] = pSrcA[0] * pSrcB[0];
+    pDst[1] = pSrcA[1] * pSrcB[1];
+    pDst[2] = pSrcA[2] * pSrcB[2];
+    pDst[3] = pSrcA[3] * pSrcB[3];
+
+    pSrcA += 4;
+    pSrcB += 4;
+    pDst += 4;
+  }
+
+  /* The 0 to 3 samples that did not fill a group of four. */
+  remaining = blockSize % 0x4U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below. */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  for (; remaining > 0U; remaining--)
+  {
+    /* pDst[n] = pSrcA[n] * pSrcB[n] */
+    *pDst++ = (*pSrcA++) * (*pSrcB++);
+  }
+
+}
+#endif
+
+/**
+  @} end of BasicMult group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_f32.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_f32.c
@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_mult_f32.c
+ * Description:  Floating-point vector multiplication
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicMult Vector Multiplication
+
+  Element-by-element multiplication of two vectors.
+
+  <pre>
+      pDst[n] = pSrcA[n] * pSrcB[n],   0 <= n < blockSize.
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicMult
+  @{
+ */
+
+/**
+  @brief         Floating-point vector multiplication.
+  @param[in]     pSrcA      points to the first input vector.
+  @param[in]     pSrcB      points to the second input vector.
+  @param[out]    pDst       points to the output vector.
+  @param[in]     blockSize  number of samples in each vector.
+  @return        none
+ */
+
+void riscv_mult_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize)
+{
+  /* pDst[n] = pSrcA[n] * pSrcB[n] for 0 <= n < blockSize */
+#if defined(RISCV_VECTOR)
+  uint32_t remaining = blockSize;                /* Samples not yet processed */
+  size_t vl;                                     /* Lanes granted by vsetvl for this pass */
+
+  /* Strip-mined vector loop: stops once vsetvl grants zero lanes */
+  while ((vl = vsetvl_e32m8(remaining)) > 0) {
+    vfloat32m8_t va = vle32_v_f32m8(pSrcA, vl);
+    vfloat32m8_t vb = vle32_v_f32m8(pSrcB, vl);
+
+    /* C = A * B */
+    vse32_v_f32m8(pDst, vfmul_vv_f32m8(va, vb, vl), vl);
+
+    pSrcA += vl;
+    pSrcB += vl;
+    pDst  += vl;
+    remaining -= vl;
+  }
+#else
+  uint32_t remaining;                            /* Samples left for the tail loop */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  uint32_t quads;                                /* Full groups of four samples */
+
+  /* Unrolled section: four products per pass, in ascending index order */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    /* C = A * B */
+    pDst[0] = pSrcA[0] * pSrcB[0];
+    pDst[1] = pSrcA[1] * pSrcB[1];
+    pDst[2] = pSrcA[2] * pSrcB[2];
+    pDst[3] = pSrcA[3] * pSrcB[3];
+
+    /* Step all three cursors past the group just processed */
+    pSrcA += 4;
+    pSrcB += 4;
+    pDst  += 4;
+  }
+
+  /* Samples that do not fill a group of four go to the tail loop */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the tail loop handles every sample */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Tail loop: one product per pass */
+  for (; remaining > 0U; remaining--)
+  {
+    /* C = A * B */
+    *pDst++ = (*pSrcA++) * (*pSrcB++);
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicMult group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_q15.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_q15.c
@ -0,0 +1,165 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_mult_q15.c
+ * Description:  Q15 vector multiplication
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicMult
+  @{
+ */
+
+/**
+  @brief         Q15 vector multiplication
+  @param[in]     pSrcA      points to first input vector
+  @param[in]     pSrcB      points to second input vector
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ */
+void riscv_mult_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t remaining = blockSize;                /* Samples not yet processed */
+  size_t vl;                                     /* Lanes granted by vsetvl for this pass */
+
+  /* Strip-mined vector loop: stops once vsetvl grants zero lanes */
+  while ((vl = vsetvl_e16m8(remaining)) > 0) {
+    vint16m8_t va = vle16_v_i16m8(pSrcA, vl);
+    vint16m8_t vb = vle16_v_i16m8(pSrcB, vl);
+
+    /* C = A * B, using the vsmul intrinsic */
+    vse16_v_i16m8(pDst, vsmul_vv_i16m8(va, vb, vl), vl);
+
+    pSrcA += vl;
+    pSrcB += vl;
+    pDst  += vl;
+    remaining -= vl;
+  }
+#else
+  uint32_t remaining;                            /* Samples left for the tail loop */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  uint32_t quads;                                /* Full groups of four samples */
+
+  /* Unrolled section: every variant below emits four outputs per pass */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    /* C = A * B */
+
+#if defined (RISCV_MATH_DSP)
+    /* Packed path: the read/write helpers advance the pointers themselves */
+#if __RISCV_XLEN == 64
+    write_q15x4_ia(&pDst, __RV_KHM16(read_q15x4_ia((q15_t **) &pSrcA), read_q15x4_ia((q15_t **) &pSrcB)));
+#elif defined (RISCV_DSP64)
+    write_q15x4_ia(&pDst, __RV_DKHM16(read_q15x4_ia((q15_t **) &pSrcA), read_q15x4_ia((q15_t **) &pSrcB)));
+#else
+    /* Two samples per call, so two calls per pass */
+    write_q15x2_ia(&pDst, __RV_KHM16(read_q15x2_ia((q15_t **) &pSrcA), read_q15x2_ia((q15_t **) &pSrcB)));
+    write_q15x2_ia(&pDst, __RV_KHM16(read_q15x2_ia((q15_t **) &pSrcA), read_q15x2_ia((q15_t **) &pSrcB)));
+#endif /* __RISCV_XLEN == 64 */
+#else
+    /* Scalar path: widen, multiply, shift right by 15, saturate to 16 bits */
+    pDst[0] = (q15_t) __SSAT((((q31_t) pSrcA[0] * pSrcB[0]) >> 15), 16);
+    pDst[1] = (q15_t) __SSAT((((q31_t) pSrcA[1] * pSrcB[1]) >> 15), 16);
+    pDst[2] = (q15_t) __SSAT((((q31_t) pSrcA[2] * pSrcB[2]) >> 15), 16);
+    pDst[3] = (q15_t) __SSAT((((q31_t) pSrcA[3] * pSrcB[3]) >> 15), 16);
+
+    pSrcA += 4;
+    pSrcB += 4;
+    pDst  += 4;
+#endif /* defined (RISCV_MATH_DSP) */
+  }
+
+  /* Samples that do not fill a group of four go to the tail loop */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the tail loop handles every sample */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Tail loop: one saturated product per pass */
+  for (; remaining > 0U; remaining--)
+  {
+    /* C = A * B */
+    *pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicMult group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_q31.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_q31.c
@ -0,0 +1,155 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_mult_q31.c
+ * Description:  Q31 vector multiplication
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicMult
+  @{
+ */
+
+/**
+  @brief         Q31 vector multiplication.
+  @param[in]     pSrcA      points to the first input vector.
+  @param[in]     pSrcB      points to the second input vector.
+  @param[out]    pDst       points to the output vector.
+  @param[in]     blockSize  number of samples in each vector.
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ */
+void riscv_mult_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples not yet processed */
+  size_t l;                                                  /* Lanes granted by vsetvl for this pass */
+  vint32m8_t vx, vy;
+
+  /* Strip-mined loop: load l lanes of each input, multiply with vsmul, store */
+  for (; (l = vsetvl_e32m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle32_v_i32m8(pSrcA, l);
+    pSrcA += l;
+    vy = vle32_v_i32m8(pSrcB, l);
+    vse32_v_i32m8 (pDst, vsmul_vv_i32m8(vx, vy, l), l);
+    pSrcB += l;
+    pDst += l;
+  }
+#else
+        uint32_t blkCnt;                               /* Loop counter */
+#if defined (RISCV_MATH_LOOPUNROLL) && defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64)
+        q63_t temp;                                    /* Result of one __RV_SMMUL on a pair of samples */
+#endif
+        q31_t out;                                     /* Temporary output variable */
+
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A * B */
+#if defined (RISCV_MATH_DSP)
+    /* The __RV_SMMUL intrinsic is only used when the DSP extension is enabled */
+#if __RISCV_XLEN == 64
+
+    /* Two samples per __RV_SMMUL: first output from temp, second from temp >> 32 */
+    temp = __RV_SMMUL(read_q31x2_ia((q31_t **) &pSrcA), read_q31x2_ia((q31_t **) &pSrcB));
+    out = __SSAT(temp, 31);
+    *pDst++ = out << 1U;
+
+    out = __SSAT(temp>>32, 31);
+    *pDst++ = out << 1U;
+
+    temp = __RV_SMMUL(read_q31x2_ia((q31_t **) &pSrcA), read_q31x2_ia((q31_t **) &pSrcB));
+    out = __SSAT(temp, 31);
+    *pDst++ = out << 1U;
+
+    out = __SSAT(temp>>32, 31);
+    *pDst++ = out << 1U;
+#else
+    /* Multiply inputs and store result in destination buffer. */
+    out = __RV_SMMUL(*pSrcA++, *pSrcB++);
+    out = __SSAT(out, 31);
+    *pDst++ = out << 1U;
+
+    out = __RV_SMMUL(*pSrcA++, *pSrcB++);
+    out = __SSAT(out, 31);
+    *pDst++ = out << 1U;
+
+    out = __RV_SMMUL(*pSrcA++, *pSrcB++);
+    out = __SSAT(out, 31);
+    *pDst++ = out << 1U;
+
+    out = __RV_SMMUL(*pSrcA++, *pSrcB++);
+    out = __SSAT(out, 31);
+    *pDst++ = out << 1U;
+
+#endif /* __RISCV_XLEN == 64 */
+#else
+    /* No DSP extension: same plain-C arithmetic as the tail loop below */
+    out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
+    out = __SSAT(out, 31);
+    *pDst++ = out << 1U;
+
+    out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
+    out = __SSAT(out, 31);
+    *pDst++ = out << 1U;
+
+    out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
+    out = __SSAT(out, 31);
+    *pDst++ = out << 1U;
+
+    out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
+    out = __SSAT(out, 31);
+    *pDst++ = out << 1U;
+#endif /* defined (RISCV_MATH_DSP) */
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A * B */
+
+    /* Keep the upper 32 bits of the 64-bit product, saturate to 31 bits, shift left by one */
+    out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
+    out = __SSAT(out, 31);
+    *pDst++ = out << 1U;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicMult group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_q7.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_mult_q7.c
@ -0,0 +1,159 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_mult_q7.c
+ * Description:  Q7 vector multiplication
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicMult
+  @{
+ */
+
+/**
+  @brief         Q7 vector multiplication
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ */
+void riscv_mult_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples not yet processed */
+  size_t l;                                                  /* Lanes granted by vsetvl for this pass */
+  vint8m8_t vx, vy;
+
+  /* Strip-mined loop: load l lanes of each input, multiply with vsmul, store */
+  for (; (l = vsetvl_e8m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle8_v_i8m8(pSrcA, l);
+    pSrcA += l;
+    vy = vle8_v_i8m8(pSrcB, l);
+    vse8_v_i8m8 (pDst, vsmul_vv_i8m8(vx, vy, l), l);
+    pSrcB += l;
+    pDst += l;
+  }
+#else
+        uint32_t blkCnt;                               /* Loop counter */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /*
+   * The unroll factor depends on the configuration: 8 outputs per pass when
+   * RISCV_DSP64 is defined or __RISCV_XLEN == 64, otherwise 4. The loop body
+   * and the remainder computation below must use the same factor.
+   */
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+  /* Loop unrolling: Compute 8 outputs at a time */
+  blkCnt = blockSize >> 3U;
+#else
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+#endif /* defined (RISCV_DSP64) || (__RISCV_XLEN == 64) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A * B */
+
+#if defined (RISCV_MATH_DSP)
+    /* Packed path: the read/write helpers advance the pointers themselves */
+#if __RISCV_XLEN == 64
+    write_q7x8_ia (&pDst, __RV_KHM8 (read_q7x8_ia ((q7_t **) &pSrcA), read_q7x8_ia ((q7_t **) &pSrcB)));
+#else
+#ifdef RISCV_DSP64
+    write_q7x8_ia (&pDst, __RV_DKHM8 (read_q7x8_ia ((q7_t **) &pSrcA), read_q7x8_ia ((q7_t **) &pSrcB)));
+#else
+    write_q7x4_ia (&pDst, __RV_KHM8 (read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
+#endif
+#endif /* __RISCV_XLEN == 64 */
+#else
+    /* Scalar path: widen, multiply, shift right by 7, saturate to 8 bits */
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+    /* Four more outputs to match the 8-per-pass loop count chosen above */
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
+#endif
+#endif /* defined (RISCV_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs (same factor as above) */
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+  blkCnt = blockSize % 0x8U;
+#else
+  blkCnt = blockSize % 0x4U;
+#endif
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A * B */
+
+    /* Multiply input and store result in destination buffer. */
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicMult group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_f16.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_f16.c
@ -0,0 +1,120 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_negate_f16.c
+ * Description:  Negates floating-point vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions_f16.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicNegate Vector Negate
+
+  Negates the elements of a vector.
+
+  <pre>
+      pDst[n] = -pSrc[n],   0 <= n < blockSize.
+  </pre>
+
+  The functions support in-place computation allowing the source and
+  destination pointers to reference the same memory buffer.
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicNegate
+  @{
+ */
+
+/**
+  @brief         Negates the elements of a floating-point vector.
+  @param[in]     pSrc       points to input vector.
+  @param[out]    pDst       points to output vector.
+  @param[in]     blockSize  number of samples in each vector.
+  @return        none
+ */
+
+#if defined(RISCV_FLOAT16_SUPPORTED)
+void riscv_negate_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize)
+{
+  /* pDst[n] = -pSrc[n] for 0 <= n < blockSize */
+  uint32_t remaining;                            /* Samples left for the tail loop */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  uint32_t quads;                                /* Full groups of four samples */
+
+  /* Unrolled section: four negations per pass, in ascending index order */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    /* C = -A */
+    pDst[0] = -pSrc[0];
+    pDst[1] = -pSrc[1];
+    pDst[2] = -pSrc[2];
+    pDst[3] = -pSrc[3];
+
+    /* Step both cursors past the group just processed */
+    pSrc += 4;
+    pDst += 4;
+  }
+
+  /* Samples that do not fill a group of four go to the tail loop */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the tail loop handles every sample */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Tail loop: one negation per pass */
+  for (; remaining > 0U; remaining--)
+  {
+    /* C = -A */
+    *pDst++ = -*pSrc++;
+  }
+
+}
+#endif
+
+/**
+  @} end of BasicNegate group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_f32.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_f32.c
@ -0,0 +1,129 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_negate_f32.c
+ * Description:  Negates floating-point vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicNegate Vector Negate
+
+  Negates the elements of a vector.
+
+  <pre>
+      pDst[n] = -pSrc[n],   0 <= n < blockSize.
+  </pre>
+
+  The functions support in-place computation allowing the source and
+  destination pointers to reference the same memory buffer.
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicNegate
+  @{
+ */
+
+/**
+  @brief         Negates the elements of a floating-point vector.
+  @param[in]     pSrc       points to input vector.
+  @param[out]    pDst       points to output vector.
+  @param[in]     blockSize  number of samples in each vector.
+  @return        none
+ */
+
+void riscv_negate_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize)
+{
+  /* pDst[n] = -pSrc[n] for 0 <= n < blockSize */
+#if defined(RISCV_VECTOR)
+  uint32_t remaining = blockSize;                /* Samples not yet processed */
+  size_t vl;                                     /* Lanes granted by vsetvl for this pass */
+
+  /* Strip-mined vector loop: stops once vsetvl grants zero lanes */
+  while ((vl = vsetvl_e32m8(remaining)) > 0) {
+    vfloat32m8_t va = vle32_v_f32m8(pSrc, vl);
+
+    /* C = -A, computed as A * (-1) */
+    vse32_v_f32m8(pDst, vfmul_vf_f32m8(va, -1, vl), vl);
+
+    pSrc += vl;
+    pDst += vl;
+    remaining -= vl;
+  }
+#else
+  uint32_t remaining;                            /* Samples left for the tail loop */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  uint32_t quads;                                /* Full groups of four samples */
+
+  /* Unrolled section: four negations per pass, in ascending index order */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    /* C = -A */
+    pDst[0] = -pSrc[0];
+    pDst[1] = -pSrc[1];
+    pDst[2] = -pSrc[2];
+    pDst[3] = -pSrc[3];
+
+    /* Step both cursors past the group just processed */
+    pSrc += 4;
+    pDst += 4;
+  }
+
+  /* Samples that do not fill a group of four go to the tail loop */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the tail loop handles every sample */
+  remaining = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Tail loop: one negation per pass */
+  for (; remaining > 0U; remaining--)
+  {
+    /* C = -A */
+    *pDst++ = -*pSrc++;
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicNegate group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_q15.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_q15.c
@ -0,0 +1,148 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_negate_q15.c
+ * Description:  Negates Q15 vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicNegate
+  @{
+ */
+
+/**
+  @brief         Negates the elements of a Q15 vector.
+  @param[in]     pSrc       points to the input vector.
+  @param[out]    pDst       points to the output vector.
+  @param[in]     blockSize  number of samples in each vector.
+  @return        none
+
+  @par           Conditions for optimum performance
+                   Input and output buffers should be 32-bit aligned.
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF.
+ */
+void riscv_negate_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples not yet processed */
+  size_t l;                                                  /* Lanes granted by vsetvl for this pass */
+  vint16m8_t vx;
+
+  for (; (l = vsetvl_e16m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle16_v_i16m8(pSrc, l);
+    pSrc += l;
+    /*
+     * C = 0 - A with saturation. The zero operand is splatted to all l lanes
+     * with vmv_v_x; vmv_s_x would write lane 0 only and leave every other
+     * lane of the minuend undefined.
+     */
+    vse16_v_i16m8 (pDst, vssub_vv_i16m8(vmv_v_x_i16m8(0, l), vx, l), l);
+    pDst += l;
+  }
+#else
+        uint32_t blkCnt;                               /* Loop counter */
+        q15_t in;                                      /* Temporary input variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = -A */
+
+#if defined (RISCV_MATH_DSP)
+    /* Packed path: subtract the packed samples from 0; helpers advance the pointers */
+#if __RISCV_XLEN == 64
+    write_q15x4_ia(&pDst, __RV_KSUB16(0, read_q15x4_ia((q15_t **)&pSrc)));
+#else
+#ifdef RISCV_DSP64
+    /* NOTE(review): the multiply kernels spell the 64-bit intrinsics __RV_DKHM16 / __RV_DKHM8;
+       confirm that the unprefixed __DKSUB16 is actually provided by the core headers. */
+    write_q15x4_ia(&pDst, __DKSUB16(0, read_q15x4_ia((q15_t **)&pSrc)));
+#else
+    /* Two samples per call, so two calls per pass */
+    write_q15x2_ia(&pDst, __RV_KSUB16(0, read_q15x2_ia((q15_t **)&pSrc)));
+    write_q15x2_ia(&pDst, __RV_KSUB16(0, read_q15x2_ia((q15_t **)&pSrc)));
+#endif
+#endif /* __RISCV_XLEN == 64 */
+#else
+    /* Scalar path: 0x8000 has no positive counterpart, so it maps to 0x7fff */
+    in = *pSrc++;
+    *pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
+
+    in = *pSrc++;
+    *pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
+
+    in = *pSrc++;
+    *pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
+
+    in = *pSrc++;
+    *pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
+#endif /* defined (RISCV_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = -A */
+
+    /* Negate and store result in destination buffer; 0x8000 maps to 0x7fff. */
+    in = *pSrc++;
+    *pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicNegate group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_q31.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_q31.c
@ -0,0 +1,148 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_negate_q31.c
+ * Description:  Negates Q31 vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicNegate
+  @{
+ */
+
+/**
+  @brief         Negates the elements of a Q31 vector.
+  @param[in]     pSrc       points to the input vector.
+  @param[out]    pDst       points to the output vector.
+  @param[in]     blockSize   number of samples in each vector.
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF.
+
+  @par           Implementation variants
+                   RISCV_VECTOR selects a vector loop; otherwise a scalar loop is used,
+                   optionally unrolled by four (RISCV_MATH_LOOPUNROLL) and optionally
+                   using the saturating DSP intrinsics (RISCV_MATH_DSP).
+ */
+
+void riscv_negate_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                   /* Samples still to process */
+  size_t l = vsetvl_e32m8(blkCnt);               /* Vector length granted for the whole block */
+  vint32m8_t vx;                                 /* Input samples of the current strip */
+  /* Broadcast zero to every element. vmv_s_x would only define element 0,
+     leaving the remaining lanes of the minuend indeterminate.
+     NOTE(review): relies on later strips being granted a vl no larger than
+     this initial one - confirm for the target core. */
+  vint32m8_t vzero = vmv_v_x_i32m8(0, l);
+
+  for (; (l = vsetvl_e32m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle32_v_i32m8(pSrc, l);
+    pSrc += l;
+    /* C = -A, computed as the saturating difference 0 - A */
+    vse32_v_i32m8(pDst, vssub_vv_i32m8(vzero, vx, l), l);
+    pDst += l;
+  }
+#else
+  uint32_t blkCnt;                               /* Loop counter */
+  q31_t in;                                      /* Temporary input variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = -A */
+#if defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64)
+    /* Two Q31 samples per 64-bit access. The q15x4 helpers move exactly
+       64 bits and advance the pointer by 8 bytes, so they are reused here
+       with casts matching their q15_t ** parameters. The intrinsic is only
+       used when RISCV_MATH_DSP is defined, like every other DSP intrinsic
+       in this function. */
+    write_q15x4_ia((q15_t **) &pDst, __RV_KSUB32(0, read_q15x4_ia((q15_t **) &pSrc)));
+    write_q15x4_ia((q15_t **) &pDst, __RV_KSUB32(0, read_q15x4_ia((q15_t **) &pSrc)));
+#else
+    /* Negate and store result in destination buffer. */
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __QSUB(0, in);
+#else
+    *pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
+#endif
+
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __QSUB(0, in);
+#else
+    *pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
+#endif
+
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __QSUB(0, in);
+#else
+    *pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
+#endif
+
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __QSUB(0, in);
+#else
+    *pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
+#endif
+#endif /* defined (RISCV_MATH_DSP) && (__RISCV_XLEN == 64) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Remaining samples (or all samples when unrolling is disabled) */
+  while (blkCnt > 0U)
+  {
+    /* C = -A */
+
+    /* Negate and store result in destination buffer. */
+    in = *pSrc++;
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = __QSUB(0, in);
+#else
+    /* INT32_MIN has no positive counterpart, so it saturates to INT32_MAX */
+    *pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicNegate group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_q7.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_negate_q7.c
@ -0,0 +1,179 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_negate_q7.c
+ * Description:  Negates Q7 vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicNegate
+  @{
+ */
+
+/**
+  @brief         Negates the elements of a Q7 vector.
+  @param[in]     pSrc       points to the input vector.
+  @param[out]    pDst       points to the output vector.
+  @param[in]     blockSize   number of samples in each vector.
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function uses saturating arithmetic.
+                   The Q7 value -1 (0x80) is saturated to the maximum allowable positive value 0x7F.
+
+  @par           Implementation variants
+                   RISCV_VECTOR selects a vector loop; otherwise a scalar loop is used.
+                   With RISCV_MATH_LOOPUNROLL the main loop handles 8 samples per pass
+                   when RISCV_DSP64 is defined or __RISCV_XLEN == 64, and 4 otherwise.
+ */
+void riscv_negate_q7(
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                   /* Samples still to process */
+  size_t l = vsetvl_e8m8(blkCnt);                /* Vector length granted for the whole block */
+  vint8m8_t vx;                                  /* Input samples of the current strip */
+  /* Broadcast zero to every element. vmv_s_x would only define element 0,
+     leaving the remaining lanes of the minuend indeterminate.
+     NOTE(review): relies on later strips being granted a vl no larger than
+     this initial one - confirm for the target core. */
+  vint8m8_t vzero = vmv_v_x_i8m8(0, l);
+
+  for (; (l = vsetvl_e8m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle8_v_i8m8(pSrc, l);
+    pSrc += l;
+    /* C = -A, computed as the saturating difference 0 - A */
+    vse8_v_i8m8(pDst, vssub_vv_i8m8(vzero, vx, l), l);
+    pDst += l;
+  }
+#else
+  uint32_t blkCnt;                               /* Loop counter */
+  q7_t in;                                       /* Temporary input variable */
+
+#if defined (RISCV_MATH_LOOPUNROLL)
+
+#if defined (RISCV_MATH_DSP)
+  /* The packed temporary must be 64 bits wide on every path that reads
+     8 samples at once (read_q7x8_ia), i.e. the same condition that selects
+     the 8-sample unrolling below. */
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+  q63_t in1;                                     /* 8 packed Q7 samples */
+#else
+  q31_t in1;                                     /* 4 packed Q7 samples */
+#endif
+#endif /* RISCV_MATH_DSP */
+
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+  /* Loop unrolling: Compute 8 outputs at a time */
+  blkCnt = blockSize >> 3U;
+#else
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+#endif /* defined (RISCV_DSP64) || (__RISCV_XLEN == 64) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = -A */
+
+#if defined (RISCV_MATH_DSP)
+    /* Negate and store result in destination buffer
+       (8 samples per access on the 64-bit paths, 4 otherwise). */
+#if __RISCV_XLEN == 64
+    in1 = read_q7x8_ia ((q7_t **) &pSrc);
+    write_q7x8_ia (&pDst, __RV_KSUB8(0, in1));
+#else
+#ifdef RISCV_DSP64
+    /* NOTE(review): the Q15 variant uses __DKSUB16 on this path; confirm
+       __DQSUB8 is the intended 64-bit saturating 8-bit subtract. */
+    in1 = read_q7x8_ia ((q7_t **) &pSrc);
+    write_q7x8_ia (&pDst, __DQSUB8(0, in1));
+#else
+    in1 = read_q7x4_ia ((q7_t **) &pSrc);
+    write_q7x4_ia (&pDst, __RV_KSUB8(0, in1));
+#endif /* RISCV_DSP64 */
+#endif /* __RISCV_XLEN == 64 */
+#else
+    /* 0x80 has no positive counterpart, so it saturates to 0x7f */
+    in = *pSrc++;
+    *pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
+
+    in = *pSrc++;
+    *pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
+
+    in = *pSrc++;
+    *pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
+
+    in = *pSrc++;
+    *pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
+
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+    /* Second group of four to match the 8-sample unrolling factor */
+    in = *pSrc++;
+    *pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
+
+    in = *pSrc++;
+    *pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
+
+    in = *pSrc++;
+    *pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
+
+    in = *pSrc++;
+    *pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
+#endif /* defined (RISCV_DSP64) || (__RISCV_XLEN == 64) */
+
+#endif /* defined (RISCV_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x8U;
+#else
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+#endif
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (RISCV_MATH_LOOPUNROLL) */
+
+  /* Remaining samples (or all samples when unrolling is disabled) */
+  while (blkCnt > 0U)
+  {
+    /* C = -A */
+
+    /* Negate and store result in destination buffer. */
+    in = *pSrc++;
+
+#if defined (RISCV_MATH_DSP)
+    *pDst++ = (q7_t) __QSUB8(0, in);
+#else
+    *pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of BasicNegate group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_not_u16.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_not_u16.c
@ -0,0 +1,126 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_not_u16.c
+ * Description:  uint16_t bitwise NOT
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup Not Vector bitwise NOT
+
+  Compute the logical bitwise NOT.
+
+  There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ */
+
+/**
+  @addtogroup Not
+  @{
+ */
+
+/**
+  @brief         Compute the logical bitwise NOT of a fixed-point vector.
+  @param[in]     pSrc       points to input vector
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Implementation notes
+                   Outside the RISCV_VECTOR build the bulk of the data is inverted
+                   one machine word at a time (four samples per 64-bit word when
+                   RISCV_DSP64 is defined or __RISCV_XLEN == 64, two samples per
+                   32-bit word otherwise); leftover samples are inverted one by one.
+ */
+
+void riscv_not_u16(
+    const uint16_t * pSrc,
+          uint16_t * pDst,
+          uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples still to process */
+  size_t l;                                                  /* Vector length of the current strip */
+  vuint16m8_t vx;
+
+  for (; (l = vsetvl_e16m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle16_v_u16m8(pSrc, l);
+    pSrc += l;
+    vse16_v_u16m8(pDst, vnot_v_u16m8(vx, l), l);
+    pDst += l;
+  }
+#else
+    uint32_t blkCnt;      /* Loop counter */
+
+    /* NOTE(review): the wide accesses below assume pSrc/pDst are suitably
+       aligned for the word type, or that the core tolerates misaligned
+       accesses - confirm for the target. */
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+
+    const uint64_t * pSrc_temp = (const uint64_t *) pSrc;
+    uint64_t * pDst_temp = (uint64_t *) pDst;
+
+    /* Invert four samples per 64-bit word */
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0U)
+    {
+        *pDst_temp++ = ~(*pSrc_temp++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+
+    /* Continue sample-wise right after the last word handled above;
+       both the read and the write cursor have to move on. */
+    pSrc = (const uint16_t *) pSrc_temp;
+    pDst = (uint16_t *) pDst_temp;
+    blkCnt = blockSize % 4U;
+
+#else
+    const uint32_t * pSrc_temp = (const uint32_t *) pSrc;
+    uint32_t * pDst_temp = (uint32_t *) pDst;
+
+    /* Invert two samples per 32-bit word */
+    blkCnt = blockSize >> 1;
+    while (blkCnt > 0U)
+    {
+        *pDst_temp++ = ~(*pSrc_temp++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+
+    /* Continue sample-wise right after the last word handled above;
+       both the read and the write cursor have to move on. */
+    pSrc = (const uint16_t *) pSrc_temp;
+    pDst = (uint16_t *) pDst_temp;
+    blkCnt = blockSize % 2U;
+#endif
+
+    /* Leftover samples that did not fill a whole word */
+    while (blkCnt > 0U)
+    {
+        *pDst++ = ~(*pSrc++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of Not group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_not_u32.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_not_u32.c
@ -0,0 +1,102 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_not_u32.c
+ * Description:  uint32_t bitwise NOT
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup Not
+  @{
+ */
+
+/**
+  @brief         Compute the logical bitwise NOT of a fixed-point vector.
+  @param[in]     pSrc       points to input vector
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Implementation notes
+                   Outside the RISCV_VECTOR build, when RISCV_DSP64 is defined or
+                   __RISCV_XLEN == 64, two samples are inverted per 64-bit word and
+                   an odd trailing sample is inverted on its own; otherwise every
+                   sample is inverted individually.
+ */
+
+void riscv_not_u32(
+    const uint32_t * pSrc,
+          uint32_t * pDst,
+          uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples still to process */
+  size_t l;                                                  /* Vector length of the current strip */
+  vuint32m8_t vx;
+
+  for (; (l = vsetvl_e32m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle32_v_u32m8(pSrc, l);
+    pSrc += l;
+    vse32_v_u32m8(pDst, vnot_v_u32m8(vx, l), l);
+    pDst += l;
+  }
+#else
+    uint32_t blkCnt;      /* Loop counter */
+
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+
+    /* NOTE(review): the 64-bit accesses assume pSrc/pDst are suitably
+       aligned, or that the core tolerates misaligned accesses - confirm. */
+    const uint64_t * pSrc_temp = (const uint64_t *) pSrc;
+    uint64_t * pDst_temp = (uint64_t *) pDst;
+
+    /* Invert two samples per 64-bit word */
+    blkCnt = blockSize >> 1;
+    while (blkCnt > 0U)
+    {
+        *pDst_temp++ = ~(*pSrc_temp++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+
+    /* Continue sample-wise right after the last word handled above;
+       both the read and the write cursor have to move on. */
+    pSrc = (const uint32_t *) pSrc_temp;
+    pDst = (uint32_t *) pDst_temp;
+    blkCnt = blockSize % 2U;
+#else
+    /* Initialize blkCnt with number of samples */
+    blkCnt = blockSize;
+#endif
+
+    /* Leftover sample (64-bit path) or all samples (32-bit path) */
+    while (blkCnt > 0U)
+    {
+        *pDst++ = ~(*pSrc++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of Not group
+ */
--- a/components/nmsis/dsp/src/BasicMathFunctions/riscv_not_u8.c
+++ b/components/nmsis/dsp/src/BasicMathFunctions/riscv_not_u8.c
@ -0,0 +1,118 @@
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS DSP Library
+ * Title:        riscv_not_u8.c
+ * Description:  uint8_t bitwise NOT
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: RISC-V Cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup Not
+  @{
+ */
+
+/**
+  @brief         Compute the logical bitwise NOT of a fixed-point vector.
+  @param[in]     pSrc       points to input vector
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+  @par           Implementation notes
+                   Outside the RISCV_VECTOR build the bulk of the data is inverted
+                   one machine word at a time (eight samples per 64-bit word when
+                   RISCV_DSP64 is defined or __RISCV_XLEN == 64, four samples per
+                   32-bit word otherwise); leftover samples are inverted one by one.
+ */
+
+void riscv_not_u8(
+    const uint8_t * pSrc,
+          uint8_t * pDst,
+          uint32_t blockSize)
+{
+#if defined(RISCV_VECTOR)
+  uint32_t blkCnt = blockSize;                               /* Samples still to process */
+  size_t l;                                                  /* Vector length of the current strip */
+  vuint8m8_t vx;
+
+  for (; (l = vsetvl_e8m8(blkCnt)) > 0; blkCnt -= l) {
+    vx = vle8_v_u8m8(pSrc, l);
+    pSrc += l;
+    vse8_v_u8m8(pDst, vnot_v_u8m8(vx, l), l);
+    pDst += l;
+  }
+#else
+    uint32_t blkCnt;      /* Loop counter */
+
+    /* NOTE(review): the wide accesses below assume pSrc/pDst are suitably
+       aligned for the word type, or that the core tolerates misaligned
+       accesses - confirm for the target. */
+#if defined (RISCV_DSP64) || (__RISCV_XLEN == 64)
+
+    const uint64_t * pSrc_temp = (const uint64_t *) pSrc;
+    uint64_t * pDst_temp = (uint64_t *) pDst;
+
+    /* Invert eight samples per 64-bit word */
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U)
+    {
+        *pDst_temp++ = ~(*pSrc_temp++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+
+    /* Continue sample-wise right after the last word handled above;
+       both the read and the write cursor have to move on. */
+    pSrc = (const uint8_t *) pSrc_temp;
+    pDst = (uint8_t *) pDst_temp;
+    blkCnt = blockSize % 8U;
+
+#else
+    const uint32_t * pSrc_temp = (const uint32_t *) pSrc;
+    uint32_t * pDst_temp = (uint32_t *) pDst;
+
+    /* Invert four samples per 32-bit word */
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0U)
+    {
+        *pDst_temp++ = ~(*pSrc_temp++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+
+    /* Continue sample-wise right after the last word handled above;
+       both the read and the write cursor have to move on. */
+    pSrc = (const uint8_t *) pSrc_temp;
+    pDst = (uint8_t *) pDst_temp;
+    blkCnt = blockSize % 4U;
+#endif /*defined (RISCV_DSP64) || (__RISCV_XLEN == 64)*/
+
+    /* Leftover samples that did not fill a whole word */
+    while (blkCnt > 0U)
+    {
+        *pDst++ = ~(*pSrc++);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+#endif /* defined(RISCV_VECTOR) */
+}
+
+/**
+  @} end of Not group
+ */
--- a/Show More
+++ b/Show More