diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Add/aidge_add.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Add/aidge_add.hpp new file mode 100644 index 0000000000000000000000000000000000000000..57cbc6fd386b5b5c7b71c7ca6942230444eb3d55 --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Add/aidge_add.hpp @@ -0,0 +1,8 @@ +template <unsigned int SIZE, typename T> +__attribute__((always_inline)) inline static +void aidge_add(T* __restrict input_a, T* __restrict input_b, T* __restrict output) { + for (unsigned int i = 0; i < SIZE; ++i) { + // Note : no cast to get compiler warning if we lose precision during auto cast! + output[i] = input_a[i] + input_b[i]; + } +} \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Atan/aidge_atan.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Atan/aidge_atan.hpp index d4da329a44eb8669bc0469a31beeca670b63e6c3..cbbe4e3ccb5ec4ee8483cdf8cdef2042ad6b0446 100644 --- a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Atan/aidge_atan.hpp +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Atan/aidge_atan.hpp @@ -1,8 +1,8 @@ #include <cmath> -template <unsigned int SIZE, typename Input_T, typename Output_T> +template <unsigned int SIZE, typename T> __attribute__((always_inline)) inline static -void aidge_atan(Input_T* __restrict input, Output_T* __restrict output) { +void aidge_atan(T* __restrict input, T* __restrict output) { for (unsigned int i = 0; i < SIZE; ++i) { // Note : no cast to get compiler warning if we lose precision during auto cast! output[i] = std::atan(input[i]); diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9accb6072bd2c09ee97cfe23aa3d906764ef9cf2 --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm.hpp @@ -0,0 +1,36 @@ +#include <cmath> + +template < + typename T, + typename MeanVar_T, + typename ScaleBias_T, + typename SpatialDims_T, + unsigned int NB_Channels, + unsigned int NB_SpatialDims + > +__attribute__((always_inline)) inline static +void aidge_batchnorm(T* __restrict inputs, + T* __restrict outputs, + MeanVar_T* __restrict input_mean, + MeanVar_T* __restrict input_var, + ScaleBias_T* __restrict scale, + ScaleBias_T* __restrict bias, + SpatialDims_T* __restrict spatial_dims, + float epsilon) +{ + int featureMapSize = 1; + for (int index = 0; index < NB_SpatialDims; ++index){ + featureMapSize *= spatial_dims[index]; + } + for (int current_channel = 0; current_channel < NB_Channels; ++current_channel){ + int ioIndex = current_channel * featureMapSize; + + for (int index = ioIndex; index < (ioIndex + featureMapSize); index++ ){ + outputs[index] = bias[current_channel]; + } + float var = sqrt(input_var[current_channel] + epsilon); + for (int current_feature = 0; current_feature < featureMapSize; ++current_feature){ + outputs[ioIndex + current_feature] += scale[current_channel] * (inputs[ioIndex + current_feature] - input_mean[current_channel]) / var; + } + } +} \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm2d_chw_float32.c b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm2d_chw_float32.hpp similarity index 100% rename from aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm2d_chw_float32.c rename to aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm2d_chw_float32.hpp diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Div/aidge_div.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Div/aidge_div.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3fa4772d5af824b491cc6307e1dcf6ad58af5083 --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Div/aidge_div.hpp @@ -0,0 +1,9 @@ +template <unsigned int SIZE, typename T> +__attribute__((always_inline)) inline static +void aidge_div(T* __restrict input_a, T* __restrict input_b, T* __restrict output) { + for (unsigned int i = 0; i < SIZE; ++i) { + // Note : no cast to get compiler warning if we lose precision during auto cast! + // [TODO] : input_b[i] = 0 + output[i] = input_a[i] / input_b[i]; + } +} \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/MatMul/aidge_matmul.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/MatMul/aidge_matmul.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2a1935661a2a643e7c1cd0d2d2e80ab0670c62ca --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/MatMul/aidge_matmul.hpp @@ -0,0 +1,95 @@ +template < + typename T, + typename Dim_T, + typename Size_T + > +__attribute__((always_inline)) inline static +void aidge_matmul(T* __restrict input_a, + T* __restrict input_b, + T* __restrict output, + Dim_T* __restrict dim_a, + Dim_T* __restrict dim_b, + Dim_T* __restrict dim_output, + Size_T __restrict size_aDim, + Size_T __restrict size_bDim, + Size_T __restrict size_outputDim) +{ + int ndim_a[size_outputDim]; + int ndim_b[size_outputDim]; + if (size_aDim == 1) { + ndim_a[0] = 1; + ndim_a[1] = dim_a[0]; + } + if (size_bDim == 1) { + ndim_b[0] = dim_b[0]; + ndim_b[1] = 1; + } + for (int i = 0; i < size_outputDim; ++i) { + int idx = size_outputDim - size_aDim; + ndim_a[i] = (i < idx) ? 1 : dim_a[i - idx]; + } + for (int i = 0; i < size_outputDim; ++i) { + int idx = size_outputDim - size_bDim; + ndim_b[i] = (i < idx) ? 1 : dim_b[i - idx]; + } + + int stride_post0[size_outputDim - 2]; + int stride_post1[size_outputDim - 2]; + int stride_step0[size_outputDim - 2]; + int stride_step1[size_outputDim - 2]; + + if (size_outputDim > 2) { + stride_post0[size_outputDim - 3] = 1; + stride_post1[size_outputDim - 3] = 1; + for (int i = size_outputDim - 4; i != -1; --i) { + stride_post0[i] = stride_post0[i + 1] * ndim_a[i + 1]; + stride_post1[i] = stride_post1[i + 1] * ndim_b[i + 1]; + } + for (int i = 0; i < size_outputDim - 2; ++i) { + stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; + } + } + + int nbMatrices = 1; + for (int i = size_outputDim - 3; i >= 0; --i) { + nbMatrices *= dim_output[i]; + } + int dim = size_outputDim - 3; + + int offsetIn0 = 0; + int offsetIn1 = 0; + int offsetOut = 0; + const int n = ndim_a[size_outputDim - 2]; + const int k = ndim_a[size_outputDim - 1]; + const int m = ndim_b[size_outputDim - 1]; + const int matrix0Size = n * k; + const int matrix1Size = k * m; + const int matrixOutSize = n * m; + + for(int stack = 0; stack < nbMatrices;){ + for (int i = 0; i < n; ++i) { + for (int j = 0; j < m; ++j) { + float sum = 0; + for (int l = 0; l < k; ++l) { + sum += (input_a[ offsetIn0*matrix0Size + i*k + l] * input_b[offsetIn1*matrix1Size + l*m + j]); + } + output[ offsetOut*matrixOutSize + i*m + j] = sum; + } + } + + if (++stack < nbMatrices) { + int tmp_stack = stack; + while(tmp_stack % dim_output[dim] == 0) { + tmp_stack /= dim_output[dim]; + dim--; + } + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = size_outputDim -3; + } + + } + +} \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Mul/aidge_mul.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Mul/aidge_mul.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7f522b94e38ca7c4b71399fc718fec932a720d3b --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Mul/aidge_mul.hpp @@ -0,0 +1,8 @@ +template <unsigned int SIZE, typename T> +__attribute__((always_inline)) inline static +void aidge_mul(T* __restrict input_a, T* __restrict input_b, T* __restrict output) { + for (unsigned int i = 0; i < SIZE; ++i) { + // Note : no cast to get compiler warning if we lose precision during auto cast! + output[i] = input_a[i] * input_b[i]; + } +} \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Relu/aidge_relu.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Relu/aidge_relu.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9b84a33b5484a8e294737d38f861b4f62d69995a --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Relu/aidge_relu.hpp @@ -0,0 +1,8 @@ + +template <unsigned int SIZE, typename T> +__attribute__((always_inline)) inline static +void aidge_relu(T* __restrict input, T* __restrict output) { + for (unsigned int i = 0; i < SIZE; ++i) { + output[i] = (input[i] < 0.0f) ? 0.0f : input[i]; + } +} \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8dfc52d42dc35650feb220c10dc8576439a64722 --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape.hpp @@ -0,0 +1,7 @@ +template <unsigned int SIZE, typename T> +__attribute__((always_inline)) inline static +void aidge_reshape(T* __restrict input, T* __restrict output) { + for (unsigned int i = 0; i < SIZE; ++i) { + output[i] = input[i]; + } +} \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape_chw_float32.c b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape_chw_float32.h similarity index 100% rename from aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape_chw_float32.c rename to aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape_chw_float32.h diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sigmoid/aidge_sigmoid.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sigmoid/aidge_sigmoid.hpp new file mode 100644 index 0000000000000000000000000000000000000000..eb287dd5d7fcf838d9f6d6b1091804abdb9794a3 --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sigmoid/aidge_sigmoid.hpp @@ -0,0 +1,10 @@ +#include <math.h> + +template <unsigned int SIZE, typename T> +__attribute__((always_inline)) inline static +void aidge_sigmoid(T* __restrict inputs, T* __restrict outputs) { + for (unsigned int i = 0; i < SIZE; ++i) { + // Note : no cast to get compiler warning if we lose precision during auto cast! + outputs[i] = 1 / (1 + exp(-inputs[i]) ); + } +} \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Softmax/aidge_softmax.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Softmax/aidge_softmax.hpp new file mode 100644 index 0000000000000000000000000000000000000000..00db1f22dfc18ee8c7b9ef8b93fa9c176faac9ac --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Softmax/aidge_softmax.hpp @@ -0,0 +1,48 @@ +#include <math.h> + +#define MAX_DIMS_AXIS_SIZE 128 /** TODO : is 128 enough or to big ? | Other possibility is to use a shared buffer as param, but this could have a side effect on Aidge's overall mechanics **/ +float exps[MAX_DIMS_AXIS_SIZE]; + +template < + typename T, + typename Dim_T, + typename Size_T + > +__attribute__((always_inline)) inline static +void aidge_softmax(T* __restrict input, + T* __restrict output, + Dim_T* __restrict dims, + Size_T __restrict dim_size, + int axis) +{ + axis += (axis >= 0 ) ? 0 : dim_size; + + int postAxisElems = 1; + for (unsigned int index = axis+1; index < dim_size; ++index) { + postAxisElems *= dims[index]; + } + int preAxisElems = 1; + for (int index = 0; index < axis; ++index) { + preAxisElems *= dims[index]; + } + + + for (int i = 0; i < preAxisElems; ++i) { + for (int j = 0; j < postAxisElems; ++j) { + float sumExp = 0.0; + + int baseIdx = i * dims[axis] * postAxisElems + j; + + for (int k = 0; k < dims[axis]; ++k) { + int inIdx = baseIdx + k * postAxisElems; + exps[k] = exp(input[inIdx]); + sumExp += exps[k]; + } + + for (int k = 0; k < dims[axis]; ++k) { + int inIdx = baseIdx + k * postAxisElems; + output[inIdx] = exps[k] / sumExp; + } + } + } +} \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sub/aidge_sub.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sub/aidge_sub.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8bbc194c5e4c430360f6058491ec752d18abe863 --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sub/aidge_sub.hpp @@ -0,0 +1,8 @@ +template <unsigned int SIZE, typename T> +__attribute__((always_inline)) inline static +void aidge_sub(T* __restrict input_a, T* __restrict input_b, T* __restrict output) { + for (unsigned int i = 0; i < SIZE; ++i) { + // Note : no cast to get compiler warning if we lose precision during auto cast! + output[i] = input_a[i] - input_b[i]; + } +} \ No newline at end of file