diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Add/aidge_add.h b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Add/aidge_add.h
new file mode 100644
index 0000000000000000000000000000000000000000..9d3c1a95d4eb2361120cff884d8d16e49045ef90
--- /dev/null
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Add/aidge_add.h
@@ -0,0 +1,20 @@
+#pragma once
+
+/**
+ * @brief Element-wise addition: output[i] = input_a[i] + input_b[i].
+ *
+ * @tparam SIZE     Number of elements processed in each buffer.
+ * @tparam Input_T  Element type of both operands.
+ * @tparam Output_T Element type of the result.
+ * @param input_a First operand, read at indices [0, SIZE).
+ * @param input_b Second operand, read at indices [0, SIZE).
+ * @param output  Destination, written at indices [0, SIZE).
+ */
+template <unsigned int SIZE, typename Input_T, typename Output_T>
+__attribute__((always_inline)) inline static
+void aidge_add(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
+    for (unsigned int i = 0; i < SIZE; ++i) {
+        // Note : no cast to get compiler warning if we lose precision during auto cast!
+        output[i] = input_a[i] + input_b[i];
+    }
+}
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Atan/aidge_atan.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Atan/aidge_atan.h
similarity index 100%
rename from aidge_export_arm_cortexm/_Aidge_Arm/kernels/Atan/aidge_atan.hpp
rename to aidge_export_arm_cortexm/_Aidge_Arm/kernels/Atan/aidge_atan.h
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchNorm.h b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchNorm.h
new file mode 100644
index 0000000000000000000000000000000000000000..c6cf9f75d19d4eb153345ac034115eb98edf78a3
--- /dev/null
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchNorm.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <cmath>
+
+/**
+ * @brief Per-channel batch normalisation of a channel-major buffer.
+ *
+ * With featureMapSize the product of the NB_SpatialDims entries of spatial_dims,
+ * every element i of channel c (flat index c * featureMapSize + i) is computed as
+ *   outputs = bias[c] + scale[c] * (inputs - input_mean[c]) / sqrt(input_var[c] + epsilon)
+ *
+ * @tparam Input_T        Element type of inputs.
+ * @tparam Output_T       Element type of outputs.
+ * @tparam MeanVar_T      Element type of input_mean and input_var.
+ * @tparam ScaleBias_T    Element type of scale and bias.
+ * @tparam SpatialDims_T  Element type of spatial_dims.
+ * @tparam NB_Channels    Number of channels to process.
+ * @tparam NB_SpatialDims Number of entries read from spatial_dims.
+ * @param inputs       Source buffer, NB_Channels * featureMapSize elements.
+ * @param outputs      Destination buffer, NB_Channels * featureMapSize elements.
+ * @param input_mean   One mean per channel.
+ * @param input_var    One variance per channel.
+ * @param scale        One scale factor per channel.
+ * @param bias         One offset per channel.
+ * @param spatial_dims Extents whose product is the per-channel element count.
+ * @param epsilon      Added to the variance before taking the square root.
+ */
+template <
+    typename Input_T,
+    typename Output_T,
+    typename MeanVar_T,
+    typename ScaleBias_T,
+    typename SpatialDims_T,
+    unsigned int NB_Channels,
+    unsigned int NB_SpatialDims
+    >
+__attribute__((always_inline)) inline static
+void aidge_batchnorm(Input_T* __restrict inputs,
+                     Output_T* __restrict outputs,
+                     MeanVar_T* __restrict input_mean,
+                     MeanVar_T* __restrict input_var,
+                     ScaleBias_T* __restrict scale,
+                     ScaleBias_T* __restrict bias,
+                     SpatialDims_T* __restrict spatial_dims,
+                     float epsilon)
+{
+    int featureMapSize = 1;
+    // Unsigned index: NB_SpatialDims is unsigned, so the comparison has matching signedness.
+    for (unsigned int index = 0; index < NB_SpatialDims; ++index) {
+        featureMapSize *= spatial_dims[index];
+    }
+
+    for (unsigned int current_channel = 0; current_channel < NB_Channels; ++current_channel) {
+        const int ioIndex = static_cast<int>(current_channel) * featureMapSize;
+        // Constant for the whole channel, so computed once outside the element loop.
+        // std::sqrt (rather than unqualified sqrt) is the name <cmath> guarantees.
+        const float std_dev = std::sqrt(input_var[current_channel] + epsilon);
+
+        for (int current_feature = 0; current_feature < featureMapSize; ++current_feature) {
+            const int index = ioIndex + current_feature;
+            // Start from the bias and accumulate in Output_T: same conversions as
+            // storing the bias first and adding to it afterwards, in a single pass.
+            Output_T value = bias[current_channel];
+            value += scale[current_channel] * (inputs[index] - input_mean[current_channel]) / std_dev;
+            outputs[index] = value;
+        }
+    }
+}
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm2d_chw_float32.c b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm2d_chw_float32.h
similarity index 100%
rename from aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm2d_chw_float32.c
rename to aidge_export_arm_cortexm/_Aidge_Arm/kernels/BatchNorm/aidge_batchnorm2d_chw_float32.h
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Broadcast/aidge_broadcast.h b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Broadcast/aidge_broadcast.h
new file mode 100644
index 0000000000000000000000000000000000000000..dd76ae8e8a4f318b63092f1643b6da27835aa5b7
--- /dev/null
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Broadcast/aidge_broadcast.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#include <iostream>
+#include <stdexcept>
+#include <vector>
+
+/**
+ * @brief Copy `inputs` into `output`, repeating values along broadcast dimensions.
+ *
+ * Dimensions are matched from the last one backwards. For each output element,
+ * an input dimension equal to the matching output dimension uses the output
+ * coordinate; any other input dimension uses coordinate 0. Output dimensions
+ * with no matching input dimension do not contribute to the input index.
+ *
+ * @param inputs       Tensor values to broadcast.
+ * @param output       Tensor values to return; `output_size` elements are written.
+ * @param dim_input    Input dimensions, `nb_dimInput` entries.
+ * @param dim_output   Output dimensions, `nb_dimOutput` entries.
+ * @param nb_dimInput  Number of entries in `dim_input`.
+ * @param nb_dimOutput Number of entries in `dim_output`.
+ * @param input_size   Number of elements available in `inputs`.
+ * @param output_size  Count of output data needed.
+ * @throws std::out_of_range if a computed input index is not below `input_size`.
+ */
+inline void broadcast(
+    const float* inputs,
+    float* output,
+    const int dim_input[],
+    const int dim_output[],
+    int nb_dimInput,
+    int nb_dimOutput,
+    int input_size,
+    int output_size) {
+
+    // Strides of the input: the last dimension has stride 1, each earlier one is
+    // the product of the extents that follow it.
+    std::vector<int> stride_input(nb_dimInput, 1);
+    for (int i = nb_dimInput - 2; i >= 0; --i) {
+        stride_input[i] = stride_input[i + 1] * dim_input[i + 1];
+    }
+
+    for (int i = 0; i < output_size; ++i) {
+        int idx_input = 0;
+        int idx_output = i;
+
+        // Peel output coordinates off the flat index, last dimension first.
+        for (int d = nb_dimOutput - 1, d_in = nb_dimInput - 1; d >= 0; --d) {
+            const int coord_out = idx_output % dim_output[d];
+            idx_output /= dim_output[d];
+
+            if (d_in >= 0) {
+                const int coord_in = (dim_input[d_in] == dim_output[d]) ? coord_out : 0;
+                idx_input += coord_in * stride_input[d_in];
+                --d_in;
+            }
+        }
+
+        if (idx_input >= input_size) {
+            throw std::out_of_range("Index out of range in input tensor.");
+        }
+        output[i] = inputs[idx_input];
+    }
+}
+
+/**
+ * @brief Tell whether either operand must be broadcast to reach the output shape.
+ *
+ * Operand shapes are right-aligned against `dim_output`; missing leading
+ * dimensions count as 1. Every dimension of both operands is validated.
+ *
+ * @param tensor_a     Unused; only the shapes are inspected.
+ * @param tensor_b     Unused; only the shapes are inspected.
+ * @param dim_a        Dimensions of operand A, `nb_dimA` entries.
+ * @param dim_b        Dimensions of operand B, `nb_dimB` entries.
+ * @param dim_output   Output dimensions, `nb_dimOutput` entries.
+ * @param nb_dimA      Number of entries in `dim_a`.
+ * @param nb_dimB      Number of entries in `dim_b`.
+ * @param nb_dimOutput Number of entries in `dim_output`.
+ * @param size_a       Unused.
+ * @param size_b       Unused.
+ * @return true if at least one dimension of A or B differs from the output.
+ * @throws std::invalid_argument if a differing dimension of A or B is not 1.
+ */
+inline bool should_broadcast(
+    const float* tensor_a, const float* tensor_b,
+    const int dim_a[], const int dim_b[], const int dim_output[],
+    int nb_dimA, int nb_dimB, int nb_dimOutput,
+    int size_a, int size_b) {
+
+    (void)tensor_a;
+    (void)tensor_b;
+    (void)size_a;
+    (void)size_b;
+
+    bool need_broadcast = false;
+
+    for (int i = 0; i < nb_dimOutput; ++i) {
+        // Index into each operand's shape once right-aligned; negative means the
+        // operand has no such leading dimension, which counts as extent 1.
+        const int dim_a_idx = i - (nb_dimOutput - nb_dimA);
+        const int dim_b_idx = i - (nb_dimOutput - nb_dimB);
+        const int dim_a_val = (dim_a_idx >= 0) ? dim_a[dim_a_idx] : 1;
+        const int dim_b_val = (dim_b_idx >= 0) ? dim_b[dim_b_idx] : 1;
+
+        if (dim_a_val != dim_output[i]) {
+            if (dim_a_val != 1) {
+                throw std::invalid_argument("Tensor A cannot be broadcasted to output dimensions.");
+            }
+            need_broadcast = true;
+        }
+        // B is validated exactly like A, so a broadcast needed only by B is reported.
+        if (dim_b_val != dim_output[i]) {
+            if (dim_b_val != 1) {
+                throw std::invalid_argument("Tensor B cannot be broadcasted to output dimensions.");
+            }
+            need_broadcast = true;
+        }
+    }
+    return need_broadcast;
+}
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Concat/aidge_concat_float32.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Concat/aidge_concat.h
similarity index 100%
rename from aidge_export_arm_cortexm/_Aidge_Arm/kernels/Concat/aidge_concat_float32.hpp
rename to aidge_export_arm_cortexm/_Aidge_Arm/kernels/Concat/aidge_concat.h
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Div/aidge_div.h b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Div/aidge_div.h
new file mode 100644
index 0000000000000000000000000000000000000000..fafa29834d14e61bdcc6431e6e3f248efcc4bfdd
--- /dev/null
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Div/aidge_div.h
@@ -0,0 +1,24 @@
+#pragma once
+
+/**
+ * @brief Element-wise division: output[i] = input_a[i] / input_b[i].
+ *
+ * A zero divisor is not handled (see the TODO in the loop): the result is
+ * whatever Input_T's division yields for input_b[i] == 0.
+ *
+ * @tparam SIZE     Number of elements processed in each buffer.
+ * @tparam Input_T  Element type of both operands.
+ * @tparam Output_T Element type of the result.
+ * @param input_a Dividends, read at indices [0, SIZE).
+ * @param input_b Divisors, read at indices [0, SIZE).
+ * @param output  Destination, written at indices [0, SIZE).
+ */
+template <unsigned int SIZE, typename Input_T, typename Output_T>
+__attribute__((always_inline)) inline static
+void aidge_div(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
+    for (unsigned int i = 0; i < SIZE; ++i) {
+        // Note : no cast to get compiler warning if we lose precision during auto cast!
+        // [TODO] : input_b[i] = 0
+        output[i] = input_a[i] / input_b[i];
+    }
+}
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Mul/aidge_mul.h b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Mul/aidge_mul.h
new file mode 100644
index 0000000000000000000000000000000000000000..b18c2ab4b6a9f43d399d3cb0b7a0bff08a366c50
--- /dev/null
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Mul/aidge_mul.h
@@ -0,0 +1,20 @@
+#pragma once
+
+/**
+ * @brief Element-wise multiplication: output[i] = input_a[i] * input_b[i].
+ *
+ * @tparam SIZE     Number of elements processed in each buffer.
+ * @tparam Input_T  Element type of both operands.
+ * @tparam Output_T Element type of the result.
+ * @param input_a First operand, read at indices [0, SIZE).
+ * @param input_b Second operand, read at indices [0, SIZE).
+ * @param output  Destination, written at indices [0, SIZE).
+ */
+template <unsigned int SIZE, typename Input_T, typename Output_T>
+__attribute__((always_inline)) inline static
+void aidge_mul(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
+    for (unsigned int i = 0; i < SIZE; ++i) {
+        // Note : no cast to get compiler warning if we lose precision during auto cast!
+        output[i] = input_a[i] * input_b[i];
+    }
+}
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Relu/aidge_elu.h b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Relu/aidge_elu.h
new file mode 100644
index 0000000000000000000000000000000000000000..f55c5afe301009f5d1cf67e3f30017232c03087e
--- /dev/null
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Relu/aidge_elu.h
@@ -0,0 +1,18 @@
+#pragma once
+
+/**
+ * @brief Rectified linear unit: values below 0 become 0, all others are copied.
+ *
+ * @tparam SIZE     Number of elements processed.
+ * @tparam Input_T  Element type of the source buffer.
+ * @tparam Output_T Element type of the destination buffer.
+ * @param input  Source buffer, read at indices [0, SIZE).
+ * @param output Destination, written at indices [0, SIZE).
+ */
+template <unsigned int SIZE, typename Input_T, typename Output_T>
+__attribute__((always_inline)) inline static
+void aidge_relu(Input_T* __restrict input, Output_T* __restrict output) {
+    for (unsigned int i = 0; i < SIZE; ++i) {
+        output[i] = (input[i] < 0.0f) ? 0.0f : input[i];
+    }
+}
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape.h b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape.h
new file mode 100644
index 0000000000000000000000000000000000000000..95465aba70e86afa1e087193d2ee63536934a68a
--- /dev/null
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape.h
@@ -0,0 +1,18 @@
+#pragma once
+
+/**
+ * @brief Reshape kernel: copies SIZE elements from input to output in the same order.
+ *
+ * @tparam SIZE     Number of elements copied.
+ * @tparam Input_T  Element type of the source buffer.
+ * @tparam Output_T Element type of the destination buffer.
+ * @param input  Source buffer, read at indices [0, SIZE).
+ * @param output Destination, written at indices [0, SIZE).
+ */
+template <unsigned int SIZE, typename Input_T, typename Output_T>
+__attribute__((always_inline)) inline static
+void aidge_reshape(Input_T* __restrict input, Output_T* __restrict output) {
+    for (unsigned int i = 0; i < SIZE; ++i) {
+        output[i] = input[i];
+    }
+}
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape_chw_float32.c b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape_chw_float32.h
similarity index 100%
rename from aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape_chw_float32.c
rename to aidge_export_arm_cortexm/_Aidge_Arm/kernels/Reshape/aidge_reshape_chw_float32.h
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sigmoid/aidge_sigmoid.h b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sigmoid/aidge_sigmoid.h
new file mode 100644
index 0000000000000000000000000000000000000000..e1f3f5a571047bc3276e3312f4aa8809e65ac93f
--- /dev/null
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sigmoid/aidge_sigmoid.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <math.h>
+
+/**
+ * @brief Logistic sigmoid: output[i] = 1 / (1 + exp(-input_a[i])).
+ *
+ * @tparam SIZE     Number of elements processed.
+ * @tparam Input_T  Element type of the source buffer.
+ * @tparam Output_T Element type of the destination buffer.
+ * @param input_a Source buffer, read at indices [0, SIZE).
+ * @param input_b Unused: sigmoid is unary. Kept so the signature is unchanged.
+ * @param output  Destination, written at indices [0, SIZE).
+ */
+template <unsigned int SIZE, typename Input_T, typename Output_T>
+__attribute__((always_inline)) inline static
+void aidge_sigmoid(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
+    (void)input_b;
+    for (unsigned int i = 0; i < SIZE; ++i) {
+        // Note : no cast to get compiler warning if we lose precision during auto cast!
+        output[i] = 1 / (1 + exp(-input_a[i]));
+    }
+}
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sub/aidge_sub.h b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sub/aidge_sub.h
new file mode 100644
index 0000000000000000000000000000000000000000..5e9954d519216d1d43bb13c1d8f0e4eb1370bc74
--- /dev/null
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Sub/aidge_sub.h
@@ -0,0 +1,20 @@
+#pragma once
+
+/**
+ * @brief Element-wise subtraction: output[i] = input_a[i] - input_b[i].
+ *
+ * @tparam SIZE     Number of elements processed in each buffer.
+ * @tparam Input_T  Element type of both operands.
+ * @tparam Output_T Element type of the result.
+ * @param input_a Minuends, read at indices [0, SIZE).
+ * @param input_b Subtrahends, read at indices [0, SIZE).
+ * @param output  Destination, written at indices [0, SIZE).
+ */
+template <unsigned int SIZE, typename Input_T, typename Output_T>
+__attribute__((always_inline)) inline static
+void aidge_sub(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
+    for (unsigned int i = 0; i < SIZE; ++i) {
+        // Note : no cast to get compiler warning if we lose precision during auto cast!
+        output[i] = input_a[i] - input_b[i];
+    }
+}