/**
 * @brief Append NB_INPUTS flat buffers back-to-back into one output buffer.
 *
 * Input n is copied in full (sizes[n] elements) immediately after input n-1,
 * so the output receives sizes[0] + ... + sizes[NB_INPUTS - 1] elements, in
 * input order.
 *
 * This single template replaces the former fixed-arity float kernels
 * (aidge_concat2_float32 ... aidge_concat5_float32).
 *
 * Call-site shape produced by the forward-call template (names illustrative):
 * @code
 *   float* LAYER_INPUTS[] = { in0, in1 };
 *   unsigned int LAYER_SIZES[] = { LAYER_INPUT_0_SIZE, LAYER_INPUT_1_SIZE };
 *   aidge_concat<float, 2>(LAYER_AXIS, LAYER_INPUTS, LAYER_SIZES, out);
 * @endcode
 * NOTE(review): that call site hard-codes `float` although this kernel is
 * generic over T — confirm whether other element types must be exported.
 *
 * @tparam T          Element type shared by every input and by the output.
 * @tparam NB_INPUTS  Number of entries read from `inputs` and from `sizes`.
 *
 * @param axis    Accepted for interface compatibility but NOT used: the copy
 *                is always a flat append. NOTE(review): presumably this only
 *                equals a concatenation along `axis` when each input forms
 *                one contiguous run of the output (e.g. outermost axis) —
 *                confirm against the exporter.
 * @param inputs  Array of NB_INPUTS pointers; inputs[n] must be readable for
 *                sizes[n] elements (it is never dereferenced when sizes[n]
 *                is 0).
 * @param sizes   Array of NB_INPUTS element counts, one per input.
 * @param output  Destination buffer with room for the sum of all sizes. The
 *                `__restrict` qualifiers promise it does not overlap
 *                `inputs` or `sizes`.
 */
template<typename T, unsigned int NB_INPUTS>
__attribute__((always_inline)) inline static
void aidge_concat(
    const unsigned int axis,
    const T* const * __restrict inputs,
    const unsigned int* __restrict sizes,
    T* __restrict output)
{
    // The axis is intentionally ignored (see @param axis); the cast keeps
    // -Wunused-parameter quiet without changing the signature.
    (void)axis;

    // Write cursor: always points just past the last element written so far.
    T* dst = output;

    for (unsigned int n = 0; n < NB_INPUTS; ++n) {
        // Load the per-input pointer and length once instead of re-reading
        // them from the arrays on every inner-loop iteration.
        const T* const src = inputs[n];
        const unsigned int count = sizes[n];

        for (unsigned int i = 0; i < count; ++i) {
            dst[i] = src[i];
        }

        dst += count;
    }
}