diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Concat/aidge_concat_float32.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Concat/aidge_concat_float32.hpp
index 6faa94de4ae0c87e50d94b2ac6a3790937490412..305b2d3e078e27b135869c66aeb102f45eb1f41b 100644
--- a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Concat/aidge_concat_float32.hpp
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Concat/aidge_concat_float32.hpp
@@ -1,92 +1,38 @@
-#include <stdarg.h>
-
-void aidge_concat2_float32 (unsigned int axis,
-                            float* input1,
-                            unsigned int size1,
-                            float* input2,
-                            unsigned int size2,
-                            float* output)
+#pragma once
+
+/**
+ * @brief Concatenate NB_INPUTS buffers back-to-back into a single output buffer.
+ *
+ * Input n contributes sizes[n] elements; they are written to `output`
+ * right after the elements of inputs 0..n-1.
+ *
+ * @tparam T          Element type shared by all inputs and the output.
+ * @tparam NB_INPUTS  Number of entries read from `inputs` and `sizes`.
+ * @param axis    Concatenation axis. Not used by this kernel: the copy is
+ *                always a flat append.
+ *                NOTE(review): a flat append presumably only matches a
+ *                concatenation along `axis` when all dimensions before it
+ *                have size 1 - confirm the exporter guarantees this.
+ * @param inputs  Array of NB_INPUTS pointers to the input buffers.
+ * @param sizes   Array of NB_INPUTS element counts, one per input.
+ * @param output  Destination buffer; receives sum(sizes) elements and must
+ *                not overlap the inputs (declared __restrict).
+ */
+template<typename T, unsigned int NB_INPUTS>
+__attribute__((always_inline)) inline static
+void aidge_concat(
+    const unsigned int axis,
+    const T* const * __restrict inputs,
+    const unsigned int* __restrict sizes,
+    T* __restrict output)
 {
-    for (unsigned int i = 0; i < size1; ++i) {
-        output[i] = input1[i];
-    }
-    for (unsigned int i = 0; i < size2; ++i) {
-        output[i + size1] = input2[i];
-    }
-}
-
-void aidge_concat3_float32 (unsigned int axis,
-                           float* input1,
-                           unsigned int size1,
-                           float* input2,
-                           unsigned int size2,
-                           float* input3,
-                           unsigned int size3,
-                           float* output)
-{
-    for (unsigned int i = 0; i < size1; ++i) {
-        output[i] = input1[i];
-    }
-    for (unsigned int i = 0; i < size2; ++i) {
-        output[i + size1] = input2[i];
-    }
-    for (unsigned int i = 0; i < size3; ++i) {
-        output[i + size1 + size2] = input3[i];
-    }
-}
-
-void aidge_concat4_float32 (unsigned int axis,
-                           float* input1,
-                           unsigned int size1,
-                           float* input2,
-                           unsigned int size2,
-                           float* input3,
-                           unsigned int size3,
-                           float* input4,
-                           unsigned int size4,
-                           float* output)
-{
-    for (unsigned int i = 0; i < size1; ++i) {
-        output[i] = input1[i];
-    }
-    for (unsigned int i = 0; i < size2; ++i) {
-        output[i + size1] = input2[i];
-    }
-    for (unsigned int i = 0; i < size3; ++i) {
-        output[i + size1 + size2] = input3[i];
-    }
-    for (unsigned int i = 0; i < size4; ++i) {
-        output[i + size1 + size2 + size3] = input4[i];
-    }
-}
-
-void aidge_concat5_float32 (unsigned int axis,
-                           float* input1,
-                           unsigned int size1,
-                           float* input2,
-                           unsigned int size2,
-                           float* input3,
-                           unsigned int size3,
-                           float* input4,
-                           unsigned int size4,
-                           float* input5,
-                           unsigned int size5,
-                           float* output)
-{
-    for (unsigned int i = 0; i < size1; ++i) {
-        output[i] = input1[i];
-    }
-    for (unsigned int i = 0; i < size2; ++i) {
-        output[i + size1] = input2[i];
-    }
-    for (unsigned int i = 0; i < size3; ++i) {
-        output[i + size1 + size2] = input3[i];
-    }
-    for (unsigned int i = 0; i < size4; ++i) {
-        output[i + size1 + size2 + size3] = input4[i];
-    }
-    for (unsigned int i = 0; i < size5; ++i) {
-        output[i + size1 + size2 + size3 + size4] = input5[i];
+    (void) axis;  // silence unused-parameter warnings; see @param axis
+
+    unsigned int offset = 0;  // write position of the current input in `output`
+    for (unsigned int n = 0; n < NB_INPUTS; ++n) {
+        for (unsigned int i = 0; i < sizes[n]; ++i) {
+            output[offset + i] = inputs[n][i];
+        }
+        offset += sizes[n];
     }
 }
-
diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/templates/forward_call/concat.jinja b/aidge_export_arm_cortexm/_Aidge_Arm/templates/forward_call/concat.jinja
index 01474c3286e89aac09a424b6c855c9b6e3a11fd6..82f6106ce1ca5bac47cba48ceb66814a651d029a 100644
--- a/aidge_export_arm_cortexm/_Aidge_Arm/templates/forward_call/concat.jinja
+++ b/aidge_export_arm_cortexm/_Aidge_Arm/templates/forward_call/concat.jinja
@@ -2,9 +2,22 @@
 {{out_cdtype[0]}}* {{out_name[0]}} = ({{out_cdtype[0]}}*) mem + {{out_name[0]|upper}}_OFFSET;
 {% endif %}
 
-aidge_concat{{ nb_in }}_float32 (
-    {{name|upper}}_AXIS,
+{# Gather the input pointers and their element counts into arrays for aidge_concat<T, NB_INPUTS>. #}
+{# The element type follows the output type: the kernel uses a single type T for inputs and output. #}
+const {{out_cdtype[0]}}* const {{ name|upper }}_INPUTS[] = {
+    {%- for i in range(nb_in) -%}
+        {{ in_name[i] }}{{ ", " if not loop.last else "" }}
+    {%- endfor -%}
+};
+
+const unsigned int {{ name|upper }}_SIZES[] = {
     {%- for i in range(nb_in) -%}
-     {{ in_name[i]}}, {{ name|upper }}_INPUT_{{i}}_SIZE,
+        {{ name|upper }}_INPUT_{{i}}_SIZE{{ ", " if not loop.last else "" }}
     {%- endfor -%}
+};
+
+aidge_concat<{{out_cdtype[0]}}, {{ nb_in }}> (
+    {{name|upper}}_AXIS,
+    {{ name|upper }}_INPUTS,
+    {{ name|upper }}_SIZES,
     {{ out_name[0] }});