From 504b2db05978d2c6c9716b1059f7ada6dfc2cb88 Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Thu, 6 Mar 2025 11:35:46 +0100
Subject: [PATCH 01/14] feat: Added Softmax export op

---
 .gitignore                                    |  3 +
 aidge_export_cpp/kernels/softmax.hpp          | 55 +++++++++++++++++++
 aidge_export_cpp/operators.py                 | 15 +++++
 .../configuration/softmax_config.jinja        | 12 ++++
 .../kernel_forward/softmax_forward.jinja      | 12 ++++
 aidge_export_cpp/unit_tests/test_export.py    |  8 +++
 6 files changed, 105 insertions(+)
 create mode 100644 aidge_export_cpp/kernels/softmax.hpp
 create mode 100644 aidge_export_cpp/templates/configuration/softmax_config.jinja
 create mode 100644 aidge_export_cpp/templates/kernel_forward/softmax_forward.jinja

diff --git a/.gitignore b/.gitignore
index 67ffbef..93bcfd3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,9 @@ dist*/
 aidge_export_cpp/_version.py
 wheelhouse/*
 
+# Temp test folders
+aidge_export_cpp/unit_tests/*_temp_test
+
 # Mermaid
 *.mmd
 
diff --git a/aidge_export_cpp/kernels/softmax.hpp b/aidge_export_cpp/kernels/softmax.hpp
new file mode 100644
index 0000000..73d00da
--- /dev/null
+++ b/aidge_export_cpp/kernels/softmax.hpp
@@ -0,0 +1,55 @@
+#ifndef __AIDGE_EXPORT_CPP_KERNELS_SOFTMAX__
+#define __AIDGE_EXPORT_CPP_KERNELS_SOFTMAX__
+
+#include "network/typedefs.hpp"
+#include "network/utils.hpp"
+#include "kernels/macs.hpp"
+
+#include <type_traits>
+
+#include <cmath>
+
+template<int NB_CHANNELS,
+         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
+         int NB_OUTPUTS,
+         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
+         int AXIS,
+         typename Input_T, typename Output_T>
+__attribute__((always_inline)) inline
+void softmax_forward (
+    const Input_T* __restrict inputs,
+    Output_T* __restrict outputs)
+{
+    Input_T maxValue = inputs[0];
+
+    for (int och = 0; och < NB_OUTPUTS; och++) {
+        maxValue = std::max(maxValue, inputs[och]);
+    }
+
+    Input_T sumExp = 0.0f;
+
+    if constexpr (std::is_same_v<Input_T, Output_T>) {
+        for (int och = 0; och < NB_OUTPUTS; och++) {
+            // Writing the exponentials directly into the output buffer is faster while keeping the same memory footprint, but it is only valid when Input_T and Output_T are the same type!
+            outputs[och] = std::exp(inputs[och] - maxValue);
+            sumExp += outputs[och];
+        }
+
+        for (int och = 0; och < NB_OUTPUTS; och++) {
+            outputs[och] /= sumExp;
+        }
+    }
+    else
+    {
+        for (int och = 0; och < NB_OUTPUTS; och++) {
+            sumExp += std::exp(inputs[och] - maxValue);
+        }
+
+        for (int och = 0; och < NB_OUTPUTS; och++) {
+            outputs[och] = std::exp(inputs[och] - maxValue) / sumExp;
+        }
+    }
+}
+
+
+#endif  // __AIDGE_EXPORT_CPP_KERNELS_SOFTMAX__
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index 346928f..0f6d3c8 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -302,4 +302,19 @@ class TransposeCPP(ExportNodeCpp):
         self.include_list = []
         self.kernels_to_copy = [
             str(ROOT / "kernels" / "transpose.hpp")
+        ]
+
+@ExportLibCpp.register("Softmax", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
+class SoftmaxCPP(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+        self.attributes["axis"] = node.get_operator().attr.axis
+        self.config_template = str(
+            ROOT / "templates" / "configuration" / "softmax_config.jinja")
+        self.forward_template = str(
+            ROOT / "templates" / "kernel_forward" / "softmax_forward.jinja")
+        self.include_list = []
+        self.kernels_to_copy = [
+            str(ROOT / "kernels" / "softmax.hpp"),
+            str(ROOT / "kernels" / "macs.hpp"),
         ]
\ No newline at end of file
diff --git a/aidge_export_cpp/templates/configuration/softmax_config.jinja b/aidge_export_cpp/templates/configuration/softmax_config.jinja
new file mode 100644
index 0000000..d8ec8af
--- /dev/null
+++ b/aidge_export_cpp/templates/configuration/softmax_config.jinja
@@ -0,0 +1,12 @@
+{#- For name header -#}
+#ifndef {{ name|upper }}_LAYER_H
+#define {{ name|upper }}_LAYER_H
+{# For layer configuration -#}
+{% include "./_def_io.jinja" %}
+{% include "./_meminfo.jinja" %}
+
+{#- Calculate sizes #}
+{%- set weights_size = out_chan[0] * in_chan[0] * in_height[0] * in_width[0] %}
+#define {{ name|upper }}_AXIS {{ axis }}
+
+#endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/kernel_forward/softmax_forward.jinja b/aidge_export_cpp/templates/kernel_forward/softmax_forward.jinja
new file mode 100644
index 0000000..607ad53
--- /dev/null
+++ b/aidge_export_cpp/templates/kernel_forward/softmax_forward.jinja
@@ -0,0 +1,12 @@
+{% filter indent(width=4, first=False) %}
+{% include "./_mem_offset.jinja" %}
+softmax_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
+                       {{ in_name[0]|upper }}_IN_HEIGHT,
+                       {{ in_name[0]|upper }}_IN_WIDTH,
+                       {{ out_name[0]|upper }}_NB_OUTPUTS,
+                       {{ out_name[0]|upper }}_OUT_HEIGHT,
+                       {{ out_name[0]|upper }}_OUT_WIDTH,
+                       {{ name|upper }}_AXIS>
+                       ({{in_name[0]}}, {{out_name[0]}});
+{% include "./_save_outputs.jinja" %}
+{% endfilter %}
diff --git a/aidge_export_cpp/unit_tests/test_export.py b/aidge_export_cpp/unit_tests/test_export.py
index d900df8..27280fe 100644
--- a/aidge_export_cpp/unit_tests/test_export.py
+++ b/aidge_export_cpp/unit_tests/test_export.py
@@ -112,6 +112,14 @@ class test_operator_export(unittest.TestCase):
 
         self.unit_test_export(model, "FC_flat", [[1, 6, 1, 1]])
 
+    def test_export_softmax(self):
+        print("Softmax")
+        model = aidge_core.sequential([
+            aidge_core.Softmax(axis=1, name="sf0")
+        ])
+
+        self.assertTrue(unit_test_export(model, "Softmax", [[1, 10]]))
+
     @unittest.skip("Currently this test is failing")
     def test_export_FC_image_in(self):
         """Test exporting a FC operator with a HWC input.
-- 
GitLab


From 52d701379ec57736e7380df01e1c4f080c2c98a2 Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Mon, 24 Mar 2025 10:12:55 +0100
Subject: [PATCH 02/14] feat: Added Batchnorm2d export op

---
 aidge_export_cpp/kernels/batchnorm.hpp        | 11 ++++++----
 aidge_export_cpp/operators.py                 | 22 ++++++++++++++++++-
 .../configuration/batchnorm_config.jinja      |  1 +
 .../kernel_forward/batchnorm_forward.jinja    |  2 +-
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp
index 740ea21..0260d42 100644
--- a/aidge_export_cpp/kernels/batchnorm.hpp
+++ b/aidge_export_cpp/kernels/batchnorm.hpp
@@ -2,7 +2,8 @@
 #define __AIDGE_EXPORT_CPP_KERNELS_BATCHNORM__
 
 #include "network/typedefs.hpp"
-#include "kernels/rescaling.hpp"
+#include "kernels/activation.hpp"
+
 #include <math.h>
 
 // WARNING: this kernel only works for 32-bits floating point values
@@ -11,7 +12,8 @@ template<int NB_OUTPUTS,
          int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
          ActivationFunction_T ACTIVATION,
          typename Input_T, typename Output_T,
-         typename Param_T>
+         typename Param_T,
+         typename Rescaling_T>
 __attribute__((always_inline)) inline
 void batchnorm_forward (
     const Input_T* __restrict inputs,
@@ -20,7 +22,8 @@ void batchnorm_forward (
     const Param_T* __restrict variances,
     const Param_T* __restrict means,
     const Param_T* __restrict scales,
-    const double epsilon)
+    const double epsilon,
+    const Rescaling_T& __restrict rescaling)
 {
     for (unsigned int output = 0; output < NB_OUTPUTS; ++output) {
         const Output_T var = sqrt(variances[output] + epsilon);
@@ -31,7 +34,7 @@ void batchnorm_forward (
 
                 const Output_T normalized = (inputs[outputOffset + output] - means[output]) / var;
                 const Output_T sAs = scales[output] * normalized + biases[output];
-                outputs[outputOffset + output] = sat<Output_T>(sAs, output, ACTIVATION, NoScaling);
+                outputs[outputOffset + output] = activation_forward_value<Output_T>(sAs, output, ACTIVATION, rescaling);
             }
         }
     }
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index 0f6d3c8..b7e5472 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -317,4 +317,24 @@ class SoftmaxCPP(ExportNodeCpp):
         self.kernels_to_copy = [
             str(ROOT / "kernels" / "softmax.hpp"),
             str(ROOT / "kernels" / "macs.hpp"),
-        ]
\ No newline at end of file
+        ]
+
+@ExportLibCpp.register("BatchNorm2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
+class BatchNorm2DCPP(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+        self.attributes["activation"] = "Linear"
+        self.attributes["rescaling"] = "NoScaling"
+        self.attributes["epsilon"] = node.get_operator().attr.epsilon
+        self.config_template = str(
+            ROOT / "templates" / "configuration" / "batchnorm_config.jinja")
+        self.forward_template = str(
+            ROOT / "templates" / "kernel_forward" / "batchnorm_forward.jinja")
+        self.include_list = []
+        self.kernels_to_copy = [
+            str(ROOT / "kernels" / "batchnorm.hpp"),
+            str(ROOT / "kernels" / "macs.hpp"),
+            str(ROOT / "kernels" / "activation.hpp"),
+            str(ROOT / "kernels" / "rescaling.hpp")
+        ]
+
diff --git a/aidge_export_cpp/templates/configuration/batchnorm_config.jinja b/aidge_export_cpp/templates/configuration/batchnorm_config.jinja
index 701ba7c..bc01e3b 100644
--- a/aidge_export_cpp/templates/configuration/batchnorm_config.jinja
+++ b/aidge_export_cpp/templates/configuration/batchnorm_config.jinja
@@ -7,5 +7,6 @@
 {% include "./_meminfo.jinja" %}
 #define {{ name|upper }}_ACTIVATION {{ activation }}
 #define {{ name|upper }}_EPSILON {{ epsilon }}
+static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
 
 #endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja b/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja
index 5a759b8..05e5154 100644
--- a/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja
+++ b/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja
@@ -4,6 +4,6 @@ batchnorm_forward<{{ out_name[0]|upper }}_NB_OUTPUTS,
                   {{ out_name[0]|upper }}_OUT_HEIGHT,
                   {{ out_name[0]|upper }}_OUT_WIDTH,
                   {{name|upper}}_ACTIVATION>
-                  ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{in_name[3]}}, {{in_name[4]}}, {{name|upper}}_EPSILON);
+                  ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{in_name[3]}}, {{in_name[4]}}, {{name|upper}}_EPSILON, {{name|upper}}_RESCALING);
 {% include "./_save_outputs.jinja" %}
 {% endfilter %}
-- 
GitLab


From 9e41dd6ba15ce23b4b4df3cffe5cb688d4adecf2 Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Mon, 24 Mar 2025 10:30:17 +0100
Subject: [PATCH 03/14] feat: Added Concat export op

---
 aidge_export_cpp/kernels/concat.hpp           | 22 +++++++++++++++++++
 aidge_export_cpp/operators.py                 | 16 ++++++++++++++
 .../templates/configuration/concat.jinja      | 15 +++++++++++++
 .../templates/kernel_forward/concat.jinja     | 20 +++++++++++++++++
 4 files changed, 73 insertions(+)
 create mode 100644 aidge_export_cpp/kernels/concat.hpp
 create mode 100644 aidge_export_cpp/templates/configuration/concat.jinja
 create mode 100644 aidge_export_cpp/templates/kernel_forward/concat.jinja

diff --git a/aidge_export_cpp/kernels/concat.hpp b/aidge_export_cpp/kernels/concat.hpp
new file mode 100644
index 0000000..2db8a0b
--- /dev/null
+++ b/aidge_export_cpp/kernels/concat.hpp
@@ -0,0 +1,22 @@
+#ifndef __AIDGE_EXPORT_CPP_KERNELS_CONCAT__
+#define __AIDGE_EXPORT_CPP_KERNELS_CONCAT__
+
+template<typename T, unsigned int NB_INPUTS>
+__attribute__((always_inline)) inline static
+void concat_forward (
+    const unsigned int axis,
+    const T* const * __restrict inputs,
+    const unsigned int* __restrict sizes,
+    T* __restrict output)
+{
+    unsigned int offset = 0;
+
+    for (unsigned int n = 0; n < NB_INPUTS; ++n) {
+        for (unsigned int i = 0; i < sizes[n]; ++i) {
+            output[offset + i] = inputs[n][i];
+        }
+        offset += sizes[n];
+    }
+}
+
+#endif  // __AIDGE_EXPORT_CPP_KERNELS_CONCAT__
\ No newline at end of file
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index b7e5472..f49c501 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -338,3 +338,19 @@ class BatchNorm2DCPP(ExportNodeCpp):
             str(ROOT / "kernels" / "rescaling.hpp")
         ]
 
+@ExportLibCpp.register("Concat", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
+class Concat(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        print(node.get_operator())
+        print(dir(node.get_operator()))
+        self.attributes["nb_in"] = node.get_operator().nb_inputs()
+        self.attributes["axis"] = node.get_operator().attr.axis
+
+        self.config_template = str(ROOT / "templates" / "configuration" / "concat.jinja")
+        self.forward_template = str(ROOT / "templates" / "forward_call" / "concat.jinja")
+        self.include_list = []
+        self.kernels_to_copy = [
+            str(ROOT / "kernels" / "concat.hpp"),
+        ]
\ No newline at end of file
diff --git a/aidge_export_cpp/templates/configuration/concat.jinja b/aidge_export_cpp/templates/configuration/concat.jinja
new file mode 100644
index 0000000..8aa6315
--- /dev/null
+++ b/aidge_export_cpp/templates/configuration/concat.jinja
@@ -0,0 +1,15 @@
+{#- For name header -#}
+#ifndef {{ name|upper }}_LAYER_H
+#define {{ name|upper }}_LAYER_H
+
+{% include "./_meminfo.jinja" %}
+
+// Attributes
+#define {{ name|upper }}_NB_INPUTS {{ nb_in }}
+#define {{ name|upper }}_AXIS {{ axis }}
+{%- for i in range(nb_in) %}
+#define {{ name|upper }}_INPUT_{{i}}_SIZE {{ in_chan[i] * in_height[i] * in_width[i] }}
+{%- endfor %}
+#define {{ name|upper }}_OUTPUT_SIZE {{ out_chan[0] * out_height[0] * out_width[0] }}
+
+#endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/kernel_forward/concat.jinja b/aidge_export_cpp/templates/kernel_forward/concat.jinja
new file mode 100644
index 0000000..46fe87e
--- /dev/null
+++ b/aidge_export_cpp/templates/kernel_forward/concat.jinja
@@ -0,0 +1,20 @@
+{% filter indent(width=4, first=False) %}
+{% include "./_mem_offset.jinja" %}
+float* {{ name|upper }}_INPUTS[] = {
+    {%- for i in range(nb_in) -%}
+        {{ in_name[i] }}{{ ", " if not loop.last else "" }}
+    {%- endfor -%}
+};
+
+unsigned int {{ name|upper }}_SIZES[] = {
+    {%- for i in range(nb_in) -%}
+        {{ name|upper }}_INPUT_{{i}}_SIZE{{ ", " if not loop.last else "" }}
+    {%- endfor -%}
+};
+
+aidge_concat<float, {{ nb_in }}> (
+    {{name|upper}}_AXIS,
+    {{ name|upper }}_INPUTS,
+    {{ name|upper }}_SIZES,
+    {{ out_name[0] }});
+    {% endfilter %}
-- 
GitLab


From 1044a6b623c7633b775eec6ea026d917e62546e3 Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Fri, 14 Mar 2025 16:15:51 +0100
Subject: [PATCH 04/14] feat: Added Pad2d export op

---
 aidge_export_cpp/kernels/pad.hpp              | 42 +++++++++++++++++++
 aidge_export_cpp/operators.py                 | 13 +++++-
 .../templates/configuration/pad_config.jinja  | 10 +++++
 .../kernel_forward/pad_forward.jinja          | 13 ++++++
 4 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 aidge_export_cpp/kernels/pad.hpp
 create mode 100644 aidge_export_cpp/templates/configuration/pad_config.jinja
 create mode 100644 aidge_export_cpp/templates/kernel_forward/pad_forward.jinja

diff --git a/aidge_export_cpp/kernels/pad.hpp b/aidge_export_cpp/kernels/pad.hpp
new file mode 100644
index 0000000..dc7629f
--- /dev/null
+++ b/aidge_export_cpp/kernels/pad.hpp
@@ -0,0 +1,42 @@
+#ifndef __AIDGE_EXPORT_CPP_KERNELS_PAD2D__
+#define __AIDGE_EXPORT_CPP_KERNELS_PAD2D__
+
+#include "network/typedefs.hpp"
+#include "kernels/rescaling.hpp"
+#include "network/utils.hpp"
+#include "kernels/macs.hpp"
+#include "kernels/activation.hpp"
+
+// TODO: add border value and border type (Reflect, Constant, Wrap...) and add the two missing pad values (bottom and right)
+
+template<int NB_CHANNELS,
+         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
+         int NB_OUTPUTS,
+         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
+         int PADDING_Y, int PADDING_X,
+         typename Input_T, typename Output_T>
+__attribute__((always_inline)) inline
+void convolution_forward(
+    const Input_T* __restrict inputs,
+    Output_T* __restrict outputs
+    )
+{
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+
+    const std::size_t oySize = CHANNELS_HEIGHT + PADDING_Y + PADDING_Y;
+    const std::size_t oxSize = CHANNELS_WIDTH + PADDING_X + PADDING_X;
+
+    for (std::uint32_t oy = 0; oy < oySize; ++oy) {
+        for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
+            if (oy < PADDING_Y or oy >= CHANNELS_HEIGHT + PADDING_Y or ox < PADDING_X or ox >= CHANNELS_WIDTH + PADDING_X)
+            {
+                outputs[oy * oySize + ox] = 0.0f;
+            }
+
+            outputs[oy * oySize + ox] = input[(oy - PADDING_Y) * CHANNELS_HEIGHT + (ox - PADDING_X)];
+        }
+    }
+}
+
+#endif  // __AIDGE_EXPORT_CPP_KERNELS_PAD2D__
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index f49c501..0e3749a 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -75,7 +75,18 @@ class ProducerCPP(ExportNode):
 @ExportLibCpp.register("Pad2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
 class Pad_ARMCortexM(ExportNodeCpp):
     def __init__(self, node, mem_info):
-        raise NotImplementedError("Pad2D nodes is not implemented")
+        super().__init__(node, mem_info)
+        self.attributes["padding"] = node.get_operator().attr.begin_end_borders
+
+        self.config_template = str(
+            ROOT / "templates" / "configuration" / "pad_config.jinja")
+        self.forward_template = str(
+            ROOT / "templates" / "kernel_forward" / "pad_forward.jinja")
+        self.include_list = []
+        self.kernels_to_copy = [
+            str(ROOT / "kernels" / "pad.hpp")
+        ]
+
 
 
 @ExportLibCpp.register("ReLU", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
diff --git a/aidge_export_cpp/templates/configuration/pad_config.jinja b/aidge_export_cpp/templates/configuration/pad_config.jinja
new file mode 100644
index 0000000..527e5c0
--- /dev/null
+++ b/aidge_export_cpp/templates/configuration/pad_config.jinja
@@ -0,0 +1,10 @@
+{#- For name header -#}
+#ifndef {{ name|upper }}_LAYER_H
+#define {{ name|upper }}_LAYER_H
+{# For layer configuration -#}
+{% include "./_def_io.jinja" %}
+{% include "./_meminfo.jinja" %}
+#define {{ name|upper }}_PADDING_Y {{ padding[1] }}
+#define {{ name|upper }}_PADDING_X {{ padding[0] }}
+
+#endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/kernel_forward/pad_forward.jinja b/aidge_export_cpp/templates/kernel_forward/pad_forward.jinja
new file mode 100644
index 0000000..04976e9
--- /dev/null
+++ b/aidge_export_cpp/templates/kernel_forward/pad_forward.jinja
@@ -0,0 +1,13 @@
+{% filter indent(width=4, first=False) %}
+{% include "./_mem_offset.jinja" %}
+convolution_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
+                    {{ in_name[0]|upper }}_IN_HEIGHT,
+                    {{ in_name[0]|upper }}_IN_WIDTH,
+                    {{ out_name[0]|upper }}_NB_OUTPUTS,
+                    {{ out_name[0]|upper }}_OUT_HEIGHT,
+                    {{ out_name[0]|upper }}_OUT_WIDTH,
+                    {{name|upper}}_PADDING_Y,
+                    {{name|upper}}_PADDING_X>
+                    ({{in_name[0]}}, {{out_name[0]}});
+{% include "./_save_outputs.jinja" %}
+{% endfilter %}
-- 
GitLab


From 8f3170bffc5ff831b29af84afab8ad227a039ce7 Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Tue, 25 Mar 2025 14:45:49 +0100
Subject: [PATCH 05/14] feat: Added more tests

---
 aidge_export_cpp/unit_tests/test_export.py | 135 +++++++++++++++++++++
 1 file changed, 135 insertions(+)

diff --git a/aidge_export_cpp/unit_tests/test_export.py b/aidge_export_cpp/unit_tests/test_export.py
index 27280fe..c5fa99c 100644
--- a/aidge_export_cpp/unit_tests/test_export.py
+++ b/aidge_export_cpp/unit_tests/test_export.py
@@ -31,6 +31,63 @@ def initFiller(model):
                 aidge_core.constant_filler(value, 0.01)
             else:
                 pass
+import math
+
+def normalize_random_tensor(randList):
+    for index in np.ndindex(randList.shape):
+        randList[index] = (math.floor(randList[index] * 21) - 10) / 10
+
+    return aidge_core.Tensor(randList.astype(np.float32))
+
+def unit_test_export(graph_view, op_name, in_dims):
+    graph_view.compile("cpu", aidge_core.dtype.float32, dims=in_dims)
+    scheduler = aidge_core.SequentialScheduler(graph_view)
+
+    # in_tensor = [aidge_core.Tensor(np.random.random(in_dim).astype(np.float32)) for in_dim in in_dims]
+    in_tensor = [normalize_random_tensor(np.random.rand(*in_dim)) for in_dim in in_dims]
+
+    scheduler.forward(data=in_tensor)
+
+    export_folder = op_name + "_temp_test"
+    # Export the model in C++ standalone
+    aidge_core.export_utils.scheduler_export(
+            scheduler,
+            export_folder,
+            aidge_export_cpp.ExportLibCpp,
+            memory_manager=aidge_core.mem_info.generate_optimized_memory_info,
+            memory_manager_args={"stats_folder": f"{export_folder}/stats", "wrapping": False }
+    )
+    aidge_core.export_utils.generate_main_compare_cpp(export_folder, graph_view)
+    print("COMPILATION")
+
+    try:
+        for std_line in run_command(["make"], cwd=export_folder):
+            print(std_line, end="")
+    except subprocess.CalledProcessError as e:
+        print(f"An error occurred: {e}\nFailed to generate export.")
+        raise SystemExit(1)
+    print("RUN EXPORT")
+    pattern = r"Number of equal outputs: (\d+) / (\d+)"
+    comparison_matched = False
+    result = False
+    try:
+        for std_line in run_command(["./bin/run_export"], cwd=export_folder):
+            print(std_line, end="")
+            matches = re.findall(pattern, std_line)
+            if matches:
+                if comparison_matched:
+                    raise RuntimeError("Two comparison matches found!")
+                else:
+                    expected, inferred = map(int, matches[0])
+                    result = (expected == inferred)
+                comparison_matched = True
+    except subprocess.CalledProcessError as e:
+        print(f"An error occurred: {e}\nFailed to run export for comparison.")
+        raise SystemExit(1)
+    if not comparison_matched:
+        raise RuntimeError("No comparison match found!")
+
+    return result
 
 
 class test_operator_export(unittest.TestCase):
@@ -130,6 +187,84 @@ class test_operator_export(unittest.TestCase):
         initFiller(model)
         self.unit_test_export(model, "FC_img", [[1, 3, 2, 2]])
 
+    def test_export_relu(self):
+        print("ReLU")
+        model = aidge_core.sequential([
+            aidge_core.ReLU(name="relu0")
+        ])
+
+        self.assertTrue(unit_test_export(model, "ReLU", [[1, 10]]))
+
+    def test_export_add(self):
+        print("Add")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 10]),
+            aidge_core.Add()
+        ])
+
+        self.assertTrue(unit_test_export(model, "Add", [[1, 10]]))
+
+    def test_export_sub(self):
+        print("Sub")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 10]),
+            aidge_core.Sub()
+        ])
+
+        self.assertTrue(unit_test_export(model, "Sub", [[1, 10]]))
+
+    def test_export_mul(self):
+        print("Mul")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 10]),
+            aidge_core.Mul()
+        ])
+
+        self.assertTrue(unit_test_export(model, "Mul", [[1, 10]]))
+
+    def test_export_conv2D(self):
+        print("Conv2D")
+        model = aidge_core.sequential([
+            aidge_core.Conv2D(in_channels=3, out_channels=3, kernel_dims=(3, 3))
+        ])
+
+        self.assertTrue(unit_test_export(model, "Conv2D", [[1, 3, 12, 12]]))
+
+    # def test_export_max_pooling(self):
+    #     print("MaxPooling2D")
+    #     model = aidge_core.sequential([
+    #         aidge_core.MaxPooling2D(kernel_dims=(3, 3))
+    #     ])
+
+    #     self.assertTrue(unit_test_export(model, "MaxPooling2D", [[1, 2, 12, 12]]))
+
+    # def test_export_avg_pooling(self):
+    #     print("AvgPooling2D")
+    #     model = aidge_core.sequential([
+    #         aidge_core.AvgPooling2D(kernel_dims=(3, 3), name="avg_pool0")
+    #     ])
+
+    #     self.assertTrue(unit_test_export(model, "AvgPooling2D", [[1, 2, 12, 12]]))
+
+    # def test_export_pad2D(self):
+    #     print("Pad2D")
+    #     model = aidge_core.sequential([
+    #         aidge_core.Softmax(axis=1, name="sf0")
+    #     ])
+
+    #     self.assertTrue(unit_test_export(model, "Softmax", [[1, 10]]))
+
+    # def test_export_batchnorm2D(self):
+    #     print("BatchNormalization2D")
+    #     model = aidge_core.sequential([
+    #         aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5)
+    #     ])
+
+    #     self.assertTrue(unit_test_export(model, "BatchNorm2D", [[1, 10]]))
+
+    def test_export_cpp(self):
+        print("Export test to do")
+
     def test_export_Conv(self):
         model = aidge_core.sequential([
             aidge_core.Conv2D(1, 1, [3, 3], name="InputNode")
-- 
GitLab


From 1aecd6c377800e7b6bc13f75b54c9eb443ebfbbc Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Wed, 26 Mar 2025 13:14:07 +0100
Subject: [PATCH 06/14] fix: Better control over random number generation for
 the model weights during unit tests

---
 aidge_export_cpp/unit_tests/test_export.py | 105 ++++++++++++++-------
 1 file changed, 69 insertions(+), 36 deletions(-)

diff --git a/aidge_export_cpp/unit_tests/test_export.py b/aidge_export_cpp/unit_tests/test_export.py
index c5fa99c..20421ad 100644
--- a/aidge_export_cpp/unit_tests/test_export.py
+++ b/aidge_export_cpp/unit_tests/test_export.py
@@ -39,12 +39,45 @@ def normalize_random_tensor(randList):
 
     return aidge_core.Tensor(randList.astype(np.float32))
 
+import numpy as np
+import operator
+from functools import reduce
+
+def np_init(shape, dtype=np.float32):
+    """
+    Generates a NumPy array with the given shape, filled with random values between -1 and 1
+    with a step of 0.1.
+
+    :param shape: Tuple of dimensions for the array
+    :param dtype: Data type of the output array (default: np.float32)
+    :return: A NumPy array with the given shape and dtype
+    """
+    total_elements = reduce(operator.mul, shape, 1)
+    data = (np.random.randint(0, 21, size=total_elements) - 10) / 10.0
+    return data.reshape(shape).astype(dtype)
+
 def unit_test_export(graph_view, op_name, in_dims):
+    # Initialize parameters (weights and biases)
+
     graph_view.compile("cpu", aidge_core.dtype.float32, dims=in_dims)
+
+    for node in graph_view.get_nodes():
+        if node.type() == "Producer":
+            prod_op = node.get_operator()
+            value = prod_op.get_output(0)
+
+            # rand_tensor = aidge_core.Tensor(np_init(value.dims()))
+            # rand_tensor.set_backend(value.backend())
+            # value = rand_tensor
+
+            print(value)
+
+            aidge_core.constant_filler(value, 0.01)
+
+
     scheduler = aidge_core.SequentialScheduler(graph_view)
 
-    # in_tensor = [aidge_core.Tensor(np.random.random(in_dim).astype(np.float32)) for in_dim in in_dims]
-    in_tensor = [normalize_random_tensor(np.random.rand(*in_dim)) for in_dim in in_dims]
+    in_tensor = [aidge_core.Tensor(np_init(in_dim)) for in_dim in in_dims]
 
     scheduler.forward(data=in_tensor)
 
@@ -198,69 +231,69 @@ class test_operator_export(unittest.TestCase):
     def test_export_add(self):
         print("Add")
         model = aidge_core.sequential([
-            aidge_core.Producer([1, 10]),
-            aidge_core.Add()
+            aidge_core.Producer([1, 5, 5], name="producer"),
+            aidge_core.Add(name="add")
         ])
 
-        self.assertTrue(unit_test_export(model, "Add", [[1, 10]]))
+        self.assertTrue(unit_test_export(model, "Add", [[1, 5, 5]]))
 
     def test_export_sub(self):
         print("Sub")
         model = aidge_core.sequential([
-            aidge_core.Producer([1, 10]),
-            aidge_core.Sub()
+            aidge_core.Producer([1, 5, 5], name="producer"),
+            aidge_core.Sub(name="sub")
         ])
 
-        self.assertTrue(unit_test_export(model, "Sub", [[1, 10]]))
+        self.assertTrue(unit_test_export(model, "Sub", [[1, 5, 5]]))
 
     def test_export_mul(self):
         print("Mul")
         model = aidge_core.sequential([
-            aidge_core.Producer([1, 10]),
-            aidge_core.Mul()
+            aidge_core.Producer([1, 5, 5], name="producer"),
+            aidge_core.Mul(name="mul")
         ])
 
-        self.assertTrue(unit_test_export(model, "Mul", [[1, 10]]))
+        self.assertTrue(unit_test_export(model, "Mul", [[1, 5, 5]]))
 
     def test_export_conv2D(self):
         print("Conv2D")
         model = aidge_core.sequential([
-            aidge_core.Conv2D(in_channels=3, out_channels=3, kernel_dims=(3, 3))
+            aidge_core.Conv2D(in_channels=3, out_channels=3, kernel_dims=(3, 3), name="conv")
         ])
 
         self.assertTrue(unit_test_export(model, "Conv2D", [[1, 3, 12, 12]]))
 
-    # def test_export_max_pooling(self):
-    #     print("MaxPooling2D")
-    #     model = aidge_core.sequential([
-    #         aidge_core.MaxPooling2D(kernel_dims=(3, 3))
-    #     ])
+    def test_export_max_pooling(self):
+        print("MaxPooling2D")
+        model = aidge_core.sequential([
+            aidge_core.MaxPooling2D(kernel_dims=(3, 3), name="max_pool")
+        ])
 
-    #     self.assertTrue(unit_test_export(model, "MaxPooling2D", [[1, 2, 12, 12]]))
+        self.assertTrue(unit_test_export(model, "MaxPooling2D", [[1, 2, 12, 12]]))
 
-    # def test_export_avg_pooling(self):
-    #     print("AvgPooling2D")
-    #     model = aidge_core.sequential([
-    #         aidge_core.AvgPooling2D(kernel_dims=(3, 3), name="avg_pool0")
-    #     ])
+    def test_export_avg_pooling(self):
+        print("AvgPooling2D")
+        model = aidge_core.sequential([
+            aidge_core.AvgPooling2D(kernel_dims=(3, 3), name="avg_pool")
+        ])
 
-    #     self.assertTrue(unit_test_export(model, "AvgPooling2D", [[1, 2, 12, 12]]))
+        self.assertTrue(unit_test_export(model, "AvgPooling2D", [[1, 2, 12, 12]]))
 
-    # def test_export_pad2D(self):
-    #     print("Pad2D")
-    #     model = aidge_core.sequential([
-    #         aidge_core.Softmax(axis=1, name="sf0")
-    #     ])
+    def test_export_pad2D(self):
+        print("Pad2D")
+        model = aidge_core.sequential([
+            aidge_core.Pad2D((1, 1, 1, 1), name="pad2d")
+        ])
 
-    #     self.assertTrue(unit_test_export(model, "Softmax", [[1, 10]]))
+        self.assertTrue(unit_test_export(model, "Pad2D", [[1, 3, 10, 10]]))
 
-    # def test_export_batchnorm2D(self):
-    #     print("BatchNormalization2D")
-    #     model = aidge_core.sequential([
-    #         aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5)
-    #     ])
+    def test_export_batchnorm2D(self):
+        print("BatchNormalization2D")
+        model = aidge_core.sequential([
+            aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5, name="bn")
+        ])
 
-    #     self.assertTrue(unit_test_export(model, "BatchNorm2D", [[1, 10]]))
+        self.assertTrue(unit_test_export(model, "BatchNorm2D", [[1, 10, 5, 5]]))
 
     def test_export_cpp(self):
         print("Export test to do")
-- 
GitLab


From 0ee3b761fd1d2a39905e670890acc6d32b25623a Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Thu, 27 Mar 2025 11:49:52 +0100
Subject: [PATCH 07/14] fix: Some failing unit tests

---
 aidge_export_cpp/kernels/pad.hpp              |  19 +--
 aidge_export_cpp/kernels/pooling.hpp          |   8 +-
 aidge_export_cpp/operators.py                 |   8 +-
 .../configuration/batchnorm_config.jinja      |   1 +
 .../{concat.jinja => concat_config.jinja}     |   5 +-
 .../{concat.jinja => concat_forward.jinja}    |   4 +-
 aidge_export_cpp/unit_tests/test_export.py    | 142 +++++++-----------
 7 files changed, 74 insertions(+), 113 deletions(-)
 rename aidge_export_cpp/templates/configuration/{concat.jinja => concat_config.jinja} (63%)
 rename aidge_export_cpp/templates/kernel_forward/{concat.jinja => concat_forward.jinja} (86%)

diff --git a/aidge_export_cpp/kernels/pad.hpp b/aidge_export_cpp/kernels/pad.hpp
index dc7629f..158c935 100644
--- a/aidge_export_cpp/kernels/pad.hpp
+++ b/aidge_export_cpp/kernels/pad.hpp
@@ -2,10 +2,7 @@
 #define __AIDGE_EXPORT_CPP_KERNELS_PAD2D__
 
 #include "network/typedefs.hpp"
-#include "kernels/rescaling.hpp"
 #include "network/utils.hpp"
-#include "kernels/macs.hpp"
-#include "kernels/activation.hpp"
 
 // TODO: add border value and border type (Reflect, Constant, Wrap...) and add the two missing pad values (bottom and right)
 
@@ -21,20 +18,20 @@ void convolution_forward(
     Output_T* __restrict outputs
     )
 {
-    const I *input = static_cast<const I *>(input_);
-    O *output = static_cast<O *>(output_);
+    const unsigned int oySize = CHANNELS_HEIGHT + PADDING_Y + PADDING_Y;
+    const unsigned int oxSize = CHANNELS_WIDTH + PADDING_X + PADDING_X;
 
-    const std::size_t oySize = CHANNELS_HEIGHT + PADDING_Y + PADDING_Y;
-    const std::size_t oxSize = CHANNELS_WIDTH + PADDING_X + PADDING_X;
-
-    for (std::uint32_t oy = 0; oy < oySize; ++oy) {
-        for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
+    for (unsigned int oy = 0; oy < oySize; ++oy) {
+        for (unsigned int ox = 0; ox < oxSize; ++ox) {
             if (oy < PADDING_Y or oy >= CHANNELS_HEIGHT + PADDING_Y or ox < PADDING_X or ox >= CHANNELS_WIDTH + PADDING_X)
             {
                 outputs[oy * oySize + ox] = 0.0f;
             }
+            else
+            {
+                outputs[oy * oySize + ox] = inputs[(oy - PADDING_Y) * CHANNELS_HEIGHT + (ox - PADDING_X)];
+            }
 
-            outputs[oy * oySize + ox] = input[(oy - PADDING_Y) * CHANNELS_HEIGHT + (ox - PADDING_X)];
         }
     }
 }
diff --git a/aidge_export_cpp/kernels/pooling.hpp b/aidge_export_cpp/kernels/pooling.hpp
index 478b6a5..667e1a9 100644
--- a/aidge_export_cpp/kernels/pooling.hpp
+++ b/aidge_export_cpp/kernels/pooling.hpp
@@ -7,7 +7,7 @@
 #include <stdexcept>
 
 
-template<int NB_CHANNELS, 
+template<int NB_CHANNELS,
          int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
          int NB_OUTPUTS,
          int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
@@ -17,7 +17,7 @@ template<int NB_CHANNELS,
          Pooling_T POOLING_TYPE,
          ActivationFunction_T ACTIVATION,
          typename Input_T, typename Output_T>
-__attribute__((always_inline)) inline 
+__attribute__((always_inline)) inline
 void pooling_forward(
     const Input_T* __restrict inputs,
     Output_T* __restrict outputs)
@@ -32,7 +32,7 @@ void pooling_forward(
             : max(PADDING_Y - (oy * STRIDE_Y), 0);
         const int syMax = (PADDING_Y == 0
                 && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? POOL_HEIGHT
-            : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y), 
+            : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
                     0, POOL_HEIGHT);
         const int iy = (oy * STRIDE_Y) - PADDING_Y;
 
@@ -45,7 +45,7 @@ void pooling_forward(
                 const int sxMax = (PADDING_X == 0
                         && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                             ? POOL_WIDTH
-                    : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X), 
+                    : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
                             0, POOL_WIDTH);
                 const int ix = (ox * STRIDE_X) - PADDING_X;
 
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index 0e3749a..c89236b 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -353,14 +353,10 @@ class BatchNorm2DCPP(ExportNodeCpp):
 class Concat(ExportNodeCpp):
     def __init__(self, node, mem_info):
         super().__init__(node, mem_info)
-
-        print(node.get_operator())
-        print(dir(node.get_operator()))
-        self.attributes["nb_in"] = node.get_operator().nb_inputs()
         self.attributes["axis"] = node.get_operator().attr.axis
 
-        self.config_template = str(ROOT / "templates" / "configuration" / "concat.jinja")
-        self.forward_template = str(ROOT / "templates" / "forward_call" / "concat.jinja")
+        self.config_template = str(ROOT / "templates" / "configuration" / "concat_config.jinja")
+        self.forward_template = str(ROOT / "templates" / "kernel_forward" / "concat_forward.jinja")
         self.include_list = []
         self.kernels_to_copy = [
             str(ROOT / "kernels" / "concat.hpp"),
diff --git a/aidge_export_cpp/templates/configuration/batchnorm_config.jinja b/aidge_export_cpp/templates/configuration/batchnorm_config.jinja
index bc01e3b..ae7ef57 100644
--- a/aidge_export_cpp/templates/configuration/batchnorm_config.jinja
+++ b/aidge_export_cpp/templates/configuration/batchnorm_config.jinja
@@ -1,6 +1,7 @@
 {#- For name header -#}
 #ifndef {{ name|upper }}_LAYER_H
 #define {{ name|upper }}_LAYER_H
+#include "kernels/rescaling.hpp"
 
 {# For layer configuration -#}
 {% include "./_def_io.jinja" %}
diff --git a/aidge_export_cpp/templates/configuration/concat.jinja b/aidge_export_cpp/templates/configuration/concat_config.jinja
similarity index 63%
rename from aidge_export_cpp/templates/configuration/concat.jinja
rename to aidge_export_cpp/templates/configuration/concat_config.jinja
index 8aa6315..1a6637e 100644
--- a/aidge_export_cpp/templates/configuration/concat.jinja
+++ b/aidge_export_cpp/templates/configuration/concat_config.jinja
@@ -2,14 +2,15 @@
 #ifndef {{ name|upper }}_LAYER_H
 #define {{ name|upper }}_LAYER_H
 
+{% include "./_def_io.jinja" %}
 {% include "./_meminfo.jinja" %}
 
 // Attributes
 #define {{ name|upper }}_NB_INPUTS {{ nb_in }}
 #define {{ name|upper }}_AXIS {{ axis }}
 {%- for i in range(nb_in) %}
-#define {{ name|upper }}_INPUT_{{i}}_SIZE {{ in_chan[i] * in_height[i] * in_width[i] }}
+#define {{ name|upper }}_INPUT_{{i}}_SIZE {{ in_dims[i]|join('*') }}
 {%- endfor %}
-#define {{ name|upper }}_OUTPUT_SIZE {{ out_chan[0] * out_height[0] * out_width[0] }}
+#define {{ name|upper }}_OUTPUT_SIZE {{ out_dims[0]|join('*')}}
 
 #endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/kernel_forward/concat.jinja b/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja
similarity index 86%
rename from aidge_export_cpp/templates/kernel_forward/concat.jinja
rename to aidge_export_cpp/templates/kernel_forward/concat_forward.jinja
index 46fe87e..a2f48e9 100644
--- a/aidge_export_cpp/templates/kernel_forward/concat.jinja
+++ b/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja
@@ -1,6 +1,6 @@
 {% filter indent(width=4, first=False) %}
 {% include "./_mem_offset.jinja" %}
-float* {{ name|upper }}_INPUTS[] = {
+const float* {{ name|upper }}_INPUTS[] = {
     {%- for i in range(nb_in) -%}
         {{ in_name[i] }}{{ ", " if not loop.last else "" }}
     {%- endfor -%}
@@ -12,7 +12,7 @@ unsigned int {{ name|upper }}_SIZES[] = {
     {%- endfor -%}
 };
 
-aidge_concat<float, {{ nb_in }}> (
+concat_forward<float, {{ nb_in }}> (
     {{name|upper}}_AXIS,
     {{ name|upper }}_INPUTS,
     {{ name|upper }}_SIZES,
diff --git a/aidge_export_cpp/unit_tests/test_export.py b/aidge_export_cpp/unit_tests/test_export.py
index 20421ad..273c9b4 100644
--- a/aidge_export_cpp/unit_tests/test_export.py
+++ b/aidge_export_cpp/unit_tests/test_export.py
@@ -3,6 +3,8 @@ import aidge_core
 import aidge_backend_cpu
 import aidge_export_cpp
 import numpy as np
+import operator
+from functools import reduce
 
 import subprocess
 import re
@@ -31,19 +33,8 @@ def initFiller(model):
                 aidge_core.constant_filler(value, 0.01)
             else:
                 pass
-import math
-
-def normalize_random_tensor(randList):
-    for index in np.ndindex(randList.shape):
-        randList[index] = (math.floor(randList[index] * 21) - 10) / 10
-
-    return aidge_core.Tensor(randList.astype(np.float32))
-
-import numpy as np
-import operator
-from functools import reduce
 
-def np_init(shape, dtype=np.float32):
+def _np_init(shape, dtype=np.float32):
     """
     Generates a NumPy array with the given shape, filled with random values between -1 and 1
     with a step of 0.1.
@@ -56,71 +47,18 @@ def np_init(shape, dtype=np.float32):
     data = (np.random.randint(0, 21, size=total_elements) - 10) / 10.0
     return data.reshape(shape).astype(dtype)
 
-def unit_test_export(graph_view, op_name, in_dims):
-    # Initialize parameters (weights and biases)
-
-    graph_view.compile("cpu", aidge_core.dtype.float32, dims=in_dims)
-
-    for node in graph_view.get_nodes():
-        if node.type() == "Producer":
-            prod_op = node.get_operator()
-            value = prod_op.get_output(0)
-
-            # rand_tensor = aidge_core.Tensor(np_init(value.dims()))
-            # rand_tensor.set_backend(value.backend())
-            # value = rand_tensor
-
-            print(value)
-
-            aidge_core.constant_filler(value, 0.01)
-
-
-    scheduler = aidge_core.SequentialScheduler(graph_view)
-
-    in_tensor = [aidge_core.Tensor(np_init(in_dim)) for in_dim in in_dims]
-
-    scheduler.forward(data=in_tensor)
-
-    export_folder = op_name + "_temp_test"
-    # Export the model in C++ standalone
-    aidge_core.export_utils.scheduler_export(
-            scheduler,
-            export_folder,
-            aidge_export_cpp.ExportLibCpp,
-            memory_manager=aidge_core.mem_info.generate_optimized_memory_info,
-            memory_manager_args={"stats_folder": f"{export_folder}/stats", "wrapping": False }
-    )
-    aidge_core.export_utils.generate_main_compare_cpp(export_folder, graph_view)
-    print("COMPILATION")
-
-    try:
-        for std_line in run_command(["make"], cwd=export_folder):
-            print(std_line, end="")
-    except subprocess.CalledProcessError as e:
-        print(f"An error occurred: {e}\nFailed to generate export.")
-        raise SystemExit(1)
-    print("RUN EXPORT")
-    pattern = r"Number of equal outputs: (\d+) / (\d+)"
-    comparison_matched = False
-    result = False
-    try:
-        for std_line in run_command(["./bin/run_export"], cwd=export_folder):
-            print(std_line, end="")
-            matches = re.findall(pattern, std_line)
-            if matches:
-                if comparison_matched:
-                    raise RuntimeError("Two comparison matches found!")
-                else:
-                    expected, inferred = map(int, matches[0])
-                    result = (expected == inferred)
-                comparison_matched = True
-    except subprocess.CalledProcessError as e:
-        print(f"An error occurred: {e}\nFailed to run export for comparison.")
-        raise SystemExit(1)
-    if not comparison_matched:
-        raise RuntimeError("No comparison match found!")
+def _np_init_ones(shape, default_value=0.01, dtype=np.float32):
+    """
+    Generates a NumPy array with the given shape, where every element is set to the same
+    constant ``default_value``.
 
-    return result
+    :param shape: Tuple of dimensions for the array
+    :param dtype: Data type of the output array (default: np.float32)
+    :return: A NumPy array with the given shape and dtype
+    """
+    total_elements = reduce(operator.mul, shape, 1)
+    data = np.ones(total_elements) * default_value
+    return data.reshape(shape).astype(dtype)
 
 
 class test_operator_export(unittest.TestCase):
@@ -133,7 +71,7 @@ class test_operator_export(unittest.TestCase):
     def tearDown(self):
         pass
 
-    def unit_test_export(self, graph_view, op_name, in_dims):
+    def unit_test_export(self, graph_view, op_name, in_dims, random_inputs=True, random_weights=True, default_value=0.01):
         """
         TODO:
         * Handle multiple dataformat
@@ -146,9 +84,27 @@ class test_operator_export(unittest.TestCase):
         4- Retrieve standard output and use regex to know if the results are the same
         """
         graph_view.compile("cpu", aidge_core.dtype.float32, dims=in_dims)
+
+        for node in graph_view.get_nodes():
+            if node.type() == "Producer":
+                prod_op = node.get_operator()
+                value = prod_op.get_output(0)
+
+                if (random_weights):
+                    tensor = aidge_core.Tensor(_np_init(value.dims()))
+
+                    node.get_operator().set_output(0, tensor)
+                else:
+                    aidge_core.constant_filler(value, default_value)
+
+
         scheduler = aidge_core.SequentialScheduler(graph_view)
 
-        in_tensor = [aidge_core.Tensor(np.random.random(in_dim).astype(np.float32)) for in_dim in in_dims]
+        if (random_inputs):
+            in_tensor = [aidge_core.Tensor(_np_init(in_dim)) for in_dim in in_dims]
+        else:
+            in_tensor = [aidge_core.Tensor(_np_init_ones(in_dim, default_value)) for in_dim in in_dims]
+
         scheduler.forward(data=in_tensor)
 
         # Note the convention ``<op_name>_test`` is useful for gitignore to avoid pushing generated export by accident.
@@ -208,7 +164,7 @@ class test_operator_export(unittest.TestCase):
             aidge_core.Softmax(axis=1, name="sf0")
         ])
 
-        self.assertTrue(unit_test_export(model, "Softmax", [[1, 10]]))
+        self.unit_test_export(model, "Softmax", [[1, 10]])
 
     @unittest.skip("Currently this test is failing")
     def test_export_FC_image_in(self):
@@ -226,7 +182,7 @@ class test_operator_export(unittest.TestCase):
             aidge_core.ReLU(name="relu0")
         ])
 
-        self.assertTrue(unit_test_export(model, "ReLU", [[1, 10]]))
+        self.unit_test_export(model, "ReLU", [[1, 10]])
 
     def test_export_add(self):
         print("Add")
@@ -235,7 +191,7 @@ class test_operator_export(unittest.TestCase):
             aidge_core.Add(name="add")
         ])
 
-        self.assertTrue(unit_test_export(model, "Add", [[1, 5, 5]]))
+        self.unit_test_export(model, "Add", [[1, 5, 5]])
 
     def test_export_sub(self):
         print("Sub")
@@ -244,7 +200,7 @@ class test_operator_export(unittest.TestCase):
             aidge_core.Sub(name="sub")
         ])
 
-        self.assertTrue(unit_test_export(model, "Sub", [[1, 5, 5]]))
+        self.unit_test_export(model, "Sub", [[1, 5, 5]])
 
     def test_export_mul(self):
         print("Mul")
@@ -253,7 +209,16 @@ class test_operator_export(unittest.TestCase):
             aidge_core.Mul(name="mul")
         ])
 
-        self.assertTrue(unit_test_export(model, "Mul", [[1, 5, 5]]))
+        self.unit_test_export(model, "Mul", [[1, 5, 5]])
+
+    def test_export_concat(self):
+        print("Concat")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 5, 5], name="producer"),
+            aidge_core.Concat(nb_inputs=2, axis=1, name="concat")
+        ])
+
+        self.unit_test_export(model, "Concat", [[1, 5, 5]])
 
     def test_export_conv2D(self):
         print("Conv2D")
@@ -261,7 +226,7 @@ class test_operator_export(unittest.TestCase):
             aidge_core.Conv2D(in_channels=3, out_channels=3, kernel_dims=(3, 3), name="conv")
         ])
 
-        self.assertTrue(unit_test_export(model, "Conv2D", [[1, 3, 12, 12]]))
+        self.unit_test_export(model, "Conv2D", [[1, 3, 12, 12]], False, False)
 
     def test_export_max_pooling(self):
         print("MaxPooling2D")
@@ -269,7 +234,7 @@ class test_operator_export(unittest.TestCase):
             aidge_core.MaxPooling2D(kernel_dims=(3, 3), name="max_pool")
         ])
 
-        self.assertTrue(unit_test_export(model, "MaxPooling2D", [[1, 2, 12, 12]]))
+        self.unit_test_export(model, "MaxPooling2D", [[1, 2, 12, 12]], False, False)
 
     def test_export_avg_pooling(self):
         print("AvgPooling2D")
@@ -277,7 +242,7 @@ class test_operator_export(unittest.TestCase):
             aidge_core.AvgPooling2D(kernel_dims=(3, 3), name="avg_pool")
         ])
 
-        self.assertTrue(unit_test_export(model, "AvgPooling2D", [[1, 2, 12, 12]]))
+        self.unit_test_export(model, "AvgPooling2D", [[1, 2, 12, 12]], False, False)
 
     def test_export_pad2D(self):
         print("Pad2D")
@@ -285,7 +250,7 @@ class test_operator_export(unittest.TestCase):
             aidge_core.Pad2D((1, 1, 1, 1), name="pad2d")
         ])
 
-        self.assertTrue(unit_test_export(model, "Pad2D", [[1, 3, 10, 10]]))
+        self.unit_test_export(model, "Pad2D", [[1, 1, 10, 10]])
 
     def test_export_batchnorm2D(self):
         print("BatchNormalization2D")
@@ -293,7 +258,8 @@ class test_operator_export(unittest.TestCase):
             aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5, name="bn")
         ])
 
-        self.assertTrue(unit_test_export(model, "BatchNorm2D", [[1, 10, 5, 5]]))
+        self.unit_test_export(model, "BatchNorm2D", [[1, 1, 5, 5]], False, False)
+
 
     def test_export_cpp(self):
         print("Export test to do")
-- 
GitLab


From 90f53c3d914d4e814a963796053b097201446412 Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Thu, 3 Apr 2025 16:32:19 +0200
Subject: [PATCH 08/14] fix: Only allow positive variance values in
 batchnorm

---
 aidge_export_cpp/kernels/batchnorm.hpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp
index 0260d42..01104f9 100644
--- a/aidge_export_cpp/kernels/batchnorm.hpp
+++ b/aidge_export_cpp/kernels/batchnorm.hpp
@@ -26,7 +26,14 @@ void batchnorm_forward (
     const Rescaling_T& __restrict rescaling)
 {
     for (unsigned int output = 0; output < NB_OUTPUTS; ++output) {
-        const Output_T var = sqrt(variances[output] + epsilon);
+        // Fall back to epsilon when the variance is not strictly positive, to avoid
+        // taking the square root of a negative number and dividing by zero
+        Output_T var = epsilon;
+
+        if (variances[output] > 0.0)
+        {
+            var = sqrt(variances[output] + epsilon);
+        }
 
         for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
             for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
-- 
GitLab


From f4815323169867cbe44bdcd29ca2369260eff9bc Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Thu, 3 Apr 2025 16:47:14 +0200
Subject: [PATCH 09/14] feat: Added AvgPooling2D export op

---
 aidge_export_cpp/kernels/pooling.hpp |  2 +-
 aidge_export_cpp/operators.py        | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/aidge_export_cpp/kernels/pooling.hpp b/aidge_export_cpp/kernels/pooling.hpp
index 667e1a9..a86fd41 100644
--- a/aidge_export_cpp/kernels/pooling.hpp
+++ b/aidge_export_cpp/kernels/pooling.hpp
@@ -86,7 +86,7 @@ void pooling_forward(
                     outputs[oOffset + output] = maxVal;
                 }
                 else if (POOLING_TYPE == Average) {
-                    int32_t sum = 0;
+                    Output_T sum = 0;
 
                     for (int sy = 0; sy < POOL_HEIGHT; ++sy) {
                         if ((PADDING_Y != 0
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index c89236b..5abb137 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -248,6 +248,20 @@ class MaxPoolCPP(ExportNodeCpp):
 
         _setup_pooling(self)
 
+@ExportLibCpp.register("AvgPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
+class AvgPoolCPP(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # No padding with AvgPooling
+        # Use PaddedAvgPooling to add padding attribute
+        self.attributes["padding"] = [0, 0]
+        self.attributes["pool_type"] = "Average"
+        self.attributes["activation"] = "Linear"
+        self.attributes["rescaling"] = "NoScaling"
+
+        _setup_pooling(self)
+
 @ExportLibCpp.register_metaop("PaddedMaxPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
 class PaddedMaxPoolCPP(ExportNodeCpp):
     def __init__(self, node, mem_info):
-- 
GitLab


From ff980a954ed735eb4b3d5772df70182d83e8de29 Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Thu, 3 Apr 2025 16:57:47 +0200
Subject: [PATCH 10/14] fix: Better pad op implementation

---
 aidge_export_cpp/kernels/pad.hpp              |  44 +++---
 aidge_export_cpp/operators.py                 |  10 +-
 .../templates/configuration/pad_config.jinja  |   7 +-
 .../kernel_forward/pad_forward.jinja          |  21 +--
 aidge_export_cpp/unit_tests/test_export.py    | 132 +++++++++++++++++-
 5 files changed, 183 insertions(+), 31 deletions(-)

diff --git a/aidge_export_cpp/kernels/pad.hpp b/aidge_export_cpp/kernels/pad.hpp
index 158c935..4e83257 100644
--- a/aidge_export_cpp/kernels/pad.hpp
+++ b/aidge_export_cpp/kernels/pad.hpp
@@ -6,32 +6,44 @@
 
 // TODO: add border value and border type (Reflect, Constant, Wrap...) and add the two missing pad values (bottom and right)
 
-template<int NB_CHANNELS,
+template<int NB_BATCHES, int NB_CHANNELS,
          int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
          int NB_OUTPUTS,
          int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
-         int PADDING_Y, int PADDING_X,
+         int PADDING_TOP,
+         int PADDING_LEFT,
+         int PADDING_BOTTOM,
+         int PADDING_RIGHT,
          typename Input_T, typename Output_T>
 __attribute__((always_inline)) inline
-void convolution_forward(
+void pad_forward(
+    double borderValue,
     const Input_T* __restrict inputs,
     Output_T* __restrict outputs
     )
 {
-    const unsigned int oySize = CHANNELS_HEIGHT + PADDING_Y + PADDING_Y;
-    const unsigned int oxSize = CHANNELS_WIDTH + PADDING_X + PADDING_X;
-
-    for (unsigned int oy = 0; oy < oySize; ++oy) {
-        for (unsigned int ox = 0; ox < oxSize; ++ox) {
-            if (oy < PADDING_Y or oy >= CHANNELS_HEIGHT + PADDING_Y or ox < PADDING_X or ox >= CHANNELS_WIDTH + PADDING_X)
-            {
-                outputs[oy * oySize + ox] = 0.0f;
-            }
-            else
-            {
-                outputs[oy * oySize + ox] = inputs[(oy - PADDING_Y) * CHANNELS_HEIGHT + (ox - PADDING_X)];
-            }
+    const unsigned int oySize = CHANNELS_HEIGHT + PADDING_TOP + PADDING_BOTTOM;
+    const unsigned int oxSize = CHANNELS_WIDTH + PADDING_LEFT + PADDING_RIGHT;
+
+    for (unsigned int batch = 0; batch < NB_BATCHES; ++batch) {
+        for (unsigned int ch = 0; ch < NB_CHANNELS; ++ch) {
+            const unsigned int preIndex = batch * NB_CHANNELS * CHANNELS_HEIGHT * CHANNELS_WIDTH + ch * CHANNELS_HEIGHT * CHANNELS_WIDTH;
+
+            for (unsigned int oy = 0; oy < oySize; ++oy) {
+                for (unsigned int ox = 0; ox < oxSize; ++ox) {
+                    const unsigned int outIndex = batch * NB_CHANNELS * oySize * oxSize + ch * oySize * oxSize + oy * oxSize + ox;
 
+                    outputs[outIndex] = borderValue;
+
+                    const int inputX = static_cast<int>(ox) - PADDING_LEFT;
+                    const int inputY = static_cast<int>(oy) - PADDING_TOP;
+
+                    if (inputY >= 0 and inputY < CHANNELS_HEIGHT and inputX >= 0 and inputX < CHANNELS_WIDTH)
+                    {
+                        outputs[outIndex] = inputs[preIndex + inputY * CHANNELS_WIDTH + inputX];
+                    }
+                }
+            }
         }
     }
 }
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index 5abb137..a6ad95d 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -73,10 +73,16 @@ class ProducerCPP(ExportNode):
 
 # TODO : find a way to remove this dummy exportnode
 @ExportLibCpp.register("Pad2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
-class Pad_ARMCortexM(ExportNodeCpp):
+class PadCPP(ExportNodeCpp):
     def __init__(self, node, mem_info):
         super().__init__(node, mem_info)
         self.attributes["padding"] = node.get_operator().attr.begin_end_borders
+        self.attributes["border_type"] = node.get_operator().attr.border_type
+        self.attributes["border_value"] = node.get_operator().attr.border_value
+
+        assert self.attributes["border_type"] == aidge_core.pad_border_type.Constant, (
+            f"export Pad2D: border_type == {node.get_operator().attr.border_type} not implemented"
+        )
 
         self.config_template = str(
             ROOT / "templates" / "configuration" / "pad_config.jinja")
@@ -87,8 +93,6 @@ class Pad_ARMCortexM(ExportNodeCpp):
             str(ROOT / "kernels" / "pad.hpp")
         ]
 
-
-
 @ExportLibCpp.register("ReLU", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
 class ReLUCPP(ExportNodeCpp):
     def __init__(self, node, mem_info):
diff --git a/aidge_export_cpp/templates/configuration/pad_config.jinja b/aidge_export_cpp/templates/configuration/pad_config.jinja
index 527e5c0..8b21577 100644
--- a/aidge_export_cpp/templates/configuration/pad_config.jinja
+++ b/aidge_export_cpp/templates/configuration/pad_config.jinja
@@ -4,7 +4,10 @@
 {# For layer configuration -#}
 {% include "./_def_io.jinja" %}
 {% include "./_meminfo.jinja" %}
-#define {{ name|upper }}_PADDING_Y {{ padding[1] }}
-#define {{ name|upper }}_PADDING_X {{ padding[0] }}
+#define {{ name|upper }}_PADDING_BOTTOM {{ padding[2] }}
+#define {{ name|upper }}_PADDING_RIGHT {{ padding[3] }}
+#define {{ name|upper }}_PADDING_TOP {{ padding[0] }}
+#define {{ name|upper }}_PADDING_LEFT {{ padding[1] }}
+#define {{ name|upper }}_BORDER_VALUE {{ border_value }}
 
 #endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/kernel_forward/pad_forward.jinja b/aidge_export_cpp/templates/kernel_forward/pad_forward.jinja
index 04976e9..7214187 100644
--- a/aidge_export_cpp/templates/kernel_forward/pad_forward.jinja
+++ b/aidge_export_cpp/templates/kernel_forward/pad_forward.jinja
@@ -1,13 +1,16 @@
 {% filter indent(width=4, first=False) %}
 {% include "./_mem_offset.jinja" %}
-convolution_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
-                    {{ in_name[0]|upper }}_IN_HEIGHT,
-                    {{ in_name[0]|upper }}_IN_WIDTH,
-                    {{ out_name[0]|upper }}_NB_OUTPUTS,
-                    {{ out_name[0]|upper }}_OUT_HEIGHT,
-                    {{ out_name[0]|upper }}_OUT_WIDTH,
-                    {{name|upper}}_PADDING_Y,
-                    {{name|upper}}_PADDING_X>
-                    ({{in_name[0]}}, {{out_name[0]}});
+pad_forward<{{ in_name[0]|upper }}_IN_BATCH,
+            {{ in_name[0]|upper }}_NB_CHANNELS,
+            {{ in_name[0]|upper }}_IN_HEIGHT,
+            {{ in_name[0]|upper }}_IN_WIDTH,
+            {{ out_name[0]|upper }}_NB_OUTPUTS,
+            {{ out_name[0]|upper }}_OUT_HEIGHT,
+            {{ out_name[0]|upper }}_OUT_WIDTH,
+            {{name|upper}}_PADDING_TOP,
+            {{name|upper}}_PADDING_LEFT,
+            {{name|upper}}_PADDING_BOTTOM,
+            {{name|upper}}_PADDING_RIGHT>
+            ({{name|upper}}_BORDER_VALUE, {{in_name[0]}}, {{out_name[0]}});
 {% include "./_save_outputs.jinja" %}
 {% endfilter %}
diff --git a/aidge_export_cpp/unit_tests/test_export.py b/aidge_export_cpp/unit_tests/test_export.py
index 273c9b4..9ee6443 100644
--- a/aidge_export_cpp/unit_tests/test_export.py
+++ b/aidge_export_cpp/unit_tests/test_export.py
@@ -211,6 +211,55 @@ class test_operator_export(unittest.TestCase):
 
         self.unit_test_export(model, "Mul", [[1, 5, 5]])
 
+    def test_export_mul_larger(self):
+        print("MulLarger")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 7, 5], name="producer"),
+            aidge_core.Mul(name="mul")
+        ])
+
+        self.unit_test_export(model, "Mul", [[1, 7, 5]])
+
+    def test_export_mul_higher(self):
+        print("MulHigher")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 5, 7], name="producer"),
+            aidge_core.Mul(name="mul")
+        ])
+
+        self.unit_test_export(model, "Mul", [[1, 5, 7]])
+
+    # "Broadcast not supported yet in export operator"
+    @unittest.expectedFailure
+    def test_export_mul_simple_broadcast(self):
+        print("MulSimpleBroadcast")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 1, 5], name="producer"),
+            aidge_core.Mul(name="mul")
+        ])
+
+        self.unit_test_export(model, "MulSimpleBroadcast", [[1, 7, 5]])
+
+    # "Broadcast not supported yet in export operator"
+    @unittest.expectedFailure
+    def test_export_mul_double_broadcast(self):
+        print("MulDoubleBroadcast")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 1, 7], name="producer"),
+            aidge_core.Mul(name="mul")
+        ])
+
+        self.unit_test_export(model, "MulDoubleBroadcast", [[1, 5, 1]])
+
+    def test_export_mul_batch(self):
+        print("MulBatch")
+        model = aidge_core.sequential([
+            aidge_core.Producer([3, 5, 7], name="producer"),
+            aidge_core.Mul(name="mul")
+        ])
+
+        self.unit_test_export(model, "MulBatch", [[3, 5, 7]])
+
     def test_export_concat(self):
         print("Concat")
         model = aidge_core.sequential([
@@ -250,7 +299,56 @@ class test_operator_export(unittest.TestCase):
             aidge_core.Pad2D((1, 1, 1, 1), name="pad2d")
         ])
 
-        self.unit_test_export(model, "Pad2D", [[1, 1, 10, 10]])
+        self.unit_test_export(model, "Pad2D", [[1, 1, 11, 11]])
+
+    def test_export_pad2D_larger(self):
+        print("Pad2DLarger")
+        model = aidge_core.sequential([
+            aidge_core.Pad2D((1, 3, 1, 3), name="pad2d")
+        ])
+
+        self.unit_test_export(model, "Pad2DLarger", [[1, 1, 7, 11]])
+
+    def test_export_pad2D_higher(self):
+        print("Pad2DHigher")
+        model = aidge_core.sequential([
+            aidge_core.Pad2D((3, 1, 3, 1), name="pad2d")
+        ])
+
+        self.unit_test_export(model, "Pad2DHigher", [[1, 1, 11, 7]])
+
+    def test_export_pad2D_mismatch(self):
+        print("Pad2DMismatch")
+        model = aidge_core.sequential([
+            aidge_core.Pad2D((1, 3, 5, 7), name="pad2d")
+        ])
+
+        self.unit_test_export(model, "Pad2DMismatch", [[3, 5, 11, 7]])
+
+    def test_export_pad2D_denser(self):
+        print("Pad2DDenser")
+        model = aidge_core.sequential([
+            aidge_core.Pad2D((3, 3, 3, 3), name="pad2d")
+        ])
+
+        self.unit_test_export(model, "Pad2DDenser", [[1, 5, 7, 11]])
+
+    def test_export_pad2D_with_bigger_batch_size(self):
+        print("Pad2DBiggerBatchSize")
+        model = aidge_core.sequential([
+            aidge_core.Pad2D((1, 1, 1, 1), name="pad2d")
+        ])
+
+        self.unit_test_export(model, "Pad2DBiggerBatchSize", [[3, 5, 7, 11]])
+
+    @unittest.expectedFailure
+    def test_export_pad2D_not_constant(self):
+        print("Pad2DNotConstant")
+        model = aidge_core.sequential([
+            aidge_core.Pad2D((3, 3, 3, 3), border_type=aidge_core.pad_border_type.Wrap, name="pad2d")
+        ])
+
+        self.unit_test_export(model, "Pad2DNotConstant", [[1, 5, 7, 11]])
 
     def test_export_batchnorm2D(self):
         print("BatchNormalization2D")
@@ -260,6 +358,38 @@ class test_operator_export(unittest.TestCase):
 
         self.unit_test_export(model, "BatchNorm2D", [[1, 1, 5, 5]], False, False)
 
+    def test_export_batchnorm2D_Larger(self):
+        print("BatchNormalization2DLarger")
+        model = aidge_core.sequential([
+            aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5, name="bn")
+        ])
+
+        self.unit_test_export(model, "BatchNorm2DLarger", [[1, 1, 5, 7]], False, False)
+
+    def test_export_batchnorm2D_Higher(self):
+        print("BatchNormalization2DHigher")
+        model = aidge_core.sequential([
+            aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5, name="bn")
+        ])
+
+        self.unit_test_export(model, "BatchNorm2DHigher", [[1, 1, 7, 5]], False, False)
+
+    def test_export_batchnorm2D_Denser(self):
+        print("BatchNormalization2DDenser")
+        model = aidge_core.sequential([
+            aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5, name="bn")
+        ])
+
+        self.unit_test_export(model, "BatchNorm2DDenser", [[1, 3, 5, 7]], False, False)
+
+    def test_export_batchnorm2D_with_bigger_batch_size(self):
+        print("BatchNormalization2DBiggerBatchSize")
+        model = aidge_core.sequential([
+            aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5, name="bn")
+        ])
+
+        self.unit_test_export(model, "BatchNormalization2DBiggerBatchSize", [[4, 3, 5, 7]], False, False)
+
 
     def test_export_cpp(self):
         print("Export test to do")
-- 
GitLab
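
For reference, the reworked pad_forward kernel pads each (batch, channel) plane independently: the output is first filled with the border value, then the interior is copied from the input using the top/left offsets. Below is a minimal Python sketch of that constant-padding behaviour over flat row-major NCHW buffers; the function and parameter names are illustrative only and not part of the export API.

    def pad2d_constant(inputs, nb_batches, nb_channels, height, width,
                       pad_top, pad_left, pad_bottom, pad_right, border_value):
        # Mirrors the indexing of pad_forward: pre-fill with the border value,
        # then copy the interior of each (batch, channel) plane.
        out_h = height + pad_top + pad_bottom
        out_w = width + pad_left + pad_right
        outputs = [border_value] * (nb_batches * nb_channels * out_h * out_w)
        for b in range(nb_batches):
            for c in range(nb_channels):
                in_base = (b * nb_channels + c) * height * width
                out_base = (b * nb_channels + c) * out_h * out_w
                for oy in range(out_h):
                    for ox in range(out_w):
                        iy, ix = oy - pad_top, ox - pad_left
                        if 0 <= iy < height and 0 <= ix < width:
                            outputs[out_base + oy * out_w + ox] = inputs[in_base + iy * width + ix]
        return outputs

A 1x1x2x3 input padded by one on every side, for example, yields a 1x1x4x5 output whose border cells all hold the border value.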


From d5659f7d11922a427b1ad4117f7702059cea9177 Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Thu, 3 Apr 2025 18:30:26 +0200
Subject: [PATCH 11/14] feat: Added more test cases to the test suite

---
 aidge_export_cpp/kernels/batchnorm.hpp     |   4 +-
 aidge_export_cpp/unit_tests/test_export.py | 112 ++++++++++++++++++++-
 2 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp
index 01104f9..092ed4d 100644
--- a/aidge_export_cpp/kernels/batchnorm.hpp
+++ b/aidge_export_cpp/kernels/batchnorm.hpp
@@ -27,7 +27,7 @@ void batchnorm_forward (
 {
     for (unsigned int output = 0; output < NB_OUTPUTS; ++output) {
         // If the variance is 0, we need to avoid division by 0
-        const Output_T var = epsilon;
+        Output_T var = epsilon;
 
         // If the variance is negative, we need to set it to 0 to avoid a sqrt of a negative number
         if (variances[output] > 0.0)
@@ -37,7 +37,7 @@ void batchnorm_forward (
 
         for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
             for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
-                const int outputOffset = OUTPUTS_HEIGHT * oy + ox;
+                const int outputOffset = OUTPUTS_WIDTH * oy + ox;
 
                 const Output_T normalized = (inputs[outputOffset + output] - means[output]) / var;
                 const Output_T sAs = scales[output] * normalized + biases[output];
diff --git a/aidge_export_cpp/unit_tests/test_export.py b/aidge_export_cpp/unit_tests/test_export.py
index 9ee6443..d8e7814 100644
--- a/aidge_export_cpp/unit_tests/test_export.py
+++ b/aidge_export_cpp/unit_tests/test_export.py
@@ -8,6 +8,7 @@ from functools import reduce
 
 import subprocess
 import re
+import shutil
 from aidge_core.utils import run_command
 
 def initFiller(model):
@@ -110,6 +111,8 @@ class test_operator_export(unittest.TestCase):
         # Note the convention ``<op_name>_test`` is useful for gitignore to avoid pushing generated export by accident.
         export_folder = op_name + "_test"
 
+        shutil.rmtree(export_folder, ignore_errors=True)
+
         # Export the model in C++ standalone
         aidge_core.export_utils.scheduler_export(
                 scheduler,
@@ -193,6 +196,46 @@ class test_operator_export(unittest.TestCase):
 
         self.unit_test_export(model, "Add", [[1, 5, 5]])
 
+    def test_export_add_larger(self):
+        print("AddLarger")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 7, 5], name="producer"),
+            aidge_core.Add(name="add")
+        ])
+
+        self.unit_test_export(model, "Add", [[1, 7, 5]])
+
+    def test_export_add_higher(self):
+        print("AddHigher")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 5, 7], name="producer"),
+            aidge_core.Add(name="add")
+        ])
+
+        self.unit_test_export(model, "Add", [[1, 5, 7]])
+
+    # "Broadcast not supported yet in export operator"
+    @unittest.expectedFailure
+    def test_export_add_simple_broadcast(self):
+        print("AddSimpleBroadcast")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 1, 5], name="producer"),
+            aidge_core.Add(name="add")
+        ])
+
+        self.unit_test_export(model, "AddSimpleBroadcast", [[1, 7, 5]])
+
+    # "Broadcast not supported yet in export operator"
+    @unittest.expectedFailure
+    def test_export_add_double_broadcast(self):
+        print("AddDoubleBroadcast")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 1, 7], name="producer"),
+            aidge_core.Add(name="add")
+        ])
+
+        self.unit_test_export(model, "AddDoubleBroadcast", [[1, 5, 1]])
+
     def test_export_sub(self):
         print("Sub")
         model = aidge_core.sequential([
@@ -202,6 +245,46 @@ class test_operator_export(unittest.TestCase):
 
         self.unit_test_export(model, "Sub", [[1, 5, 5]])
 
+    def test_export_sub_larger(self):
+        print("SubLarger")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 7, 5], name="producer"),
+            aidge_core.Sub(name="sub")
+        ])
+
+        self.unit_test_export(model, "Sub", [[1, 7, 5]])
+
+    def test_export_sub_higher(self):
+        print("SubHigher")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 5, 7], name="producer"),
+            aidge_core.Sub(name="sub")
+        ])
+
+        self.unit_test_export(model, "Sub", [[1, 5, 7]])
+
+    # "Broadcast not supported yet in export operator"
+    @unittest.expectedFailure
+    def test_export_sub_simple_broadcast(self):
+        print("SubSimpleBroadcast")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 1, 5], name="producer"),
+            aidge_core.Sub(name="sub")
+        ])
+
+        self.unit_test_export(model, "SubSimpleBroadcast", [[1, 7, 5]])
+
+    # "Broadcast not supported yet in export operator"
+    @unittest.expectedFailure
+    def test_export_sub_double_broadcast(self):
+        print("SubDoubleBroadcast")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 1, 7], name="producer"),
+            aidge_core.Sub(name="sub")
+        ])
+
+        self.unit_test_export(model, "SubDoubleBroadcast", [[1, 5, 1]])
+
     def test_export_mul(self):
         print("Mul")
         model = aidge_core.sequential([
@@ -263,11 +346,11 @@ class test_operator_export(unittest.TestCase):
     def test_export_concat(self):
         print("Concat")
         model = aidge_core.sequential([
-            aidge_core.Producer([1, 5, 5], name="producer"),
+            aidge_core.Producer([1, 5, 7], name="producer"),
             aidge_core.Concat(nb_inputs=2, axis=1, name="concat")
         ])
 
-        self.unit_test_export(model, "Concat", [[1, 5, 5]])
+        self.unit_test_export(model, "Concat", [[1, 5, 7]])
 
     def test_export_conv2D(self):
         print("Conv2D")
@@ -391,6 +474,31 @@ class test_operator_export(unittest.TestCase):
         self.unit_test_export(model, "BatchNormalization2DBiggerBatchSize", [[4, 3, 5, 7]], False, False)
 
 
+    def test_export_batchnorm2D_Larger(self):
+        print("BatchNormalization2DLarger")
+        model = aidge_core.sequential([
+            aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5, name="bn")
+        ])
+
+        self.unit_test_export(model, "BatchNorm2DLarger", [[1, 1, 5, 7]], False, False)
+
+    def test_export_batchnorm2D_Higher(self):
+        print("BatchNormalization2DHigher")
+        model = aidge_core.sequential([
+            aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5, name="bn")
+        ])
+
+        self.unit_test_export(model, "BatchNorm2DHigher", [[1, 1, 7, 5]], False, False)
+
+    def test_export_batchnorm2D_Denser(self):
+        print("BatchNormalization2DDenser")
+        model = aidge_core.sequential([
+            aidge_core.BatchNorm2D(nb_features=10, epsilon=2e-5, name="bn")
+        ])
+
+        self.unit_test_export(model, "BatchNorm2DDenser", [[1, 3, 5, 7]], False, False)
+
+
     def test_export_cpp(self):
         print("Export test to do")
 
-- 
GitLab
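
The batchnorm indexing fix above replaces OUTPUTS_HEIGHT * oy + ox with the row-major formula OUTPUTS_WIDTH * oy + ox: within an H x W feature map each row holds WIDTH elements, so stepping down one row advances the flat index by WIDTH. A small illustrative sketch (the sizes are arbitrary example values):

    HEIGHT, WIDTH = 3, 5

    def offset(oy, ox):
        # Row-major: one row holds WIDTH elements.
        return WIDTH * oy + ox

    assert offset(1, 0) == 5    # first element of the second row
    assert offset(2, 4) == 14   # last element of a 3x5 map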


From 203fee0dd7dc352f196b8e6668b69e76d2b3320a Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Fri, 4 Apr 2025 15:59:30 +0200
Subject: [PATCH 12/14] fix: Batch support for batchnorm

---
 aidge_export_cpp/kernels/batchnorm.hpp        | 32 ++++++++-----------
 .../templates/configuration/_def_io.jinja     |  2 ++
 .../kernel_forward/batchnorm_forward.jinja    |  3 +-
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp
index 092ed4d..f05a047 100644
--- a/aidge_export_cpp/kernels/batchnorm.hpp
+++ b/aidge_export_cpp/kernels/batchnorm.hpp
@@ -8,7 +8,7 @@
 
 // WARNING: this kernel only works for 32-bits floating point values
 
-template<int NB_OUTPUTS,
+template<int NB_BATCHES, int NB_OUTPUTS,
          int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
          ActivationFunction_T ACTIVATION,
          typename Input_T, typename Output_T,
@@ -25,23 +25,19 @@ void batchnorm_forward (
     const double epsilon,
     const Rescaling_T& __restrict rescaling)
 {
-    for (unsigned int output = 0; output < NB_OUTPUTS; ++output) {
-        // If the variance is 0, we need to avoid division by 0
-        Output_T var = epsilon;
-
-        // If the variance is negative, we need to set it to 0 to avoid a sqrt of a negative number
-        if (variances[output] > 0.0)
-        {
-            var = sqrt(variances[output] + epsilon);
-        }
-
-        for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
-            for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
-                const int outputOffset = OUTPUTS_WIDTH * oy + ox;
-
-                const Output_T normalized = (inputs[outputOffset + output] - means[output]) / var;
-                const Output_T sAs = scales[output] * normalized + biases[output];
-                outputs[outputOffset + output] = activation_forward_value<Output_T>(sAs, output, ACTIVATION, rescaling);
+    for (unsigned int batch = 0; batch < NB_BATCHES; ++batch) {
+        for (unsigned int output = 0; output < NB_OUTPUTS; ++output) {
+            // If the variance is 0, we need to avoid division by 0
+            Output_T var = sqrt(variances[output] > 0.0 ? variances[output] + epsilon : epsilon);
+
+            for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
+                for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
+                    const int outputOffset = batch * OUTPUTS_WIDTH * OUTPUTS_HEIGHT * NB_OUTPUTS + output * OUTPUTS_WIDTH * OUTPUTS_HEIGHT + OUTPUTS_WIDTH * oy + ox;
+
+                    const Output_T normalized = (inputs[outputOffset] - means[output]) / var;
+                    const Output_T sAs = scales[output] * normalized + biases[output];
+                    outputs[outputOffset] = activation_forward_value<Output_T>(sAs, output, ACTIVATION, rescaling);
+                }
             }
         }
     }
diff --git a/aidge_export_cpp/templates/configuration/_def_io.jinja b/aidge_export_cpp/templates/configuration/_def_io.jinja
index 66756cf..f444547 100644
--- a/aidge_export_cpp/templates/configuration/_def_io.jinja
+++ b/aidge_export_cpp/templates/configuration/_def_io.jinja
@@ -4,6 +4,7 @@
 #define {{ in_name[inidx]|upper }}_NB_CHANNELS {{ in_chan[inidx] }}
 #define {{ in_name[inidx]|upper }}_IN_HEIGHT {{ in_height[inidx] }}
 #define {{ in_name[inidx]|upper }}_IN_WIDTH {{ in_width[inidx] }}
+#define {{ in_name[inidx]|upper }}_IN_BATCH {{ in_batch[inidx] }}
 {% endfor %}
 
 // OUTPUT CONF
@@ -11,4 +12,5 @@
 #define {{ out_name[outidx]|upper }}_NB_OUTPUTS {{ out_chan[outidx] }}
 #define {{ out_name[outidx]|upper }}_OUT_HEIGHT {{ out_height[outidx] }}
 #define {{ out_name[outidx]|upper }}_OUT_WIDTH {{ out_width[outidx] }}
+#define {{ out_name[outidx]|upper }}_OUT_BATCH {{ out_batch[outidx] }}
 {% endfor %}
diff --git a/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja b/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja
index 05e5154..03fd8e8 100644
--- a/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja
+++ b/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja
@@ -1,6 +1,7 @@
 {% filter indent(width=4, first=False) %}
 {% include "./_mem_offset.jinja" %}
-batchnorm_forward<{{ out_name[0]|upper }}_NB_OUTPUTS,
+batchnorm_forward<{{ out_name[0]|upper }}_OUT_BATCH,
+                  {{ out_name[0]|upper }}_NB_OUTPUTS,
                   {{ out_name[0]|upper }}_OUT_HEIGHT,
                   {{ out_name[0]|upper }}_OUT_WIDTH,
                   {{name|upper}}_ACTIVATION>
-- 
GitLab
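
With batch support, the kernel addresses a flat NCHW buffer as ((batch * NB_OUTPUTS + output) * OUTPUTS_HEIGHT + oy) * OUTPUTS_WIDTH + ox. The Python sketch below mirrors that addressing and the variance handling; it is a reference only, the names are illustrative, and it omits the activation and rescaling step that the exported kernel applies.

    import math

    def batchnorm_ref(inputs, scales, biases, means, variances, epsilon,
                      nb_batches, nb_outputs, height, width):
        outputs = [0.0] * len(inputs)
        for b in range(nb_batches):
            for c in range(nb_outputs):
                # Guard against zero or negative stored variances, as in the kernel.
                var = math.sqrt(variances[c] + epsilon) if variances[c] > 0.0 else math.sqrt(epsilon)
                for oy in range(height):
                    for ox in range(width):
                        idx = ((b * nb_outputs + c) * height + oy) * width + ox
                        outputs[idx] = scales[c] * (inputs[idx] - means[c]) / var + biases[c]
        return outputs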


From 7c3813bbfa2987fc711cfe5f73ed9dfe3325e98b Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Fri, 4 Apr 2025 16:50:31 +0200
Subject: [PATCH 13/14] feat: Softmax works with any number of dimensions

---
 aidge_export_cpp/kernels/softmax.hpp          | 66 +++++++++----------
 aidge_export_cpp/operators.py                 | 24 +++++++
 .../configuration/softmax_config.jinja        |  4 +-
 .../kernel_forward/softmax_forward.jinja      | 12 ++--
 aidge_export_cpp/unit_tests/test_export.py    | 24 +++++++
 5 files changed, 87 insertions(+), 43 deletions(-)

diff --git a/aidge_export_cpp/kernels/softmax.hpp b/aidge_export_cpp/kernels/softmax.hpp
index 73d00da..f5472cf 100644
--- a/aidge_export_cpp/kernels/softmax.hpp
+++ b/aidge_export_cpp/kernels/softmax.hpp
@@ -6,50 +6,48 @@
 #include "kernels/macs.hpp"
 
 #include <type_traits>
-
 #include <cmath>
+#include <algorithm>
 
-template<int NB_CHANNELS,
-         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
-         int NB_OUTPUTS,
-         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
-         int AXIS,
+template<int AXIS_SIZE,
+         int AXIS_SIZE_POST,
+         int AXIS_SIZE_PRE,
          typename Input_T, typename Output_T>
 __attribute__((always_inline)) inline
 void softmax_forward (
     const Input_T* __restrict inputs,
     Output_T* __restrict outputs)
 {
-    Input_T maxValue = 0.0f;
-
-    for (int och = 0; och < NB_OUTPUTS; och++) {
-        maxValue = std::max(maxValue, inputs[och]);
-    }
-
-    Input_T sumExp = 0.0f;
-
-    if constexpr (std::is_same_v<Input_T, Output_T>) {
-        for (int och = 0; och < NB_OUTPUTS; och++) {
-            // This should be both more performant while keeping the same memory footprint but we can only use it if INPUT_T and OUTPUT_T types are the same !
-            outputs[och] = std::exp(inputs[och] - maxValue);
-            sumExp += outputs[och];
-        }
-
-        for (int och = 0; och < NB_OUTPUTS; och++) {
-            outputs[och] /= sumExp;
-        }
-    }
-    else
-    {
-        for (int och = 0; och < NB_OUTPUTS; och++) {
-            sumExp += std::exp(inputs[och] - maxValue);
-        }
-
-        for (int och = 0; och < NB_OUTPUTS; och++) {
-            outputs[och] = std::exp(inputs[och] - maxValue) / sumExp;
+    // Iterate over the "pre-axis" and "post-axis" slices.
+    // For each slice along the axis, compute the maximum value,
+    // the sum of exponentials, and then write the normalized softmax outputs.
+    for (int i = 0; i < AXIS_SIZE_PRE; ++i) {
+        for (int j = 0; j < AXIS_SIZE_POST; ++j) {
+            // Compute the base index for this slice.
+            const int baseIdx = i * AXIS_SIZE * AXIS_SIZE_POST + j;
+
+            // Find the maximum value along the axis.
+            Input_T maxVal = inputs[baseIdx];
+            for (int k = 1; k < AXIS_SIZE; ++k) {
+                const int idx = baseIdx + k * AXIS_SIZE_POST;
+                maxVal = std::max(maxVal, inputs[idx]);
+            }
+
+            // Compute the sum of the exponentials along the axis.
+            Input_T sumExp = 0;
+            for (int k = 0; k < AXIS_SIZE; ++k) {
+                const int idx = baseIdx + k * AXIS_SIZE_POST;
+                outputs[idx] = std::exp(inputs[idx] - maxVal);
+                sumExp += outputs[idx];
+            }
+
+            // Write the softmax values to the output.
+            for (int k = 0; k < AXIS_SIZE; ++k) {
+                const int idx = baseIdx + k * AXIS_SIZE_POST;
+                outputs[idx] /= sumExp;
+            }
         }
     }
 }
 
-
 #endif  // __AIDGE_EXPORT_CPP_KERNELS_SOFTMAX__
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index a6ad95d..7c22cdb 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -338,6 +338,30 @@ class SoftmaxCPP(ExportNodeCpp):
     def __init__(self, node, mem_info):
         super().__init__(node, mem_info)
         self.attributes["axis"] = node.get_operator().attr.axis
+
+        assert self.node.get_nb_inputs() == 1, (
+            f"export softmax: nb_inputs == {self.node.get_nb_inputs()} not implemented"
+        )
+
+        tensor = self.operator.get_input(0)
+        nbDims = len(tensor.dims())
+
+        assert self.attributes["axis"] < nbDims, (
+            f"export softmax: attribute axis == {node.get_operator().attr.axis} should be less than {nbDims}"
+        )
+
+        postAxisElems = 1
+        for i in range(self.attributes["axis"] + 1, nbDims):
+            postAxisElems *= tensor.dims()[i]
+
+        preAxisElems = 1
+        for i in range(self.attributes["axis"]):
+            preAxisElems *= tensor.dims()[i]
+
+        self.attributes["axis_size"] = tensor.dims()[self.attributes["axis"]]
+        self.attributes["axis_size_post"] = postAxisElems
+        self.attributes["axis_size_pre"] = preAxisElems
+
         self.config_template = str(
             ROOT / "templates" / "configuration" / "softmax_config.jinja")
         self.forward_template = str(
diff --git a/aidge_export_cpp/templates/configuration/softmax_config.jinja b/aidge_export_cpp/templates/configuration/softmax_config.jinja
index d8ec8af..e9661bc 100644
--- a/aidge_export_cpp/templates/configuration/softmax_config.jinja
+++ b/aidge_export_cpp/templates/configuration/softmax_config.jinja
@@ -7,6 +7,8 @@
 
 {#- Calculate sizes #}
 {%- set weights_size = out_chan[0] * in_chan[0] * in_height[0] * in_width[0] %}
-#define {{ name|upper }}_AXIS {{ axis }}
+#define {{ name|upper }}_AXIS_SIZE {{ axis_size }}
+#define {{ name|upper }}_AXIS_SIZE_POST {{ axis_size_post }}
+#define {{ name|upper }}_AXIS_SIZE_PRE {{ axis_size_pre }}
 
 #endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/kernel_forward/softmax_forward.jinja b/aidge_export_cpp/templates/kernel_forward/softmax_forward.jinja
index 607ad53..7c8e067 100644
--- a/aidge_export_cpp/templates/kernel_forward/softmax_forward.jinja
+++ b/aidge_export_cpp/templates/kernel_forward/softmax_forward.jinja
@@ -1,12 +1,8 @@
 {% filter indent(width=4, first=False) %}
 {% include "./_mem_offset.jinja" %}
-softmax_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
-                       {{ in_name[0]|upper }}_IN_HEIGHT,
-                       {{ in_name[0]|upper }}_IN_WIDTH,
-                       {{ out_name[0]|upper }}_NB_OUTPUTS,
-                       {{ out_name[0]|upper }}_OUT_HEIGHT,
-                       {{ out_name[0]|upper }}_OUT_WIDTH,
-                       {{ name|upper }}_AXIS>
-                       ({{in_name[0]}}, {{out_name[0]}});
+softmax_forward<{{ name|upper }}_AXIS_SIZE,
+                {{ name|upper }}_AXIS_SIZE_POST,
+                {{ name|upper }}_AXIS_SIZE_PRE>
+                ({{in_name[0]}}, {{out_name[0]}});
 {% include "./_save_outputs.jinja" %}
 {% endfilter %}
diff --git a/aidge_export_cpp/unit_tests/test_export.py b/aidge_export_cpp/unit_tests/test_export.py
index d8e7814..3d55f11 100644
--- a/aidge_export_cpp/unit_tests/test_export.py
+++ b/aidge_export_cpp/unit_tests/test_export.py
@@ -169,6 +169,30 @@ class test_operator_export(unittest.TestCase):
 
         self.unit_test_export(model, "Softmax", [[1, 10]])
 
+    def test_export_softmax_batch(self):
+        print("SoftmaxBatch")
+        model = aidge_core.sequential([
+            aidge_core.Softmax(axis=1, name="sf0")
+        ])
+
+        self.unit_test_export(model, "SoftmaxBatch", [[3, 10]])
+
+    def test_export_softmax_axis_2(self):
+        print("SoftmaxAxis2")
+        model = aidge_core.sequential([
+            aidge_core.Softmax(axis=2, name="sf0")
+        ])
+
+        self.unit_test_export(model, "SoftmaxAxis2", [[1, 10, 3, 7]])
+
+    def test_export_softmax_axis_0(self):
+        print("SoftmaxAxis0")
+        model = aidge_core.sequential([
+            aidge_core.Softmax(axis=0, name="sf0")
+        ])
+
+        self.unit_test_export(model, "SoftmaxAxis0", [[10]])
+
     @unittest.skip("Currently this test is failing")
     def test_export_FC_image_in(self):
         """Test exporting a FC operator with a HWC input.
-- 
GitLab
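
The generalised softmax kernel views the flattened input as an (AXIS_SIZE_PRE, AXIS_SIZE, AXIS_SIZE_POST) block: for every (pre, post) pair it reduces along the axis at stride AXIS_SIZE_POST, subtracting the per-slice maximum for numerical stability. A Python reference of that decomposition, with illustrative function and argument names:

    import math

    def softmax_ref(inputs, axis_size, axis_size_post, axis_size_pre):
        # Numerically stable softmax over the middle dimension of a flat
        # row-major (pre, axis, post) buffer, mirroring softmax_forward.
        outputs = [0.0] * len(inputs)
        for i in range(axis_size_pre):
            for j in range(axis_size_post):
                base = i * axis_size * axis_size_post + j
                idxs = [base + k * axis_size_post for k in range(axis_size)]
                max_val = max(inputs[k] for k in idxs)
                exps = [math.exp(inputs[k] - max_val) for k in idxs]
                total = sum(exps)
                for k, e in zip(idxs, exps):
                    outputs[k] = e / total
        return outputs

For an input of shape [1, 10, 3, 7] with axis=2, AXIS_SIZE_PRE is 1*10, AXIS_SIZE is 3 and AXIS_SIZE_POST is 7, which matches what SoftmaxCPP computes from the input dims.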


From 838ac45ddc211c134c6a197bc7f7630a1eee2c33 Mon Sep 17 00:00:00 2001
From: Gallasko <gallasko@gmail.com>
Date: Fri, 4 Apr 2025 16:19:01 +0200
Subject: [PATCH 14/14] feat: Concat works for any axis

---
 aidge_export_cpp/kernels/concat.hpp           | 31 +++++++---
 aidge_export_cpp/operators.py                 | 56 ++++++++++++++++---
 .../configuration/concat_config.jinja         |  6 +-
 .../kernel_forward/concat_forward.jinja       |  6 +-
 aidge_export_cpp/unit_tests/test_export.py    | 26 +++++++++
 5 files changed, 107 insertions(+), 18 deletions(-)

diff --git a/aidge_export_cpp/kernels/concat.hpp b/aidge_export_cpp/kernels/concat.hpp
index 2db8a0b..dde8c4f 100644
--- a/aidge_export_cpp/kernels/concat.hpp
+++ b/aidge_export_cpp/kernels/concat.hpp
@@ -1,22 +1,39 @@
 #ifndef __AIDGE_EXPORT_CPP_KERNELS_CONCAT__
 #define __AIDGE_EXPORT_CPP_KERNELS_CONCAT__
 
-template<typename T, unsigned int NB_INPUTS>
+template<int AXIS_SIZE_POST,
+         int AXIS_SIZE_PRE,
+         unsigned int NB_INPUTS,
+         typename T>
 __attribute__((always_inline)) inline static
 void concat_forward (
-    const unsigned int axis,
     const T* const * __restrict inputs,
     const unsigned int* __restrict sizes,
     T* __restrict output)
 {
-    unsigned int offset = 0;
+    unsigned int total_concat_axis_size = 0;
+    for (unsigned int n = 0; n < NB_INPUTS; ++n)
+        total_concat_axis_size += sizes[n];
 
-    for (unsigned int n = 0; n < NB_INPUTS; ++n) {
-        for (unsigned int i = 0; i < sizes[n]; ++i) {
-            output[offset + i] = inputs[n][i];
+    for (int i = 0; i < AXIS_SIZE_PRE; ++i) {
+        // Loop over post-axis (e.g., dims after axis 1)
+        for (int j = 0; j < AXIS_SIZE_POST; ++j) {
+            unsigned int axis_offset = 0;
+
+            // Loop over each input tensor
+            for (unsigned int n = 0; n < NB_INPUTS; ++n) {
+                for (unsigned int k = 0; k < sizes[n]; ++k) {
+                    const int input_idx = i * sizes[n] * AXIS_SIZE_POST + k * AXIS_SIZE_POST + j;
+
+                    output[i * total_concat_axis_size * AXIS_SIZE_POST + (axis_offset + k) * AXIS_SIZE_POST + j] =
+                        inputs[n][input_idx];
+                }
+
+                axis_offset += sizes[n];  // move along axis in output
+            }
         }
-        offset += sizes[n];
     }
+
 }
 
 #endif  // __AIDGE_EXPORT_CPP_KERNELS_CONCAT__
\ No newline at end of file
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index 7c22cdb..26ca621 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -337,28 +337,27 @@ class TransposeCPP(ExportNodeCpp):
 class SoftmaxCPP(ExportNodeCpp):
     def __init__(self, node, mem_info):
         super().__init__(node, mem_info)
-        self.attributes["axis"] = node.get_operator().attr.axis
-
         assert self.node.get_nb_inputs() == 1, (
             f"export softmax: nb_inputs == {self.node.get_nb_inputs()} not implemented"
         )
 
         tensor = self.operator.get_input(0)
         nbDims = len(tensor.dims())
+        axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims
 
-        assert self.attributes["axis"] < nbDims, (
+        assert axis < nbDims, (
             f"export softmax: attribute axis == {node.get_operator().attr.axis} should be less than {nbDims}"
         )
 
         postAxisElems = 1
-        for i in range(self.attributes["axis"] + 1, nbDims):
+        for i in range(axis + 1, nbDims):
             postAxisElems *= tensor.dims()[i]
 
         preAxisElems = 1
-        for i in range(self.attributes["axis"]):
+        for i in range(axis):
             preAxisElems *= tensor.dims()[i]
 
-        self.attributes["axis_size"] = tensor.dims()[self.attributes["axis"]]
+        self.attributes["axis_size"] = tensor.dims()[axis]
         self.attributes["axis_size_post"] = postAxisElems
         self.attributes["axis_size_pre"] = preAxisElems
 
@@ -395,7 +394,50 @@ class BatchNorm2DCPP(ExportNodeCpp):
 class Concat(ExportNodeCpp):
     def __init__(self, node, mem_info):
         super().__init__(node, mem_info)
-        self.attributes["axis"] = node.get_operator().attr.axis
+        assert self.node.get_nb_inputs() >= 1, (
+            f"export concat: nb_inputs == {self.node.get_nb_inputs()} not implemented"
+        )
+
+        inputIndex = 0
+
+        tensor = self.operator.get_input(0)
+        for idx, _ in enumerate(self.node.inputs()):
+            if self.operator.get_input(idx) is not None:
+                tensor = self.operator.get_input(idx)
+                nbDims = len(tensor.dims())
+                axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims
+
+                assert axis < nbDims, (
+                    f"export concat: attribute axis == {axis} should be less than {nbDims}"
+                )
+
+                postAxisElems = 1
+                for i in range(axis + 1, nbDims):
+                    postAxisElems *= tensor.dims()[i]
+
+                preAxisElems = 1
+                for i in range(axis):
+                    preAxisElems *= tensor.dims()[i]
+
+                if (inputIndex == 0):
+                    self.attributes["axis_size_post"] = postAxisElems
+                    self.attributes["axis_size_pre"] = preAxisElems
+
+                    self.attributes["axis_size"] = [None] * self.attributes["nb_in"]
+                else:
+                    assert self.attributes["axis_size_post"] == postAxisElems, (
+                        f"export concat: axis_size_post {self.attributes['axis_size_post']} != {postAxisElems}"
+                    )
+                    assert self.attributes["axis_size_pre"] == preAxisElems, (
+                        f"export concat: axis_size_pre {self.attributes['axis_size_pre']} != {preAxisElems}"
+                    )
+
+                self.attributes["axis_size"][idx] = tensor.dims()[axis]
+            else:
+                assert False, (
+                    f"export concat: input {idx} is None, not implemented")
+
+            inputIndex += 1
 
         self.config_template = str(ROOT / "templates" / "configuration" / "concat_config.jinja")
         self.forward_template = str(ROOT / "templates" / "kernel_forward" / "concat_forward.jinja")
diff --git a/aidge_export_cpp/templates/configuration/concat_config.jinja b/aidge_export_cpp/templates/configuration/concat_config.jinja
index 1a6637e..ea8246d 100644
--- a/aidge_export_cpp/templates/configuration/concat_config.jinja
+++ b/aidge_export_cpp/templates/configuration/concat_config.jinja
@@ -9,8 +9,10 @@
 #define {{ name|upper }}_NB_INPUTS {{ nb_in }}
 #define {{ name|upper }}_AXIS {{ axis }}
 {%- for i in range(nb_in) %}
-#define {{ name|upper }}_INPUT_{{i}}_SIZE {{ in_dims[i]|join('*') }}
+#define {{ name|upper }}_INPUT_{{i}}_SIZE {{ axis_size[i] }}
 {%- endfor %}
-#define {{ name|upper }}_OUTPUT_SIZE {{ out_dims[0]|join('*')}}
+
+#define {{ name|upper }}_AXIS_SIZE_POST {{ axis_size_post }}
+#define {{ name|upper }}_AXIS_SIZE_PRE {{ axis_size_pre }}
 
 #endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja b/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja
index a2f48e9..7a77e90 100644
--- a/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja
+++ b/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja
@@ -12,8 +12,10 @@ unsigned int {{ name|upper }}_SIZES[] = {
     {%- endfor -%}
 };
 
-concat_forward<float, {{ nb_in }}> (
-    {{name|upper}}_AXIS,
+concat_forward<{{ name|upper }}_AXIS_SIZE_POST,
+               {{ name|upper }}_AXIS_SIZE_PRE,
+               {{ nb_in }},
+               float> (
     {{ name|upper }}_INPUTS,
     {{ name|upper }}_SIZES,
     {{ out_name[0] }});
diff --git a/aidge_export_cpp/unit_tests/test_export.py b/aidge_export_cpp/unit_tests/test_export.py
index 3d55f11..607778d 100644
--- a/aidge_export_cpp/unit_tests/test_export.py
+++ b/aidge_export_cpp/unit_tests/test_export.py
@@ -185,6 +185,14 @@ class test_operator_export(unittest.TestCase):
 
         self.unit_test_export(model, "SoftmaxAxis2", [[1, 10, 3, 7]])
 
+    def test_export_softmax_axis_negative(self):
+        print("SoftmaxAxisNegative")
+        model = aidge_core.sequential([
+            aidge_core.Softmax(axis=-3, name="sf0")
+        ])
+
+        self.unit_test_export(model, "SoftmaxAxisNegative", [[1, 10, 3, 7]])
+
     def test_export_softmax_axis_0(self):
         print("SoftmaxAxis0")
         model = aidge_core.sequential([
@@ -376,6 +384,24 @@ class test_operator_export(unittest.TestCase):
 
         self.unit_test_export(model, "Concat", [[1, 5, 7]])
 
+    def test_export_concat_axis_2(self):
+        print("ConcatAxis2")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 5, 7], name="producer"),
+            aidge_core.Concat(nb_inputs=2, axis=2, name="concat")
+        ])
+
+        self.unit_test_export(model, "ConcatAxis2", [[1, 5, 7]])
+
+    def test_export_concat_axis_negative(self):
+        print("ConcatAxisNegative")
+        model = aidge_core.sequential([
+            aidge_core.Producer([1, 5, 7], name="producer"),
+            aidge_core.Concat(nb_inputs=2, axis=-2, name="concat")
+        ])
+
+        self.unit_test_export(model, "ConcatAxisNegative", [[1, 5, 7]])
+
     def test_export_conv2D(self):
         print("Conv2D")
         model = aidge_core.sequential([
-- 
GitLab
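
The axis-generic concat kernel in this last patch iterates over the same (pre, post) pairs and writes each input's slice at an offset accumulated along the concatenation axis. Below is a small Python sketch of that indexing over flat row-major buffers; the names are illustrative, and sizes[n] is input n's extent along the axis, as in the SIZES array emitted by the forward template.

    def concat_ref(inputs, sizes, axis_size_pre, axis_size_post):
        # inputs: list of flat row-major buffers, one per input tensor.
        total_axis = sum(sizes)
        output = [0.0] * (axis_size_pre * total_axis * axis_size_post)
        for i in range(axis_size_pre):
            for j in range(axis_size_post):
                axis_offset = 0
                for n, size in enumerate(sizes):
                    for k in range(size):
                        in_idx = (i * size + k) * axis_size_post + j
                        out_idx = (i * total_axis + axis_offset + k) * axis_size_post + j
                        output[out_idx] = inputs[n][in_idx]
                    axis_offset += size
        return output

Concatenating two [1, 5, 7] inputs along axis 1, for example, uses axis_size_pre = 1, sizes = [5, 5] and axis_size_post = 7, and produces a [1, 10, 7] output.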