diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
index 539a3128c8c7afb8dad06799e657f70d22db1e9c..0c8ab84d26ab6e88e9e9bf3f1ee4d3b7f1b0f257 100644
--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -30,6 +30,7 @@
 #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
 #include "aidge/backend/cpu/operator/DivImpl.hpp"
 #include "aidge/backend/cpu/operator/ErfImpl.hpp"
+#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 #include "aidge/backend/cpu/operator/FoldImpl.hpp"
 #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
diff --git a/include/aidge/backend/cpu/operator/ExpandImpl.hpp b/include/aidge/backend/cpu/operator/ExpandImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..adfc6ab1ef2e6550c6307fb93d0079ea3b5fc5a2
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ExpandImpl.hpp
@@ -0,0 +1,35 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_H_
+#define AIDGE_CPU_OPERATOR_EXPANDIMPL_H_
+
+#include <memory>
+#include <vector>
+
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
+#include "aidge/operator/Expand.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+// Operator implementation entry point for the backend
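+// Forward kernel arguments: input data tensor, expand-shape tensor,
+// raw output pointer, output dimensions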
+using ExpandImpl_cpu = OperatorImpl_cpu<Expand_Op,
+                                        void(const std::shared_ptr<Tensor> &,
+                                             const std::shared_ptr<Tensor> &,
+                                             void *,
+                                             const std::vector<DimSize_t> &)>;
+
+// Implementation entry point registration to Operator
+REGISTRAR(Expand_Op, "cpu", Aidge::ExpandImpl_cpu::create);
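+// Per-datatype kernel registrations live in ExpandImpl_kernels.hpp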
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_EXPANDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ExpandImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ExpandImpl_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3f4341c333ffd4032a90b8ee5d50a8475e81253e
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ExpandImpl_kernels.hpp
@@ -0,0 +1,215 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
+
+#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+#include <aidge/data/Data.hpp>
+#include <aidge/data/Tensor.hpp>
+#include <aidge/data/half.hpp>
+#include <aidge/scheduler/ProdConso.hpp>
+#include <aidge/utils/Types.h>
+#include <cmath>
+#include <cstdint> // std::int32_t, std::int64_t
+#include <memory>
+#include <numeric>
+
+namespace {
+// Assumes input and output values are contiguous in memory
+template <class IO>
+void expandContiguousArray(const std::size_t inputStackSize,
+                           const std::size_t outputStackSize,
+                           const IO *input,
+                           IO *output) {
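+    // A single-element input chunk is broadcast across the whole output
+    // chunk; otherwise values are copied one-to-one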
+    for (std::size_t i = 0; i < outputStackSize; ++i) {
+        output[i] = (inputStackSize == 1) ? input[0] : input[i];
+    }
+}
+} // namespace
+
+namespace Aidge {
+
+template <class IO>
+void ExpandImpl_cpu_forward_kernel(
+    const std::shared_ptr<Tensor> &inData,
+    const std::shared_ptr<Tensor> &_inExpandShape,
+    void *_output,
+    const std::vector<DimSize_t> &outputDims) {
+
+    // Copy the expand-shape values and the input data dimensions locally,
+    // as the broadcasting process below needs to modify them
+    IO *output = static_cast<IO *>(_output);
+    std::vector<DimSize_t> inExpandShape(_inExpandShape->size());
+    for (DimSize_t i = 0; i < _inExpandShape->size(); ++i) {
+        inExpandShape[i] = _inExpandShape->get<std::int64_t>(i);
+    }
+    std::vector<DimSize_t> inDataDims = inData->dims();
+
+    // Example with 2 tensors
+    // [5,2,1,7] & [2,6,7]
+    // 1. Pad the tensor with the fewest dimensions with 1s on the left so
+    //    both have the same rank -> [5,2,1,7] & [1,2,6,7]
+    // 2. Find the highest equal dimension -> 3
+    //    Exception: if the first diverging dimension is the last one, then ->
+    //    4 (dims.size())
+    // 3. Compute the highest number of contiguous data -> 7
+    // 4. Compute stride and offset step for the broadcast mechanism
+    // 5. Call a simple kernel
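+    //
+    // Concretely, for the example above (output dims [5,2,6,7]):
+    // contiguousIdx = 3, so chunks of 7 contiguous values are written
+    // 5*2*6 = 60 times, and on the broadcast dimension (input size 1) the
+    // input offset step is 1 - stride = 0, so the same chunk is re-read
+    // instead of advancing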
+
+    // ## Compute compatible input dimensions
+    // Special case: when the dimensions are already equal, the whole array
+    // is copied in a single pass
+
+    if (inDataDims == inExpandShape) {
+        const std::size_t input0ContiguousSize =
+            std::accumulate(inDataDims.cbegin(),
+                            inDataDims.cend(),
+                            static_cast<std::size_t>(1),
+                            std::multiplies<std::size_t>());
+        for (std::size_t i = 0; i < input0ContiguousSize; ++i) {
+            output[i] = inData->get<IO>(i);
+        }
+        return;
+    }
+
+    // set dimensions to be of equal size by filling the smallest one with
+    // ones.
+    if (inDataDims.size() > inExpandShape.size()) {
+        inExpandShape.insert(inExpandShape.cbegin(),
+                             inDataDims.size() - inExpandShape.size(),
+                             static_cast<DimSize_t>(1));
+    } else if (_inExpandShape->size() > inDataDims.size()) {
+        inDataDims.insert(inDataDims.cbegin(),
+                          inExpandShape.size() - inDataDims.size(),
+                          static_cast<DimSize_t>(1));
+    }
+
+    const std::size_t nbDims = inDataDims.size();
+
+    // Find the highest equal dimension
+    std::size_t contiguousIdx = nbDims;
+    while (contiguousIdx-- > 0) {
+        if (inDataDims[contiguousIdx] != inExpandShape[contiguousIdx]) {
+            break;
+        }
+    }
+    if (contiguousIdx == (nbDims - 1)) {
+        // The last dimensions of one of the input tensors are of size 1:
+        // walk back over those trailing 1s to extend the contiguous block
+        const std::vector<std::size_t> &dims =
+            (inDataDims[contiguousIdx] == 1) ? inDataDims : inExpandShape;
+        while ((contiguousIdx + 1 > 0) && (dims[contiguousIdx] == 1)) {
+            --contiguousIdx;
+        }
+    }
+    ++contiguousIdx;
+
+    // Compute the highest number of contiguous data for each Tensor
+    const std::size_t inputDataContiguousSize =
+        std::accumulate(inDataDims.cbegin() + contiguousIdx,
+                        inDataDims.cend(),
+                        static_cast<std::size_t>(1),
+                        std::multiplies<std::size_t>());
+    const std::size_t outputContiguousSize =
+        std::accumulate(outputDims.cbegin() + contiguousIdx,
+                        outputDims.cend(),
+                        static_cast<std::size_t>(1),
+                        std::multiplies<std::size_t>());
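+    // Within the contiguous region the chunk sizes either match exactly or
+    // the input chunk collapses to a single element to broadcast: the two
+    // cases expandContiguousArray handles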
+
+    // Initialize the strides used to step through the input data while
+    // broadcasting
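+    // strideStepIn[i] is the input-chunk offset increment applied when
+    // output dimension i is the outermost one to change: 1 advances to the
+    // next chunk, while 1 - stridePostIn[i] (for dimensions broadcast from
+    // an input size of 1) rewinds the offset so the same data is re-read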
+    std::unique_ptr<std::int32_t[]> stridePostIn =
+        std::make_unique<std::int32_t[]>(contiguousIdx);
+    std::unique_ptr<std::int32_t[]> strideStepIn =
+        std::make_unique<std::int32_t[]>(contiguousIdx);
+    if (contiguousIdx > 0) {
+        stridePostIn[contiguousIdx - 1] = 1;
+        for (std::size_t i = contiguousIdx - 2;
+             i != static_cast<std::size_t>(-1);
+             --i) {
+            stridePostIn[i] = stridePostIn[i + 1] *
+                              static_cast<std::int32_t>(inDataDims[i + 1]);
+        }
+        for (std::size_t i = 0; i != contiguousIdx; ++i) {
+            strideStepIn[i] = (inDataDims[i] == 1) ? 1 - stridePostIn[i] : 1;
+        }
+    }
+
+    // Offsets (in contiguous chunks) into the input and output arrays
+    std::size_t offsetInData = 0;
+    std::size_t offsetOut = 0;
+
+    std::size_t dim = contiguousIdx - 1;
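+    // Number of contiguous output chunks to produce: the product of the
+    // output dimensions left of the contiguous region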
+    const std::size_t nbStacks =
+        std::accumulate(outputDims.cbegin(),
+                        outputDims.cbegin() + contiguousIdx,
+                        static_cast<std::size_t>(1),
+                        std::multiplies<std::size_t>());
+
+    for (std::size_t stack = 0; stack < nbStacks;) {
+        expandContiguousArray<IO>(
+            inputDataContiguousSize,
+            outputContiguousSize,
+            &static_cast<const IO *>(
+                inData->getImpl()
+                    ->rawPtr())[offsetInData * inputDataContiguousSize],
+            &output[offsetOut * outputContiguousSize]);
+        if (++stack < nbStacks) {
+            std::size_t tmpStack = stack;
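+            // Walk up past the dimensions that rolled over to find the
+            // outermost dimension whose index changed, then advance the
+            // input offset by that dimension's precomputed step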
+            while (tmpStack % outputDims[dim] == 0) {
+                tmpStack /= outputDims[dim];
+                dim--;
+            }
+            offsetInData += strideStepIn[dim];
+            ++offsetOut;
+            dim = contiguousIdx - 1;
+        }
+    }
+}
+
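+// Each registration maps an input/output datatype spec
+// ({{data dtype, shape dtype}, {output dtype}}, shape always Int64) to an
+// implementation {producer-consumer model, forward kernel, backward kernel};
+// no backward kernel is provided yet (nullptr)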
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Int16, DataType::Int64}, {DataType::Int16}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<std::int16_t>,
+           nullptr});
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Int32, DataType::Int64}, {DataType::Int32}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<std::int32_t>,
+           nullptr});
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Int64, DataType::Int64}, {DataType::Int64}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<std::int64_t>,
+           nullptr});
+
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Float16, DataType::Int64}, {DataType::Float16}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<half_float::half>,
+           nullptr});
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Float32, DataType::Int64}, {DataType::Float32}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<float>,
+           nullptr});
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Float64, DataType::Int64}, {DataType::Float64}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<double>,
+           nullptr});
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_ */
diff --git a/src/operator/ExpandImpl.cpp b/src/operator/ExpandImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dfd4d2d82edc4dfb5bbaec6f5b33bf1c00bf3c75
--- /dev/null
+++ b/src/operator/ExpandImpl.cpp
@@ -0,0 +1,56 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
+
+#include <vector>
+
+#include "aidge/backend/cpu/operator/ExpandImpl_kernels.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Expand.hpp"
+#include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+
+template <> void ExpandImpl_cpu::forward() {
+    const Expand_Op &op_ = static_cast<const Expand_Op &>(mOp);
+    // Check that inputs are provided
+    AIDGE_ASSERT(op_.getInput(0),
+                 "{}: missing input 0: {}",
+                 Expand_Op::Type,
+                 Expand_Op::getInputsName()[0]);
+    AIDGE_ASSERT(op_.getInput(1),
+                 "{}: missing input 1: {}",
+                 Expand_Op::Type,
+                 Expand_Op::getInputsName()[1]);
+
+    // Find the correct kernel type
+    const auto impl =
+        Registrar<ExpandImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
+    // Call kernel
+    impl.forward(op_.getInput(0),
+                 op_.getInput(1),
+                 op_.getOutput(0)->getImpl()->rawPtr(),
+                 op_.getOutput(0)->dims());
+}
+
+template <> void ExpandImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Expand_Op on backend cpu");
+}
+
+} // namespace Aidge
diff --git a/unit_tests/operator/Test_ExpandImpl.cpp b/unit_tests/operator/Test_ExpandImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3fcb5e4460388712abb99dc4aa2f1fd0f274d841
--- /dev/null
+++ b/unit_tests/operator/Test_ExpandImpl.cpp
@@ -0,0 +1,113 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <aidge/data/Data.hpp>
+#include <aidge/operator/OperatorTensor.hpp>
+#include <aidge/utils/ArrayHelpers.hpp>
+#include <aidge/utils/TensorUtils.hpp>
+#include <aidge/utils/Types.h>
+#include <catch2/catch_test_macros.hpp>
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/filler/Filler.hpp"
+#include "aidge/operator/Expand.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+using std::shared_ptr;
+
+using namespace Aidge;
+
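+// Shared test setup: binds the data and shape inputs to the operator on the
+// CPU backend and prepares the expected output tensor for comparison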
+void setupTestExpand(shared_ptr<Tensor> inputData,
+                     shared_ptr<Tensor> inputShape,
+                     shared_ptr<OperatorTensor> &op,
+                     shared_ptr<Tensor> &expectedOutput) {
+
+    op->getOutput(0)->setDataType(inputData->dataType());
+
+    inputData->setBackend("cpu");
+    op->associateInput(0, inputData);
+
+    inputShape->setBackend("cpu");
+    op->associateInput(1, inputShape);
+
+    expectedOutput->setBackend("cpu");
+    expectedOutput->setDataType(DataType::Int32);
+}
+
+TEST_CASE("[cpu/operator] Expand(forward)", "[Expand][CPU]") {
+    auto node = Expand();
+    auto op = std::static_pointer_cast<OperatorTensor>(node->getOperator());
+    op->setBackend("cpu");
+
+    SECTION("Expand shape is bigger than inputData") {
+        auto inputData = std::make_shared<Tensor>(Array1D<int, 2>({1, 3}));
+        auto inputShape =
+            std::make_shared<Tensor>(Array1D<std::int64_t, 4>({1, 3, 4, 2}));
+        auto expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 1, 3, 4, 2>({{{{{1, 3}, {1, 3}, {1, 3}, {1, 3}},
+                                        {{1, 3}, {1, 3}, {1, 3}, {1, 3}},
+                                        {{1, 3}, {1, 3}, {1, 3}, {1, 3}}}}}));
+        setupTestExpand(inputData, inputShape, op, expectedOutput);
+
+        // forwardDims has already been tested in core
+        CHECK(op->forwardDims(true));
+        REQUIRE_NOTHROW(op->forward());
+        CHECK(approxEq<int>(*expectedOutput, *op->getOutput(0)));
+    }
+    SECTION("Expand shape has less dimensions than inputData") {
+        auto inputData = std::make_shared<Tensor>(
+            Array3D<int, 2, 1, 3>({{{2, 1, 3}, {2, 1, 3}}}));
+        auto inputShape =
+            std::make_shared<Tensor>(Array1D<std::int64_t, 2>({2, 3}));
+        auto expectedOutput = std::make_shared<Tensor>(Array3D<int, 2, 2, 3>(
+            {{{{2, 1, 3}, {2, 1, 3}}, {{2, 1, 3}, {2, 1, 3}}}}));
+        setupTestExpand(inputData, inputShape, op, expectedOutput);
+
+        // forwardDims has already been tested in core
+        CHECK(op->forwardDims(true));
+        REQUIRE_NOTHROW(op->forward());
+        CHECK(approxEq<int>(*expectedOutput, *op->getOutput(0)));
+    }
+    SECTION("Expand shape = {1} leads to input equal to output.") {
+        auto inputData = std::make_shared<Tensor>(
+            Array4D<int, 2, 1, 3, 1>({{{2, 1, 3}, {2, 1, 3}}}));
+        auto inputShape =
+            std::make_shared<Tensor>(Array1D<std::int64_t, 1>({1}));
+        auto expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 2, 1, 3, 1>({{{2, 1, 3}, {2, 1, 3}}}));
+        setupTestExpand(inputData, inputShape, op, expectedOutput);
+
+        // forwardDims has already been tested in core
+        CHECK(op->forwardDims(true));
+        REQUIRE_NOTHROW(op->forward());
+        CHECK(approxEq<int>(*expectedOutput, *op->getOutput(0)));
+    }
+    SECTION("The only common dimension is the last one & its equal to 1") {
+        auto inputData = std::make_shared<Tensor>(
+            Array4D<int, 1, 1, 3, 1>({{{{2, 1, 3}}}}));
+        auto inputShape =
+            std::make_shared<Tensor>(Array1D<std::int64_t, 3>({2, 1, 1}));
+        auto expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 1, 2, 3, 1>({{{{2, 1, 3}, {2, 1, 3}}}}));
+        setupTestExpand(inputData, inputShape, op, expectedOutput);
+
+        // forwardDims has already been tested in core
+        CHECK(op->forwardDims(true));
+        REQUIRE_NOTHROW(op->forward());
+        CHECK(approxEq<int>(*expectedOutput, *op->getOutput(0)));
+    }
+    SECTION("N-Dim to N-Dim") {}
+    auto inputData = std::shared_ptr<Tensor>();
+}