diff --git a/include/aidge/backend/cpu/operator/ConcatImpl.hpp b/include/aidge/backend/cpu/operator/ConcatImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..84020f5d53a13459441104650136912ce2e0123b
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ConcatImpl.hpp
@@ -0,0 +1,50 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_CONCATIMPL_H_
+#define AIDGE_CPU_OPERATOR_CONCATIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Concat.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+// class Concat_Op;
+
+// compute kernel registry for forward and backward
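+// Kernel arguments: concatenation axis, dimensions of one input tensor,
+// raw pointers to the inputs, raw pointer to the output.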
+class ConcatImplForward_cpu
+    : public Registrable<ConcatImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const std::vector<DimSize_t>, const std::vector<void*>, void*)> {
+};
+class ConcatImplBackward_cpu
+    : public Registrable<ConcatImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const std::vector<DimSize_t>, const std::vector<void*>, void*)> {
+};
+
+class ConcatImpl_cpu : public OperatorImpl {
+public:
+    ConcatImpl_cpu(const Concat_Op& op) : OperatorImpl(op) {}
+
+    static std::unique_ptr<ConcatImpl_cpu> create(const Concat_Op& op) {
+        return std::make_unique<ConcatImpl_cpu>(op);
+    }
+
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
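+// Register this CPU implementation for the Concat operator under the "cpu" backend key.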
+namespace {
+static Registrar<Concat_Op> registrarConcatImpl_cpu("cpu", Aidge::ConcatImpl_cpu::create);
+}
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_CONCATIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c962f6dae022de9757b6042be995cc37fb16bc3b
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp
@@ -0,0 +1,67 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_CONCATIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_CONCATIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+#include <cstddef>
+#include <cmath>
+#include "aidge/data/Data.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/ConcatImpl.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void ConcatImpl_cpu_forward_kernel(const std::size_t axisIdx, const std::vector<DimSize_t> arraysDims, const std::vector<void*> input_, void* output_)
+{
+    O* output = static_cast<O*>(output_);
+    std::vector<I*> input;
+    for(const auto& elem:input_)
+    {
+        input.emplace_back(static_cast<I*>(elem));
+    }
+
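+    // Concatenation interleaves contiguous chunks of the inputs: e.g. concatenating
+    // two 2x2 inputs along axis 1 yields a 2x4 output laid out as
+    // {in0 row 0, in1 row 0, in0 row 1, in1 row 1}. All inputs are assumed to have
+    // the same dimensions (arraysDims comes from input #0).
+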
+    // compute the length of the contiguous chunk to copy from each input
+    // (product of the dimensions from the concatenation axis onwards)
+    // and the total number of elements of one input tensor
+    std::size_t chunkSize = 1;
+    std::size_t totalTensorSize = 1;
+    for (std::size_t i = 0; i < arraysDims.size(); ++i)
+    {
+        if (i >= axisIdx)
+            chunkSize *= arraysDims[i];
+        totalTensorSize *= arraysDims[i];
+    }
+
+    // interleave one chunk of every input per iteration
+    const std::size_t iterationsCount = totalTensorSize / chunkSize;
+    for (std::size_t i = 0; i < iterationsCount; ++i)
+    {
+        for (std::size_t j = 0; j < input.size(); ++j)
+        {
+            I* copyPtr = std::next(input[j], i * chunkSize);
+            std::copy_n(copyPtr, chunkSize, output);
+            output += chunkSize;
+        }
+    }
+
+}
+
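+// Register the forward kernel for each supported (input, output) data type pair.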
+namespace {
+static Registrar<ConcatImplForward_cpu> registrarConcatImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::ConcatImpl_cpu_forward_kernel<float, float>);
+static Registrar<ConcatImplForward_cpu> registrarConcatImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::ConcatImpl_cpu_forward_kernel<int, int>);
+static Registrar<ConcatImplForward_cpu> registrarConcatImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::ConcatImpl_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_CONCATIMPL_FORWARD_KERNEL_H_ */
diff --git a/src/operator/ConcatImpl.cpp b/src/operator/ConcatImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..18bf031ad35679e8611ed5132f1cb0fd7e352872
--- /dev/null
+++ b/src/operator/ConcatImpl.cpp
@@ -0,0 +1,51 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <cstddef> // std::size_t
+#include <tuple>   // std::get
+#include <vector>
+
+#include "aidge/operator/Concat.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/ConcatImpl.hpp"
+#include "aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+void Aidge::ConcatImpl_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+
+    Concat_Op::Attrs attr = dynamic_cast<const Concat_Op&>(mOp).getStaticAttributes();
+    const int& axisIdx = static_cast<const int&>(std::get<0>(attr));
+    assert(mOp.getInput(0)->nbDims() > static_cast<std::size_t>(axisIdx) && "input rank must be larger than the concatenation axis");
+
+    auto kernelFunc = Registrar<ConcatImplForward_cpu>::create({
+        mOp.getInput(0)->dataType(),
+        mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    std::vector<void*> inputTensors;
+    for (std::size_t i = 0; i < dynamic_cast<const Concat_Op&>(mOp).mNbIn; ++i) {
+        inputTensors.push_back(mOp.getInput(i)->getImpl()->rawPtr());
+    }
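+    // Only the dimensions of input #0 are forwarded to the kernel, so every input
+    // is expected to have the same shape.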
+    kernelFunc(axisIdx,
+               mOp.getInput(0)->dims(),
+               inputTensors,
+               mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/unit_tests/operator/Test_ConcatImpl.cpp b/unit_tests/operator/Test_ConcatImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9bda07d1423fbbee3ce757d3ba4ac40948e605ec
--- /dev/null
+++ b/unit_tests/operator/Test_ConcatImpl.cpp
@@ -0,0 +1,59 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Concat.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+#include <cmath>  // std::abs
+#include <memory>
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Concat(forward)") {
+    SECTION("2D Tensor") {
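+        // Concatenate two 2x2 tensors along axis 1: the expected output is 2x4.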
+        std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.00543531, 0.53726782},
+                {0.44371938, 0.93770550}
+            }
+        });
+        std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.87131297, 0.22378820},
+                {0.74409730, 0.72109798}
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,4> {
+            {
+                {0.00543531, 0.53726782, 0.87131297, 0.22378820},
+                {0.44371938, 0.93770550, 0.74409730, 0.72109798}
+            }
+        });
+
+        std::shared_ptr<Node> myConcat = Concat(1);
+        myConcat->getOperator()->setDatatype(DataType::Float32);
+        myConcat->getOperator()->setBackend("cpu");
+        myConcat->getOperator()->associateInput(0,input1);
+        myConcat->getOperator()->associateInput(1,input2);
+        myConcat->getOperator()->computeOutputDims();
+        myConcat->forward();
+
+        float* resPtr = static_cast<float*>(myConcat->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 8; ++i) {
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+        }
+
+    }
+}