diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
index 0aec0f52263e41974454e5f9622a9aa83a189cb0..9b85eb812caffca3820a711d46775e1134db863f 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
@@ -95,6 +95,7 @@ class ReduceMeanImpl3D_cpu : public OperatorImpl {
 };
 namespace {
-// add cpu backend to ReduceMean_Op<2> implementation registry
+// add cpu backend to the ReduceMean_Op<1>, <2> and <3> implementation registries
+static Registrar<ReduceMean_Op<1>> registrarReduceMeanImpl1D_cpu("cpu", Aidge::ReduceMeanImpl1D_cpu::create);
 static Registrar<ReduceMean_Op<2>> registrarReduceMeanImpl2D_cpu("cpu", Aidge::ReduceMeanImpl2D_cpu::create);
 static Registrar<ReduceMean_Op<3>> registrarReduceMeanImpl3D_cpu("cpu", Aidge::ReduceMeanImpl3D_cpu::create);
 }  // namespace
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
index 0a27cf9827d7845f4e0473db4d2d5d195989f1f6..b7e7924b052a55a78ded012e4a13e90b64bab8ee 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
@@ -37,14 +37,15 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op<DIM>::Attrs&
         totalElements *= dimSize;
     }
 
-    // Create a temporary arrays to store intermediate input/output for each ReduceDim op
+    // Create temporary arrays to store the intermediate input/output of each reduce operation
     std::vector<I> tempInArray(input, input + totalElements);
     std::vector<I> tempOutArray(input, input + totalElements);
     std::vector<size_t> currentDims = inputDims;
 
 
     std::size_t addedElems = 0;
-    for(std::size_t i=0; i<1 ; ++i)
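+    // Reduce the requested axes one at a time: each pass averages the data along a single axis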
+    for(std::size_t i=0; i<DIM; ++i)
     {
 		addedElems = 0;
 		I* tempOutArrayPtr = tempOutArray.data();
@@ -54,26 +54,30 @@
         std::size_t nbElemBeforeAxis = 1;
 
         for (size_t d = 0; d < currentDims.size(); ++d) {
-            nbElemAfterAxis *= (d > axis) ? currentDims[d]:1;
-            nbElemBeforeAxis *= (d < axis) ? currentDims[d]:1;
+            if (d > axis)
+                nbElemAfterAxis *= currentDims[d];
+            else if (d < axis)
+                nbElemBeforeAxis *= currentDims[d];
         }
 
         for (std::size_t j=0; j<nbElemBeforeAxis; ++j)
         {
             for (std::size_t k=0; k<nbElemAfterAxis; ++k)
             {
+                // Compute the mean over the reduced axis for position (j, k)
                 I mean = 0;
                 for(std::size_t l=0; l<currentDims[axis];l++)
                 {
-                        size_t idx = j*(nbElemAfterAxis*currentDims[axis])+l*currentDims[axis]+k;
-                        mean+= tempInArray[idx];
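+                        // Row-major offset: outer block j, step l along the reduced axis (stride nbElemAfterAxis), inner position k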
+                        size_t idx = j * (nbElemAfterAxis * currentDims[axis]) + l * nbElemAfterAxis + k;
+                        mean += tempInArray[idx];
                 }
-                tempOutArrayPtr[addedElems] = mean/currentDims[axis];
+                tempOutArrayPtr[addedElems] = mean / currentDims[axis];
                 addedElems++;
             }
         }
 
-		// Update the input for the next slice operation
+        // Update the input for the next reduce operation
         tempInArray.assign(tempOutArray.begin(), tempOutArray.begin() + addedElems);
 		if(keepDims)
         	currentDims[axis] = 1;
diff --git a/unit_tests/operator/Test_ReduceMeanImpl.cpp b/unit_tests/operator/Test_ReduceMeanImpl.cpp
index 24ef7b993fabba8fd2139570990ba6a1fc4784e4..918e7f4857b726aea2f5a5db6b84d5bdf61baceb 100644
--- a/unit_tests/operator/Test_ReduceMeanImpl.cpp
+++ b/unit_tests/operator/Test_ReduceMeanImpl.cpp
@@ -21,42 +21,111 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] ReduceMean(forward)") {
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-        {
+    SECTION("KeepDims") {
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
             {
-                { 5.0, 1.0 },
-                { 20.0, 2.0 }
-            },
+                {
+                    { 5.0, 1.0 },
+                    { 20.0, 2.0 }
+                },
+                {
+                    { 30.0, 1.0 },
+                    { 40.0, 2.0 }
+                },
+                {
+                    { 55.0, 1.0 },
+                    { 60.0, 2.0 }
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array3D<float,3,1,2> {
+            {
+
+                {{ 12.5, 1.5 }},
+                {{ 35.0, 1.5 }},
+                {{ 57.5, 1.5 }}
+            }
+        });
+
+        std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 1);
+        myReduceMean->getOperator()->setDatatype(DataType::Float32);
+        myReduceMean->getOperator()->setBackend("cpu");
+        myReduceMean->getOperator()->associateInput(0,myInput);
+        myReduceMean->getOperator()->computeOutputDims();
+        myReduceMean->forward();
+        myReduceMean->getOperator()->getOutput(0)->print();
+
+        REQUIRE(*(myReduceMean->getOperator()->getOutput(0)) == *myOutput);
+    }
+    SECTION("not_KeepDims") {
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
             {
-                { 30.0, 1.0 },
-                { 40.0, 2.0 }
-            },
+                {
+                    { 5.0, 1.0 },
+                    { 20.0, 2.0 }
+                },
+                {
+                    { 30.0, 1.0 },
+                    { 40.0, 2.0 }
+                },
+                {
+                    { 55.0, 1.0 },
+                    { 60.0, 2.0 }
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> {
+            {
+                { 12.5, 1.5 },
+                { 35.0, 1.5 },
+                { 57.5, 1.5 }
+            }
+        });
+
+        std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 0);
+        myReduceMean->getOperator()->setDatatype(DataType::Float32);
+        myReduceMean->getOperator()->setBackend("cpu");
+        myReduceMean->getOperator()->associateInput(0,myInput);
+        myReduceMean->getOperator()->computeOutputDims();
+        myReduceMean->forward();
+        myReduceMean->getOperator()->getOutput(0)->print();
+
+        REQUIRE(*(myReduceMean->getOperator()->getOutput(0)) == *myOutput);
+
+    }
+    SECTION("all_axes") {
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
             {
-                { 55.0, 1.0 },
-                { 60.0, 2.0 }
+                {
+                    { 5.0, 1.0 },
+                    { 20.0, 2.0 }
+                },
+                {
+                    { 30.0, 1.0 },
+                    { 40.0, 2.0 }
+                },
+                {
+                    { 55.0, 1.0 },
+                    { 60.0, 2.0 }
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array3D<float,1,1,1> {
+            {
+                {
+                    {18.25}
+                }
             }
-        }
-    });
-    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array3D<float,3,1,2> {
-        {
-
-            {{ 12.5, 1.5 }},
-            {{ 35.0, 1.5 }},
-            {{ 57.5, 1.5 }}
-        }
-    });
-    //TODO fix case of DIM=1
-    std::shared_ptr<Node> myReduceMean = ReduceMean({1,1});
-    myReduceMean->getOperator()->setDatatype(DataType::Float32);
-    myReduceMean->getOperator()->setBackend("cpu");
-    myReduceMean->getOperator()->associateInput(0,myInput);
-    myReduceMean->getOperator()->computeOutputDims();
-    myReduceMean->forward();
-    myReduceMean->getOperator()->getOutput(0)->print();
-
-    float* resPtr = static_cast<float*>(myReduceMean->getOperator()->getOutput(0)->getImpl()->rawPtr());
-    float* expectedPtr = static_cast<float*>(myOutput->getImpl()->rawPtr());
-    for (std::size_t i = 0; i< myOutput->size(); ++i) {
-        REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+        });
+
+        std::shared_ptr<Node> myReduceMean = ReduceMean({0, 1, 2});
+        myReduceMean->getOperator()->setDatatype(DataType::Float32);
+        myReduceMean->getOperator()->setBackend("cpu");
+        myReduceMean->getOperator()->associateInput(0,myInput);
+        myReduceMean->getOperator()->computeOutputDims();
+        myReduceMean->forward();
+        myReduceMean->getOperator()->getOutput(0)->print();
+
+        REQUIRE(*(myReduceMean->getOperator()->getOutput(0)) == *myOutput);
     }
 }
\ No newline at end of file