diff --git a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
index 6646aa1c3772962afd320452a51257c319367808..1afac4698be2a63790ebac671ecc1e59166c5f94 100644
--- a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
@@ -29,7 +29,7 @@ void ClipImpl_cpu_forward_kernel(
     O* output = static_cast<O*>(output_);
 
     for (std::size_t i = 0; i < length; ++i) {
-        output[i] = std::max(min_, std::min(static_cast<float>(input[i]), max_));
+        output[i] = std::min(std::max(static_cast<float>(input[i]), min_), max_);
     }
 }
 
diff --git a/unit_tests/operator/Test_ClipImpl.cpp b/unit_tests/operator/Test_ClipImpl.cpp
index a2da6d71a989f026439e3e60f66d272cc6f7e864..45c8da5bf7ecc84fad6b3e694fe204540f579af3 100644
--- a/unit_tests/operator/Test_ClipImpl.cpp
+++ b/unit_tests/operator/Test_ClipImpl.cpp
@@ -61,7 +61,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
     std::chrono::time_point<std::chrono::system_clock> end;
     std::chrono::duration<double, std::micro> duration;
 
-    SECTION("Simple clamp test [Forward]") {
+    SECTION("Simple clip test [Forward]") {
         std::size_t totalComputation = 0;
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
             // generate Tensors dimensions
@@ -121,7 +121,67 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
         std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
     } 
-    SECTION("Clamp with Clip Attr [Forward]")
+    SECTION("Clip test with min >= max [Forward]") {
+        std::size_t totalComputation = 0;
+        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+            // generate Tensors dimensions
+            const std::size_t dim0 = distDims(gen);
+            const std::size_t dim1 = distDims(gen);
+            totalComputation += dim0*dim1;
+
+            // Create and populate the array with random float values
+            float* Array = new float[dim0*dim1];
+            for (int i = 0; i < dim0*dim1; ++i) {
+                Array[i] = dis(gen); // Generate random float value
+            }
+
+            // Convert Input to Tensor
+            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
+            TInput -> resize({dim0,dim1});
+            TInput -> setBackend("cpu");
+            TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
+            
+            float min = dismax(gen);
+            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
+            Tmin -> resize({});
+            Tmin -> setBackend("cpu");
+            Tmin -> getImpl() -> setRawPtr(&min,1);
+
+            float max = dismin(gen); //We generate max and min so that max is always <= min
+            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
+            Tmax -> resize({});
+            Tmax -> setBackend("cpu");
+            Tmax -> getImpl() -> setRawPtr(&max,1);
+            // convert res to Tensor
+            std::vector<float> GT(Array, Array + (dim0*dim1));
+            for (float& val : GT)
+            {
+                val = max;
+            }
+            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
+            Tres -> resize({dim0,dim1});
+            Tres -> setBackend("cpu");
+            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
+
+            op->associateInput(0, TInput);
+            op->associateInput(1, Tmin);
+            op->associateInput(2, Tmax);
+            op->setDataType(DataType::Float32);
+            op->setBackend("cpu");
+            op->forwardDims(true);
+            
+            start = std::chrono::system_clock::now();
+            myClip->forward();
+            end = std::chrono::system_clock::now();
+
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+
+            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+        }
+        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "total time: " << duration.count() << std::endl;
+    } 
+    SECTION("Clip with Clip Attr [Forward]")
     {
         std::size_t totalComputation = 0;
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) 
@@ -174,7 +234,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
         std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
     }
-    SECTION("Simple clamp test [Backward]") {
+    SECTION("Simple clip test [Backward]") {
         std::size_t totalComputation = 0;
         duration = std::chrono::duration<double, std::micro>::zero();
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {