Skip to content
Snippets Groups Projects
Commit 2a745b7d authored by Thibault Allenet's avatar Thibault Allenet
Browse files

Merge branch 'clipping_node_remove_attr' into 'dev'

[Add] Clip Operator

See merge request !91
parents 3d0fd068 375eb65f
No related branches found
No related tags found
3 merge requests!118v0.4.0,!108v0.4.0,!91[Add] Clip Operator
Pipeline #57773 passed
......@@ -22,6 +22,7 @@
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
#include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
#include "aidge/backend/cpu/operator/ClipImpl.hpp"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_H_
#define AIDGE_CPU_OPERATOR_CLIPIMPL_H_
#include <cstddef> // std::size_t
#include <memory>
#include <tuple> // std::tuple
#include <vector>
#include <algorithm>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Clip.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend.
// Binds Clip_Op to its CPU kernels: the two function types below are the
// signatures the registered forward and backward kernels must match
// (see the REGISTRAR entries in ClipImpl_kernels.hpp).
using ClipImpl_cpu = OperatorImpl_cpu<Clip_Op,
void(float, // Forward: lower clip bound
float, // upper clip bound
const void*, // input buffer
const std::size_t, // number of elements
void*), // output buffer
void(float, // Backward: lower clip bound
float, // upper clip bound
const std::size_t, // number of elements
const void*, // forward input buffer
const void*, // output-gradient buffer
void*)>; // input-gradient buffer (written)
// Register this implementation under the "cpu" backend key.
REGISTRAR(Clip_Op,"cpu",Aidge::ClipImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ClipImpl.hpp"
namespace Aidge {
// Clip forward kernel: clamps every element of `input_` to [min_, max_] and
// writes the result to `output_` (ONNX semantics: min(max(x, min_), max_),
// so when min_ > max_ every element collapses to max_).
// I: input element type, O: output element type.
template <class I, class O>
void ClipImpl_cpu_forward_kernel(
    float min_,
    float max_,
    const void* input_,
    const std::size_t length,
    void* output_)
{
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);
    for (std::size_t i = 0; i < length; ++i) {
        const I value = input[i];
        const float valueF = static_cast<float>(value);
        if (std::max(valueF, min_) > max_) {
            // Above the upper bound — also covers the degenerate min_ > max_
            // case, where min(max(x, min_), max_) always yields max_.
            output[i] = static_cast<O>(max_);
        } else if (valueF < min_) {
            output[i] = static_cast<O>(min_);
        } else {
            // In range: keep the native value instead of round-tripping it
            // through float (the original cast lost precision for 64-bit
            // integer inputs larger than 2^53... actually 2^24 for float).
            output[i] = static_cast<O>(value);
        }
    }
}
// Clip backward kernel: the gradient flows through unchanged only where the
// forward input was strictly inside (min_, max_); positions that were clipped
// (including values exactly equal to a bound) receive a zero gradient.
// I: input type, GI: input-gradient type, GO: output-gradient type.
template <class I, class GI, class GO>
void ClipImpl_cpu_backward_kernel(
    float min_,
    float max_,
    const std::size_t length,
    const void* input_,
    const void* grad_output_,
    void* grad_input_)
{
    const I* const in = static_cast<const I*>(input_);
    const GO* const gradOut = static_cast<const GO*>(grad_output_);
    GI* const gradIn = static_cast<GI*>(grad_input_);
    for (std::size_t idx = 0; idx < length; ++idx) {
        const bool passThrough = (in[idx] > min_) && (in[idx] < max_);
        gradIn[idx] = passThrough ? static_cast<GI>(gradOut[idx]) : GI(0);
    }
}
// Kernel registrations: one entry per supported data type. Each entry pairs
// the in-place producer/consumer model with forward/backward kernel
// instantiations whose input, output, and gradient types all match.
REGISTRAR(ClipImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<float,float>,
Aidge::ClipImpl_cpu_backward_kernel<float,float,float>});
REGISTRAR(ClipImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<double,double>,
Aidge::ClipImpl_cpu_backward_kernel<double,double,double>});
REGISTRAR(ClipImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<std::int32_t,std::int32_t>,
Aidge::ClipImpl_cpu_backward_kernel<std::int32_t,std::int32_t,std::int32_t>});
REGISTRAR(ClipImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<std::int64_t,std::int64_t>,
Aidge::ClipImpl_cpu_backward_kernel<std::int64_t,std::int64_t,std::int64_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <vector>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Clip.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/backend/cpu/operator/ClipImpl.hpp"
#include "aidge/backend/cpu/operator/ClipImpl_kernels.hpp"
// Forward pass entry point: selects the kernel matching the current specs and
// clips input #0 into output #0 using the operator's min()/max() attributes.
template<>
void Aidge::ClipImpl_cpu::forward() {
    const auto& clipOp = dynamic_cast<const Clip_Op&>(mOp);
    const std::shared_ptr<Tensor> input = clipOp.getInput(0);
    AIDGE_ASSERT(input, "missing input #0");

    // Find the correct kernel type for the current input/output data types.
    const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));

    // Run the forward kernel over the whole flattened tensor.
    impl.forward(clipOp.min(),
                 clipOp.max(),
                 getCPUPtr(mOp.getRawInput(0)),
                 input->size(),
                 getCPUPtr(mOp.getRawOutput(0)));
}
// Backward pass entry point: propagates the output gradient through the clip,
// zeroing it wherever the forward input fell outside [min(), max()].
template<>
void Aidge::ClipImpl_cpu::backward() {
    const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
    std::shared_ptr<Tensor> in0 = op_.getInput(0);
    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
    AIDGE_ASSERT(in0, "missing input #0 for current {} operator", op_.type());
    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());

    // The original dereferenced the gradient tensors without checking them;
    // assert they exist before use (they are null until gradients are set up).
    std::shared_ptr<Tensor> gra_in0 = in0->grad();
    std::shared_ptr<Tensor> gra_out0 = out0->grad();
    AIDGE_ASSERT(gra_in0, "missing input #0 gradient for current {} operator", op_.type());
    AIDGE_ASSERT(gra_out0, "missing output #0 gradient for current {} operator", op_.type());

    // Find the correct kernel type for the current data types.
    const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));

    // Call kernel: reads forward input and output gradient, writes input gradient.
    impl.backward(
        op_.min(),
        op_.max(),
        gra_in0->size(),
        getCPUPtr(in0),
        getCPUPtr(gra_out0),
        getCPUPtr(gra_in0)
    );
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <vector>
#include <algorithm>
#include <iomanip>
#include <memory>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Clip.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu.hpp"
// Reference implementation of the clip gradient: zeroes every entry of `vec2`
// whose corresponding input in `vec1` lies outside [min, max]. Entries whose
// input is inside the interval are left untouched. Mismatched sizes are
// reported on stderr and the gradient is left unmodified.
void ComputeClipBackward(const std::vector<float>& vec1, std::vector<float>& vec2, float min, float max) {
    if (vec1.size() != vec2.size()) {
        std::cerr << "Vectors should have the same sizes." << std::endl;
        return;
    }
    const std::size_t count = vec1.size();
    for (std::size_t idx = 0; idx < count; ++idx) {
        const bool outside = (vec1[idx] < min) || (vec1[idx] > max);
        if (outside) {
            vec2[idx] = 0.0f;
        }
    }
}
namespace Aidge
{
TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
{
    const std::uint16_t NBTRIALS = 10;
    // Random generators: inputs in [0, 10]; dismin draws below 4.5 and dismax
    // above 5.5, so min < max is guaranteed where a valid interval is needed.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(0.0, 10.0);
    std::uniform_real_distribution<float> dismin(0.0, 4.5);
    std::uniform_real_distribution<float> dismax(5.5, 10.0);
    std::uniform_int_distribution<std::size_t> distDims(5, 15);

    // Create Clip Operator
    std::shared_ptr<Node> myClip = Aidge::Clip("nop");
    auto op = std::static_pointer_cast<OperatorTensor>(myClip->getOperator());

    // Timing accumulators. `duration` is value-initialized: the original
    // declaration left the double representation indeterminate and the
    // sections only ever applied `+=` to it (undefined behavior).
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration{};

    SECTION("Simple clip test [Forward]") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            totalComputation += dim0 * dim1;

            // Populate the input with random values. std::vector replaces the
            // original raw `new float[]`, which was never deleted (leak).
            std::vector<float> inputData(dim0 * dim1);
            for (std::size_t i = 0; i < inputData.size(); ++i) {
                inputData[i] = dis(gen);
            }

            // Wrap the buffer in a Tensor. NOTE(review): setRawPtr appears to
            // be non-owning (the original also passes stack addresses for the
            // bounds below), so the backing storage must outlive its use.
            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
            TInput->resize({dim0, dim1});
            TInput->setBackend("cpu");
            TInput->getImpl()->setRawPtr(inputData.data(), dim0 * dim1);

            float min = dismin(gen);
            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
            Tmin->resize({});
            Tmin->setBackend("cpu");
            Tmin->getImpl()->setRawPtr(&min, 1);

            float max = dismax(gen);
            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
            Tmax->resize({});
            Tmax->setBackend("cpu");
            Tmax->getImpl()->setRawPtr(&max, 1);

            // Ground truth: clip every value to [min, max].
            std::vector<float> GT(inputData);
            for (float& val : GT) {
                val = std::max(min, std::min(val, max));
            }
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres->resize({dim0, dim1});
            Tres->setBackend("cpu");
            Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);

            op->associateInput(0, TInput);
            op->associateInput(1, Tmin);
            op->associateInput(2, Tmax);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->forwardDims(true);

            start = std::chrono::system_clock::now();
            myClip->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("Clip test with min >= max [Forward]") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            totalComputation += dim0 * dim1;

            // Random input buffer (owned by the vector, no leak).
            std::vector<float> inputData(dim0 * dim1);
            for (std::size_t i = 0; i < inputData.size(); ++i) {
                inputData[i] = dis(gen);
            }

            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
            TInput->resize({dim0, dim1});
            TInput->setBackend("cpu");
            TInput->getImpl()->setRawPtr(inputData.data(), dim0 * dim1);

            // Draw min from the high range and max from the low range so that
            // min >= max always holds in this section.
            float min = dismax(gen);
            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
            Tmin->resize({});
            Tmin->setBackend("cpu");
            Tmin->getImpl()->setRawPtr(&min, 1);

            float max = dismin(gen);
            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
            Tmax->resize({});
            Tmax->setBackend("cpu");
            Tmax->getImpl()->setRawPtr(&max, 1);

            // Ground truth: with min >= max, min(max(x, min), max) == max
            // for every element (ONNX clip semantics).
            std::vector<float> GT(inputData);
            for (float& val : GT) {
                val = max;
            }
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres->resize({dim0, dim1});
            Tres->setBackend("cpu");
            Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);

            op->associateInput(0, TInput);
            op->associateInput(1, Tmin);
            op->associateInput(2, Tmax);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->forwardDims(true);

            start = std::chrono::system_clock::now();
            myClip->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("Clip with Clip Attr [Forward]")
    {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial)
        {
            // Bounds come from the operator's attributes here, so the node
            // is rebuilt each trial with fresh min/max values.
            float min = dismin(gen);
            float max = dismax(gen);
            std::shared_ptr<Node> myCl = Aidge::Clip("", min, max);
            auto opAttr = std::static_pointer_cast<OperatorTensor>(myCl->getOperator());

            // generate Tensors dimensions
            const std::size_t dim0 = 3;
            const std::size_t dim1 = 3;
            totalComputation += dim0 * dim1;

            // Random input buffer (owned by the vector, no leak).
            std::vector<float> inputData(dim0 * dim1);
            for (std::size_t i = 0; i < inputData.size(); ++i) {
                inputData[i] = dis(gen);
            }

            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
            TInput->resize({dim0, dim1});
            TInput->setBackend("cpu");
            TInput->getImpl()->setRawPtr(inputData.data(), dim0 * dim1);

            // Ground truth: clip every value to [min, max].
            std::vector<float> GT(inputData);
            for (float& val : GT) {
                val = std::max(min, std::min(val, max));
            }
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres->resize({dim0, dim1});
            Tres->setBackend("cpu");
            Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);

            opAttr->associateInput(0, TInput);
            opAttr->setDataType(DataType::Float32);
            opAttr->setBackend("cpu");
            opAttr->forwardDims(true);

            start = std::chrono::system_clock::now();
            myCl->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            REQUIRE(approxEq<float>(*(opAttr->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("Simple clip test [Backward]") {
        // NOTE: the original section nested a second, copy-pasted trial loop
        // here (NBTRIALS^2 iterations with shadowed `totalComputation` and
        // `trial`); a single loop matches every other section.
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            totalComputation += dim0 * dim1;

            // Random forward input and incoming output gradient.
            std::vector<float> inputData(dim0 * dim1);
            std::vector<float> gradData(dim0 * dim1);
            for (std::size_t i = 0; i < inputData.size(); ++i) {
                inputData[i] = dis(gen);
                gradData[i] = dis(gen);
            }

            std::shared_ptr<Tensor> TGrad = std::make_shared<Tensor>(DataType::Float32);
            TGrad->resize({dim0, dim1});
            TGrad->setBackend("cpu");
            TGrad->getImpl()->setRawPtr(gradData.data(), dim0 * dim1);

            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
            TInput->resize({dim0, dim1});
            TInput->setBackend("cpu");
            TInput->getImpl()->setRawPtr(inputData.data(), dim0 * dim1);

            float min = dismin(gen);
            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
            Tmin->resize({});
            Tmin->setBackend("cpu");
            Tmin->getImpl()->setRawPtr(&min, 1);

            float max = dismax(gen);
            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
            Tmax->resize({});
            Tmax->setBackend("cpu");
            Tmax->getImpl()->setRawPtr(&max, 1);

            op->associateInput(0, TInput);
            op->associateInput(1, Tmin);
            op->associateInput(2, Tmax);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->forwardDims(true);
            myClip->forward();

            op->getOutput(0)->setGrad(TGrad);
            start = std::chrono::system_clock::now();
            REQUIRE_NOTHROW(myClip->backward());
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            // Compare the computed input gradient against the reference:
            // gradData with clipped positions zeroed out.
            auto GradTensor = op->getInput(0)->grad();
            const float* backwardPtr = static_cast<const float*>(GradTensor->getImpl()->rawPtr());
            std::vector<float> expectedGrad(gradData);
            ComputeClipBackward(inputData, expectedGrad, min, max);
            std::vector<float> backwardVec(backwardPtr, backwardPtr + (dim0 * dim1));
            REQUIRE(expectedGrad == backwardVec);
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment