From df820e6af731905159ce7dd1c131fea8c55f9ebc Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Fri, 20 Sep 2024 14:32:32 +0000
Subject: [PATCH] Add backward pass support for the Clip operator

---
 .../aidge/backend/cpu/operator/ClipImpl.hpp   |   8 +-
 ...nels.ooo => ClipImpl_backward_kernels.hpp} |  19 +-
 src/operator/ClipImpl.cpp                     |  17 +-
 unit_tests/operator/Test_ClipImpl.cpp         | 261 +++++++-----------
 4 files changed, 126 insertions(+), 179 deletions(-)
 rename include/aidge/backend/cpu/operator/{ClipImpl_backward_kernels.ooo => ClipImpl_backward_kernels.hpp} (75%)

diff --git a/include/aidge/backend/cpu/operator/ClipImpl.hpp b/include/aidge/backend/cpu/operator/ClipImpl.hpp
index 416a5c72..04cd38a7 100644
--- a/include/aidge/backend/cpu/operator/ClipImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ClipImpl.hpp
@@ -31,10 +31,10 @@ class ClipImplForward_cpu
         std::tuple<DataType, DataType,DataType>,
         void(const void*, const void*, const void*,const std::size_t, void*)>{};
 
-/*class ClipImplBackward_cpu
+class ClipImplBackward_cpu
     : public Registrable <ClipImplBackward_cpu,
-        std::tuple<DataType, DataType, DataType>,
-        void(const float, const float, const std::size_t, const void*, const void*, void*)> {};*/
+        std::tuple<DataType, DataType, DataType, DataType, DataType>,
+        void(const void*, const void*, const std::size_t, const void*, const void*, void*)> {};
 
 class ClipImpl_cpu : public OperatorImpl {
 public:
@@ -48,7 +48,7 @@ public:
 
     void forward() override final;
 
-    //void backward() override final;
+    void backward() override final;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/ClipImpl_backward_kernels.ooo b/include/aidge/backend/cpu/operator/ClipImpl_backward_kernels.hpp
similarity index 75%
rename from include/aidge/backend/cpu/operator/ClipImpl_backward_kernels.ooo
rename to include/aidge/backend/cpu/operator/ClipImpl_backward_kernels.hpp
index ce9ba1ba..ef3d91df 100644
--- a/include/aidge/backend/cpu/operator/ClipImpl_backward_kernels.ooo
+++ b/include/aidge/backend/cpu/operator/ClipImpl_backward_kernels.hpp
@@ -11,42 +11,39 @@
 #ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_BACKWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_CLIPIMPL_BACKWARD_KERNEL_H_
 
-
-#include <cstddef> // std::size_t
-
 #include "aidge/backend/cpu/operator/ClipImpl.hpp"
 #include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
 template <class I, class GI, class GO>
 void ClipImpl_cpu_backward_kernel(
-        const float min_,
-        const float max_,
+        const void* min_,
+        const void* max_,
         const std::size_t length,
         const void* input_,
         const void* grad_output_,
         void* grad_input_) {
 
-    const I min = static_cast<const I>(min_);
-    const I max = static_cast<const I>(max_);
+    const I* min = static_cast<const I*>(min_);
+    const I* max = static_cast<const I*>(max_);
     const I* input = static_cast<const I*>(input_);
     const GO* grad_output = static_cast<const GO*>(grad_output_);
     GI* grad_input = static_cast<GI*>(grad_input_);
 
     for (std::size_t i = 0; i < length; ++i) {
-        grad_input[i] = ((input[i] > min) && (input[i] < max)) ? grad_output[i] : 0;
+        grad_input[i] = ((input[i] > min[0]) && (input[i] < max[0])) ? grad_output[i] : 0;
     }
 }
 
 namespace {
 static Registrar<ClipImplBackward_cpu> registrarClipImplBackward_cpu_Float32(
-    {DataType::Float32, DataType::Float32, DataType::Float32},
+    {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
     Aidge::ClipImpl_cpu_backward_kernel<float, float, float>);
 static Registrar<ClipImplBackward_cpu> registrarClipImplBackward_cpu_Int32(
-    {DataType::Int32, DataType::Int32, DataType::Int32},
+    {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
     Aidge::ClipImpl_cpu_backward_kernel<int, int, int>);
 static Registrar<ClipImplBackward_cpu> registrarClipImplBackward_cpu_Float64(
-    {DataType::Float64, DataType::Float64, DataType::Float64},
+    {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
     Aidge::ClipImpl_cpu_backward_kernel<double, double, double>);
 } // namespace
 } // namespace Aidge
diff --git a/src/operator/ClipImpl.cpp b/src/operator/ClipImpl.cpp
index 0bd8c15e..c7e4d625 100644
--- a/src/operator/ClipImpl.cpp
+++ b/src/operator/ClipImpl.cpp
@@ -20,7 +20,7 @@
 #include "aidge/backend/cpu/operator/ClipImpl.hpp"
 #include "aidge/backend/cpu/operator/ClipImpl_forward_kernels.hpp"
-//#include "aidge/backend/cpu/operator/ClipImpl_backward_kernels.hpp"
+#include "aidge/backend/cpu/operator/ClipImpl_backward_kernels.hpp"
 
 Aidge::Elts_t Aidge::ClipImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -35,6 +35,9 @@ void Aidge::ClipImpl_cpu::forward() {
     std::shared_ptr<Tensor> in2 = op_.getInput(2);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
+    AIDGE_ASSERT(in1, "missing input #1 -> Min value empty shape Tensor");
+    AIDGE_ASSERT(in2, "missing input #2 -> Max value empty shape Tensor");
+
     // Find the correct kernel type
     auto kernelFunc = Registrar<ClipImplForward_cpu>::create({
@@ -53,10 +56,12 @@ void Aidge::ClipImpl_cpu::forward() {
     );
 }
 
-/*void Aidge::ClipImpl_cpu::backward() {
+void Aidge::ClipImpl_cpu::backward() {
     const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
+    std::shared_ptr<Tensor> in1min = op_.getInput(1);
+    std::shared_ptr<Tensor> in2max = op_.getInput(2);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     std::shared_ptr<Tensor> gra_in0 = op_.getInput(0)->grad();
     std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
 
@@ -64,6 +69,8 @@ void Aidge::ClipImpl_cpu::forward() {
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<ClipImplBackward_cpu>::create({
+        in1min->dataType(),
+        in2max->dataType(),
         in0->dataType(),
         gra_in0->dataType(),
         gra_out0->dataType()
@@ -71,11 +78,11 @@ void Aidge::ClipImpl_cpu::forward() {
 
     // Call kernel
     kernelFunc(
-        op_.min(),
-        op_.max(),
+        getCPUPtr(in1min),
+        getCPUPtr(in2max),
         gra_in0->size(),
         getCPUPtr(in0),
         getCPUPtr(gra_out0),
         getCPUPtr(gra_in0)
     );
-}*/
+}
diff --git a/unit_tests/operator/Test_ClipImpl.cpp b/unit_tests/operator/Test_ClipImpl.cpp
index 012c9dda..8807bc5e 100644
--- a/unit_tests/operator/Test_ClipImpl.cpp
+++ b/unit_tests/operator/Test_ClipImpl.cpp
@@ -15,6 +15,8 @@
 #include <chrono>
 #include <iostream>
 #include <vector>
+#include <algorithm>
+#include <iomanip>
 #include <memory>
 #include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
@@ -23,13 +25,32 @@
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 #include "aidge/backend/cpu.hpp"
+void prettyPrint(float* tensor, std::string tensor_name) { // NB: prints only the first 9 values, formatted as a 3x3 grid
+    std::cout << "Printing formatted tensor: " << tensor_name << std::endl;
 
-namespace Aidge {
-int azertBpoint()
-{
-    return 0;
+    for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 3; ++j) {
+            std::cout << std::setw(10) << std::setprecision(4) << tensor[i * 3 + j] << " ";
+        }
+        std::cout << std::endl;
+    }
 }
-TEST_CASE("[cpu/operator] Clip(forward)", "[Clip][CPU]") {
+void applyMask(const std::vector<float>& vec1, std::vector<float>& vec2, float min, float max) {
+    if (vec1.size() != vec2.size()) {
+        std::cerr << "The vectors must be the same size." << std::endl;
+        return;
+    }
+
+    for (size_t i = 0; i < vec1.size(); ++i) {
+        if (vec1[i] < min || vec1[i] > max) {
+            vec2[i] = 0.0f;
+        }
+    }
+}
+namespace Aidge
+{
+TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
+ {
     const std::uint16_t NBTRIALS = 10;
     // Create a random number generator
     std::random_device rd;
@@ -42,7 +63,6 @@ TEST_CASE("[cpu/operator] Clip(forward)", "[Clip][CPU]") {
 
     // Create MatMul Operator
     std::shared_ptr<Node> myClip = Aidge::Clip("nop");
-    azertBpoint();
     auto op = std::static_pointer_cast<OperatorTensor>(myClip -> getOperator());
 
     // To measure execution time of 'MatMul_Op::forward()' member function call
@@ -50,7 +70,7 @@ TEST_CASE("[cpu/operator] Clip(forward)", "[Clip][CPU]") {
     std::chrono::time_point<std::chrono::system_clock> end;
     std::chrono::duration<double, std::micro> duration;
 
-    SECTION("2-D Tensors") {
+    SECTION("Simple clamp test [Forward]") {
         std::size_t totalComputation = 0;
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
             // generate Tensors dimensions
@@ -64,213 +84,136 @@ TEST_CASE("[cpu/operator] Clip(forward)", "[Clip][CPU]") {
                 Array[i] = dis(gen); // Generate random float value
             }
 
-            // Convert bigArray1 to Tensor
+            // Convert Input to Tensor
             std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
             TInput -> resize({dim0,dim1});
             TInput -> setBackend("cpu");
             TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
 
-            // Convert bigArray2 to Tensor
-            float a = dismin(gen);
+            float min = dismin(gen);
             std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
             Tmin -> resize({});
             Tmin -> setBackend("cpu");
-            Tmin -> getImpl() -> setRawPtr(&a,1);
+            Tmin -> getImpl() -> setRawPtr(&min,1);
 
-            float b = dismax(gen);
+            float max = dismax(gen);
             std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
             Tmax -> resize({});
             Tmax -> setBackend("cpu");
-            Tmax -> getImpl() -> setRawPtr(&b,1);
-            // convert res to Tensor
-            
+            Tmax -> getImpl() -> setRawPtr(&max,1);
+            // convert res to Tensor
             std::vector<float> GT(Array, Array + (dim0*dim1));
-
-            std::for_each(GT.begin(), GT.end(),a,b {
-            valeur = std::clamp(valeur,a,b);
-            });
-
-            // Affichage des éléments du vecteur pour vérifier le clampage
-            for (const auto& valeur : GT)
+            for (float& val : GT)
             {
-                std::cout << valeur << " ";
+                val = std::max(min, std::min(val, max));
             }
-            float* gt_raw = GT.data();
-            //std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
+            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
+            Tres -> resize({dim0,dim1});
+            Tres -> setBackend("cpu");
+            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
 
             op->associateInput(0, TInput);
             op->associateInput(1, Tmin);
             op->associateInput(2, Tmax);
             op->setDataType(DataType::Float32);
-            azertBpoint();
             op->setBackend("cpu");
             op->forwardDims();
+
             start = std::chrono::system_clock::now();
             myClip->forward();
             end = std::chrono::system_clock::now();
             duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-            REQUIRE(approxEq<float>(*(op->getOutput(0)), gt_raw));
+            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
         }
         std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
     }
 
-    /* SECTION("3-D Tensors") {
+    SECTION("Simple clamp test [Backward]") {
         std::size_t totalComputation = 0;
         duration = std::chrono::duration<double, std::micro>::zero();
+        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+        std::size_t totalComputation = 0;
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
             // generate Tensors dimensions
-            const std::size_t dimNb = distNbMatrix(gen);
             const std::size_t dim0 = distDims(gen);
             const std::size_t dim1 = distDims(gen);
-            const std::size_t dim2 = distDims(gen);
-            totalComputation += dim0*dim1*dim2*dimNb;
+
+            totalComputation += dim0*dim1;
 
             // Create and populate the array with random float values
-            float* bigArray1 = new float[dimNb*dim0*dim1];
-            for (std::size_t i = 0; i < dimNb*dim0*dim1; ++i) {
-                bigArray1[i] = dis(gen); // Generate random float value
-            }
-            float* bigArray2 = new float[dimNb*dim1*dim2];
-            for (int i = 0; i < dimNb*dim1*dim2; ++i) {
-                bigArray2[i] = dis(gen); // Generate random float value
-            }
-            float* res = new float[dimNb*dim0*dim2];
-            for (std::size_t n = 0; n < dimNb; ++n) {
-                for (int i = 0; i < dim0; ++i) {
-                    for (int j = 0; j < dim2; ++j) {
-                        float sum = 0.0;
-                        for (int k = 0; k < dim1; ++k) {
-                            sum += bigArray1[n*dim0*dim1 + i*dim1 + k] * bigArray2[n*dim2*dim1+k*dim2+j];
-                        }
-                        res[n*dim0*dim2+i*dim2+j] = sum;
-                    }
-                }
+            float* Array = new float[dim0*dim1];
+            float* gradArray = new float[dim0*dim1];
+            for (int i = 0; i < dim0*dim1; ++i) {
+                Array[i] = dis(gen); // Generate random float value
+                gradArray[i] = dis(gen);
             }
-            // Convert bigArray1 to Tensor
-            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
-            T1 -> resize({dimNb,dim0,dim1});
-            T1 -> setBackend("cpu");
-            T1 -> getImpl() -> setRawPtr(bigArray1, dimNb*dim0*dim1);
-            // Convert bigArray2 to Tensor
-            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
-            T2 -> resize({dimNb,dim1,dim2});
-            T2 -> setBackend("cpu");
-            T2 -> getImpl() -> setRawPtr(bigArray2, dimNb*dim1*dim2);
-            // convert res to Tensor
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dimNb,dim0,dim2});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(res, dimNb*dim0*dim2);
-
-            op->associateInput(0, T1);
-            op->associateInput(1, T2);
-            op->setDataType(DataType::Float32);
-            op->setBackend("cpu");
-            op->forwardDims();
-            start = std::chrono::system_clock::now();
-            myMatMul->forward();
-            end = std::chrono::system_clock::now();
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
-        }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
-        std::cout << "total time: " << duration.count() << std::endl;
-    }
+            std::shared_ptr<Tensor> TGrad = std::make_shared<Tensor>(DataType::Float32);
+            TGrad -> resize({dim0,dim1});
+            TGrad -> setBackend("cpu");
+            TGrad -> getImpl() -> setRawPtr(gradArray, dim0*dim1);
 
-    SECTION("4-D Tensors") {
-        std::size_t totalComputation = 0;
-        duration = std::chrono::duration<double, std::micro>::zero();
-        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
-            // generate Tensors dimensions
-            const std::size_t dimNb1 = distNbMatrix(gen);
-            const std::size_t dimNb2 = distNbMatrix(gen);
-            const std::size_t dim0 = distDims(gen);
-            const std::size_t dim1 = distDims(gen);
-            const std::size_t dim2 = distDims(gen);
-            totalComputation += dim0*dim1*dim2*dimNb1*dimNb2;
+            // Convert Input to Tensor
+            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
+            TInput -> resize({dim0,dim1});
+            TInput -> setBackend("cpu");
+            TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
+
+            float min = dismin(gen);
+            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
+            Tmin -> resize({});
+            Tmin -> setBackend("cpu");
+            Tmin -> getImpl() -> setRawPtr(&min,1);
 
-            // Create and populate the array with random float values
-            float* bigArray1 = new float[dimNb1*dimNb2*dim0*dim1];
-            for (std::size_t i = 0; i < dimNb1*dimNb2*dim0*dim1; ++i) {
-                bigArray1[i] = dis(gen); // Generate random float value
-            }
-            float* bigArray2 = new float[dimNb1*dimNb2*dim1*dim2];
-            for (std::size_t i = 0; i < dimNb1*dimNb2*dim1*dim2; ++i) {
-                bigArray2[i] = dis(gen); // Generate random float value
-            }
-            float* res = new float[dimNb1*dimNb2*dim0*dim2];
-            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
-                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
-                    for (int i = 0; i < dim0; ++i) {
-                        for (int j = 0; j < dim2; ++j) {
-                            float sum = 0.0;
-                            for (int k = 0; k < dim1; ++k) {
-                                sum += bigArray1[n1*dimNb2*dim0*dim1+n2*dim0*dim1+i*dim1+k] * bigArray2[n1*dimNb2*dim1*dim2+n2*dim1*dim2+k*dim2+j];
-                            }
-                            res[n1*dimNb2*dim0*dim2+n2*dim0*dim2+i*dim2+j] = sum;
-                        }
-                    }
-                }
+            float max = dismax(gen);
+            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
+            Tmax -> resize({});
+            Tmax -> setBackend("cpu");
+            Tmax -> getImpl() -> setRawPtr(&max,1);
+            // convert res to Tensor
+            std::vector<float> GT(Array, Array + (dim0*dim1));
+            for (float& val : GT)
+            {
+                val = std::max(min, std::min(val, max));
             }
-            // Convert bigArray1 to Tensor
-            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
-            T1 -> resize({dimNb1,dimNb2,dim0,dim1});
-            T1 -> setBackend("cpu");
-            T1 -> getImpl() -> setRawPtr(bigArray1, dimNb1*dimNb2*dim0*dim1);
-            // Convert bigArray2 to Tensor
-            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
-            T2 -> resize({dimNb1,dimNb2,dim1,dim2});
-            T2 -> setBackend("cpu");
-            T2 -> getImpl() -> setRawPtr(bigArray2, dimNb1*dimNb2*dim1*dim2);
-            // convert res to Tensor
             std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dimNb1,dimNb2,dim0,dim2});
+            Tres -> resize({dim0,dim1});
             Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(res, dimNb1*dimNb2*dim0*dim2);
+            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
 
-            op->associateInput(0, T1);
-            op->associateInput(1, T2);
+            op->associateInput(0, TInput);
+            op->associateInput(1, Tmin);
+            op->associateInput(2, Tmax);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             op->forwardDims();
+            myClip->forward();
+
+            op->getOutput(0)->setGrad(TGrad);
+
             start = std::chrono::system_clock::now();
-            myMatMul->forward();
+            REQUIRE_NOTHROW(myClip->backward());
             end = std::chrono::system_clock::now();
+
+            auto GradTensor = op->getInput(0)->grad();
+            float* BackwardTensor = (float*)GradTensor->getImpl()->rawPtr();
+            std::cout << "Range of clip is: [min:" << min << "->max: " << max << "]\n";
+            prettyPrint(Array,"Input");
+            prettyPrint(gradArray,"Gradient Input");
+            prettyPrint(BackwardTensor,"final TENSOR");
+
+            std::vector<float> GT0(Array,Array+(dim0*dim1));
+            std::vector<float> GT1(gradArray,gradArray+(dim0*dim1));
+            std::vector<float> BackwardTensorVec(BackwardTensor,BackwardTensor+(dim0*dim1));
+            applyMask(GT0,GT1,min,max);
             duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+            REQUIRE(GT1 == BackwardTensorVec);
         }
         std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
     }
-
-    SECTION("+2-D / 1-D") {
-        // allows to test both computation with a 1-D Tensor and broadcasting
-        // input_0
-        std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-        op->associateInput(0,T0);
-        const std::size_t dim0 = distNbMatrix(gen);
-        const std::size_t dim1 = distNbMatrix(gen) + 1;
-        const std::size_t dim2 = distNbMatrix(gen);
-        const std::size_t dim3 = distNbMatrix(gen);
-        T0->resize({dim0,dim1,dim2,dim3});
-        T0->setDataType(DataType::Float32);
-        T0->setBackend("cpu");
-
-        // input_1
-        std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-        op -> associateInput(1,T1);
-        T1->resize({dim3});
-        T1->setDataType(DataType::Float32);
-        T1->setBackend("cpu");
-
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
-        op->forwardDims();
-        myMatMul->forward();
-
-    }*/
-}
-} // namespace Aidge
\ No newline at end of file
+    }
+    }
+} // namespace Aidge
\ No newline at end of file
--
GitLab
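A quick way to sanity-check the gradient rule that the new ClipImpl_cpu_backward_kernel implements is the small standalone sketch below. It is illustrative only and not part of the patch: plain C++ with no Aidge dependency, and the function and variable names are made up for the example. It mirrors the kernel's strict comparisons, so elements at or outside the bounds receive a zero gradient while strictly in-range elements pass the incoming gradient through unchanged.

// Standalone sketch (illustrative, not part of the patch): reference
// implementation of the Clip backward rule used by the new kernel,
// i.e. dL/dx = dL/dy when min < x < max, and 0 otherwise.
#include <cstddef>
#include <iostream>
#include <vector>

template <class T>
std::vector<T> clipBackwardReference(const std::vector<T>& input,
                                     const std::vector<T>& gradOutput,
                                     T min, T max) {
    std::vector<T> gradInput(input.size());
    for (std::size_t i = 0; i < input.size(); ++i) {
        // Strict comparisons, matching ((input[i] > min[0]) && (input[i] < max[0]))
        // in ClipImpl_cpu_backward_kernel: clipped values get a zero gradient.
        gradInput[i] = (input[i] > min && input[i] < max) ? gradOutput[i] : T(0);
    }
    return gradInput;
}

int main() {
    const std::vector<float> x       {-2.0f, -0.5f, 0.0f, 0.5f, 2.0f};
    const std::vector<float> gradOut { 1.0f,  1.0f, 1.0f, 1.0f, 1.0f};
    const auto gradIn = clipBackwardReference(x, gradOut, -1.0f, 1.0f);
    for (float g : gradIn) {
        std::cout << g << " ";   // expected output: 0 1 1 1 0
    }
    std::cout << std::endl;
    return 0;
}

One small difference worth noting: the applyMask helper in Test_ClipImpl.cpp keeps the gradient for values exactly equal to the bounds (it only zeroes where vec1[i] < min or vec1[i] > max), whereas the kernel's strict comparisons also zero it at the bounds. With continuous random inputs the two agree in practice, which is why the unit test passes.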