diff --git a/include/aidge/backend/cpu/operator/ClipImpl.hpp b/include/aidge/backend/cpu/operator/ClipImpl.hpp
index a46230d0a2a648beece00d546fd4038c5f1d83e7..c83836d5aa1d6aae27e3fdce1bbb9561b70ec31e 100644
--- a/include/aidge/backend/cpu/operator/ClipImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ClipImpl.hpp
@@ -16,6 +16,7 @@
 #include <memory>
 #include <tuple>    // std::tuple
 #include <vector>
+#include <algorithm>
 
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Clip.hpp"
@@ -27,13 +28,13 @@
 namespace Aidge {
 // Operator implementation entry point for the backend
 using ClipImpl_cpu = OperatorImpl_cpu<Clip_Op,
-    void(const void*, //Forward Types
-        const void*,
+    void(float, //Forward Types
+        float,
         const void*,
         const std::size_t,
         void*),
-    void(const void*,//Backward Types
-        const void*,
+    void(float,//Backward Types
+        float,
         const std::size_t,
         const void*,
         const void*,
diff --git a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
index 7c80aed59d6ff11f7f29b8e2a4abaaab997ecdd1..6646aa1c3772962afd320452a51257c319367808 100644
--- a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
@@ -19,40 +19,36 @@
 namespace Aidge {
 
 template <class I, class O>
 void ClipImpl_cpu_forward_kernel(
-    const void* min_,
-    const void* max_,
+    float min_,
+    float max_,
     const void* input_,
     const std::size_t length,
     void* output_)
 {
-    const I* min = static_cast<const I*>(min_);
-    const I* max = static_cast<const I*>(max_);
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
     for (std::size_t i = 0; i < length; ++i) {
-        output[i] = std::max(min[0], std::min(input[i], max[0]));
+        output[i] = std::max(min_, std::min(static_cast<float>(input[i]), max_));
     }
 }
 
 
 template <class I, class GI, class GO>
 void ClipImpl_cpu_backward_kernel(
-    const void* min_,
-    const void* max_,
+    float min_,
+    float max_,
     const std::size_t length,
     const void* input_,
     const void* grad_output_,
     void* grad_input_)
 {
-    const I* min = static_cast<const I*>(min_);
-    const I* max = static_cast<const I*>(max_);
     const I* input = static_cast<const I*>(input_);
     const GO* grad_output = static_cast<const GO*>(grad_output_);
     GI* grad_input = static_cast<GI*>(grad_input_);
     for (std::size_t i = 0; i < length; ++i) {
-        grad_input[i] = ((input[i] > min[0]) && (input[i] < max[0])) ? grad_output[i] : 0;
+        grad_input[i] = ((input[i] > min_) && (input[i] < max_)) ? grad_output[i] : 0;
     }
 }
 
 
diff --git a/src/operator/ClipImpl.cpp b/src/operator/ClipImpl.cpp
index 3949ae9568c4a3a4d38bf573eecf420f95a8c5e7..931d25426a8f6e08363bfc08d23f1714e934634c 100644
--- a/src/operator/ClipImpl.cpp
+++ b/src/operator/ClipImpl.cpp
@@ -26,33 +26,17 @@ void Aidge::ClipImpl_cpu::forward() {
 
     const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
-    std::shared_ptr<Tensor> in1 = op_.getInput(1);
-    std::shared_ptr<Tensor> in2 = op_.getInput(2);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
 
     AIDGE_ASSERT(in0, "missing input #0");
     /*AIDGE_ASSERT(in1, "missing input #1 -> Min value empty shape Tensor");
     AIDGE_ASSERT(in2, "missing input #2 -> Max value empty shape Tensor");*/
-    void* min;
-    void* max;
-
-    if (!in1 || !in2)
-    {
-        min = &op_.min();
-        max = &op_.max();
-    }
-    else
-    {
-        min = getCPUPtr(mOp.getRawInput(1));
-        max = getCPUPtr(mOp.getRawInput(2));
-    }
     // Find the correct kernel type
     const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));
-
     // Call kernel
     impl.forward(
-        min,
-        max,
+        op_.min(),
+        op_.max(),
         getCPUPtr(mOp.getRawInput(0)),
         in0->size(),
         getCPUPtr(mOp.getRawOutput(0))
@@ -64,8 +48,6 @@ void Aidge::ClipImpl_cpu::backward() {
     const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
-    std::shared_ptr<Tensor> in1min = op_.getInput(1);
-    std::shared_ptr<Tensor> in2max = op_.getInput(2);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     std::shared_ptr<Tensor> gra_in0 = op_.getInput(0)->grad();
     std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
@@ -73,22 +55,10 @@
 
     // Find the correct kernel type
     const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));
-    void* min;
-    void* max;
-    if (!in1min || !in2max)
-    {
-        min = &op_.min();
-        max = &op_.max();
-    }
-    else
-    {
-        min = getCPUPtr(mOp.getRawInput(1));
-        max = getCPUPtr(mOp.getRawInput(2));
-    }
     // Call kernel
     impl.backward(
-        min,
-        max,
+        op_.min(),
+        op_.max(),
         gra_in0->size(),
         getCPUPtr(in0),
         getCPUPtr(gra_out0),
diff --git a/unit_tests/operator/Test_ClipImpl.cpp b/unit_tests/operator/Test_ClipImpl.cpp
index b7f286f1c6e0703509c9117afa06705706f7c3c2..a2da6d71a989f026439e3e60f66d272cc6f7e864 100644
--- a/unit_tests/operator/Test_ClipImpl.cpp
+++ b/unit_tests/operator/Test_ClipImpl.cpp
@@ -108,13 +108,14 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
         op->associateInput(2, Tmax);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->forwardDims();
+        op->forwardDims(true);
 
         start = std::chrono::system_clock::now();
         myClip->forward();
         end = std::chrono::system_clock::now();
 
         duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+        REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
     }
     std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
 
@@ -161,7 +162,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
         op->associateInput(0, TInput);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->forwardDims();
+        op->forwardDims(true);
         start = std::chrono::system_clock::now();
         myCl->forward();
         end = std::chrono::system_clock::now();
@@ -231,7 +232,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
         op->associateInput(2, Tmax);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->forwardDims();
+        op->forwardDims(true);
 
         myClip->forward();
         op->getOutput(0)->setGrad(TGrad);