From 1b7c5b5b0a7c3d7f80c0ddec7e8a5a311648c7ee Mon Sep 17 00:00:00 2001
From: Jerome Hue <jerome.hue@cea.fr>
Date: Wed, 22 Jan 2025 15:39:38 +0100
Subject: [PATCH] Add test and register Sub backward

---
 .../aidge/backend/cpu/operator/SubImpl.hpp    |  14 +-
 .../backend/cpu/operator/SubImpl_kernels.hpp  |   4 +-
 src/operator/SubImpl.cpp                      |  26 ++-
 unit_tests/operator/Test_SubImpl.cpp          | 161 ++++++++++++++++++
 4 files changed, 200 insertions(+), 5 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/SubImpl.hpp b/include/aidge/backend/cpu/operator/SubImpl.hpp
index eed26ddc..064b5329 100644
--- a/include/aidge/backend/cpu/operator/SubImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SubImpl.hpp
@@ -23,7 +23,19 @@ namespace Aidge {
 
 // Operator implementation entry point for the backend
 using SubImpl_cpu = OperatorImpl_cpu<Sub_Op,
-    void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*,void*)>;
+    void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*,void*),
+    void(const std::size_t,
+        const std::size_t,
+        const std::size_t,
+        const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&,
+        const void*,
+        const void*,
+        const void*,
+        void*,
+        void*)
+>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Sub_Op, "cpu", Aidge::SubImpl_cpu::create);

diff --git a/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
index a1954776..cb16c037 100644
--- a/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
@@ -163,8 +163,6 @@ void SubImpl_cpu_backward_kernel(const std::size_t input0Length,
                                  void* gradientInput0_,
                                  void* gradientInput1_)
 {
-    const I1* input0 = static_cast<const I1*>(input0_);
-    const I2* input1 = static_cast<const I2*>(input1_);
     const O* grad_output = static_cast<const O*>(grad_output_);
     auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
     auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
@@ -202,7 +200,7 @@ void SubImpl_cpu_backward_kernel(const std::size_t input0Length,
 // Kernels registration to implementation entry point
 REGISTRAR(SubImpl_cpu,
     {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<float, float, float>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<float, float, float>, Aidge::SubImpl_cpu_backward_kernel<float,float,float>});
 REGISTRAR(SubImpl_cpu,
     {DataType::Float64},
     {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<double, double, double>, nullptr});

diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp
index e36abe2a..cce4e27a 100644
--- a/src/operator/SubImpl.cpp
+++ b/src/operator/SubImpl.cpp
@@ -41,5 +41,29 @@ void Aidge::SubImpl_cpu::forward() {
 
 template <>
 void Aidge::SubImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Sub_Op on backend cpu");
+
+    const Sub_Op& op_ = dynamic_cast<const Sub_Op&>(mOp);
+
+    auto in0 = op_.getInput(0);
+    auto in1 = op_.getInput(1);
+    auto in0grad = op_.getInput(0)->grad();
+    auto in1grad = op_.getInput(1)->grad();
+    auto out0grad = op_.getOutput(0)->grad();
+
+    // Find the correct kernel type
+    const auto impl = Registrar<SubImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
+    // Call kernel
+    impl.backward(/* input0Length */ in0grad->size(),
+           /* input1Length */ in1grad->size(),
+           /* grad0Length */  out0grad->size(),
+           /* input0Dims */ in0->dims(),
+           /* input1Dims */ in1->dims(),
+           out0grad->dims(),
+           getCPUPtr(in0),
+           getCPUPtr(in1),
+           getCPUPtr(out0grad),
+           getCPUPtr(in0grad),
+           getCPUPtr(in1grad));
+
 }

diff --git a/unit_tests/operator/Test_SubImpl.cpp b/unit_tests/operator/Test_SubImpl.cpp
index 1317e88a..d9b6207b 100644
--- a/unit_tests/operator/Test_SubImpl.cpp
+++ b/unit_tests/operator/Test_SubImpl.cpp
@@ -322,4 +322,165 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
         }
     }
 }
+
+
+TEST_CASE("[CPU/Operator] Sub(Backward)", "[Sub][CPU][Backward]") {
+    std::shared_ptr<Node> mySub = Sub();
+    auto op = std::static_pointer_cast<OperatorTensor>(mySub->getOperator());
+    op->setDataType(DataType::Float32);
+    op->setBackend("cpu");
+
+    SECTION("Case 1: 1D and 2D Tensors") {
+        const auto T0 = std::make_shared<Tensor>(
+            Array2D<float, 2, 3>({{{1, 2, 3}, {4, 5, 6}}}));
+
+        const auto T1 =
+            std::make_shared<Tensor>(Array1D<float, 3>({0.1, 0.2, 0.3}));
+
+        T0->setDataType(DataType::Float32);
+        T0->setBackend("cpu");
+        T1->setDataType(DataType::Float32);
+        T1->setBackend("cpu");
+
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+        op->getOutput(0)->setGrad(std::make_shared<Tensor>(
+            Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}})));
+        op->forwardDims();
+
+        mySub->backward();
+
+        // For subtraction: grad_input0 = grad_output
+        const auto expectedGrad0 = std::make_shared<Tensor>(
+            Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}));
+
+        // For subtraction: grad_input1 = -grad_output (summed across broadcast dimensions)
+        const auto expectedGrad1 =
+            std::make_shared<Tensor>(Array1D<float, 3>({-2, -2, -2}));
+
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
+        REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
+    }
+
+    SECTION("Case 2: 3D and 1D tensors") {
+        const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+            {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}},
+              {{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}}));
+
+        const auto T1 =
+            std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1}));
+
+        const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+            {{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}}));
+
+        const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+            {{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
+              {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}));
+
+        const auto expectedGrad1 =
+            std::make_shared<Tensor>(Array1D<float, 3>({-4.0, -4.0, -4.0}));
+
+        for (auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) {
+            T->setBackend("cpu");
+            T->setDataType(DataType::Float32);
+        }
+
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+        op->getOutput(0)->setGrad(newGrad);
+        op->forwardDims();
+
+        mySub->backward();
+
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
+        REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
+    }
+
+    SECTION("Case 3: Random values with broadcasting") {
+        // Use random values
+        std::vector<std::size_t> dims0 = {5, 2, 1, 7}; // First tensor
+        std::vector<std::size_t> dims1 = {2, 6, 7};    // Second tensor
+        std::vector<std::size_t> outputDims = {5, 2, 6, 7};
+
+        const auto input0Size = 5 * 2 * 1 * 7;
+        const auto input1Size = 2 * 6 * 7;
+        const auto outputSize = 5 * 2 * 6 * 7;
+
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> dist(0.1f, 1.0f);
+
+        std::vector<float> input0Data(input0Size);
+        std::vector<float> input1Data(input1Size);
+        std::vector<float> gradOutputData(outputSize);
+
+        // Fill with random values
+        for (auto &val : input0Data) val = dist(gen);
+        for (auto &val : input1Data) val = dist(gen);
+        for (auto &val : gradOutputData) val = dist(gen);
+
+        auto T0 = std::make_shared<Tensor>();
+        auto T1 = std::make_shared<Tensor>();
+
+        T0->setDataType(DataType::Float32);
+        T0->setBackend("cpu");
+        T0->resize(dims0);
+        T0->getImpl()->setRawPtr(input0Data.data(), input0Size);
+
+        T1->setDataType(DataType::Float32);
+        T1->setBackend("cpu");
+        T1->resize(dims1);
+        T1->getImpl()->setRawPtr(input1Data.data(), input1Size);
+
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+
+        // Set gradient of output
+        op->getOutput(0)->setGrad(std::make_shared<Tensor>());
+        op->getOutput(0)->grad()->resize(outputDims);
+        op->getOutput(0)->grad()->getImpl()->setRawPtr(gradOutputData.data(), outputSize);
+
+        op->forwardDims();
+
+        // Compute reference gradients
+        std::vector<float> expectedGrad0(input0Size, 0.0f);
+        std::vector<float> expectedGrad1(input1Size, 0.0f);
+
+        for (std::size_t n = 0; n < 5; ++n) {
+            for (std::size_t c = 0; c < 2; ++c) {
+                for (std::size_t h = 0; h < 6; ++h) {
+                    for (std::size_t w = 0; w < 7; ++w) {
+                        std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
+                        std::size_t in0Idx = w + 7 * (0 + 1 * (c + 2 * n));
+                        std::size_t in1Idx = w + 7 * (h + 6 * c);
+
+                        // Gradient for input0: grad_output
+                        expectedGrad0[in0Idx] += gradOutputData[outIdx];
+                        // Gradient for input1: -grad_output
+                        expectedGrad1[in1Idx] += -gradOutputData[outIdx];
+                    }
+                }
+            }
+        }
+
+        // Perform backward pass
+        mySub->backward();
+
+        auto expectedGrad0Tensor = std::make_shared<Tensor>();
+        expectedGrad0Tensor->resize(T0->dims());
+        expectedGrad0Tensor->setBackend("cpu");
+        expectedGrad0Tensor->setDataType(DataType::Float32);
+        expectedGrad0Tensor->getImpl()->setRawPtr(expectedGrad0.data(), expectedGrad0.size());
+
+        auto expectedGrad1Tensor = std::make_shared<Tensor>();
+        expectedGrad1Tensor->resize(T1->dims());
+        expectedGrad1Tensor->setBackend("cpu");
+        expectedGrad1Tensor->setDataType(DataType::Float32);
+        expectedGrad1Tensor->getImpl()->setRawPtr(expectedGrad1.data(), expectedGrad1.size());
+
+        // Verify backward pass
+        REQUIRE(approxEq<float>(*T0->grad(), *expectedGrad0Tensor));
+        REQUIRE(approxEq<float>(*T1->grad(), *expectedGrad1Tensor));
+    }
+}
 } // namespace Aidge
-- 
GitLab
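
The registration above wires SubImpl_cpu_backward_kernel<float,float,float> into the Float32 entry point, but the kernel's loop body falls outside the hunk context shown in this patch. The gradient rule the new test encodes is: for c = a - b with right-aligned (NumPy-style) broadcasting, grad_a accumulates +grad_c and grad_b accumulates -grad_c, each summed over the dimensions where that input was broadcast. Below is a minimal standalone C++ sketch of that rule, not the Aidge kernel itself; broadcastIndex and subBackward are illustrative names, not Aidge API.

// Standalone sketch (NOT the Aidge kernel): backward of c = a - b with
// right-aligned broadcasting. Assumes inDims.size() <= outDims.size() and
// that every input dim either matches the output dim or equals 1.
#include <cstddef>
#include <vector>

// Map a flat index into the output to the flat index of an input whose
// shape was right-aligned against the output and broadcast on size-1 dims.
std::size_t broadcastIndex(std::size_t flatOut,
                           const std::vector<std::size_t>& outDims,
                           const std::vector<std::size_t>& inDims) {
    // Decompose the flat output index into per-dimension coordinates.
    std::vector<std::size_t> coord(outDims.size());
    for (std::size_t d = outDims.size(); d-- > 0;) {
        coord[d] = flatOut % outDims[d];
        flatOut /= outDims[d];
    }
    // Rebuild the input index, clamping broadcast (size-1) dims to 0.
    const std::size_t offset = outDims.size() - inDims.size();
    std::size_t inIdx = 0, stride = 1;
    for (std::size_t d = inDims.size(); d-- > 0;) {
        inIdx += ((inDims[d] == 1) ? 0 : coord[d + offset]) * stride;
        stride *= inDims[d];
    }
    return inIdx;
}

// gradA / gradB must be zero-initialised and sized like a / b.
void subBackward(const std::vector<std::size_t>& outDims,
                 const std::vector<std::size_t>& aDims,
                 const std::vector<std::size_t>& bDims,
                 const float* gradOut, float* gradA, float* gradB) {
    std::size_t outSize = 1;
    for (std::size_t d : outDims) outSize *= d;
    for (std::size_t i = 0; i < outSize; ++i) {
        gradA[broadcastIndex(i, outDims, aDims)] += gradOut[i]; // d(a-b)/da = +1
        gradB[broadcastIndex(i, outDims, bDims)] -= gradOut[i]; // d(a-b)/db = -1
    }
}

With the shapes from Case 1 ({2, 3} minus {3}) and an all-ones gradOut, this sketch yields gradA all ones and gradB = {-2, -2, -2}: each element of b receives a -1 contribution from both broadcast rows, matching expectedGrad1 in the test. It also reproduces the reference index arithmetic of Case 3, where in0Idx clamps the size-1 dimension of {5, 2, 1, 7} and in1Idx drops the leading dimension for {2, 6, 7}.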