diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp
index daf23177fb57bee4111c92654ad94dfae3e50f08..cfbb8173d1f83162519016a8f2b3c3166977a5b7 100644
--- a/include/aidge/backend/cpu/operator/PowImpl.hpp
+++ b/include/aidge/backend/cpu/operator/PowImpl.hpp
@@ -24,7 +24,8 @@ namespace Aidge {
 // Operator implementation entry point for the backend
 using PowImpl_cpu = OperatorImpl_cpu<Pow_Op,
     void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*),
-    void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)>;
+    void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, const void*, void*, void*)>;
+
 // Implementation entry point registration to Operator
 REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create);
diff --git a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
index f484fabf437f656dc8671d4ac78161ef11e84de5..ab9b2ccc7b823842decd044b90a5c6364cedc9c9 100644
--- a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
@@ -31,14 +31,10 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
     const I2* input_2 = static_cast<const I2*>(input2_);
     O* output = static_cast<O*>(output_);
 
-    size_t totalElements = 1;
-    for (size_t dimSize : outputDims) {
-        totalElements *= dimSize;
-    }
-
+    std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
     for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
     {
-        std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+        std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
 
         std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
         std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
@@ -47,16 +43,56 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
     }
 }
 
+template <class I1, class I2, class O>
+void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims,
+                                 const std::vector<std::size_t>& input1Dims,
+                                 const std::vector<std::size_t>& outputDims,
+                                 const void* input0_,
+                                 const void* input1_,
+                                 const void* gradOutput_,
+                                 void* gradientInput0_,
+                                 void* gradientInput1_) {
+    const I1* input0 = static_cast<const I1*>(input0_);
+    I1* grad0 = static_cast<I1*>(gradientInput0_);
+    const I2* input1 = static_cast<const I2*>(input1_);
+    I2* grad1 = static_cast<I2*>(gradientInput1_);
+    const O* gradOut = static_cast<const O*>(gradOutput_);
+
+    // Fill input grads with zeros
+    std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+    std::fill(grad0, grad0 + input0Elements, I1(0));
+    std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+    std::fill(grad1, grad1 + input1Elements, I2(0));
+
+    std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
+    {
+        // Compute indexes in inputs 0 and 1 to support broadcasting
+        std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+
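+        // getFlattenedIndex() folds the broadcast dimensions, so several
+        // output elements can map to the same idx0/idx1; the "+=" updates
+        // below therefore accumulate the gradient over the broadcast axes.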
+        std::size_t idx0 = getFlattenedIndex(input0Dims, indexes);
+        std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
+
+        // grad0 = grad_output * (input1 * pow(input0, input1 - 1))
+        grad0[idx0] += gradOut[oIndex] * input1[idx1] * std::pow(input0[idx0], input1[idx1] - 1);
+
+        // grad1 = grad_output * (output * ln(input0))
+        grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * std::log(input0[idx0]);
+    }
+}
+
 // Kernels registration to implementation entry point
 REGISTRAR(PowImpl_cpu,
     {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, Aidge::PowImpl_cpu_backward_kernel<float, float, float>});
 REGISTRAR(PowImpl_cpu,
     {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, Aidge::PowImpl_cpu_backward_kernel<double, double, double>});
 REGISTRAR(PowImpl_cpu,
     {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>});
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */
diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp
index fe16bb955973d99e022c61043e8144aeaf6801a1..74a7be71e176ba8e1cb8851050e575d6aa7465df 100644
--- a/src/operator/PowImpl.cpp
+++ b/src/operator/PowImpl.cpp
@@ -44,21 +44,29 @@ void Aidge::PowImpl_cpu::forward() {
 
 template <>
 void Aidge::PowImpl_cpu::backward() {
-    // Find the correct kernel type
     const Pow_Op& op_ = dynamic_cast<const Pow_Op&>(mOp);
-    const std::vector<std::size_t> input0gradDims = getBroadcastedDims(op_.getInput(0)->grad()->dims(),
-                                                                       op_.getOutput(0)->grad()->dims());
-    const std::vector<std::size_t> input1gradDims = getBroadcastedDims(op_.getInput(1)->grad()->dims(),
-                                                                       op_.getOutput(0)->grad()->dims());
+
+    auto in0 = op_.getInput(0);
+    auto in1 = op_.getInput(1);
+    auto in0grad = op_.getInput(0)->grad();
+    auto in1grad = op_.getInput(1)->grad();
+    auto out0grad = op_.getOutput(0)->grad();
+
+    // Expand the input gradient dims to the output rank so the kernel can
+    // map output coordinates back onto the (possibly broadcast) inputs.
+    const std::vector<std::size_t> input0gradDims = getBroadcastedDims(out0grad->dims(), in0grad->dims());
+    const std::vector<std::size_t> input1gradDims = getBroadcastedDims(out0grad->dims(), in1grad->dims());
 
     // Find the correct kernel type
     const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.backward(op_.getOutput(0)->grad()->dims(),
-                  input0gradDims,
-                  input1gradDims,
-                  getCPUPtr(mOp.getRawOutput(0)),
-                  getCPUPtr(mOp.getRawInput(0)),
-                  getCPUPtr(mOp.getRawInput(1)));
+    impl.backward(input0gradDims,
+                  input1gradDims,
+                  out0grad->dims(),
+                  getCPUPtr(in0),
+                  getCPUPtr(in1),
+                  getCPUPtr(out0grad),
+                  getCPUPtr(in0grad),
+                  getCPUPtr(in1grad));
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp
index 3b85defb37ff76439b658faa84c3c7457a152d2f..cb5d8872c9c7242bb4aa4efca388d53b578417f9 100644
--- a/unit_tests/operator/Test_PowImpl.cpp
+++ b/unit_tests/operator/Test_PowImpl.cpp
@@ -313,5 +313,171 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
         std::cout << "total time: " << duration.count() << "μs" << std::endl;
     }
 }
+
+
+    SECTION("PowImpl_cpu::backward()") {
+        SECTION("3D Tensors") {
+            const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                {
+                    {
+                        {
+                            {2.0, 3.0},
+                            {4.0, 5.0}
+                        },
+                        {
+                            {6.0, 7.0},
+                            {8.0, 9.0}
+                        }
+                    }
+                }
+            ));
+            const auto input1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                {
+                    {
+                        {
+                            {1.0, 2.0},
+                            {3.0, 2.0}
+                        },
+                        {
+                            {2.0, 3.0},
+                            {1.0, 0.5}
+                        }
+                    }
+                }
+            ));
+            const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                {
+                    {
+                        {
+                            {0.5, 1.0},
+                            {1.5, 2.0}
+                        },
+                        {
+                            {2.5, 3.0},
+                            {3.5, 4.0}
+                        }
+                    }
+                }
+            ));
+            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                {
+                    {
+                        {
+                            {0.50000000, 6.00000000},
+                            {72.00000000, 20.00000000}
+                        },
+                        {
+                            {30.00000000, 441.00000000},
+                            {3.50000000, 0.66666669}
+                        }
+                    }
+                }
+            ));
+            const auto expectedGrad1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                {
+                    {
+                        {
+                            {0.693147182, 9.88751030},
+                            {1.33084259e+02, 8.04718933e+01}
+                        },
+                        {
+                            {1.61258362e+02, 2.00234143e+03},
+                            {5.82243652e+01, 2.63666954e+01}
+                        }
+                    }
+                }
+            ));
+            for (const auto& T : {input0, input1, gradOut, expectedGrad0, expectedGrad1})
+            {
+                T->setBackend("cpu");
+                T->setDataType(DataType::Float32);
+            }
+            std::shared_ptr<Node> powOp = Pow();
+            auto opr = std::static_pointer_cast<OperatorTensor>(powOp->getOperator());
+            opr->setDataType(DataType::Float32);
+            opr->setBackend("cpu");
+            opr->associateInput(0, input0);
+            opr->associateInput(1, input1);
+            opr->getOutput(0)->setGrad(gradOut);
+            powOp->forward();
+
+            powOp->backward();
+            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
+            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
+        }
+        SECTION("Broadcasting") {
+            const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+                {
+                    {
+                        {
+                            {1.0, 2.0, 3.0},
+                            {4.0, 5.0, 6.0}
+                        },
+                        {
+                            {1.5, 2.5, 3.5},
+                            {4.5, 5.5, 6.5}
+                        }
+                    }
+                }
+            ));
+            const auto input1 = std::make_shared<Tensor>(Array1D<float, 3>(
+                {
+                    {0.1, 0.2, 0.3}
+                }
+            ));
+
+            const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+                {
+                    {
+                        {
+                            {1.0, 2.0, 3.0},
+                            {4.0, 5.0, 6.0}
+                        },
+                        {
+                            {6.0, 5.0, 4.0},
+                            {3.0, 2.0, 1.0}
+                        }
+                    }
+                }
+            ));
+            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+                {
+                    {
+                        {
+                            {0.10000000, 0.22973967, 0.41711676},
+                            {0.11486985, 0.27594593, 0.51353097}
+                        },
+                        {
+                            {0.41655189, 0.48044977, 0.49926791},
+                            {0.07748720, 0.10227509, 0.08092485}
+                        }
+                    }
+                }
+            ));
+            const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>(
+                {
+                    {14.14779854, 22.99299049, 33.56402588}
+                }
+            ));
+
+            for (const auto& T : {input0, input1, gradOut, expectedGrad0, expectedGrad1})
+            {
+                T->setBackend("cpu");
+                T->setDataType(DataType::Float32);
+            }
+            std::shared_ptr<Node> powOp = Pow();
+            auto opr = std::static_pointer_cast<OperatorTensor>(powOp->getOperator());
+            opr->setDataType(DataType::Float32);
+            opr->setBackend("cpu");
+            opr->associateInput(0, input0);
+            opr->associateInput(1, input1);
+            opr->getOutput(0)->setGrad(gradOut);
+            powOp->forward();
+
+            powOp->backward();
+            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
+            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
+        }
+    }
 }
 }  // namespace Aidge
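---

Reviewer note: the analytic gradients added above are dL/dx = g * y * x^(y-1) and dL/dy = g * x^y * ln(x). As a quick sanity check outside the Aidge test harness, here is a minimal standalone sketch (hypothetical, not part of this patch; plain C++17, no Aidge dependencies) that compares both formulas against central finite differences of L = g * x^y:

    // Standalone sanity check for the Pow backward formulas in this patch.
    // Hypothetical helper, NOT part of the patch: plain C++17, no Aidge deps.
    #include <cmath>
    #include <cstdio>
    #include <cstdlib>

    int main() {
        // (x, y, g) samples: base, exponent, incoming output gradient.
        const double samples[][3] = {
            {2.0, 3.0, 0.5}, {4.0, 0.5, 2.0}, {1.5, 2.5, 1.0}, {9.0, 0.5, 4.0}};
        const double eps = 1e-6;
        for (const auto& s : samples) {
            const double x = s[0], y = s[1], g = s[2];
            // Analytic gradients, matching PowImpl_cpu_backward_kernel.
            const double gx = g * y * std::pow(x, y - 1.0);
            const double gy = g * std::pow(x, y) * std::log(x);
            // Central finite differences of L = g * x^y.
            const double gxFd = g * (std::pow(x + eps, y) - std::pow(x - eps, y)) / (2.0 * eps);
            const double gyFd = g * (std::pow(x, y + eps) - std::pow(x, y - eps)) / (2.0 * eps);
            std::printf("x=%g y=%g  dx: %.6f vs %.6f  dy: %.6f vs %.6f\n",
                        x, y, gx, gxFd, gy, gyFd);
            if (std::abs(gx - gxFd) > 1e-4 || std::abs(gy - gyFd) > 1e-4) {
                std::fprintf(stderr, "gradient mismatch\n");
                return EXIT_FAILURE;
            }
        }
        return EXIT_SUCCESS;
    }

One caveat worth flagging: std::log(input0[idx0]) is NaN for a non-positive base, so grad1 is only meaningful when input0 > 0 (both unit tests above use strictly positive bases), and the Int32 registration computes its gradients through double-precision pow/log before truncating back to int32_t.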