Commit ff3a3ed7 authored by Jerome Hue, committed by Olivier BICHLER

Implement backward function for Div operator

parent 742b763f
3 merge requests: !166 "Update 0.5.0 -> 0.6.0", !136 "Add selection mechanism in graph", !134 "feat: Implement backward function for Div operator"
Pipeline #65495 passed
@@ -24,7 +24,18 @@
namespace Aidge {
// Operator implementation entry point for the backend
using DivImpl_cpu = OperatorImpl_cpu<Div_Op,
-   void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*, void*)>;
    void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*, void*),
    void(const std::size_t,
         const std::size_t,
         const std::size_t,
         const std::vector<std::size_t>,
         const std::vector<std::size_t>,
         const std::vector<std::size_t>,
         const void*,
         const void*,
         const void*,
         void*,
         void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Div_Op, "cpu", Aidge::DivImpl_cpu::create);
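
For reference, the second kernel signature in the typedef above is the new backward entry point. An annotated reading follows; the parameter names are taken from the kernel definition in DivImpl_kernels.hpp further down, since the typedef itself leaves them unnamed:

// void(const std::size_t input0Length,             // element count of input 0 (a)
//      const std::size_t input1Length,             // element count of input 1 (b)
//      const std::size_t gradOutputLength,         // element count of the output gradient
//      const std::vector<std::size_t> dims0,       // shape of input 0
//      const std::vector<std::size_t> dims1,       // shape of input 1
//      const std::vector<std::size_t> outputDims,  // broadcast output shape
//      const void* input0,                         // raw pointer to a
//      const void* input1,                         // raw pointer to b
//      const void* gradOutput,                     // raw pointer to dL/dz
//      void* gradInput0,                           // written: dL/da
//      void* gradInput1)                           // written: dL/db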
@@ -17,6 +17,7 @@
#include <cstdint> // std::int32_t, std::int64_t
#include <functional> // std::multiplies
#include "aidge/backend/cpu/operator/MulImpl_kernels.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/data/Broadcasting.hpp"
@@ -69,16 +70,70 @@ constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_,
}
}
template <class I1, class I2, class O>
void DivImpl_cpu_backward_kernel(const std::size_t input0Length,
                                 const std::size_t input1Length,
                                 const std::size_t gradOutputLength,
                                 const std::vector<std::size_t>& dims0,
                                 const std::vector<std::size_t>& dims1,
                                 const std::vector<std::size_t>& outputDims,
                                 const void* input0_,
                                 const void* input1_,
                                 const void* grad_output_,
                                 void* gradientInput0_,
                                 void* gradientInput1_)
{
    const I1* input0 = static_cast<const I1*>(input0_);      // a
    const I2* input1 = static_cast<const I2*>(input1_);      // b
    const O* grad_output = static_cast<const O*>(grad_output_);
    auto* grad_input_0 = static_cast<I1*>(gradientInput0_);  // gradient w.r.t. a
    auto* grad_input_1 = static_cast<I2*>(gradientInput1_);  // gradient w.r.t. b

    std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
    std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));

    // Broadcast dims0 and dims1 to match the shape of outputDims
    auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
    auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);

    for (std::size_t i = 0; i < gradOutputLength; ++i) {
        auto idxOutputGrad = getMultiDimIndices(outputDims, i);
        std::vector<std::size_t> idxInput0(broadcastedDims0.size());
        std::vector<std::size_t> idxInput1(broadcastedDims1.size());

        // Map output indices to input indices, considering broadcasting
        for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
            idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
        }
        for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
            idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
        }

        auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
        auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);

        // grad_a = grad_output * (1/b)
        grad_input_0[idx0] += static_cast<I1>(grad_output[i] / input1[idx1]);
        // grad_b = grad_output * (-a/b²)
        grad_input_1[idx1] += static_cast<I2>(grad_output[i] * (-input0[idx0] / (input1[idx1] * input1[idx1])));
    }
}
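
The accumulation above is the elementwise quotient rule, with contributions summed over broadcast dimensions (hence the `+=` after the zero-fill). For z = a / b and an upstream gradient dL/dz:

\frac{\partial L}{\partial a} = \frac{\partial L}{\partial z} \cdot \frac{1}{b},
\qquad
\frac{\partial L}{\partial b} = \frac{\partial L}{\partial z} \cdot \left(-\frac{a}{b^{2}}\right)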
// Kernels registration to implementation entry point
REGISTRAR(DivImpl_cpu,
    {DataType::Float32},
-   {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, nullptr});
    {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, Aidge::DivImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(DivImpl_cpu,
    {DataType::Float64},
-   {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, nullptr});
    {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, Aidge::DivImpl_cpu_backward_kernel<double, double, double>});
REGISTRAR(DivImpl_cpu,
    {DataType::Int32},
-   {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
    {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>,
     Aidge::DivImpl_cpu_backward_kernel<std::int32_t, std::int32_t, std::int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ */
@@ -152,5 +152,26 @@ void Aidge::DivImpl_cpu::forward() {
template <>
void Aidge::DivImpl_cpu::backward() {
-   AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Div_Op on backend cpu");
    const Div_Op& op_ = dynamic_cast<const Div_Op&>(mOp);

    auto in0 = op_.getInput(0);
    auto in1 = op_.getInput(1);
    auto in0grad = op_.getInput(0)->grad();
    auto in1grad = op_.getInput(1)->grad();
    auto out0grad = op_.getOutput(0)->grad();

    const auto impl = Registrar<DivImpl_cpu>::create(getBestMatch(getRequiredSpec()));
    impl.backward(in0grad->size(),
                  in1grad->size(),
                  out0grad->size(),
                  in0->dims(),
                  in1->dims(),
                  out0grad->dims(),
                  getCPUPtr(in0),
                  getCPUPtr(in1),
                  getCPUPtr(out0grad),
                  getCPUPtr(in0grad),
                  getCPUPtr(in1grad));
}
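
A minimal usage sketch of this path, mirroring the unit tests added below (T0, T1, and gradOut are assumed to be already-built Float32 CPU tensors):

// Illustrative sketch, not part of the commit.
auto op = std::make_shared<Aidge::Div_Op>();
op->setDataType(Aidge::DataType::Float32);
op->setBackend("cpu");
op->associateInput(0, T0);            // numerator a
op->associateInput(1, T1);            // denominator b
op->forwardDims();                    // resolve broadcast output dims
op->getOutput(0)->setGrad(gradOut);   // seed dL/dz
op->backward();                       // dispatches to DivImpl_cpu::backward()
// Gradients are now in op->getInput(0)->grad() and op->getInput(1)->grad().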
@@ -322,4 +322,275 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
}
}
}
TEST_CASE("[CPU/Operator] Div(Backward)", "[Div][CPU][Backward]") {
std::shared_ptr<Div_Op> op = std::make_shared<Div_Op>();
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// NOTE: The first four tests use fixed values, the last one uses random values but static dimensions.
SECTION("Case 1: 1D and 2D Tensors") {
const auto T0 = std::make_shared<Tensor>(
Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{1, 2, 3}, {4, 5, 6}}}));
const auto T1 =
std::make_shared<Tensor>(Array1D<cpptype_t<DataType::Float32>, 3>({0.1, 0.2, 0.3}));
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(std::make_shared<Tensor>(
Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}})));
op->forwardDims();
op->backward();
const Tensor expectedGrad0 =
Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{10, 5, 3.3333}, {10, 5, 3.3333}}});
const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({-500, -175, -100});
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
}
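
To see where these expected values come from, take the first column of Case 1: b_0 = 0.1 is broadcast over both rows and the seeded output gradient is all ones, so

\frac{\partial L}{\partial a_{r,0}} = \frac{1}{b_0} = \frac{1}{0.1} = 10,
\qquad
\frac{\partial L}{\partial b_0} = -\frac{a_{0,0} + a_{1,0}}{b_0^{2}} = -\frac{1 + 4}{0.01} = -500.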
SECTION("Case 2: 3D and 1D tensors") {
const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}},
{{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}}));
const auto T1 =
std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1}));
const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}}));
const Tensor expectedGrad0 =
Array3D<float, 2, 2, 3>({{{{3.3333, 5.0, 10}, {3.3333, 5.0, 10}},
{{3.3333, 5.0, 10}, {3.3333, 5.0, 10}}}});
const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({-244.4444, -650.0, -3000.0});
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
op->backward();
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
}
SECTION("Case 3: 4D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>(
{{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}},
{{10.0, 11.0, 12.0}, {13.0, 14.0, 15.0}, {16.0, 17.0, 18.0}}},
{{{19.0, 20.0, 21.0}, {22.0, 23.0, 24.0}, {25.0, 26.0, 27.0}},
{{28.0, 29.0, 30.0},
{31.0, 32.0, 33.0},
{34.0, 35.0, 36.0}}}}}));
const auto T1 = std::make_shared<Tensor>(Array2D<cpptype_t<DataType::Float32>, 3, 3>(
{{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}));
const auto newGrad =
std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>(
{{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}},
{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}}));
const Tensor expectedGrad0 =
Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>(
{{{{{2, 3.3333, 10}, {2.5, 5.0, 1.66667}, {1.42857, 1.2500, 1.11111}},
{{2, 3.3333, 10}, {2.5, 5.0, 1.66667}, {1.42857, 1.2500, 1.11111}}},
{{{2, 3.3333, 10}, {2.5, 5.0, 1.66667}, {1.42857, 1.2500, 1.11111}},
{{2, 3.3333, 10}, {2.5, 5.0, 1.66667}, {1.42857, 1.2500, 1.11111}}}}});
const Tensor expectedGrad1 =
Array2D<cpptype_t<DataType::Float32>, 3, 3>({{{-232.0, -688.888, -6600.0},
{-437.5, -1850.0, -216.66667},
{-167.3469, -134.3750, -111.111}}});
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
op->backward();
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
}
SECTION("Case 4: 3D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(
Array3D<float, 2, 3, 4>({{{
{1.0, 2.0, 3.0, 4.0},
{5.0, 6.0, 7.0, 8.0},
{9.0, 10.0, 11.0, 12.0},
},
{
{13.0, 14.0, 15.0, 16.0},
{17.0, 18.0, 19.0, 20.0},
{21.0, 22.0, 23.0, 24.0},
}}}));
const auto T1 = std::make_shared<Tensor>(
Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}}}));
const auto newGrad = std::make_shared<Tensor>(
Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
},
{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
}}}));
const Tensor expectedGrad0 =
Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{
{10, 5, 3.33333, 2.5},
{2, 1.66667, 1.42857, 1.2500},
{1.11111, 1.0, 0.90909, 0.83333}},
{{10, 5, 3.33333, 2.5},
{2, 1.66667, 1.42857, 1.2500},
{1.11111, 1.0, 0.90909, 0.83333}}}});
const Tensor expectedGrad1 =
Array2D<cpptype_t<DataType::Float32>, 3, 4>({{
{-1400.0, -400.0, -200.0, -125.0},
{-88.0, -66.66667, -53.0612, -43.750},
{-37.0370, -32.0, -28.0992, -25.00}}});
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
op->backward();
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
}
SECTION("Case 5: Tensors with random values") {
// Use random values
const std::vector<std::size_t> dims0 = {5, 2, 1, 7}; // First tensor
const std::vector<std::size_t> dims1 = {2, 6, 7}; // Second tensor
const std::vector<std::size_t> outputDims = {5, 2, 6, 7};
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dist(0.1f, 1.0f);
auto T0 = std::make_shared<Tensor>(dims0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
float* input0Data = static_cast<float*>(T0->getImpl()->rawPtr());
// Fill with random values
for (std::size_t i = 0; i < T0->size(); ++i) {
input0Data[i] = dist(gen);
}
auto T1 = std::make_shared<Tensor>(dims1);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
float* input1Data = static_cast<float*>(T1->getImpl()->rawPtr());
// Fill with random values
for (std::size_t i = 0; i < T1->size(); ++i) {
input1Data[i] = dist(gen);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->forwardDims();
op->forward();
Tensor expectedOutput{outputDims};
expectedOutput.setBackend("cpu");
float* expectedOutputData = static_cast<float*>(expectedOutput.getImpl()->rawPtr());
for (std::size_t n = 0; n < 5; ++n) {
for (std::size_t c = 0; c < 2; ++c) {
for (std::size_t h = 0; h < 6; ++h) {
for (std::size_t w = 0; w < 7; ++w) {
std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
std::size_t in0Idx =
w + 7 * (0 + 1 * (c + 2 * n)); // middle dim is 1
std::size_t in1Idx =
w + 7 * (h + 6 * c); // no n dimension
expectedOutputData[outIdx] = input0Data[in0Idx] / input1Data[in1Idx];
}
}
}
}
auto outputTensor = op->getOutput(0);
REQUIRE(approxEq<float>(*outputTensor, expectedOutput));
// Backward pass
std::vector<float> gradOutputData(expectedOutput.size());
for (auto &val : gradOutputData) {
val = dist(gen);
}
op->getOutput(0)->setGrad(std::make_shared<Tensor>());
op->getOutput(0)->grad()->resize(outputDims);
op->getOutput(0)->grad()->getImpl()->setRawPtr(gradOutputData.data(),
expectedOutput.size());
// Compute reference gradients
std::vector<float> expectedGrad0(T0->size(), 0.0f);
std::vector<float> expectedGrad1(T1->size(), 0.0f);
for (std::size_t n = 0; n < 5; ++n) {
for (std::size_t c = 0; c < 2; ++c) {
for (std::size_t h = 0; h < 6; ++h) {
for (std::size_t w = 0; w < 7; ++w) {
std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
std::size_t in0Idx = w + 7 * (0 + 1 * (c + 2 * n));
std::size_t in1Idx = w + 7 * (h + 6 * c);
expectedGrad0[in0Idx] +=
gradOutputData[outIdx] * (1.0f / input1Data[in1Idx]);
expectedGrad1[in1Idx] +=
gradOutputData[outIdx] * (-input0Data[in0Idx] / (input1Data[in1Idx] * input1Data[in1Idx]));
}
}
}
}
// Perform backward pass
op->backward();
auto expectedGrad0Tensor = std::make_shared<Tensor>();
expectedGrad0Tensor->resize(T0->dims());
expectedGrad0Tensor->setBackend("cpu");
expectedGrad0Tensor->setDataType(DataType::Float32);
expectedGrad0Tensor->getImpl()->setRawPtr(expectedGrad0.data(),
expectedGrad0.size());
auto expectedGrad1Tensor = std::make_shared<Tensor>(T1->dims());
expectedGrad1Tensor->setBackend("cpu");
expectedGrad1Tensor->setDataType(DataType::Float32);
expectedGrad1Tensor->getImpl()->setRawPtr(expectedGrad1.data(),
expectedGrad1.size());
// Verify backward pass
REQUIRE(approxEq<float>(*T0->grad(), *expectedGrad0Tensor));
REQUIRE(approxEq<float>(*T1->grad(), *expectedGrad1Tensor));
}
}
} // namespace Aidge
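
The reference computations in Case 5 rely on row-major flattening of (n, c, h, w) coordinates. A standalone sketch of that mapping (an illustrative helper, not part of the commit):

// Illustrative helper, not part of the test suite:
// row-major flattening, w varies fastest.
#include <cstddef>

inline std::size_t flattenNCHW(std::size_t n, std::size_t c, std::size_t h, std::size_t w,
                               std::size_t C, std::size_t H, std::size_t W) {
    return w + W * (h + H * (c + C * n));
}

// Case 5 indices:
//   output:  flattenNCHW(n, c, h, w, 2, 6, 7)
//   input 0: flattenNCHW(n, c, 0, w, 2, 1, 7)   // H dimension of size 1 is broadcast
//   input 1: flattenNCHW(0, c, h, w, 2, 6, 7)   // leading n dimension is absent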