Commit 1b7c5b5b authored by Jerome Hue's avatar Jerome Hue Committed by Maxence Naud

Add test and register Sub backward

parent 50a2eb35
@@ -23,7 +23,19 @@
namespace Aidge {
// Operator implementation entry point for the backend
using SubImpl_cpu = OperatorImpl_cpu<Sub_Op,
void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*),
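// Backward kernel entry point. Parameter roles are inferred from the call site
// in SubImpl_cpu::backward() below (descriptive names only): input0Length,
// input1Length, grad0Length, input0Dims, input1Dims, outputDims,
// input0, input1, gradOutput, gradInput0, gradInput1.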
void(const std::size_t,
const std::size_t,
const std::size_t,
const std::vector<std::size_t>&,
const std::vector<std::size_t>&,
const std::vector<std::size_t>&,
const void*,
const void*,
const void*,
void*,
void*)
>;
// Implementation entry point registration to Operator
REGISTRAR(Sub_Op, "cpu", Aidge::SubImpl_cpu::create);
...
@@ -163,8 +163,6 @@ void SubImpl_cpu_backward_kernel(const std::size_t input0Length,
void* gradientInput0_,
void* gradientInput1_)
{
const I1* input0 = static_cast<const I1*>(input0_);
const I2* input1 = static_cast<const I2*>(input1_);
const O* grad_output = static_cast<const O*>(grad_output_);
auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
@@ -202,7 +200,7 @@ void SubImpl_cpu_backward_kernel(const std::size_t input0Length,
// Kernels registration to implementation entry point
REGISTRAR(SubImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<float, float, float>, Aidge::SubImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(SubImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<double, double, double>, nullptr});
...
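The body of SubImpl_cpu_backward_kernel between the two hunks above is elided from this diff. As a rough guide only, here is a minimal reference sketch of what a broadcast-aware Sub backward has to compute (assumed NumPy-style broadcasting, caller-zeroed gradient buffers, illustrative names; this is not the registered kernel): for out = in0 - in1, every output-gradient element is accumulated with +1 into the in0 gradient slot it maps back to and with -1 into the corresponding in1 slot.

#include <algorithm>
#include <cstddef>
#include <vector>

template <class I1, class I2, class O>
void subBackwardReference(const std::vector<std::size_t>& dims0,
                          const std::vector<std::size_t>& dims1,
                          const std::vector<std::size_t>& outDims,
                          const O* gradOut, I1* gradIn0, I2* gradIn1)
{
    // Left-pad each input shape with 1s so it has the same rank as the output.
    auto pad = [&](const std::vector<std::size_t>& d) {
        std::vector<std::size_t> p(outDims.size(), 1);
        std::copy(d.cbegin(), d.cend(), p.end() - d.size());
        return p;
    };
    const auto d0 = pad(dims0);
    const auto d1 = pad(dims1);

    std::size_t outSize = 1;
    for (const auto d : outDims) { outSize *= d; }

    for (std::size_t o = 0; o < outSize; ++o) {
        // Decompose the flat output index into coordinates (last dim fastest),
        // then refold them into flat input indices; broadcast dims map to 0.
        std::size_t rem = o, i0 = 0, i1 = 0, stride0 = 1, stride1 = 1;
        for (std::size_t k = outDims.size(); k-- > 0;) {
            const std::size_t coord = rem % outDims[k];
            rem /= outDims[k];
            i0 += (d0[k] == 1 ? 0 : coord) * stride0;
            i1 += (d1[k] == 1 ? 0 : coord) * stride1;
            stride0 *= d0[k];
            stride1 *= d1[k];
        }
        gradIn0[i0] += static_cast<I1>(gradOut[o]);  // d(in0 - in1)/d(in0) = +1
        gradIn1[i1] -= static_cast<I2>(gradOut[o]);  // d(in0 - in1)/d(in1) = -1
    }
}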
@@ -41,5 +41,29 @@ void Aidge::SubImpl_cpu::forward() {
template <>
void Aidge::SubImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Sub_Op on backend cpu");
const Sub_Op& op_ = dynamic_cast<const Sub_Op&>(mOp);
auto in0 = op_.getInput(0);
auto in1 = op_.getInput(1);
auto in0grad = op_.getInput(0)->grad();
auto in1grad = op_.getInput(1)->grad();
auto out0grad = op_.getOutput(0)->grad();
// Find the correct kernel type
const auto impl = Registrar<SubImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.backward(/* input0Length */ in0grad->size(),
/* input1Length */ in1grad->size(),
/* grad0Length */ out0grad->size(),
/* input0Dims */ in0->dims(),
/* input1Dims */ in1->dims(),
out0grad->dims(),
getCPUPtr(in0),
getCPUPtr(in1),
getCPUPtr(out0grad),
getCPUPtr(in0grad),
getCPUPtr(in1grad));
}
@@ -322,4 +322,165 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
}
}
}
TEST_CASE("[CPU/Operator] Sub(Backward)", "[Sub][CPU][Backward]") {
std::shared_ptr<Node> mySub = Sub();
auto op = std::static_pointer_cast<OperatorTensor>(mySub->getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
SECTION("Case 1: 1D and 2D Tensors") {
const auto T0 = std::make_shared<Tensor>(
Array2D<float, 2, 3>({{{1, 2, 3}, {4, 5, 6}}}));
const auto T1 =
std::make_shared<Tensor>(Array1D<float, 3>({0.1, 0.2, 0.3}));
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(std::make_shared<Tensor>(
Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}})));
op->forwardDims();
mySub->backward();
// For subtraction: grad_input0 = grad_output
const auto expectedGrad0 = std::make_shared<Tensor>(
Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}));
// For subtraction: grad_input1 = -grad_output (summed across broadcast dimensions)
const auto expectedGrad1 =
std::make_shared<Tensor>(Array1D<float, 3>({-2, -2, -2}));
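// Worked check: T1 (shape {3}) broadcasts over the 2 rows of T0, so each
// grad_input1 entry sums -grad_output over those rows: -(1.0 + 1.0) = -2.0.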
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Case 2: 3D and 1D tensors") {
const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}},
{{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}}));
const auto T1 =
std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1}));
const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}}));
const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}));
const auto expectedGrad1 =
std::make_shared<Tensor>(Array1D<float, 3>({-4.0, -4.0, -4.0}));
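// Worked check: T1 (shape {3}) broadcasts over the 2x2 leading dims, so each
// grad_input1 entry sums -grad_output over 4 positions: -(4 * 1.0) = -4.0.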
for (auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) {
T->setBackend("cpu");
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
mySub->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Case 3: Random values with broadcasting") {
// Use random values
std::vector<std::size_t> dims0 = {5, 2, 1, 7}; // First tensor
std::vector<std::size_t> dims1 = {2, 6, 7}; // Second tensor
std::vector<std::size_t> outputDims = {5, 2, 6, 7};
const auto input0Size = 5 * 2 * 1 * 7;
const auto input1Size = 2 * 6 * 7;
const auto outputSize = 5 * 2 * 6 * 7;
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dist(0.1f, 1.0f);
std::vector<float> input0Data(input0Size);
std::vector<float> input1Data(input1Size);
std::vector<float> gradOutputData(outputSize);
// Fill with random values
for (auto &val : input0Data) val = dist(gen);
for (auto &val : input1Data) val = dist(gen);
for (auto &val : gradOutputData) val = dist(gen);
auto T0 = std::make_shared<Tensor>();
auto T1 = std::make_shared<Tensor>();
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
T0->resize(dims0);
T0->getImpl()->setRawPtr(input0Data.data(), input0Size);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
T1->resize(dims1);
T1->getImpl()->setRawPtr(input1Data.data(), input1Size);
op->associateInput(0, T0);
op->associateInput(1, T1);
// Set gradient of output
op->getOutput(0)->setGrad(std::make_shared<Tensor>());
op->getOutput(0)->grad()->resize(outputDims);
op->getOutput(0)->grad()->getImpl()->setRawPtr(gradOutputData.data(), outputSize);
op->forwardDims();
// Compute reference gradients
std::vector<float> expectedGrad0(input0Size, 0.0f);
std::vector<float> expectedGrad1(input1Size, 0.0f);
for (std::size_t n = 0; n < 5; ++n) {
for (std::size_t c = 0; c < 2; ++c) {
for (std::size_t h = 0; h < 6; ++h) {
for (std::size_t w = 0; w < 7; ++w) {
std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
std::size_t in0Idx = w + 7 * (0 + 1 * (c + 2 * n));
std::size_t in1Idx = w + 7 * (h + 6 * c);
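// in0Idx drops the h coordinate (dims0 has size 1 there) and in1Idx drops n
// (dims1 has no leading batch dim), mirroring how broadcasting replicates
// each input across those axes.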
// Gradient for input0: grad_output
expectedGrad0[in0Idx] += gradOutputData[outIdx];
// Gradient for input1: -grad_output
expectedGrad1[in1Idx] += -gradOutputData[outIdx];
}
}
}
}
// Perform backward pass
mySub->backward();
auto expectedGrad0Tensor = std::make_shared<Tensor>();
expectedGrad0Tensor->resize(T0->dims());
expectedGrad0Tensor->setBackend("cpu");
expectedGrad0Tensor->setDataType(DataType::Float32);
expectedGrad0Tensor->getImpl()->setRawPtr(expectedGrad0.data(), expectedGrad0.size());
auto expectedGrad1Tensor = std::make_shared<Tensor>();
expectedGrad1Tensor->resize(T1->dims());
expectedGrad1Tensor->setBackend("cpu");
expectedGrad1Tensor->setDataType(DataType::Float32);
expectedGrad1Tensor->getImpl()->setRawPtr(expectedGrad1.data(), expectedGrad1.size());
// Verify backward pass
REQUIRE(approxEq<float>(*T0->grad(), *expectedGrad0Tensor));
REQUIRE(approxEq<float>(*T1->grad(), *expectedGrad1Tensor));
}
}
} // namespace Aidge