Commit 4fa8bf81 authored by Jerome Hue, committed by Maxence Naud

Implement backward function of Add operator

parent 128b735f
1 merge request: !166 Update 0.5.0 -> 0.6.0
@@ -25,7 +25,19 @@
namespace Aidge {
// Operator implementation entry point for the backend
using AddImpl_cpu = OperatorImpl_cpu<Add_Op,
-    void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>;
+    void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*),
+    void(const std::size_t,
+         const std::size_t,
+         const std::size_t,
+         const std::vector<std::size_t>&,
+         const std::vector<std::size_t>&,
+         const std::vector<std::size_t>&,
+         const void*,
+         const void*,
+         const void*,
+         void*,
+         void*)
+    >;
// Implementation entry point registration to Operator
REGISTRAR(Add_Op, "cpu", Aidge::AddImpl_cpu::create);
@@ -147,25 +147,75 @@ void AddImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
}
}
template <class I, class O>
void AddImpl_cpu_backward_kernel(const std::size_t input0Length,
const std::size_t input1Length,
const std::size_t gradOutputLength,
const std::vector<std::size_t>& dims0,
const std::vector<std::size_t>& dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
const void* grad_output_,
void* gradientInput0_,
void* gradientInput1_)
{
// TODO: Remove input0/1 from the function
const I* input0 = static_cast<const I*>(input0_);
const I* input1 = static_cast<const I*>(input1_);
const O* gradOutput = static_cast<const O*>(grad_output_);
auto* gradInput0 = static_cast<I*>(gradientInput0_);
auto* gradInput1 = static_cast<I*>(gradientInput1_);
std::fill_n(gradInput0, input0Length, static_cast<I>(0));
std::fill_n(gradInput1, input1Length, static_cast<I>(0));
auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
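    // The broadcasted dims are dims0/dims1 left-padded with 1s to the rank of
    // outputDims, so the per-dimension comparisons below line up with idxOutputGrad.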
for (std::size_t i = 0; i < gradOutputLength; ++i) {
auto idxOutputGrad = getMultiDimIndices(outputDims, i);
std::vector<std::size_t> idxInput0(broadcastedDims0.size());
std::vector<std::size_t> idxInput1(broadcastedDims1.size());
for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);
// For addition: gradient of both inputs is just the output gradient
// (unlike multiplication where we need to multiply by the other input,
// or subtraction where we need to negate one of them)
gradInput0[idx0] += static_cast<I>(gradOutput[i]);
gradInput1[idx1] += static_cast<I>(gradOutput[i]);
}
}
// Kernels registration to implementation entry point
REGISTRAR(AddImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, Aidge::AddImpl_cpu_backward_kernel<float, float>});
REGISTRAR(AddImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, Aidge::AddImpl_cpu_backward_kernel<double, double>});
REGISTRAR(AddImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int8}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int8_t, std::int8_t>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int8_t, std::int8_t>, Aidge::AddImpl_cpu_backward_kernel<std::int8_t, std::int8_t>});
REGISTRAR(AddImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::UInt8}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::uint8_t, std::uint8_t>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::uint8_t, std::uint8_t>, Aidge::AddImpl_cpu_backward_kernel<std::uint8_t, std::uint8_t>});
REGISTRAR(AddImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, Aidge::AddImpl_cpu_backward_kernel<std::int32_t, std::int32_t>});
REGISTRAR(AddImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
+    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, Aidge::AddImpl_cpu_backward_kernel<std::int64_t, std::int64_t>});
} // namespace Aidge
-#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */
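To make the index gymnastics in the backward kernel concrete, here is a minimal, self-contained sketch of the same broadcast-aware accumulation for a {2, 3} + {3} addition. The helpers padDims, unravel, and ravel are simplified stand-ins for Aidge's getBroadcastedDims, getMultiDimIndices, and getFlattenedIndex (assumed behavior, not the library code):

#include <cstddef>
#include <iostream>
#include <vector>

// Left-pad `dims` with 1s to the rank of `outputDims`
// (stand-in for getBroadcastedDims, assumed behavior).
static std::vector<std::size_t> padDims(const std::vector<std::size_t>& outputDims,
                                        std::vector<std::size_t> dims) {
    dims.insert(dims.begin(), outputDims.size() - dims.size(), 1);
    return dims;
}

// Flat index -> per-dimension coordinates, row-major (stand-in for getMultiDimIndices).
static std::vector<std::size_t> unravel(const std::vector<std::size_t>& dims,
                                        std::size_t flat) {
    std::vector<std::size_t> idx(dims.size());
    for (std::size_t d = dims.size(); d-- > 0;) {
        idx[d] = flat % dims[d];
        flat /= dims[d];
    }
    return idx;
}

// Per-dimension coordinates -> flat index, row-major (stand-in for getFlattenedIndex).
static std::size_t ravel(const std::vector<std::size_t>& dims,
                         const std::vector<std::size_t>& idx) {
    std::size_t flat = 0;
    for (std::size_t d = 0; d < dims.size(); ++d) {
        flat = flat * dims[d] + idx[d];
    }
    return flat;
}

int main() {
    const std::vector<std::size_t> outputDims = {2, 3};
    const auto bDims1 = padDims(outputDims, {3});      // -> {1, 3}

    std::vector<float> gradOutput(6, 1.0f);            // dL/dOutput, all ones
    std::vector<float> gradInput1(3, 0.0f);

    for (std::size_t i = 0; i < gradOutput.size(); ++i) {
        auto idx = unravel(outputDims, i);
        // Collapse coordinates along broadcast (size-1) dimensions.
        for (std::size_t d = 0; d < bDims1.size(); ++d) {
            if (bDims1[d] == 1) idx[d] = 0;
        }
        gradInput1[ravel(bDims1, idx)] += gradOutput[i];
    }

    for (float g : gradInput1) std::cout << g << ' ';  // prints "2 2 2"
    std::cout << '\n';
    return 0;
}

Each element of the 1D input feeds two output rows, so its gradient accumulates two contributions, matching expectedGrad1 = {2, 2, 2} in Case 1 of the tests below.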
@@ -55,5 +55,28 @@ void Aidge::AddImpl_cpu::forward() {
template <>
void Aidge::AddImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Add_Op on backend cpu");
const Add_Op& op_ = dynamic_cast<const Add_Op&>(mOp);
auto in0 = op_.getInput(0);
auto in1 = op_.getInput(1);
    auto in0grad = in0->grad();
    auto in1grad = in1->grad();
auto out0grad = op_.getOutput(0)->grad();
// Find the correct kernel type
const auto impl = Registrar<AddImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.backward(in0grad->size(),
in1grad->size(),
out0grad->size(),
in0->dims(),
in1->dims(),
out0grad->dims(),
getCPUPtr(in0),
getCPUPtr(in1),
getCPUPtr(out0grad),
getCPUPtr(in0grad),
getCPUPtr(in1grad));
}
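For context, here is a minimal driver that exercises this new path end to end, in the style of the unit tests below. It is a sketch that assumes the Aidge CPU backend is built and linked; the Tensor header path is an assumption.

#include <memory>

#include "aidge/data/Tensor.hpp"      // assumed header path
#include "aidge/operator/Add.hpp"
#include "aidge/utils/ArrayHelpers.hpp"

int main() {
    using namespace Aidge;

    auto op = std::make_shared<Add_Op>();
    op->setDataType(DataType::Float32);
    op->setBackend("cpu");

    // (2,3) + (3,) broadcast add, mirroring Case 1 of the tests below.
    op->associateInput(0, std::make_shared<Tensor>(
        Array2D<float, 2, 3>({{{1, 2, 3}, {4, 5, 6}}})));
    op->associateInput(1, std::make_shared<Tensor>(
        Array1D<float, 3>({0.1f, 0.2f, 0.3f})));

    op->forwardDims();
    op->forward();

    // Seed dL/dOutput with ones, then run the newly registered backward kernel.
    op->getOutput(0)->setGrad(std::make_shared<Tensor>(
        Array2D<float, 2, 3>({{{1, 1, 1}, {1, 1, 1}}})));
    op->backward();

    // op->getInput(0)->grad() is now all ones; op->getInput(1)->grad() is {2, 2, 2}.
    return 0;
}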
@@ -10,6 +10,7 @@
********************************************************************************/
#include <memory>
#include <random>
#include <catch2/catch_test_macros.hpp>
@@ -19,6 +20,7 @@
#include "aidge/graph/Node.hpp"
#include "aidge/operator/Add.hpp"
#include "aidge/utils/ArrayHelpers.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
@@ -139,4 +141,275 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
Log::info("Expected Add_1 Tensor:\n{}", expectedOutput);
REQUIRE(*op_1->getOutput(0) == expectedOutput);
}
-}
\ No newline at end of file
+}
TEST_CASE("[cpu/operator] Add(backward)", "[Add][CPU]") {
std::shared_ptr<Add_Op> op = std::make_shared<Add_Op>();
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// NOTE: The first four sections use fixed values; the last one uses random values with fixed dimensions.
SECTION("Case 1: 1D and 2D Tensors") {
const auto T0 = std::make_shared<Tensor>(
Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{1, 2, 3}, {4, 5, 6}}}));
const auto T1 =
std::make_shared<Tensor>(Array1D<cpptype_t<DataType::Float32>, 3>({0.1, 0.2, 0.3}));
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(std::make_shared<Tensor>(
Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}})));
op->forwardDims();
op->backward();
const Tensor expectedGrad0 =
Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{1, 1, 1}, {1, 1, 1}}});
const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({2, 2, 2});
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
}
SECTION("Case 2: 3D and 1D tensors") {
const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}},
{{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}}));
const auto T1 =
std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1}));
const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}}));
const Tensor expectedGrad0 =
Array3D<float, 2, 2, 3>({{{{1, 1, 1}, {1, 1, 1}},
{{1, 1, 1}, {1, 1, 1}}}});
const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({4, 4, 4});
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
op->backward();
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
}
SECTION("Case 3: 4D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>(
{{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}},
{{10.0, 11.0, 12.0}, {13.0, 14.0, 15.0}, {16.0, 17.0, 18.0}}},
{{{19.0, 20.0, 21.0}, {22.0, 23.0, 24.0}, {25.0, 26.0, 27.0}},
{{28.0, 29.0, 30.0},
{31.0, 32.0, 33.0},
{34.0, 35.0, 36.0}}}}}));
const auto T1 = std::make_shared<Tensor>(Array2D<cpptype_t<DataType::Float32>, 3, 3>(
{{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}));
const auto newGrad =
std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>(
{{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}},
{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}}));
const Tensor expectedGrad0 =
Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>(
{{{{{1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
{{1, 1, 1}, {1, 1, 1}, {1, 1, 1}}},
{{{1, 1, 1}, {1, 1, 1}, {1, 1, 1}},
{{1, 1, 1}, {1, 1, 1}, {1, 1, 1}}}}});
const Tensor expectedGrad1 =
Array2D<cpptype_t<DataType::Float32>, 3, 3>({{
{4.0, 4.0, 4.0},
{4.0, 4.0, 4.0},
{4.0, 4.0, 4.0}}});
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
op->backward();
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
}
SECTION("Case 4: 3D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(
Array3D<float, 2, 3, 4>({{{
{1.0, 2.0, 3.0, 4.0},
{5.0, 6.0, 7.0, 8.0},
{9.0, 10.0, 11.0, 12.0},
},
{
{13.0, 14.0, 15.0, 16.0},
{17.0, 18.0, 19.0, 20.0},
{21.0, 22.0, 23.0, 24.0},
}}}));
const auto T1 = std::make_shared<Tensor>(
Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}}}));
const auto newGrad = std::make_shared<Tensor>(
Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
},
{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
}}}));
const Tensor expectedGrad0 =
Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{{1, 1, 1, 1},
{1, 1, 1, 1},
{1, 1, 1, 1}},
{{1, 1, 1, 1},
{1, 1, 1, 1},
{1, 1, 1, 1}}}});
const Tensor expectedGrad1 =
Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{2.0, 2.0, 2.0, 2.0},
{2.0, 2.0, 2.0, 2.0},
{2.0, 2.0, 2.0, 2.0}}});
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
op->backward();
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
}
SECTION("Case 5: Tensors with random values") {
// Use random values
const std::vector<std::size_t> dims0 = {5, 2, 1, 7}; // First tensor
const std::vector<std::size_t> dims1 = {2, 6, 7}; // Second tensor
const std::vector<std::size_t> outputDims = {5, 2, 6, 7};
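        // Broadcasting recap: dims1 is implicitly left-padded to {1, 2, 6, 7};
        // input0's size-1 axis 2 is broadcast to 6 and input1's missing leading
        // axis to 5, which yields the {5, 2, 6, 7} output above.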
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dist(0.1f, 1.0f);
auto T0 = std::make_shared<Tensor>(dims0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
float* input0Data = static_cast<float*>(T0->getImpl()->rawPtr());
// Fill with random values
for (std::size_t i = 0; i < T0->size(); ++i) {
input0Data[i] = dist(gen);
}
auto T1 = std::make_shared<Tensor>(dims1);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
float* input1Data = static_cast<float*>(T1->getImpl()->rawPtr());
// Fill with random values
for (std::size_t i = 0; i < T1->size(); ++i) {
input1Data[i] = dist(gen);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->forwardDims();
op->forward();
Tensor expectedOutput{outputDims};
expectedOutput.setBackend("cpu");
float* expectedOutputData = static_cast<float*>(expectedOutput.getImpl()->rawPtr());
for (std::size_t n = 0; n < 5; ++n) {
for (std::size_t c = 0; c < 2; ++c) {
for (std::size_t h = 0; h < 6; ++h) {
for (std::size_t w = 0; w < 7; ++w) {
std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
std::size_t in0Idx =
w + 7 * (0 + 1 * (c + 2 * n)); // middle dim is 1
std::size_t in1Idx =
w + 7 * (h + 6 * c); // no n dimension
expectedOutputData[outIdx] = input0Data[in0Idx] + input1Data[in1Idx];
}
}
}
}
auto outputTensor = op->getOutput(0);
REQUIRE(approxEq<float>(*outputTensor, expectedOutput));
// Backward pass
std::vector<float> gradOutputData(expectedOutput.size());
for (auto &val : gradOutputData) {
val = dist(gen);
}
op->getOutput(0)->setGrad(std::make_shared<Tensor>());
op->getOutput(0)->grad()->resize(outputDims);
op->getOutput(0)->grad()->getImpl()->setRawPtr(gradOutputData.data(),
expectedOutput.size());
// Compute reference gradients
std::vector<float> expectedGrad0(T0->size(), 0.0f);
std::vector<float> expectedGrad1(T1->size(), 0.0f);
for (std::size_t n = 0; n < 5; ++n) {
for (std::size_t c = 0; c < 2; ++c) {
for (std::size_t h = 0; h < 6; ++h) {
for (std::size_t w = 0; w < 7; ++w) {
std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
std::size_t in0Idx = w + 7 * (0 + 1 * (c + 2 * n));
std::size_t in1Idx = w + 7 * (h + 6 * c);
// Gradient for input0: just accumulate grad_output
expectedGrad0[in0Idx] += gradOutputData[outIdx];
// Gradient for input1: just accumulate grad_output
expectedGrad1[in1Idx] += gradOutputData[outIdx];
}
}
}
}
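        // Sanity check on the accumulation counts: each input0 element feeds
        // 6 outputs (the broadcast h axis) and each input1 element feeds 5
        // (the broadcast n axis), so each expectedGrad entry sums that many terms.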
// Perform backward pass
op->backward();
auto expectedGrad0Tensor = std::make_shared<Tensor>();
expectedGrad0Tensor->resize(T0->dims());
expectedGrad0Tensor->setBackend("cpu");
expectedGrad0Tensor->setDataType(DataType::Float32);
expectedGrad0Tensor->getImpl()->setRawPtr(expectedGrad0.data(),
expectedGrad0.size());
auto expectedGrad1Tensor = std::make_shared<Tensor>(T1->dims());
expectedGrad1Tensor->setBackend("cpu");
expectedGrad1Tensor->setDataType(DataType::Float32);
expectedGrad1Tensor->getImpl()->setRawPtr(expectedGrad1.data(),
expectedGrad1.size());
// Verify backward pass
REQUIRE(approxEq<float>(*T0->grad(), *expectedGrad0Tensor));
REQUIRE(approxEq<float>(*T1->grad(), *expectedGrad1Tensor));
}
}