Commit 5de68dcc authored by Jerome Hue, committed by Maxence Naud

chore: Improve and test Mul Backward kernel

- Rework the Mul backward kernel to make it more straightforward and easily
  adaptable to other element-wise kernels (sub, add, div).
- Add tests, including a new test with random values
parent 7302dd25
3 merge requests: !128 Draft: fix failed onnx tests, !122 [add] Element wise backward, !115 Draft: Add management for aidge module dependencies
Pipeline #63584 passed
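For context on the reworked backward kernel: when an input is broadcast, its gradient has to accumulate contributions from every output element that reuses it. A minimal standalone illustration (not Aidge code; shapes and values chosen arbitrarily) for Z = X * Y with X of shape {2,3} and Y broadcast from shape {1,3}:

    #include <array>
    #include <cstddef>
    #include <iostream>

    // Standalone illustration (not Aidge code): Z = X * Y with X of shape
    // {2,3} and Y of shape {1,3}, broadcast along the first axis.
    //   dL/dX[r][c] = dL/dZ[r][c] * Y[c]
    //   dL/dY[c]    = sum over r of dL/dZ[r][c] * X[r][c]
    // The sum over the broadcast axis is why the kernel accumulates with '+='.
    int main() {
        const std::array<std::array<float, 3>, 2> X{{{1.f, 2.f, 3.f}, {4.f, 5.f, 6.f}}};
        const std::array<float, 3> Y{10.f, 20.f, 30.f};
        const std::array<std::array<float, 3>, 2> gradZ{{{1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}}};

        std::array<std::array<float, 3>, 2> gradX{};
        std::array<float, 3> gradY{};

        for (std::size_t r = 0; r < 2; ++r) {
            for (std::size_t c = 0; c < 3; ++c) {
                gradX[r][c] += gradZ[r][c] * Y[c];      // broadcast axis of Y maps to index 0
                gradY[c]    += gradZ[r][c] * X[r][c];   // accumulate over the broadcast axis
            }
        }

        std::cout << "gradY = " << gradY[0] << ", " << gradY[1] << ", " << gradY[2] << '\n';
        // Prints: gradY = 5, 7, 9 (column sums of X, since gradZ is all ones)
    }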
@@ -34,6 +34,7 @@ using MulImpl_cpu = OperatorImpl_cpu<Mul_Op,
     const std::size_t,
     const std::vector<std::size_t>,
     const std::vector<std::size_t>,
+    const std::vector<std::size_t>,
     const void*,
     const void*,
     const void*,
...
@@ -149,61 +149,53 @@ void MulImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
 template <class I1, class I2, class O>
 void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
                                  const std::size_t input1Length,
-                                 const std::size_t grad0Length,
-                                 const std::vector<std::size_t> input0Dims,
-                                 const std::vector<std::size_t> input1Dims,
+                                 const std::size_t gradOutputLength,
+                                 const std::vector<std::size_t>& dims0,
+                                 const std::vector<std::size_t>& dims1,
+                                 const std::vector<std::size_t>& outputDims,
                                  const void* input0_,
                                  const void* input1_,
                                  const void* grad_output_,
-                                 void* gradientInput0,
-                                 void* gradientInput1)
+                                 void* gradientInput0_,
+                                 void* gradientInput1_)
 {
-    const auto* input0 = static_cast<const I1*>(input0_);
-    const auto* input1 = static_cast<const I1*>(input1_);
-    const auto* grad_output = static_cast<const O*>(grad_output_);
-    auto* grad_input_0 = static_cast<I1*>(gradientInput0);
-    auto* grad_input_1 = static_cast<I2*>(gradientInput1);
-
-    if(input0Dims.size() >= input1Dims.size())
-    {
-        AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
-
-        for(auto i = 0U; i < input0Length; ++i)
-        {
-            const auto indices = getMultiDimIndices(input1Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
-
-            grad_input_0[i] = input1[flattenedIndex] * grad_output[i];
-        }
-
-        for(std::size_t i = 0 ; i < grad0Length; ++i)
-        {
-            const auto indices = getMultiDimIndices(input1Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
-
-            grad_input_1[flattenedIndex] += input0[i] * grad_output[i];
-        }
-    } else {
-        AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
-
-        for(auto i = 0U; i < input1Length; ++i)
-        {
-            const auto indices = getMultiDimIndices(input0Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
-
-            grad_input_1[i] = input0[flattenedIndex] * grad_output[i];
-        }
-
-        for(std::size_t i = 0 ; i < grad0Length; ++i)
-        {
-            const auto indices = getMultiDimIndices(input0Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
-
-            grad_input_0[flattenedIndex] += input1[i] * grad_output[i];
-        }
-    }
+    const I1* input0 = static_cast<const I1*>(input0_);
+    const I2* input1 = static_cast<const I2*>(input1_);
+    const O* grad_output = static_cast<const O*>(grad_output_);
+    auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
+    auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
+
+    std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
+    std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));
+
+    // Broadcast dims0 and dims1 to match the shape of outputDims
+    auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
+    auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
+
+    for (std::size_t i = 0; i < gradOutputLength; ++i) {
+        auto idxOutputGrad = getMultiDimIndices(outputDims, i);
+        std::vector<std::size_t> idxInput0(broadcastedDims0.size());
+        std::vector<std::size_t> idxInput1(broadcastedDims1.size());
+
+        // Map output indices to input0 indices, considering broadcasting
+        for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
+            // If input0 is broadcast along this dimension (size == 1), the index is 0;
+            // idxInput0 represents the multi-dimensional index of input0
+            // contributing to the output at index i.
+            idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
+        }
+
+        for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
+            idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
+        }
+
+        // We have to access the tensors with a flat index, hence the conversion
+        auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
+        auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);
+
+        grad_input_0[idx0] += static_cast<I1>(grad_output[i] * input1[idx1]);
+        grad_input_1[idx1] += static_cast<I2>(grad_output[i] * input0[idx0]);
+    }
 }
...
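As the commit message notes, this structure is meant to transfer to other element-wise backward kernels: the broadcast index mapping stays identical and only the two accumulation lines change. A hypothetical sketch for a Div backward kernel, not part of this commit, reusing the same getBroadcastedDims / getMultiDimIndices / getFlattenedIndex helpers shown above (and assuming <vector> and <algorithm> are available):

    // Hypothetical Div backward sketch (not in this commit), mirroring the
    // reworked Mul kernel: d(x/y)/dx = 1/y and d(x/y)/dy = -x/y^2.
    template <class I1, class I2, class O>
    void DivImpl_cpu_backward_kernel_sketch(const std::size_t input0Length,
                                            const std::size_t input1Length,
                                            const std::size_t gradOutputLength,
                                            const std::vector<std::size_t>& dims0,
                                            const std::vector<std::size_t>& dims1,
                                            const std::vector<std::size_t>& outputDims,
                                            const void* input0_, const void* input1_,
                                            const void* grad_output_,
                                            void* gradientInput0_, void* gradientInput1_)
    {
        const I1* input0 = static_cast<const I1*>(input0_);
        const I2* input1 = static_cast<const I2*>(input1_);
        const O* grad_output = static_cast<const O*>(grad_output_);
        auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
        auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
        std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
        std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));

        // Same broadcast handling as the Mul kernel above
        const auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
        const auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
        for (std::size_t i = 0; i < gradOutputLength; ++i) {
            const auto idxOutputGrad = getMultiDimIndices(outputDims, i);
            std::vector<std::size_t> idxInput0(broadcastedDims0.size());
            std::vector<std::size_t> idxInput1(broadcastedDims1.size());
            for (std::size_t d = 0; d < broadcastedDims0.size(); ++d) {
                idxInput0[d] = (broadcastedDims0[d] == 1) ? 0 : idxOutputGrad[d];
            }
            for (std::size_t d = 0; d < broadcastedDims1.size(); ++d) {
                idxInput1[d] = (broadcastedDims1[d] == 1) ? 0 : idxOutputGrad[d];
            }
            const auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
            const auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);

            // Only these two accumulation lines differ from the Mul kernel
            // (for Add/Sub they would be '+= grad_output[i]' and '+=/-= grad_output[i]').
            grad_input_0[idx0] += static_cast<I1>(grad_output[i] / input1[idx1]);
            grad_input_1[idx1] += static_cast<I2>(-grad_output[i] * input0[idx0]
                                                  / (input1[idx1] * input1[idx1]));
        }
    }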
@@ -58,6 +58,7 @@ void Aidge::MulImpl_cpu::backward() {
         /* grad0Length */ out0grad->size(),
         /* input0Dims */ in0->dims(),
         /* input1Dims */ in1->dims(),
+        out0grad->dims(),
         getCPUPtr(in0),
         getCPUPtr(in1),
         getCPUPtr(out0grad),
...
This diff is collapsed.
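The commit message also mentions a new test with random values. As an illustration only (not the test added by this commit), a random-value check of the broadcast gradient reduction could compare the flat-index accumulation used by the kernel against an explicit per-axis reduction:

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <random>
    #include <vector>

    // Illustration only: for X of shape {R,C} and Y of shape {1,C},
    // dL/dY[c] must equal the sum over rows of gradZ[r][c] * X[r][c].
    int main() {
        const std::size_t R = 4, C = 5;
        std::mt19937 gen(42);
        std::uniform_real_distribution<float> dist(-1.f, 1.f);

        std::vector<float> X(R * C), gradZ(R * C);
        for (auto& v : X)     { v = dist(gen); }
        for (auto& v : gradZ) { v = dist(gen); }

        // Flat-index accumulation, as the kernel does it: the broadcast row
        // axis of Y always maps to index 0, hence the '+='.
        std::vector<float> gradY(C, 0.f);
        for (std::size_t i = 0; i < R * C; ++i) {
            gradY[i % C] += gradZ[i] * X[i];
        }

        // Explicit reference: reduce over the broadcast (row) axis.
        for (std::size_t c = 0; c < C; ++c) {
            float expected = 0.f;
            for (std::size_t r = 0; r < R; ++r) {
                expected += gradZ[r * C + c] * X[r * C + c];
            }
            assert(std::abs(gradY[c] - expected) < 1e-5f);
        }
        return 0;
    }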