Skip to content
Snippets Groups Projects
Commit d8519488 authored by Jerome Hue's avatar Jerome Hue
Browse files

Optimize backward kernels of Sub and Add

Only in the case where the dimensions are identical for both input
gradients (not merely the same rank), so no broadcasting is required.
We know this is the case in the Leaky node, for instance.
parent a9513dba
No related branches found
No related tags found
2 merge requests!221[upd] version 0.7.0 -> 0.8.0,!187Optimize backward kernels of Sub and Add in simple cases
Pipeline #82526 failed
......@@ -163,6 +163,15 @@ void AddImpl_cpu_backward_kernel(const std::size_t /*input0Length*/,
auto* gradInput0 = static_cast<I*>(gradientInput0_);
auto* gradInput1 = static_cast<I*>(gradientInput1_);
// simple elementwise gradient addition when no broadcasting is required
if (dims0 == dims1) {
for (std::size_t i = 0; i < gradOutputLength; ++i) {
gradInput0[i] += static_cast<I>(gradOutput[i]);
gradInput1[i] += static_cast<I>(gradOutput[i]);
}
return;
}
auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
......
......@@ -165,6 +165,21 @@ void SubImpl_cpu_backward_kernel(const std::size_t /*input0Length*/,
auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
// special case for equal dimensions, gradient can be computed directly
if (dims0 == dims1) {
const std::size_t contiguousSize = std::accumulate(
dims0.cbegin(), dims0.cend(),
static_cast<std::size_t>(1),
std::multiplies<std::size_t>()
);
for (std::size_t i = 0; i < contiguousSize; ++i) {
grad_input_0[i] += static_cast<I1>(grad_output[i]);
grad_input_1[i] += static_cast<I2>(-grad_output[i]);
}
return;
}
auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment