Skip to content
Snippets Groups Projects
Commit 869cc27f authored by Jerome Hue's avatar Jerome Hue
Browse files

Optimize backward kernels of Sub and Add

This optimization applies only when both input gradients have the same number of
dimensions, so no broadcasting is required.
We know this is the case in the Leaky node, for instance.
parent 382f6d47
No related branches found
No related tags found
No related merge requests found
......@@ -163,6 +163,15 @@ void AddImpl_cpu_backward_kernel(const std::size_t /*input0Length*/,
auto* gradInput0 = static_cast<I*>(gradientInput0_);
auto* gradInput1 = static_cast<I*>(gradientInput1_);
// simple elementwise gradient addition when no broadcasting is required
if (dims0 == dims1) {
for (std::size_t i = 0; i < gradOutputLength; ++i) {
gradInput0[i] += static_cast<I>(gradOutput[i]);
gradInput1[i] += static_cast<I>(gradOutput[i]);
}
return;
}
auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
......
......@@ -165,6 +165,21 @@ void SubImpl_cpu_backward_kernel(const std::size_t /*input0Length*/,
auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
// special case for equal dimensions, gradient can be computed directly
if (dims0 == dims1) {
const std::size_t contiguousSize = std::accumulate(
dims0.cbegin(), dims0.cend(),
static_cast<std::size_t>(1),
std::multiplies<std::size_t>()
);
for (std::size_t i = 0; i < contiguousSize; ++i) {
grad_input_0[i] += static_cast<I1>(grad_output[i]);
grad_input_1[i] += static_cast<I2>(-grad_output[i]);
}
return;
}
auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment