Commit 83a07c63 authored by Olivier BICHLER
Merge branch 'Fix/PowBackwardKernel' into 'dev'

Fix/PowBackwardKernel

See merge request !90
parents 54e3425a b9ac173b
2 merge requests: !93 Release v0.3.0, !90 Fix/PowBackwardKernel
Pipeline #55795 canceled
@@ -24,7 +24,8 @@ namespace Aidge {
// Operator implementation entry point for the backend
using PowImpl_cpu = OperatorImpl_cpu<Pow_Op,
void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*),
void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)>;
void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, const void*, void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create);
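For orientation, the second function type above is the backward kernel entry point. Read against the backward kernel defined further down, its parameters line up as follows (names borrowed from that kernel, not from this header):

// input0Dims, input1Dims, outputDims   -> tensor shapes, needed to resolve broadcasting
// input0_, input1_                     -> the two forward inputs (base and exponent)
// gradOutput_                          -> incoming gradient with respect to the output
// gradientInput0_, gradientInput1_     -> buffers that receive the gradients for each input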
@@ -31,14 +31,10 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
const I2* input_2 = static_cast<const I2*>(input2_);
O* output = static_cast<O*>(output_);
size_t totalElements = 1;
for (size_t dimSize : outputDims) {
totalElements *= dimSize;
}
std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
{
std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
@@ -47,16 +43,53 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
}
}
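The kernels rely on Aidge's broadcasting helpers getMultiDimIndices and getFlattenedIndex. The sketch below only illustrates the behaviour the kernels assume (the *Sketch functions are hypothetical stand-ins, not the real implementations): a flat output index is expanded into per-dimension coordinates, and each input folds those coordinates back onto its own layout, with size-1 (broadcast) dimensions pinned to index 0.

#include <cstddef>
#include <vector>

// Hypothetical illustration of getMultiDimIndices: flat index -> coordinates for the given dims.
std::vector<std::size_t> getMultiDimIndicesSketch(const std::vector<std::size_t>& dims, std::size_t flatIdx) {
    std::vector<std::size_t> coords(dims.size());
    for (std::size_t i = dims.size(); i-- > 0;) {
        coords[i] = flatIdx % dims[i];
        flatIdx /= dims[i];
    }
    return coords;
}

// Hypothetical illustration of getFlattenedIndex: output coordinates -> flat index in an input
// whose dims may be shorter and/or contain broadcast (size-1) dimensions.
std::size_t getFlattenedIndexSketch(const std::vector<std::size_t>& dims, const std::vector<std::size_t>& coords) {
    std::size_t flat = 0;
    const std::size_t offset = coords.size() - dims.size();   // align trailing dimensions
    for (std::size_t i = 0; i < dims.size(); ++i) {
        const std::size_t c = (dims[i] == 1) ? 0 : coords[i + offset];   // broadcast dim -> index 0
        flat = flat * dims[i] + c;
    }
    return flat;
}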
template <class I1, class I2, class O>
void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims,
const std::vector<std::size_t>& input1Dims,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
const void* gradOutput_,
void* gradientInput0_,
void* gradientInput1_) {
const I1* input0 = static_cast<const I1*>(input0_);
I1* grad0 = static_cast<I1*>(gradientInput0_);
const I2* input1 = static_cast<const I2*>(input1_);
I2* grad1 = static_cast<I2*>(gradientInput1_);
const O* gradOut = static_cast<const O*>(gradOutput_);
// Fill input grads with zeros
std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
std::fill(grad0, grad0 + input0Elements, I1(0));
std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
std::fill(grad1, grad1 + input1Elements, I2(0));
std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (size_t oIndex = 0; oIndex < totalElements; ++oIndex)
{
// Compute indexes in inputs 0 and 1 to support broadcasting
std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
std::size_t idx0 = getFlattenedIndex(input0Dims, indexes);
std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
// grad0 = grad_output * (input1 * pow(input0, input1 - 1))
grad0[idx0] += gradOut[oIndex] * input1[idx1] * std::pow(input0[idx0], input1[idx1] - 1);
// grad1 = grad_output * (pow(input0, input1) * ln(input0)) = grad_output * (output * ln(input0))
grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * std::log(input0[idx0]);
}
}
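The two accumulation lines implement the textbook derivatives of z = x^y: dL/dx = dL/dz * y * x^(y-1) and dL/dy = dL/dz * x^y * ln(x). The zero-fill above plus the += accumulation is what makes broadcasting correct, since several output positions can fold back onto the same input element. As a quick sanity check of the formulas (a standalone sketch, independent of Aidge), the analytic gradients can be compared against central finite differences for a single positive element:

#include <cmath>
#include <cstdio>

int main() {
    const double x = 3.0, y = 2.5, gradOut = 1.0, eps = 1e-6;

    // Analytic gradients, same expressions as in the kernel.
    const double dX = gradOut * y * std::pow(x, y - 1.0);
    const double dY = gradOut * std::pow(x, y) * std::log(x);

    // Central finite differences of z = pow(x, y).
    const double dXnum = gradOut * (std::pow(x + eps, y) - std::pow(x - eps, y)) / (2.0 * eps);
    const double dYnum = gradOut * (std::pow(x, y + eps) - std::pow(x, y - eps)) / (2.0 * eps);

    std::printf("dX analytic %.6f vs numeric %.6f\n", dX, dXnum);   // both ~ 12.990381
    std::printf("dY analytic %.6f vs numeric %.6f\n", dY, dYnum);   // both ~ 17.125671
    return 0;
}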
// Kernels registration to implementation entry point
REGISTRAR(PowImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, nullptr});
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, Aidge::PowImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(PowImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, nullptr});
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, Aidge::PowImpl_cpu_backward_kernel<double, double, double>});
REGISTRAR(PowImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, nullptr});
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */
@@ -44,21 +44,29 @@ void Aidge::PowImpl_cpu::forward() {
template <>
void Aidge::PowImpl_cpu::backward() {
// Find the correct kernel type
const Pow_Op& op_ = dynamic_cast<const Pow_Op&>(mOp);
const std::vector<std::size_t> input0gradDims = getBroadcastedDims(op_.getInput(0)->grad()->dims(),
op_.getOutput(0)->grad()->dims());
const std::vector<std::size_t> input1gradDims = getBroadcastedDims(op_.getInput(1)->grad()->dims(),
op_.getOutput(0)->grad()->dims());
auto in0 = op_.getInput(0);
auto in1 = op_.getInput(1);
auto in0grad = op_.getInput(0)->grad();
auto in1grad = op_.getInput(1)->grad();
auto out0grad = op_.getOutput(0)->grad();
const std::vector<std::size_t> input0gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad()->dims());
const std::vector<std::size_t> input1gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->grad()->dims());
// Find the correct kernel type
const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.backward(op_.getOutput(0)->grad()->dims(),
input0gradDims,
input1gradDims,
getCPUPtr(mOp.getRawOutput(0)),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawInput(1)));
impl.backward(input0gradDims,
input1gradDims,
out0grad->dims(),
getCPUPtr(in0),
getCPUPtr(in1),
getCPUPtr(out0grad),
getCPUPtr(in0grad),
getCPUPtr(in1grad));
}
\ No newline at end of file
@@ -313,5 +313,171 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
}
SECTION("PowImpl_cpu::backward()") {
SECTION("3D Tensors") {
const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{2.0, 3.0},
{4.0, 5.0}
},
{
{6.0, 7.0},
{8.0, 9.0}
}
}
}
));
const auto input1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{1.0, 2.0},
{3.0, 2.0}
},
{
{2.0, 3.0},
{1.0, 0.5}
}
}
}
));
const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{0.5, 1.0},
{1.5, 2.0}
},
{
{2.5, 3.0},
{3.5, 4.0}
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{0.50000000, 6.00000000},
{72.00000000, 20.00000000}
},
{
{30.00000000, 441.00000000},
{3.50000000, 0.66666669}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{ 0.693147182, 9.88751030},
{1.33084259e+02, 8.04718933e+01}
},
{
{1.61258362e+02, 2.00234143e+03},
{5.82243652e+01, 2.63666954e+01}
}
}
}
));
for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
std::shared_ptr<Node> powOp = Pow();
auto opr = std::static_pointer_cast<OperatorTensor>(powOp->getOperator());
opr->setDataType(DataType::Float32);
opr->setBackend("cpu");
opr->associateInput(0, input0);
opr->associateInput(1, input1);
opr->getOutput(0)->setGrad(gradOut);
opr->forward();
powOp->backward();
REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
}
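Two of the reference entries above can be reproduced by hand, which is a convenient way to read the expected tensors (a spot check, not part of the test): at position (0, 1, 0) the inputs are input0 = 4, input1 = 3 and gradOut = 1.5, so the kernel formulas give:

#include <cmath>
#include <cstdio>

int main() {
    // Position (0, 1, 0) of the 3D test: input0 = 4, input1 = 3, gradOut = 1.5
    std::printf("grad0 = %f\n", 1.5 * 3.0 * std::pow(4.0, 2.0));             // 72, as in expectedGrad0
    std::printf("grad1 = %f\n", 1.5 * std::pow(4.0, 3.0) * std::log(4.0));   // ~133.084, as in expectedGrad1
    return 0;
}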
SECTION("Broadcasting") {
const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{
{
{
{1.0, 2.0, 3.0},
{4.0, 5.0, 6.0}
},
{
{1.5, 2.5, 3.5},
{4.5, 5.5, 6.5}
}
}
}
));
const auto input1 = std::make_shared<Tensor>(Array1D<float, 3>(
{
{0.1, 0.2, 0.3}
}
));
const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{
{
{
{1.0, 2.0, 3.0},
{4.0, 5.0, 6.0}
},
{
{6.0, 5.0, 4.0},
{3.0, 2.0, 1.0}
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{
{
{
{0.10000000, 0.22973967, 0.41711676},
{0.11486985, 0.27594593, 0.51353097}
},
{
{0.41655189, 0.48044977, 0.49926791},
{0.07748720, 0.10227509, 0.08092485}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>(
{
{14.14779854, 22.99299049, 33.56402588}
}
));
for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
std::shared_ptr<Node> powOp = Pow();
auto opr = std::static_pointer_cast<OperatorTensor>(powOp->getOperator());
opr->setDataType(DataType::Float32);
opr->setBackend("cpu");
opr->associateInput(0, input0);
opr->associateInput(1, input1);
opr->getOutput(0)->setGrad(gradOut);
powOp->forward();
powOp->backward();
REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
}
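Because input1 is broadcast from shape (3) to (2, 2, 3), each entry of expectedGrad1 accumulates contributions from four output positions; that is exactly why the kernel zero-fills the gradient buffers and uses +=. As a spot check (standalone, independent of the test), the first entry can be recomputed from column 0:

#include <cmath>
#include <cstdio>

int main() {
    // Column 0 of the broadcasting test: exponent input1[0] = 0.1
    const double x[4]  = {1.0, 4.0, 1.5, 4.5};   // input0 values that share input1[0]
    const double go[4] = {1.0, 4.0, 6.0, 3.0};   // corresponding gradOut values
    double grad1_0 = 0.0;
    for (int i = 0; i < 4; ++i) {
        grad1_0 += go[i] * std::pow(x[i], 0.1) * std::log(x[i]);
    }
    std::printf("grad1[0] = %f\n", grad1_0);     // ~14.148, matching expectedGrad1[0]
    return 0;
}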
}
}
} // namespace Aidge