Commit e713e7df authored by Adam MARONI

[Issue #251]: WIP: Softmax Backward implementation for cpu

parent e3662e43
@@ -23,7 +23,8 @@
namespace Aidge {
// Operator implementation entry point for the backend
using SoftmaxImpl_cpu = OperatorImpl_cpu<Softmax_Op,
void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>;
void(std::size_t, const std::vector<DimSize_t>&, const void*, void*),
void(std::size_t axisIdx, const std::vector<DimSize_t>&, const void*, const void*, void*)>;
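// Note: the first function type above is the forward kernel signature
// (axisIdx, inputDims, input, output); the second is the new backward kernel
// signature (axisIdx, inputDims, softmax output, output gradient used as the
// target, input gradient).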
// Implementation entry point registration to Operator
REGISTRAR(Softmax_Op, "cpu", Aidge::SoftmaxImpl_cpu::create);
@@ -22,8 +22,13 @@
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
namespace Aidge {
template <class I, class O>
void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
void SoftmaxImpl_cpu_forward_kernel(
std::size_t axisIdx,
const std::vector<DimSize_t>& inputDims,
const void* input_,
void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
@@ -41,35 +46,100 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi
for (std::size_t j = 0; j < postAxisElems; ++j) {
I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j];
for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
std::size_t inIdx =
i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
maxVal = std::max(maxVal, input[inIdx]);
}
// Calculate sum of exponentials within the axis
I sumExp = 0;
for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
std::size_t inIdx =
i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
sumExp += std::exp(input[inIdx] - maxVal);
}
// Calculate softmax for the current slice along the axis
for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
std::size_t inIdx =
i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
output[inIdx] = std::exp(input[inIdx] - maxVal) / sumExp;
}
}
}
}
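// Indexing sketch: both the forward and backward kernels address the tensor as a
// flattened [preAxisElems x inputDims[axisIdx] x postAxisElems] layout, so element
// (i, k, j) lives at i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j.
// For example, with illustrative inputDims = {2, 3, 4} and axisIdx = 1, element
// (1, 2, 3) maps to 1*3*4 + 2*4 + 3 = 23.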
/**
* @brief Backward kernel for Softmax on CPU (assumes cross-entropy as the loss function).
* @tparam I Input data type.
* @tparam O Output data type.
* @param[in] axisIdx Index of the axis along which the softmax was computed.
* @param[in] inputDims Array of input dimensions.
* @param[in] softmaxOut_ Softmax forward output tensor.
* @param[in] target_ Target output tensor (e.g. one-hot encoded labels).
* @param[out] gradientLoss_ Gradient of the loss with respect to the input.
*/
template<class I, class O>
void SoftmaxImpl_cpu_backward_kernel(
std::size_t axisIdx,
const std::vector<DimSize_t>& inputDims,
const void* softmaxOut_,
const void* target_,
void* gradientLoss_)
{
const O* softmaxOut = static_cast<const O*>(softmaxOut_);
const O* target = static_cast<const O*>(target_);
I* dL = static_cast<I*>(gradientLoss_);
// Compute the number of elements after the softmax axis (post-axis size)
std::size_t postAxisElems = 1;
for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
postAxisElems *= inputDims[i];
}
// Compute the number of elements before the softmax axis (pre-axis size)
std::size_t preAxisElems = 1;
for (std::size_t i = 0; i < axisIdx; ++i) {
preAxisElems *= inputDims[i];
}
// Iterate over batches (pre-axis elements)
for (std::size_t i = 0; i < preAxisElems; ++i) {
for (std::size_t j = 0; j < postAxisElems; ++j) {
for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx =
i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
dL[inIdx] = softmaxOut[inIdx] - target[inIdx];
}
}
}
}
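// Gradient sketch (standard softmax/cross-entropy result): with p = softmax(z)
// and L = -sum_k t_k * log(p_k) for a one-hot target t, dL/dz_k = p_k - t_k,
// which is exactly the softmaxOut - target computed above. For instance,
// p = {0.836, 0.113, 0.051} with t = {1, 0, 0} gives {-0.164, 0.113, 0.051}.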
REGISTRAR(SoftmaxImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, nullptr});
{ DataType::Float32 },
{
ProdConso::inPlaceModel,
Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>,
Aidge::SoftmaxImpl_cpu_backward_kernel<float, float>
}
);
REGISTRAR(SoftmaxImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, nullptr});
{ DataType::Float64 },
{
ProdConso::inPlaceModel,
Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>,
Aidge::SoftmaxImpl_cpu_backward_kernel<double, double>
}
);
REGISTRAR(SoftmaxImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
{ DataType::Int32 },
{
ProdConso::inPlaceModel,
Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>,
Aidge::SoftmaxImpl_cpu_backward_kernel<int32_t, int32_t>
}
);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ */
@@ -40,5 +40,19 @@ void Aidge::SoftmaxImpl_cpu::forward() {
template <>
void Aidge::SoftmaxImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Softmax_Op on backend cpu");
const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp);
AIDGE_ASSERT(!op_.getInput(0)->empty(), "Softmax input empty");
std::int32_t axis = (op_.axis() >= 0) ? op_.axis() : op_.getInput(0)->nbDims() + op_.axis();
// Find the correct kernel type
const auto impl = Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
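// Argument mapping (as wired below): the forward output raw pointer holds the
// softmax result, the output gradient is passed as the target, and the result
// is written into the input gradient.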
impl.backward(static_cast<std::size_t>(axis), // axisIdx
op_.getInput(0)->dims(),
op_.getOutput(0)->getImpl()->rawPtr(),
op_.getOutput(0)->grad()->getImpl()->rawPtr(),
op_.getInput(0)->grad()->getImpl()->rawPtr()
);
}
@@ -111,4 +111,201 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") {
REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f));
}
}
\ No newline at end of file
}
TEST_CASE("[cpu/operator] Softmax(backward)", "[Softmax][CPU]") {
SECTION("1D Tensor") {
std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(0);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
std::shared_ptr<Tensor> softMaxForwardInputTensor =
std::make_shared<Tensor>(Array1D<float,3> { {3.0, 1.0, 0.2} });
// One-hot encoded targets are passed as the output gradient;
// the expected gradients below are softmax(input) - target.
op->associateInput(0,softMaxForwardInputTensor);
op->forward();
std::shared_ptr<Tensor> target1 =
std::make_shared<Tensor>(Array1D<float, 3>{ {1, 0, 0} });
Tensor expectedGrad1 = Array1D<float,3> {
{-0.163981, 0.113143, 0.050838}
};
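// Hand-computed check: softmax({3.0, 1.0, 0.2}) is approximately
// {0.836019, 0.113143, 0.050838}; subtracting the one-hot target {1, 0, 0}
// gives expectedGrad1.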
op->getOutput(0)->setGrad(target1);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad1,
1e-5f, 1e-8f));
std::shared_ptr<Tensor> target2 =
std::make_shared<Tensor>(Array1D<float, 3>{ {0, 1, 0} });
Tensor expectedGrad2 = Array1D<float,3> {
{0.836019, -0.886857, 0.050838}
};
op->getOutput(0)->setGrad(target2);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad2,
1e-5f, 1e-8f));
std::shared_ptr<Tensor> target3 =
std::make_shared<Tensor>(Array1D<float, 3>{ {0, 0, 1} });
Tensor expectedGrad3 = Array1D<float,3> {
{0.836019, 0.113143, -0.949162}
};
op->getOutput(0)->setGrad(target3);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad3,
1e-5f, 1e-8f));
}
SECTION("2D Tensor") {
std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
std::shared_ptr<Tensor> softMaxForwardInputTensor =
std::make_shared<Tensor>(Array2D<float, 2, 3> {
{
{2.0, 1.0, 0.1},
{1.0, 3.0, 0.2}
}
});
op->associateInput(0,softMaxForwardInputTensor);
op->forward();
std::shared_ptr<Tensor> target1 =
std::make_shared<Tensor>(Array2D<float, 2, 3>{
{
{1, 0, 0},
{0, 1, 0}
}
});
Tensor expectedGrad1 = Array2D<float, 2, 3> {
{
{-0.34099886, 0.24243297, 0.09856589},
{0.11314284, -0.1639812, 0.05083836}
}
};
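// Hand-computed check: the row-wise softmax along axis 1 is approximately
// {0.659001, 0.242433, 0.098566} and {0.113143, 0.836019, 0.050838};
// subtracting the row targets {1, 0, 0} and {0, 1, 0} gives expectedGrad1.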
op->getOutput(0)->setGrad(target1);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad1,
1e-5f, 1e-8f));
}
SECTION("4D Tensor"){
std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
std::shared_ptr<Tensor> softMaxForwardInputTensor =
std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
{
{
{{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
{2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
{1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
{{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
{1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
{7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
{{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
{8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
{9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}
},
{
{{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
{9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
{2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
{{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
{1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
{2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
{{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
{9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
{2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}
}
}
});
op->associateInput(0,softMaxForwardInputTensor);
op->forward();
std::shared_ptr<Tensor> target1 =
std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{{
{
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
},
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
},
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
}
},
{
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
},
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
},
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
}
}
}});
Tensor expectedGrad1 = Array4D<float,2,3,3,3> {
{{
{{ 0.0196f, -0.0000f, -0.0000f },
{ -0.0000f, 0.0044f, -0.0000f },
{ -0.0000f, -0.0000f, 0.0004f }},
{{ -0.0146f, -0.0000f, -0.0000f },
{ -0.0000f, -0.0040f, -0.0000f },
{ -0.0000f, -0.0000f, 0.0136f }},
{{ -0.0050f, -0.0000f, -0.0000f },
{ -0.0000f, -0.0003f, -0.0000f },
{ -0.0000f, -0.0000f, -0.0139f }}
},
{
{{ 0.0002f, -0.0000f, -0.0000f },
{ -0.0000f, -0.0219f, -0.0000f },
{ -0.0000f, -0.0000f, -0.0077f }},
{{ -0.0225f, -0.0000f, -0.0000f },
{ -0.0000f, 0.0152f, -0.0000f },
{ -0.0000f, -0.0000f, -0.0184f }},
{{ 0.0224f, -0.0000f, -0.0000f },
{ -0.0000f, 0.0067f, -0.0000f },
{ -0.0000f, -0.0000f, 0.0261f }}
}}
};
op->getOutput(0)->setGrad(target1);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad1,
1e-5f, 1e-8f));
}
}