From 650873eb903bbc2a0a31aac584fdaf74b7c08106 Mon Sep 17 00:00:00 2001
From: Adam MARONI <amaroni@lrtechnologies.fr>
Date: Wed, 12 Mar 2025 14:39:56 +0100
Subject: [PATCH] [Issue #251]: Softmax backward implementation for CPU

---
 .../backend/cpu/operator/SoftmaxImpl.hpp      |   3 +-
 .../cpu/operator/SoftmaxImpl_kernels.hpp      |  90 +++++++-
 src/operator/SoftmaxImpl.cpp                  |  16 +-
 unit_tests/operator/Test_SoftmaxImpl.cpp      | 206 +++++++++++++++++-
 4 files changed, 302 insertions(+), 13 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
index ec2c2696..fb17d04c 100644
--- a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
@@ -23,7 +23,8 @@ namespace Aidge {
 // Operator implementation entry point for the backend
 using SoftmaxImpl_cpu = OperatorImpl_cpu<Softmax_Op,
-    void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>;
+    void(std::size_t, const std::vector<DimSize_t>&, const void*, void*),
+    void(std::size_t, const std::vector<DimSize_t>&, const void*, const void*, void*)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Softmax_Op, "cpu", Aidge::SoftmaxImpl_cpu::create);
diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp
index 07486a48..3f110672 100644
--- a/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp
@@ -22,8 +22,13 @@
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
 
 namespace Aidge {
+
 template <class I, class O>
-void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
+void SoftmaxImpl_cpu_forward_kernel(
+    std::size_t axisIdx,
+    const std::vector<DimSize_t>& inputDims,
+    const void* input_,
+    void* output_)
 {
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
@@ -41,35 +46,100 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi
         for (std::size_t j = 0; j < postAxisElems; ++j) {
             I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j];
             for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) {
-                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
+                std::size_t inIdx =
+                    i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
                 maxVal = std::max(maxVal, input[inIdx]);
             }
 
             // Calculate sum of exponentials within the axis
             I sumExp = 0;
             for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
-                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
+                std::size_t inIdx =
+                    i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
                 sumExp += std::exp(input[inIdx] - maxVal);
             }
 
             // Calculate softmax for the current slice along the axis
             for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
-                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
+                std::size_t inIdx =
+                    i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
                 output[inIdx] = std::exp(input[inIdx] - maxVal) / sumExp;
             }
         }
     }
 }
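+// Editor's note (illustrative derivation, not part of the original patch):
+// with softmax s_k = exp(x_k) / sum_j exp(x_j) and a cross-entropy loss
+// L = -sum_k t_k * log(s_k), the chain rule collapses to
+//     dL/dx_k = s_k * (sum_m t_m) - t_k = s_k - t_k
+// for a one-hot target t (sum_m t_m = 1). The backward kernel below
+// therefore needs only the forward output and the target, element-wise:
+//
+//     // sketch, assuming s[] = softmax(x) and t[] = one-hot target
+//     // for (std::size_t k = 0; k < n; ++k) grad[k] = s[k] - t[k];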
+/**
+ * @brief Backward kernel for Softmax on CPU. Assumes a cross-entropy
+ * loss, for which the gradient w.r.t. the logits reduces to
+ * softmax(x) - target.
+ * @tparam I Data type of the computed input gradient.
+ * @tparam O Data type of the forward output and of the target.
+ * @param[in] axisIdx Index of the axis along which softmax was applied.
+ * @param[in] inputDims Array of input dimensions.
+ * @param[in] softmaxOut_ Softmax forward output tensor.
+ * @param[in] target_ One-hot encoded target tensor.
+ * @param[out] gradientLoss_ Computed gradient of the loss w.r.t. the
+ * softmax input.
+ */
+template <class I, class O>
+void SoftmaxImpl_cpu_backward_kernel(
+    std::size_t axisIdx,
+    const std::vector<DimSize_t>& inputDims,
+    const void* softmaxOut_,
+    const void* target_,
+    void* gradientLoss_)
+{
+    const O* softmaxOut = static_cast<const O*>(softmaxOut_);
+    const O* target = static_cast<const O*>(target_);
+    I* dL = static_cast<I*>(gradientLoss_);
+
+    // Compute the number of elements after the softmax axis (post-axis size)
+    std::size_t postAxisElems = 1;
+    for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
+        postAxisElems *= inputDims[i];
+    }
+
+    // Compute the number of elements before the softmax axis (pre-axis size)
+    std::size_t preAxisElems = 1;
+    for (std::size_t i = 0; i < axisIdx; ++i) {
+        preAxisElems *= inputDims[i];
+    }
+
+    // Iterate over batches (pre-axis elements)
+    for (std::size_t i = 0; i < preAxisElems; ++i) {
+        for (std::size_t j = 0; j < postAxisElems; ++j) {
+            for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
+                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems
+                                    + k * postAxisElems + j;
+                // Fused softmax + cross-entropy gradient
+                dL[inIdx] = softmaxOut[inIdx] - target[inIdx];
+            }
+        }
+    }
+}
+
 REGISTRAR(SoftmaxImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, nullptr});
+    { DataType::Float32 },
+    {
+        ProdConso::inPlaceModel,
+        Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>,
+        Aidge::SoftmaxImpl_cpu_backward_kernel<float, float>
+    }
+);
 REGISTRAR(SoftmaxImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, nullptr});
+    { DataType::Float64 },
+    {
+        ProdConso::inPlaceModel,
+        Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>,
+        Aidge::SoftmaxImpl_cpu_backward_kernel<double, double>
+    }
+);
 REGISTRAR(SoftmaxImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
+    { DataType::Int32 },
+    {
+        ProdConso::inPlaceModel,
+        Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>,
+        Aidge::SoftmaxImpl_cpu_backward_kernel<int32_t, int32_t>
+    }
+);
+
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ */
diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp
index 8b6933f2..3f3bb75c 100644
--- a/src/operator/SoftmaxImpl.cpp
+++ b/src/operator/SoftmaxImpl.cpp
@@ -40,5 +40,19 @@ void Aidge::SoftmaxImpl_cpu::forward() {
 
 template <>
 void Aidge::SoftmaxImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Softmax_Op on backend cpu");
+    const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp);
+
+    AIDGE_ASSERT(!op_.getInput(0)->empty(), "Softmax input empty");
+    // Normalize a possibly negative axis to its positive index
+    std::int32_t axis = (op_.axis() >= 0) ?
+        op_.axis() : op_.getInput(0)->nbDims() + op_.axis();
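+    // Example (editor's illustration): for a 4-D input and axis() == -1,
+    // the line above yields axis = 4 + (-1) = 3, i.e. the last dimension.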
+
+    // Find the kernel matching the required input/output specs
+    const auto impl = Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
+    // Call kernel. Note: the gradient set on the output is interpreted as
+    // the one-hot target (cross-entropy convention, see the kernel doc).
+    impl.backward(static_cast<std::size_t>(axis),                  // axisIdx
+                  op_.getInput(0)->dims(),                         // inputDims
+                  op_.getOutput(0)->getImpl()->rawPtr(),           // softmaxOut_
+                  op_.getOutput(0)->grad()->getImpl()->rawPtr(),   // target_
+                  op_.getInput(0)->grad()->getImpl()->rawPtr()     // gradientLoss_
+    );
 }
diff --git a/unit_tests/operator/Test_SoftmaxImpl.cpp b/unit_tests/operator/Test_SoftmaxImpl.cpp
index bc452a40..d949e393 100644
--- a/unit_tests/operator/Test_SoftmaxImpl.cpp
+++ b/unit_tests/operator/Test_SoftmaxImpl.cpp
@@ -111,4 +111,208 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") {
         REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f));
     }
-}
\ No newline at end of file
+}
+
+TEST_CASE("[cpu/operator] Softmax(backward)", "[Softmax][CPU]") {
+    SECTION("1D Tensor") {
+        std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(0);
+        op->setDataType(DataType::Float32);
+        op->setBackend("cpu");
+
+        std::shared_ptr<Tensor> softMaxForwardInputTensor =
+            std::make_shared<Tensor>(Array1D<float, 3>{ {3.0, 1.0, 0.2} });
+        op->associateInput(0, softMaxForwardInputTensor);
+        op->forward();
+
+        // One-hot encoded targets; expected gradient = softmax(input) - target
+        std::shared_ptr<Tensor> target1 =
+            std::make_shared<Tensor>(Array1D<float, 3>{ {1, 0, 0} });
+        Tensor expectedGrad1 = Array1D<float, 3>{
+            {-0.163981, 0.113143, 0.050838}
+        };
+        op->getOutput(0)->setGrad(target1);
+        op->backward();
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad1,
+                                1e-5f, 1e-8f));
+
+        std::shared_ptr<Tensor> target2 =
+            std::make_shared<Tensor>(Array1D<float, 3>{ {0, 1, 0} });
+        Tensor expectedGrad2 = Array1D<float, 3>{
+            {0.836019, -0.886857, 0.050838}
+        };
+        op->getOutput(0)->setGrad(target2);
+        op->backward();
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad2,
+                                1e-5f, 1e-8f));
+
+        std::shared_ptr<Tensor> target3 =
+            std::make_shared<Tensor>(Array1D<float, 3>{ {0, 0, 1} });
+        Tensor expectedGrad3 = Array1D<float, 3>{
+            {0.836019, 0.113143, -0.949162}
+        };
+        op->getOutput(0)->setGrad(target3);
+        op->backward();
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad3,
+                                1e-5f, 1e-8f));
+    }
+    SECTION("2D Tensor") {
+        std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1);
+        op->setDataType(DataType::Float32);
+        op->setBackend("cpu");
+
+        std::shared_ptr<Tensor> softMaxForwardInputTensor =
+            std::make_shared<Tensor>(Array2D<float, 2, 3>{
+                {
+                    {2.0, 1.0, 0.1},
+                    {1.0, 3.0, 0.2}
+                }
+            });
+
+        op->associateInput(0, softMaxForwardInputTensor);
+        op->forward();
+
+        std::shared_ptr<Tensor> target1 =
+            std::make_shared<Tensor>(Array2D<float, 2, 3>{
+                {
+                    {1, 0, 0},
+                    {0, 1, 0}
+                }
+            });
+        Tensor expectedGrad1 = Array2D<float, 2, 3>{
+            {
+                {-0.34099886, 0.24243297, 0.09856589},
+                {0.11314284, -0.1639812, 0.05083836}
+            }
+        };
+        op->getOutput(0)->setGrad(target1);
+        op->backward();
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad1,
+                                1e-5f, 1e-8f));
+    }
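+    // Editor's note: the expected values above can be checked by hand,
+    // e.g. softmax({3.0, 1.0, 0.2}) ~= {0.836019, 0.113143, 0.050838},
+    // so with target {1, 0, 0} the gradient softmax - target is
+    // {-0.163981, 0.113143, 0.050838}; see also the standalone sketch
+    // appended after the patch.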
+    SECTION("4D Tensor") {
+        std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1);
+        op->setDataType(DataType::Float32);
+        op->setBackend("cpu");
+
+        std::shared_ptr<Tensor> softMaxForwardInputTensor =
+            std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+                {
+                    {
+                        {{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
+                         {2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
+                         {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
+
+                        {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
+                         {1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
+                         {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
+
+                        {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
+                         {8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
+                         {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}
+                    },
+                    {
+                        {{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
+                         {9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
+                         {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
+
+                        {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
+                         {1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
+                         {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
+
+                        {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
+                         {9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
+                         {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}
+                    }
+                }
+            });
+
+        op->associateInput(0, softMaxForwardInputTensor);
+        op->forward();
+
+        std::shared_ptr<Tensor> target1 =
+            std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{{
+                {
+                    {
+                        { 1.0f, 0.0f, 0.0f },
+                        { 0.0f, 1.0f, 0.0f },
+                        { 0.0f, 0.0f, 1.0f }
+                    },
+                    {
+                        { 1.0f, 0.0f, 0.0f },
+                        { 0.0f, 1.0f, 0.0f },
+                        { 0.0f, 0.0f, 1.0f }
+                    },
+                    {
+                        { 1.0f, 0.0f, 0.0f },
+                        { 0.0f, 1.0f, 0.0f },
+                        { 0.0f, 0.0f, 1.0f }
+                    }
+                },
+                {
+                    {
+                        { 1.0f, 0.0f, 0.0f },
+                        { 0.0f, 1.0f, 0.0f },
+                        { 0.0f, 0.0f, 1.0f }
+                    },
+                    {
+                        { 1.0f, 0.0f, 0.0f },
+                        { 0.0f, 1.0f, 0.0f },
+                        { 0.0f, 0.0f, 1.0f }
+                    },
+                    {
+                        { 1.0f, 0.0f, 0.0f },
+                        { 0.0f, 1.0f, 0.0f },
+                        { 0.0f, 0.0f, 1.0f }
+                    }
+                }
+            }});
+
+        Tensor expectedGrad1 = Array4D<float, 2, 3, 3, 3>{
+            {
+                {{{-0.548909903, 0.428493917, 0.437751532},
+                  { 0.272464514, -0.640323639, 0.504549026},
+                  { 0.203976154, 0.204576448, -0.664564550}},
+
+                 {{-0.754281461, 0.347237468, 0.256949306},
+                  { 0.245199680, -0.690958738, 0.186924666},
+                  { 0.356466055, 0.289911687, -0.585231602}},
+
+                 {{-0.696808696, 0.224268600, 0.305299193},
+                  { 0.482335806, -0.668717623, 0.308526367},
+                  { 0.439557761, 0.505511820, -0.750203848}}},
+
+                {{{-0.665658951, 0.206386790, 0.395053923},
+                  { 0.412633836, -0.798012137, 0.339227289},
+                  { 0.363399804, 0.341277540, -0.712860584}},
+
+                 {{-0.801800549, 0.334487498, 0.347154379},
+                  { 0.177029371, -0.575357676, 0.442047715},
+                  { 0.290932596, 0.334101707, -0.776933849}},
+
+                 {{-0.532540560, 0.459125668, 0.257791758},
+                  { 0.410336822, -0.626630187, 0.218724951},
+                  { 0.345667630, 0.324620724, -0.510205626}}}
+            }
+        };
+        op->getOutput(0)->setGrad(target1);
+        op->backward();
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad1,
+                                1e-5f, 1e-8f));
+    }
+}
--
GitLab
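Editor's addendum (not part of the patch): a minimal, self-contained
sketch that cross-checks the 1-D test vector. It mirrors the kernel's
math (numerically stable softmax, then grad = softmax - target) without
depending on Aidge; the names and hard-coded values are illustrative only.

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
        const double x[3] = {3.0, 1.0, 0.2};   // forward input
        const double t[3] = {1.0, 0.0, 0.0};   // one-hot target
        double s[3];
        double sum = 0.0;

        // Numerically stable softmax: subtract the max before exp()
        double maxVal = x[0];
        for (int k = 1; k < 3; ++k) maxVal = std::max(maxVal, x[k]);
        for (int k = 0; k < 3; ++k) { s[k] = std::exp(x[k] - maxVal); sum += s[k]; }
        for (int k = 0; k < 3; ++k) s[k] /= sum;

        // Fused softmax + cross-entropy backward: grad = softmax - target
        for (int k = 0; k < 3; ++k)
            std::printf("grad[%d] = %f\n", k, s[k] - t[k]);
        // Prints approximately -0.163981, 0.113143, 0.050838,
        // matching expectedGrad1 in the 1D unit test above.
        return 0;
    }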