Commit e713e7df authored by Adam MARONI

[Issue #251]: WIP: Softmax Backward implementation for cpu

parent e3662e43
@@ -23,7 +23,8 @@
namespace Aidge {
// Operator implementation entry point for the backend
using SoftmaxImpl_cpu = OperatorImpl_cpu<Softmax_Op,
void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>;
void(std::size_t, const std::vector<DimSize_t>&, const void*, void*),
void(std::size_t axisIdx, const std::vector<DimSize_t>&, const void*, const void*, void*)>;
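// Note: the first function type above is the forward kernel signature
// (axisIdx, inputDims, input, output); the second is the new backward kernel
// signature (axisIdx, inputDims, softmax output, output gradient used as the
// target, input gradient).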
// Implementation entry point registration to Operator
REGISTRAR(Softmax_Op, "cpu", Aidge::SoftmaxImpl_cpu::create);
@@ -22,8 +22,13 @@
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
namespace Aidge {
template <class I, class O>
void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
void SoftmaxImpl_cpu_forward_kernel(
std::size_t axisIdx,
const std::vector<DimSize_t>& inputDims,
const void* input_,
void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
@@ -41,35 +46,100 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi
for (std::size_t j = 0; j < postAxisElems; ++j) {
I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j];
for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
std::size_t inIdx =
i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
maxVal = std::max(maxVal, input[inIdx]);
}
// Calculate sum of exponentials within the axis
I sumExp = 0;
for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
std::size_t inIdx =
i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
sumExp += std::exp(input[inIdx] - maxVal);
}
// Calculate softmax for the current slice along the axis
for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
std::size_t inIdx =
i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
output[inIdx] = std::exp(input[inIdx] - maxVal) / sumExp;
}
}
}
}
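// Indexing sketch: both the forward and backward kernels address the tensor as a
// flattened [preAxisElems x inputDims[axisIdx] x postAxisElems] layout, so element
// (i, k, j) lives at i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j.
// For example, with illustrative inputDims = {2, 3, 4} and axisIdx = 1, element
// (1, 2, 3) maps to 1*3*4 + 2*4 + 3 = 23.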
/**
* @brief Backward kernel for Softmax on CPU (assumes cross-entropy as the loss function).
* @tparam I Input data type.
* @tparam O Output data type.
* @param[in] axisIdx Index of the axis along which the softmax was computed.
* @param[in] inputDims Array of input dimensions.
* @param[in] softmaxOut_ Softmax forward output tensor.
* @param[in] target_ Target output tensor (e.g. one-hot encoded labels).
* @param[out] gradientLoss_ Gradient of the loss with respect to the input.
*/
template<class I, class O>
void SoftmaxImpl_cpu_backward_kernel(
std::size_t axisIdx,
const std::vector<DimSize_t>& inputDims,
const void* softmaxOut_,
const void* target_,
void* gradientLoss_)
{
const O* softmaxOut = static_cast<const O*>(softmaxOut_);
const O* target = static_cast<const O*>(target_);
I* dL = static_cast<I*>(gradientLoss_);
// Compute the number of elements after the softmax axis (post-axis size)
std::size_t postAxisElems = 1;
for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
postAxisElems *= inputDims[i];
}
// Compute the number of elements before the softmax axis (pre-axis size)
std::size_t preAxisElems = 1;
for (std::size_t i = 0; i < axisIdx; ++i) {
preAxisElems *= inputDims[i];
}
// Iterate over batches (pre-axis elements)
for (std::size_t i = 0; i < preAxisElems; ++i) {
for (std::size_t j = 0; j < postAxisElems; ++j) {
for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx =
i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
dL[inIdx] = softmaxOut[inIdx] - target[inIdx];
}
}
}
}
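// Gradient sketch (standard softmax/cross-entropy result): with p = softmax(z)
// and L = -sum_k t_k * log(p_k) for a one-hot target t, dL/dz_k = p_k - t_k,
// which is exactly the softmaxOut - target computed above. For instance,
// p = {0.836, 0.113, 0.051} with t = {1, 0, 0} gives {-0.164, 0.113, 0.051}.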
REGISTRAR(SoftmaxImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, nullptr});
{ DataType::Float32 },
{
ProdConso::inPlaceModel,
Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>,
Aidge::SoftmaxImpl_cpu_backward_kernel<float, float>
}
);
REGISTRAR(SoftmaxImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, nullptr});
{ DataType::Float64 },
{
ProdConso::inPlaceModel,
Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>,
Aidge::SoftmaxImpl_cpu_backward_kernel<double, double>
}
);
REGISTRAR(SoftmaxImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
{ DataType::Int32 },
{
ProdConso::inPlaceModel,
Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>,
Aidge::SoftmaxImpl_cpu_backward_kernel<int32_t, int32_t>
}
);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ */
@@ -40,5 +40,19 @@ void Aidge::SoftmaxImpl_cpu::forward() {
template <>
void Aidge::SoftmaxImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Softmax_Op on backend cpu");
const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp);
AIDGE_ASSERT(!op_.getInput(0)->empty(), "Softmax input empty");
std::int32_t axis = (op_.axis() >= 0) ? op_.axis() : op_.getInput(0)->nbDims() + op_.axis();
// Find the correct kernel type
const auto impl = Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
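// Argument mapping (as wired below): the forward output raw pointer holds the
// softmax result, the output gradient is passed as the target, and the result
// is written into the input gradient.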
impl.backward(static_cast<std::size_t>(axis), // axisIdx
op_.getInput(0)->dims(),
op_.getOutput(0)->getImpl()->rawPtr(),
op_.getOutput(0)->grad()->getImpl()->rawPtr(),
op_.getInput(0)->grad()->getImpl()->rawPtr()
);
}
@@ -111,4 +111,201 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") {
REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f));
}
}
\ No newline at end of file
}
TEST_CASE("[cpu/operator] Softmax(backward)", "[Softmax][CPU]") {
SECTION("1D Tensor") {
std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(0);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
std::shared_ptr<Tensor> softMaxForwardInputTensor =
std::make_shared<Tensor>(Array1D<float,3> { {3.0, 1.0, 0.2} });
// One-hot encoded targets are passed as the output gradient;
// the expected gradients below are softmax(input) - target.
op->associateInput(0,softMaxForwardInputTensor);
op->forward();
std::shared_ptr<Tensor> target1 =
std::make_shared<Tensor>(Array1D<float, 3>{ {1, 0, 0} });
Tensor expectedGrad1 = Array1D<float,3> {
{-0.163981, 0.113143, 0.050838}
};
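// Hand-computed check: softmax({3.0, 1.0, 0.2}) is approximately
// {0.836019, 0.113143, 0.050838}; subtracting the one-hot target {1, 0, 0}
// gives expectedGrad1.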
op->getOutput(0)->setGrad(target1);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad1,
1e-5f, 1e-8f));
std::shared_ptr<Tensor> target2 =
std::make_shared<Tensor>(Array1D<float, 3>{ {0, 1, 0} });
Tensor expectedGrad2 = Array1D<float,3> {
{0.836019, -0.886857, 0.050838}
};
op->getOutput(0)->setGrad(target2);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad2,
1e-5f, 1e-8f));
std::shared_ptr<Tensor> target3 =
std::make_shared<Tensor>(Array1D<float, 3>{ {0, 0, 1} });
Tensor expectedGrad3 = Array1D<float,3> {
{0.836019, 0.113143, -0.949162}
};
op->getOutput(0)->setGrad(target3);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad3,
1e-5f, 1e-8f));
}
SECTION("2D Tensor") {
std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
std::shared_ptr<Tensor> softMaxForwardInputTensor =
std::make_shared<Tensor>(Array2D<float, 2, 3> {
{
{2.0, 1.0, 0.1},
{1.0, 3.0, 0.2}
}
});
op->associateInput(0,softMaxForwardInputTensor);
op->forward();
std::shared_ptr<Tensor> target1 =
std::make_shared<Tensor>(Array2D<float, 2, 3>{
{
{1, 0, 0},
{0, 1, 0}
}
});
Tensor expectedGrad1 = Array2D<float, 2, 3> {
{
{-0.34099886, 0.24243297, 0.09856589},
{0.11314284, -0.1639812, 0.05083836}
}
};
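// Hand-computed check: the row-wise softmax along axis 1 is approximately
// {0.659001, 0.242433, 0.098566} and {0.113143, 0.836019, 0.050838};
// subtracting the row targets {1, 0, 0} and {0, 1, 0} gives expectedGrad1.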
op->getOutput(0)->setGrad(target1);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad1,
1e-5f, 1e-8f));
}
SECTION("4D Tensor"){
std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
std::shared_ptr<Tensor> softMaxForwardInputTensor =
std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
{
{
{{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
{2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
{1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
{{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
{1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
{7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
{{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
{8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
{9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}
},
{
{{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
{9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
{2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
{{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
{1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
{2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
{{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
{9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
{2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}
}
}
});
op->associateInput(0,softMaxForwardInputTensor);
op->forward();
std::shared_ptr<Tensor> target1 =
std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{{
{
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
},
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
},
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
}
},
{
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
},
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
},
{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f }
}
}
}});
Tensor expectedGrad1 = Array4D<float,2,3,3,3> {
{{
{{ 0.0196f, -0.0000f, -0.0000f },
{ -0.0000f, 0.0044f, -0.0000f },
{ -0.0000f, -0.0000f, 0.0004f }},
{{ -0.0146f, -0.0000f, -0.0000f },
{ -0.0000f, -0.0040f, -0.0000f },
{ -0.0000f, -0.0000f, 0.0136f }},
{{ -0.0050f, -0.0000f, -0.0000f },
{ -0.0000f, -0.0003f, -0.0000f },
{ -0.0000f, -0.0000f, -0.0139f }}
},
{
{{ 0.0002f, -0.0000f, -0.0000f },
{ -0.0000f, -0.0219f, -0.0000f },
{ -0.0000f, -0.0000f, -0.0077f }},
{{ -0.0225f, -0.0000f, -0.0000f },
{ -0.0000f, 0.0152f, -0.0000f },
{ -0.0000f, -0.0000f, -0.0184f }},
{{ 0.0224f, -0.0000f, -0.0000f },
{ -0.0000f, 0.0067f, -0.0000f },
{ -0.0000f, -0.0000f, 0.0261f }}
}}
};
op->getOutput(0)->setGrad(target1);
op->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedGrad1,
1e-5f, 1e-8f));
}
}