Commit fa4bf3b7 authored by Jerome Hue

Add Multi Step CE Loss

parent fed5d9d0
2 merge requests: !53 0.4.0, !49 Add Multi Step CE Loss
Pipeline #76827 passed
@@ -49,6 +49,9 @@ Tensor KD(std::shared_ptr<Tensor>& student_prediction,
Tensor CELoss(std::shared_ptr<Tensor>& prediction,
const std::shared_ptr<Tensor>& target);
Tensor multiStepCELoss(std::shared_ptr<Tensor>& prediction,
const std::shared_ptr<Tensor>& target, std::uint32_t nbTimeSteps);
} // namespace loss
} // namespace Aidge
@@ -27,5 +27,6 @@ void init_Loss(py::module &m)
m_loss.def("BCE", &loss::BCE, py::arg("graph"), py::arg("target"));
m_loss.def("CELoss", &loss::CELoss, py::arg("graph"), py::arg("target"));
m_loss.def("KD", &loss::KD, py::arg("student_prediction"), py::arg("teacher_prediction"), py::arg("temperature") = 2.0f);
m_loss.def("multiStepCELoss", &loss::multiStepCELoss, py::arg("graph"), py::arg("target"), py::arg("nbTimeSteps"));
}
} // namespace Aidge
@@ -9,6 +9,7 @@
*
********************************************************************************/
#include <aidge/backend/cpu/data/GetCPUPtr.h>
#include <memory>
#include <numeric> // std::iota
@@ -32,9 +33,9 @@
Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
const std::shared_ptr<Tensor>& target)
{
AIDGE_ASSERT(prediction->nbDims() == 2,
"Label must have two dims: [BatchSize, NbChannel]");
AIDGE_ASSERT(prediction->backend() == target->backend(),
"'prediction' and 'target' Tensors must be on the "
@@ -113,11 +114,32 @@ Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
scalar.setBackend(backend);
scalar.setDataType(dataType);
(*err) = (*err) * scalar;
prediction->setGrad(err);
// Return the loss value
return (*lossTensor);
}
Aidge::Tensor Aidge::loss::multiStepCELoss(std::shared_ptr<Aidge::Tensor>& prediction,
const std::shared_ptr<Tensor>& target,
std::uint32_t nbTimeSteps)
{
AIDGE_ASSERT(prediction->dataType() == target->dataType(),
"Prediction and Target need to be of the same type");

auto loss = Tensor(prediction->dataType());
loss.setBackend("cpu");
loss.resize({1});
loss.zeros();

for (std::size_t i = 0; i < nbTimeSteps; ++i) {
// Per-step loss; CELoss also writes the step gradient into predTimeStep->grad()
auto predTimeStep = std::make_shared<Tensor>(prediction->extract({i}).clone());
loss += loss::CELoss(predTimeStep, target);

// Copy the step gradient back into the full prediction gradient at the step offset
const auto nbGradElements = predTimeStep->grad()->size();
const auto offset = i * nbGradElements;
prediction->grad()->getImpl()->copy(predTimeStep->grad()->getImpl()->rawPtr(),
nbGradElements, offset);
}

return loss;
}
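For context, a minimal call-site sketch of the new loss function. It assumes the prediction tensor is laid out time-major, i.e. [nbTimeSteps, BatchSize, NbChannel], so that extract({i}) yields the 2-D per-step prediction that CELoss expects, and it assumes prediction->grad() is already sized to receive the per-step gradients copied at offset i * (BatchSize * NbChannel); neither point is stated explicitly in this diff, and the dimensions below are purely illustrative.

// Illustrative sketch only (not part of this commit); layout assumptions noted above.
const std::uint32_t nbTimeSteps = 4;
const std::vector<std::size_t> predDims{nbTimeSteps, 8, 10}; // [T, BatchSize, NbChannel] (assumed)
const std::vector<std::size_t> targetDims{8, 10};            // [BatchSize, NbChannel]

auto prediction = std::make_shared<Aidge::Tensor>(predDims);
prediction->setBackend("cpu");
prediction->setDataType(Aidge::DataType::Float32);

auto target = std::make_shared<Aidge::Tensor>(targetDims);
target->setBackend("cpu");
target->setDataType(Aidge::DataType::Float32);

// ... fill prediction with per-step logits and target with one-hot labels ...

// Sums CELoss over the nbTimeSteps slices; each step's gradient is copied
// back into prediction->grad() at the corresponding offset.
const Aidge::Tensor loss = Aidge::loss::multiStepCELoss(prediction, target, nbTimeSteps);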
@@ -9,21 +9,40 @@
*
********************************************************************************/
#include <aidge/backend/cpu/data/GetCPUPtr.h>
#include <aidge/data/DataType.hpp>
#include <aidge/graph/GraphView.hpp>
#include <aidge/scheduler/SequentialScheduler.hpp>
#include <algorithm>
#include <catch2/catch_test_macros.hpp>
#include <cmath> //
#include <cstddef> // std::size_t
#include <cstdlib>
#include <functional> // std::multiplies, std::plus
#include <memory> // std::make_unique
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937,
// std::uniform_int_distribution
#include <vector>
#include "aidge/loss/LossList.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Context.hpp"
#include "aidge/learning/learningRate/LRSchedulerList.hpp"
#include "aidge/learning/optimizer/SGD.hpp"
#include "aidge/loss/LossList.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
#include <aidge/backend/cpu/operator/FCImpl.hpp>
#include <aidge/backend/cpu/operator/HeavisideImpl.hpp>
#include <aidge/operator/FC.hpp>
#include <aidge/operator/Heaviside.hpp>
#include <aidge/operator/MetaOperatorDefs.hpp>
#include <aidge/operator/Pop.hpp>
#include <aidge/operator/Stack.hpp>
#if USE_AIDGE_BACKEND_CUDA
#include "aidge/backend/cuda/operator/SoftmaxImpl.hpp"
#endif
@@ -32,8 +51,10 @@ namespace Aidge {
// Utility that computes the CELoss manually
static float manualCELoss(float *predictionArray,
float *targetArray,
std::size_t batchSize,
std::size_t outputSize) {
const std::size_t nbElements = batchSize * outputSize;
float *softmaxArray = new float[nbElements];
@@ -49,25 +70,25 @@ static float manualCELoss(float *predictionArray, float *targetArray, std::size_
}
}
float *productArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
productArray[i] = targetArray[i] * std::log(softmaxArray[i]);
float *sumArray = new float[batchSize];
for (std::size_t i = 0; i < batchSize; ++i) {
float acc = 0;
for (std::size_t j = 0; j < outputSize; ++j)
acc += productArray[i * outputSize + j];
sumArray[i] = acc;
}
float mean = 0;
for (std::size_t i = 0; i < batchSize; ++i)
mean += sumArray[i] / static_cast<float>(batchSize);
delete[] softmaxArray;
delete[] productArray;
delete[] sumArray;
return -mean;
}
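For reference, the quantity this helper computes (restated from the code above, not an addition to the tests) is the batch-averaged cross-entropy of the softmax of the raw predictions:

\mathrm{CELoss}(p, t) = -\frac{1}{B} \sum_{i=1}^{B} \sum_{j=1}^{C} t_{ij} \, \log\!\left( \frac{e^{p_{ij}}}{\sum_{k=1}^{C} e^{p_{ik}}} \right)

with B = batchSize and C = outputSize; multiStepCELoss accumulates this value over the nbTimeSteps slices of the prediction tensor.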
@@ -82,12 +103,12 @@ TEST_CASE("[loss/classification] CELoss", "[loss][classification][CELoss]") {
std::mt19937 gen(100);
std::uniform_real_distribution<float> valueDist(-2, 2);
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(1),
std::size_t(8));
SECTION("CPU") {
for (std::uint16_t trial = 0; trial < NB_TRIALS; ++trial) {
const std::size_t nbDims = 2;
const std::size_t batchSize = dimSizeDist(gen);
const std::size_t outputSize = dimSizeDist(gen);
@@ -100,49 +121,61 @@ TEST_CASE("[loss/classification] CELoss", "[loss][classification][CELoss]") {
// Create the data array/tensors
float *predictionArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
predictionArray[i] = valueDist(gen);
float *targetArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
targetArray[i] = valueDist(gen);
std::shared_ptr<Tensor> predictionTensor =
std::make_shared<Tensor>(dims);
predictionTensor->setBackend("cpu");
predictionTensor->setDataType(DataType::Float32);
predictionTensor->getImpl()->setRawPtr(predictionArray,
nbElements);
Log::info("Prediction");
predictionTensor->print();
std::shared_ptr<Tensor> targetTensor =
std::make_shared<Tensor>(dims);
targetTensor->setBackend("cpu");
targetTensor->setDataType(DataType::Float32);
targetTensor->getImpl()->setRawPtr(targetArray, nbElements);
Log::info("Target");
targetTensor->print();
// Compute the CELoss manually
Tensor manualResult = Tensor(manualCELoss(predictionArray,
targetArray,
batchSize,
outputSize));
manualResult.resize({1, 1});
// Compute the CELoss using Aidge::loss::CELoss function
const Tensor functionResult =
loss::CELoss(predictionTensor, targetTensor);
// Compare results
Log::info( " CELoss = {} {} ", manualResult.get<float>(0), functionResult.get<float>(0));
REQUIRE(approxEq<float>(manualResult, functionResult));
Log::info(" CELoss = {} {} ",
manualResult.get<float>(0),
functionResult.get<float>(0));
REQUIRE(approxEq<float>(manualResult, functionResult));
// Free memory
delete[] predictionArray;
delete[] targetArray;
}
}
#if USE_AIDGE_BACKEND_CUDA
SECTION("CUDA") {
for (std::uint16_t trial = 0; trial < NB_TRIALS; ++trial) {
const std::size_t nbDims = 2;
const std::size_t batchSize = dimSizeDist(gen);
const std::size_t outputSize = dimSizeDist(gen);
@@ -155,49 +188,66 @@ TEST_CASE("[loss/classification] CELoss", "[loss][classification][CELoss]") {
// Create the arrays/tensors
float *predictionArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
predictionArray[i] = valueDist(gen);
float *targetArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
targetArray[i] = valueDist(gen);
std::shared_ptr<Tensor> predictionTensor =
std::make_shared<Tensor>(dims);
predictionTensor->setDataType(DataType::Float32);
predictionTensor->setBackend("cuda");
float *predictionArrayDevice;
cudaMalloc(reinterpret_cast<void **>(&predictionArrayDevice),
sizeof(float) * nbElements);
cudaMemcpy(predictionArrayDevice,
predictionArray,
sizeof(float) * nbElements,
cudaMemcpyHostToDevice);
predictionTensor->getImpl()->setRawPtr(predictionArrayDevice,
nbElements);
std::shared_ptr<Tensor> targetTensor =
std::make_shared<Tensor>(dims);
targetTensor->setDataType(DataType::Float32);
targetTensor->setBackend("cuda");
float *targetArrayDevice;
cudaMalloc(reinterpret_cast<void **>(&targetArrayDevice),
sizeof(float) * nbElements);
cudaMemcpy(targetArrayDevice,
targetArray,
sizeof(float) * nbElements,
cudaMemcpyHostToDevice);
targetTensor->getImpl()->setRawPtr(targetArrayDevice, nbElements);
// Compute the CELoss manually
Tensor manualResult = Tensor(manualCELoss(predictionArray,
targetArray,
batchSize,
outputSize));
manualResult.resize({1, 1});
// Compute the CELoss using Aidge::loss::CELoss function
const Tensor functionResult =
loss::CELoss(predictionTensor, targetTensor);
// Compare results
Log::info(" CELoss = {} {} ", manualResult.get<float>(0), functionResult.get<float>(0));
REQUIRE(approxEq<float>(manualResult, functionResult));
Log::info(" CELoss = {} {} ",
manualResult.get<float>(0),
functionResult.get<float>(0));
REQUIRE(approxEq<float>(manualResult, functionResult));
// Free memory
delete[] predictionArray;
delete[] targetArray;
cudaFree(predictionArrayDevice);
cudaFree(targetArrayDevice);
@@ -205,4 +255,4 @@ TEST_CASE("[loss/classification] CELoss", "[loss][classification][CELoss]") {
}
#endif
}
} // namespace Aidge