Commit fa4bf3b7 authored by Jerome Hue

Add Multi Step CE Loss

parent fed5d9d0
2 merge requests: !53 0.4.0, !49 Add Multi Step CE Loss
Pipeline #76827 passed
@@ -49,6 +49,9 @@ Tensor KD(std::shared_ptr<Tensor>& student_prediction,
Tensor CELoss(std::shared_ptr<Tensor>& prediction,
const std::shared_ptr<Tensor>& target);
Tensor multiStepCELoss(std::shared_ptr<Tensor>& prediction,
const std::shared_ptr<Tensor>& target, std::uint32_t nbTimeSteps);
} // namespace loss
} // namespace Aidge
@@ -27,5 +27,6 @@ void init_Loss(py::module &m)
m_loss.def("BCE", &loss::BCE, py::arg("graph"), py::arg("target"));
m_loss.def("CELoss", &loss::CELoss, py::arg("graph"), py::arg("target"));
m_loss.def("KD", &loss::KD, py::arg("student_prediction"), py::arg("teacher_prediction"), py::arg("temperature") = 2.0f);
m_loss.def("multiStepCELoss", &loss::multiStepCELoss, py::arg("graph"), py::arg("target"), py::arg("nbTimeSteps"));
}
} // namespace Aidge
@@ -9,6 +9,7 @@
*
********************************************************************************/
#include <aidge/backend/cpu/data/GetCPUPtr.h>
#include <memory>
#include <numeric> // std::iota
@@ -32,9 +33,9 @@
Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
const std::shared_ptr<Tensor>& target)
{
AIDGE_ASSERT(prediction->nbDims() == 2,
"Label must have two dims: [BatchSize, NbChannel]");
AIDGE_ASSERT(prediction->backend() == target->backend(),
"'prediction' and 'target' Tensors must be on the "
@@ -113,11 +114,32 @@ Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
scalar.setBackend(backend);
scalar.setDataType(dataType);
(*err) = (*err) * scalar;
prediction->setGrad(err);
// Return the loss value
return (*lossTensor);
}
Aidge::Tensor Aidge::loss::multiStepCELoss(std::shared_ptr<Aidge::Tensor>& prediction,
const std::shared_ptr<Tensor>& target,
std::uint32_t nbTimeSteps)
{
AIDGE_ASSERT(prediction->dataType() == target->dataType(),
"Prediction and Target need to be of the same type");

auto loss = Tensor(prediction->dataType());
loss.setBackend("cpu");
loss.resize({1});
loss.zeros();

for (std::size_t i = 0; i < nbTimeSteps; ++i) {
// Per-step loss; CELoss also writes the step gradient into predTimeStep->grad()
auto predTimeStep = std::make_shared<Tensor>(prediction->extract({i}).clone());
loss += loss::CELoss(predTimeStep, target);

// Copy the step gradient back into the full prediction gradient at the step offset
const auto nbGradElements = predTimeStep->grad()->size();
const auto offset = i * nbGradElements;
prediction->grad()->getImpl()->copy(predTimeStep->grad()->getImpl()->rawPtr(),
nbGradElements, offset);
}

return loss;
}
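For context, a minimal call-site sketch of the new loss function. It assumes the prediction tensor is laid out time-major, i.e. [nbTimeSteps, BatchSize, NbChannel], so that extract({i}) yields the 2-D per-step prediction that CELoss expects, and it assumes prediction->grad() is already sized to receive the per-step gradients copied at offset i * (BatchSize * NbChannel); neither point is stated explicitly in this diff, and the dimensions below are purely illustrative.

// Illustrative sketch only (not part of this commit); layout assumptions noted above.
const std::uint32_t nbTimeSteps = 4;
const std::vector<std::size_t> predDims{nbTimeSteps, 8, 10}; // [T, BatchSize, NbChannel] (assumed)
const std::vector<std::size_t> targetDims{8, 10};            // [BatchSize, NbChannel]

auto prediction = std::make_shared<Aidge::Tensor>(predDims);
prediction->setBackend("cpu");
prediction->setDataType(Aidge::DataType::Float32);

auto target = std::make_shared<Aidge::Tensor>(targetDims);
target->setBackend("cpu");
target->setDataType(Aidge::DataType::Float32);

// ... fill prediction with per-step logits and target with one-hot labels ...

// Sums CELoss over the nbTimeSteps slices; each step's gradient is copied
// back into prediction->grad() at the corresponding offset.
const Aidge::Tensor loss = Aidge::loss::multiStepCELoss(prediction, target, nbTimeSteps);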
@@ -9,21 +9,40 @@
*
********************************************************************************/
#include <aidge/backend/cpu/data/GetCPUPtr.h>
#include <aidge/data/DataType.hpp>
#include <aidge/graph/GraphView.hpp>
#include <aidge/scheduler/SequentialScheduler.hpp>
#include <algorithm>
#include <catch2/catch_test_macros.hpp>
#include <cmath> //
#include <cstddef> // std::size_t
#include <cstdlib>
#include <functional> // std::multiplies, std::plus
#include <memory> // std::make_unique
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937,
// std::uniform_int_distribution
#include <vector>
#include "aidge/loss/LossList.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Context.hpp"
#include "aidge/learning/learningRate/LRSchedulerList.hpp"
#include "aidge/learning/optimizer/SGD.hpp"
#include "aidge/loss/LossList.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
#include <aidge/backend/cpu/operator/FCImpl.hpp>
#include <aidge/backend/cpu/operator/HeavisideImpl.hpp>
#include <aidge/operator/FC.hpp>
#include <aidge/operator/Heaviside.hpp>
#include <aidge/operator/MetaOperatorDefs.hpp>
#include <aidge/operator/Pop.hpp>
#include <aidge/operator/Stack.hpp>
#if USE_AIDGE_BACKEND_CUDA
#include "aidge/backend/cuda/operator/SoftmaxImpl.hpp"
#endif
@@ -32,8 +51,10 @@ namespace Aidge {
// Utility that computes the CELoss manually
static float manualCELoss(float *predictionArray,
float *targetArray,
std::size_t batchSize,
std::size_t outputSize) {
const std::size_t nbElements = batchSize * outputSize;
float *softmaxArray = new float[nbElements];
@@ -49,25 +70,25 @@ static float manualCELoss(float *predictionArray, float *targetArray, std::size_
}
}
float *productArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
productArray[i] = targetArray[i] * std::log(softmaxArray[i]);
float *sumArray = new float[batchSize];
for (std::size_t i = 0; i < batchSize; ++i) {
float acc = 0;
for (std::size_t j = 0; j < outputSize; ++j)
acc += productArray[i * outputSize + j];
sumArray[i] = acc;
}
float mean = 0;
for (std::size_t i = 0; i < batchSize; ++i)
mean += sumArray[i] / static_cast<float>(batchSize);
delete[] softmaxArray;
delete[] productArray;
delete[] sumArray;
return -mean;
}
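For reference, the quantity this helper computes (restated from the code above, not an addition to the tests) is the batch-averaged cross-entropy of the softmax of the raw predictions:

\mathrm{CELoss}(p, t) = -\frac{1}{B} \sum_{i=1}^{B} \sum_{j=1}^{C} t_{ij} \, \log\!\left( \frac{e^{p_{ij}}}{\sum_{k=1}^{C} e^{p_{ik}}} \right)

with B = batchSize and C = outputSize; multiStepCELoss accumulates this value over the nbTimeSteps slices of the prediction tensor.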
@@ -82,12 +103,12 @@ TEST_CASE("[loss/classification] CELoss", "[loss][classification][CELoss]") {
std::mt19937 gen(100);
std::uniform_real_distribution<float> valueDist(-2, 2);
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(1),
std::size_t(8));
SECTION("CPU") {
for (std::uint16_t trial = 0; trial < NB_TRIALS; ++trial) {
const std::size_t nbDims = 2;
const std::size_t batchSize = dimSizeDist(gen);
const std::size_t outputSize = dimSizeDist(gen);
@@ -100,49 +121,61 @@ TEST_CASE("[loss/classification] CELoss", "[loss][classification][CELoss]") {
// Create the data array/tensors
float *predictionArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
predictionArray[i] = valueDist(gen);
float *targetArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
targetArray[i] = valueDist(gen);
std::shared_ptr<Tensor> predictionTensor =
std::make_shared<Tensor>(dims);
predictionTensor->setBackend("cpu");
predictionTensor->setDataType(DataType::Float32);
predictionTensor->getImpl()->setRawPtr(predictionArray,
nbElements);
Log::info("Prediction");
predictionTensor->print();
std::shared_ptr<Tensor> targetTensor =
std::make_shared<Tensor>(dims);
targetTensor->setBackend("cpu");
targetTensor->setDataType(DataType::Float32);
targetTensor->getImpl()->setRawPtr(targetArray, nbElements);
Log::info("Target");
targetTensor->print();
// Compute the CELoss manually
Tensor manualResult = Tensor(manualCELoss(predictionArray,
targetArray,
batchSize,
outputSize));
manualResult.resize({1, 1});
// Compute the CELoss using Aidge::loss::CELoss function
const Tensor functionResult =
loss::CELoss(predictionTensor, targetTensor);
// Compare results
Log::info( " CELoss = {} {} ", manualResult.get<float>(0), functionResult.get<float>(0));
REQUIRE(approxEq<float>(manualResult, functionResult));
Log::info(" CELoss = {} {} ",
manualResult.get<float>(0),
functionResult.get<float>(0));
REQUIRE(approxEq<float>(manualResult, functionResult));
// Free memory
delete[] predictionArray;
delete[] targetArray;
}
}
#if USE_AIDGE_BACKEND_CUDA
SECTION("CUDA") {
for (std::uint16_t trial = 0; trial < NB_TRIALS; ++trial) {
const std::size_t nbDims = 2;
const std::size_t batchSize = dimSizeDist(gen);
const std::size_t outputSize = dimSizeDist(gen);
@@ -155,49 +188,66 @@ TEST_CASE("[loss/classification] CELoss", "[loss][classification][CELoss]") {
// Create the arrays/tensors
float *predictionArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
predictionArray[i] = valueDist(gen);
float *targetArray = new float[nbElements];
for (std::size_t i = 0; i < nbElements; ++i)
targetArray[i] = valueDist(gen);
std::shared_ptr<Tensor> predictionTensor =
std::make_shared<Tensor>(dims);
predictionTensor->setDataType(DataType::Float32);
predictionTensor->setBackend("cuda");
float *predictionArrayDevice;
cudaMalloc(reinterpret_cast<void **>(&predictionArrayDevice),
sizeof(float) * nbElements);
cudaMemcpy(predictionArrayDevice,
predictionArray,
sizeof(float) * nbElements,
cudaMemcpyHostToDevice);
predictionTensor->getImpl()->setRawPtr(predictionArrayDevice,
nbElements);
std::shared_ptr<Tensor> targetTensor =
std::make_shared<Tensor>(dims);
targetTensor->setDataType(DataType::Float32);
targetTensor->setBackend("cuda");
float *targetArrayDevice;
cudaMalloc(reinterpret_cast<void **>(&targetArrayDevice),
sizeof(float) * nbElements);
cudaMemcpy(targetArrayDevice,
targetArray,
sizeof(float) * nbElements,
cudaMemcpyHostToDevice);
targetTensor->getImpl()->setRawPtr(targetArrayDevice, nbElements);
// Compute the CELoss manually
Tensor manualResult = Tensor(manualCELoss(predictionArray,
targetArray,
batchSize,
outputSize));
manualResult.resize({1, 1});
// Compute the CELoss using Aidge::loss::CELoss function
const Tensor functionResult =
loss::CELoss(predictionTensor, targetTensor);
// Compare results
Log::info(" CELoss = {} {} ", manualResult.get<float>(0), functionResult.get<float>(0));
REQUIRE(approxEq<float>(manualResult, functionResult));
Log::info(" CELoss = {} {} ",
manualResult.get<float>(0),
functionResult.get<float>(0));
REQUIRE(approxEq<float>(manualResult, functionResult));
// Free memory
delete[] predictionArray;
delete[] targetArray;
cudaFree(predictionArrayDevice);
cudaFree(targetArrayDevice);
@@ -205,4 +255,4 @@ TEST_CASE("[loss/classification] CELoss", "[loss][classification][CELoss]") {
}
#endif
}
} // namespace Aidge