diff --git a/.gitlab/ci/build.gitlab-ci.yml b/.gitlab/ci/build.gitlab-ci.yml
index c0b72d3e179b696b3776de7444adca263ab58c27..39b6ace150d146082045e820953236db559393d3 100644
--- a/.gitlab/ci/build.gitlab-ci.yml
+++ b/.gitlab/ci/build.gitlab-ci.yml
@@ -15,7 +15,6 @@ build:ubuntu_cpp:
     # aidge_backend_cpu
     - DEPENDENCY_NAME="aidge_backend_cpu"
     - !reference [.download_dependency, script]
-
     # Build current module
     - export CMAKE_PREFIX_PATH=../install_cpp
     - mkdir -p build_cpp
diff --git a/CHANGELOG b/CHANGELOG
index d6594bc686a7c8c0e244c77ef7c69496d0eb8643..40caa5a799c6904df84cf2e0b3fc38eb9caf6683 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,8 @@
+# Version 0.1.1 (May 14, 2024)
+
+* Fix the loss function to return a Tensor and to set the gradient of the prediction
+* Add Python binding for the loss function
+
 # Version 0.1.0 (April 4, 2024)
 
 Initial release
diff --git a/include/aidge/learning/optimizer/Optimizer.hpp b/include/aidge/learning/optimizer/Optimizer.hpp
index 9e621875beb1cfd58bf8474753c536b8c4e5183c..195d64965d3ba4eb89c9c4d0ca2155cb719f76f3 100644
--- a/include/aidge/learning/optimizer/Optimizer.hpp
+++ b/include/aidge/learning/optimizer/Optimizer.hpp
@@ -49,7 +49,7 @@ public:
     virtual void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) {
         mParameters = parameters;
         for (const auto& param : parameters) {
-            param->initGradient(); // create gradient and set it to zeros
+            param->initGrad(); // create gradient and set it to zeros
         }
     }
 
diff --git a/include/aidge/loss/LossList.hpp b/include/aidge/loss/LossList.hpp
index e65123dde897610f82ca876f1260a165b785e33f..5a0241d9816becbaace75185e796c5ec7c787e89 100644
--- a/include/aidge/loss/LossList.hpp
+++ b/include/aidge/loss/LossList.hpp
@@ -20,10 +20,19 @@
 namespace Aidge {
 namespace loss {
 
-Tensor MSE(const std::shared_ptr<Tensor>& prediction,
+/**
+ * @brief Compute the Mean Square Error loss.
+ * This function returns the loss and sets the ``grad()`` of the prediction
+ * input.
+ * @param prediction Tensor returned by the Aidge graph. It is important that
+ * this tensor is not a copy, otherwise the backward function will have no
+ * gradient to start from.
+ * @param target Tensor representing the ground truth; it must be one-hot encoded.
+ */
+Tensor MSE(std::shared_ptr<Tensor>& prediction,
            const std::shared_ptr<Tensor>& target);
 
-} // loss
-} // namespace Aidge
+} // namespace loss
+} // namespace Aidge
 
 #endif /* AIDGE_CORE_LOSS_LOSSLIST_H_ */
diff --git a/python_binding/learning/loss/pybind_Loss.cpp b/python_binding/learning/loss/pybind_Loss.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5e3c3af23cb81effc87888f91ac108f8b1cfd61a
--- /dev/null
+++ b/python_binding/learning/loss/pybind_Loss.cpp
@@ -0,0 +1,27 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <pybind11/pybind11.h>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/graph/GraphView.hpp"
+#include "aidge/loss/LossList.hpp"
+
+namespace py = pybind11;
+
+namespace Aidge {
+
+void init_Loss(py::module &m) {
+    auto m_loss =
+        m.def_submodule("loss", "Submodule dedicated to loss functions");
+    m_loss.def("MSE", &loss::MSE, py::arg("graph"), py::arg("target"));
+}
+} // namespace Aidge
diff --git a/python_binding/pybind_learning.cpp b/python_binding/pybind_learning.cpp
index c93884e318847121d00504a6b5602f5a1eaea910..3b4a16ceffb0db7bd7e1d407bcef5d5df830cb2f 100644
--- a/python_binding/pybind_learning.cpp
+++ b/python_binding/pybind_learning.cpp
@@ -16,12 +16,13 @@ namespace py = pybind11;
 
 namespace Aidge {
 // namespace learning {
+void init_Loss(py::module&);
 void init_Optimizer(py::module&);
 void init_SGD(py::module&);
-
 void init_LRScheduler(py::module&);
 
 void init_Aidge(py::module& m) {
+    init_Loss(m);
     init_Optimizer(m);
     init_SGD(m);
 
diff --git a/src/loss/regression/MSE.cpp b/src/loss/regression/MSE.cpp
index 3245fcdbce33c5966c18bf56579eb18b2aa790bd..87f685a0f550a1cb60563503447407f70868ce9a 100644
--- a/src/loss/regression/MSE.cpp
+++ b/src/loss/regression/MSE.cpp
@@ -9,8 +9,6 @@
  *
  ********************************************************************************/
 
-#include "aidge/loss/LossList.hpp"
-
 #include <memory>
 #include <numeric> // std::iota
 
@@ -20,54 +18,95 @@
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/OpArgs.hpp"
+#include "aidge/loss/LossList.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/operator/Pow.hpp"
 #include "aidge/operator/ReduceMean.hpp"
 #include "aidge/operator/Sub.hpp"
+#include "aidge/recipes/GraphViewHelper.hpp"
 #include "aidge/scheduler/Scheduler.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
 
-Aidge::Tensor Aidge::loss::MSE(const std::shared_ptr<Tensor>& prediction, const std::shared_ptr<Tensor>& target) {
+Aidge::Tensor Aidge::loss::MSE(std::shared_ptr<Tensor>& prediction,
+                               const std::shared_ptr<Tensor>& target) {
+    /*
+    Implementation note:
+    MSE is computed using a graph in order not to be backend dependent.
+
+    The graph used is the following:
+
+    pred->Sub
+    label->Sub
+    Sub->Pow
+    (2)->Pow->ReduceMean->Loss
+    Sub->Mul
+    (2/NbBatch)->Mul->Gradient
+    */
+
+    prediction->initGrad(); // Enable gradient for output
+
+    // compile_gradient(graph); // Warning: the gradient must be compiled here;
+    //                          // without it, grad is nullptr. Maybe we can
+    //                          // find a better place to do so?
+
+    AIDGE_ASSERT(target->dims().size() == 2,
+                 "Label must have two dims: [BatchSize, NbChannel]");
+
+    std::shared_ptr<Tensor> outputGrad = prediction->grad();
+
     AIDGE_ASSERT(prediction->backend() == target->backend(),
-                 "'prediction' and 'target' Tensors must be on the same backend. Found {} and {}.\n",
-                 prediction->backend(),
-                 target->backend());
+                 "'prediction' and 'target' Tensors must be on the "
+                 "same backend. Found {} and {}.\n",
+                 prediction->backend(), target->backend());
     AIDGE_ASSERT(prediction->dims() == target->dims(),
-                 "'prediction' (shape {}) and 'target' (shape {}) Tensors must have the same dimensions.\n",
-                 prediction->dims(),
-                 target->dims());
+                 "'prediction' (shape {}) and 'target' (shape {}) Tensors must "
+                 "have the same dimensions.\n",
+                 prediction->dims(), target->dims());
     AIDGE_ASSERT(prediction->dataType() == target->dataType(),
-                 "'prediction' (shape {}) and 'target' (shape {}) Tensors must have the same dimensions.\n",
-                 prediction->dims(),
-                 target->dims());
+                 "'prediction' (data type {}) and 'target' (data type {}) "
+                 "Tensors must have the same data type.\n",
+                 prediction->dataType(), target->dataType());
 
     // could be accelerated with constexpr constructors
     std::vector<int> axes_dims(prediction->nbDims());
     std::iota(std::begin(axes_dims), std::end(axes_dims), 0);
     auto rm_node = ReduceMean(axes_dims, 1, "mse_res");
 
-    const std::shared_ptr<Node> pow_node = Pow();
-    const std::shared_ptr<Node> pow_exp_node = Producer(std::make_shared<Tensor>(Array1D<int,1>{{2}}));
+    const std::shared_ptr<Node> pow_node = Pow("square");
+    const std::shared_ptr<Node> pow_exp_node =
+        Producer(std::make_shared<Tensor>(Array1D<int, 1>{{2}}), "exp_val");
     pow_exp_node->addChild(pow_node, 0, 1);
 
-    const std::shared_ptr<Node> sub_node = Sub();
-    Producer(prediction)->addChild(sub_node, 0, 0);
-    Producer(target)->addChild(sub_node, 0, 1);
+    const std::shared_ptr<Node> sub_node = Sub("err");
+    Producer(prediction, "pred")->addChild(sub_node, 0, 0);
+    Producer(target, "label")->addChild(sub_node, 0, 1);
+
+    const std::shared_ptr<Node> mul_node = Mul("gradient");
+    // Note: this assumes target is [nbBatch, nbChan]
+    Producer(std::make_shared<Tensor>(
+                 Array1D<float, 1>{{2 / float(target->dims()[0])}}))
+        ->addChild(mul_node, 0, 1);
+    sub_node->addChild(mul_node, 0, 0); // Error computation branch!
 
-    std::shared_ptr<GraphView> gv_local = Sequential({
-        sub_node,
-        pow_node,
-        rm_node
-    });
-    gv_local->add({sub_node->getParent(0), sub_node->getParent(1), pow_exp_node});
+    std::shared_ptr<GraphView> gv_local =
+        Sequential({sub_node, pow_node, rm_node});
+    gv_local->add({sub_node->getParent(0), sub_node->getParent(1), pow_exp_node,
+                   mul_node->getParent(1), mul_node});
 
     gv_local->compile(prediction->getImpl()->backend(), prediction->dataType());
-    gv_local->save("MSEgraph");
+
     SequentialScheduler ss_local{gv_local};
     ss_local.forward(false);
 
+    // Retrieve the gradient
+    // Can we avoid this copy?
+    outputGrad->copyFrom(
+        std::dynamic_pointer_cast<OperatorTensor>(mul_node->getOperator())
+            ->getOutput(0)
+            ->clone());
+
     // TODO: way too complicated to access
-    const std::shared_ptr<OperatorTensor> res = std::dynamic_pointer_cast<OperatorTensor>(rm_node->getOperator());
+    const std::shared_ptr<OperatorTensor> res =
+        std::dynamic_pointer_cast<OperatorTensor>(rm_node->getOperator());
     return res->getOutput(0)->clone();
-
 }
diff --git a/unit_tests/loss/regression/Test_MSE.cpp b/unit_tests/loss/regression/Test_MSE.cpp
index 3899470b5f0141fc747f6a2a52cc35b41a590d49..2b0e6d1edfaa1d452c714a08c4998725331df2c3 100644
--- a/unit_tests/loss/regression/Test_MSE.cpp
+++ b/unit_tests/loss/regression/Test_MSE.cpp
@@ -35,9 +35,8 @@ TEST_CASE("[loss/regression] MSE", "[loss][regression][MSE]") {
     std::uniform_real_distribution<float> valueDist(0.0f, 1.0f);
 
     for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
-        // Create a random number generator
-        const std::size_t nb_dims = nbDimsDist(gen);
-        std::vector<std::size_t> dims(nb_dims);
+        const std::size_t nb_dims = 2; // For MSE test, nb_dims is fixed as 2: NbBatch, NbChan
+        std::vector<std::size_t> dims(2);
 
         for (std::size_t i = 0; i < nb_dims; ++i) { dims[i] = dimsDist(gen); }
         const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
@@ -78,11 +77,11 @@ TEST_CASE("[loss/regression] MSE", "[loss][regression][MSE]") {
             targ_tensor->setBackend("cpu");
             targ_tensor->getImpl()->setRawPtr(targ.get(), nb_elements);
             targ_tensor->print();
-            const Tensor res_function = loss::MSE(pred_tensor, targ_tensor);
+            const Tensor res_function = loss::MSE(pred_tensor, targ_tensor);
 
             // compare results
             Tensor res_manual_tensor = Tensor(res_manual);
             REQUIRE(approxEq<float>(res_manual, res_function));
         }
     }
-} // namespace Aidge
\ No newline at end of file
+} // namespace Aidge
diff --git a/unit_tests/optimizer/Test_SGD.cpp b/unit_tests/optimizer/Test_SGD.cpp
index 17f946ae1630c2423a37f703c7923a40e5fe66bf..df9924d557d89d0483d018ce08951cf573e233d7 100644
--- a/unit_tests/optimizer/Test_SGD.cpp
+++ b/unit_tests/optimizer/Test_SGD.cpp
@@ -77,7 +77,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
         optim_tensors[i] = std::make_shared<Tensor>(dims);
         optim_tensors[i]->setBackend("cpu");
         optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-        optim_tensors[i]->initGradient();
+        optim_tensors[i]->initGrad();
 
         grad_tensors[i] = std::make_shared<Tensor>(dims);
         grad_tensors[i]->setBackend("cpu");
diff --git a/version.txt b/version.txt
index 6e8bf73aa550d4c57f6f35830f1bcdc7a4a62f38..17e51c385ea382d4f2ef124b7032c1604845622d 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.1.0
+0.1.1
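
For reference, the graph assembled in src/loss/regression/MSE.cpp evaluates two quantities for a prediction p and a one-hot target t, both of shape [NbBatch, NbChan] (notation follows the implementation note above; this summary is a reader aid, not part of the patch):

    loss = mean((p - t)^2)              // mean over all NbBatch * NbChan elements, returned by MSE()
    grad = (2 / NbBatch) * (p - t)      // written into prediction->grad()

The 2 / target->dims()[0] Producer constant therefore matches the derivative of the squared error summed over channels and averaged over the batch.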
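
A minimal usage sketch of the patched C++ API, mirroring the pattern of unit_tests/loss/regression/Test_MSE.cpp. It assumes the Array2D initializers from aidge_core are reachable through Tensor.hpp and that aidge_backend_cpu is linked so the "cpu" backend is registered; the function name example() and the tensor values are illustrative only, not taken from the patch:

    #include <memory>

    #include "aidge/data/Tensor.hpp"
    #include "aidge/loss/LossList.hpp"

    void example() {
        // Prediction with shape [NbBatch, NbChan]. In a training loop this should be
        // the graph's own output tensor (not a copy); a standalone tensor is used
        // here only to keep the sketch self-contained.
        std::shared_ptr<Aidge::Tensor> prediction = std::make_shared<Aidge::Tensor>(
            Aidge::Array2D<float, 2, 2>{{{0.9f, 0.1f}, {0.2f, 0.8f}}});
        // One-hot encoded ground truth with the same shape, backend and data type.
        std::shared_ptr<Aidge::Tensor> target = std::make_shared<Aidge::Tensor>(
            Aidge::Array2D<float, 2, 2>{{{1.0f, 0.0f}, {0.0f, 1.0f}}});
        prediction->setBackend("cpu");
        target->setBackend("cpu");

        // Returns the scalar loss and fills prediction->grad() with
        // (2 / NbBatch) * (prediction - target).
        Aidge::Tensor loss = Aidge::loss::MSE(prediction, target);
        loss.print();                // scalar MSE value
        prediction->grad()->print(); // starting gradient for the backward pass
    }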