diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp
index 5e795922a67be178dde588e8e5e346ec268efe86..331bd10b16b9089649f696b0da2ff136fb85abfc 100644
--- a/include/aidge/backend/cpu/operator/AddImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AddImpl.hpp
@@ -12,8 +12,8 @@
 #ifndef AIDGE_CPU_OPERATOR_ADDIMPL_H_
 #define AIDGE_CPU_OPERATOR_ADDIMPL_H_
 
-#include <cstddef>  // std::size_t
-#include <memory>   // std::unique_ptr, std::make_unique
+#include <cstddef> // std::size_t
+#include <memory>  // std::unique_ptr, std::make_unique
 #include <string>
 #include <vector>
 
@@ -24,11 +24,17 @@
 namespace Aidge {
 
 // Operator implementation entry point for the backend
-using AddImpl_cpu = OperatorImpl_cpu<Add_Op,
-    void(const std::vector<const void*>, const std::vector<std::vector<std::size_t>>&, const std::size_t, const std::vector<std::size_t>&, void*)>;
+using AddImpl_cpu =
+    OperatorImpl_cpu<Add_Op,
+                     void(const std::vector<const void *>,
+                          const std::vector<std::vector<std::size_t>> &,
+                          const std::size_t,
+                          const std::vector<std::size_t> &,
+                          void *),
+                     void()>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Add_Op, "cpu", Aidge::AddImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ADDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp
index 4a4ba2a8999c4dc33fc743b5a3a7dad023f9e0dd..fe1e5e0ac5f280daf894de0bcc00cdcd4326fcab 100644
--- a/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp
@@ -14,7 +14,7 @@
 
 #include "aidge/utils/Registrar.hpp"
 
-#include <cstdint>  // std::int32_t, std::int64_t
+#include <cstdint> // std::int32_t, std::int64_t
 
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/AddImpl.hpp"
 
@@ -22,38 +22,56 @@
 namespace Aidge {
 
 template <class I, class O>
-void AddImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const std::vector<std::vector<std::size_t>>& inputDims, const std::size_t outputLength, const std::vector<std::size_t>& outDims, void* output_) {
+void AddImpl_cpu_forward_kernel(
+    const std::vector<const void *> inputs_,
+    const std::vector<std::vector<std::size_t>> &inputDims,
+    const std::size_t outputLength,
+    const std::vector<std::size_t> &outDims,
+    void *output_) {
     // FIXME: missing Add attributes as arguments
-    std::vector<const I*> inputs;
-    for (const auto& input_ : inputs_) {
-        inputs.push_back(static_cast<const I*>(input_));
+    std::vector<const I *> inputs;
+    for (const auto &input_ : inputs_) {
+        inputs.push_back(static_cast<const I *>(input_));
     }
-    O* output = static_cast<O*>(output_);
+    O *output = static_cast<O *>(output_);
 
-    for (std::size_t oIndex = 0; oIndex < outputLength; ++oIndex)
-    {
+    for (std::size_t oIndex = 0; oIndex < outputLength; ++oIndex) {
         output[oIndex] = 0;
-        std::vector<size_t> indexes = getMultiDimIndices(outDims, oIndex);
-        for(std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
-            std::size_t idx = getFlattenedIndex(inputDims[iIndex], indexes);
+        std::vector<size_t> indexes = getMultiDimIndices(outDims, oIndex);
+        for (std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
+            std::size_t idx = getFlattenedIndex(inputDims[iIndex], indexes);
             output[oIndex] += inputs[iIndex][idx];
-        }
-    }
+        }
+    }
+}
+
+template <class I, class O> void AddImpl_cpu_backward_kernel() {
+    Log::debug("Do nothing");
 }
 
 // Kernels registration to implementation entry point
 REGISTRAR(AddImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, nullptr});
+          {ImplSpec::IOSpec{DataType::Any},
+           ImplSpec::IOSpec{DataType::Float32}},
+          {ProdConso::inPlaceModel,
+           Aidge::AddImpl_cpu_forward_kernel<float, float>,
+           Aidge::AddImpl_cpu_backward_kernel<float, float>});
 REGISTRAR(AddImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, nullptr});
+          {ImplSpec::IOSpec{DataType::Any},
+           ImplSpec::IOSpec{DataType::Float64}},
+          {ProdConso::inPlaceModel,
+           Aidge::AddImpl_cpu_forward_kernel<double, double>,
+           Aidge::AddImpl_cpu_backward_kernel<double, double>});
 REGISTRAR(AddImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
+          {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
+          {ProdConso::inPlaceModel,
+           Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>,
+           Aidge::AddImpl_cpu_backward_kernel<std::int32_t, std::int32_t>});
 REGISTRAR(AddImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
-}  // namespace Aidge
+          {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
+          {ProdConso::inPlaceModel,
+           Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>,
+           Aidge::AddImpl_cpu_backward_kernel<std::int64_t, std::int64_t>});
+} // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */
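Review note: as registered above, the backward entry has type `void()` and the kernel body only logs, so `scheduler.backward()` traverses Add without propagating any gradient. A broadcasting-aware implementation could mirror the forward kernel's index mapping, folding each output-gradient element back onto the input positions that contributed to it. The sketch below is illustrative only and not part of this patch; the parameter list is an assumed future extension of the `void()` signature, reusing `getMultiDimIndices`/`getFlattenedIndex` from `Broadcasting.hpp`.

```cpp
// Illustrative sketch, not part of this patch: a possible Add backward once
// the registered kernel type is extended beyond void(). The parameter layout
// is assumed, mirroring AddImpl_cpu_forward_kernel.
template <class I, class O>
void AddImpl_cpu_backward_kernel(const std::size_t outputLength,
                                 const std::vector<std::vector<std::size_t>>& inputDims,
                                 const std::vector<std::size_t>& outDims,
                                 const void* outputGrad_,
                                 const std::vector<void*>& inputGrads_) {
    const O* outputGrad = static_cast<const O*>(outputGrad_);
    std::vector<I*> inputGrads;
    for (const auto& grad_ : inputGrads_) {
        inputGrads.push_back(static_cast<I*>(grad_));
    }
    // Addition broadcasts its inputs, so each input-gradient element is the
    // sum of the output gradient over every output position that read it.
    // Gradient buffers are assumed zero-initialized by the caller.
    for (std::size_t oIndex = 0; oIndex < outputLength; ++oIndex) {
        const std::vector<std::size_t> indexes = getMultiDimIndices(outDims, oIndex);
        for (std::size_t iIndex = 0; iIndex < inputGrads.size(); ++iIndex) {
            const std::size_t idx = getFlattenedIndex(inputDims[iIndex], indexes);
            inputGrads[iIndex][idx] += outputGrad[oIndex];
        }
    }
}
```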
diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp
index 457a0b17e531fac35ff873f9eedca7bbbe82d459..d674aa11937bea5299f0b65e993c52fc10b06b1d 100644
--- a/src/operator/AddImpl.cpp
+++ b/src/operator/AddImpl.cpp
@@ -68,5 +68,25 @@
 
 template <>
 void Aidge::AddImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Add_Op on backend cpu");
+    const Add_Op& op = static_cast<const Add_Op&>(mOp);
+
+    // Check inputs
+    AIDGE_ASSERT(op.getInput(0), "missing input in Add operator");
+    AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Add backward because the 0-th input has no implementation.");
+
+    DataType datatypeFirstInput = op.getInput(0)->dataType();
+    for (IOIndex_t i = 1; i < op.nbInputs(); ++i) {
+        AIDGE_ASSERT(op.getInput(i), "missing input in Add operator");
+        AIDGE_ASSERT(op.getInput(i)->hasImpl(), "cannot run Add backward because the {}-th input has no implementation.", i);
+        AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput, "Cannot add inputs with two different data types.");
+    }
+
+    Log::debug("Trying to find a kernel function");
+    Log::info("Add node output grad : {}", op.getOutput(0)->grad()->toString());
+
+    const auto impl = Registrar<AddImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
+    Log::debug("Kernel function found!");
+
+    impl.backward();
 }
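Review note: the rewritten `backward()` mirrors the forward dispatch (registrar lookup through `getBestMatch(getRequiredSpec())`), but with the registered backward type still `void()`, `impl.backward()` carries no tensor arguments, so the kernel cannot touch gradients yet. If the kernel signature were extended as sketched earlier, the call site could pass raw buffers the way the backend's forward dispatches do. Illustrative only; `getCPUPtr` as the raw-buffer accessor is an assumption here.

```cpp
// Illustrative call-site sketch, not part of this patch: feeding gradient
// buffers to an extended backward kernel. getCPUPtr is assumed available,
// following the pattern used by the CPU backend's forward dispatches.
std::vector<std::vector<std::size_t>> inputDims;
std::vector<void*> inputGrads;
for (IOIndex_t i = 0; i < op.nbInputs(); ++i) {
    inputDims.push_back(op.getInput(i)->dims());
    inputGrads.push_back(getCPUPtr(op.getInput(i)->grad()));
}
impl.backward(op.getOutput(0)->grad()->size(),    // outputLength
              inputDims,
              op.getOutput(0)->dims(),            // outDims
              getCPUPtr(op.getOutput(0)->grad()), // output gradient (read)
              inputGrads);                        // input gradients (written)
```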
diff --git a/src/operator/MulImpl.cpp b/src/operator/MulImpl.cpp
index ea5e3d3ab8ac24934a0cb6f9042858fa094700af..79f90f0e77ac4bb0fadad5409133e5049f0f8f34 100644
--- a/src/operator/MulImpl.cpp
+++ b/src/operator/MulImpl.cpp
@@ -52,6 +52,8 @@ void Aidge::MulImpl_cpu::backward() {
     auto in1grad = op_.getInput(1)->grad();
     auto out0grad = op_.getOutput(0)->grad();
 
+    Log::info("Mul node output grad 0 : {}", op_.getOutput(0)->grad()->toString());
+
     // Find the correct kernel type
     const auto impl = Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp
index a203d3d29f7d135eb1f63223b48ccc5a5dfd49f8..a82d71bd4b4e923b296a99cec1ce35cb6434721b 100644
--- a/unit_tests/operator/Test_MetaOperator.cpp
+++ b/unit_tests/operator/Test_MetaOperator.cpp
@@ -611,8 +611,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         scheduler.forward(true);
 
         scheduler.saveSchedulingDiagram("stack_scheduler_seq");
-        op->getOutput(0)->print();
-        myHiddenState->print();
+        //op->getOutput(0)->print();
+        //myHiddenState->print();
 
         REQUIRE(true);
     }
@@ -624,6 +624,7 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
        constexpr auto IN_CHANNELS = 28*28;
         constexpr auto OUT_CHANNELS = 10;
         Log::info("Stack tests");
+        Log::debug("Stack tests");
 
         auto pop = Pop();
@@ -639,13 +640,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         myInput->setBackend("cpu");
         myInput->setDataType(DataType::Float32);
 
-        auto leaky = Leaky(3, 0.5, "leaky");
+        auto leaky = Leaky(2, 0.5, "leaky");
         auto op = std::dynamic_pointer_cast<MetaOperator_Op>(leaky->getOperator());
         //pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
 
         pop->addChild(leaky,0,0);
         leaky->addChild(sta, 0,0);
-        auto memorizeInit = getCpuFloat32Tensor({1, 10},0);
+        auto memorizeInit = getCpuFloat32Tensor({1, 10},1);
         auto input = getCpuFloat32Tensor({1,10},0.5);
 
         //op->associateInput(0, input);
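Review note on the test changes: in the hunk above, `Leaky` drops from 3 to 2 timesteps, presumably so the unrolled loop matches the two-timestep input, and `memorizeInit` now starts at 1 instead of 0. The hunk below then drives the new Add backward path indirectly, through the Pop -> Leaky -> Stack graph. A more direct unit test could exercise `AddImpl_cpu::backward()` in isolation, reusing helpers already present in this file; this is a sketch only, and the `Add` factory signature is an assumption.

```cpp
// Sketch only, not part of this patch: exercising AddImpl_cpu::backward()
// without the Leaky meta-operator. getCpuFloat32Tensor/getCpuFloat32GV are
// the helpers already used in this test file; Add's factory signature is
// an assumption.
auto add = Add("addOp");
add->getOperator()->associateInput(0, getCpuFloat32Tensor({1, 10}, 0.5));
add->getOperator()->associateInput(1, getCpuFloat32Tensor({1, 10}, 1));

auto scheduler = SequentialScheduler(getCpuFloat32GV(add));
scheduler.forward();
scheduler.backward(); // dispatches to the new AddImpl_cpu::backward()
```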
@@ -657,9 +658,90 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         scheduler.forward();
         scheduler.saveSchedulingDiagram("scheddiagram");
         REQUIRE(true);
+
+        Log::info("\n\n***** Running backward *****\n\n");
+        scheduler.backward();
         Log::info("Done!\n\n");
+        /*
+        auto pop = Pop();
+        //auto sta = Pop();
+        auto sta = stack(2);
+        auto relu = ReLU("reluOp");
+        auto myInput = std::make_shared<Tensor>(
+            Array3D<float, 2, 1, 10>{
+                {{{0.0,1,2,3,4,5,6,7,8,9}},
+                 {{0.0,1,2,3,4,5,6,7,8,9}}
+                }}
+        );
+        myInput->setBackend("cpu");
+        myInput->setDataType(DataType::Float32);
+
+        //auto leaky = Leaky(3, 0.5, "leaky");
+        auto op = std::dynamic_pointer_cast<MetaOperator_Op>(relu->getOperator());
+        //pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
+
+        pop->addChild(relu,0,0);
+        relu->addChild(sta, 0,0);
+        auto memorizeInit = getCpuFloat32Tensor({1, 10},0);
+        auto input = getCpuFloat32Tensor({1,10},0.5);
+
+        //op->associateInput(0, input);
+        pop->getOperator()->associateInput(0, myInput);
+
+        auto gv = getCpuFloat32GV(relu);
+        auto scheduler = SequentialScheduler(gv);
+        scheduler.forward();
+        scheduler.saveSchedulingDiagram("scheddiagram");
+        REQUIRE(true);
+        Log::info("Done!\n\n");
+        */
+
+    }
+
+    SECTION("Integrated")
+    {
+        // TODO: Add a pop and a stack node
+        constexpr auto BETA = 0.95;
+        constexpr auto IN_CHANNELS = 28*28;
+        constexpr auto OUT_CHANNELS = 10;
+        Log::info("Stack tests");
+        Log::debug("Stack tests");
+
+        auto myInput = std::make_shared<Tensor>(
+            Array3D<float, 2, 1, 10>{
+                {{{0.0,1,2,3,4,5,6,7,8,9}},
+                 {{0.0,1,2,3,4,5,6,7,8,9}}
+                }}
+        );
+        myInput->setBackend("cpu");
+        myInput->setDataType(DataType::Float32);
+
+        auto inte = Decay(2, "decay");
+        auto op = std::dynamic_pointer_cast<MetaOperator_Op>(inte->getOperator());
+        //pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
+
+        auto memorizeInit = getCpuFloat32Tensor({1, 10},1);
+        auto input = getCpuFloat32Tensor({1,10},0.5);
+
+        //op->associateInput(0, input);
+        inte->getOperator()->associateInput(0, myInput);
+        inte->getOperator()->associateInput(1, memorizeInit);
+
+        auto gv = getCpuFloat32GV(inte);
+        auto scheduler = SequentialScheduler(gv);
+        scheduler.forward();
+        Log::info("Done running forward");
+        op->getOutput(0)->print();
+        scheduler.saveSchedulingDiagram("scheddiagram");
+        REQUIRE(true);
+
+        Log::info("\n\n***** Running backward *****\n\n");
+        scheduler.backward();
+        Log::info("Done!\n\n");
+        /*
+        auto pop = Pop();
+        //auto sta = Pop();