diff --git a/include/aidge/backend/cpu/operator/TanhImpl.hpp b/include/aidge/backend/cpu/operator/TanhImpl.hpp index 3e88a3d00b5829fc24d8dc77ce53cb358551c7e4..4169b1a533a8b2382644246ea295a683e6f83f1d 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl.hpp @@ -33,7 +33,7 @@ class TanhImplBackward_cpu class TanhImpl_cpu : public OperatorImpl { public: - TanhImpl_cpu(const Tanh_Op& op) : OperatorImpl(op) {} + TanhImpl_cpu(const Tanh_Op& op) : OperatorImpl(op, "cpu") {} static std::unique_ptr<TanhImpl_cpu> create(const Tanh_Op& op) { return std::make_unique<TanhImpl_cpu>(op); diff --git a/include/aidge/backend/cpu/operator/TransposeImpl.hpp b/include/aidge/backend/cpu/operator/TransposeImpl.hpp index 712e672752648f5ff8a3c073f6c81bbe7cc85d9d..3c6913dd71d6642d8b76198a272d64bfaba833e8 100644 --- a/include/aidge/backend/cpu/operator/TransposeImpl.hpp +++ b/include/aidge/backend/cpu/operator/TransposeImpl.hpp @@ -57,7 +57,7 @@ class TransposeImpl6DBackward_cpu class TransposeImpl2D_cpu : public OperatorImpl { public: - TransposeImpl2D_cpu(const Transpose_Op<2>& op) : OperatorImpl(op) {} + TransposeImpl2D_cpu(const Transpose_Op<2>& op) : OperatorImpl(op, "cpu") {} static std::unique_ptr<TransposeImpl2D_cpu> create(const Transpose_Op<2>& op) { return std::make_unique<TransposeImpl2D_cpu>(op); @@ -68,7 +68,7 @@ public: }; class TransposeImpl3D_cpu : public OperatorImpl { public: - TransposeImpl3D_cpu(const Transpose_Op<3>& op) : OperatorImpl(op) {} + TransposeImpl3D_cpu(const Transpose_Op<3>& op) : OperatorImpl(op, "cpu") {} static std::unique_ptr<TransposeImpl3D_cpu> create(const Transpose_Op<3>& op) { return std::make_unique<TransposeImpl3D_cpu>(op); @@ -79,7 +79,7 @@ public: }; class TransposeImpl4D_cpu : public OperatorImpl { public: - TransposeImpl4D_cpu(const Transpose_Op<4>& op) : OperatorImpl(op) {} + TransposeImpl4D_cpu(const Transpose_Op<4>& op) : OperatorImpl(op, "cpu") {} static std::unique_ptr<TransposeImpl4D_cpu> create(const Transpose_Op<4>& op) { return std::make_unique<TransposeImpl4D_cpu>(op); @@ -90,7 +90,7 @@ public: }; class TransposeImpl5D_cpu : public OperatorImpl { public: - TransposeImpl5D_cpu(const Transpose_Op<5>& op) : OperatorImpl(op) {} + TransposeImpl5D_cpu(const Transpose_Op<5>& op) : OperatorImpl(op, "cpu") {} static std::unique_ptr<TransposeImpl5D_cpu> create(const Transpose_Op<5>& op) { return std::make_unique<TransposeImpl5D_cpu>(op); @@ -101,7 +101,7 @@ public: }; class TransposeImpl6D_cpu : public OperatorImpl { public: - TransposeImpl6D_cpu(const Transpose_Op<6>& op) : OperatorImpl(op) {} + TransposeImpl6D_cpu(const Transpose_Op<6>& op) : OperatorImpl(op, "cpu") {} static std::unique_ptr<TransposeImpl6D_cpu> create(const Transpose_Op<6>& op) { return std::make_unique<TransposeImpl6D_cpu>(op); diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index 7355ebcb3e8fb68bf74dbd1ce831bf471d285cb7..abd40bd6af06c52945815fd6245e661710fa1127 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -9,17 +9,18 @@ * ********************************************************************************/ +#include "aidge/backend/cpu/operator/AddImpl.hpp" + #include <cassert> #include <numeric> // std::accumulate #include <vector> -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp" #include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" - -#include "aidge/backend/cpu/operator/AddImpl.hpp" -#include 
"aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp" +#include "aidge/utils/Types.h" +#include "aidge/utils/ErrorHandling.hpp" Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place @@ -27,15 +28,18 @@ Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex } void Aidge::AddImpl_cpu::forward() { - assert(mOp.getRawInput(0) && "missing input in Add operator"); - DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(); - for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) { - assert(mOp.getRawInput(i) && "missing input in Add operator"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput); + const auto& opTensor = static_cast<const OperatorTensor&>(mOp); + AIDGE_ASSERT(opTensor.getInput(0)->hasImpl(), "cannot run Add forward because the 0-th input has no implementation."); + assert(opTensor.getInput(0) && "missing input in Add operator"); + DataType datatypeFirstInput = opTensor.getInput(0)->dataType(); + for (IOIndex_t i = 1; i < opTensor.nbInputs(); ++i) { + AIDGE_ASSERT(opTensor.getInput(i)->hasImpl(), "cannot run Add forward because the {}-th input has no implementation.", i); + assert(opTensor.getInput(i) && "missing input in Add operator"); + assert(opTensor.getInput(i)->dataType() == datatypeFirstInput); } // Find the correct kernel type - const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType(); + const auto outputDataType = opTensor.getOutput(0)->dataType(); const Registrar<AddImplForward_cpu>::registrar_key registrarKey = { datatypeFirstInput, outputDataType}; @@ -55,26 +59,26 @@ void Aidge::AddImpl_cpu::forward() { // TODO: right now, if needed, memory will be allocated/deallocated at each // call to forward(). We might put the following shared_ptr as members of // this class to avoid that. 
- std::size_t nbDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->nbDims(); + const std::size_t nbDims = opTensor.getOutput(0)->nbDims(); std::vector<std::vector<std::size_t>> inputsDims; std::vector<const void*> opInputs; - std::vector<std::shared_ptr<Tensor>> inputsFallback(mOp.nbInputs()); - for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) { + std::vector<std::shared_ptr<Tensor>> inputsFallback(opTensor.nbInputs()); + for (IOIndex_t i = 0; i < opTensor.nbInputs(); ++i) { std::vector<std::size_t> inputDims(nbDims, 1); - auto dims = std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dims(); + auto dims = opTensor.getInput(i)->dims(); for(std::size_t j=dims.size()-1; j+1>0; --j) { std::size_t idx = nbDims - (dims.size()-j); inputDims[idx] = dims[j]; } inputsDims.push_back(inputDims); - const auto& input = std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->refCastFrom(inputsFallback[i], *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + const auto& input = opTensor.getInput(i)->refCastFrom(inputsFallback[i], *opTensor.getOutput(0)); opInputs.push_back(input.getImpl()->rawPtr()); } kernelFunc(opInputs, inputsDims, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - getCPUPtr(mOp.getRawOutput(0))); + opTensor.getOutput(0)->size(), + opTensor.getOutput(0)->dims(), + getCPUPtr(opTensor.getRawOutput(0))); } diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index b849142dd3abe0131fb0c6c448530a7669ce27dc..34ea7b37ec9929908192bde6f31d84ae581640a2 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -28,17 +28,19 @@ Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputI } void Aidge::ConvImpl2D_cpu::forward() { + const auto& opTensor = static_cast<const OperatorTensor&>(mOp); + // FIXME: uncomment the following code once memory handling will work assert(mOp.getRawInput(0) && "missing input #0"); assert(mOp.getRawInput(1) && "missing input #1"); assert(mOp.getRawInput(2) && "missing input #2"); // Find the correct kernel type - const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType(); + const auto outputDataType = opTensor.getOutput(0)->dataType(); const Registrar<ConvImpl2DForward_cpu>::registrar_key registrarKey = { - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), + opTensor.getInput(0)->dataType(), + opTensor.getInput(1)->dataType(), + opTensor.getInput(2)->dataType(), outputDataType}; Registrar<ConvImpl2DForward_cpu>::registrar_type kernelFunc; @@ -57,12 +59,12 @@ void Aidge::ConvImpl2D_cpu::forward() { // call to forward(). We might put the following shared_ptr as members of // this class to avoid that. 
std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; - const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(input0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); - const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); - const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + const auto& input0 = opTensor.getInput(0)->refCastFrom(input0Fallback, *opTensor.getOutput(0)); + const auto& input1 = opTensor.getInput(1)->refCastFrom(input1Fallback, *opTensor.getOutput(0)); + const auto& input2 = opTensor.getInput(2)->refCastFrom(input2Fallback, *opTensor.getOutput(0)); // Call kernel - kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), + kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), opTensor.getInput(0)->template dims<4>(), input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp index 729aff2452b46f00eb6d3e0b558c0b3d58ea2f0e..8e2118e9e78fd364189769ead2eb01f1c55b3c58 100644 --- a/src/operator/DivImpl.cpp +++ b/src/operator/DivImpl.cpp @@ -57,17 +57,18 @@ void Aidge::DivImpl_cpu::forward() { // 3. Compute the highest number of contiguous data -> 7 // 4. Compute stride and offset step for the broadcast mechnism // 5. Call a simple kernel + const auto& opTensor = static_cast<const Div_Op&>(mOp); // Find the correct kernel type auto kernelFunc = Registrar<DivImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + opTensor.getInput(0)->dataType(), + opTensor.getInput(1)->dataType(), + opTensor.getOutput(0)->dataType()}); // Compute compatible input dimensions - std::vector<std::size_t> dims0 = static_cast<const Div_Op&>(mOp).getInput(0)->dims(); - std::vector<std::size_t> dims1 = static_cast<const Div_Op&>(mOp).getInput(1)->dims(); - const std::vector<std::size_t>& outDims = static_cast<const Div_Op&>(mOp).getOutput(0)->dims(); + std::vector<std::size_t> dims0 = opTensor.getInput(0)->dims(); + std::vector<std::size_t> dims1 = opTensor.getInput(1)->dims(); + const std::vector<std::size_t>& outDims = opTensor.getOutput(0)->dims(); // if (dims0 == dims1) { // const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); @@ -108,24 +109,24 @@ void Aidge::DivImpl_cpu::forward() { const std::size_t output_contiguous_size = std::accumulate(outDims.cbegin()+contiguousIdx, outDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); // initialize strides to iterate through data because of broadcasting - std::size_t *stride_post0; - std::size_t *stride_post1; + std::int32_t *stride_post0; + std::int32_t *stride_post1; std::int32_t *stride_step0; std::int32_t *stride_step1; if (contiguousIdx > 0) { - stride_post0 = new std::size_t[contiguousIdx]; + stride_post0 = new std::int32_t[contiguousIdx]; stride_post0[contiguousIdx - 1] = 1; - stride_post1 = new std::size_t[contiguousIdx]; + stride_post1 = new std::int32_t[contiguousIdx]; 
stride_post1[contiguousIdx - 1] = 1; for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) { - stride_post0[i] = stride_post0[i+1]*dims0[i+1]; - stride_post1[i] = stride_post1[i+1]*dims1[i+1]; + stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]); + stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]); } stride_step0 = new std::int32_t[contiguousIdx]; stride_step1 = new std::int32_t[contiguousIdx]; for (std::size_t i = 0; i != contiguousIdx; ++i) { - stride_step0[i] = (dims0[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post0[i]) : 1; - stride_step1[i] = (dims1[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post1[i]) : 1; + stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1; } } diff --git a/src/operator/ErfImpl.cpp b/src/operator/ErfImpl.cpp index 06ec65008aee41215192cd05e126ac4f82388c1b..55752e4f5b9f798a6901e108ddcba2f61fdf9774 100644 --- a/src/operator/ErfImpl.cpp +++ b/src/operator/ErfImpl.cpp @@ -9,32 +9,34 @@ * ********************************************************************************/ -#include <cassert> -#include <chrono> // std::chrono::milliseconds -#include <numeric> // std::accumulate -#include <thread> // std::this_thread::sleep_for +#include "aidge/backend/cpu/operator/ErfImpl.hpp" + +#include <memory> #include <vector> +#include "aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp" +#include "aidge/data/Tensor.hpp" #include "aidge/operator/Erf.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/operator/ErfImpl.hpp" -#include "aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp" - Aidge::NbElts_t Aidge::ErfImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place return 0; } void Aidge::ErfImpl_cpu::forward() { + const Erf_Op& op = static_cast<const Erf_Op&>(mOp); // Find the correct kernel type auto kernelFunc = Registrar<ErfImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + op.getInput(0)->dataType(), + op.getOutput(0)->dataType() + }); // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + kernelFunc( + op.getInput(0)->size(), + op.getInput(0)->getImpl()->rawPtr(), + op.getOutput(0)->getImpl()->rawPtr() + ); } diff --git a/src/operator/GatherImpl.cpp b/src/operator/GatherImpl.cpp index ce98627d95e0d05541db1ccaf4896abe756431b0..d80b53e7e864faf3fca289f94aba4f511bcba161 100644 --- a/src/operator/GatherImpl.cpp +++ b/src/operator/GatherImpl.cpp @@ -9,32 +9,34 @@ * ********************************************************************************/ -#include <cassert> -#include <chrono> // std::chrono::milliseconds -#include <numeric> // std::accumulate -#include <thread> // std::this_thread::sleep_for +#include "aidge/backend/cpu/operator/GatherImpl.hpp" + +#include <memory> #include <vector> +#include "aidge/backend/cpu/operator/GatherImpl_forward_kernels.hpp" +#include "aidge/data/Data.hpp" +#include "aidge/data/Tensor.hpp" #include "aidge/operator/Gather.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/operator/GatherImpl.hpp" -#include "aidge/backend/cpu/operator/GatherImpl_forward_kernels.hpp" - Aidge::NbElts_t 
Aidge::GatherImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place return 0; } void Aidge::GatherImpl_cpu::forward() { + const Gather_Op& op = static_cast<const Gather_Op&>(mOp); auto kernelFunc = Registrar<GatherImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + op.getInput(0)->dataType(), + op.getOutput(0)->dataType() + }); // Call kernel kernelFunc(dynamic_cast<const Gather_Op&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + op.getInput(0)->dims(), + op.getInput(0)->getImpl()->rawPtr(), + op.getOutput(0)->getImpl()->rawPtr() + ); }
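
Reviewer note (not part of the patch): every hunk above applies the same refactor. Instead of fetching raw IOs and down-casting at each use with std::static_pointer_cast<Tensor>(mOp.getRawInput(i)), each forward() body now takes one typed view of the operator (OperatorTensor, or the concrete *_Op such as Div_Op, Erf_Op, Gather_Op) and reads tensors through getInput(i)/getOutput(i); the constructors additionally pass the "cpu" backend name to OperatorImpl. Below is a minimal sketch of that access pattern, assuming the Aidge API visible in the hunks (getInput/getOutput returning std::shared_ptr<Tensor> with dataType(), dims(), size(), getImpl()); the function name and the OperatorTensor include path are placeholders for illustration, not names taken from the patch.

    // Hedged sketch, not code from the patch: a free function standing in for a
    // forward() body; the real implementations above do the same through mOp.
    #include <memory>
    #include "aidge/data/Tensor.hpp"              // Tensor::dataType(), size(), getImpl()
    #include "aidge/operator/OperatorTensor.hpp"  // assumed header for OperatorTensor

    static void describeIO(const Aidge::OperatorTensor& opTensor) {
        // Typed accessors replace std::static_pointer_cast<Tensor>(mOp.getRawInput(i)).
        const auto inputType  = opTensor.getInput(0)->dataType();
        const auto outputType = opTensor.getOutput(0)->dataType();

        // In the patch, kernel selection is keyed on these data types, and the
        // selected kernel is then called on getImpl()->rawPtr() buffers.
        (void)inputType;
        (void)outputType;
    }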
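
The other substantive change sits in DivImpl.cpp: the broadcast stride buffers stride_post0/stride_post1 switch from std::size_t to std::int32_t, so the per-axis step 1 - stride_post[i] (used when an input axis has size 1, presumably so the kernel re-reads the same slice) is computed in signed arithmetic without the per-element static_cast the old code needed. A small standalone illustration of why that subtraction must not stay unsigned; the stride value is made up for the example.

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Example only: a suffix-product stride of 12 for some broadcast axis.
        std::size_t  stride_post_unsigned = 12;
        std::int32_t stride_post_signed   = 12;

        std::size_t  wrapped = 1 - stride_post_unsigned;  // unsigned wrap-around
        std::int32_t step    = 1 - stride_post_signed;    // intended -11

        std::printf("unsigned result: %zu, signed step: %d\n", wrapped, step);
        return 0;
    }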