Commit f203e3ac authored by Olivier BICHLER

Finished operators adaptation

parent 2fe30cc0
2 merge requests: !93 Release v0.3.0, !79 Refactor OperatorImpl for backend/export
Pipeline #54307 failed
Showing changed files with 186 additions and 192 deletions
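
Every hunk below makes the same adaptation: the per-datatype kernel lookup (a Registrar keyed on explicit input/output data types, sometimes with a hand-written fallback) is replaced by a single Registrar<XImpl_cpu>::create(getBestMatch(getRequiredSpec())) call that returns an implementation with forward()/backward() members, and each operator gains an explicit backward() that throws when unimplemented. The following self-contained toy model sketches that dispatch-with-fallback idea; every name in it (ImplSpec, KernelRegistry, this getBestMatch) is illustrative and is not the actual Aidge API.

#include <cstddef>
#include <functional>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <tuple>

// Toy model of spec-based kernel dispatch with a fallback; not Aidge code.
struct ImplSpec { std::string inType, outType; };

bool operator<(const ImplSpec& a, const ImplSpec& b) {
    return std::tie(a.inType, a.outType) < std::tie(b.inType, b.outType);
}

struct Impl {
    std::function<void(std::size_t, const float*, float*)> forward;
};

struct KernelRegistry {
    std::map<ImplSpec, Impl> impls;

    // Exact spec if registered, otherwise fall back to the kernel whose types
    // all match the output type -- the fallback the removed code in AddImpl
    // and FCImpl implemented by hand.
    const Impl& getBestMatch(const ImplSpec& required) const {
        auto it = impls.find(required);
        if (it != impls.end()) return it->second;
        it = impls.find({required.outType, required.outType});
        if (it == impls.end()) throw std::runtime_error("no kernel registered");
        return it->second;
    }
};

int main() {
    KernelRegistry registry;
    registry.impls[{"float32", "float32"}] = {
        [](std::size_t n, const float* in, float* out) {
            for (std::size_t i = 0; i < n; ++i) out[i] = (in[i] < 0.f) ? -in[i] : in[i]; // Abs
        }};

    const float in[3] = {-1.f, 2.f, -3.f};
    float out[3];
    const auto& impl = registry.getBestMatch({"int32", "float32"}); // no exact match: falls back
    impl.forward(3, in, out);
    std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << '\n'; // 1 2 3
}
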
@@ -14,24 +14,27 @@
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/AbsImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/AbsImpl_kernels.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Abs.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::AbsImpl_cpu::forward() {
const Abs_Op& op = static_cast<const Abs_Op&>(mOp);
// Find the correct kernel type
auto kernelFunc = Registrar<AbsImplForward_cpu>::create({
op.getInput(0)->dataType(),
op.getOutput(0)->dataType()
});
const auto impl = Registrar<AbsImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(
impl.forward(
op.getInput(0)->size(),
op.getInput(0)->getImpl()->rawPtr(),
op.getOutput(0)->getImpl()->rawPtr()
);
}
template <>
void Aidge::AbsImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Abs_Op on backend cpu");
}
@@ -16,64 +16,57 @@
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/AddImpl_kernels.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/Types.h"
#include "aidge/utils/ErrorHandling.hpp"
template <>
void Aidge::AddImpl_cpu::forward() {
const auto& opTensor = static_cast<const OperatorTensor&>(mOp);
AIDGE_ASSERT(opTensor.getInput(0)->hasImpl(), "cannot run Add forward because the 0-th input has no implementation.");
assert(opTensor.getInput(0) && "missing input in Add operator");
DataType datatypeFirstInput = opTensor.getInput(0)->dataType();
for (IOIndex_t i = 1; i < opTensor.nbInputs(); ++i) {
AIDGE_ASSERT(opTensor.getInput(i)->hasImpl(), "cannot run Add forward because the {}-th input has no implementation.", i);
assert(opTensor.getInput(i) && "missing input in Add operator");
assert(opTensor.getInput(i)->dataType() == datatypeFirstInput);
const Add_Op& op = static_cast<const Add_Op&>(mOp);
// Check inputs
AIDGE_ASSERT(op.getInput(0), "missing input in Add operator");
AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Add forward because the 0-th input has no implementation.");
DataType datatypeFirstInput = op.getInput(0)->dataType();
for (IOIndex_t i = 1; i < op.nbInputs(); ++i) {
AIDGE_ASSERT(op.getInput(i), "missing input in Add operator");
AIDGE_ASSERT(op.getInput(i)->hasImpl(), "cannot run Add forward because the {}-th input has no implementation.", i);
AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput, "Cannot add inputs with two different data types.");
}
// Find the correct kernel type
const auto outputDataType = opTensor.getOutput(0)->dataType();
const Registrar<AddImplForward_cpu>::registrar_key registrarKey = {
datatypeFirstInput,
outputDataType};
Registrar<AddImplForward_cpu>::registrar_type kernelFunc;
if (Registrar<AddImplForward_cpu>::exists(registrarKey)) {
// One exists with the right inputs/output types
kernelFunc = Registrar<AddImplForward_cpu>::create(registrarKey);
}
else {
// Otherwise, fallback to the kernel with all types matching output type
kernelFunc = Registrar<AddImplForward_cpu>::create({
outputDataType, outputDataType});
}
const auto impl = Registrar<AddImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
// call to forward(). We might put the following shared_ptr as members of
// this class to avoid that.
const std::size_t nbDims = opTensor.getOutput(0)->nbDims();
const std::size_t nbDims = op.getOutput(0)->nbDims();
std::vector<std::vector<std::size_t>> inputsDims;
std::vector<const void*> opInputs;
std::vector<std::shared_ptr<Tensor>> inputsFallback(opTensor.nbInputs());
for (IOIndex_t i = 0; i < opTensor.nbInputs(); ++i) {
std::vector<std::shared_ptr<Tensor>> inputsFallback(op.nbInputs());
for (IOIndex_t i = 0; i < op.nbInputs(); ++i) {
std::vector<std::size_t> inputDims(nbDims, 1);
auto dims = opTensor.getInput(i)->dims();
auto dims = op.getInput(i)->dims();
for(std::size_t j=dims.size()-1; j+1>0; --j)
{
std::size_t idx = nbDims - (dims.size()-j);
inputDims[idx] = dims[j];
}
inputsDims.push_back(inputDims);
const auto& input = opTensor.getInput(i)->refCastFrom(inputsFallback[i], *opTensor.getOutput(0));
const auto& input = op.getInput(i)->refCastFrom(inputsFallback[i], *op.getOutput(0));
opInputs.push_back(input.getImpl()->rawPtr());
}
kernelFunc(opInputs,
impl.forward(opInputs,
inputsDims,
opTensor.getOutput(0)->size(),
opTensor.getOutput(0)->dims(),
getCPUPtr(opTensor.getRawOutput(0)));
op.getOutput(0)->size(),
op.getOutput(0)->dims(),
getCPUPtr(op.getRawOutput(0)));
}
template <>
void Aidge::AddImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Add_Op on backend cpu");
}
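
In AddImpl_cpu::forward() above, each input shape is right-aligned to the output rank by padding with leading 1s, so the kernel can broadcast over the missing leading dimensions. A standalone sketch of that alignment step (alignDims is a hypothetical helper, not part of Aidge; it assumes dims.size() <= nbDims):

#include <cstddef>
#include <vector>

// Right-align dims to nbDims by padding with leading 1s, mirroring the loop in
// AddImpl_cpu::forward() above.
std::vector<std::size_t> alignDims(const std::vector<std::size_t>& dims, std::size_t nbDims) {
    std::vector<std::size_t> aligned(nbDims, 1);
    for (std::size_t j = dims.size(); j-- > 0; ) {
        aligned[nbDims - (dims.size() - j)] = dims[j]; // copy dimensions from the right
    }
    return aligned;
}
// e.g. alignDims({3, 4}, 4) yields {1, 1, 3, 4}
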
@@ -21,25 +21,29 @@
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/AndImpl.hpp"
#include "aidge/backend/cpu/operator/AndImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/AndImpl_kernels.hpp"
template <>
void Aidge::AndImpl_cpu::forward() {
// Find the correct kernel type
auto kernelFunc = Registrar<AndImplForward_cpu>::create({
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
// Find the correct kernel type
const auto impl = Registrar<AndImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(inputDims0,
impl.forward(inputDims0,
inputDims1,
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawInput(1)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::AndImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for And_Op on backend cpu");
}
@@ -16,19 +16,24 @@
#include "aidge/utils/Types.h"
#include "aidge/operator/ArgMax.hpp"
#include "aidge/backend/cpu/operator/ArgMaxImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp"
template <>
void Aidge::ArgMaxImpl_cpu::forward() {
const ArgMax_Op& op_ = dynamic_cast<const ArgMax_Op&>(mOp);
// Find the correct kernel type
auto kernelFunc = Registrar<ArgMaxImplForward_cpu>::create({
op_.getInput(0)->dataType(),
op_.getOutput(0)->dataType()});
const auto impl = Registrar<ArgMaxImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(op_.axis(),
impl.forward(op_.axis(),
op_.selectLastIndex(),
op_.getInput(0)->dims(),
op_.getInput(0)->getImpl()->rawPtr(),
op_.getOutput(0)->getImpl()->rawPtr());
}
template <>
void Aidge::ArgMaxImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ArgMax_Op on backend cpu");
}
@@ -16,24 +16,29 @@
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/AvgPooling.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::AvgPoolingImpl2D_cpu::forward() {
const auto& op_ = dynamic_cast<const AvgPooling_Op<2>&>(mOp);
assert(op_.getInput(0) && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<AvgPoolingImpl2DForward_cpu>::create(
{op_.getInput(0)->dataType(),
op_.getOutput(0)->dataType()});
const auto impl = Registrar<AvgPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(op_.strideDims(),
impl.forward(op_.strideDims(),
op_.kernelDims(),
op_.getInput(0)->template dims<4>(),
getCPUPtr(op_.getInput(0)),
getCPUPtr(op_.getOutput(0)));
}
template <>
void Aidge::AvgPoolingImpl2D_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for AvgPooling_Op<2> on backend cpu");
}
@@ -19,8 +19,9 @@
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/operator/BatchNorm.hpp"
#include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp"
template <>
void Aidge::BatchNormImpl2D_cpu::forward() {
const auto& op_ = dynamic_cast<const BatchNorm_Op<2>&>(mOp);
AIDGE_ASSERT(op_.getInput(0), "missing input #0 for BatchNorm Operator");
@@ -30,14 +31,12 @@ void Aidge::BatchNormImpl2D_cpu::forward() {
AIDGE_ASSERT(op_.getInput(4), "missing input #4 for BatchNorm Operator");
AIDGE_ASSERT(op_.getOutput(0)->nbDims() == 4, "");
// Find the correct kernel type
auto kernelFunc =
Registrar<BatchNormImpl2DForward_cpu>::create({op_.getInput(0)->dataType(),
op_.getInput(1)->dataType(),
op_.getOutput(0)->dataType()});
const auto impl = Registrar<BatchNormImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(op_.epsilon(),
impl.forward(op_.epsilon(),
op_.momentum(),
op_.getInput(0)->template dims<4>(),
getCPUPtr(op_.getRawInput(0)),
@@ -48,3 +47,8 @@ void Aidge::BatchNormImpl2D_cpu::forward() {
getCPUPtr(op_.getRawOutput(0)),
true);
}
template <>
void Aidge::BatchNormImpl2D_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BatchNorm_Op<2> on backend cpu");
}
@@ -15,12 +15,13 @@
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ConvDepthWise.hpp"
#include "aidge/utils/Log.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
const auto& op_ = dynamic_cast<const ConvDepthWise_Op<1>&>(mOp);
@@ -30,23 +31,7 @@ void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
AIDGE_ASSERT((op_.getInput(0)->nbDims() == 3), "support for 4-dimensions tensors only");
// Find the correct kernel type
const auto outputDataType = op_.getOutput(0)->dataType();
const Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_key registrarKey = {
op_.getInput(0)->dataType(),
op_.getInput(1)->dataType(),
((op_.getInput(2)) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
outputDataType};
Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_type kernelFunc;
if (Registrar<ConvDepthWiseImpl1DForward_cpu>::exists(registrarKey)) {
// One exists with the right inputs/output types
kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create(registrarKey);
}
else {
// Otherwise, fallback to the kernel with all types matching output type
kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create({
outputDataType, outputDataType, outputDataType, outputDataType});
}
const auto impl = Registrar<ConvDepthWiseImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
@@ -58,7 +43,7 @@ void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
// Call kernel
kernelFunc(op_.strideDims(),
impl.forward(op_.strideDims(),
op_.dilationDims(),
op_.kernelDims(), // Conv attributes
op_.getInput(0)->template dims<3>(), // input dimensions
@@ -69,6 +54,12 @@ void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
);
}
template <>
void Aidge::ConvDepthWiseImpl1D_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConvDepthWise_Op<1> on backend cpu");
}
template <>
void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
const auto& op_ = dynamic_cast<const ConvDepthWise_Op<2>&>(mOp);
@@ -79,11 +70,7 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
AIDGE_ASSERT((op_.getInput(0)->nbDims() == 4), "support for 4-dimensions tensors only");
// Find the correct kernel type
auto kernelFunc = Registrar<ConvDepthWiseImpl2DForward_cpu>::create(
{op_.getInput(0)->dataType(),
op_.getInput(1)->dataType(),
op_.getInput(2)->dataType(),
op_.getOutput(0)->dataType()});
const auto impl = Registrar<ConvDepthWiseImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
@@ -95,7 +82,7 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
const auto& input2 = op_.getInput(2) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
// Call kernel
kernelFunc(op_.strideDims(),
impl.forward(op_.strideDims(),
op_.dilationDims(),
op_.kernelDims(),
op_.getInput(0)->template dims<4>(),
@@ -104,3 +91,8 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr,
getCPUPtr(op_.getRawOutput(0)));
}
template <>
void Aidge::ConvDepthWiseImpl2D_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConvDepthWise_Op<2> on backend cpu");
}
@@ -30,6 +30,7 @@ void Aidge::ConvImpl1D_cpu::forward() {
AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
// Find the correct kernel type
const auto impl = Registrar<ConvImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
// Convert input data (no overhead if not needed!)
@@ -67,6 +68,7 @@ void Aidge::ConvImpl2D_cpu::forward() {
AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
// Find the correct kernel type
const auto impl = Registrar<ConvImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
// Convert input data (no overhead if not needed!)
......
@@ -15,10 +15,11 @@
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/DivImpl.hpp"
#include "aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/DivImpl_kernels.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::DivImpl_cpu::forward() {
// Find the correct kernel type
// auto kernelFunc = Registrar<DivImplForward_cpu>::create({
@@ -55,10 +56,7 @@ void Aidge::DivImpl_cpu::forward() {
const auto& opTensor = static_cast<const Div_Op&>(mOp);
// Find the correct kernel type
auto kernelFunc = Registrar<DivImplForward_cpu>::create({
opTensor.getInput(0)->dataType(),
opTensor.getInput(1)->dataType(),
opTensor.getOutput(0)->dataType()});
const auto impl = Registrar<DivImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Compute compatible input dimensions
std::vector<std::size_t> dims0 = opTensor.getInput(0)->dims();
@@ -68,7 +66,7 @@ void Aidge::DivImpl_cpu::forward() {
// special case for equal dimensions, the kernel is called with the entire arrays at once
if (dims0 == dims1) {
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
kernelFunc(input0_contiguous_size, input0_contiguous_size, input0_contiguous_size,
impl.forward(input0_contiguous_size, input0_contiguous_size, input0_contiguous_size,
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawInput(1)),
getCPUPtr(mOp.getRawOutput(0)));
@@ -134,7 +132,7 @@ void Aidge::DivImpl_cpu::forward() {
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks = std::accumulate(outDims.cbegin(), outDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
kernelFunc(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
impl.forward(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
getCPUPtr(mOp.getRawInput(0), offsetIn0*input0_contiguous_size),
getCPUPtr(mOp.getRawInput(1), offsetIn1*input1_contiguous_size),
getCPUPtr(mOp.getRawOutput(0), offsetOut*output_contiguous_size));
@@ -151,3 +149,8 @@ void Aidge::DivImpl_cpu::forward() {
}
}
}
template <>
void Aidge::DivImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Div_Op on backend cpu");
}
@@ -14,24 +14,27 @@
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/ErfImpl_kernels.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Erf.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::ErfImpl_cpu::forward() {
const Erf_Op& op = static_cast<const Erf_Op&>(mOp);
// Find the correct kernel type
auto kernelFunc = Registrar<ErfImplForward_cpu>::create({
op.getInput(0)->dataType(),
op.getOutput(0)->dataType()
});
const auto impl = Registrar<ErfImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(
impl.forward(
op.getInput(0)->size(),
op.getInput(0)->getImpl()->rawPtr(),
op.getOutput(0)->getImpl()->rawPtr()
);
}
template <>
void Aidge::ErfImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Erf_Op on backend cpu");
}
@@ -17,37 +17,20 @@
#include <tuple>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp"
#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/FCImpl_kernels.hpp"
#include "aidge/operator/FC.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::FCImpl_cpu::forward()
{
const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
AIDGE_ASSERT(op_.getInput(0), "missing input #0");
AIDGE_ASSERT(op_.getInput(1), "missing input #1");
// Find the correct kernel type
const auto outputDataType = op_.getOutput(0)->dataType();
const Registrar<FCImplForward_cpu>::registrar_key registrarKey = {
op_.getInput(0)->dataType(),
op_.getInput(1)->dataType(),
((op_.getInput(2)) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
outputDataType};
Registrar<FCImplForward_cpu>::registrar_type kernelFunc;
if (Registrar<FCImplForward_cpu>::exists(registrarKey)) {
// One exists with the right inputs/output types
kernelFunc = Registrar<FCImplForward_cpu>::create(registrarKey);
}
else {
// Otherwise, fallback to the kernel with all types matching output type
kernelFunc = Registrar<FCImplForward_cpu>::create({
outputDataType, outputDataType, outputDataType, outputDataType});
}
const auto impl = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
@@ -60,7 +43,7 @@ void Aidge::FCImpl_cpu::forward()
// Call kernel
const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
kernelFunc(batchSize,
impl.forward(batchSize,
input1.dims()[1], // nb input features
input1.dims()[0], // nb output features
input0.getImpl()->rawPtr(),
@@ -69,6 +52,7 @@ void Aidge::FCImpl_cpu::forward()
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::FCImpl_cpu::backward()
{
const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
@@ -77,23 +61,7 @@ void Aidge::FCImpl_cpu::backward()
AIDGE_ASSERT(op_.getInput(0)->grad(), "missing input #0 gradient");
AIDGE_ASSERT(op_.getInput(1)->grad(), "missing input #1 gradient");
// Find the correct kernel type
const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = {
fc_grad->dataType(),
op_.getInput(1)->grad()->dataType(),
(op_.getInput(2)) ? op_.getInput(2)->grad()->dataType() : op_.getInput(1)->grad()->dataType(),
op_.getInput(0)->grad()->dataType()};
Registrar<FCImplBackward_cpu>::registrar_type kernelFunc;
if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) {
// One exists with the right inputs/output types
kernelFunc = Registrar<FCImplBackward_cpu>::create(registrarKey);
}
else {
// Otherwise, fallback to the kernel with all types matching output type
kernelFunc = Registrar<FCImplBackward_cpu>::create({
fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType()});
}
const auto impl = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
@@ -106,7 +74,7 @@ void Aidge::FCImpl_cpu::backward()
// Call kernel
const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
kernelFunc(batchSize,
impl.backward(batchSize,
input1grad.dims()[1], // nb input features
input1grad.dims()[0], // nb output features
getCPUPtr(fc_grad),
......
@@ -20,18 +20,18 @@
#include "aidge/operator/Conv.hpp"
#include "aidge/backend/cpu/operator/FoldImpl.hpp"
#include "aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/FoldImpl_kernels.hpp"
template <>
void Aidge::FoldImpl2D_cpu::forward() {
const auto& op_ = static_cast<const Fold_Op<2>&>(mOp);
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
// Find the correct kernel type
auto kernelFunc =
Registrar<FoldImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
const auto impl = Registrar<FoldImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
const auto& op_ = static_cast<const Fold_Op<2>&>(mOp);
kernelFunc(op_.outputDims(),
impl.forward(op_.outputDims(),
op_.strideDims(),
op_.dilationDims(),
op_.kernelDims(),
@@ -39,3 +39,8 @@ void Aidge::FoldImpl2D_cpu::forward() {
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::FoldImpl2D_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Fold_Op<2> on backend cpu");
}
@@ -15,7 +15,7 @@
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/GlobalAveragePooling.hpp"
@@ -24,18 +24,23 @@
#include "aidge/utils/Types.h"
template <>
void Aidge::GlobalAveragePoolingImpl_cpu::forward()
{
const GlobalAveragePooling_Op& op_ = static_cast<const GlobalAveragePooling_Op&>(mOp);
// Check if input is provided
AIDGE_ASSERT(op_.getInput(0), "missing input 0");
// Create the forward kernal with the wanted types
auto kernelFunc = Registrar<GlobalAveragePoolingImplForward_cpu>::create({op_.getInput(0)->dataType(),
op_.getOutput(0)->dataType()});
// Find the correct kernel type
const auto impl = Registrar<GlobalAveragePoolingImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(op_.getInput(0)->dims(),
impl.forward(op_.getInput(0)->dims(),
op_.getInput(0)->getImpl()->rawPtr(),
op_.getOutput(0)->getImpl()->rawPtr());
}
\ No newline at end of file
}
template <>
void Aidge::GlobalAveragePoolingImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for GlobalAveragePooling_Op on backend cpu");
}
@@ -14,14 +14,14 @@
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/LeakyReLU.hpp"
#include "aidge/utils/Log.hpp"
#include "aidge/utils/Types.h"
#include "aidge/utils/Registrar.hpp"
template <>
void Aidge::LeakyReLUImpl_cpu::forward() {
const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp);
@@ -30,17 +30,16 @@ void Aidge::LeakyReLUImpl_cpu::forward() {
AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<LeakyReLUImplForward_cpu>::create({
in0->dataType(),
out0->dataType()});
const auto impl = Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(op_.negativeSlope(),
impl.forward(op_.negativeSlope(),
in0->size(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::LeakyReLUImpl_cpu::backward() {
// reversing in and out Data for backprop
const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp);
@@ -49,12 +48,10 @@ void Aidge::LeakyReLUImpl_cpu::backward() {
AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<LeakyReLUImplForward_cpu>::create({
in0->dataType(),
out0->dataType()});
const auto impl = Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(op_.negativeSlope(),
impl.backward(op_.negativeSlope(),
in0->size(),
getCPUPtr(in0),
getCPUPtr(out0));
......
@@ -20,9 +20,9 @@
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/LnImpl.hpp"
#include "aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp"
#include "aidge/backend/cpu/operator/LnImpl_kernels.hpp"
template <>
void Aidge::LnImpl_cpu::forward() {
const Ln_Op& op_ = static_cast<const Ln_Op&>(mOp);
std::shared_ptr<Tensor> in0 = op_.getInput(0);
@@ -30,16 +30,15 @@ void Aidge::LnImpl_cpu::forward() {
AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<LnImplForward_cpu>::create({
in0->dataType(),
out0->dataType()});
const auto impl = Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(in0->size(),
impl.forward(in0->size(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::LnImpl_cpu::backward() {
const Ln_Op& op_ = dynamic_cast<const Ln_Op&>(mOp);
std::shared_ptr<Tensor> in0 = op_.getInput(0);
@@ -49,12 +48,8 @@ void Aidge::LnImpl_cpu::backward() {
AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
// Find the correct kernel type
auto kernelFunc = Registrar<LnImplBackward_cpu>::create({
in0->dataType(),
gra_int0->dataType(),
gra_out0->dataType()
});
const auto impl = Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
impl.backward(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
}
@@ -19,17 +19,16 @@
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl_kernels.hpp"
template <>
void Aidge::MatMulImpl_cpu::forward()
{
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1");
// Find the correct kernel type
auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
{std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
const auto impl = Registrar<MatMulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Compute compatible input dimensions
std::vector<std::size_t> dims0 = static_cast<const MatMul_Op&>(mOp).getInput(0)->dims();
@@ -91,7 +90,7 @@ void Aidge::MatMulImpl_cpu::forward()
const std::size_t matrix1Size = k*m;
const std::size_t matrixOutSize = n*m;
for (std::size_t stack = 0; stack < nbMatrices;) {
kernelFunc(n, k, m,
impl.forward(n, k, m,
getCPUPtr(mOp.getRawInput(0), offsetIn0*matrix0Size),
getCPUPtr(mOp.getRawInput(1), offsetIn1*matrix1Size),
getCPUPtr(mOp.getRawOutput(0), offsetOut*matrixOutSize));
@@ -126,3 +125,8 @@ void Aidge::MatMulImpl_cpu::forward()
// getCPUPtr(mOp.getRawInput(1)),
// getCPUPtr(mOp.getRawOutput(0)));
// }
template <>
void Aidge::MatMulImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for MatMul_Op on backend cpu");
}
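
The MatMul loop above applies one 2-D matrix-product kernel per stacked matrix of the flattened batch, offsetting into the flat buffers between calls. A minimal self-contained sketch of that idea for the simple case where both inputs share the same batch layout (naive kernel and names are illustrative, not Aidge code):

#include <cstddef>
#include <iostream>
#include <vector>

// Naive (n x k) * (k x m) product, standing in for the registered kernel.
static void matmulKernel(std::size_t n, std::size_t k, std::size_t m,
                         const float* a, const float* b, float* c) {
    for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < m; ++j) {
            float sum = 0.f;
            for (std::size_t l = 0; l < k; ++l) sum += a[i*k + l] * b[l*m + j];
            c[i*m + j] = sum;
        }
}

int main() {
    const std::size_t nbMatrices = 2, n = 2, k = 3, m = 2;
    std::vector<float> a(nbMatrices * n * k, 1.f), b(nbMatrices * k * m, 2.f);
    std::vector<float> c(nbMatrices * n * m);
    // One kernel call per stacked matrix, advancing each pointer by one matrix
    // worth of elements, like the offset arithmetic in MatMulImpl_cpu::forward().
    for (std::size_t stack = 0; stack < nbMatrices; ++stack) {
        matmulKernel(n, k, m,
                     a.data() + stack * n * k,
                     b.data() + stack * k * m,
                     c.data() + stack * n * m);
    }
    std::cout << c[0] << '\n'; // 6 = sum over k of 1 * 2
}
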
@@ -14,26 +14,29 @@
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/utils/Log.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::MaxPoolingImpl2D_cpu::forward() {
const auto& op_ = dynamic_cast<const MaxPooling_Op<2>&>(mOp);
AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator.");
// Find the correct kernel type
auto kernelFunc = Registrar<MaxPoolingImpl2DForward_cpu>::create({
op_.getInput(0)->dataType(),
op_.getOutput(0)->dataType()
});
const auto impl = Registrar<MaxPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(op_.strideDims(),
impl.forward(op_.strideDims(),
op_.kernelDims(),
op_.ceilMode(),
op_.getInput(0)->template dims<4>(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::MaxPoolingImpl2D_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for MaxPooling_Op<2> on backend cpu");
}
@@ -21,25 +21,28 @@
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/MulImpl.hpp"
#include "aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/MulImpl_kernels.hpp"
template <>
void Aidge::MulImpl_cpu::forward() {
// Find the correct kernel type
auto kernelFunc = Registrar<MulImplForward_cpu>::create({
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
// Find the correct kernel type
const auto impl = Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
kernelFunc(inputDims0,
impl.forward(inputDims0,
inputDims1,
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawInput(1)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::MulImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Mul_Op on backend cpu");
}