Commit 4260ba9f authored by Houssem ROUIS

Merge branch 'master' of gitlab.eclipse.org:hrouis/aidge_backend_cpu into vit_operators

parents bfdf7407 fafa52d8
2 merge requests: !50 version 0.2.0, !20 Vit operators
Showing with 305 additions and 341 deletions
@@ -49,24 +49,24 @@ class test_recipies(unittest.TestCase):
np_shift = np.array([0.05]).astype(np.float32)
np_mean = np.array([0.05]).astype(np.float32)
np_var = np.array([0.05]).astype(np.float32)
conv.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_weights))
conv.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_bias))
bn.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_scale))
bn.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_shift))
bn.input(3)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_mean))
bn.input(4)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_var))
conv.input(1)[0].get_operator().set_output(0, aidge_core.Tensor(np_weights))
conv.input(2)[0].get_operator().set_output(0, aidge_core.Tensor(np_bias))
bn.input(1)[0].get_operator().set_output(0, aidge_core.Tensor(np_scale))
bn.input(2)[0].get_operator().set_output(0, aidge_core.Tensor(np_shift))
bn.input(3)[0].get_operator().set_output(0, aidge_core.Tensor(np_mean))
bn.input(4)[0].get_operator().set_output(0, aidge_core.Tensor(np_var))
scheduler0 = aidge_core.SequentialScheduler(graph_view)
scheduler0.forward()
for outNode in graph_view.get_output_nodes():
output_aidge0 = outNode.get_operator().output(0)
output_aidge0 = outNode.get_operator().get_output(0)
aidge_core.fuse_batchnorm(graph_view)
scheduler1 = aidge_core.SequentialScheduler(graph_view)
scheduler1.forward()
for outNode in graph_view.get_output_nodes():
output_aidge1 = outNode.get_operator().output(0)
output_aidge1 = outNode.get_operator().get_output(0)
self.assertTrue(aidge_core.approx_eq(output_aidge0, output_aidge1, 0.000001, 0.0001))
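Editor's note: the rename exercised by this test replaces the Producer-specific setter and the untyped getter with indexed accessors (set_output_tensor(t) becomes set_output(0, t); output(0) becomes get_output(0)). A minimal sketch of the new calls, assuming a hypothetical single Conv node whose weight producer is wired at input #1 as in the test above (node name and shapes are illustrative only):

    import numpy as np
    import aidge_core

    # Hypothetical node: Conv2D(in_channels, out_channels, kernel_dims).
    conv = aidge_core.Conv2D(1, 1, [3, 3], name="conv0")
    np_weights = np.ones((1, 1, 3, 3), dtype=np.float32)
    # Old API: producer_op.set_output_tensor(tensor)
    # New API: producer_op.set_output(output_index, tensor)
    conv.input(1)[0].get_operator().set_output(0, aidge_core.Tensor(np_weights))
    # Old API: op.output(index); new API: op.get_output(index)
    out_tensor = conv.get_operator().get_output(0)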
@@ -22,30 +22,30 @@ class test_scheduler(unittest.TestCase):
gv.add(relu)
gv.add(input_node)
input_node.add_child(relu)
gv.set_datatype(aidge_core.DataType.Int32)
gv.set_backend("cpu")
input_node.add_child(relu)
scheduler = aidge_core.SequentialScheduler(gv)
scheduler.forward()
out_tensor = relu.get_operator().output(0)
out_tensor = relu.get_operator().get_output(0)
expected_out = [0,0,0,0,1,2]
for i in range(len(expected_out)):
self.assertEqual(expected_out[i], out_tensor[i])
def test_sequential_scheduling(self):
input_data = np.array([]).astype(np.float32)
input_data = np.array([0]).astype(np.float32)
input_tensor = aidge_core.Tensor(input_data)
input_node = aidge_core.Producer(input_tensor, "X")
graph_view = aidge_core.sequential([
aidge_core.FC(50, name='0'),
aidge_core.FC(50, name='1'),
aidge_core.FC(10, name='2'),
aidge_core.FC(1, 50, name='0'),
aidge_core.FC(50, 50, name='1'),
aidge_core.FC(50, 10, name='2'),
])
EXPECTED_SCHEDULE = ['0', '1', '2']
@@ -64,14 +64,14 @@ class test_scheduler(unittest.TestCase):
def test_parallel_scheduling(self):
input_data = np.array([]).astype(np.float32)
input_data = np.array([0]).astype(np.float32)
input_tensor = aidge_core.Tensor(input_data)
input_node = aidge_core.Producer(input_tensor, "X")
graph_view = aidge_core.sequential([
aidge_core.FC(50, name='0'),
aidge_core.parallel([aidge_core.FC(50, name='1'), aidge_core.FC(50, name='3')]),
aidge_core.Add(name='2'),
aidge_core.FC(1, 50, name='0'),
aidge_core.parallel([aidge_core.FC(50, 50, name='1'), aidge_core.FC(50, 50, name='3')]),
aidge_core.Add(2, name='2'),
])
EXPECTED_SCHEDULE = [['0', '1', '3', '2'], ['0', '3', '1', '2']] # Both scheduling are valid !
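Editor's note: a minimal sketch of the updated constructor signatures used in these tests (assuming the 0.2.0-style Python bindings): FC now takes explicit input and output sizes, and Add takes its number of inputs.

    import aidge_core

    graph_view = aidge_core.sequential([
        aidge_core.FC(1, 50, name='0'),       # FC(in_size, out_size, ...)
        aidge_core.parallel([aidge_core.FC(50, 50, name='1'),
                             aidge_core.FC(50, 50, name='3')]),
        aidge_core.Add(2, name='2'),          # Add(nb_inputs, ...)
    ])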
@@ -23,87 +23,39 @@ namespace Aidge {
// class Add_Op<2>;
// compute kernel registry for forward and backward
template <DimIdx_t NUM>
class AddImplForward_cpu;
template <DimIdx_t NUM>
class AddImplBackward_cpu;
template <>
class AddImplForward_cpu<1>
: public Registrable<AddImplForward_cpu<1>, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {};
template <>
class AddImplBackward_cpu<1>
: public Registrable<AddImplBackward_cpu<1>, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {};
template <>
class AddImplForward_cpu<2> : public Registrable<AddImplForward_cpu<2>, std::tuple<DataType, DataType, DataType>,
void(const std::size_t, const void*, const void*, void*)> {};
template <>
class AddImplBackward_cpu<2> : public Registrable<AddImplBackward_cpu<2>, std::tuple<DataType, DataType, DataType>,
void(const std::size_t, const void*, const void*, void*)> {};
template <>
class AddImplForward_cpu<3> : public Registrable<AddImplForward_cpu<3>, std::tuple<DataType, DataType, DataType, DataType>,
void(const std::size_t, const void*, const void*, const void*, void*)> {
};
template <>
class AddImplBackward_cpu<3>
: public Registrable<AddImplBackward_cpu<3>, std::tuple<DataType, DataType, DataType, DataType>,
void(const std::size_t, const void*, const void*, const void*, void*)> {};
class AddImplForward_cpu
: public Registrable<AddImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const std::vector<const void*>, void*)> {};
class AddImplBackward_cpu
: public Registrable<AddImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const std::vector<const void*>, void*)> {};
template <DimIdx_t NUM>
class AddImpl_cpu : public OperatorImpl {
public:
AddImpl_cpu(const Add_Op<NUM>& op) : OperatorImpl(op) {}
AddImpl_cpu(const Add_Op& op) : OperatorImpl(op) {}
static std::unique_ptr<AddImpl_cpu<NUM>> create(const Add_Op<NUM>& op) {
return std::make_unique<AddImpl_cpu<NUM>>(op);
static std::unique_ptr<AddImpl_cpu> create(const Add_Op& op) {
return std::make_unique<AddImpl_cpu>(op);
}
};
template <>
class AddImpl_cpu<1> : public OperatorImpl {
public:
AddImpl_cpu(const Add_Op<1>& op) : OperatorImpl(op) {}
static std::unique_ptr<AddImpl_cpu<1>> create(const Add_Op<1>& op) {
return std::make_unique<AddImpl_cpu<1>>(op);
}
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
void forward() override;
};
template <>
class AddImpl_cpu<2> : public OperatorImpl {
public:
AddImpl_cpu(const Add_Op<2>& op) : OperatorImpl(op) {}
static std::unique_ptr<AddImpl_cpu<2>> create(const Add_Op<2>& op) {
return std::make_unique<AddImpl_cpu<2>>(op);
}
NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
template <>
class AddImpl_cpu<3> : public OperatorImpl {
public:
AddImpl_cpu(const Add_Op<3>& op) : OperatorImpl(op) {}
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
static std::unique_ptr<AddImpl_cpu<3>> create(const Add_Op<3>& op) {
return std::make_unique<AddImpl_cpu<3>>(op);
}
void updateConsummerProducer() override final;
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
void forward() override;
};
namespace {
static Registrar<Add_Op<1>> registrarAddImpl1I_cpu("cpu", Aidge::AddImpl_cpu<1>::create);
static Registrar<Add_Op<2>> registrarAddImpl2I_cpu("cpu", Aidge::AddImpl_cpu<2>::create);
static Registrar<Add_Op<3>> registrarAddImpl3I_cpu("cpu", Aidge::AddImpl_cpu<3>::create);
static Registrar<Add_Op> registrarAddImpl_cpu("cpu", Aidge::AddImpl_cpu::create);
} // namespace
} // namespace Aidge
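Editor's note: the net effect of this header change is that the per-arity template classes (AddImplForward_cpu<1..3>) collapse into a single registrable entry whose kernels take every input through one std::vector<const void*>. A minimal dispatch sketch under the new signature (function and pointer names are illustrative; pointers are assumed to come from Tensor::getImpl()->rawPtr()):

    #include <cstddef>
    #include <vector>
    #include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp"

    // Look up the float32 add kernel once, then feed it any number of inputs:
    // the arity is now runtime data, not a template parameter.
    void dispatchAddSketch(std::size_t inputLength,
                           const std::vector<const void*>& opInputs,
                           void* outputPtr) {
        auto kernelFunc = Aidge::Registrar<Aidge::AddImplForward_cpu>::create(
            {Aidge::DataType::Float32, Aidge::DataType::Float32}); // {input, output} types
        kernelFunc(inputLength, opInputs, outputPtr);
    }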
@@ -18,70 +18,30 @@
namespace Aidge {
template <class I1, class O>
void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) {
template <class I, class O>
void AddImpl_cpu_forward_kernel(const std::size_t inputLength, const std::vector<const void*> inputs_, void* output_) {
// FIXME: missing Add attributes as arguments
const I1* input1 = static_cast<const I1*>(input1_);
O* output = static_cast<O*>(output_);
for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
output[oIndex] = input1[oIndex];
}
}
template <class I1, class I2, class O>
void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
void* output_) {
// FIXME: missing Add attributes as arguments
const I1* input1 = static_cast<const I1*>(input1_);
const I2* input2 = static_cast<const I2*>(input2_);
O* output = static_cast<O*>(output_);
for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
output[oIndex] = input1[oIndex] + input2[oIndex];
std::vector<const I*> inputs;
for (const auto& input_ : inputs_) {
inputs.push_back(static_cast<const I*>(input_));
}
}
template <class I1, class I2, class I3, class O>
void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
const void* input3_, void* output_) {
// FIXME: missing Add attributes as arguments
const I1* input1 = static_cast<const I1*>(input1_);
const I2* input2 = static_cast<const I2*>(input2_);
const I3* input3 = static_cast<const I3*>(input3_);
O* output = static_cast<O*>(output_);
for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex];
}
for (std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
output[oIndex] += inputs[iIndex][oIndex];
}
}
}
namespace {
static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_cpu_forward_kernel<float, float>);
static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_cpu_forward_kernel<int, int>);
static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::AddImpl1I_cpu_forward_kernel<double, double>);
static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::AddImpl2I_cpu_forward_kernel<float, float, float>);
static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_cpu_forward_kernel<int, int, int>);
static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_cpu_forward_kernel<double, double, double>);
static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::AddImpl3I_cpu_forward_kernel<float, float, float, float>);
static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::AddImpl3I_cpu_forward_kernel<int, int, int, int>);
static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::AddImpl3I_cpu_forward_kernel<double, double, double, double>);
static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::AddImpl_cpu_forward_kernel<float, float>);
static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::AddImpl_cpu_forward_kernel<int, int>);
static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::AddImpl_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */
\ No newline at end of file
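Editor's note: the unified kernel above accumulates with += across all inputs, and no zero-initialization of the output is visible in the hunk as rendered here. A standalone, self-contained sketch of the same accumulation that initializes explicitly (names and types are illustrative):

    #include <cstddef>
    #include <vector>

    // Sum an arbitrary number of equally-sized input buffers into output.
    template <class I, class O>
    void addAccumulateSketch(std::size_t inputLength,
                             const std::vector<const I*>& inputs, O* output) {
        for (std::size_t o = 0; o < inputLength; ++o)
            output[o] = O(0);                  // start from zero, then accumulate
        for (const I* input : inputs)
            for (std::size_t o = 0; o < inputLength; ++o)
                output[o] += input[o];
    }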
@@ -21,14 +21,21 @@
namespace Aidge {
// class Concat_Op;
// compute kernel registry for forward and backward
class ConcatImplForward_cpu
: public Registrable<ConcatImplForward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const std::vector<void*>, void*)> {
};
: public Registrable<ConcatImplForward_cpu, std::tuple<DataType, DataType>, void(const Concat_Op::Attrs&,
const std::vector<DimSize_t>,
const std::vector<DimSize_t>&,
const std::vector<const void*>,
void*)> {};
class ConcatImplBackward_cpu
: public Registrable<ConcatImplBackward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const std::vector<void*>, void*)> {
};
: public Registrable<ConcatImplBackward_cpu, std::tuple<DataType, DataType>, void(const Concat_Op::Attrs&,
const std::vector<DimSize_t>,
const std::vector<DimSize_t>&,
const std::vector<const void*>,
void*)> {};
class ConcatImpl_cpu : public OperatorImpl {
public:
@@ -38,13 +45,27 @@ public:
return std::make_unique<ConcatImpl_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward() override;
void backward() override;
};
namespace {
static Registrar<Concat_Op> registrarConcatImpl_cpu("cpu", Aidge::ConcatImpl_cpu::create);
}
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONCATIMPL_H_ */
@@ -22,13 +22,18 @@
namespace Aidge {
template <class I, class O>
void ConcatImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSize_t>& inputDims, const std::vector<DimSize_t>& dimsOnAxis, const std::vector<void*> input_, void* output_)
void ConcatImpl_cpu_forward_kernel(const Concat_Op::Attrs& attrs,
const std::vector<DimSize_t>& inputDims,
const std::vector<DimSize_t>& concatAxisValues,
const std::vector<const void*>& inputs_,
void* output_)
{
std::size_t axisIdx = std::get<0>(attrs);
O* output = static_cast<O*>(output_);
std::vector<I*> input;
for(const auto& elem:input_)
std::vector<const I*> input;
for(const auto& elem:inputs_)
{
input.emplace_back(static_cast<I*>(elem));
input.emplace_back(static_cast<const I*>(elem));
}
std::size_t postAxisElems = 1;
@@ -44,7 +49,7 @@ void ConcatImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSiz
{
for(std::size_t j=0; j < input.size(); ++j)
{
std::size_t strideOnAxis = postAxisElems * dimsOnAxis[j];
std::size_t strideOnAxis = postAxisElems * concatAxisValues[j];
const I* copyPtr = std::next(input[j], i * strideOnAxis);
std::copy_n(copyPtr, strideOnAxis, output);
output += strideOnAxis;
@@ -58,8 +63,9 @@ static Registrar<ConcatImplForward_cpu> registrarConcatImplForward_cpu_Float32(
static Registrar<ConcatImplForward_cpu> registrarConcatImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::ConcatImpl_cpu_forward_kernel<int, int>);
static Registrar<ConcatImplForward_cpu> registrarConcatImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::ConcatImpl_cpu_forward_kernel<double, double>);
{DataType::Float64, DataType::Float64},
Aidge::ConcatImpl_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONCATIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_CONCATIMPL_CPU_FORWARD_KERNEL_H_ */
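Editor's note: the copy loop above walks the output exactly once: for each slice index before the concat axis it copies, from every input in turn, a contiguous block of postAxisElems * concatAxisValues[j] elements. A self-contained sketch of that layout logic (helper name is illustrative):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Concatenate row-major tensors that differ only along the concat axis.
    // preAxisElems  = product of dims before the axis
    // postAxisElems = product of dims after the axis
    // concatAxisValues[j] = input j's extent on the axis
    template <class T>
    void concatSketch(std::size_t preAxisElems, std::size_t postAxisElems,
                      const std::vector<std::size_t>& concatAxisValues,
                      const std::vector<const T*>& inputs, T* output) {
        for (std::size_t i = 0; i < preAxisElems; ++i) {
            for (std::size_t j = 0; j < inputs.size(); ++j) {
                const std::size_t strideOnAxis = postAxisElems * concatAxisValues[j];
                std::copy_n(inputs[j] + i * strideOnAxis, strideOnAxis, output);
                output += strideOnAxis;
            }
        }
    }
    // e.g. concatenating shapes (2,3) and (2,5) along axis 1: preAxisElems = 2,
    // postAxisElems = 1, so each output row is a block of 3 then 5 elements.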
@@ -17,6 +17,7 @@
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/utils/Types.h"
#include <cmath>
#include <cstddef>
#include <array>
#include <algorithm>
@@ -9,8 +9,10 @@
*
********************************************************************************/
#ifndef __AIDGE_CPU_OPERATOR_ScalingIMPL_FORWARD_KERNEL_H__
#define __AIDGE_CPU_OPERATOR_ScalingIMPL_FORWARD_KERNEL_H__
#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H
#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H
#include <cmath>
#include "aidge/utils/Registrar.hpp"
@@ -61,7 +63,7 @@ const O& clamp(const O& x, const O& min, const O& max)
template<class O>
O saturate(O value, std::size_t quantizedNbBits, bool isOutputUnsigned) {
assert(quantizedNbBits > 0);
const O min = isOutputUnsigned?0:
-(1ll << (quantizedNbBits - 1ll));
const O max = isOutputUnsigned?(1ll << quantizedNbBits) - 1ll:
@@ -101,4 +103,4 @@ static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float64
} // namespace
} // namespace Aidge
#endif /* __AIDGE_CPU_OPERATOR_ScalingIMPL_FORWARD_KERNEL_H__ */
#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H */
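Editor's note: the saturate() bounds above follow the usual two's-complement ranges; a small worked sketch for reference (the signed max is assumed to mirror the signed min, which the truncated hunk does not show):

    #include <cstddef>

    long long quantMin(std::size_t nbBits, bool isOutputUnsigned) {
        return isOutputUnsigned ? 0ll : -(1ll << (nbBits - 1));
    }
    long long quantMax(std::size_t nbBits, bool isOutputUnsigned) {
        return isOutputUnsigned ? (1ll << nbBits) - 1ll
                                : (1ll << (nbBits - 1)) - 1ll;
    }
    // quantMin(8, false) == -128, quantMax(8, false) == 127,
    // quantMin(8, true)  == 0,    quantMax(8, true)  == 255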
@@ -12,12 +12,14 @@
#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_H_
#define AIDGE_CPU_OPERATOR_SLICEIMPL_H_
#include <memory>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Slice.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Slice_Op;
@@ -42,8 +44,16 @@ public:
return std::make_unique<SliceImpl_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getRequiredMemory(const IOIndex_t outputIdx,
const std::vector<DimSize_t>& inputsSize) const override final;
NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward() override;
void backward() override;
};
namespace {
@@ -51,4 +61,4 @@ static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::c
}
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H_ */
#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H_ */
\ No newline at end of file
@@ -103,4 +103,4 @@ static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64(
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */
\ No newline at end of file
@@ -10,93 +10,74 @@
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/operator/Conv.hpp"
#include "aidge/utils/Types.h"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp"
//////////////////////////////////
// AddImpl_cpu<1>
//////////////////////////////////
Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
assert(mOp.getRawInput(inputIdx) && "requires valid input");
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// this implementation can be in-place
return 0;
// Requires the whole tensors
const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims();
return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
}
void Aidge::AddImpl_cpu<1>::forward() {
assert(mOp.getInput(0) && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<AddImplForward_cpu<1>>::create({
mOp.getInput(0)->dataType(),
mOp.getOutput(0)->dataType()});
// Call kernel
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
return 0;
}
Aidge::NbElts_t Aidge::AddImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
// Requires the whole tensors, regardless of available data on inputs
assert(outputIdx == 0 && "operator has only one output");
(void) outputIdx;
//////////////////////////////////
// AddImpl_cpu<2>
//////////////////////////////////
const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims();
return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// this implementation of add can be in-place
return 0;
Aidge::NbElts_t Aidge::AddImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t inputIdx) const {
assert(inputIdx < mNbConsumedData.size());
return mNbConsumedData[inputIdx];
}
void Aidge::AddImpl_cpu<2>::forward() {
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.getInput(1) && "missing input #1");
// Find the correct kernel type
auto kernelFunc = Registrar<AddImplForward_cpu<2>>::create({
mOp.getInput(0)->dataType(),
mOp.getInput(1)->dataType(),
mOp.getOutput(0)->dataType()});
// Call kernel
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getInput(1)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
Aidge::NbElts_t Aidge::AddImpl_cpu::getNbProducedData(const Aidge::IOIndex_t outputIdx) const {
assert(outputIdx < mNbProducedData.size());
return mNbProducedData[outputIdx];
}
void Aidge::AddImpl_cpu::updateConsummerProducer() {
for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
//////////////////////////////////
// AddImpl_cpu<3>
//////////////////////////////////
mNbProducedData[0]+= getRequiredMemory(0, {});
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// this implementation of add can be in-place
return 0;
}
void Aidge::AddImpl_cpu<3>::forward() {
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.getInput(1) && "missing input #1");
assert(mOp.getInput(2) && "missing input #2");
// Find the correct kernel type
auto kernelFunc = Registrar<AddImplForward_cpu<3>>::create({
mOp.getInput(0)->dataType(),
mOp.getInput(1)->dataType(),
mOp.getInput(2)->dataType(),
mOp.getOutput(0)->dataType()});
// Call kernel
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getInput(1)->getImpl()->rawPtr(),
mOp.getInput(2)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
}
void Aidge::AddImpl_cpu::forward() {
assert(mOp.getRawInput(0) && "missing input in Add operator");
DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType();
for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) {
assert(mOp.getRawInput(i) && "missing input in Add operator");
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput);
}
auto kernelFunc = Registrar<AddImplForward_cpu>::create({
datatypeFirstInput,
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
std::vector<const void*> opInputs;
for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) {
opInputs.push_back(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->getImpl()->rawPtr());
}
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
opInputs,
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}
\ No newline at end of file
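Editor's note: the scheduling hooks introduced here (getNbRequiredData, getRequiredMemory) all reduce to one quantity, the element count of a whole tensor, computed as the product of its dimensions via std::accumulate. A minimal sketch:

    #include <cstddef>
    #include <functional>
    #include <numeric>
    #include <vector>

    std::size_t tensorElems(const std::vector<std::size_t>& dims) {
        return std::accumulate(dims.begin(), dims.end(), std::size_t(1),
                               std::multiplies<std::size_t>());
    }
    // tensorElems({2, 3, 4}) == 24. getNbRequiredData/getRequiredMemory return
    // this for the whole input/output tensor, and updateConsummerProducer
    // advances the consumed/produced counters by the same amounts.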
@@ -26,15 +26,15 @@ Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*
}
void Aidge::AvgPoolingImpl2D_cpu::forward() {
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.getRawInput(0) && "missing input #0");
// Find the correct kernel type
auto kernelFunc =
Registrar<AvgPoolingImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
Registrar<AvgPoolingImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
kernelFunc(dynamic_cast<const AvgPooling_Op<2>&>(mOp).getStaticAttributes(),
mOp.getInput(0)->dims<4>(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}
@@ -25,26 +25,27 @@ Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*i
}
void Aidge::BatchNormImpl2D_cpu::forward() {
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.getInput(1) && "missing input #1");
assert(mOp.getInput(2) && "missing input #2");
assert(mOp.getInput(3) && "missing input #3");
assert(mOp.getInput(4) && "missing input #4");
assert(mOp.getRawInput(0) && "missing input #0");
assert(mOp.getRawInput(1) && "missing input #1");
assert(mOp.getRawInput(2) && "missing input #2");
assert(mOp.getRawInput(3) && "missing input #3");
assert(mOp.getRawInput(4) && "missing input #4");
assert(mOp.getOutput(0)->nbDims() == 4);
assert(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->nbDims() == 4);
// Find the correct kernel type
auto kernelFunc =
Registrar<BatchNormImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getInput(1)->dataType(),
mOp.getOutput(0)->dataType()});
Registrar<BatchNormImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
kernelFunc(dynamic_cast<const BatchNorm_Op<2>&>(mOp).getStaticAttributes(),
mOp.getInput(0)->dims<4>(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getInput(1)->getImpl()->rawPtr(),
mOp.getInput(2)->getImpl()->rawPtr(),
mOp.getInput(3)->getImpl()->rawPtr(),
mOp.getInput(4)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(3))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(4))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr(),
true);
}
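Editor's note: for reference, the per-element computation the BatchNorm kernel performs at inference is the standard normalization formula; epsilon comes from the operator's static attributes and is not shown in this hunk:

    #include <cmath>

    // y = scale * (x - mean) / sqrt(var + epsilon) + shift
    float batchNormInferSketch(float x, float scale, float shift,
                               float mean, float var, float epsilon) {
        return scale * (x - mean) / std::sqrt(var + epsilon) + shift;
    }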
@@ -10,44 +10,82 @@
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/operator/Concat.hpp"
#include "aidge/utils/Types.h"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu/operator/ConcatImpl.hpp"
#include "aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp"
Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// this implementation can be in-place
Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
assert(mOp.getRawInput(inputIdx) && "requires valid input");
// Requires the whole tensors
const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims();
return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
return 0;
}
void Aidge::ConcatImpl_cpu::forward() {
for (std::size_t i = 0; i < dynamic_cast<const Concat_Op&>(mOp).mNbIn; ++i) {
assert(mOp.getInput(i) && ("missing input #"+std::to_string(i)).c_str());
Aidge::NbElts_t Aidge::ConcatImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
// Requires the whole tensors, regardless of available data on inputs
assert(outputIdx == 0 && "operator has only one output");
(void) outputIdx;
const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims();
return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t inputIdx) const {
assert(inputIdx < mNbConsumedData.size());
return mNbConsumedData[inputIdx];
}
Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbProducedData(const Aidge::IOIndex_t outputIdx) const {
assert(outputIdx < mNbProducedData.size());
return mNbProducedData[outputIdx];
}
void Aidge::ConcatImpl_cpu::updateConsummerProducer() {
for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
mNbProducedData[0]+= getRequiredMemory(0, {});
}
void Aidge::ConcatImpl_cpu::forward() {
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input in Concat operator");
DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType();
for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) {
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i)) && "missing input in Concat operator");
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput);
}
Concat_Op::Attrs attr = dynamic_cast<const Concat_Op&>(mOp).getStaticAttributes();
std::size_t axisIdx = static_cast<const int&>(std::get<0>(attr));
assert(mOp.getInput(0)->nbDims() > axisIdx && ("input dim must be bigger than "+std::to_string(axisIdx)).c_str());
auto kernelFunc = Registrar<ConcatImplForward_cpu>::create({
mOp.getInput(0)->dataType(),
mOp.getOutput(0)->dataType()});
std::vector<void*> inputTensors;
std::vector<std::size_t> dimsOnAxis;
for (std::size_t i = 0; i < dynamic_cast<const Concat_Op&>(mOp).mNbIn; ++i) {
inputTensors.push_back(mOp.getInput(i)->getImpl()->rawPtr());
dimsOnAxis.push_back(mOp.getInput(i)->dims()[axisIdx]);
datatypeFirstInput,
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
std::vector<const void*> opInputs;
std::vector<DimSize_t> opInputAxis;
for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) {
opInputs.push_back(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->getImpl()->rawPtr());
opInputAxis.push_back(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dims()[dynamic_cast<const Concat_Op&>(mOp).template getAttr<DimSize_t>("Axis")]);
}
// Call kernel
kernelFunc(axisIdx,
mOp.getInput(0)->dims(),
dimsOnAxis,
inputTensors,
mOp.getOutput(0)->getImpl()->rawPtr());
kernelFunc(dynamic_cast<const Concat_Op&>(mOp).getStaticAttributes(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
opInputAxis,
opInputs,
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}
void Aidge::ConcatImpl_cpu::backward() { printf("Not implemented yet.\n"); }
@@ -27,19 +27,23 @@ Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t
}
void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.getInput(1) && "missing input #1");
assert(mOp.getInput(2) && "missing input #2");
assert(mOp.getRawInput(0) && "missing input #0");
assert(mOp.getRawInput(1) && "missing input #1");
assert(mOp.getRawInput(2) && "missing input #2");
assert((mOp.getInput(0)->nbDims() == 4) && "support for 4-dimensions tensors only");
assert((std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims() == 4) && "support for 4-dimensions tensors only");
// Find the correct kernel type
auto kernelFunc =
Registrar<ConvDepthWiseImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getInput(1)->dataType(),
mOp.getInput(2)->dataType(), mOp.getOutput(0)->dataType()});
Registrar<ConvDepthWiseImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
kernelFunc(dynamic_cast<const ConvDepthWise_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
kernelFunc(dynamic_cast<const ConvDepthWise_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}
@@ -28,17 +28,19 @@ Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputI
void Aidge::ConvImpl2D_cpu::forward() {
// FIXME: uncomment the following code once memory handling will work
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.getInput(1) && "missing input #1");
assert(mOp.getInput(2) && "missing input #2");
assert(mOp.getRawInput(0) && "missing input #0");
assert(mOp.getRawInput(1) && "missing input #1");
assert(mOp.getRawInput(2) && "missing input #2");
// Find the correct kernel type
auto kernelFunc =
Registrar<ConvImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getInput(1)->dataType(),
mOp.getInput(2)->dataType(), mOp.getOutput(0)->dataType()});
Registrar<ConvImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}
@@ -27,25 +27,16 @@ Aidge::NbElts_t Aidge::DivImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_
}
void Aidge::DivImpl_cpu::forward() {
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.getInput(1) && "missing input #1");
assert(((mOp.getInput(1)->size() == 1) ||
(mOp.getInput(1)->size() == mOp.getInput(0)->size()) ||
(mOp.getInput(1)->nbDims() == 1 && mOp.getInput(1)->size() == mOp.getInput(0)->dims()[mOp.getInput(0)->nbDims()-1])
) &&
"input #1 must either be a tensor of size 1, the number of channels of input # or the same size of input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<DivImplForward_cpu>::create({
mOp.getInput(0)->dataType(),
mOp.getInput(1)->dataType(),
mOp.getOutput(0)->dataType()});
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
std::static_pointer_cast<Tensor>(mOp.getInput(1))->size(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getInput(1)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
kernelFunc(std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)))->size(),
std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)))->size(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}
@@ -27,15 +27,14 @@ Aidge::NbElts_t Aidge::ErfImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_
}
void Aidge::ErfImpl_cpu::forward() {
assert(mOp.getInput(0) && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<ErfImplForward_cpu>::create({
mOp.getInput(0)->dataType(),
mOp.getOutput(0)->dataType()});
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
kernelFunc(mOp.getInput(0)->size(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}
@@ -23,23 +23,23 @@
void Aidge::FCImpl_cpu::forward()
{
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.getInput(1) && "missing input #1");
assert(mOp.getInput(2) && "missing input #2");
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1");
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(2)) && "missing input #2");
// Find the correct kernel type
auto kernelFunc = Registrar<FCImplForward_cpu>::create(
{mOp.getInput(0)->dataType(),
mOp.getInput(1)->dataType(),
mOp.getInput(2)->dataType(),
mOp.getOutput(0)->dataType()});
{std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
// if (mOp.getInput(0)->nbDims() == 4) {
// if (std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->nbDims() == 4) {
// kernelFunc(
// mOp.getStaticAttributes(),
// std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
// mOp.getInput(0)->getImpl()->rawPtr(),
// std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
// std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->getImpl()->rawPtr(),
// mOp.mInputs[1]->getImpl()->rawPtr(),
// mOp.mInputs[2]->getImpl()->rawPtr(),
// mOp.getOutput(0)->getImpl()->rawPtr());
@@ -47,10 +47,10 @@ void Aidge::FCImpl_cpu::forward()
// else
kernelFunc(
dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
mOp.getInput(0)->dims()[0],
mOp.getInput(0)->sizeM1(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getInput(1)->getImpl()->rawPtr(),
mOp.getInput(2)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}
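Editor's note: as dispatched above, the FC kernel receives the batch size (dims()[0]), the flattened per-sample input size (sizeM1()), and raw pointers to input, weights, and bias. A sketch of the computation, assuming weights stored row-major as weights[out][in] (the actual layout lives in the kernel, which this diff does not show):

    #include <cstddef>

    void fcSketch(std::size_t batchSize, std::size_t inSize, std::size_t outSize,
                  const float* input, const float* weights, const float* bias,
                  float* output) {
        for (std::size_t b = 0; b < batchSize; ++b)
            for (std::size_t o = 0; o < outSize; ++o) {
                float acc = bias[o];                       // start from the bias term
                for (std::size_t i = 0; i < inSize; ++i)
                    acc += input[b * inSize + i] * weights[o * inSize + i];
                output[b * outSize + o] = acc;
            }
    }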
@@ -27,23 +27,19 @@ Aidge::NbElts_t Aidge::GatherImpl_cpu::getNbRequiredProtected(const Aidge::IOInd
}
void Aidge::GatherImpl_cpu::forward() {
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.getInput(1) && "missing input #1");
assert((mOp.getInput(0)->nbDims() == 2 && mOp.getInput(1)->nbDims() == 2 )&& "only 2D tensors are supported");
Gather_Op::Attrs attr = dynamic_cast<const Gather_Op&>(mOp).getStaticAttributes();
const int& axisIdx = static_cast<const int&>(std::get<0>(attr));
assert(mOp.getInput(0)->nbDims() > 1);// > axisIdx && "input dim must be bigger than "+std::to_strint(axisIdx)
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->nbDims() > 1);// > axisIdx && "input dim must be bigger than "+std::to_strint(axisIdx)
auto kernelFunc = Registrar<GatherImplForward_cpu>::create({
mOp.getInput(0)->dataType(),
mOp.getOutput(0)->dataType()});
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
kernelFunc(axisIdx,
mOp.getInput(0)->dims(),
mOp.getInput(1)->dims(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getInput(1)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}