Commit 0703a9a0 authored by Maxence Naud's avatar Maxence Naud

Merge branch 'tiling' into 'master'

Tiling

See merge request !23
parents fafa52d8 a6d0293d
Pipeline #35164 passed
@@ -9,13 +9,12 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H
#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H
#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_
#include <cmath>
#include <cstddef>
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
//TODO : improve propagate, n2d2 :
@@ -61,12 +60,13 @@ const O& clamp(const O& x, const O& min, const O& max)
}
template<class O>
O saturate(O value, std::size_t quantizedNbBits, bool isOutputUnsigned) {
O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutputUnsigned) {
// TODO: no assertions in kernel
assert(quantizedNbBits > 0);
const O min = isOutputUnsigned?0:
const O min = isOutputUnsigned ? 0 :
-(1ll << (quantizedNbBits - 1ll));
const O max = isOutputUnsigned?(1ll << quantizedNbBits) - 1ll:
const O max = isOutputUnsigned ? (1ll << quantizedNbBits) - 1ll :
(1ll << (quantizedNbBits - 1ll)) - 1ll;
return clamp(value, min, max);
@@ -81,8 +81,8 @@ void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Attrs& attrs,
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const I& scalingFactor = static_cast<const I&>(std::get<0>(attrs));
std::size_t quantizedNbBits = static_cast<std::size_t>(std::get<1>(attrs));
bool isOutputUnsigned = static_cast<bool>(std::get<2>(attrs));
const std::size_t quantizedNbBits = static_cast<std::size_t>(std::get<1>(attrs));
const bool isOutputUnsigned = static_cast<bool>(std::get<2>(attrs));
for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = input[i] * scalingFactor;
@@ -103,4 +103,4 @@ static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float64
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H */
#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ */
\ No newline at end of file
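
For reference, the range arithmetic in the saturate() helper touched above maps an N-bit quantized output to [-(2^(N-1)), 2^(N-1)-1] when signed and [0, 2^N-1] when unsigned, then clamps. The following standalone sketch (a hypothetical 8-bit example, not Aidge code) reproduces that computation and clamps a sample value:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
    const std::size_t nbBits = 8;   // hypothetical 8-bit quantized output
    for (const bool isUnsigned : {false, true}) {
        // same bound computation as saturate() in the hunk above
        const std::int64_t min = isUnsigned ? 0 : -(1ll << (nbBits - 1));
        const std::int64_t max = isUnsigned ? (1ll << nbBits) - 1
                                            : (1ll << (nbBits - 1)) - 1;
        // signed 8-bit -> [-128, 127], unsigned 8-bit -> [0, 255]
        const std::int64_t clamped = std::min(std::max<std::int64_t>(300, min), max);
        std::printf("unsigned=%d range=[%lld, %lld] clamp(300)=%lld\n",
                    static_cast<int>(isUnsigned),
                    static_cast<long long>(min),
                    static_cast<long long>(max),
                    static_cast<long long>(clamped));
    }
    return 0;
}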
@@ -26,95 +26,26 @@ namespace Aidge {
// class Slice_Op;
// compute kernel registry for forward and backward
template <DimIdx_t DIM>
class SliceImplForward_cpu
: public Registrable<SliceImplForward_cpu<DIM>, std::tuple<DataType>,
void(const typename Slice_Op<DIM>::Attrs&,
const std::array<std::size_t, DIM>,
: public Registrable<SliceImplForward_cpu, std::tuple<DataType>,
void(const typename Slice_Op::Attrs&,
const std::vector<std::size_t>,
const void*,
void*)> {};
template <DimIdx_t DIM>
class SliceImplBackward_cpu
: public Registrable<SliceImplBackward_cpu<DIM>, std::tuple<DataType>,
void(const typename Slice_Op<DIM>::Attrs&,
const std::array<std::size_t, DIM>,
: public Registrable<SliceImplBackward_cpu, std::tuple<DataType>,
void(const typename Slice_Op::Attrs&,
const std::vector<std::size_t>,
const void*,
void*)> {};
template <DimIdx_t DIM>
class SliceImpl_cpu : public OperatorImpl {
public:
SliceImpl_cpu(const Slice_Op<DIM>& op) : OperatorImpl(op) {}
static std::unique_ptr<SliceImpl_cpu<DIM>> create(const Slice_Op<DIM>& op) {
return std::make_unique<SliceImpl_cpu<DIM>>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final {
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input");
// Requires the whole tensors
const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims();
return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1),
std::multiplies<NbElts_t>());
}
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final { return 0; }
NbElts_t getRequiredMemory(const IOIndex_t outputIdx,
const std::vector<DimSize_t>& inputsSize) const override final {
(void)outputIdx;
(void)inputsSize;
const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims();
return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1),
std::multiplies<NbElts_t>());
}
NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final {
return mNbConsumedData[0];
}
NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final {
return mNbProducedData[0];
}
void updateConsummerProducer() override final {
// each input is consumed by the minimum amount for a forward pass
mNbConsumedData[0] += getNbRequiredData(0);
mNbProducedData[0] += getRequiredMemory(0, {});
}
void forward() override {
// FIXME: uncomment the following code once memory handling will work
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<SliceImplForward_cpu<DIM>>::create(
{std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()});
// Call kernel
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<DIM>(),
std::get<1>(std::static_pointer_cast<const Slice_Op<DIM>&>(mOp).getStaticAttributes()),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()
);
// each input is consumed by the minimum amount for a forward pass
mNbConsumedData[0] += getNbRequiredData(0);
mNbProducedData[0] += getRequiredMemory(0, {});
}
void backward() override { printf("Not implemented yet.\n"); }
};
/******************************************************************************/
template <>
class SliceImpl_cpu<1> : public OperatorImpl {
class SliceImpl_cpu : public OperatorImpl {
public:
SliceImpl_cpu(const Slice_Op<1>& op) : OperatorImpl(op) {}
SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op) {}
static std::unique_ptr<SliceImpl_cpu<1>> create(const Slice_Op<1>& op) {
return std::make_unique<SliceImpl_cpu<1>>(op);
static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) {
return std::make_unique<SliceImpl_cpu>(op);
}
public:
@@ -127,89 +58,14 @@ public:
void updateConsummerProducer() override final;
void forward() override;
void backward() override;
};
/******************************************************************************/
template <>
class SliceImpl_cpu<2> : public OperatorImpl {
public:
SliceImpl_cpu(const Slice_Op<2>& op) : OperatorImpl(op) {}
static std::unique_ptr<SliceImpl_cpu<2>> create(const Slice_Op<2>& op) {
return std::make_unique<SliceImpl_cpu<2>>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getRequiredMemory(const IOIndex_t outputIdx,
const std::vector<DimSize_t>& inputsSize) const override final;
NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward() override;
void backward() override;
};
/******************************************************************************/
template <>
class SliceImpl_cpu<3> : public OperatorImpl {
public:
SliceImpl_cpu(const Slice_Op<3>& op) : OperatorImpl(op) {}
static std::unique_ptr<SliceImpl_cpu<3>> create(const Slice_Op<3>& op) {
return std::make_unique<SliceImpl_cpu<3>>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getRequiredMemory(const IOIndex_t outputIdx,
const std::vector<DimSize_t>& inputsSize) const override final;
NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward() override;
void backward() override;
};
/******************************************************************************/
template <>
class SliceImpl_cpu<4> : public OperatorImpl {
public:
SliceImpl_cpu(const Slice_Op<4>& op) : OperatorImpl(op) {}
static std::unique_ptr<SliceImpl_cpu<4>> create(const Slice_Op<4>& op) {
return std::make_unique<SliceImpl_cpu<4>>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getRequiredMemory(const IOIndex_t outputIdx,
const std::vector<DimSize_t>& inputsSize) const override final;
NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward() override;
void backward() override;
};
namespace {
static Registrar<Slice_Op<1>> registrarSliceImpl_1D_cpu("cpu", Aidge::SliceImpl_cpu<1>::create);
static Registrar<Slice_Op<2>> registrarSliceImpl_2D_cpu("cpu", Aidge::SliceImpl_cpu<2>::create);
static Registrar<Slice_Op<3>> registrarSliceImpl_3D_cpu("cpu", Aidge::SliceImpl_cpu<3>::create);
static Registrar<Slice_Op<4>> registrarSliceImpl_4D_cpu("cpu", Aidge::SliceImpl_cpu<4>::create);
static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */
#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */
\ No newline at end of file
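
The header above drops the per-dimension template parameter, so a single SliceImpl_cpu and a single registrar entry now cover every rank. As a rough illustration of that pattern (a minimal sketch of a type-keyed registry, not the actual Aidge Registrable/Registrar implementation), kernels are stored under a data-type key and looked up at forward() time:

#include <cstddef>
#include <functional>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

// hypothetical kernel signature mirroring the one registered above
using SliceKernel = std::function<void(const std::vector<std::size_t>&, const void*, void*)>;

std::map<std::string, SliceKernel>& sliceRegistry() {
    static std::map<std::string, SliceKernel> registry;
    return registry;
}

void registerSliceKernel(const std::string& dataType, SliceKernel kernel) {
    sliceRegistry()[dataType] = std::move(kernel);
}

SliceKernel createSliceKernel(const std::string& dataType) {
    const auto it = sliceRegistry().find(dataType);
    if (it == sliceRegistry().end()) {
        throw std::runtime_error("no Slice kernel registered for " + dataType);
    }
    return it->second;
}

int main() {
    // one registration per data type; no per-rank duplication is needed anymore
    registerSliceKernel("Float32", [](const std::vector<std::size_t>&, const void*, void*) {});
    const SliceKernel kernel = createSliceKernel("Float32");
    kernel({2, 3, 4}, nullptr, nullptr);
    return 0;
}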
@@ -15,46 +15,47 @@
#include "aidge/utils/Registrar.hpp"
#include "aidge/operator/Slice.hpp"
#include "aidge/backend/cpu/operator/SliceImpl.hpp"
#include <array>
#include <vector>
#include <cstddef>
#include "aidge/data/Data.hpp"
namespace Aidge {
template <class I, std::size_t DIM>
void SliceImpl_cpu_forward_kernel(const typename Slice_Op<DIM>::Attrs& attrs,
const std::array<std::size_t, DIM> inputDims,
template <class I>
void SliceImpl_cpu_forward_kernel(const typename Slice_Op::Attrs& attrs,
const std::vector<std::size_t> inputDims,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_) + std::get<0>(attrs);
I* output = static_cast<I*>(output_);
const std::array<std::size_t, DIM> slicedDims = std::get<1>(attrs);
const std::vector<std::size_t> slicedDims = std::get<1>(attrs);
const std::size_t nbDims = slicedDims.size();
// for inputDims = {4,5,5,3} & slicedDims = {3,2,2,1}, substractedDims = {1,3,3,2}
std::array<std::size_t, DIM> substractedDims;
for (std::size_t i = 0; i < DIM; ++i) {
std::vector<std::size_t> substractedDims = std::vector<std::size_t>(nbDims);
for (std::size_t i = 0; i < nbDims; ++i) {
substractedDims[i] = inputDims[i] - slicedDims[i];
}
// for slicedDims = {3,2,2,1}, prodSlicedDims = {12,4,2,1}
std::array<std::size_t, DIM> prodSlicedDims;
std::array<std::size_t, DIM+1> prodInputDims;
prodSlicedDims[DIM - 1] = slicedDims[DIM - 1];
prodInputDims[DIM - 1] = inputDims[DIM - 1];
prodInputDims[DIM] = 1;
for (std::size_t i = 2; i <= DIM; ++i) {
prodSlicedDims[DIM - i] = prodSlicedDims[DIM - i + 1]*slicedDims[DIM - i];
prodInputDims[DIM - i] = prodInputDims[DIM - i + 1]*inputDims[DIM - i];
std::vector<std::size_t> prodSlicedDims = std::vector<std::size_t>(nbDims);
std::vector<std::size_t> prodInputDims = std::vector<std::size_t>(nbDims+1);
prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1];
prodInputDims[nbDims - 1] = inputDims[nbDims - 1];
prodInputDims[nbDims] = 1;
for (std::size_t i = 2; i <= nbDims; ++i) {
prodSlicedDims[nbDims - i] = prodSlicedDims[nbDims - i + 1]*slicedDims[nbDims - i];
prodInputDims[nbDims - i] = prodInputDims[nbDims - i + 1]*inputDims[nbDims - i];
}
std::size_t j = 0;
std::size_t i = 0;
for (; j < prodSlicedDims[0];) {
output[j] = input[i++];
++j;
for (std::size_t idx = DIM - 1; idx > 0; --idx) {
i += j % prodSlicedDims[idx] == 0 ? substractedDims[idx]*prodInputDims[idx+1] : 0;
++j;
for (std::size_t idx = nbDims - 1; idx > 0; --idx) {
i += j % prodSlicedDims[idx] == 0 ? substractedDims[idx]*prodInputDims[idx+1] : 0;
}
}
}
@@ -62,37 +63,13 @@ void SliceImpl_cpu_forward_kernel(const typename Slice_Op<DIM>::Attrs& attrs,
namespace {
// DIM = 1
static Registrar<SliceImplForward_cpu<1>> registrarSliceImplForward_1D_cpu_Float32(
{DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, 1>);
static Registrar<SliceImplForward_cpu<1>> registrarSliceImplForward_1D_cpu_Int32(
{DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, 1>);
static Registrar<SliceImplForward_cpu<1>> registrarSliceImplForward_1D_cpu_Float64(
{DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, 1>);
// DIM = 2
static Registrar<SliceImplForward_cpu<2>> registrarSliceImplForward_2D_cpu_Float32(
{DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, 2>);
static Registrar<SliceImplForward_cpu<2>> registrarSliceImplForward_2D_cpu_Int32(
{DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, 2>);
static Registrar<SliceImplForward_cpu<2>> registrarSliceImplForward_2D_cpu_Float64(
{DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, 2>);
// DIM = 3
static Registrar<SliceImplForward_cpu<3>> registrarSliceImplForward_3D_cpu_Float32(
{DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, 3>);
static Registrar<SliceImplForward_cpu<3>> registrarSliceImplForward_3D_cpu_Int32(
{DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, 3>);
static Registrar<SliceImplForward_cpu<3>> registrarSliceImplForward_3D_cpu_Float64(
{DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, 3>);
// DIM = 4
static Registrar<SliceImplForward_cpu<4>> registrarSliceImplForward_4D_cpu_Float32(
{DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, 4>);
static Registrar<SliceImplForward_cpu<4>> registrarSliceImplForward_4D_cpu_Int32(
{DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, 4>);
static Registrar<SliceImplForward_cpu<4>> registrarSliceImplForward_4D_cpu_Float64(
{DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, 4>);
static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32(
{DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float>);
static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32(
{DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int>);
static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64(
{DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */
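
The copy loop in SliceImpl_cpu_forward_kernel walks the output linearly and, each time a slice row along some axis is completed, skips the unsliced tail of that input axis. The standalone sketch below (a hypothetical 2x3x4 example, not Aidge code) reproduces the same index arithmetic, with the per-axis starts folded into the flat offset that the kernel receives as its first attribute:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
    const std::vector<std::size_t> inputDims  = {2, 3, 4};
    const std::vector<std::size_t> starts     = {0, 1, 1};   // per-axis slice start
    const std::vector<std::size_t> slicedDims = {2, 2, 2};   // per-axis slice size
    const std::size_t nbDims = inputDims.size();

    std::vector<int> input(2 * 3 * 4);
    for (std::size_t v = 0; v < input.size(); ++v) { input[v] = static_cast<int>(v); }

    // substractedDims[d]: elements skipped at the end of axis d once a slice row is done
    std::vector<std::size_t> substractedDims(nbDims);
    for (std::size_t d = 0; d < nbDims; ++d) { substractedDims[d] = inputDims[d] - slicedDims[d]; }

    // running products, as in the kernel: prodSlicedDims = {8,4,2}, prodInputDims = {24,12,4,1}
    std::vector<std::size_t> prodSlicedDims(nbDims);
    std::vector<std::size_t> prodInputDims(nbDims + 1);
    prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1];
    prodInputDims[nbDims - 1] = inputDims[nbDims - 1];
    prodInputDims[nbDims] = 1;
    for (std::size_t d = 2; d <= nbDims; ++d) {
        prodSlicedDims[nbDims - d] = prodSlicedDims[nbDims - d + 1] * slicedDims[nbDims - d];
        prodInputDims[nbDims - d] = prodInputDims[nbDims - d + 1] * inputDims[nbDims - d];
    }

    // flattened start offset, playing the role of std::get<0>(attrs) in the kernel
    std::size_t startOffset = 0;
    for (std::size_t d = 0; d < nbDims; ++d) { startOffset += starts[d] * prodInputDims[d + 1]; }

    std::vector<int> output(prodSlicedDims[0]);
    std::size_t i = 0;
    for (std::size_t j = 0; j < prodSlicedDims[0];) {
        output[j] = input[startOffset + i];
        ++i;
        ++j;
        for (std::size_t idx = nbDims - 1; idx > 0; --idx) {
            i += (j % prodSlicedDims[idx] == 0) ? substractedDims[idx] * prodInputDims[idx + 1] : 0;
        }
    }

    // expected values obtained from the naive nested-loop slice of the same tensor
    const std::vector<int> expected = {5, 6, 9, 10, 17, 18, 21, 22};
    assert(output == expected);
    return 0;
}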
@@ -22,231 +22,55 @@
#include <cassert>
#include <tuple>
Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const {
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input");
// Requires the whole tensors
return std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<1>()[0];
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; }
Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
const std::vector<Aidge::DimSize_t>& inputsSize) const {
(void)outputIdx;
(void)inputsSize;
return std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<1>()[0];
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const {
return mNbConsumedData[0];
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const {
return mNbProducedData[0];
}
void Aidge::SliceImpl_cpu<1>::updateConsummerProducer() {
// each input is consumed by the minimum amount for a forward pass
mNbConsumedData[0] += getNbRequiredData(0);
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::SliceImpl_cpu<1>::forward() {
// FIXME: uncomment the following code once memory handling will work
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<SliceImplForward_cpu<1>>::create(
{std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()});
// Call kernel
kernelFunc(dynamic_cast<const Slice_Op<1>&>(mOp).getStaticAttributes(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<1>(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()
);
// each input is consumed by the minimum amount for a forward pass
mNbConsumedData[0] += getNbRequiredData(0);
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::SliceImpl_cpu<1>::backward() { printf("Not implemented yet.\n"); }
/////////////////////////////////////////////////////////////////////////
Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const {
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input");
// Requires the whole tensors
const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<2>();
return inputDims[0]*inputDims[1];
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; }
Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
const std::vector<Aidge::DimSize_t>& inputsSize) const {
(void)outputIdx;
(void)inputsSize;
const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<2>();
return outputDims[0]*outputDims[1];
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const {
return mNbConsumedData[0];
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const {
return mNbProducedData[0];
}
void Aidge::SliceImpl_cpu<2>::updateConsummerProducer() {
// each input is consumed by the minimum amount for a forward pass
mNbConsumedData[0] += getNbRequiredData(0);
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::SliceImpl_cpu<2>::forward() {
// FIXME: uncomment the following code once memory handling will work
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<SliceImplForward_cpu<2>>::create(
{std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()});
// Call kernel
kernelFunc(dynamic_cast<const Slice_Op<2>&>(mOp).getStaticAttributes(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<2>(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()
);
// each input is consumed by the minimum amount for a forward pass
mNbConsumedData[0] += getNbRequiredData(0);
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::SliceImpl_cpu<2>::backward() { printf("Not implemented yet.\n"); }
////////////////////////////////////////////////////////////////////////////
Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const {
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input");
// Requires the whole tensors
const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<3>();
return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1),
std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; }
Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
const std::vector<Aidge::DimSize_t>& inputsSize) const {
(void)outputIdx;
(void)inputsSize;
const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<3>();
return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1),
std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const {
return mNbConsumedData[0];
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const {
return mNbProducedData[0];
}
void Aidge::SliceImpl_cpu<3>::updateConsummerProducer() {
// each input is consumed by the minimum amount for a forward pass
mNbConsumedData[0] += getNbRequiredData(0);
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::SliceImpl_cpu<3>::forward() {
// FIXME: uncomment the following code once memory handling will work
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<SliceImplForward_cpu<3>>::create(
{std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()});
// Call kernel
kernelFunc(dynamic_cast<const Slice_Op<3>&>(mOp).getStaticAttributes(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<3>(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()
);
// each input is consumed by the minimum amount for a forward pass
mNbConsumedData[0] += getNbRequiredData(0);
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::SliceImpl_cpu<3>::backward() { printf("Not implemented yet.\n"); }
//////////////////////////////////////////////////////////////////////////////
Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const {
Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const {
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input");
// Requires the whole tensors
const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>();
const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims();
return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1),
std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; }
Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; }
Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
const std::vector<Aidge::DimSize_t>& inputsSize) const {
Aidge::NbElts_t Aidge::SliceImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
const std::vector<Aidge::DimSize_t>& inputsSize) const {
(void)outputIdx;
(void)inputsSize;
const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<4>();
const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims();
return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1),
std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const {
Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const {
return mNbConsumedData[0];
}
Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const {
Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const {
return mNbProducedData[0];
}
void Aidge::SliceImpl_cpu<4>::updateConsummerProducer() {
void Aidge::SliceImpl_cpu::updateConsummerProducer() {
// each input is consumed by the minimum amount for a forward pass
mNbConsumedData[0] += getNbRequiredData(0);
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::SliceImpl_cpu<4>::forward() {
void Aidge::SliceImpl_cpu::forward() {
// FIXME: uncomment the following code once memory handling will work
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<SliceImplForward_cpu<4>>::create(
auto kernelFunc = Registrar<SliceImplForward_cpu>::create(
{std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()});
// Call kernel
kernelFunc(dynamic_cast<const Slice_Op<4>&>(mOp).getStaticAttributes(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()
kernelFunc(dynamic_cast<const Slice_Op&>(mOp).getStaticAttributes(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()
);
// each input is consumed by the minimum amount for a forward pass
@@ -255,4 +79,4 @@ void Aidge::SliceImpl_cpu<4>::forward() {
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::SliceImpl_cpu<4>::backward() { printf("Not implemented yet.\n"); }
\ No newline at end of file
void Aidge::SliceImpl_cpu::backward() { printf("Not implemented yet.\n"); }
\ No newline at end of file
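
With the rank template removed, the element counts in getNbRequiredData() and getRequiredMemory() are now derived from the runtime dims() vector rather than from per-rank dims<N>() products. A minimal standalone sketch of that computation (hypothetical dims, not Aidge code):

#include <cassert>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

int main() {
    const std::vector<std::size_t> dims = {2, 3, 5, 5};   // hypothetical NCHW dims
    // product over the dims vector, as in the rewritten accessors above
    const std::size_t nbElts = std::accumulate(dims.begin(), dims.end(),
                                               static_cast<std::size_t>(1),
                                               std::multiplies<std::size_t>());
    assert(nbElts == 150);
    return 0;
}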
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <set>
#include "aidge/graph/GraphView.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/ReLU.hpp"
#include "aidge/recipies/Recipies.hpp"
#include "aidge/scheduler/Scheduler.hpp"
#include "aidge/operator/Concat.hpp"
namespace Aidge {
TEST_CASE("[core/recipies] Tiling(transformation)", "[Tiling][Recipies]") {
SECTION("Transform a pre-generated GraphView") {
SECTION("Simple Node: Conv") {
std::shared_ptr<Node> myReLU = ReLU("myReLU");
std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv");
std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
{
{
{{ 0, 1, 2},
{ 3, 4, 5},
{ 6, 7, 8}},
{{ 9, 10, 11},
{ 12, 13, 14},
{ 15, 16, 17}},
{{ 18, 19, 20},
{ 21, 22, 23},
{ 24, 25, 26}}
},
{
{{ 27, 28, 29},
{ 30, 31, 32},
{ 33, 34, 35}},
{{ 36, 37, 38},
{ 39, 40, 41},
{ 42, 43, 44}},
{{ 45, 46, 47},
{ 48, 49, 50},
{ 51, 52, 53}}
},
{
{{ 54, 55, 56},
{ 57, 58, 59},
{ 60, 61, 62}},
{{ 63, 64, 65},
{ 66, 67, 68},
{ 69, 70, 71}},
{{ 72, 73, 74},
{ 75, 76, 77},
{ 78, 79, 80}}
},
{
{{ 81, 82, 83},
{ 84, 85, 86},
{ 87, 88, 89}},
{{ 90, 91, 92},
{ 93, 94, 95},
{ 96, 97, 98}},
{{ 99, 100, 101},
{102, 103, 104},
{105, 106, 107}}
}
}
});
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
{
{
{{ 0, 1, 2, 3, 4},
{ 5, 6, 7, 8, 9},
{ 10, 11, 12, 13, 14},
{ 15, 16, 17, 18, 19},
{ 20, 21, 22, 23, 24}},
{{ 25, 26, 27, 28, 29},
{ 30, 31, 32, 33, 34},
{ 35, 36, 37, 38, 39},
{ 40, 41, 42, 43, 44},
{ 45, 46, 47, 48, 49}},
{{ 50, 51, 52, 53, 54},
{ 55, 56, 57, 58, 59},
{ 60, 61, 62, 63, 64},
{ 65, 66, 67, 68, 69},
{ 70, 71, 72, 73, 74}}
},
{
{{ 75, 76, 77, 78, 79},
{ 80, 81, 82, 83, 84},
{ 85, 86, 87, 88, 89},
{ 90, 91, 92, 93, 94},
{ 95, 96, 97, 98, 99}},
{{100, 101, 102, 103, 104},
{105, 106, 107, 108, 109},
{110, 111, 112, 113, 114},
{115, 116, 117, 118, 119},
{120, 121, 122, 123, 124}},
{{125, 126, 127, 128, 129},
{130, 131, 132, 133, 134},
{135, 136, 137, 138, 139},
{140, 141, 142, 143, 144},
{145, 146, 147, 148, 149}}
}
}
});
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
{
{
{{ 15226, 15577, 15928},
{ 16981, 17332, 17683},
{ 18736, 19087, 19438}},
{{ 37818, 38898, 39978},
{ 43218, 44298, 45378},
{ 48618, 49698, 50778}},
{{ 60426, 62235, 64044},
{ 69471, 71280, 73089},
{ 78516, 80325, 82134}},
{{ 83016, 85554, 88092},
{ 95706, 98244, 100782},
{108396, 110934, 113472}}
},
{
{{ 41551, 41902, 42253},
{ 43306, 43657, 44008},
{ 45061, 45412, 45763}},
{{118818, 119898, 120978},
{124218, 125298, 126378},
{129618, 130698, 131778}},
{{196101, 197910, 199719},
{205146, 206955, 208764},
{214191, 216000, 217809}},
{{273366, 275904, 278442},
{286056, 288594, 291132},
{298746, 301284, 303822}}
}
}
});
myReLU->getOperator()->associateInput(0, myInput);
myReLU->addChild(myConv, 0, 0);
myConv->getOperator()->setInput(1, myWeights);
myConv->getOperator()->setInput(2, myBias);
std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->computeOutputDims();
std::shared_ptr<GraphView> g = std::make_shared<GraphView>();
g->add({myReLU, myConv});
g->compile("cpu", DataType::Int32);
std::set<std::shared_ptr<Node>> tiledConv = getConvHorizontalTiling(myConv, 2, 3);
SequentialScheduler s(g);
s.forward();
REQUIRE(*(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->getOutput(0)) == *myOutput);
GraphView::replace({myConv, myConv->getParent(1), myConv->getParent(2)}, tiledConv);
g->compile("cpu", DataType::Int32);
s.resetScheduling();
s.forward();
REQUIRE(*(std::dynamic_pointer_cast<OperatorTensor>((*g->outputNodes().begin())->getOperator())->getOutput(0)) == *myOutput);
}
}
}
}
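
The test relies on getConvHorizontalTiling splitting the convolution output along axis 2 into three tiles whose results are concatenated back together. A minimal sketch of the receptive-field arithmetic this assumes (stride 1, no padding, as in the test; not the Aidge implementation) is shown below:

#include <cstdio>

int main() {
    const int kernelH = 3;   // conv kernel height used in the test
    const int outH    = 3;   // output height for a 5x5 input, 3x3 kernel, stride 1
    const int nbTiles = 3;   // the test tiles along axis 2 into 3 slices

    for (int t = 0; t < nbTiles; ++t) {
        const int outBegin = t * outH / nbTiles;
        const int outEnd   = (t + 1) * outH / nbTiles;
        // with stride 1 and no padding, output rows [begin, end) of a KxK conv
        // only read input rows [begin, end + K - 1)
        const int inBegin  = outBegin;
        const int inEnd    = outEnd + kernelH - 1;
        std::printf("tile %d: output rows [%d, %d) <- input rows [%d, %d)\n",
                    t, outBegin, outEnd, inBegin, inEnd);
    }
    return 0;
}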
// std::shared_ptr<GraphView> g = Sequential({
// Conv(3, 16, {3,3}, "conv1"),
// ReLU("relu1"),
// Conv(16, 32, {1,1}, "conv2"),
// Conv(32, 16, {1,1}, "conv3"),
// Conv(16, 10, {3,3}, "conv4"),
// ReLU("relu2")
// });
// for (auto& individualConv : g->match("Conv")) {
// auto tiledConv = horizontalTiling(individualConv);
// g->replace(individualConv, tiledConv);
// }
// }
// SECTION("Create the GraphView with tiled layers") {
// std::shared_ptr<GraphView> g;
// g->addChild(horizontalTiling(Conv()))
// }
// }
// } // namespace Aidge
\ No newline at end of file