Commit f43bebf7 authored by Vincent Templier

Add other operators in CPU library

parent 41e0f1b4
#ifndef ConvImpl2D_ref_cpp_H_
#define ConvImpl2D_ref_cpp_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "backend/OperatorImpl.hpp"
#include "operator/Conv.hpp"
#include "utils/Registrar.hpp"
#include "utils/Types.h"
namespace Aidge {
// class Conv_Op;
// compute kernel registry for forward and backward
class ConvImpl2DForward_ref_cpp
: public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvImpl2DBackward_ref_cpp
: public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvImpl2D_ref_cpp : public OperatorImpl {
private:
const Conv_Op<2> &mOp;
std::array<NbElts_t, 3> mNbConsumedData;
std::array<NbElts_t, 1> mNbProducedData;
public:
ConvImpl2D_ref_cpp(const Conv_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
static std::unique_ptr<ConvImpl2D_ref_cpp> create(const Conv_Op<2> &op) {
return std::make_unique<ConvImpl2D_ref_cpp>(op);
}
public:
NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final;
NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final;
void forward();
void backward();
};
namespace {
// add ref_cpp backend to Conv_Op<2> implementation registry
static Registrar<Conv_Op<2>> registrarConvImpl2D_ref_cpp("cpu", Aidge::ConvImpl2D_ref_cpp::create);
} // namespace
} // namespace Aidge
#endif /* ConvImpl2D_ref_cpp_H_ */
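
The registrar above exposes this implementation under the "cpu" key. As a minimal, illustrative sketch only (assuming Registrar<>::create performs the same lookup-by-key used for the kernel registries below; the helper name makeCpuConvImpl is hypothetical), a backend implementation could be retrieved for an existing operator like this:

#include <memory>
#include "operator/ConvImpl.hpp"

// Hypothetical helper, sketch only: look up the "cpu" factory registered in
// ConvImpl.hpp and build the implementation for an existing Conv_Op<2>.
std::unique_ptr<Aidge::ConvImpl2D_ref_cpp> makeCpuConvImpl(const Aidge::Conv_Op<2> &op) {
    auto factory = Aidge::Registrar<Aidge::Conv_Op<2>>::create("cpu");
    return factory(op);
}
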
#ifndef ConvImpl2D_ref_cpp_forward_kernel_H_
#define ConvImpl2D_ref_cpp_forward_kernel_H_
#include "utils/Registrar.hpp"
#include "operator/ConvImpl.hpp"
#include "utils/Types.h"
#include <array>
#include <algorithm>
namespace Aidge {
/**
* @brief Forward kernel for 2D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param params tuple of Parameters from the Operator
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const Bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvImpl2D_ref_cpp_forward_kernel(const Conv_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
const void *input_, const void *weights_, const void *biases_, void *output_) {
// FIXME: missing convolution parameters as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
/*
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
static_cast<float>(std::get<0>(params)[0]));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
static_cast<float>(std::get<0>(params)[1]));
// TODO: kernel computation
// output (Xout, Yout, outCh, batch)
// input (Xin, Yin, inCh, batch)
// weight (kernelX, kernelY, inCh, outCh)
// does not take Dilation parameter into account
for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t ix = ox * std::get<0>(params)[0];
const std::size_t iy = oy * std::get<0>(params)[1];
for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox));
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
for (std::size_t batch = 0; batch < dims[3]; ++batch) {
output[oIndex + batch] = biasVal;
}
for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) {
for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) {
const std::size_t wIndex =
outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx));
std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
for (std::size_t batch = 0; batch < dims[3]; ++batch) {
output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
}
}
}
}
}
}
}
*/
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(static_cast<float>(dims[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
static_cast<float>(std::get<0>(params)[0]));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(static_cast<float>(dims[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
static_cast<float>(std::get<0>(params)[1]));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, inCh, Xin, Yin)
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation parameter into account
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize;
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const std::size_t ix = ox * std::get<0>(params)[0];
const std::size_t iy = oy * std::get<0>(params)[1];
for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) {
for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) {
output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] *
input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
}
}
}
}
}
}
}
}
namespace {
static Registrar<ConvImpl2DForward_ref_cpp> registrarConvImpl2DForward_ref_cpp_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::ConvImpl2D_ref_cpp_forward_kernel<float, float, float, float>);
static Registrar<ConvImpl2DForward_ref_cpp> registrarConvImpl2DForward_ref_cpp_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvImpl2D_ref_cpp_forward_kernel<int, int, int, int>);
static Registrar<ConvImpl2DForward_ref_cpp> registrarConvImpl2DForward_ref_cpp_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::ConvImpl2D_ref_cpp_forward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* ConvImpl2D_ref_cpp_forward_kernel_H_ */
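
In the kernel above, the output spatial size along each dimension is computed as (inputDim - kernelDim + stride) / stride, i.e. a valid convolution with no padding and a dilation of 1. A small standalone check of that formula (plain standard C++, independent of the Aidge types):

#include <cassert>
#include <cstddef>

// Output-size rule used by ConvImpl2D_ref_cpp_forward_kernel: no padding,
// dilation of 1.
std::size_t convOutSize(std::size_t inDim, std::size_t kernelDim, std::size_t stride) {
    return (inDim - kernelDim + stride) / stride;
}

int main() {
    assert(convOutSize(7, 3, 2) == 3);  // kernel placed at input positions 0, 2 and 4
    assert(convOutSize(7, 3, 1) == 5);  // stride 1: inDim - kernelDim + 1
    return 0;
}
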
#ifndef FCImpl_ref_cpp_H_
#define FCImpl_ref_cpp_H_
#include "backend/OperatorImpl.hpp"
#include "operator/FC.hpp"
#include "utils/Registrar.hpp"
#include "utils/Types.h"
#include <memory>
#include <vector>
#include <array>
namespace Aidge {
// class FC_Op;
// compute kernel registry for forward and backward
class FCImplForward_ref_cpp : public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t,
const void *, const void *, const void *, void *)> {};
class FCImplBackward_ref_cpp : public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t,
const void *, const void *, const void *, void *)> {};
class FCImpl_ref_cpp : public OperatorImpl {
private:
const FC_Op &mOp;
std::array<NbElts_t, 3> mNbConsumedData;
std::array<NbElts_t, 1> mNbProducedData;
public:
FCImpl_ref_cpp(const FC_Op &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
static std::unique_ptr<FCImpl_ref_cpp> create(const FC_Op &op) { return std::make_unique<FCImpl_ref_cpp>(op); }
public:
NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final;
NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final;
void forward();
void backward();
};
namespace {
static Registrar<FC_Op> registrarFCImpl_ref_cpp("cpu", Aidge::FCImpl_ref_cpp::create);
}
} // namespace Aidge
#endif /* FCImpl_ref_cpp_H_ */
#ifndef FCImpl_ref_cpp_forward_kernel_H_
#define FCImpl_ref_cpp_forward_kernel_H_
#include "utils/Registrar.hpp"
#include <algorithm>
#include <numeric>
#include "operator/FCImpl.hpp"
namespace Aidge {
// template <class I, class W, class B, class O>
// void FCImpl_ref_cpp_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims,
// const void* input_, const void* weights_, const void* biases_, void* output_) {
// // FIXME: missing FC parameters as arguments
// const I* input = static_cast<const I*>(input_);
// const W* weights = static_cast<const W*>(weights_);
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
// std::size_t oIndex = outIdx * dims[3];
// const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] = bias;
// }
// }
// for (std::size_t ix = 0; ix < dims[0]; ++ix) {
// for (std::size_t iy = 0; iy < dims[1]; ++iy) {
// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
// const std::size_t oIndex = dims[3] * outCh;
// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) +
// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
// }
// }
// }
// }
// }
// }
// template <class I, class W, class B, class O>
// void FCImpl_ref_cpp_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims,
// const void* input_, const void* weights_, const void* biases_, void* output_) {
// // FIXME: missing FC parameters as arguments
// const I* input = static_cast<const I*>(input_);
// const W* weights = static_cast<const W*>(weights_);
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
// std::size_t oIndex = outIdx * dims[0];
// const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// output[oIndex + batch] = bias;
// }
// }
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// const std::size_t oIndex = dims[1] * batch;
// for (std::size_t i = 0; i < dims[1]; ++i) {
// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3];
// output[oIndex + outCh] += weights[wIndex] * input[i + batch];
// }
// }
// }
// }
template <class I, class W, class B, class O>
void FCImpl_ref_cpp_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize,
const void* input_, const void* weights_, const void* biases_, void* output_) {
// FIXME: missing FC parameters as arguments
const I* input = static_cast<const I*>(input_);
const W* weights = static_cast<const W*>(weights_);
const B* biases = static_cast<const B*>(biases_);
O* output = static_cast<O*>(output_);
if (std::get<1>(params)) {
std::fill(output, output+(batchSize*std::get<0>(params)), B(0));
}
else {
for (std::size_t batch = 0; batch < batchSize; ++batch) {
std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params)));
}
}
for (std::size_t batch = 0; batch < batchSize; ++batch) {
for (std::size_t out = 0; out < std::get<0>(params); ++out) {
output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize,
input + (batch + 1)*oneInputSize,
weights + out*oneInputSize,
output[out + batch*std::get<0>(params)]);
}
}
}
namespace {
static Registrar<FCImplForward_ref_cpp> registrarFCImpl2DForward_ref_cpp_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::FCImpl_ref_cpp_forward_kernel<float, float, float, float>);
static Registrar<FCImplForward_ref_cpp> registrarFCImpl2DForward_ref_cpp_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::FCImpl_ref_cpp_forward_kernel<int, int, int, int>);
static Registrar<FCImplForward_ref_cpp> registrarFCImpl2DForward_ref_cpp_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::FCImpl_ref_cpp_forward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* FCImpl_ref_cpp_forward_kernel_H_ */
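
The kernel above first writes the bias (or zero when the no-bias parameter is set) into each output row, then accumulates one inner product per output neuron, with the weights laid out as [outChannels][inputSize]. A self-contained numeric check of that scheme, written against plain standard containers rather than the Aidge tensor types:

#include <cassert>
#include <cstddef>
#include <numeric>
#include <vector>

int main() {
    const std::size_t batchSize = 1, inputSize = 3, outSize = 2;
    const std::vector<float> input   = {1.f, 2.f, 3.f};
    // Weights stored out-major, as expected by FCImpl_ref_cpp_forward_kernel.
    const std::vector<float> weights = {1.f, 0.f, 1.f,   // neuron 0
                                        0.f, 1.f, 0.f};  // neuron 1
    const std::vector<float> biases  = {0.5f, -0.5f};
    std::vector<float> output(batchSize * outSize);

    for (std::size_t batch = 0; batch < batchSize; ++batch) {
        for (std::size_t out = 0; out < outSize; ++out) {
            output[out + batch * outSize] = std::inner_product(
                input.begin() + batch * inputSize,
                input.begin() + (batch + 1) * inputSize,
                weights.begin() + out * inputSize,
                biases[out]);
        }
    }
    assert(output[0] == 4.5f);  // 1*1 + 2*0 + 3*1 + 0.5
    assert(output[1] == 1.5f);  // 1*0 + 2*1 + 3*0 - 0.5
    return 0;
}
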
#ifndef ProducerImpl_ref_cpp_H_
#define ProducerImpl_ref_cpp_H_
#include "utils/Types.h"
#include "backend/OperatorImpl.hpp"
#include "operator/Producer.hpp"
#include "utils/Registrar.hpp"
#include <memory>
#include <vector>
namespace Aidge {
template <DimIdx_t DIM>
class ProducerImpl_ref_cpp : public OperatorImpl {
private:
const Producer_Op<DIM> &mOp;
public:
ProducerImpl_ref_cpp(const Producer_Op<DIM> &op) : mOp(op) {}
static std::unique_ptr<ProducerImpl_ref_cpp> create(const Producer_Op<DIM> &op) {
return std::make_unique<ProducerImpl_ref_cpp>(op);
}
public:
NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final;
NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final;
void forward();
void backward();
};
namespace {
static Registrar<Producer_Op<1>> registrarProducer1DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<1>::create);
static Registrar<Producer_Op<2>> registrarProducer2DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<2>::create);
static Registrar<Producer_Op<3>> registrarProducer3DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<3>::create);
static Registrar<Producer_Op<4>> registrarProducer4DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<4>::create);
static Registrar<Producer_Op<5>> registrarProducer5DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<5>::create);
} // namespace
} // namespace Aidge
#endif /* ProducerImpl_ref_cpp_H_ */
#ifndef ReLUImpl_ref_cpp_H_
#define ReLUImpl_ref_cpp_H_
#include "backend/OperatorImpl.hpp"
#include "operator/ReLU.hpp"
#include "utils/Registrar.hpp"
#include <array>
#include <memory>
#include <vector>
namespace Aidge {
// class ReLU_Op;
// compute kernel registry for forward and backward
class ReLUImplForward_ref_cpp
: public Registrable<std::tuple<DataType, DataType>, void(const ReLU_Op::Parameters&, std::size_t, const void*, void*)> {
};
class ReLUImplBackward_ref_cpp
: public Registrable<std::tuple<DataType, DataType>, void(const ReLU_Op::Parameters&, std::size_t, const void*, void*)> {
};
class ReLUImpl_ref_cpp : public OperatorImpl {
private:
const ReLU_Op& mOp;
std::array<NbElts_t, 1> mNbConsumedData;
std::array<NbElts_t, 1> mNbProducedData;
public:
ReLUImpl_ref_cpp(const ReLU_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
static std::unique_ptr<ReLUImpl_ref_cpp> create(const ReLU_Op& op) {
return std::make_unique<ReLUImpl_ref_cpp>(op);
}
public:
NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final;
NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final;
void forward();
void backward();
};
namespace {
static Registrar<ReLU_Op> registrarReLUImpl_ref_cpp("cpu", Aidge::ReLUImpl_ref_cpp::create);
}
} // namespace Aidge
#endif /* ReLUImpl_ref_cpp_H_ */
#ifndef ReLUImpl_ref_cpp_forward_kernel_H_
#define ReLUImpl_ref_cpp_forward_kernel_H_
#include "utils/Registrar.hpp"
#include "operator/ReLUImpl.hpp"
namespace Aidge {
template <class I, class O>
void ReLUImpl_ref_cpp_forward_kernel(const ReLU_Op::Parameters& params,
std::size_t inputLength,
const void* input_,
void* output_) {
// FIXME: missing ReLU parameters as arguments
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = input[i] >= 0 ? input[i] : input[i] * static_cast<I>(std::get<0>(params));
}
}
namespace {
static Registrar<ReLUImplForward_ref_cpp> registrarReLUImplForward_ref_cpp_Float32(
{DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_ref_cpp_forward_kernel<float, float>);
static Registrar<ReLUImplForward_ref_cpp> registrarReLUImplForward_ref_cpp_Int32(
{DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_ref_cpp_forward_kernel<int, int>);
static Registrar<ReLUImplForward_ref_cpp> registrarReLUImplForward_ref_cpp_Float64(
{DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_ref_cpp_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* ReLUImpl_ref_cpp_forward_kernel_H_ */
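
Note that the forward kernel above is really a parameterised (leaky) ReLU: negative inputs are scaled by the operator's first parameter instead of being clamped, so a slope of 0 yields the standard ReLU. A tiny standalone illustration of that element-wise rule:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
    // Mirrors ReLUImpl_ref_cpp_forward_kernel: x >= 0 ? x : x * slope.
    const std::vector<float> input = {-2.f, -0.5f, 0.f, 3.f};
    const float slope = 0.5f;  // stand-in for std::get<0>(params)
    std::vector<float> output(input.size());
    for (std::size_t i = 0; i < input.size(); ++i)
        output[i] = input[i] >= 0 ? input[i] : input[i] * slope;
    assert(output[0] == -1.f && output[1] == -0.25f && output[3] == 3.f);
    return 0;
}
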
#include "operator/ConvImpl.hpp"
#include <cassert>
#include <chrono>
#include <numeric>
#include <thread>
#include <vector>
#include "operator/ConvImpl_kernels.hpp"
#include "operator/Conv.hpp"
#include "utils/Types.h"
Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getNbRequiredData(Aidge::IOIndex_t inputIdx) const {
assert(mOp.getInput(inputIdx) && "requires valid input");
// Requires the whole tensors
const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
// for the direct convolution algorithm, convolutions can be in-place, if
// there is no padding!
return 0;
}
Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getRequiredMemory(Aidge::IOIndex_t outputIdx,
const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const {
// Requires the whole tensors, regardless of available data on inputs
assert(outputIdx == 0 && "operator has only one output");
const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
}
Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
return mNbProducedData[static_cast<std::size_t>(outputIdx)];
}
void Aidge::ConvImpl2D_ref_cpp::forward() {
// FIXME: uncomment the following code once memory handling works
assert(mOp.mInputs[0] && "missing input #0");
assert(mOp.mInputs[1] && "missing input #1");
assert(mOp.mInputs[2] && "missing input #2");
// Find the correct kernel type
auto kernelFunc =
Registrar<ConvImpl2DForward_ref_cpp>::create({mOp.mInputs[0]->dataType(), mOp.mInputs[1]->dataType(),
mOp.mInputs[2]->dataType(), mOp.mOutput->dataType()});
// Call kernel
kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.mInputs[0])->dims<4>(),
mOp.mInputs[0]->getImpl()->rawPtr(), mOp.mInputs[1]->getImpl()->rawPtr(),
mOp.mInputs[2]->getImpl()->rawPtr(), mOp.mOutput->getImpl()->rawPtr());
// FIXME: Dummy wait for some earlier scheduler tests
std::this_thread::sleep_for(std::chrono::milliseconds(mOp.get<ConvParam::OutChannels>()));
// Update producer-consumer data
for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum
// amount for a forward pass
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::ConvImpl2D_ref_cpp::backward() { printf("Not implemented yet.\n"); }
#include <cassert>
#include <chrono>
#include <numeric>
#include <thread>
#include <vector>
#include "operator/FC.hpp"
#include "operator/FCImpl.hpp"
#include "operator/FCImpl_kernels.hpp"
#include "utils/Types.h"
Aidge::NbElts_t Aidge::FCImpl_ref_cpp::getNbRequiredData(Aidge::IOIndex_t inputIdx) const
{
assert(mOp.getInput(inputIdx) && "requires valid input");
// Requires the whole tensors
const auto &inputDims
= std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
return std::accumulate(
inputDims.begin(),
inputDims.end(),
Aidge::NbElts_t(1),
std::multiplies<Aidge::NbElts_t>());
}
Aidge::NbElts_t
Aidge::FCImpl_ref_cpp::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const
{
// no input data needs to be protected for this reference implementation
return 0;
}
Aidge::NbElts_t Aidge::FCImpl_ref_cpp::getRequiredMemory(
IOIndex_t outputIdx, const std::vector<DimSize_t> & /*inputsSize*/) const
{
// Requires the whole tensors, regardless of available data on inputs
assert(outputIdx == 0 && "operator has only one output");
const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
return std::accumulate(
outputDims.begin(),
outputDims.end(),
static_cast<NbElts_t>(1),
std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::FCImpl_ref_cpp::getNbConsumedData(Aidge::IOIndex_t inputIdx) const
{
assert((inputIdx >= 0) && (static_cast<IONb_t>(inputIdx) < mNbConsumedData.size()));
return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
}
Aidge::NbElts_t Aidge::FCImpl_ref_cpp::getNbProducedData(Aidge::IOIndex_t outputIdx) const
{
assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size());
return mNbProducedData[static_cast<std::size_t>(outputIdx)];
}
void Aidge::FCImpl_ref_cpp::forward()
{
// FIXME: uncomment the following code once memory handling works
assert(mOp.mInputs[0] && "missing input #0");
assert(mOp.mInputs[1] && "missing input #1");
assert(mOp.mInputs[2] && "missing input #2");
// Find the correct kernel type
auto kernelFunc = Registrar<FCImplForward_ref_cpp>::create(
{mOp.mInputs[0]->dataType(),
mOp.mInputs[1]->dataType(),
mOp.mInputs[2]->dataType(),
mOp.mOutput->dataType()});
// Call kernel
// if (mOp.mInputs[0]->nbDims() == 4) {
// kernelFunc(
// mOp.getParams(),
// std::static_pointer_cast<Tensor>(mOp.mInputs[0])->dims<4>(),
// mOp.mInputs[0]->getImpl()->rawPtr(),
// mOp.mInputs[1]->getImpl()->rawPtr(),
// mOp.mInputs[2]->getImpl()->rawPtr(),
// mOp.mOutput->getImpl()->rawPtr());
// }
// else
kernelFunc(
mOp.getParams(),
mOp.mInputs[0]->dims()[0],
mOp.mInputs[0]->sizeM1(),
mOp.mInputs[0]->getImpl()->rawPtr(),
mOp.mInputs[1]->getImpl()->rawPtr(),
mOp.mInputs[2]->getImpl()->rawPtr(),
mOp.mOutput->getImpl()->rawPtr());
// FIXME: Dummy wait for some earlier scheduler tests
std::this_thread::sleep_for(std::chrono::milliseconds(mOp.get<FCParam::OutChannels>()));
// Update producer-consumer data
for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
mNbConsumedData[inputIdx]
+= getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum
// amount for a forward pass
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::FCImpl_ref_cpp::backward()
{
printf("Not implemented yet.\n");
}
#include <cassert>
#include <numeric>
#include <vector>
#include "data/Tensor.hpp"
#include "operator/Producer.hpp"
#include "utils/Types.h"
#include "operator/ProducerImpl.hpp"
template<Aidge::DimIdx_t DIM>
Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getNbRequiredData(
Aidge::IOIndex_t /*inputIdx*/) const
{
return 0;
}
template<Aidge::DimIdx_t DIM>
Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getNbConsumedData(
Aidge::IOIndex_t /*inputIdx*/) const
{
return 0;
}
template<Aidge::DimIdx_t DIM>
Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getNbRequiredProtected(
Aidge::IOIndex_t /*inputIdx*/) const
{
return 0;
}
template<Aidge::DimIdx_t DIM>
Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getRequiredMemory(
IOIndex_t outputIdx, const std::vector<DimSize_t> & /*inputsSize*/) const
{
// Requires the whole tensors, regardless of available data on inputs
assert(outputIdx == 0 && "operator has only one output");
const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
return std::accumulate(
outputDims.begin(),
outputDims.end(),
NbElts_t(1),
std::multiplies<NbElts_t>());
}
template<Aidge::DimIdx_t DIM>
Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getNbProducedData(
Aidge::IOIndex_t /*outputIdx*/) const
{
return getRequiredMemory(0, {});
}
template<Aidge::DimIdx_t DIM> void Aidge::ProducerImpl_ref_cpp<DIM>::forward()
{
}
template<Aidge::DimIdx_t DIM> void Aidge::ProducerImpl_ref_cpp<DIM>::backward()
{
printf("Not implemented yet.\n");
}
#include <cassert>
#include <numeric>
#include <chrono>
#include <thread>
#include "operator/ReLU.hpp"
#include "operator/ReLUImpl.hpp"
#include "operator/ReLUImpl_kernels.hpp"
#include "utils/Types.h"
#include <vector>
// FIXME: replace whole Tensor with minimum needed data quantity
Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
assert(mOp.getInput(0) && "requires valid input");
// Requires the whole tensors
const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims();
return std::accumulate(inputDims.begin(), inputDims.end(),
static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const {
// ReLU is applied element-wise, so it can be computed in-place and no input data needs to be protected
return 0;
}
Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getRequiredMemory(Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
return std::accumulate(outputDims.begin(), outputDims.end(),
static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const {
return mNbConsumedData[0];
}
Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
return mNbProducedData[0];
}
void Aidge::ReLUImpl_ref_cpp::forward() {
// FIXME: uncomment the following code once memory handling works
assert(mOp.mInputs[0] && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<ReLUImplForward_ref_cpp>::create({
mOp.mInputs[0]->dataType(),
mOp.mOutput->dataType()});
// Call kernel
kernelFunc(mOp.getParams(),
std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
mOp.mInputs[0]->getImpl()->rawPtr(),
mOp.mOutput->getImpl()->rawPtr());
mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
mNbProducedData[0]+= getRequiredMemory(0, {});
}
void Aidge::ReLUImpl_ref_cpp::backward() {
printf("Not implemented yet.\n");
}