Compare revisions — aidge_backend_cpu
Showing 1132 additions and 380 deletions
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_DATA_BROADCASTING_H_
#define AIDGE_CPU_DATA_BROADCASTING_H_
#include <vector>
namespace Aidge {
// Function to broadcast an input dims vector into the same size as an outputDims vector
/**
* @brief Broadcast an input dims vector to the same size as an outputDims vector
* @details The missing dimensions are filled with 1
* @param outputDims The vector of dimensions to follow
* @param dimsToBroadcast The vector of dimensions to broadcast
* @return std::vector<std::size_t> the broadcasted vector, with 1s added for the missing dimensions.
*/
std::vector<std::size_t> getBroadcastedDims(const std::vector<std::size_t>& outputDims, const std::vector<std::size_t>& dimsToBroadcast);
/**
* @brief Get a vector of indexes along the dimensions vector from a flattened index
* @param dimensions The vector of dimensions we want the indexes on
* @param idx The flattened index
* @return std::vector<std::size_t> vector of indexes along dimensions.
*/
std::vector<std::size_t> getMultiDimIndices(const std::vector<std::size_t>& dimensions, std::size_t idx);
// Function to get a flattened index from multi-dimensional indices
/**
* @brief Get a flattened index in the dimensions vector from a given vector of indices on a broadcasted vector
* @param dimensions The vector of dimensions we want the flattened index on
* @param indices The vector of indices we want to flatten
* @return std::size_t The flattened index on the dimensions vector
*/
std::size_t getFlattenedIndex(const std::vector<std::size_t>& dimensions, const std::vector<std::size_t>& indices);
} // namespace Aidge
#endif // AIDGE_CPU_DATA_BROADCASTING_H_
\ No newline at end of file
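A short usage sketch of the three index helpers declared above may help. It is illustrative only: the row-major flattening and the exact behaviour of getBroadcastedDims are assumptions based on the doxygen, not code taken from this diff.

#include <cstddef>
#include <vector>
#include "aidge/backend/cpu/data/Broadcasting.hpp"

void indexHelpersSketch() {
    const std::vector<std::size_t> dims{2, 3, 4};
    // Flat index 17 should correspond to the multi-dimensional index {1, 1, 1}
    // under row-major ordering: 1*(3*4) + 1*4 + 1 = 17.
    const std::vector<std::size_t> idx = Aidge::getMultiDimIndices(dims, 17);
    // Converting back is expected to return the original flat index.
    const std::size_t flat = Aidge::getFlattenedIndex(dims, idx); // expected: 17
    // Padding a smaller dims vector with leading 1s to match the output rank:
    const std::vector<std::size_t> padded =
        Aidge::getBroadcastedDims({2, 3, 4}, {3, 4}); // expected: {1, 3, 4}
    (void)flat; (void)padded;
}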
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_DATA_INTERPOLATION_H_
#define AIDGE_CPU_DATA_INTERPOLATION_H_
#include <vector>
#include <aidge/data/Interpolation.hpp>
#include <aidge/utils/Types.h>
namespace Aidge {
class InterpolationCPU : public Interpolation {
public:
/**
* @brief Interpolates values given via input in the given mode.
*
* Values are contiguously arranged in a "square" shape around the point to
* interpolate, depending on the interpolation mode.
* The point to be interpolated is located right in the
* middle of all points.
* Immediate neighbours :
* 1D interp :        2D interp :
*                    . . . . . .
* . . 1 2 . .        . . . . . .
*                    . . 1 2 . .
*                    . . 3 4 . .
*                    . . . . . .
*                    . . . . . .
*
* 2 neighbours :
* 1D interp :            2D interp :
*                        . . . .  .  .  . .
*                        . . . .  .  .  . .
* . . 1 2 3 4 . .        . . 1 2  3  4  . .
*                        . . 5 6  7  8  . .
*                        . . 9 10 11 12 . .
*                        . . 13 14 15 16 . .
*                        . . . .  .  .  . .
*                        . . . .  .  .  . .
*
* @param[in] coordsToInterpolate: coordinates, in the original picture frame, of
* the point to interpolate. These coordinates are generated with
* Interpolation::untransformCoords(coordsInInterpolatedTensor)
* @param[in] points : points to interpolate, arranged in a vector of
* pairs ((point_coord), value) :
* [[[X1, X2, ..., XN], Xval], ...., [[A1, A2, ..., AN], Aval]].
* With :
* - N: the number of dimensions.
* - A: the number of points of the grid to interpolate.
* - All coordinates expressed in the originalTensor frame.
* @param[in] interpMode: interpolation mode
* @return interpolated value
*/
template <typename T>
static T interpolate(const std::vector<float> &coordsToInterpolate,
const std::set<Point<T>> &points,
const Mode interpMode = Interpolation::Mode::Linear);
/**
* @brief Performs linear interpolation on the given points.
* @param[in] points: points to interpolate; since only an average of
* all values is computed, their indices are not used.
* @return interpolated value
*/
template <typename T>
static T linear(const std::vector<float> &originalCoords,
const std::set<Point<T>> &points);
/**
* @brief Performs nearest interpolation on the given points.
* @note It is a wrapper for the linearRecurse() private method.
* @param[in] coordsToInterpolate: coordinates to interpolate
* @param[in] points: points to interpolate
* @param[in] nearestMode: interpolation method; it must be one of the Nearest
* modes, otherwise the function will throw an error.
* @return interpolated value
*/
template <typename T>
static T nearest(const std::vector<float> &coordsToInterpolate,
const std::set<Point<T>> &points,
const Interpolation::Mode nearestMode);
private:
/**
* @brief Actual linear interpolation function. It will:
* - Split all points along each dimension depending on whether their
* coordinate at index alongDim is above or under coordsToInterpolate, until
* they are 1-to-1.
* - Perform interpolation between the 2 leftover points and return the
* interpolated point to the parent call as a set of size 1.
* - Repeat until all dimensions have been interpolated.
* @param[in] coordsToInterpolate: coordinates to interpolate
* @param[in] points: points to interpolate
* @param[in] alongDim: dimension along which the points are being segregated.
* @return the interpolated point, wrapped in a set of size 1 once every
* dimension has been processed.
*/
template <typename T>
static std::set<Interpolation::Point<T>>
linearRecurse(const std::vector<float> &coordsToInterpolate,
const std::set<Point<T>> &points,
const DimIdx_t alongDim = 0);
};
} // namespace Aidge
#endif // AIDGE_CPU_DATA_INTERPOLATION_H_
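To make the linear mode above concrete, here is a minimal 1D sketch of the base case that linearRecurse() reduces to once points are 1-to-1 along a dimension; the helper name and layout are illustrative, not taken from the repository.

// Interpolate between two neighbouring grid points (x0, v0) and (x1, v1) at coordinate x.
template <typename T>
T lerp1D(float x, float x0, T v0, float x1, T v1) {
    const float t = (x - x0) / (x1 - x0); // relative position between the two neighbours
    return static_cast<T>(v0 + (v1 - v0) * t);
}
// Example: points at x0 = 0 (value 2) and x1 = 1 (value 6), query x = 0.3
// lerp1D(0.3f, 0.0f, 2.0f, 1.0f, 6.0f) == 3.2f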
#ifndef AIDGE_CPU_DATA_TENSORIMPL_H_
#define AIDGE_CPU_DATA_TENSORIMPL_H_
#include "aidge/backend/TensorImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class T>
class TensorImpl_cpu : public TensorImpl {
private:
const Tensor &mTensor; // Impl needs to access Tensor information, but is not
// supposed to change it!
std::vector<T> mData;
public:
static constexpr const char *Backend = "cpu";
TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {}
bool operator==(const TensorImpl &otherImpl) const override final {
std::size_t i = 0;
for (; i < mTensor.size() &&
mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i];
++i) {
}
return i == mTensor.size();
}
static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) {
return std::make_unique<TensorImpl_cpu<T>>(tensor);
}
// native interface
const std::vector<T> &data() const { return mData; }
std::size_t scalarSize() const override { return sizeof(T); }
void copy(const void *src, NbElts_t length) override {
std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length,
static_cast<T *>(rawPtr()));
}
void *rawPtr() override {
lazyInit(mData);
return mData.data();
};
void* getRaw(std::size_t idx){
return static_cast<void*>(static_cast<T *>(rawPtr()) + idx);
};
virtual ~TensorImpl_cpu() = default;
void setRawPtr(void *ptr) override final {
T *newPtr = static_cast<T *>(ptr);
mData = std::vector<T>(newPtr, newPtr + mTensor.size());
};
private:
void lazyInit(std::vector<T> &data) {
assert(mTensor.dataType() == NativeType<T>::type);
if (data.size() != mTensor.size()) data.resize(mTensor.size());
}
};
namespace {
static Registrar<Tensor> registrarTensorImpl_cpu_Float64(
{"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float32(
{"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
{"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ABSIMPL_H_
#define AIDGE_CPU_OPERATOR_ABSIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Abs.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using AbsImpl_cpu = OperatorImpl_cpu<Abs_Op,
void(const std::size_t, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Abs_Op, "cpu", Aidge::AbsImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ABSIMPL_H_ */
@@ -9,16 +9,18 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_
#ifndef AIDGE_CPU_OPERATOR_ABSIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ABSIMPL_KERNELS_H_
#include <cmath>
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
#include "aidge/backend/cpu/operator/AbsImpl.hpp"
namespace Aidge {
template <class I, class O>
void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
void AbsImpl_cpu_forward_kernel(std::size_t inputLenght,
const void* input_,
void* output_) {
@@ -26,18 +28,20 @@ void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = input[i] > 0 ? input[i] : 0;
output[i] = std::abs(input[i]);
}
}
namespace {
static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_cpu_forward_kernel<float, float>);
static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_cpu_forward_kernel<int, int>);
static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_cpu_forward_kernel<double, double>);
} // namespace
// Kernels registration to implementation entry point
REGISTRAR(AbsImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(AbsImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(AbsImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_ABSIMPL_KERNELS_H_ */
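As a quick sanity check of the kernel above, a direct call sketch (standalone illustration, not part of the diff; the include path is assumed from the header guard):

#include "aidge/backend/cpu/operator/AbsImpl_kernels.hpp" // assumed header path for the file above

void absKernelSketch() {
    const float in[4] = {-1.5f, 2.0f, -3.0f, 0.0f};
    float out[4];
    // length = 4, then element-wise absolute value
    Aidge::AbsImpl_cpu_forward_kernel<float, float>(4, in, out);
    // out == {1.5f, 2.0f, 3.0f, 0.0f}
}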
@@ -12,99 +12,23 @@
#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_H_
#define AIDGE_CPU_OPERATOR_ADDIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include <cstddef> // std::size_t
#include <memory> // std::unique_ptr, std::make_unique
#include <string>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Add.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Add_Op<2>;
// compute kernel registry for forward and backward
template <DimIdx_t NUM>
class AddImplForward_cpu;
template <DimIdx_t NUM>
class AddImplBackward_cpu;
template <>
class AddImplForward_cpu<1>
: public Registrable<AddImplForward_cpu<1>, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {};
template <>
class AddImplBackward_cpu<1>
: public Registrable<AddImplBackward_cpu<1>, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {};
template <>
class AddImplForward_cpu<2> : public Registrable<AddImplForward_cpu<2>, std::tuple<DataType, DataType, DataType>,
void(const std::size_t, const void*, const void*, void*)> {};
template <>
class AddImplBackward_cpu<2> : public Registrable<AddImplBackward_cpu<2>, std::tuple<DataType, DataType, DataType>,
void(const std::size_t, const void*, const void*, void*)> {};
template <>
class AddImplForward_cpu<3> : public Registrable<AddImplForward_cpu<3>, std::tuple<DataType, DataType, DataType, DataType>,
void(const std::size_t, const void*, const void*, const void*, void*)> {
};
template <>
class AddImplBackward_cpu<3>
: public Registrable<AddImplBackward_cpu<3>, std::tuple<DataType, DataType, DataType, DataType>,
void(const std::size_t, const void*, const void*, const void*, void*)> {};
template <DimIdx_t NUM>
class AddImpl_cpu : public OperatorImpl {
public:
AddImpl_cpu(const Add_Op<NUM>& op) : OperatorImpl(op) {}
static std::unique_ptr<AddImpl_cpu<NUM>> create(const Add_Op<NUM>& op) {
return std::make_unique<AddImpl_cpu<NUM>>(op);
}
};
template <>
class AddImpl_cpu<1> : public OperatorImpl {
public:
AddImpl_cpu(const Add_Op<1>& op) : OperatorImpl(op) {}
static std::unique_ptr<AddImpl_cpu<1>> create(const Add_Op<1>& op) {
return std::make_unique<AddImpl_cpu<1>>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
void forward() override;
};
template <>
class AddImpl_cpu<2> : public OperatorImpl {
public:
AddImpl_cpu(const Add_Op<2>& op) : OperatorImpl(op) {}
static std::unique_ptr<AddImpl_cpu<2>> create(const Add_Op<2>& op) {
return std::make_unique<AddImpl_cpu<2>>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
template <>
class AddImpl_cpu<3> : public OperatorImpl {
public:
AddImpl_cpu(const Add_Op<3>& op) : OperatorImpl(op) {}
static std::unique_ptr<AddImpl_cpu<3>> create(const Add_Op<3>& op) {
return std::make_unique<AddImpl_cpu<3>>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
void forward() override;
};
// Operator implementation entry point for the backend
using AddImpl_cpu = OperatorImpl_cpu<Add_Op,
void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>;
namespace {
static Registrar<Add_Op<1>> registrarAddImpl1I_cpu("cpu", Aidge::AddImpl_cpu<1>::create);
static Registrar<Add_Op<2>> registrarAddImpl2I_cpu("cpu", Aidge::AddImpl_cpu<2>::create);
static Registrar<Add_Op<3>> registrarAddImpl3I_cpu("cpu", Aidge::AddImpl_cpu<3>::create);
} // namespace
// Implementation entry point registration to Operator
REGISTRAR(Add_Op, "cpu", Aidge::AddImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
namespace Aidge {
template <class I1, class O>
void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) {
// FIXME: missing Add attributes as arguments
const I1* input1 = static_cast<const I1*>(input1_);
O* output = static_cast<O*>(output_);
for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
output[oIndex] = input1[oIndex];
}
}
template <class I1, class I2, class O>
void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
void* output_) {
// FIXME: missing Add attributes as arguments
const I1* input1 = static_cast<const I1*>(input1_);
const I2* input2 = static_cast<const I2*>(input2_);
O* output = static_cast<O*>(output_);
for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
output[oIndex] = input1[oIndex] + input2[oIndex];
}
}
template <class I1, class I2, class I3, class O>
void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
const void* input3_, void* output_) {
// FIXME: missing Add attributes as arguments
const I1* input1 = static_cast<const I1*>(input1_);
const I2* input2 = static_cast<const I2*>(input2_);
const I3* input3 = static_cast<const I3*>(input3_);
O* output = static_cast<O*>(output_);
for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex];
}
}
namespace {
static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_cpu_forward_kernel<float, float>);
static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_cpu_forward_kernel<int, int>);
static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::AddImpl1I_cpu_forward_kernel<double, double>);
static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::AddImpl2I_cpu_forward_kernel<float, float, float>);
static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_cpu_forward_kernel<int, int, int>);
static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_cpu_forward_kernel<double, double, double>);
static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::AddImpl3I_cpu_forward_kernel<float, float, float, float>);
static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::AddImpl3I_cpu_forward_kernel<int, int, int, int>);
static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::AddImpl3I_cpu_forward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ADDIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef> // std::size_t
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
namespace Aidge {
namespace {
// suppose values are contiguous in memory
template <class I, class O>
void add_contiguous_arrays(const std::size_t input1size,
const std::size_t input2size,
const std::size_t output1size,
const I* input1,
const I* input2,
O* output)
{
for (std::size_t i = 0; i < output1size; ++i)
{
const std::size_t in1_id = (input1size != 1) ? i : 0;
const std::size_t in2_id = (input2size != 1) ? i : 0;
output[i] = static_cast<O>(input1[in1_id] + input2[in2_id]);
}
}
}
template <class I, class O>
void AddImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
std::vector<std::size_t> dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
void* output_) {
const I* input_0 = static_cast<const I*>(input0_);
const I* input_1 = static_cast<const I*>(input1_);
O* output = static_cast<O*>(output_);
// [5,2,1,7] & [2,6,7]
// 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
// 2. Find the highest equal dimension -> 3
// Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
// 3. Compute the highest number of contiguous data -> 7
// 4. Compute stride and offset step for the broadcast mechanism
// 5. Call a simple kernel
// special case for equal dimensions, the kernel is called with the entire arrays at once
if (dims0 == dims1) {
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t i = 0; i < input0_contiguous_size; ++i)
{
output[i] = static_cast<O>(input_0[i] + input_1[i]);
}
return;
}
// set dimensions to be of equal size by filling the smallest one with ones.
if (dims0.size() > dims1.size()) {
dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
}
else if (dims1.size() > dims0.size()) {
dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
}
const std::size_t nbDims = dims0.size();
// Find the highest equal dimension
// std::size_t contiguousIdx = nbDims - 1;
std::size_t contiguousIdx = nbDims;
while (contiguousIdx-- > 0) {
// for (; contiguousIdx+1 > 0; --contiguousIdx) {
if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
--contiguousIdx;
}
}
break;
}
}
++contiguousIdx;
// Compute the highest number of contiguous data for each Tensor
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
// initialize strides to iterate through data because of broadcasting
std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
if (contiguousIdx > 0) {
stride_post0[contiguousIdx - 1] = 1;
stride_post1[contiguousIdx - 1] = 1;
for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
}
for (std::size_t i = 0; i != contiguousIdx; ++i) {
stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
}
}
// variables for arrays offsets
std::size_t offsetIn0 = 0;
std::size_t offsetIn1 = 0;
std::size_t offsetOut = 0;
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
add_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
input_0 + offsetIn0*input0_contiguous_size,
input_1 + offsetIn1*input1_contiguous_size,
output + offsetOut*output_contiguous_size);
if (++stack < nbStacks) {
std::size_t tmp_stack = stack;
while(tmp_stack % outputDims[dim] == 0) {
tmp_stack /= outputDims[dim];
dim--;
}
offsetIn0 += stride_step0[dim];
offsetIn1 += stride_step1[dim];
++offsetOut;
dim = contiguousIdx - 1;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(AddImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(AddImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(AddImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int8}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int8_t, std::int8_t>, nullptr});
REGISTRAR(AddImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::UInt8}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::uint8_t, std::uint8_t>, nullptr});
REGISTRAR(AddImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
REGISTRAR(AddImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */
\ No newline at end of file
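A small broadcast example may clarify the stride/offset mechanism above. The call below is an illustrative sketch, hand-checked against the kernel logic rather than taken from the repository, and assumes the kernels header defined above is included.

void addBroadcastSketch() {
    // input0 has shape [2, 3]; input1 has shape [3] and is broadcast to [1, 3].
    const float a[6] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
    const float b[3] = {10.f, 20.f, 30.f};
    float out[6];
    Aidge::AddImpl_cpu_forward_kernel<float, float>({2, 3}, {3}, {2, 3}, a, b, out);
    // out == {10, 21, 32, 13, 24, 35}: b is re-used for each row of a.
}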
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ANDIMPL_H_
#define AIDGE_CPU_OPERATOR_ANDIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/And.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using AndImpl_cpu = OperatorImpl_cpu<And_Op,
void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(And_Op, "cpu", Aidge::AndImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ANDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ANDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ANDIMPL_KERNELS_H_
#include "aidge/backend/cpu/operator/AndImpl.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
namespace {
// suppose values are contiguous in memory
template <class I, class O>
void equal_contiguous_arrays(const std::size_t input1size,
const std::size_t input2size,
const std::size_t output1size,
const I* input1,
const I* input2,
O* output)
{
for (std::size_t i = 0; i < output1size; ++i)
{
const std::size_t in1_id = (input1size != 1) ? i : 0;
const std::size_t in2_id = (input2size != 1) ? i : 0;
output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]);
}
}
}
template <class I, class O>
void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
std::vector<std::size_t> dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
void* output_) {
const I* input_0 = static_cast<const I*>(input0_);
const I* input_1 = static_cast<const I*>(input1_);
O* output = static_cast<O*>(output_);
// [5,2,1,7] & [2,6,7]
// 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
// 2. Find the highest equal dimension -> 3
// Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
// 3. Compute the highest number of contiguous data -> 7
// 4. Compute stride and offset step for the broadcast mechanism
// 5. Call a simple kernel
// special case for equal dimensions, the kernel is called with the entire arrays at once
if (dims0 == dims1) {
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t i = 0; i < input0_contiguous_size; ++i)
{
output[i] = static_cast<O>(input_0[i] == input_1[i]);
}
return;
}
// set dimensions to be of equal size by filling the smallest one with ones.
if (dims0.size() > dims1.size()) {
dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
}
else if (dims1.size() > dims0.size()) {
dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
}
const std::size_t nbDims = dims0.size();
// Find the highest equal dimension
// std::size_t contiguousIdx = nbDims - 1;
std::size_t contiguousIdx = nbDims;
while (contiguousIdx-- > 0) {
// for (; contiguousIdx+1 > 0; --contiguousIdx) {
if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
--contiguousIdx;
}
}
break;
}
}
++contiguousIdx;
// Compute the highest number of contiguous data for each Tensor
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
// initialize strides to iterate through data because of broadcasting
std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
if (contiguousIdx > 0) {
stride_post0[contiguousIdx - 1] = 1;
stride_post1[contiguousIdx - 1] = 1;
for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
}
for (std::size_t i = 0; i != contiguousIdx; ++i) {
stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
}
}
// variables for arrays offsets
std::size_t offsetIn0 = 0;
std::size_t offsetIn1 = 0;
std::size_t offsetOut = 0;
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
input_0 + offsetIn0*input0_contiguous_size,
input_1 + offsetIn1*input1_contiguous_size,
output + offsetOut*output_contiguous_size);
if (++stack < nbStacks) {
std::size_t tmp_stack = stack;
while(tmp_stack % outputDims[dim] == 0) {
tmp_stack /= outputDims[dim];
dim--;
}
offsetIn0 += stride_step0[dim];
offsetIn1 += stride_step1[dim];
++offsetOut;
dim = contiguousIdx - 1;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(AndImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(AndImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(AndImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
REGISTRAR(AndImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ANDIMPL_KERNELS_H_ */
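For the fast path above (identical input shapes), a direct call sketch; as its name suggests, the kernel registered here computes an element-wise equality test. Illustrative only, assuming the header above is included.

void equalKernelSketch() {
    const float a[4] = {1.f, 2.f, 3.f, 4.f};
    const float b[4] = {1.f, 0.f, 3.f, 0.f};
    float out[4];
    // Same dims on both inputs -> single contiguous pass, no broadcasting needed.
    Aidge::EqualImpl_cpu_forward_kernel<float, float>({4}, {4}, {4}, a, b, out);
    // out == {1, 0, 1, 0}
}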
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_
#define AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ArgMax.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using ArgMaxImpl_cpu = OperatorImpl_cpu<ArgMax_Op,
void(std::int32_t,
DimSize_t,
const std::vector<DimSize_t>&,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(ArgMax_Op, "cpu", Aidge::ArgMaxImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_
#include <algorithm> // std::for_each
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include <functional> //std::multiplies
#include <numeric> //std::accumulate
#include <vector>
#include <limits>
#include "aidge/backend/cpu/operator/ArgMaxImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/operator/ArgMax.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
template <class I, class O>
void ArgMaxImpl_cpu_forward_kernel(std::int32_t axis_,
DimSize_t select_last_index,
const std::vector<DimSize_t>& inputDims,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const std::size_t axis = static_cast<std::size_t>(axis_);
std::size_t stride_post = 1;
for (std::size_t i = axis + 1; i < inputDims.size(); ++i) {
stride_post *= inputDims[i];
}
std::size_t stride_pre = 1;
for (std::size_t i = 0; i < axis; ++i) {
stride_pre *= inputDims[i];
}
const std::size_t dim_i = inputDims[axis];
for (std::size_t pre = 0; pre < stride_pre; ++pre) {
for (std::size_t post = 0; post < stride_post; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post + post;
const std::size_t idx_o = pre * stride_post + post;
I max = std::numeric_limits<I>::min();
for (std::size_t i = 0; i < dim_i; ++i) {
I curr_value = input[idx_i + i*stride_post];
if (select_last_index) {
if (curr_value>=max) {
output[idx_o] = i;
max = curr_value;
}
}
else {
if (curr_value > max) {
output[idx_o] = i;
max = curr_value;
}
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ArgMaxImpl_cpu,
{DataType::Float32},
{ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(ArgMaxImpl_cpu,
{DataType::Float64},
{ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(ArgMaxImpl_cpu,
{DataType::Int32},
{ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_ */
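A worked call of the kernel above, reducing axis 1 of a [2, 3] input (hand-checked sketch, not repository code; assumes the header above is included):

void argmaxKernelSketch() {
    const float in[6] = {1.f, 5.f, 3.f,
                         7.f, 2.f, 7.f};
    float out[2];
    // axis = 1, select_last_index = 0 (keep the first occurrence of the maximum)
    Aidge::ArgMaxImpl_cpu_forward_kernel<float, float>(1, 0, {2, 3}, in, out);
    // out == {1.f, 0.f}: the max of row 0 is at index 1,
    // the max of row 1 (value 7) first appears at index 0.
}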
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ATAN_H_
#define AIDGE_CPU_OPERATOR_ATAN_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Atan.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using AtanImpl_cpu = OperatorImpl_cpu<Atan_Op,
void(const std::size_t, const void*, void*),
void(const std::size_t, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Atan_Op, "cpu", Aidge::AtanImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ATAN_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ATANIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ATANIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/AtanImpl.hpp"
#include <cmath> // For atan()
namespace Aidge {
template <class I, class O>
void AtanImpl_cpu_forward_kernel(std::size_t inputLenght,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (size_t i = 0; i < inputLenght; ++i) {
output[i] = static_cast<O>(atan(input[i]));
}
}
template <class O, class GI, class GO>
void AtanImpl_cpu_backward_kernel(const std::size_t inputLenght,
const void* output_, const void* grad_output_,
void* grad_input_) {
const O* output = static_cast<const O*>(output_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
// Apply the derivative of atan for each element in the input array
for (size_t i = 0; i < inputLenght; ++i) {
// dx = dy * (1 / (1 + x^2))
grad_input[i] = grad_output[i] * static_cast<O>(1.0 / (1.0 + output[i] * output[i]));
}
}
// Kernels registration to implementation entry point
REGISTRAR(AtanImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::AtanImpl_cpu_forward_kernel<float, float>, Aidge::AtanImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(AtanImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::AtanImpl_cpu_forward_kernel<double, double>, Aidge::AtanImpl_cpu_backward_kernel<double, double, double>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ATANIMPL_KERNELS_H_ */
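A minimal forward call of the kernel above (sketch only, assuming the header above is included):

void atanKernelSketch() {
    const float in[3] = {0.f, 1.f, -1.f};
    float out[3];
    Aidge::AtanImpl_cpu_forward_kernel<float, float>(3, in, out);
    // out ~= {0.f, 0.7853982f, -0.7853982f} (atan expressed in radians)
}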
@@ -17,40 +17,24 @@
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/AvgPooling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// class AvgPooling_Op;
// compute kernel registry for forward and backward
class AvgPoolingImpl2DForward_cpu
: public Registrable<AvgPoolingImpl2DForward_cpu,
std::tuple<DataType, DataType>,
void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
class AvgPoolingImpl2DBackward_cpu
: public Registrable<AvgPoolingImpl2DBackward_cpu,
std::tuple<DataType, DataType>,
void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
class AvgPoolingImpl2D_cpu : public OperatorImpl {
public:
AvgPoolingImpl2D_cpu(const AvgPooling_Op<2> &op) : OperatorImpl(op) {}
static std::unique_ptr<AvgPoolingImpl2D_cpu> create(const AvgPooling_Op<2> &op) {
return std::make_unique<AvgPoolingImpl2D_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
// add cpu backend to AvgPooling_Op<2> implementation registry
static Registrar<AvgPooling_Op<2>> registrarAvgPoolingImpl2D_cpu("cpu", Aidge::AvgPoolingImpl2D_cpu::create);
} // namespace
// Operator implementation entry point for the backend
using AvgPooling2D_Op = AvgPooling_Op<2>;
using AvgPoolingImpl2D_cpu = OperatorImpl_cpu<AvgPooling_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4>&,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(AvgPooling2D_Op, "cpu", Aidge::AvgPoolingImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_ */
@@ -9,18 +9,19 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/data/Data.hpp"
#include <array>
#include <tuple>
#include <cmath>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
* @brief Forward kernel for 2D AvgPooling on CPU backend.
@@ -32,10 +33,11 @@ namespace Aidge {
* @param output_ Output Tensor.
*/
template <class I, class O>
void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
const std::array<DimSize_t, 4> &dims,
const void *input_,
void *output_) {
void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4> &dims,
const void *input_,
void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
@@ -43,12 +45,12 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
@@ -60,17 +62,18 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
for (std::size_t ch = 0; ch < dims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0);
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx);
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? std::get<1>(attrs)[1] : dims[3] + dify);
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const std::size_t ix = ox * std::get<0>(attrs)[0];
const std::size_t iy = oy * std::get<0>(attrs)[1];
const std::size_t ix = ox * strideDims[0];
const std::size_t iy = oy * strideDims[1];
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += static_cast<O>(
@@ -98,17 +101,16 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
}
}
namespace {
static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float32(
std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}),
Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>);
static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32},
Aidge::AvgPoolingImpl2D_cpu_forward_kernel<int, int>);
static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64},
Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>);
} // namespace
// Kernels registration to implementation entry point
REGISTRAR(AvgPoolingImpl2D_cpu,
{{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(AvgPoolingImpl2D_cpu,
{{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
REGISTRAR(AvgPoolingImpl2D_cpu,
{{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_ */
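The output spatial size used by the kernel above follows floor((inSize - kernelSize + stride) / stride); a tiny standalone check of that formula (a sketch, not repository code):

#include <cmath>
#include <cstddef>

std::size_t pooledSize(std::size_t inSize, std::size_t kernelSize, std::size_t stride) {
    // Same formula as oxSize/oySize in AvgPoolingImpl2D_cpu_forward_kernel above.
    return static_cast<std::size_t>(
        std::floor(static_cast<float>(inSize - kernelSize + stride) / static_cast<float>(stride)));
}
// pooledSize(7, 3, 2) == 3 ; pooledSize(4, 2, 2) == 2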
@@ -17,55 +17,29 @@
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/BatchNorm.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// class BatchNorm_Op;
// compute kernel registry for forward and backward
class BatchNormImpl2DForward_cpu
: public Registrable<BatchNormImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType>,
void(const BatchNorm_Op<2>::Attrs &,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *,
void *,
void *,
const bool)> {};
class BatchNormImpl2DBackward_cpu
: public Registrable<BatchNormImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType>,
void(const BatchNorm_Op<2>::Attrs &,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *,
void *,
void *)> {};
class BatchNormImpl2D_cpu : public OperatorImpl {
public:
BatchNormImpl2D_cpu(const BatchNorm_Op<2> &op) : OperatorImpl(op) {}
static std::unique_ptr<BatchNormImpl2D_cpu> create(const BatchNorm_Op<2> &op) {
return std::make_unique<BatchNormImpl2D_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
// add cpu backend to BatchNorm_Op<2> implementation registry
static Registrar<BatchNorm_Op<2>> registrarBatchNormImpl2D_cpu("cpu", Aidge::BatchNormImpl2D_cpu::create);
} // namespace
// Operator implementation entry point for the backend
using BatchNorm2D_Op = BatchNorm_Op<2>;
using BatchNormImpl2D_cpu = OperatorImpl_cpu<BatchNorm_Op<2>,
void(float,
float,
const std::vector<DimSize_t> &,
const void *,
const void *,
const void *,
void *,
void *,
void *,
const bool)>;
// Implementation entry point registration to Operator
REGISTRAR(BatchNorm2D_Op, "cpu", Aidge::BatchNormImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_ */
@@ -9,13 +9,14 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_
#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <array>
#include <cmath>
#include <algorithm>
@@ -37,7 +38,7 @@ namespace Aidge {
* @param output_ Output Tensor.
*/
template <class I, class P, class O>
void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std::vector<DimSize_t> &dims,
const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
@@ -48,16 +49,15 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con
O *output = static_cast<O *>(output_);
const DimSize_t nbBatch = dims[0];
const DimSize_t nbChannels = dims[1];
const DimSize_t featureMapSize = dims[2]*dims[3];
const DimSize_t nbChannels = (dims.size() > 1) ? dims[1] : 1;
const DimSize_t featureMapSize = (dims.size() > 2) ? std::accumulate(dims.begin() + 2, dims.end(), 1, std::multiplies<DimSize_t>()) : 1;
if ((freeze == true) || (std::get<1>(attrs) == 0.0f)) {
if ((freeze == true) || (momentum == 0.0f)) {
for (std::size_t batch = 0; batch < nbBatch; ++batch) {
for (std::size_t ch = 0; ch < nbChannels; ++ch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(attrs)));
const P var = std::sqrt(batchVar[ch] + static_cast<P>(epsilon));
for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var;
@@ -81,10 +81,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con
const I inputMean = sum / static_cast<I>(nbDataPerChannel);
const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean;
batchMean[ch] = batchMean[ch]*(1-std::get<1>(attrs)) + inputMean*std::get<1>(attrs);
batchVar[ch] = batchVar[ch]*(1-std::get<1>(attrs)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(attrs);
batchMean[ch] = batchMean[ch]*(1-momentum) + inputMean*momentum;
batchVar[ch] = batchVar[ch]*(1-momentum) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*momentum;
const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(attrs)));
const P var = std::sqrt(inputVar + static_cast<P>(epsilon));
for (std::size_t batch = 0; batch < nbBatch; ++batch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
@@ -95,15 +95,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con
}
}
namespace {
static Registrar<BatchNormImpl2DForward_cpu> registrarBatchNormImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>);
} // namespace
// Kernels registration to implementation entry point
REGISTRAR(BatchNormImpl2D_cpu,
{{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_KERNELS_H_ */
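The frozen-statistics branch above reduces, per element, to the usual normalization formula; a hand-checked numeric sketch (illustrative, not repository code):

#include <cmath>

// y = shift + scale * (x - batchMean) / sqrt(batchVar + epsilon)
float batchNormInference(float x, float scale, float shift,
                         float mean, float var, float epsilon) {
    return shift + scale * (x - mean) / std::sqrt(var + epsilon);
}
// batchNormInference(2.5f, 2.f, 1.f, 0.5f, 4.f, 0.f) == 3.f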
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_
#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/BitShift.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using BitShiftImpl_cpu = OperatorImpl_cpu<BitShift_Op,
void(const BitShift_Op::BitShiftDirection,
std::vector<std::size_t>,
std::vector<std::size_t>,
const std::vector<std::size_t>&,
const void*,
const void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(BitShift_Op,"cpu",Aidge::BitShiftImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_
#include <cstdint> // std::int32_t, std::int64_t
#include <cstddef> // std::size_t
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
#include "aidge/operator/BitShift.hpp"
#include "aidge/utils/Registrar.hpp"
namespace {
// suppose values are contiguous in memory
template <class I1, class I2, class O>
void bitshift_contiguous_arrays(
const Aidge::BitShift_Op::BitShiftDirection direction,
const std::size_t input1size,
const std::size_t input2size,
const std::size_t output1size,
const I1* input_1,
const I2* input_2,
O* output)
{
if(direction == Aidge::BitShift_Op::BitShiftDirection::right) {
for (std::size_t i = 0; i < output1size; ++i) {
const std::size_t idx1 = (input1size != 1) ? i : 0;
const std::size_t idx2 = (input2size != 1) ? i : 0;
output[i]= input_1[idx1] >> input_2[idx2];
}
} else {
for (std::size_t i = 0; i < output1size; ++i) {
const std::size_t idx1 = (input1size != 1) ? i : 0;
const std::size_t idx2 = (input2size != 1) ? i : 0;
output[i] = input_1[idx1] << input_2[idx2];
}
}
}
}
namespace Aidge {
template <class I1, class I2, class O>
void BitShiftImpl_cpu_forward_kernel(
const BitShift_Op::BitShiftDirection direction,
std::vector<std::size_t> dims0,
std::vector<std::size_t> dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
void* output_
) {
const I1* input_0 = static_cast<const I1*>(input0_);
const I2* input_1 = static_cast<const I2*>(input1_);
O* output = static_cast<O*>(output_);
// [5,2,1,7] & [2,6,7]
// 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
// 2. Find the highest equal dimension -> 3
// Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
// 3. Compute the highest number of contiguous data -> 7
// 4. Compute stride and offset step for the broadcast mechanism
// 5. Call a simple kernel
// ## Compute compatible input dimensions
// special case for equal dimensions, the kernel is called with the entire arrays at once
if (dims0 == dims1) {
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
bitshift_contiguous_arrays(direction, input0_contiguous_size, input0_contiguous_size, input0_contiguous_size, input_0, input_1, output);
return;
}
// set dimensions to be of equal size by filling the smallest one with ones.
if (dims0.size() > dims1.size()) {
dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
}
else if (dims1.size() > dims0.size()) {
dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
}
const std::size_t nbDims = dims0.size();
// Find the highest equal dimension
// std::size_t contiguousIdx = nbDims - 1;
std::size_t contiguousIdx = nbDims;
while (contiguousIdx-- > 0) {
// for (; contiguousIdx+1 > 0; --contiguousIdx) {
if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
--contiguousIdx;
}
}
break;
}
}
++contiguousIdx;
// Compute the highest number of contiguous data for each Tensor
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
// initialize strides to iterate through data because of broadcasting
std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
if (contiguousIdx > 0) {
stride_post0[contiguousIdx - 1] = 1;
stride_post1[contiguousIdx - 1] = 1;
for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
}
for (std::size_t i = 0; i != contiguousIdx; ++i) {
stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
}
}
// variables for arrays offsets
std::size_t offsetIn0 = 0;
std::size_t offsetIn1 = 0;
std::size_t offsetOut = 0;
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
bitshift_contiguous_arrays<I1,I2,O>(direction, input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
input_0 + offsetIn0*input0_contiguous_size,
input_1 + offsetIn1*input1_contiguous_size,
output + offsetOut*output_contiguous_size);
if (++stack < nbStacks) {
std::size_t tmp_stack = stack;
while(tmp_stack % outputDims[dim] == 0) {
tmp_stack /= outputDims[dim];
dim--;
}
offsetIn0 += stride_step0[dim];
offsetIn1 += stride_step1[dim];
++offsetOut;
dim = contiguousIdx - 1;
}
}
}
REGISTRAR(BitShiftImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>,nullptr});
REGISTRAR(BitShiftImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>,nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_BitShiftIMPL_KERNELS_H_ */
\ No newline at end of file
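Finally, a direct call sketch for the fast path of the bit-shift kernel above (identical shapes, right shift); hand-checked against the kernel logic, not taken from the repository, and assuming the header above is included.

#include <cstdint>

void bitshiftKernelSketch() {
    const std::int32_t in[4]     = {8, 8, 8, 8};
    const std::int32_t shifts[4] = {0, 1, 2, 3};
    std::int32_t out[4];
    Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>(
        Aidge::BitShift_Op::BitShiftDirection::right, {4}, {4}, {4}, in, shifts, out);
    // out == {8, 4, 2, 1}
}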