Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mszczep/aidge_backend_cpu
  • eclipse/aidge/aidge_backend_cpu
  • hrouis/aidge_backend_cpu
  • oantoni/aidge_backend_cpu
  • raphaelmillet/aidge_backend_cpu
  • cguillon/aidge_backend_cpu
  • jeromeh/aidge_backend_cpu
  • axelfarr/aidge_backend_cpu
  • noamzerah/aidge_backend_cpu
  • silvanosky/aidge_backend_cpu
  • maab05/aidge_backend_cpu
  • lucaslopez/aidge_backend_cpu_ll
  • farnez/aidge_backend_cpu
  • mick94/aidge_backend_cpu
14 results
Show changes
Showing
with 1331 additions and 0 deletions
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ROUNDIMPL_H_
#define AIDGE_CPU_OPERATOR_ROUNDIMPL_H_
#include <cstddef> // std::size_t
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using RoundImpl_cpu = OperatorImpl_cpu<Round_Op,
void(const std::size_t, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Round_Op, "cpu", Aidge::RoundImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ROUNDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_
#include <cmath> //std::round
#include <cstddef> // std::size_t
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/RoundImpl.hpp"
namespace Aidge {
template <class I, class O>
void RoundImpl_cpu_forward_kernel(const std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLength; ++i) {
//std::round would not work since it doesn't follow the halves rules (See ONNX Round)
output[i] = static_cast<O>(std::nearbyint(static_cast<float>(input[i])));
}
}
REGISTRAR(RoundImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::RoundImpl_cpu_forward_kernel<float, float>,nullptr});
REGISTRAR(RoundImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::RoundImpl_cpu_forward_kernel<double, double>,nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef __AIDGE_CPU_OPERATOR_ScalingIMPL_H__
#define __AIDGE_CPU_OPERATOR_ScalingIMPL_H__
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Scaling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
#include <array>
namespace Aidge {
// Operator implementation entry point for the backend
using ScalingImpl_cpu = OperatorImpl_cpu<Scaling_Op,
void(const float,
const std::size_t,
const bool,
std::size_t,
const void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Scaling_Op, "cpu", Aidge::ScalingImpl_cpu::create);
} // namespace Aidge
#endif /* __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ */
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_
#include <cmath>
#include <cstddef>
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
//TODO : improve propagate, n2d2 :
/*
template<typename T>
void N2D2::floatingPointScaling_propagate(const Tensor<T>& input, Tensor<T>& output,
std::size_t batchSize, std::size_t nbChannels,
std::size_t height, std::size_t width,
bool isClipped,
const std::vector<Float_T>& clippingFactorPerChannel,
const std::vector<Float_T>& scalingFactorPerChannel,
std::size_t quantizedNbBits, bool isOutputUnsigned)
{
std::size_t index = 0;
for (std::size_t batch = 0; batch < batchSize; batch++) {
for(std::size_t ch = 0; ch < nbChannels; ch++) {
for(std::size_t y = 0; y < height; y++) {
for(std::size_t x = 0; x < width; x++) {
T res = isClipped ? Clip(input(index), clippingFactorPerChannel[ch])
: input(index);
res = Scale(res, scalingFactorPerChannel[ch]);
if(quantizedNbBits > 0) {
res = saturate(std::round(res), quantizedNbBits, isOutputUnsigned);
}
output(index) = (T) res;
index++;
}
}
}
}
}
*/
namespace Aidge {
template <class O>
const O& clamp(const O& x, const O& min, const O& max)
{
return (x < min) ? min : (x > max) ? max : x;
}
template<class O>
O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutputUnsigned) {
// TODO: no assertions in kernel
assert(quantizedNbBits > 0);
const O min = isOutputUnsigned ? 0 :
-(1ll << (quantizedNbBits - 1ll));
const O max = isOutputUnsigned ? (1ll << quantizedNbBits) - 1ll :
(1ll << (quantizedNbBits - 1ll)) - 1ll;
return clamp(value, min, max);
}
template <class I, class O>
void ScalingImpl_cpu_forward_kernel(const float scalingFactor,
const std::size_t quantizedNbBits,
const bool isOutputUnsigned,
std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = static_cast<O>(input[i] * static_cast<I>(scalingFactor));
if(quantizedNbBits > 0) {
output[i] = saturate(std::round(output[i]), quantizedNbBits, isOutputUnsigned);
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ScalingImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(ScalingImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(ScalingImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ */
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_
#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Sigmoid.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using SigmoidImpl_cpu = OperatorImpl_cpu<Sigmoid_Op,
void(const std::size_t, const void*, void*),
void(const std::size_t, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Sigmoid_Op, "cpu", Aidge::SigmoidImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
namespace Aidge {
template <class I, class O>
void SigmoidImpl_cpu_forward_kernel(std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
//#pragma omp parallel for if (inputLength > 1024)
for (std::size_t i = 0; i < inputLength; ++i) {
if (input[i] > I(0)) {
output[i] = O(1) / (O(1) + std::exp(-input[i]));
} else {
output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i]));
}
}
}
template <class O, class GI, class GO>
void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLength,
const void* output_, const void* grad_output_,
void* grad_input_) {
const O* output = static_cast<const O*>(output_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
for (std::size_t i = 0; i < inputLength; ++i) {
grad_input[i] += output[i] * (O(1) - output[i]) * grad_output[i];
}
}
// Kernels registration to implementation entry point
REGISTRAR(SigmoidImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>, Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(SigmoidImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>, Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_H__
#define AIDGE_CPU_OPERATOR_SLICEIMPL_H__
#include <memory>
#include <vector>
#include <array>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Slice.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using SliceImpl_cpu = OperatorImpl_cpu<Slice_Op,
void(const std::vector<std::int64_t>&,
const std::vector<std::int64_t>&,
const std::vector<std::int8_t>&,
const std::vector<std::int64_t>&,
const std::vector<DimSize_t>&,
const void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Slice_Op, "cpu", Aidge::SliceImpl_cpu::create);
} // namespace Aidge
#endif /* __AIDGE_CPU_OPERATOR_SLICEIMPL_H__ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iterator>
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/SliceImpl.hpp"
namespace Aidge {
template<class I, class O>
void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts,
const std::vector<std::int64_t>& ends,
const std::vector<std::int8_t>& axes,
const std::vector<std::int64_t>& steps,
const std::vector<DimSize_t>& inputDims,
const void* input_,
void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const std::size_t nbDims = inputDims.size();
std::vector<DimSize_t> dims = inputDims;
DimSize_t totalSize = std::accumulate(inputDims.cbegin(), inputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
const I* inputAccumulation = input;
I* outputAccumulation = nullptr;
const std::size_t nbAxes = starts.size();
for (std::size_t i = 0; i < nbAxes; ++i) {
const DimIdx_t axis = axes[i] >= 0 ?
static_cast<DimIdx_t>(axes[i]) :
static_cast<DimIdx_t>(axes[i] + static_cast<DimIdx_t>(inputDims.size()));
const DimSize_t start = std::min(starts[i] >= 0 ?
static_cast<DimSize_t>(starts[i]) :
static_cast<DimSize_t>(starts[i] + static_cast<std::int64_t>(inputDims[axis])),
dims[axis]-1);
const DimSize_t end = std::min(ends[i] >= 0 ?
static_cast<DimSize_t>(ends[i]) :
static_cast<DimSize_t>(ends[i] + static_cast<std::int64_t>(inputDims[axis])),
dims[axis]);
const std::int64_t step = steps[i];
const std::size_t sliceSize = static_cast<std::size_t>(std::ceil((static_cast<float>(end) - static_cast<float>(start)) / static_cast<float>(step)));
totalSize /= dims[axis];
totalSize *= sliceSize;
outputAccumulation = new I[totalSize];
const std::size_t stride_pre = std::accumulate(dims.cbegin(), dims.cbegin() + axis, 1, std::multiplies<std::size_t>());
const std::size_t stride_post = std::accumulate(dims.crbegin(), dims.crbegin() + nbDims -1 - axis, 1, std::multiplies<std::size_t>());
for (std::size_t outer = 0; outer < stride_pre; ++outer)
{
const std::size_t idx_in = outer * stride_post * dims[axis] + start * stride_post;
const std::size_t idx_out = outer * stride_post * sliceSize;
for (std::size_t inner = 0; inner < sliceSize; ++inner)
{
std::copy_n(std::next(inputAccumulation, idx_in + inner * step * stride_post),
stride_post,
std::next(outputAccumulation, idx_out + inner * stride_post));
}
}
dims[axis] = sliceSize;
if (inputAccumulation != input) {
delete[] inputAccumulation;
}
inputAccumulation = outputAccumulation;
}
// Copy elements from inputAccumulation to output while dividing by divisor
std::copy_n(inputAccumulation, totalSize, output);
if (outputAccumulation) {
delete[] outputAccumulation;
}
}
REGISTRAR(SliceImpl_cpu,
{{DataType::Float32, DataType::Any}, {DataType::Float32}},
{ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(SliceImpl_cpu,
{{DataType::Float64, DataType::Any}, {DataType::Float64}},
{ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(SliceImpl_cpu,
{{DataType::Int32, DataType::Any}, {DataType::Int32}},
{ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ */
......@@ -12,52 +12,21 @@
#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_
#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Softmax.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Softmax_Op;
// Operator implementation entry point for the backend
using SoftmaxImpl_cpu = OperatorImpl_cpu<Softmax_Op,
void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>;
// compute kernel registry for forward and backward
class SoftmaxImplForward_cpu
: public Registrable<SoftmaxImplForward_cpu, std::tuple<DataType, DataType>, void(const DimSize_t, const DimSize_t, const DimSize_t, const void*, void*)> {
};
class SoftmaxImplBackward_cpu
: public Registrable<SoftmaxImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class SoftmaxImpl_cpu : public OperatorImpl {
private:
const Softmax_Op& mOp;
std::array<NbElts_t, 1> mNbConsumedData;
std::array<NbElts_t, 1> mNbProducedData;
public:
SoftmaxImpl_cpu(const Softmax_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
static std::unique_ptr<SoftmaxImpl_cpu> create(const Softmax_Op& op) {
return std::make_unique<SoftmaxImpl_cpu>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void forward();
void backward();
};
namespace {
static Registrar<Softmax_Op> registrarSoftmaxImpl_cpu("cpu", Aidge::SoftmaxImpl_cpu::create);
}
// Implementation entry point registration to Operator
REGISTRAR(Softmax_Op, "cpu", Aidge::SoftmaxImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ */
\ No newline at end of file
#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef>
#include <cmath>
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
namespace Aidge {
template <class I, class O>
void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
std::size_t postAxisElems = 1;
for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
postAxisElems *= inputDims[i];
}
std::size_t preAxisElems = 1;
for (std::size_t i = 0; i < axisIdx; ++i) {
preAxisElems *= inputDims[i];
}
#ifdef _OPENMP
#pragma omp parallel for collapse(2) if (preAxisElems * postAxisElems >= 16)
#endif
for (int i = 0; i < static_cast<int>(preAxisElems); ++i) {
for (int j = 0; j < static_cast<int>(postAxisElems); ++j) {
I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j];
for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
maxVal = std::max(maxVal, input[inIdx]);
}
// Calculate sum of exponentials within the axis
I sumExp = 0;
for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
sumExp += std::exp(input[inIdx] - maxVal);
}
// Calculate softmax for the current slice along the axis
for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
output[inIdx] = std::exp(input[inIdx] - maxVal) / sumExp;
}
}
}
}
REGISTRAR(SoftmaxImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(SoftmaxImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(SoftmaxImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_H_
#define AIDGE_CPU_OPERATOR_SQRTIMPL_H_
#include <cstddef> // std::size_t
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Sqrt.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using SqrtImpl_cpu = OperatorImpl_cpu<Sqrt_Op,
void(const std::size_t, const void*, void*),
void(const std::size_t, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Sqrt_Op, "cpu", Aidge::SqrtImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_
#include <cmath> // std::sqrt
#include <cstddef> // std::size_t
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
namespace Aidge {
template <class I, class O>
void SqrtImpl_cpu_forward_kernel(const std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i])));
}
}
template <class I, class O>
void SqrtImpl_cpu_backward_kernel(const std::size_t inputLength,
const void* output_,
const void* grad_output_,
void* grad_input_) {
const I* output = static_cast<const I*>(output_);
const I* grad_output = static_cast<const I*>(grad_output_);
O* grad_input = static_cast<O*>(grad_input_);
for (std::size_t i = 0; i < inputLength; ++i) {
grad_input[i] += static_cast<O>(0.5/output[i]) * grad_output[i];
}
}
REGISTRAR(SqrtImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<float, float>, Aidge::SqrtImpl_cpu_backward_kernel<float, float>});
REGISTRAR(SqrtImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<double, double>, Aidge::SqrtImpl_cpu_backward_kernel<double, double>});
REGISTRAR(SqrtImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::SqrtImpl_cpu_backward_kernel<int32_t, int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_H_
#define AIDGE_CPU_OPERATOR_SUBIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Sub.hpp"
#include "aidge/utils/Registrar.hpp"
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using SubImpl_cpu = OperatorImpl_cpu<Sub_Op,
void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*,void*),
void(const std::size_t,
const std::size_t,
const std::size_t,
const std::vector<std::size_t>&,
const std::vector<std::size_t>&,
const std::vector<std::size_t>&,
const void*,
void*,
void*)
>;
// Implementation entry point registration to Operator
REGISTRAR(Sub_Op, "cpu", Aidge::SubImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t, std::int64_t
#include <vector>
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/SubImpl.hpp"
namespace {
// suppose values are contiguous in memory
template <class I1, class I2, class O>
void sub_contiguous_arrays(const std::size_t input1size,
const std::size_t input2size,
const std::size_t output1size,
const I1* input1,
const I2* input2,
O* output)
{
for (std::size_t i = 0; i < output1size; ++i)
{
const std::size_t in1_id = (input1size != 1) ? i : 0;
const std::size_t in2_id = (input2size != 1) ? i : 0;
output[i] = static_cast<O>(input1[in1_id] - input2[in2_id]);
}
}
}
namespace Aidge {
template <class I1, class I2, class O>
void SubImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
std::vector<std::size_t> dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
void* output_) {
const I1* input_0 = static_cast<const I1*>(input0_);
const I2* input_1 = static_cast<const I2*>(input1_);
O* output = static_cast<O*>(output_);
// [5,2,1,7] & [2,6,7]
// 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
// 2. Find the highest equal dimension -> 3
// Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
// 3. Compute the highest number of contiguous data -> 7
// 4. Compute stride and offset step for the broadcast mechanism
// 5. Call a simple kernel
// special case for equal dimensions, the kernel is called with the entire arrays at once
if (dims0 == dims1) {
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t i = 0; i < input0_contiguous_size; ++i)
{
output[i] = static_cast<O>(input_0[i] - input_1[i]);
}
return;
}
// set dimensions to be of equal size by filling the smallest one with ones.
if (dims0.size() > dims1.size()) {
dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
}
else if (dims1.size() > dims0.size()) {
dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
}
const std::size_t nbDims = dims0.size();
// Find the highest equal dimension
// std::size_t contiguousIdx = nbDims - 1;
std::size_t contiguousIdx = nbDims;
while (contiguousIdx-- > 0) {
// for (; contiguousIdx+1 > 0; --contiguousIdx) {
if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
--contiguousIdx;
}
}
break;
}
}
++contiguousIdx;
// Compute the highest number of contiguous data for each Tensor
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
// initialize strides to iterate through data because of broadcasting
std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
if (contiguousIdx > 0) {
stride_post0[contiguousIdx - 1] = 1;
stride_post1[contiguousIdx - 1] = 1;
for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
}
for (std::size_t i = 0; i != contiguousIdx; ++i) {
stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
}
}
// variables for arrays offsets
std::size_t offsetIn0 = 0;
std::size_t offsetIn1 = 0;
std::size_t offsetOut = 0;
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
sub_contiguous_arrays<I1,I2,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
input_0 + offsetIn0*input0_contiguous_size,
input_1 + offsetIn1*input1_contiguous_size,
output + offsetOut*output_contiguous_size);
if (++stack < nbStacks) {
std::size_t tmp_stack = stack;
while(tmp_stack % outputDims[dim] == 0) {
tmp_stack /= outputDims[dim];
dim--;
}
offsetIn0 += stride_step0[dim];
offsetIn1 += stride_step1[dim];
++offsetOut;
dim = contiguousIdx - 1;
}
}
}
template <class I1, class I2, class O>
void SubImpl_cpu_backward_kernel(const std::size_t input0Length,
const std::size_t input1Length,
const std::size_t gradOutputLength,
const std::vector<std::size_t>& dims0,
const std::vector<std::size_t>& dims1,
const std::vector<std::size_t>& outputDims,
const void* grad_output_,
void* gradientInput0_,
void* gradientInput1_)
{
const O* grad_output = static_cast<const O*>(grad_output_);
auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
for (std::size_t i = 0; i < gradOutputLength; ++i) {
auto idxOutputGrad = getMultiDimIndices(outputDims, i);
std::vector<std::size_t> idxInput0(broadcastedDims0.size());
std::vector<std::size_t> idxInput1(broadcastedDims1.size());
for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);
// For subtraction: gradient of first input is 1 * grad_output
grad_input_0[idx0] += static_cast<I1>(grad_output[i]);
// For subtraction: gradient of second input is -1 * grad_output
grad_input_1[idx1] += static_cast<I2>(-grad_output[i]);
}
}
// Kernels registration to implementation entry point
REGISTRAR(SubImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<float, float, float>, Aidge::SubImpl_cpu_backward_kernel<float,float,float>});
REGISTRAR(SubImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<double, double, double>, nullptr});
REGISTRAR(SubImpl_cpu,
{DataType::Int8},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int8_t, std::int8_t, std::int8_t>, nullptr});
REGISTRAR(SubImpl_cpu,
{DataType::UInt8},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::uint8_t, std::uint8_t, std::uint8_t>, nullptr});
REGISTRAR(SubImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
REGISTRAR(SubImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_H_
#define AIDGE_CPU_OPERATOR_TANHIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Tanh.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using TanhImpl_cpu = OperatorImpl_cpu<Tanh_Op,
void(const std::size_t, const void*, void*),
void(const std::size_t, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Tanh_Op, "cpu", Aidge::TanhImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/TanhImpl.hpp"
namespace Aidge {
template <class I, class O>
void TanhImpl_cpu_forward_kernel(std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
//#pragma omp parallel for if (inputLength > 1024)
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = std::tanh(input[i]);
}
}
template <class O, class GI, class GO>
void TanhImpl_cpu_backward_kernel(const std::size_t inputLength,
const void* output_, const void* grad_output_,
void* grad_input_) {
const O* output = static_cast<const O*>(output_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
for (std::size_t i = 0; i < inputLength; ++i) {
grad_input[i] += (O(1) - output[i] * output[i]) * grad_output[i];
}
}
// Kernels registration to implementation entry point
REGISTRAR(TanhImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<float, float>, Aidge::TanhImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(TanhImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<double, double>, Aidge::TanhImpl_cpu_backward_kernel<double, double, double>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_TOPKIMPL_H_
#define AIDGE_CPU_OPERATOR_TOPKIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/TopK.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using TopKImpl_cpu = OperatorImpl_cpu<TopK_Op,
void(int64_t,
bool,
bool,
IOIndex_t,
const std::vector<DimSize_t>&,
const void*,
void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(TopK_Op, "cpu", Aidge::TopKImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_TOPKIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_TOPKIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_TOPKIMPL_KERNELS_H_
#include <algorithm> // std::for_each
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include <functional> //std::multiplies
#include <numeric> //std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/TopKImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/operator/TopK.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
template <class I, class O>
void TopKImpl_cpu_forward_kernel(int64_t axis,
bool largest,
bool /*sorted*/,
IOIndex_t k,
const std::vector<DimSize_t>& inputDims,
const void* input_,
void* output_,
void* indices_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
int64_t* indices = static_cast<int64_t*>(indices_);
const std::size_t nb_dims = inputDims.size();
const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axis, 1, std::multiplies<std::size_t>());
const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axis, 1, std::multiplies<std::size_t>());
const std::size_t dim_i = inputDims[axis];
std::vector<std::pair<I, int64_t>> buffer(dim_i);
#ifdef _OPENMP
#pragma omp parallel for collapse(2) if (stride_pre * stride_post >= 16)
#endif
for (int pre = 0; pre < static_cast<int>(stride_pre); ++pre) {
for (int post = 0; post < static_cast<int>(stride_post); ++post) {
const std::size_t idx_i = pre * dim_i * stride_post + post;
const std::size_t idx_o = pre * k * stride_post + post;
for (size_t i = 0; i < dim_i; ++i) {
const auto idx = idx_i + i * stride_post;
buffer[i] = std::make_pair(input[idx], i);
}
if (largest) {
std::partial_sort(buffer.begin(), buffer.begin() + k, buffer.end(),
[](const auto& lhs, const auto& rhs) { return lhs.first > rhs.first; });
}
else {
std::partial_sort(buffer.begin(), buffer.begin() + k, buffer.end(),
[](const auto& lhs, const auto& rhs) { return lhs.first < rhs.first; });
}
for (size_t i = 0; i < k; ++i) {
output[idx_o + i] = buffer[i].first;
indices[idx_o + i] = buffer[i].second;
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(TopKImpl_cpu,
{
{{DataType::Float32}, {DataType::Any}},
{{DataType::Float32}, {DataType::Int64}}
},
{ProdConso::inPlaceModel, Aidge::TopKImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(TopKImpl_cpu,
{
{{DataType::Float64}, {DataType::Any}},
{{DataType::Float64}, {DataType::Int64}}
},
{ProdConso::inPlaceModel, Aidge::TopKImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(TopKImpl_cpu,
{
{{DataType::Int32}, {DataType::Any}},
{{DataType::Int32}, {DataType::Int64}}
},
{ProdConso::inPlaceModel, Aidge::TopKImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_TOPKIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_
#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/WeightInterleaving.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using WeightInterleavedImpl_cpu = OperatorImpl_cpu<WeightInterleaving_Op,
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(WeightInterleaving_Op, "cpu", Aidge::WeightInterleavedImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_WeightInterleavingIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVEDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVEDIMPL_KERNELS_H_
#include <cstddef> // std::size_t
#include <cstdint> // std::int8_t, std::uint8_t
#include "aidge/backend/cpu/operator/WeightInterleavedImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/ErrorHandling.hpp"
namespace Aidge {
/**
* @brief Compacts 8-bit data into a smaller bit-width representation.
*
* This function takes an array of 8-bit data and compacts it into smaller chunks
* based on the specified bit-width `nb_bits`. Each element in `compactData` will
* store multiple packed `nb_bits` segments extracted from `data`.
*
* @param data The input array of 8-bit values to be compacted.
* @param dataSize The size of the input `data` array.
* @param compactData The output array storing the compacted data.
* @param nb_bits The number of bits to extract from each `data` element (must be less than 8).
*/
template <typename T>
void compact_data(const T* data, std::size_t dataSize, T* compactData, std::uint8_t nb_bits) {
AIDGE_ASSERT(nb_bits > 0 && nb_bits < 5, "Cannot compact with the given nb_bits"); // Ensure valid bit width
// Mask to extract `nb_bits` from each data element
const unsigned int mask = (1U << nb_bits) - 1;
// Calculate the number of `nb_bits` segments that fit into an 8-bit compacted value
const unsigned int nbSlot = 8 / nb_bits;
// Case nb_bits=3 or 4, then shift is 4
// Case nb_bits=2, then shift is 2
// Case nb_bits=1, then shift is 1
std::uint8_t shift = 8 / nbSlot;
const unsigned int nbFullCompactbytes = dataSize / nbSlot;
// Main loop to process data in groups of `nbSlot`
for (std::size_t i = 0; i < nbFullCompactbytes; ++i) {
T compact = 0;
for (unsigned int j = 0; j < nbSlot; ++j) {
compact |= (data[i * nbSlot + j] & mask); // Apply mask to keep `nb_bits` only
// Shift only if not on the last slot to make room for the next `nb_bits`
if (j < nbSlot - 1) {
compact <<= shift;
}
}
// Store the compacted value in the output array
compactData[i] = compact;
}
// Handle any remaining data elements (if dataSize is not a multiple of nbSlot).
std::size_t remaining = dataSize % nbSlot;
if (remaining != 0) {
std::int8_t compact = 0;
for (std::size_t j = 0; j < remaining; ++j) {
compact |= (data[nbFullCompactbytes*nbSlot + j] & mask);
if (j < remaining - 1) {
compact <<= shift;
}
}
compact <<= (shift*(nbSlot - remaining));
// Store the last compacted value
compactData[dataSize / nbSlot] = compact;
}
}
template <class I, class O, int nb_bits>
void WeightInterleavedImpl_cpu_forward_kernel(const DimSize_t input_interleaving,
const DimSize_t nb_interleaving,
const DimSize_t output_interleaving,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
// Aidge::compact_data(const std::int8_t* data, std::size_t dataSize, std::int8_t* compactData, std::uint8_t nb_bits) {
for (std::size_t i=0; i<nb_interleaving; ++i){
compact_data(input+(i*input_interleaving), input_interleaving, output+(i*output_interleaving), static_cast<std::uint8_t>(nb_bits));
}
}
REGISTRAR(WeightInterleavedImpl_cpu,
{ImplSpec::IOSpec{DataType::Int4, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Int4>, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 4>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
{ImplSpec::IOSpec{DataType::Int3, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Int3>, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 3>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
{ImplSpec::IOSpec{DataType::Int2, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Int2>, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 2>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
{ImplSpec::IOSpec{DataType::Binary, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Binary>, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 1>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
{ImplSpec::IOSpec{DataType::UInt4, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::UInt4>, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<uint8_t, uint8_t, 4>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
{ImplSpec::IOSpec{DataType::UInt3, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::UInt3>, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<uint8_t, uint8_t, 3>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
{ImplSpec::IOSpec{DataType::UInt2, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::UInt2>, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<uint8_t, uint8_t, 2>, nullptr});
// REGISTRAR(WeightInterleavedImpl_cpu,
// {ImplSpec::IOSpec{DataType::Int4, DataFormat::NHWC}},
// {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 4>, nullptr});
// REGISTRAR(WeightInterleavedImpl_cpu,
// {ImplSpec::IOSpec{DataType::Int3, DataFormat::NHWC}},
// {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 3>, nullptr});
// REGISTRAR(WeightInterleavedImpl_cpu,
// {ImplSpec::IOSpec{DataType::Int2, DataFormat::NHWC}},
// {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 2>, nullptr});
}
#endif /* AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVEDIMPL_KERNELS_H_ */
\ No newline at end of file