Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mszczep/aidge_backend_cpu
  • eclipse/aidge/aidge_backend_cpu
  • hrouis/aidge_backend_cpu
  • oantoni/aidge_backend_cpu
  • raphaelmillet/aidge_backend_cpu
  • cguillon/aidge_backend_cpu
  • jeromeh/aidge_backend_cpu
  • axelfarr/aidge_backend_cpu
  • noamzerah/aidge_backend_cpu
  • silvanosky/aidge_backend_cpu
  • maab05/aidge_backend_cpu
  • lucaslopez/aidge_backend_cpu_ll
  • farnez/aidge_backend_cpu
  • mick94/aidge_backend_cpu
14 results
Show changes
Showing
with 1760 additions and 0 deletions
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include <aidge/data/Data.hpp>
#include <aidge/data/Tensor.hpp>
#include <aidge/data/half.hpp>
#include <aidge/scheduler/ProdConso.hpp>
#include <aidge/utils/Types.h>
#include <cmath>
#include <cstdint> // std::int32_t, std::int64_t
#include <memory>
#include <numeric>
namespace {
// suppose values are contiguous in memory
template <class IO>
void expandContiguousArray(const std::size_t inputStackSize,
const std::size_t outputStackSize,
const IO *input,
IO *output) {
for (std::size_t i = 0; i < outputStackSize; ++i) {
output[i] = (inputStackSize == 1) ? input[0] : input[i];
}
return;
}
} // namespace
namespace Aidge {
template <class IO>
void ExpandImpl_cpu_forward_kernel(
const std::shared_ptr<Tensor> &inData,
const std::shared_ptr<Tensor> &_inExpandShape,
void *_output,
const std::vector<DimSize_t> &outputDims) {
// retrieving data of inputShape & dimensions of inputDims
// as the process will require to modify the values
IO *output = static_cast<IO *>(_output);
std::vector<DimSize_t> inExpandShape(_inExpandShape->size());
for (DimSize_t i = 0; i < _inExpandShape->size(); ++i) {
inExpandShape[i] = _inExpandShape->get<std::int64_t>(i);
}
std::vector<DimSize_t> inDataDims = inData->dims();
// Example with 2 tensors
// [5,2,1,7] & [2,6,7]
// 1. Same number of dimensions but adding 1s to le left of "smallest"
// tensor -> [5,2,1,7] & [1,2,6,7]
// 2. Find the highest equal dimension -> 3
// Exception: if the first diverging dimension is the last one, then ->
// 4 (dims.size())
// 3. Compute the highest number of contiguous data -> 7
// 4. Compute stride and offset step for the broadcast mechanism
// 5. Call a simple kernel
// ## Compute compatible input dimensions
// special case for equal dimensions, the kernel is called with the entire
// arrays at once
if (inDataDims == inExpandShape) {
const std::size_t input0ContiguousSize =
std::accumulate(inDataDims.cbegin(),
inDataDims.cend(),
static_cast<std::size_t>(1),
std::multiplies<std::size_t>());
for (std::size_t i = 0; i < input0ContiguousSize; ++i) {
output[i] = inData->get<IO>(i);
}
return;
}
// set dimensions to be of equal size by filling the smallest one with
// ones.
if (inDataDims.size() > inExpandShape.size()) {
inExpandShape.insert(inExpandShape.cbegin(),
inDataDims.size() - inExpandShape.size(),
static_cast<DimSize_t>(1));
} else if (_inExpandShape->size() > inDataDims.size()) {
inDataDims.insert(inDataDims.cbegin(),
inExpandShape.size() - inDataDims.size(),
static_cast<DimSize_t>(1));
}
const std::size_t nbDims = inDataDims.size();
// Find the highest equal dimension
// std::size_t contiguousIdx = nbDims - 1;
std::size_t contiguousIdx = nbDims;
while (contiguousIdx-- > 0) {
// for (; contiguousIdx+1 > 0; --contiguousIdx) {
if (inDataDims[contiguousIdx] != inExpandShape[contiguousIdx]) {
break;
}
}
if (contiguousIdx == (nbDims - 1)) {
// last dimensions of one of the input Tensor are of size 1
const std::vector<std::size_t> &dims =
(inDataDims[contiguousIdx] == 1) ? inDataDims : inExpandShape;
while ((contiguousIdx + 1 > 0) && (dims[contiguousIdx] == 1)) {
--contiguousIdx;
}
}
++contiguousIdx;
// Compute the highest number of contiguous data for each Tensor
const std::size_t inputDataContiguousSize =
std::accumulate(inDataDims.cbegin() + contiguousIdx,
inDataDims.cend(),
static_cast<std::size_t>(1),
std::multiplies<std::size_t>());
const std::size_t outputContiguousSize =
std::accumulate(outputDims.cbegin() + contiguousIdx,
outputDims.cend(),
static_cast<std::size_t>(1),
std::multiplies<std::size_t>());
// initialize strides to iterate through data because of broadcasting
std::unique_ptr<std::int32_t[]> stridePostIn =
std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> strideStepIn =
std::make_unique<std::int32_t[]>(contiguousIdx);
if (contiguousIdx > 0) {
stridePostIn[contiguousIdx - 1] = 1;
for (std::size_t i = contiguousIdx - 2;
i != static_cast<std::size_t>(-1);
--i) {
stridePostIn[i] = stridePostIn[i + 1] *
static_cast<std::int32_t>(inDataDims[i + 1]);
}
for (std::size_t i = 0; i != contiguousIdx; ++i) {
strideStepIn[i] = (inDataDims[i] == 1) ? 1 - stridePostIn[i] : 1;
}
}
// variables for arrays offsets
std::size_t offsetInData = 0;
std::size_t offsetOut = 0;
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks =
std::accumulate(outputDims.cbegin(),
outputDims.cbegin() + contiguousIdx,
static_cast<std::size_t>(1),
std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
expandContiguousArray<IO>(
inputDataContiguousSize,
outputContiguousSize,
&static_cast<const IO *>(
inData->getImpl()
->rawPtr())[offsetInData * inputDataContiguousSize],
&output[offsetOut * outputContiguousSize]);
if (++stack < nbStacks) {
std::size_t tmpStack = stack;
while (tmpStack % outputDims[dim] == 0) {
tmpStack /= outputDims[dim];
dim--;
}
offsetInData += strideStepIn[dim];
++offsetOut;
dim = contiguousIdx - 1;
}
}
}
REGISTRAR(ExpandImpl_cpu,
{{DataType::Int16, DataType::Int64}, {DataType::Int16}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<std::int16_t>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Int32, DataType::Int64}, {DataType::Int32}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<std::int32_t>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Int64, DataType::Int64}, {DataType::Int64}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<std::int64_t>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Float16, DataType::Int64}, {DataType::Float16}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<half_float::half>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Float32, DataType::Int64}, {DataType::Float32}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<float>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Float64, DataType::Int64}, {DataType::Float64}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<double>,
nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_H_
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/FC.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using FCImpl_cpu = OperatorImpl_cpu<FC_Op,
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *),
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *,
void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(FC_Op, "cpu", Aidge::FCImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>
#include "aidge/operator/FCImpl.hpp"
namespace Aidge {
// template <class I, class W, class B, class O>
// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims,
// const void* input_, const void* weights_, const void* biases_, void* output_) {
// // FIXME: missing FC parameters as arguments
// const I* input = static_cast<const I*>(input_);
// const W* weights = static_cast<const W*>(weights_);
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
// std::size_t oIndex = outIdx * dims[3];
// const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] = bias;
// }
// }
// for (std::size_t ix = 0; ix < dims[0]; ++ix) {
// for (std::size_t iy = 0; iy < dims[1]; ++iy) {
// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
// const std::size_t oIndex = dims[3] * outCh;
// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) +
// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
// }
// }
// }
// }
// }
// }
// template <class I, class W, class B, class O>
// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims,
// const void* input_, const void* weights_, const void* biases_, void* output_) {
// // FIXME: missing FC parameters as arguments
// const I* input = static_cast<const I*>(input_);
// const W* weights = static_cast<const W*>(weights_);
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
// std::size_t oIndex = outIdx * dims[0];
// const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// output[oIndex + batch] = bias;
// }
// }
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// const std::size_t oIndex = dims[1] * batch;
// for (std::size_t i = 0; i < dims[1]; ++i) {
// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3];
// output[oIndex + outCh] += weights[wIndex] * input[i + batch];
// }
// }
// }
// }
template <class I, class W, class B, class O>
void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize,
const void* input_, const void* weights_, const void* biases_, void* output_) {
// FIXME: missing FC parameters as arguments
const I* input = static_cast<const I*>(input_);
const W* weights = static_cast<const W*>(weights_);
const B* biases = static_cast<const B*>(biases_);
O* output = static_cast<O*>(output_);
if (std::get<1>(params)) {
std::fill(output, output+(batchSize*std::get<0>(params)), B(0));
}
else {
for (std::size_t batch = 0; batch < batchSize; ++batch) {
std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params)));
}
}
for (std::size_t batch = 0; batch < batchSize; ++batch) {
for (std::size_t out = 0; out < std::get<0>(params); ++out) {
output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize,
input + (batch + 1)*oneInputSize,
weights + out*oneInputSize,
output[out + batch*std::get<0>(params)]);
}
}
}
namespace {
static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>);
static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>);
static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_
#include <algorithm>
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
// template <class I, class W, class B, class O>
// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 4>& dims,
// const void* input_, const void* weights_, const void* biases_, void* output_) {
// // FIXME: missing FC attributes as arguments
// const I* input = static_cast<const I*>(input_);
// const W* weights = static_cast<const W*>(weights_);
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[3];
// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] = bias;
// }
// }
// for (std::size_t ix = 0; ix < dims[0]; ++ix) {
// for (std::size_t iy = 0; iy < dims[1]; ++iy) {
// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
// const std::size_t oIndex = dims[3] * outCh;
// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize +
// outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
// }
// }
// }
// }
// }
// }
// template <class I, class W, class B, class O>
// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 2>& dims,
// const void* input_, const void* weights_, const void* biases_, void* output_) {
// // FIXME: missing FC attributes as arguments
// const I* input = static_cast<const I*>(input_);
// const W* weights = static_cast<const W*>(weights_);
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[0];
// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// output[oIndex + batch] = bias;
// }
// }
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// const std::size_t oIndex = dims[1] * batch;
// for (std::size_t i = 0; i < dims[1]; ++i) {
// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
// std::size_t wIndex = i * outputFeatureSize + outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// output[oIndex + outCh] += weights[wIndex] * input[i + batch];
// }
// }
// }
// }
template <class I, class W, class B, class O>
void FCImpl_cpu_forward_kernel(const DimSize_t batchSize,
const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize,
const void* input_,
const void* weights_,
const void* biases_,
void* output_) {
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
const W* weights = static_cast<const W*>(weights_);
const B* biases = static_cast<const B*>(biases_);
O* output = static_cast<O*>(output_);
#ifdef _OPENMP
#pragma omp parallel for collapse(2) if (batchSize * outputFeatureSize >= 16)
#endif
for (int batch = 0; batch < static_cast<int>(batchSize); ++batch) {
for (int out = 0; out < static_cast<int>(outputFeatureSize); ++out) {
const auto biasVal = (biases) ? biases[out] : B(0);
output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize,
input + (batch + 1)*inputFeatureSize,
weights + out*inputFeatureSize,
biasVal);
}
}
}
template <class I, class O, class W, class B>
void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize,
const void* input_,
const void* originalInput_,
const void* weight_,
void* output_,
void* weightGrad_,
void* biasesGrad_)
{
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
const I* originalInput = static_cast<const I*>(originalInput_);
const W* weight = static_cast<const W*>(weight_);
O* output = static_cast<O*>(output_);
W* weightGrad = static_cast<W*>(weightGrad_);
B* biasesGrad = static_cast<B*>(biasesGrad_);
// bias grad
if (biasesGrad != nullptr) {
for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
B sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
sum += input[b*outputFeatureSize + o];
}
biasesGrad[o]+= sum;
}
}
// weight grad
for (std::size_t o = 0; o < outputFeatureSize; ++o) {
for (std::size_t c = 0; c < inputFeatureSize; ++c) {
W sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o];
}
weightGrad[o*inputFeatureSize + c]+= sum;
}
}
// input grad
for (std::size_t b = 0; b < batchSize; ++b) {
for (std::size_t c = 0; c < inputFeatureSize; ++c) {
O sum{0};
for (std::size_t o = 0; o < outputFeatureSize; ++o) {
sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o];
}
output[b*inputFeatureSize + c]+= sum;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(FCImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>, Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>});
REGISTRAR(FCImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>, Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>});
REGISTRAR(FCImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, Aidge::FCImpl_cpu_backward_kernel<int32_t, int32_t, int32_t, int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_H_
#define AIDGE_CPU_OPERATOR_FOLDIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Fold.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using Fold2D_Op = Fold_Op<2>;
using FoldImpl2D_cpu = OperatorImpl_cpu<Fold_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::vector<DimSize_t> &,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(Fold2D_Op, "cpu", Aidge::FoldImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/FoldImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <cmath>
#include <array>
#include <algorithm>
namespace Aidge {
template <class I, class O>
void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims,
const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::vector<DimSize_t> &dims,
const void *input_, void *output_)
{
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
const DimSize_t inHeight = outputDims[0];
const DimSize_t inWidth = outputDims[1];
const DimSize_t kernelExtentHeight = dilationDims[0] *
(kernelDims[0] - 1) + 1;
const DimSize_t outHeight = 1 + static_cast<DimSize_t>(
floor(static_cast<float>(inHeight - kernelExtentHeight) /
static_cast<float>(strideDims[0])));
const DimSize_t kernelExtentWidth = dilationDims[1] *
(kernelDims[1] - 1) + 1;
const DimSize_t outWidth = 1 + static_cast<DimSize_t>(
floor(static_cast<float>(inWidth - kernelExtentWidth) /
static_cast<float>(strideDims[1])));
const DimSize_t outChannels = dims[dims.size() - 2];
const DimSize_t inChannels = outChannels / kernelDims[0] / kernelDims[1];
std::fill_n(output, dims[0] * outHeight * outWidth * outChannels, O(0));
for (DimSize_t n = 0; n < dims[0]; ++n) {
for (DimSize_t outC = 0; outC < outChannels; ++outC) {
const auto inOffsetW = outC % kernelDims[1];
const auto inOffsetH = (outC / kernelDims[1]) % kernelDims[0];
const auto inC = outC / kernelDims[0] / kernelDims[1];
for (DimSize_t outH = 0; outH < outHeight; ++outH) {
const auto inH = outH * strideDims[0] + inOffsetH * dilationDims[0];
for (DimSize_t outW = 0; outW < outWidth; ++outW) {
const auto inW = outW * strideDims[1] + inOffsetW * dilationDims[1];
output[((n * inChannels + inC) * inHeight + inH) * inWidth + inW] +=
input[((n * outChannels + outC) * outHeight + outH) * outWidth + outW];
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(FoldImpl2D_cpu,
{DataType::Float32},
{ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(FoldImpl2D_cpu,
{DataType::Float64},
{ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(FoldImpl2D_cpu,
{DataType::Int32},
{ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/GlobalAveragePooling.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
// Operator implementation entry point for the backend
using GlobalAveragePoolingImpl_cpu = OperatorImpl_cpu<GlobalAveragePooling_Op,
void(const std::shared_ptr<Tensor>&, void *)>;
// Implementation entry point registration to Operator
REGISTRAR(GlobalAveragePooling_Op, "cpu", Aidge::GlobalAveragePoolingImpl_cpu::create);
} // namespace Aidge
#endif /* _AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_
#include <cstddef> // std::size_t
#include <vector>
#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <typename T>
typename std::enable_if_t<std::is_floating_point<T>::value, T>
static stableMean(const T* vec, std::size_t size) {
T mean{0};
for (std::size_t i = 0; i < size; ++i) {
mean = std::fma(vec[i] - mean, static_cast<T>(1) / static_cast<T>(i + 1), mean);
}
return mean;
}
// Specialization for integers: perform the mean computation in float
template <typename T>
typename std::enable_if_t<!std::is_floating_point<T>::value, double>
static stableMean(const T* vec, std::size_t size) {
double mean{0};
for (std::size_t i = 0; i < size; ++i) {
mean = std::fma<double>(static_cast<double>(vec[i]) - mean, 1.0 / static_cast<double>(i + 1), mean);
}
return mean;
}
template <typename T>
typename std::enable_if_t<std::is_floating_point<T>::value, T>
static castFromFloat(T value) {
return value;
}
template <typename T>
typename std::enable_if_t<!std::is_floating_point<T>::value, T>
static castFromFloat(double value) {
return static_cast<T>(std::nearbyint(value));
}
template <DataType DT_I, DataType DT_O = DT_I>
void GlobalAveragePoolingImpl_cpu_forward_kernel(const std::shared_ptr<Tensor>& inputTensor, void *output_) {
// computation
using I = cpptype_t<DT_I>;
using O = cpptype_t<DT_O>;
const I *input = static_cast<const I *>(inputTensor->getImpl()->rawPtr());
O *output = static_cast<O *>(output_);
const auto& dims = inputTensor->dims();
DimSize_t nb_elems = std::accumulate(dims.begin(), dims.end(), std::size_t(1),
std::multiplies<std::size_t>());
const DimSize_t in_batch_nb_elems{nb_elems / dims[0]};
const DimSize_t in_channel_nb_elems{in_batch_nb_elems / dims[1]};
const DimSize_t out_batch_nb_elems{dims[1]};
// parse channel by channel and fill each output with the average of the
// values in the channel
#ifdef _OPENMP
#pragma omp parallel for collapse(2) if (dims[0] * dims[1] >= 16)
#endif
for (int batch = 0; batch < static_cast<int>(dims[0]); ++batch) {
for (int channel = 0; channel < static_cast<int>(dims[1]); ++channel) {
const I *filter_start = std::next(
input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
output[batch * out_batch_nb_elems + channel] = castFromFloat<O>(stableMean<I>(filter_start, in_channel_nb_elems));
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(GlobalAveragePoolingImpl_cpu,
{DataType::Float32},
{ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<DataType::Float32>, nullptr});
REGISTRAR(GlobalAveragePoolingImpl_cpu,
{DataType::Float64},
{ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<DataType::Float64>, nullptr});
REGISTRAR(GlobalAveragePoolingImpl_cpu,
{DataType::Int32},
{ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<DataType::Int32>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/GridSample.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using GridSampleImpl_cpu = OperatorImpl_cpu<GridSample_Op,
void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)>;
// Implementation entry point registration to Operator
REGISTRAR(GridSample_Op, "cpu", Aidge::GridSampleImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
#include <algorithm> // std::max, std::min
#include <cmath> // std::fabs, std::trunf, std::nearbyint
#include <cstddef> // std::size_t
#include <cstdint> // std::int64_t
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/GridSampleImpl.hpp"
#include "aidge/data/half.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
static bool in_bound(float coord, float lower_bound, float upper_bound) noexcept {
return (coord > lower_bound) && (coord < upper_bound);
}
static float unnormalized_coord(float coord, float new_lower_bound, float new_upper_bound) noexcept {
return (coord + 1) / 2 * (new_upper_bound - new_lower_bound) + new_lower_bound;
}
// unused
// static float normalized_coord(float coord, float prev_lower_bound, float prev_upper_bound) noexcept {
// return (coord + prev_lower_bound) / (prev_upper_bound-prev_lower_bound) * 2 - 1;
// }
static float unnormalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
return align_corners ? unnormalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
: unnormalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
}
// unused
// static float normalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
// return align_corners ? normalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
// : normalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
// }
static float update_normalized_coord_with_padding(float coord, Aidge::GridSample_Op::PaddingMode padding_mode) {
if (!in_bound(coord, -1.0f, 1.0f)) {
if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
coord = std::min(std::max(-1.0f, coord), 1.0f);
}
else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
float abs_coord = std::fabs(coord);
float int_coord = std::truncf(abs_coord);
std::int32_t nb_refl = static_cast<std::int32_t>((int_coord - 1) / 2);
float res = ((nb_refl + 1)*2) - abs_coord;
coord = (coord > 0) ? (nb_refl % 2 == 0 ? res : -res) \
: (nb_refl % 2 == 0 ? -res : res);
}
}
return coord;
}
static std::int64_t update_unnormalized_coord_with_padding(std::int64_t coord, std::int64_t size, Aidge::GridSample_Op::PaddingMode padding_mode) {
if (!in_bound(coord, 0, size)) {
// out of bound. switch padding mode
if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
coord = std::min(std::max(std::int64_t(0), coord), size-std::int64_t(1));
} else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
const std::int64_t quotient = coord / (size-1);
const std::int64_t remainer = std::abs(coord - quotient*(size-1));
coord = (quotient % 2 == 0) ? remainer : size - 1 - remainer;
}
}
return coord;
}
namespace Aidge {
/**
* @brief Forward kernel for 1D GridSample on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param params tuple of Attributes from the Operator
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param grid_ const grid Tensor.
* @param output_ Output Tensor.
*/
template <class I, class O>
void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
const std::shared_ptr<Tensor>& in0,
const std::shared_ptr<Tensor>& in1,
const std::shared_ptr<Tensor>& out)
{
const I* const input = static_cast<const I *>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
float* const grid = static_cast<float*>(in1->getImpl()->rawPtr());
float* grid_ptr = grid;
O* const output = static_cast<O*>(out->getImpl()->rawPtr());
O* output_ptr = output;
const std::size_t N = in0->dim(0);
const std::size_t C = in0->dim(1);
const std::size_t in_H = in0->dim(2);
const std::size_t grid_H = in1->dim(1);
const std::size_t in_N_s = in0->stride(0);
const std::size_t in_C_s = in0->stride(1);
const std::size_t in_H_s = in0->stride(2);
const std::size_t grid_N_s = in1->stride(0);
const std::size_t grid_H_s = in1->stride(1);
const std::size_t out_N_s = out->stride(0);
const std::size_t out_C_s = out->stride(1);
const std::size_t out_H_s = out->stride(2);
float* grid_ptr_N = grid;
const I* input_ptr_N = input;
O* output_ptr_N = output;
for (std::size_t n = 0; n < N; ++n) {
grid_ptr = grid_ptr_N;
for (std::size_t grid_x = 0; grid_x < grid_H; ++grid_x) {
output_ptr = output_ptr_N + grid_x*out_H_s;
/*
* change grid_x coord to match padding_mode
* Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
* Handle computation of interpolation
* any value outside bounds is considered 0
* if nearest:
* else if linear:
* else if cubic:
* else : nothing
*/
float x = *grid_ptr;
x = update_normalized_coord_with_padding(x, op.paddingMode());
x = unnormalize_grid_sample_coord(x, in_H, op.alignCorners());
if (op.mode() == GridSample_Op::Mode::Nearest) {
const std::int64_t x_rounded = std::nearbyintf(x);
if (in_bound(x_rounded, 0, in_H)) {
input_ptr = input_ptr_N + x_rounded*in_H_s;
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = *input_ptr;
input_ptr += in_C_s;
output_ptr += out_C_s;
}
} else {
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = O(0);
output_ptr += out_C_s;
}
}
} else if (op.mode() == GridSample_Op::Mode::Linear) {
const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_inf = in_bound(x_inf, 0, in_H) ?
input_ptr_NC[static_cast<std::size_t>(x_inf)*in_H_s] : I(0);
const I f_sup = in_bound(x_sup, 0, in_H) ?
input_ptr_NC[static_cast<std::size_t>(x_sup)*in_H_s] : I(0);
*output_ptr = static_cast<O>(static_cast<I>(x - x_inf)*f_inf \
+ static_cast<I>(x_sup - x)*f_sup);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
} else if (op.mode() == GridSample_Op::Mode::Cubic) {
const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
const std::int64_t x_inf_inf = update_unnormalized_coord_with_padding(x_inf - 1, in_H, op.paddingMode());
const std::int64_t x_sup_sup = update_unnormalized_coord_with_padding(x_sup + 1, in_H, op.paddingMode());
const I x1 = static_cast<I>(x - static_cast<float>(x_inf));
const I x2 = x1 * x1;
const I x3 = x1 * x2;
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_inf_inf = in_bound(x_inf_inf, 0, in_H) ? input_ptr_NC[x_inf_inf*in_H_s] : I(0);
const I f_inf = in_bound(x_inf, 0, in_H) ? input_ptr_NC[x_inf*in_H_s] : I(0);
const I f_sup = in_bound(x_sup, 0, in_H) ? input_ptr_NC[x_sup*in_H_s] : I(0);
const I f_sup_sup = in_bound(x_sup_sup, 0, in_H) ? input_ptr_NC[x_sup_sup*in_H_s] : I(0);
const I m_inf = (f_sup - f_inf_inf) / I(2);
const I m_sup = (f_sup_sup - f_inf) / I(2);
*output_ptr = f_inf \
+ x1 * m_inf \
+ x2 * (3 * (f_sup - f_inf) - 2 * m_inf - m_sup) \
+ x3 * (2*(f_inf - f_sup) + m_inf + m_sup);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
}
grid_ptr += grid_H_s;
}
input_ptr_N += in_N_s;
grid_ptr_N += grid_N_s;
output_ptr_N += out_N_s;
}
}
// Kernels registration to implementation entry point
// only accept 1st input with only 1 spatial feat. (nb dims = 1)
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Int32}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
/**
* @brief Forward kernel for 1D GridSample on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param params tuple of Attributes from the Operator
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param grid_ const grid Tensor.
* @param output_ Output Tensor.
*/
template <class I, class O>
void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
const std::shared_ptr<Tensor>& in0,
const std::shared_ptr<Tensor>& in1,
const std::shared_ptr<Tensor>& out)
{
const I* input = static_cast<const I *>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
float* const grid = static_cast<float*>(in0->getImpl()->rawPtr());
float* grid_ptr = grid;
O* const output = static_cast<O*>(out->getImpl()->rawPtr());
const std::size_t N = in0->dim(0);
const std::size_t C = in0->dim(1);
const std::size_t in_H = in0->dim(2);
const std::size_t in_W = in0->dim(3);
const std::size_t grid_H = in1->dim(1);
const std::size_t grid_W = in1->dim(2);
const std::size_t in_N_s = in0->stride(0);
const std::size_t in_C_s = in0->stride(1);
const std::size_t in_H_s = in0->stride(2);
const std::size_t in_W_s = in0->stride(3);
const std::size_t grid_N_s = in1->stride(0);
const std::size_t grid_H_s = in1->stride(1);
const std::size_t grid_W_s = in1->stride(2);
const std::size_t grid_Coord_s = in1->stride(3);
const std::size_t out_N_s = out->stride(0);
const std::size_t out_C_s = out->stride(1);
const std::size_t out_H_s = out->stride(2);
const std::size_t out_W_s = out->stride(3);
float* grid_ptr_N = grid;
const I* input_ptr_N = input;
O* output_ptr_N = output;
for (std::size_t n = 0; n < N; ++n) {
for (std::size_t grid_y = 0; grid_y < grid_H; ++grid_y) {
for (std::size_t grid_x = 0; grid_x < grid_W; ++grid_x) {
O* output_ptr = output_ptr_N + grid_y*out_H_s + grid_y*out_W_s;
grid_ptr = grid_ptr_N + grid_y*grid_H_s + grid_x*grid_W_s;
/*
* change grid_x coord to match padding_mode
* Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
* Handle computation of interpolation
* any value outside bounds is considered 0
* if nearest:
* else if linear:
* else if cubic:
* else : nothing
*/
float x = *grid_ptr;
float y = grid_ptr[grid_Coord_s];
x = update_normalized_coord_with_padding(x, op.paddingMode());
x = unnormalize_grid_sample_coord(x, in_W, op.alignCorners());
y = update_normalized_coord_with_padding(y, op.paddingMode());
y = unnormalize_grid_sample_coord(y, in_H, op.alignCorners());
if (op.mode() == GridSample_Op::Mode::Nearest) {
const std::int64_t x_rounded = std::nearbyintf(x);
const std::int64_t y_rounded = std::nearbyintf(y);
if (in_bound(x_rounded, 0, in_W) && in_bound(y_rounded, 0, in_H)) {
input_ptr = input_ptr_N + y_rounded*in_H_s + x_rounded*in_W_s;
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = *input_ptr;
input_ptr += in_C_s;
output_ptr += out_C_s;
}
} else {
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = O(0);
output_ptr += out_C_s;
}
}
} else if (op.mode() == GridSample_Op::Mode::Linear) {
const std::int64_t x_r = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode()); // right
const std::int64_t x_l = update_unnormalized_coord_with_padding(x_r + 1, in_W, op.paddingMode()); // left
const std::int64_t y_t = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode()); // top
const std::int64_t y_b = update_unnormalized_coord_with_padding(y_t + 1, in_H, op.paddingMode()); // bottom
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_tr = (in_bound(x_r, 0, in_W) && in_bound(y_t, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
+ static_cast<std::size_t>(x_r)*in_W_s]
: I(0);
const I f_tl = (in_bound(x_l, 0, in_W) && in_bound(y_t, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
+ static_cast<std::size_t>(x_l)*in_W_s]
: I(0);
const I f_br = (in_bound(x_r, 0, in_W) && in_bound(y_b, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
+ static_cast<std::size_t>(x_r)*in_W_s]
: I(0);
const I f_bl = (in_bound(x_l, 0, in_W) && in_bound(y_b, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
+ static_cast<std::size_t>(x_l)*in_W_s]
: I(0);
// compute weighted sum of the 4 corners
const I w_tr = static_cast<I>((y - static_cast<float>(y_t))*(static_cast<float>(x_r) - x));
const I w_tl = static_cast<I>((y - static_cast<float>(y_t))*(x - static_cast<float>(x_l)));
const I w_br = static_cast<I>((static_cast<float>(y_b) - y)*(static_cast<float>(x_r) - x));
const I w_bl = static_cast<I>((static_cast<float>(y_b) - y)*(x - static_cast<float>(x_l)));
*output_ptr = static_cast<O>(w_tr*f_tr + w_tl*f_tl + w_br*f_br + w_bl*f_bl);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
} else if (op.mode() == GridSample_Op::Mode::Cubic) {
/*
* .. .. .. .. .. ..
* .. 00 01 02 03 ..
* .. 10 11 12 13 ..
* .. 20 21 22 23 ..
* .. 30 31 32 33 ..
* .. .. .. .. .. ..
*/
const std::int64_t x_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode());
const std::int64_t x_0 = update_unnormalized_coord_with_padding(x_1 - 1, in_W, op.paddingMode());
const std::int64_t x_2 = update_unnormalized_coord_with_padding(x_1 + 1, in_W, op.paddingMode());
const std::int64_t x_3 = update_unnormalized_coord_with_padding(x_1 + 2, in_W, op.paddingMode());
const std::int64_t y_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode());
const std::int64_t y_0 = update_unnormalized_coord_with_padding(y_1 - 1, in_H, op.paddingMode());
const std::int64_t y_2 = update_unnormalized_coord_with_padding(y_1 + 1, in_H, op.paddingMode());
const std::int64_t y_3 = update_unnormalized_coord_with_padding(y_1 + 2, in_H, op.paddingMode());
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_00 = in_bound(x_0, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_0*in_H_s] : I(0);
const I f_01 = in_bound(x_0, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_1*in_H_s] : I(0);
const I f_02 = in_bound(x_0, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_2*in_H_s] : I(0);
const I f_03 = in_bound(x_0, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_3*in_H_s] : I(0);
const I f_10 = in_bound(x_1, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_0*in_H_s] : I(0);
const I f_20 = in_bound(x_2, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_0*in_H_s] : I(0);
const I f_30 = in_bound(x_3, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_0*in_H_s] : I(0);
const I f_11 = in_bound(x_1, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_1*in_H_s] : I(0);
const I f_12 = in_bound(x_1, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_2*in_H_s] : I(0);
const I f_13 = in_bound(x_1, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_3*in_H_s] : I(0);
const I f_21 = in_bound(x_2, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_1*in_H_s] : I(0);
const I f_22 = in_bound(x_2, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_2*in_H_s] : I(0);
const I f_23 = in_bound(x_2, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_3*in_H_s] : I(0);
const I f_31 = in_bound(x_3, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_1*in_H_s] : I(0);
const I f_32 = in_bound(x_3, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_2*in_H_s] : I(0);
const I f_33 = in_bound(x_3, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_3*in_H_s] : I(0);
const I mx_11 = (f_21 - f_01) / I(2);
const I mx_12 = (f_22 - f_02) / I(2);
const I mx_21 = (f_31 - f_11) / I(2);
const I mx_22 = (f_32 - f_12) / I(2);
const I my_11 = (f_12 - f_10) / I(2);
const I my_12 = (f_13 - f_11) / I(2);
const I my_21 = (f_22 - f_20) / I(2);
const I my_22 = (f_23 - f_21) / I(2);
const I mxy_11 = (f_22 - f_20 - f_02 - + f_00) / I(4);
const I mxy_12 = (f_23 - f_21 - f_03 - + f_01) / I(4);
const I mxy_21 = (f_32 - f_30 - f_12 - + f_10) / I(4);
const I mxy_22 = (f_33 - f_31 - f_13 - + f_11) / I(4);
const I a_00 = f_11;
const I a_10 = mx_11;
const I a_20 = I(3)*(f_21 - f_11) - I(2)*mx_11 - mx_21;
const I a_30 = I(2)*(f_11 - f_21) + mx_11 + mx_21;
const I a_01 = my_11;
const I a_11 = mxy_11;
const I a_21 = I(3)*(my_21 - my_11) - I(2)*mxy_11 - mxy_21;
const I a_31 = I(2)*(my_11 - my_21) + mxy_11 + mxy_21;
const I a_02 = I(3)*(f_12 - f_11) - I(2)*my_11 - my_12;
const I a_12 = I(3)*(mx_12 - mx_11) - I(2)*mxy_11 - mxy_12;
const I a_22 = I(9)*(f_11 + f_22 - f_21 - f_12) + I(3)*(I(2)*(mx_11 - mx_12 + my_11 - my_21) + mx_21 - mx_22 + my_12 - my_22) + mxy_22 + I(2)*(mxy_12 + mxy_21 + I(2)*mxy_11);
const I a_32 = - mxy_12 - mxy_22 + I(2)*(my_22 - my_12 - mxy_11 - mxy_21 + I(2)*(my_21 - my_11) + I(3)*(f_21 + f_12 - f_11 - f_22)) + I(3)*(mx_12 + mx_22 - mx_11 - mx_21);
const I a_03 = I(2)*(f_11 - f_12) + my_11 + my_12;
const I a_13 = I(2)*(mx_11 - mx_12) + mxy_11 + mxy_12;
const I a_23 = - mxy_21 - mxy_22 + I(2)*(-mx_21 + mx_22 - mxy_11 - mxy_12 + I(2)*(mx_12 - mx_11) + I(3)*(f_12 + f_21 - f_11 - f_22)) + I(3)*(my_21 + my_22 - my_11 - my_12);
const I a_33 = mxy_11 + mxy_21 + mxy_12 + mxy_22 + I(2)*(mx_11 + mx_21 - mx_12 - mx_22 + my_11 - my_21 + my_12 - my_22 + I(2)*(f_11 - f_21 - f_12 + f_22));
const I x2 = static_cast<I>(x*x);
const I x3 = static_cast<I>(x*x*x);
const I y2 = static_cast<I>(y*y);
const I y3 = static_cast<I>(y*y*y);
*output_ptr = static_cast<O>( \
a_00 + a_10*x + a_20*x2 + a_30*x3 \
+ a_01*y + a_11*x*y + a_21*x2*y + a_31*x3*y \
+ a_02*y2 + a_12*x*y2 + a_22*x2*y2 + a_32*x3*y2 \
+ a_03*y3 + a_13*x*y3 + a_23*x2*y3 + a_33*x3*y3);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
}
}
}
input_ptr_N += in_N_s;
grid_ptr_N += grid_N_s;
output_ptr_N += out_N_s;
}
}
// Kernels registration to implementation entry point
// only accept 1st input with only 2 spatial feat. (nb dims = 2)
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Int32}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_
#define AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_
#include <cstddef> // std::size_t
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Heaviside.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/future_std/span.hpp"
namespace Aidge {
using HeavisideImplCpu =
OperatorImpl_cpu<Heaviside_Op,
void(std::size_t, const void *, void *, const float),
void(std::size_t, const void *, const void *, void *)>;
// Implementation entry point registration for operator Heaviside
REGISTRAR(Heaviside_Op, "cpu", HeavisideImplCpu::create);
} // namespace Aidge
#endif // AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef> // std::size_t
#include <cmath>
#include "aidge/backend/cpu/operator/HeavisideImpl.hpp"
#include "aidge/utils/ErrorHandling.hpp"
namespace Aidge {
template <class I, class O>
void HeavisideImplCpuForwardKernel(std::size_t inputLength,
const void *input_,
void *output_,
const float value) {
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = (input[i] > 0) ? 1 : (input[i] == 0 ? value : 0);
}
}
// Surrogate Gradient
template <class O, class GO, class GI>
void HeavisideImplCpuBackwardKernel(std::size_t inputLength,
const void* output_,
const void* grad_output_,
void* grad_input_) {
/*
* Heaviside is approximated by an arctan function for backward :
* S ~= \frac{1}{\pi}\text{arctan}(\pi U \frac{\alpha}{2})
* \frac{dS}{dU} = \frac{\alpha}{2} \frac{1}{(1+(\frac{\pi U \alpha}{2})^2)}}
* */
const O* output = static_cast<const O*>(output_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
for (size_t i = 0; i < inputLength; ++i) {
grad_input[i] += grad_output[i] * static_cast<O>(1.0 / (1.0 + (output[i] * M_PI) * (output[i] * M_PI)));
}
}
// Kernels registration to implementation entry point
REGISTRAR(HeavisideImplCpu,
{DataType::Float32},
{ProdConso::inPlaceModel,
Aidge::HeavisideImplCpuForwardKernel<float, float>,
Aidge::HeavisideImplCpuBackwardKernel<float,float,float>});
} // namespace Aidge
#endif // AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H__H_
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LRNIMPL_H_
#define AIDGE_CPU_OPERATOR_LRNIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/LRN.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using LRNImpl_cpu = OperatorImpl_cpu<LRN_Op,
void(float, float, float, std::size_t, const std::vector<DimSize_t>&, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(LRN_Op, "cpu", Aidge::LRNImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LRNIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LRNIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_LRNIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef>
#include <cmath>
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/LRNImpl.hpp"
namespace Aidge {
template <class I, class O>
void LRNImpl_cpu_forward_kernel(float alpha, float beta, float bias, std::size_t size, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const DimSize_t nbBatch = inputDims[0];
const DimSize_t nbChannels = (inputDims.size() > 1) ? inputDims[1] : 1;
const DimSize_t featureMapSize = (inputDims.size() > 2) ? std::accumulate(inputDims.begin() + 2, inputDims.end(), 1, std::multiplies<DimSize_t>()) : 1;
for (std::size_t batch = 0; batch < nbBatch; ++batch) {
for (std::size_t ch = 0; ch < nbChannels; ++ch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
const unsigned int channelMin
= std::max<int>(0, ch - size / 2);
const unsigned int channelMax
= std::min<size_t>(nbChannels - 1, ch + size / 2);
for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
// For each input channel, accumulate the value
O accAccrossChannels(0.0);
for (unsigned int accChannel = channelMin;
accChannel < channelMax; ++accChannel)
{
accAccrossChannels += input[ioIndex + feature];
}
// Compute the output signal
output[ioIndex + feature] = input[ioIndex + feature]
/ std::pow((bias + (accAccrossChannels * accAccrossChannels) * alpha), beta);
}
}
}
}
REGISTRAR(LRNImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::LRNImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(LRNImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::LRNImpl_cpu_forward_kernel<double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LRNIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_
#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/LeakyReLU.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using LeakyReLUImpl_cpu = OperatorImpl_cpu<LeakyReLU_Op,
void(const float,
std::size_t,
const void*,
void*),
void(const float,
std::size_t,
const void*,
const void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(LeakyReLU_Op, "cpu", Aidge::LeakyReLUImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
namespace Aidge {
template <class I, class O>
void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_,
std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const I negativeSlope = static_cast<const I>(negativeSlope_);
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = (input[i] >= 0) ? input[i] : input[i] * negativeSlope;
}
}
template <class I, class O>
void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_,
std::size_t inputLength,
const void* input_,
const void* grad_output_,
void* grad_input_) {
const O* input = static_cast<const O*>(input_);
const I* grad_output = static_cast<const I*>(grad_output_);
O* grad_input = static_cast<O*>(grad_input_);
const I negativeSlope = static_cast<const I>(negativeSlope_);
for (std::size_t i = 0; i < inputLength; ++i) {
grad_input[i] += (input[i] > 0) ? grad_output[i] : negativeSlope*grad_output[i];
}
}
// Kernels registration to implementation entry point
REGISTRAR(LeakyReLUImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>, Aidge::LeakyReLUImpl_cpu_backward_kernel<float, float>});
REGISTRAR(LeakyReLUImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>, Aidge::LeakyReLUImpl_cpu_backward_kernel<double, double>});
REGISTRAR(LeakyReLUImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::LeakyReLUImpl_cpu_backward_kernel<int32_t, int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LNIMPL_H_
#define AIDGE_CPU_OPERATOR_LNIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Ln.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using LnImpl_cpu = OperatorImpl_cpu<Ln_Op,
void(const std::size_t, const void*, void*),
void(const std::size_t, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Ln_Op, "cpu", Aidge::LnImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LNIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/LnImpl.hpp"
namespace Aidge {
template <class I, class O>
void LnImpl_cpu_forward_kernel(std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const float eps = 1.0e-20f;
//#pragma omp parallel for if (inputLength > 1024)
for (std::size_t i = 0; i < inputLength; ++i) {
if (input[i] > I(eps)) {
output[i] = std::log(input[i]);
} else {
output[i] = std::log(I(eps));
}
}
}
template <class I, class GI, class GO>
void LnImpl_cpu_backward_kernel(const std::size_t inputLength,
const void* input_, const void* grad_output_,
void* grad_input_) {
const I* input = static_cast<const I*>(input_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
const float eps = 1.0e-20f;
for (std::size_t i = 0; i < inputLength; ++i) {
if (input[i] > I(eps)) {
grad_input[i] += grad_output[i] / input[i];
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(LnImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<float, float>, Aidge::LnImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(LnImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<double, double>, Aidge::LnImpl_cpu_backward_kernel<double, double, double>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_H_
#define AIDGE_CPU_OPERATOR_MATMULIMPL_H_
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using MatMulImpl_cpu = OperatorImpl_cpu<MatMul_Op,
void(const std::size_t, const std::size_t, const std::size_t,
const void *, const void *, void *)>;
// Implementation entry point registration to Operator
REGISTRAR(MatMul_Op, "cpu", Aidge::MatMulImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
namespace Aidge {
template <class I, class O>
void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m,
const void* input1_, const void* input2_, void* __restrict output_) {
// FIXME: missing MatMul parameters as arguments
const I* input1 = static_cast<const I*>(input1_);
const I* input2 = static_cast<const I*>(input2_);
O* __restrict output = static_cast<O* __restrict>(output_);
std::memset(output, O(0), n * m * sizeof(O));
#ifdef _OPENMP
#pragma omp parallel for if (n >= 16)
#endif
for (int i = 0; i < static_cast<int>(n); ++i) {
for (std::size_t l = 0; l < k; ++l) {
for (std::size_t j = 0; j < m; ++j) {
output[i*m + j] += static_cast<O>(input1[i*k + l] * input2[l*m + j]);
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(MatMulImpl_cpu,
{DataType::Float32},
{ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(MatMulImpl_cpu,
{DataType::Float64},
{ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(MatMulImpl_cpu,
{DataType::Int32},
{ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_
#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using MaxPooling2D_Op = MaxPooling_Op<2>;
using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const bool,
const std::array<DimSize_t, 4> &,
const void *,
void *),
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const bool,
const std::array<DimSize_t, 4> &,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(MaxPooling2D_Op, "cpu", Aidge::MaxPoolingImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ */