Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mszczep/aidge_backend_cpu
  • eclipse/aidge/aidge_backend_cpu
  • hrouis/aidge_backend_cpu
  • oantoni/aidge_backend_cpu
  • raphaelmillet/aidge_backend_cpu
  • cguillon/aidge_backend_cpu
  • jeromeh/aidge_backend_cpu
  • axelfarr/aidge_backend_cpu
  • noamzerah/aidge_backend_cpu
  • silvanosky/aidge_backend_cpu
  • maab05/aidge_backend_cpu
  • lucaslopez/aidge_backend_cpu_ll
  • farnez/aidge_backend_cpu
  • mick94/aidge_backend_cpu
14 results
Show changes
Showing
with 2648 additions and 0 deletions
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ClipImpl.hpp"
namespace Aidge {
/**
 * @brief Forward kernel for the Clip operator on CPU backend.
 * Writes output[i] = min(max(input[i], min_), max_) for every element.
 * @tparam I Input data type.
 * @tparam O Output data type.
 * @param min_ Lower clipping bound (cast to I before comparison).
 * @param max_ Upper clipping bound (cast to I before comparison).
 * @param input_ const input buffer of I values.
 * @param length Number of elements to process.
 * @param output_ output buffer of O values.
 */
template <class I, class O>
void ClipImpl_cpu_forward_kernel(
    float min_,
    float max_,
    const void* input_,
    const std::size_t length,
    void* output_)
{
    const I* const in = static_cast<const I*>(input_);
    O* const out = static_cast<O*>(output_);
    const I lowerBound = static_cast<I>(min_);
    const I upperBound = static_cast<I>(max_);
    for (std::size_t idx = 0; idx < length; ++idx) {
        // max(in, lower) then min(result, upper), same as the std::min/std::max chain.
        const I clippedLow = (in[idx] < lowerBound) ? lowerBound : in[idx];
        out[idx] = (upperBound < clippedLow) ? upperBound : clippedLow;
    }
}
/**
 * @brief Backward kernel for the Clip operator on CPU backend.
 * Accumulates the output gradient into the input gradient wherever the
 * forward input lay strictly inside (min_, max_); clipped positions get 0.
 * @tparam I Input data type.
 * @tparam GI Input-gradient data type.
 * @tparam GO Output-gradient data type.
 * @param min_ Lower clipping bound used in the forward pass.
 * @param max_ Upper clipping bound used in the forward pass.
 * @param length Number of elements to process.
 * @param input_ const forward-pass input buffer.
 * @param grad_output_ const gradient w.r.t. the output.
 * @param grad_input_ gradient w.r.t. the input, accumulated in place.
 */
template <class I, class GI, class GO>
void ClipImpl_cpu_backward_kernel(
    float min_,
    float max_,
    const std::size_t length,
    const void* input_,
    const void* grad_output_,
    void* grad_input_)
{
    const I* const in = static_cast<const I*>(input_);
    const GO* const gradOut = static_cast<const GO*>(grad_output_);
    GI* const gradIn = static_cast<GI*>(grad_input_);
    for (std::size_t idx = 0; idx < length; ++idx) {
        // Gradient only flows through positions that were not clipped.
        const bool insideRange = (in[idx] > min_) && (in[idx] < max_);
        gradIn[idx] += insideRange ? gradOut[idx] : GO(0);
    }
}
// Kernels registration to implementation entry point.
// Each REGISTRAR binds one data type to a {ProdConso model, forward kernel,
// backward kernel} triplet; input and output share the same type here.
REGISTRAR(ClipImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<float,float>,
Aidge::ClipImpl_cpu_backward_kernel<float,float,float>});
REGISTRAR(ClipImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<double,double>,
Aidge::ClipImpl_cpu_backward_kernel<double,double,double>});
REGISTRAR(ClipImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<std::int32_t,std::int32_t>,
Aidge::ClipImpl_cpu_backward_kernel<std::int32_t,std::int32_t,std::int32_t>});
REGISTRAR(ClipImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<std::int64_t,std::int64_t>,
Aidge::ClipImpl_cpu_backward_kernel<std::int64_t,std::int64_t,std::int64_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
#include <memory>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
// Forward declaration is sufficient: the kernel signature below only needs the name.
class Tensor;
// Operator implementation entry point for the backend.
// Kernel signature: (output tensor to fill in place, tensor holding the constant value).
using ConstantOfShapeImpl_cpu = OperatorImpl_cpu<ConstantOfShape_Op,
void(const std::shared_ptr<Tensor>&, const Tensor&)>;
// Implementation entry point registration to Operator
REGISTRAR(ConstantOfShape_Op, "cpu", Aidge::ConstantOfShapeImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_
#include <aidge/data/Tensor.hpp>
#include <aidge/data/half.hpp>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional> // std::multiplies
#include <numeric> // std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
 * @brief Forward kernel for ConstantOfShape on CPU backend: fills the
 * (already resized) output tensor with the single scalar held by `value`.
 * @tparam O Output data type.
 * @param output_ Output tensor, written in place through its host pointer.
 * @param value Tensor holding the fill value; assumed to contain exactly one
 *        element of type O — TODO confirm the registrar guarantees the type match.
 */
template <class O>
void ConstantOfShapeimpl_cpu_forward_kernel(
const std::shared_ptr<Tensor>& output_, const Tensor &value) {
O* output = static_cast<O*>(output_->getImpl()->hostPtr());
// static_cast is the idiomatic cast from void*; keep the pointee const since
// `value` is read-only here (was reinterpret_cast<O*>).
const O val = *static_cast<const O*>(value.getImpl()->hostPtr());
std::fill_n(output, output_->size(), val);
}
// Kernels registration to implementation entry point.
// The first IOSpec is the shape input (always Int64 in these registrations);
// the second IOSpec selects the output data type of the fill kernel.
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float16}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int16}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int64}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_
#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ConvDepthWise.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using ConvDepthWise1D_Op = ConvDepthWise_Op<1>;
// Forward kernel signature (see ConvDepthWiseImpl_kernels.hpp):
// (strideDims, dilationDims, kernelDims, inputDims {N, C, W},
//  input, weights, biases, output)
using ConvDepthWiseImpl1D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<1>,
void(const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 3>&,
const void *,
const void *,
const void *,
void *)>;
using ConvDepthWise2D_Op = ConvDepthWise_Op<2>;
// Forward kernel signature:
// (strideDims, dilationDims, kernelDims, inputDims {N, C, H, W},
//  input, weights, biases, output)
using ConvDepthWiseImpl2D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(ConvDepthWise1D_Op, "cpu", Aidge::ConvDepthWiseImpl1D_cpu::create);
REGISTRAR(ConvDepthWise2D_Op, "cpu", Aidge::ConvDepthWiseImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
 * @brief Forward kernel for 1D depthwise convolution on CPU backend.
 * Each channel is convolved with its own single-channel kernel.
 * @tparam I Input data type.
 * @tparam W Weight data type.
 * @tparam B Bias data type.
 * @tparam O Output data type.
 * @param strideDims Stride along the spatial dimension.
 * @param dilationDims Dilation along the spatial dimension.
 * @param kernelDims Kernel size along the spatial dimension.
 * @param inputDims Input dimensions as {batch, channels, width}.
 * @param input_ const input Tensor data.
 * @param weights_ const weight Tensor data.
 * @param biases_ const bias Tensor data (may be nullptr: bias then treated as 0).
 * @param output_ Output Tensor data.
 */
template <class I, class W, class B, class O>
void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// Output spatial size for a VALID (no padding) convolution with dilation:
// floor((W - dilatedKernel + stride) / stride).
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, ch, Xin, Yin)
// weight (outCh, ch, kernelX, kernelY)
using signedsize = std::make_signed<std::size_t>::type;
#ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] >= 16)
#endif
for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (int ch = 0; ch < static_cast<int>(inputDims[1]); ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
// Pre-fill the output row with the bias, then accumulate products on top.
std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
// Depthwise: each channel uses its own kernelDims[0] weights.
const std::size_t wIndex = ch * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
// const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
// const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
// const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
// sx iterates over kernel taps; sx*dilation < dilated size <=> sx < kernelDims[0].
for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
}
}
}
}
}
// Kernels registration to implementation entry point.
// The second IOSpec (Float32/Int32/Float64) drives the kernel instantiation;
// all four template types are instantiated with the same data type.
REGISTRAR(ConvDepthWiseImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvDepthWiseImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr});
REGISTRAR(ConvDepthWiseImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
/**
 * @brief Forward kernel for 2D depthwise convolution on CPU backend.
 * Each channel is convolved with its own single-channel kernel. Three code
 * paths are used: a specialized 3x3 path, a specialized 1x1 path, and a
 * generic path for all other kernel/dilation combinations.
 * @tparam I Input data type.
 * @tparam W Weight data type.
 * @tparam B Bias data type.
 * @tparam O Output data type.
 * @param strideDims Strides along {H, W}.
 * @param dilationDims Dilations along {H, W}.
 * @param kernelDims Kernel sizes along {H, W}.
 * @param inputDims Input dimensions as {batch, channels, height, width}.
 * @param input_ const input Tensor data.
 * @param weights_ const weight Tensor data.
 * @param biases_ const bias Tensor data (may be nullptr: bias then treated as 0).
 * @param output_ Output Tensor data.
 */
template <class I, class W, class B, class O>
void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4>& inputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size: floor((H - dilatedKernelH + strideH) / strideH), VALID padding.
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size: same formula along the second spatial dimension.
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, ch, Xin, Yin)
// weight (outCh, ch, kernelX, kernelY)
// Number of elements in one output channel plane.
const std::size_t outChannels_s = oxSize * oySize;
// Fast path: effective (dilated) 3x3 kernel, rows unrolled by hand.
if (dilated_kernel_x ==3 && dilated_kernel_y == 3) {
#ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] >= 16)
#endif
for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (int ch = 0; ch < static_cast<int>(inputDims[1]); ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::size_t oIndex = (ch + batch*inputDims[1]) * outChannels_s;
std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * 9;
if (strideDims[0] == 1 && strideDims[1]==1) {
// iIndex is advanced by one row per unrolled tap below, then rewound
// by inputDims[3] at each ox increment (net advance: one input row).
for (std::size_t ox = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
}
}
} else {
// Strided variant: net row advance per ox is strideDims[0] input rows.
for (std::size_t ox = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=(strideDims[0]-2)*inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iIndex+oy*strideDims[1]]+weights[wIndex+1]*input[iIndex+oy*strideDims[1]+1]+weights[wIndex+2]*input[iIndex+oy*strideDims[1]+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy*strideDims[1]]+weights[wIndex+4]*input[iIndex+oy*strideDims[1]+1]+weights[wIndex+5]*input[iIndex+oy*strideDims[1]+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy*strideDims[1]]+weights[wIndex+7]*input[iIndex+oy*strideDims[1]+1]+weights[wIndex+8]*input[iIndex+oy*strideDims[1]+2];
}
}
}
}
}
// Fast path: 1x1 kernel, a per-channel scale plus bias.
} else if (dilated_kernel_x == 1 && dilated_kernel_y == 1) {
#ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] >= 16)
#endif
for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (int ch = 0; ch < static_cast<int>(inputDims[1]); ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::size_t oIndex = (ch + batch*inputDims[1]) * outChannels_s;
std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch;
if (strideDims[0] == 1 && strideDims[1] == 1) {
for (std::size_t i = 0; i < oxSize*oySize; ++i) {
output[oIndex + i] = biasVal + weights[wIndex] * input[iIndex + i];
}
} else {
for (std::size_t ox = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=strideDims[0]*inputDims[3]) {
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
output[oIndex + oy] = biasVal + weights[wIndex]*input[iIndex+iy];
}
}
}
}
}
// Generic path: arbitrary kernel size, stride and dilation.
} else {
#ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] >= 16)
#endif
for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (int ch = 0; ch < static_cast<int>(inputDims[1]); ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * outChannels_s;
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
// Pre-fill the output plane with the bias, then accumulate products.
std::fill(output + oIndex, output + oIndex + outChannels_s, biasVal);
for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const std::size_t ix = ox * strideDims[0];
const std::size_t iy = oy * strideDims[1];
for (std::size_t kx = 0; kx*dilationDims[0] < dilated_kernel_x; ++kx) {
for (std::size_t ky = 0; ky*dilationDims[1] < dilated_kernel_y; ++ky) {
output[oIndexFull] += weights[wIndex + kx*kernelDims[1] + ky] *
input[iIndex + (ix + kx*dilationDims[0])*inputDims[3] + (iy + ky*dilationDims[1])];
}
}
}
}
}
}
}
}
// Kernels registration to implementation entry point.
// The second IOSpec (Float32/Int32/Float64) drives the kernel instantiation;
// all four template types are instantiated with the same data type.
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr});
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_H_
#define AIDGE_CPU_OPERATOR_CONVIMPL_H_
#include <array>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using Conv1D_Op = Conv_Op<1>;
// First signature: forward kernel
// (strideDims, dilationDims, kernelDims, inputDims, outChannels,
//  input, weights, biases, output).
// Second signature: backward kernel
// (strideDims, dilationDims, kernelDims, inputDims, outputDims,
//  input, weights, gradOutput, gradInput, gradWeights, gradBiases).
using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>,
void(const std::array<DimSize_t, 1> &,
const std::array<DimSize_t, 1> &,
const std::array<DimSize_t, 1> &,
const std::array<DimSize_t, 3> &,
DimSize_t,
const void *,
const void *,
const void *,
void *),
void(const std::array<DimSize_t, 1> &,
const std::array<DimSize_t, 1> &,
const std::array<DimSize_t, 1> &,
const std::array<DimSize_t, 3> &,
const std::array<DimSize_t, 3> &,
const void *,
const void *,
const void *,
void *,
void *,
void *)>;
using Conv2D_Op = Conv_Op<2>;
// Same forward/backward signature pair as the 1D case, with 2D spatial
// attributes and 4D (NCHW) tensor dimensions.
using ConvImpl2D_cpu = OperatorImpl_cpu<Conv2D_Op,
void(const std::array<DimSize_t, 2> &,
const std::array<DimSize_t, 2> &,
const std::array<DimSize_t, 2> &,
const std::array<DimSize_t, 4> &,
DimSize_t,
const void *,
const void *,
const void *,
void *),
void(const std::array<DimSize_t, 2> &,
const std::array<DimSize_t, 2> &,
const std::array<DimSize_t, 2> &,
const std::array<DimSize_t, 4> &,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *,
void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create);
REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */
This diff is collapsed.
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVTRANSPOSEIMPL_H_
#define AIDGE_CPU_OPERATOR_CONVTRANSPOSEIMPL_H_
#include <array>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ConvTranspose.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// NOTE(review): `using std::array;` in a header injects the name into
// namespace Aidge for every includer — consider spelling std::array instead.
using std::array;
// Operator implementation entry point for the backend
using ConvTranspose1D_Op = ConvTranspose_Op<1>;
// Forward kernel signature (see ConvTransposeImpl_kernels.hpp):
// (stride, dilation, kernelDims, inputDims, outputDims,
//  input, weights, biases, output).
using ConvTransposeImpl1D_cpu =
OperatorImpl_cpu<ConvTranspose1D_Op,
void(const array<DimSize_t,1> &,
const array<DimSize_t,1> &,
const array<DimSize_t,1> &,
const array<DimSize_t, 3> &,
const array<DimSize_t, 3> &,
const void *,
const void *,
const void *,
void *)>;
using ConvTranspose2D_Op = ConvTranspose_Op<2>;
// Same signature shape as the 1D case with 2D attributes / 4D dimensions.
using ConvTransposeImpl2D_cpu =
OperatorImpl_cpu<ConvTranspose2D_Op,
void(const array<DimSize_t, 2> &,
const array<DimSize_t, 2> &,
const array<DimSize_t, 2> &,
const array<DimSize_t, 4> &,
const array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(ConvTranspose1D_Op, "cpu", ConvTransposeImpl1D_cpu::create);
REGISTRAR(ConvTranspose2D_Op, "cpu", ConvTransposeImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVTRANSPOSEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVTRANSPOSEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONVTRANSPOSEIMPL_KERNELS_H_
#include <array>
#include "aidge/backend/cpu/operator/ConvTransposeImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include <aidge/backend/cpu/operator/ConvImpl_kernels.hpp>
#include <aidge/data/Data.hpp>
#include <aidge/data/half.hpp>
#include <aidge/scheduler/ProdConso.hpp>
#include <aidge/utils/Types.h>
namespace Aidge {
using std::array;
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
// 1D
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
/**
* @brief performs forward bias operation for convtranspose operator
*
* @tparam B Bias data type.
* @tparam O Output data type.
* @param[in] bias bias values
* @param[in] oDims dimensions of the output
* @param[in] oStrides nb of elements contained per dimension of the output
* @param[out] output
*/
template <class B, class O>
static void convTranspose1DForwardBias(const B *biases,
const array<DimSize_t, 3> &oDims,
const array<DimSize_t, 2> &oStrides,
O *output) {
array<DimSize_t, 2> outOffsets{0, 0};
for (DimSize_t batch = 0; batch < oDims[0]; ++batch) {
outOffsets[0] = batch * oStrides[0];
for (DimSize_t outCh = 0; outCh < oDims[1]; ++outCh) {
outOffsets[1] = outCh * oStrides[1] + outOffsets[0];
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + outOffsets[1],
output + (outOffsets[1] + oDims[2]),
biasVal);
}
}
}
/**
 * @brief forward kernel for convtranspose
 * @note ConvTranspose forward is simply convolution backward kernel.
 * Check convolution functions for more in-depth details on how the
 subfunctions are built.
 * @tparam I Input data type.
 * @tparam W Weight data type.
 * @tparam B Bias data type.
 * @tparam O Output data type.
 * @param[in] stride stride parameter of the convTranspose operator
 * @param[in] dilation dilation parameter of the convTranspose operator
 * @param[in] kernelDim kernel size along the spatial dimension
 * @param[in] inputDims input dimensions {batch, channels, width}
 * @param[in] outputDims output tensor dimensions {batch, channels, width}
 * @param[in] input_ input values
 * @param[in] weights_ weight values
 * @param[in] biases_ bias values (may be nullptr)
 * @param[out] output_ output buffer
 */
template <class I, class W, class B, class O>
void ConvTransposeImpl1D_cpu_forward_kernel(
const array<DimSize_t, 1> &stride,
const array<DimSize_t, 1> &dilation,
const array<DimSize_t, 1> &kernelDim,
const array<DimSize_t, 3> &inputDims,
const array<DimSize_t, 3> &outputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_) {
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
O *output = static_cast<O *>(output_);
// {batch_stride, channel_stride}
const array<DimSize_t, 2> inputStrides{inputDims[1] * inputDims[2],
inputDims[2]};
// {batch_stride, channel_stride}
const array<DimSize_t, 2> outputStrides{outputDims[1] * outputDims[2],
outputDims[2]};
// NOTE: kernel dims = {inChannels, outChannels, kernelDims[0]}
const array<DimSize_t, 2> kernelStrides{
outputDims[1] * kernelDim[0],
kernelDim[0],
};
// NOTE(review): when biases_ is nullptr the output buffer is NOT initialized
// here — presumably conv1DBackwardInput fully overwrites or the caller
// zero-fills beforehand; confirm against ConvImpl_kernels.hpp.
if (biases_ != nullptr) {
const B *biases = static_cast<const B *>(biases_);
convTranspose1DForwardBias(biases, outputDims, outputStrides, output);
}
// ConvTranspose forward == convolution backward w.r.t. its input.
conv1DBackwardInput(stride,
dilation,
kernelDim,
kernelStrides,
weights,
inputDims,
inputStrides,
input,
outputDims,
outputStrides,
output);
}
// Kernels registration to implementation entry point.
// The second IOSpec selects the data type; all template parameters use it.
REGISTRAR(ConvTransposeImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW},
{DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel,
ConvTransposeImpl1D_cpu_forward_kernel<std::int32_t,
std::int32_t,
std::int32_t,
std::int32_t>,
nullptr});
REGISTRAR(ConvTransposeImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW},
{DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel,
ConvTransposeImpl1D_cpu_forward_kernel<float, float, float, float>,
nullptr});
REGISTRAR(ConvTransposeImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW},
{DataType::Float16, DataFormat::NCHW}},
{ProdConso::inPlaceModel,
ConvTransposeImpl1D_cpu_forward_kernel<half_float::half,
half_float::half,
half_float::half,
half_float::half>,
nullptr});
REGISTRAR(
ConvTransposeImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel,
ConvTransposeImpl1D_cpu_forward_kernel<double, double, double, double>,
nullptr});
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
// 2D
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
/**
* @brief performs forward bias operation for convtranspose operator
*
* @tparam B Bias data type.
* @tparam O Output data type.
* @param[in] bias bias values
* @param[in] oDims dimensions of the output
* @param[in] oStrides nb of elements contained per dimension of the output
* @param[out] output
*/
template <class B, class O>
static void convTranspose2DForwardBias(const B *biases,
const array<DimSize_t, 4> &oDims,
const array<DimSize_t, 3> &oStrides,
O *output) {
array<DimSize_t, 2> outOffsets{0, 0};
for (DimSize_t batch = 0; batch < oDims[0]; ++batch) {
outOffsets[0] = batch * oStrides[0];
for (DimSize_t outCh = 0; outCh < oDims[1]; ++outCh) {
outOffsets[1] = outCh * oStrides[1] + outOffsets[0];
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + outOffsets[1],
(output + outOffsets[1]) + oStrides[1],
biasVal);
}
}
}
/**
 * @brief forward kernel for convtranspose
 * @note ConvTranspose forward is simply convolution backward kernel.
 * Check convolution functions for more in-depth details on how the
 subfunctions are built.
 * @tparam I Input data type.
 * @tparam W Weight data type.
 * @tparam B Bias data type.
 * @tparam O Output data type.
 * @param[in] stride stride parameter of the convTranspose operator
 * @param[in] dilation dilation parameter of the convTranspose operator
 * @param[in] kernelDims kernel sizes along {H, W}
 * @param[in] inputDims input dimensions {batch, channels, height, width}
 * @param[in] outputDims output tensor dimensions {batch, channels, height, width}
 * @param[in] input_ input values
 * @param[in] weights_ weight values
 * @param[in] biases_ bias values (may be nullptr)
 * @param[out] output_ output buffer
 */
template <class I, class W, class B, class O>
void ConvTransposeImpl2D_cpu_forward_kernel(
const array<DimSize_t, 2> &stride,
const array<DimSize_t, 2> &dilation,
const array<DimSize_t, 2> &kernelDims,
const array<DimSize_t, 4> &inputDims,
const array<DimSize_t, 4> &outputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_) {
auto input = static_cast<const I *>(input_);
auto weights = static_cast<const W *>(weights_);
auto output = static_cast<O *>(output_);
// {channel_stride, dim0_stride, dim1_stride}
const array<DimSize_t, 3> inputStrides{
inputDims[1] * inputDims[2] * inputDims[3],
inputDims[2] * inputDims[3],
inputDims[3]};
// {channel_stride, dim0_stride, dim1_stride}
const array<DimSize_t, 3> outputStrides{
outputDims[1] * outputDims[2] * outputDims[3],
outputDims[2] * outputDims[3],
outputDims[3]};
// NOTE: kernel dims = {inChannels, outChannels, kernelDims[0],
// kernelDims[1]}
const array<DimSize_t, 3> kernelStrides{
outputDims[1] * kernelDims[0] * kernelDims[1],
kernelDims[0] * kernelDims[1],
kernelDims[1],
};
// NOTE(review): when biases_ is nullptr the output buffer is NOT initialized
// here — presumably conv2DBackwardInput fully overwrites or the caller
// zero-fills beforehand; confirm against ConvImpl_kernels.hpp.
if (biases_ != nullptr) {
auto biases = static_cast<const B *>(biases_);
convTranspose2DForwardBias(biases, outputDims, outputStrides, output);
}
// ConvTranspose forward == convolution backward w.r.t. its input.
conv2DBackwardInput(stride,
dilation,
kernelDims,
kernelStrides,
weights,
inputDims,
inputStrides,
input,
outputDims,
outputStrides,
output);
}
// Kernels registration to implementation entry point.
// The second IOSpec selects the data type; all template parameters use it.
REGISTRAR(ConvTransposeImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW},
{DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel,
ConvTransposeImpl2D_cpu_forward_kernel<std::int32_t,
std::int32_t,
std::int32_t,
std::int32_t>,
nullptr});
REGISTRAR(ConvTransposeImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW},
{DataType::Float16, DataFormat::NCHW}},
{ProdConso::inPlaceModel,
ConvTransposeImpl2D_cpu_forward_kernel<half_float::half,
half_float::half,
half_float::half,
half_float::half>,
nullptr});
REGISTRAR(ConvTransposeImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW},
{DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel,
ConvTransposeImpl2D_cpu_forward_kernel<float, float, float, float>,
nullptr});
REGISTRAR(
ConvTransposeImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel,
ConvTransposeImpl2D_cpu_forward_kernel<double, double, double, double>,
nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVTRANSPOSEIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CRYPTOHASHIMPL_H_
#define AIDGE_CPU_OPERATOR_CRYPTOHASHIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/CryptoHash.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
#ifdef WITH_OPENSSL
#include <openssl/sha.h>
namespace Aidge {
// Operator implementation entry point for the backend.
// Forward kernel signature: (number of input elements, input buffer,
// output buffer). Only built when OpenSSL support is enabled (WITH_OPENSSL).
using CryptoHashImpl_cpu = OperatorImpl_cpu<CryptoHash_Op,
    void(const std::size_t, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(CryptoHash_Op, "cpu", Aidge::CryptoHashImpl_cpu::create);
} // namespace Aidge
#endif
#endif /* AIDGE_CPU_OPERATOR_CRYPTOHASHIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CRYPTOHASHIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CRYPTOHASHIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/CryptoHashImpl.hpp"
#ifdef WITH_OPENSSL
namespace Aidge {
/**
 * @brief Compute the SHA-256 digest of the raw bytes of the input buffer.
 *
 * The input is hashed as an opaque byte stream of inputLength * sizeof(I)
 * bytes; the element type only determines the byte count.
 *
 * @param inputLength number of elements (of type I) in the input buffer
 * @param input_      raw pointer to the data to hash
 * @param output_     destination buffer; must hold at least
 *                    SHA256_DIGEST_LENGTH (32) bytes
 */
template <class I, class O>
void CryptoHashImpl_cpu_forward_kernel(std::size_t inputLength,
                                       const void* input_,
                                       void* output_) {
    // Total size of the message in bytes.
    const std::size_t nbBytes = inputLength * sizeof(I);
    const auto* message = reinterpret_cast<const uint8_t*>(static_cast<const I*>(input_));
    auto* digest = reinterpret_cast<uint8_t*>(static_cast<O*>(output_));
    SHA256(message, nbBytes, digest);
}
// Kernels registration to implementation entry point.
// The output type only reinterprets the 32-byte digest buffer (UInt8 = 32
// bytes, UInt64 = 4 words).
// NOTE(review): Float64 input is only registered with a UInt8 output — there
// is no Float64/UInt64 variant, unlike UInt8 and Float32 inputs. Confirm this
// asymmetry is intentional.
REGISTRAR(CryptoHashImpl_cpu,
    {{DataType::UInt8, DataFormat::Any}, {DataType::UInt8}},
    {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<uint8_t, uint8_t>, nullptr});
REGISTRAR(CryptoHashImpl_cpu,
    {{DataType::UInt8, DataFormat::Any}, {DataType::UInt64}},
    {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<uint8_t, uint64_t>, nullptr});
REGISTRAR(CryptoHashImpl_cpu,
    {{DataType::Float32, DataFormat::Any}, {DataType::UInt8}},
    {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<float, uint8_t>, nullptr});
REGISTRAR(CryptoHashImpl_cpu,
    {{DataType::Float32, DataFormat::Any}, {DataType::UInt64}},
    {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<float, uint64_t>, nullptr});
REGISTRAR(CryptoHashImpl_cpu,
    {{DataType::Float64, DataFormat::Any}, {DataType::UInt8}},
    {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<double, uint8_t>, nullptr});
} // namespace Aidge
#endif
#endif /* AIDGE_CPU_OPERATOR_CRYPTOHASHIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_H_
#define AIDGE_CPU_OPERATOR_DIVIMPL_H_
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Div.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend.
// Forward kernel signature: (input0 size, input1 size, output size,
// input0, input1, output).
// Backward kernel signature: (input0 length, input1 length, grad-output
// length, input0 dims, input1 dims, output dims, input0, input1,
// grad-output, grad-input0, grad-input1).
using DivImpl_cpu = OperatorImpl_cpu<Div_Op,
    void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*),
    void(const std::size_t,
        const std::size_t,
        const std::size_t,
        const std::vector<std::size_t>,
        const std::vector<std::size_t>,
        const std::vector<std::size_t>,
        const void*,
        const void*,
        const void*,
        void*,
        void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Div_Op, "cpu", Aidge::DivImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_
#include <numeric> // std::accumulate
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t, std::int64_t
#include <functional> // std::multiplies
#include "aidge/backend/cpu/operator/MulImpl_kernels.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/DivImpl.hpp"
namespace Aidge {
// template <class I1, class I2, class O>
// void DivImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
// const std::vector<std::size_t>& input2Dims,
// const std::vector<std::size_t>& outputDims,
// const void* input1_,
// const void* input2_,
// void* output_) {
// const I1* input_1 = static_cast<const I1*>(input1_);
// const I2* input_2 = static_cast<const I2*>(input2_);
// O* output = static_cast<O*>(output_);
// const std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
// for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
// {
// std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
// std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
// std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
// // TODO assert if input_2 is bad?
// output[oIndex] = input_1[idx1] / input_2[idx2];
// }
// }
/**
 * @brief Element-wise division of two contiguous buffers.
 *
 * Handles the degenerate broadcast case where either input holds exactly one
 * element: that single value is then used against every element of the other
 * input. Values are assumed contiguous in memory.
 *
 * @param input1size_ number of elements in input 1 (dividend)
 * @param input2size_ number of elements in input 2 (divisor)
 * @param output1size_ number of elements to produce
 * @param input1_ raw pointer to the dividend buffer
 * @param input2_ raw pointer to the divisor buffer
 * @param output_ raw pointer to the output buffer
 */
template <class I1, class I2, class O>
constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_,
                                          const std::size_t input2size_,
                                          const std::size_t output1size_,
                                          const void* input1_,
                                          const void* input2_,
                                          void* output_) {
    const I1* dividend = static_cast<const I1*>(input1_);
    const I2* divisor = static_cast<const I2*>(input2_);
    O* result = static_cast<O*>(output_);

    // A size-1 input broadcasts its single value (always read index 0).
    const bool scalarDividend = (input1size_ == 1);
    const bool scalarDivisor = (input2size_ == 1);
    for (std::size_t i = 0; i < output1size_; ++i) {
        const I1 num = scalarDividend ? dividend[0] : dividend[i];
        const I2 den = scalarDivisor ? divisor[0] : divisor[i];
        result[i] = static_cast<O>(num / den);
    }
}
/**
 * @brief Backward kernel of the element-wise division out = a / b with
 *        broadcasting.
 *
 * Gradients are accumulated into the position of each input that the
 * corresponding output element was broadcast from:
 *   - d(out)/da =  1 / b
 *   - d(out)/db = -a / b^2
 *
 * @param input0Length     number of elements of input 0 (a)
 * @param input1Length     number of elements of input 1 (b)
 * @param gradOutputLength number of elements of the incoming gradient
 * @param dims0            dimensions of input 0
 * @param dims1            dimensions of input 1
 * @param outputDims       broadcast output dimensions
 * @param input0_          raw pointer to a
 * @param input1_          raw pointer to b
 * @param grad_output_     raw pointer to the incoming gradient
 * @param gradientInput0_  raw pointer to the gradient w.r.t. a (overwritten)
 * @param gradientInput1_  raw pointer to the gradient w.r.t. b (overwritten)
 */
template <class I1, class I2, class O>
void DivImpl_cpu_backward_kernel(const std::size_t input0Length,
                                 const std::size_t input1Length,
                                 const std::size_t gradOutputLength,
                                 const std::vector<std::size_t>& dims0,
                                 const std::vector<std::size_t>& dims1,
                                 const std::vector<std::size_t>& outputDims,
                                 const void* input0_,
                                 const void* input1_,
                                 const void* grad_output_,
                                 void* gradientInput0_,
                                 void* gradientInput1_)
{
    const I1* input0 = static_cast<const I1*>(input0_); // a
    const I2* input1 = static_cast<const I2*>(input1_); // b
    const O* grad_output = static_cast<const O*>(grad_output_);
    auto* grad_input_0 = static_cast<I1*>(gradientInput0_); // gradient w.r.t. a
    auto* grad_input_1 = static_cast<I2*>(gradientInput1_); // gradient w.r.t. b

    // Gradients are accumulated with += below, so start from zero.
    std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
    std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));

    // Broadcast dims0 and dims1 to match the shape of outputDims
    auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
    auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);

    for (std::size_t i = 0; i < gradOutputLength; ++i) {
        // Multi-dimensional index of the current output-gradient element.
        auto idxOutputGrad = getMultiDimIndices(outputDims, i);
        std::vector<std::size_t> idxInput0(broadcastedDims0.size());
        std::vector<std::size_t> idxInput1(broadcastedDims1.size());

        // Map output indices to input indices, considering broadcasting:
        // a broadcast (size-1) dimension always maps to index 0.
        for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
            idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
        }
        for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
            idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
        }

        auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
        auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);

        // grad_a = grad_output * (1/b)
        grad_input_0[idx0] += static_cast<I1>(grad_output[i] / input1[idx1]);
        // grad_b = grad_output * (-a/b²)
        grad_input_1[idx1] += static_cast<I2>(grad_output[i] * (-input0[idx0] / (input1[idx1] * input1[idx1])));
    }
}
// Kernels registration to implementation entry point.
// Forward and backward kernels are registered per data type.
// For Int32, the division is C++ integer division (truncation toward zero).
REGISTRAR(DivImpl_cpu,
    {DataType::Float32},
    {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, Aidge::DivImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(DivImpl_cpu,
    {DataType::Float64},
    {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, Aidge::DivImpl_cpu_backward_kernel<double, double, double>});
REGISTRAR(DivImpl_cpu,
    {DataType::Int32},
    {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>,
    Aidge::DivImpl_cpu_backward_kernel<std::int32_t, std::int32_t, std::int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_DROPOUTIMPL_H_
#define AIDGE_CPU_OPERATOR_DROPOUTIMPL_H_
#include <cstddef> // std::size_t
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Dropout.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
// Operator implementation entry point for the backend.
// Forward kernel signature: (dropout probability, number of elements,
// RNG seed, input buffer, output buffer).
using DropoutImpl_cpu = OperatorImpl_cpu<Dropout_Op,
    void(float,
        std::size_t,
        unsigned int,
        const void*,
        void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Dropout_Op, "cpu", Aidge::DropoutImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_DROPOUTIMPL_H_ */
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_DROPOUTIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_DROPOUTIMPL_KERNELS_H_
#include <cstddef> // std::size_t
#include <memory>
#include <random>
#include "aidge/backend/cpu/operator/DropoutImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
template <DataType DT_I, DataType DT_O = DT_I>
void DropoutImpl_cpu_forward_kernel(float probability,
std::size_t nb_elements,
unsigned int seed,
const void* input_,
void* output_)
{
using I = cpptype_t<DT_I>;
using O = cpptype_t<DT_O>;
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
// const unsigned int seed = static_cast<unsigned int>(std::random_device{}());
std::mt19937 rng(seed);
std::bernoulli_distribution bernoulli_dist(1.0f - probability); //bernoulli keep_prob
const I scale = I(1.0) / static_cast<I>(1.0f - probability);
for (std::size_t i = 0; i < nb_elements; ++i)
{
output[i] = bernoulli_dist(rng) ? static_cast<O>(input[i] * scale) : static_cast<O>(0.0);
}
}
// Kernels registration to implementation entry point.
// Only a forward kernel is registered (backward slot is nullptr).
REGISTRAR(DropoutImpl_cpu,
    {DataType::Float32},
    {ProdConso::defaultModel, DropoutImpl_cpu_forward_kernel<DataType::Float32>, nullptr});
REGISTRAR(DropoutImpl_cpu,
    {DataType::Float64},
    {ProdConso::defaultModel, DropoutImpl_cpu_forward_kernel<DataType::Float64>, nullptr});
} // namespace aidge
#endif // AIDGE_CPU_OPERATOR_DROPOUTIMPL_KERNELS_H_
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_H_
#define AIDGE_CPU_OPERATOR_EQUALIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Equal.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend.
// Forward kernel signature: (input0 dims, input1 dims, broadcast output dims,
// input0 buffer, input1 buffer, output buffer).
using EqualImpl_cpu = OperatorImpl_cpu<Equal_Op,
    void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Equal_Op, "cpu", Aidge::EqualImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_
#include "aidge/backend/cpu/operator/EqualImpl.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
// NOTE: this helper was previously wrapped in an anonymous namespace. An
// anonymous namespace in a header gives the entity internal linkage in every
// translation unit that includes it, while it is referenced from the
// external-linkage function template EqualImpl_cpu_forward_kernel — each TU's
// instantiation would then reference a different helper, an ODR hazard.
// As a function template it can safely live in the header un-wrapped.
/**
 * @brief Element-wise equality comparison over contiguous buffers.
 *
 * A size-1 input broadcasts its single element against every element of the
 * other input; otherwise both inputs are indexed in lockstep.
 *
 * @param input1size  number of elements in input 1
 * @param input2size  number of elements in input 2
 * @param output1size number of elements to produce
 * @param input1      pointer to input 1
 * @param input2      pointer to input 2
 * @param output      pointer to the output buffer (1 when equal, 0 otherwise)
 */
template <class I, class O>
void equal_contiguous_arrays(const std::size_t input1size,
                             const std::size_t input2size,
                             const std::size_t output1size,
                             const I* input1,
                             const I* input2,
                             O* output)
{
    for (std::size_t i = 0; i < output1size; ++i)
    {
        // Broadcast: a size-1 input always reads index 0.
        const std::size_t in1_id = (input1size != 1) ? i : 0;
        const std::size_t in2_id = (input2size != 1) ? i : 0;
        output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]);
    }
}
/**
 * @brief Forward kernel of the element-wise Equal operator with
 *        NumPy-style broadcasting.
 *
 * Both inputs are read as type I; the output receives 1 for equal elements
 * and 0 otherwise. The kernel finds the longest trailing span over which the
 * data can be treated as contiguous, then walks the leading (broadcast)
 * dimensions with precomputed strides, comparing one contiguous slice per
 * iteration.
 *
 * @param dims0      dimensions of input 0 (by value: padded/modified locally)
 * @param dims1      dimensions of input 1 (by value: padded/modified locally)
 * @param outputDims broadcast output dimensions
 * @param input0_    raw pointer to input 0
 * @param input1_    raw pointer to input 1
 * @param output_    raw pointer to the output buffer
 */
template <class I, class O>
void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
                                  std::vector<std::size_t> dims1,
                                  const std::vector<std::size_t>& outputDims,
                                  const void* input0_,
                                  const void* input1_,
                                  void* output_) {

    const I* input_0 = static_cast<const I*>(input0_);
    const I* input_1 = static_cast<const I*>(input1_);
    O* output = static_cast<O*>(output_);

    // [5,2,1,7] & [2,6,7]
    // 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
    // 2. Find the highest equal dimension -> 3
    //    Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
    // 3. Compute the highest number of contiguous data -> 7
    // 4. Compute stride and offset step for the broadcast mechanism
    // 5. Call a simple kernel

    // special case for equal dimensions, the kernel is called with the entire arrays at once
    if (dims0 == dims1) {
        const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
        for (std::size_t i = 0; i < input0_contiguous_size; ++i)
        {
            output[i] = static_cast<O>(input_0[i] == input_1[i]);
        }
        return;
    }

    // set dimensions to be of equal size by filling the smallest one with ones.
    if (dims0.size() > dims1.size()) {
        dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
    }
    else if (dims1.size() > dims0.size()) {
        dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
    }

    const std::size_t nbDims = dims0.size();

    // Find the highest equal dimension
    // std::size_t contiguousIdx = nbDims - 1;
    std::size_t contiguousIdx = nbDims;
    while (contiguousIdx-- > 0) {
        // for (; contiguousIdx+1 > 0; --contiguousIdx) {
        if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
            if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
                const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
                while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
                    --contiguousIdx;
                }
            }
            break;
        }
    }
    ++contiguousIdx;

    // Compute the highest number of contiguous data for each Tensor
    const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
    const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
    const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());

    // initialize strides to iterate through data because of broadcasting
    std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
    std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
    std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
    std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
    if (contiguousIdx > 0) {
        // stride_post: number of contiguous blocks spanned by each leading dim.
        stride_post0[contiguousIdx - 1] = 1;
        stride_post1[contiguousIdx - 1] = 1;
        for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
            stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
            stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
        }
        // stride_step: offset delta when a given dim wraps; a broadcast
        // (size-1) dim rewinds to the start of its span instead of advancing.
        for (std::size_t i = 0; i != contiguousIdx; ++i) {
            stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
            stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
        }
    }

    // variables for arrays offsets
    std::size_t offsetIn0 = 0;
    std::size_t offsetIn1 = 0;
    std::size_t offsetOut = 0;
    std::size_t dim = contiguousIdx - 1;
    // Total number of contiguous slices to process.
    const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
    for (std::size_t stack = 0; stack < nbStacks;) {
        equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
                    input_0 + offsetIn0*input0_contiguous_size,
                    input_1 + offsetIn1*input1_contiguous_size,
                    output + offsetOut*output_contiguous_size);
        if (++stack < nbStacks) {
            // Find the innermost leading dimension that wrapped around and
            // advance the per-input offsets by that dimension's step.
            std::size_t tmp_stack = stack;
            while(tmp_stack % outputDims[dim] == 0) {
                tmp_stack /= outputDims[dim];
                dim--;
            }
            offsetIn0 += stride_step0[dim];
            offsetIn1 += stride_step1[dim];
            ++offsetOut;
            dim = contiguousIdx - 1;
        }
    }
}
// Kernels registration to implementation entry point.
// Inputs accept any data type; the kernel template parameter (and thus the
// type both inputs are read as) is selected by the output data type.
REGISTRAR(EqualImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(EqualImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(EqualImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
REGISTRAR(EqualImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Erf.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend.
// Forward kernel signature: (number of elements, input buffer, output buffer).
using ErfImpl_cpu = OperatorImpl_cpu<Erf_Op,
    void(const std::size_t, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Erf_Op, "cpu", Aidge::ErfImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_H_ */
This diff is collapsed.