diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp index 2c52893315385f65910e8322202fd26d67d1f24f..740d72657b90c05ac3f21459f2716dc7935e2be4 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -35,15 +35,6 @@ using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>, const void *, const void *, const void *, - void *), - void(const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 1>&, - bool, - const std::array<DimSize_t, 3> &, - const void *, - const void *, - const void *, void *)>; using Conv2D_Op = Conv_Op<2>; @@ -56,207 +47,11 @@ using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>, const void *, const void *, const void *, - void *), - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - bool, - const std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create); REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create); - -//////////////////////////////////////////////////////////////////////////////// - -/** - * @brief Forward kernel for 1D Convolution on CPU backend. - * @tparam I Input data type. - * @tparam W Weight data type. - * @tparam B Bias data type. - * @tparam O Output data type. - * @param params tuple of Attributes from the Operator - * @param inputDims Array of input dimensions. - * @param input_ const input Tensor. - * @param weights_ const weight Tensor. - * @param biases_ const Biais Tensor. - * @param output_ Output Tensor. 
- */ -template <class I, class W, class B, class O> -void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, - const std::array<DimSize_t, 1>& /*dilationDims*/, - const std::array<DimSize_t, 1>& kernelDims, - const std::array<DimSize_t, 3>& inputDims, - DimSize_t outChannels, - const void *input_, - const void *weights_, - const void *biases_, - void *output_) -{ - // FIXME: missing convolution attributes as arguments - const I *input = static_cast<const I *>(input_); - const W *weights = static_cast<const W *>(weights_); - const B *biases = static_cast<const B *>(biases_); - O *output = static_cast<O *>(output_); - - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / - static_cast<float>(strideDims[0]))); - - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, inCh, Xin, Yin) - // weight (outCh, inCh, kernelX, kernelY) - // does not take Dilation attribute into account - using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { - for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { - const std::size_t oIndex = (outCh + batch*outChannels) * oxSize; - // If bias = nullptr, set B(0) - B biasVal = (biases != nullptr) ? biases[outCh] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize), biasVal); - for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { - const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2]; - const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? 
kernelDims[0] : inputDims[2] + difx); - const std::size_t oIndexFull = oIndex + ox; - const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); - - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - output[oIndexFull] += weights[wIndex + sx] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))]; - } - } - } - } - } -} - -REGISTRAR(ConvImpl1D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr}); -REGISTRAR(ConvImpl1D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr}); -REGISTRAR(ConvImpl1D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>, nullptr}); -REGISTRAR(ConvImpl1D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr}); - - -/** - * @brief Forward kernel for 2D Convolution on CPU backend. - * @tparam I Input data type. - * @tparam W Weight data type. - * @tparam B Bias data type. - * @tparam O Output data type. - * @param params tuple of Attributes from the Operator - * @param inputDims Array of input dimensions. - * @param input_ const input Tensor. - * @param weights_ const weight Tensor. - * @param biases_ const Biais Tensor. - * @param output_ Output Tensor. 
- */ -template <class I, class W, class B, class O> -void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& /*dilationDims*/, - const std::array<DimSize_t, 2>& kernelDims, - const std::array<DimSize_t, 4> &inputDims, - DimSize_t outChannels, - const void *input_, - const void *weights_, - const void *biases_, - void *output_) -{ - // FIXME: missing convolution attributes as arguments - const I *input = static_cast<const I *>(input_); - const W *weights = static_cast<const W *>(weights_); - const B *biases = static_cast<const B *>(biases_); - O *output = static_cast<O *>(output_); - - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / - static_cast<float>(strideDims[0]))); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) / - static_cast<float>(strideDims[1]))); - - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, inCh, Xin, Yin) - // weight (outCh, inCh, kernelX, kernelY) - // does not take Dilation attribute into account - using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { - for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { - const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize; - // If bias = nullptr, set B(0) - B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); - for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { - const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; - const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); - for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? 
kernelDims[1] : inputDims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); - const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); - - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); - } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; - } - } - } - } - } - } - } - } -} - -REGISTRAR(ConvImpl2D_cpu, - {{DataType::Any, DataFormat::NCHW}, 
{DataType::Float32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr}); -REGISTRAR(ConvImpl2D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr}); -REGISTRAR(ConvImpl2D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>, nullptr}); -REGISTRAR(ConvImpl2D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr}); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1b37f74d2a8eaa6688792059fab5b6242296e531 --- /dev/null +++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp @@ -0,0 +1,214 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
+
+#include <algorithm>  // std::fill
+#include <array>
+#include <cstddef>    // std::size_t
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Convolution on CPU backend.
+ *
+ * Computes an un-padded cross-correlation (the kernel is not flipped): for
+ * every batch item and output channel the output row is first filled with the
+ * bias, then the contribution of each input channel is accumulated into it.
+ *
+ * All buffers are read/written as contiguous arrays:
+ *  - input   : [batch][inCh][X]
+ *  - weights : [outCh][inCh][kernelX]
+ *  - output  : [batch][outCh][oxSize]
+ * with oxSize = (X - (dilation * (kernelX - 1) + 1)) / stride + 1, or 0 when
+ * the dilated kernel does not fit in the input.
+ *
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims Step between two consecutive output positions.
+ * @param dilationDims Spacing between two consecutive kernel taps (1 = dense kernel).
+ * @param kernelDims Number of kernel taps.
+ * @param inputDims Input dimensions (batch, inCh, X).
+ * @param outChannels Number of output channels.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const bias Tensor, one value per output channel; may be
+ *                nullptr, in which case a zero bias is used.
+ * @param output_ Output Tensor.
+ *                NOTE(review): assumed to hold batch * outChannels * oxSize
+ *                elements; confirm the operator sizes it with the same formula.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                            const std::array<DimSize_t, 1>& dilationDims,
+                            const std::array<DimSize_t, 1>& kernelDims,
+                            const std::array<DimSize_t, 3>& inputDims,
+                            DimSize_t outChannels,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_)
+{
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // Number of input elements spanned by one (dilated) kernel window.
+    const std::size_t kernelExtent = dilationDims[0] * (kernelDims[0] - 1) + 1;
+    // Output length, in exact integer arithmetic. It is 0 when the window does
+    // not fit, so an oversized kernel can never wrap the unsigned subtraction.
+    const std::size_t oxSize = (inputDims[2] >= kernelExtent)
+                                   ? (inputDims[2] - kernelExtent) / strideDims[0] + 1
+                                   : 0;
+
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            // A null bias pointer means "no bias": start the accumulation from B(0).
+            const B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output + (oIndex + oxSize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    // First input element of the window. By construction of oxSize
+                    // the whole window lies inside the input row, so the kernel
+                    // range never needs clamping.
+                    const std::size_t ix = iIndex + ox * strideDims[0];
+                    for (std::size_t sx = 0; sx < kernelDims[0]; ++sx) {
+                        output[oIndex + ox] += weights[wIndex + sx] *
+                                               input[ix + sx * dilationDims[0]];
+                    }
+                }
+            }
+        }
+    }
+}
+
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
+
+
+/**
+ * @brief Forward kernel for 2D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims Stride attribute of the Operator, one value per spatial axis.
+ * @param dilationDims Dilation attribute of the Operator, one value per spatial axis.
+ * @param kernelDims Kernel size, one value per spatial axis.
+ * @param inputDims Array of input dimensions.
+ * @param outChannels Number of output channels.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const bias Tensor; may be nullptr, in which case a zero bias is used.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
+                            const std::array<DimSize_t, 2>& dilationDims,
+                            const std::array<DimSize_t, 2>& kernelDims,
+                            const std::array<DimSize_t, 4> &inputDims,
+                            DimSize_t outChannels,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_)
+{
+    // Un-padded 2D cross-correlation (the kernel is not flipped).
+    // Buffers are indexed as contiguous arrays:
+    //   input   : [batch][inCh][X][Y]
+    //   weights : [outCh][inCh][kernelX][kernelY]
+    //   output  : [batch][outCh][oxSize][oySize]
+    // NOTE(review): the output buffer is assumed to hold
+    // batch * outChannels * oxSize * oySize elements, with oxSize/oySize as
+    // computed below; confirm the operator sizes it with the same formula.
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // Output size along one spatial axis, in exact integer arithmetic:
+    //   (in - (dilation * (kernel - 1) + 1)) / stride + 1
+    // It is 0 when the dilated kernel does not fit, so an oversized kernel can
+    // never wrap the unsigned subtraction.
+    const auto outputSize = [](std::size_t in, std::size_t kernel,
+                               std::size_t stride, std::size_t dilation) -> std::size_t {
+        const std::size_t extent = dilation * (kernel - 1) + 1;
+        return (in >= extent) ? (in - extent) / stride + 1 : 0;
+    };
+    const std::size_t oxSize = outputSize(inputDims[2], kernelDims[0], strideDims[0], dilationDims[0]);
+    const std::size_t oySize = outputSize(inputDims[3], kernelDims[1], strideDims[1], dilationDims[1]);
+
+    // Distance, in input elements, between two neighbouring kernel taps.
+    const std::size_t rowStep = dilationDims[0] * inputDims[3];
+    const std::size_t colStep = dilationDims[1];
+    // The unrolled path below only depends on the kernel size: test it once.
+    const bool is3x3 = (kernelDims[0] == 3) && (kernelDims[1] == 3);
+
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
+            // A null bias pointer means "no bias": start the accumulation from B(0).
+            const B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output + (oIndex + oxSize*oySize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                // Weights of the (outCh, inCh) kernel plane.
+                const W *w = weights + (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    for (std::size_t oy = 0; oy < oySize; ++oy) {
+                        const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                        // Top-left input element of the window. By construction of
+                        // oxSize/oySize the whole window lies inside the input plane,
+                        // so the kernel range never needs clamping.
+                        const I *in = input + iIndex + (ox * strideDims[0]) * inputDims[3] + oy * strideDims[1];
+
+                        if (is3x3) {
+                            // Unrolled 3x3 kernel: the nine products are summed in a
+                            // single expression before being added to the output.
+                            const I *r0 = in;
+                            const I *r1 = in + rowStep;
+                            const I *r2 = in + 2*rowStep;
+                            output[oIndexFull] += (w[0] * r0[0] +
+                                                   w[1] * r0[colStep] +
+                                                   w[2] * r0[2*colStep] +
+                                                   w[3] * r1[0] +
+                                                   w[4] * r1[colStep] +
+                                                   w[5] * r1[2*colStep] +
+                                                   w[6] * r2[0] +
+                                                   w[7] * r2[colStep] +
+                                                   w[8] * r2[2*colStep]);
+                        } else {
+                            for (std::size_t sx = 0; sx < kernelDims[0]; ++sx) {
+                                for (std::size_t sy = 0; sy < kernelDims[1]; ++sy) {
+                                    output[oIndexFull] += w[sx*kernelDims[1] + sy] *
+                                                          in[sx*rowStep + sy*colStep];
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW},
{DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr}); +REGISTRAR(ConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr}); +REGISTRAR(ConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>, nullptr}); +REGISTRAR(ConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/OperatorImpl.hpp b/include/aidge/backend/cpu/operator/OperatorImpl.hpp index 4e3100ec4172861c7341776f96c44f5c4032cb47..abf94ab9069a07e8f87819cb29c027b1adbfd9c6 100644 --- a/include/aidge/backend/cpu/operator/OperatorImpl.hpp +++ b/include/aidge/backend/cpu/operator/OperatorImpl.hpp @@ -22,7 +22,7 @@ #include "aidge/utils/Types.h" namespace Aidge { -template <class Op, class FwdFunc, class BwdFunc> +template <class Op, class FwdFunc, class BwdFunc = void()> class OperatorImpl_cpu : public OperatorImpl, public Registrable<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>, ImplSpec, Impl<FwdFunc, BwdFunc>> { diff --git a/include/aidge/backend/cpu/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp index 1b8622e287f985279e57e9838308dc2f844aec1a..5b900618abce83ff1c3822d4f61cc62c93f5081f 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp @@ -30,46 +30,6 @@ using ReLUImpl_cpu = OperatorImpl_cpu<ReLU_Op, // Implementation entry point registration to Operator REGISTRAR(ReLU_Op, 
"cpu", Aidge::ReLUImpl_cpu::create); - -//////////////////////////////////////////////////////////////////////////////// - -// Kernels -template <class I, class O> -void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = (input[i] > 0) ? input[i] : 0; - } -} - -template <class I, class GI, class GO> -void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* input_, const void* grad_output_, - void* grad_input_) { - const I* input = static_cast<const I*>(input_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); - for (std::size_t i = 0; i < inputLenght; ++i) { - grad_input[i] = (input[i] > 0) ? grad_output[i] : 0; - } -} - -// Kernels registration to implementation entry point -REGISTRAR(ReLUImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<float, float>, Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>}); -REGISTRAR(ReLUImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<double, double>, Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>}); -REGISTRAR(ReLUImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<int, int>, Aidge::ReLUImpl_cpu_backward_kernel<int, int, int>}); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_RELUIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3131d6922fcb3930b9ab15fe7897cb3e30277447 --- /dev/null +++ b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp @@ -0,0 +1,66 @@ 
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_
+
+#include <cstddef>    // std::size_t
+#include <memory>
+#include <tuple>    // std::tuple
+#include <vector>
+
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+#include "aidge/operator/ReLU.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+/**
+ * @brief Element-wise ReLU forward kernel.
+ *
+ * Writes output[i] = input[i] when input[i] > 0, and 0 otherwise.
+ *
+ * @tparam I Input data type.
+ * @tparam O Output data type.
+ * @param inputLenght Number of elements to process.
+ * @param input_ Input buffer, read as an array of I.
+ * @param output_ Output buffer, written as an array of O.
+ */
+template <class I, class O>
+void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
+                                 const void* input_,
+                                 void* output_) {
+
+    const I* const src = static_cast<const I*>(input_);
+    O* const dst = static_cast<O*>(output_);
+
+//#pragma omp parallel for if (inputLenght > 1024)
+    for (std::size_t idx = 0; idx != inputLenght; ++idx) {
+        const I& value = src[idx];
+        dst[idx] = (value > 0) ? value : 0;
+    }
+}
+
+/**
+ * @brief Element-wise ReLU backward kernel.
+ *
+ * Writes grad_input[i] = grad_output[i] when input[i] > 0, and 0 otherwise.
+ *
+ * @tparam I Data type of the forward input.
+ * @tparam GI Data type of the input gradient.
+ * @tparam GO Data type of the output gradient.
+ * @param inputLenght Number of elements to process.
+ * @param input_ Forward input buffer, read as an array of I.
+ * @param grad_output_ Output-gradient buffer, read as an array of GO.
+ * @param grad_input_ Input-gradient buffer, written as an array of GI.
+ */
+template <class I, class GI, class GO>
+void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght,
+                                  const void* input_, const void* grad_output_,
+                                  void* grad_input_) {
+    const I* input = static_cast<const I*>(input_);
+    const GO* grad_output = static_cast<const GO*>(grad_output_);
+    GI* grad_input = static_cast<GI*>(grad_input_);
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        grad_input[i] = (input[i] > 0) ?
grad_output[i] : 0; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(ReLUImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<float, float>, Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(ReLUImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<double, double>, Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>}); +REGISTRAR(ReLUImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<int, int>, Aidge::ReLUImpl_cpu_backward_kernel<int, int, int>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp index b8a9d3bf8e3e0cbde08a6c07b224f4708ecce34d..ee1c36edecbe50cc1765da59737509a2b6333caf 100644 --- a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp +++ b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ #define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Sigmoid.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,34 +21,13 @@ #include <vector> namespace Aidge { -// class Sigmoid_Op; +// Operator implementation entry point for the backend +using SigmoidImpl_cpu = OperatorImpl_cpu<Sigmoid_Op, + void(const std::size_t, const void*, void*), + void(const std::size_t, const void*, const void*, void*)>; -// compute kernel registry for forward and backward -class SigmoidImplForward_cpu - : public Registrable<SigmoidImplForward_cpu, std::tuple<DataType, DataType>, std::function<void(const std::size_t, const void*, void*)>> { -}; -class SigmoidImplBackward_cpu - : public Registrable<SigmoidImplBackward_cpu, std::tuple<DataType, DataType, DataType>, 
std::function<void(const std::size_t, const void*, const void*, void*)>> { -}; - -class SigmoidImpl_cpu : public OperatorImpl { -public: - SigmoidImpl_cpu(const Sigmoid_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<SigmoidImpl_cpu> create(const Sigmoid_Op& op) { - return std::make_unique<SigmoidImpl_cpu>(op); - } - - std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); }; - - void forward() override final; - - void backward() override final; -}; - -namespace { -static Registrar<Sigmoid_Op> registrarSigmoidImpl_cpu("cpu", Aidge::SigmoidImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Sigmoid_Op, "cpu", Aidge::SigmoidImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp deleted file mode 100644 index 4ceb3bd7ed9a3fb739591eee488f8035770fef18..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp +++ /dev/null @@ -1,43 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_ - -#include <cstddef> // std::size_t - -#include "aidge/backend/cpu/operator/SigmoidImpl.hpp" -#include "aidge/utils/Registrar.hpp" - -namespace Aidge { -template <class O, class GI, class GO> -void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* output_, const void* grad_output_, - void* grad_input_) { - const O* output = static_cast<const O*>(output_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); - for (std::size_t i = 0; i < inputLenght; ++i) { - grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i]; - } -} - -namespace { -static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>); -static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp deleted file mode 100644 index 24ba11a0bca7f3fa15f9ac1e2c13e29f88eaf074..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the 
Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/SigmoidImpl.hpp" - -namespace Aidge { -template <class I, class O> -void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { - if (input[i] > I(0)) { - output[i] = O(1) / (O(1) + std::exp(-input[i])); - } else { - output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i])); - } - } -} - -namespace { -static Registrar<SigmoidImplForward_cpu> registrarSigmoidImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>); -static Registrar<SigmoidImplForward_cpu> registrarSigmoidImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dfd71ce0a878efbeb779f3a67ad4ccc762bb8363 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp @@ -0,0 +1,59 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 
2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/SigmoidImpl.hpp" + +namespace Aidge { +template <class I, class O> +void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + +//#pragma omp parallel for if (inputLenght > 1024) + for (std::size_t i = 0; i < inputLenght; ++i) { + if (input[i] > I(0)) { + output[i] = O(1) / (O(1) + std::exp(-input[i])); + } else { + output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i])); + } + } +} + +template <class O, class GI, class GO> +void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght, + const void* output_, const void* grad_output_, + void* grad_input_) { + const O* output = static_cast<const O*>(output_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + for (std::size_t i = 0; i < inputLenght; ++i) { + grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i]; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(SigmoidImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>, Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(SigmoidImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>, Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp 
b/include/aidge/backend/cpu/operator/SliceImpl.hpp index 0f67f8c5541f739c127c50435c97862f26cb03a3..fd98b38d7117eaa14e35fe3cb89abf95b2913997 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp @@ -16,52 +16,25 @@ #include <vector> #include <array> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Slice.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { -// class Slice_Op; - -// compute kernel registry for forward and backward -class SliceImplForward_cpu - : public Registrable<SliceImplForward_cpu, - std::tuple<DataType, DataType>, - std::function<void(const std::vector<std::int64_t>&, +// Operator implementation entry point for the backend +using SliceImpl_cpu = OperatorImpl_cpu<Slice_Op, + void(const std::vector<std::int64_t>&, const std::vector<std::int64_t>&, const std::vector<std::int8_t>&, const std::vector<std::int64_t>&, const std::vector<DimSize_t>&, const void*, - void*)>> {}; -class SliceImplBackward_cpu - : public Registrable<SliceImplBackward_cpu, - std::tuple<DataType, DataType>, - std::function<void(const std::vector<std::int64_t>&, - const std::vector<std::int64_t>&, - const std::vector<std::int8_t>&, - const std::vector<std::int64_t>&, - const std::vector<DimSize_t>&, - const void*, - void*)>> {}; - -class SliceImpl_cpu : public OperatorImpl { -public: - SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) { - return std::make_unique<SliceImpl_cpu>(op); - } - - std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); }; - void forward() override; -}; + void*)>; -namespace { -static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create); -} +// Implementation entry point 
registration to Operator +REGISTRAR(Slice_Op, "cpu", Aidge::SliceImpl_cpu::create); } // namespace Aidge #endif /* __AIDGE_CPU_OPERATOR_SLICEIMPL_H__ */ diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp similarity index 84% rename from include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp index 31e409369cc640bbda9f54c54652af7f72b509b6..4c37316d663212e303cdccb2c90bf55842f93575 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ #include <algorithm> #include <cmath> @@ -88,14 +88,15 @@ void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts, } } -namespace { -static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, float>); -static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, int>); -static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, double>); -} // namespace +REGISTRAR(SliceImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(SliceImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(SliceImpl_cpu, + {DataType::Int32}, 
+ {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<int, int>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp index a09261d0ec79869465c2bd6291f057dfa8387c90..ec2c2696ed6e2ba8cad1536519298d9331921c07 100644 --- a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ #define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Softmax.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,31 +21,12 @@ #include <vector> namespace Aidge { -// class Softmax_Op; +// Operator implementation entry point for the backend +using SoftmaxImpl_cpu = OperatorImpl_cpu<Softmax_Op, + void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>; -// compute kernel registry for forward and backward -class SoftmaxImplForward_cpu - : public Registrable<SoftmaxImplForward_cpu, std::tuple<DataType, DataType>, std::function<void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>> { -}; -class SoftmaxImplBackward_cpu - : public Registrable<SoftmaxImplBackward_cpu, std::tuple<DataType, DataType>, std::function<void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>> { -}; - -class SoftmaxImpl_cpu : public OperatorImpl { -public: - SoftmaxImpl_cpu(const Softmax_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<SoftmaxImpl_cpu> create(const Softmax_Op& op) { - return std::make_unique<SoftmaxImpl_cpu>(op); - } - - std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); }; - void forward() override; -}; - -namespace { -static 
Registrar<Softmax_Op> registrarSoftmaxImpl_cpu("cpu", Aidge::SoftmaxImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Softmax_Op, "cpu", Aidge::SoftmaxImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp similarity index 75% rename from include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp index 6ff8b3ddf39412aa6febdc188b7c27e8bfdcc178..6b52a307832674edd095d2c98b559d0adcb4062a 100644 --- a/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ #include "aidge/utils/Registrar.hpp" #include <cstddef> @@ -61,14 +61,15 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi } } -namespace { -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>); -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>); -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>); -} // namespace +REGISTRAR(SoftmaxImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, 
nullptr}); +REGISTRAR(SoftmaxImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(SoftmaxImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl.hpp b/include/aidge/backend/cpu/operator/SqrtImpl.hpp index 5764fea4519b55389597db6ac0797239352b7dea..dba75d1c58fb19ab2284ee0e98a32bff7ac58557 100644 --- a/include/aidge/backend/cpu/operator/SqrtImpl.hpp +++ b/include/aidge/backend/cpu/operator/SqrtImpl.hpp @@ -17,39 +17,19 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Sqrt.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" namespace Aidge { +// Operator implementation entry point for the backend +using SqrtImpl_cpu = OperatorImpl_cpu<Sqrt_Op, + void(const std::size_t, const void*, void*), + void(const std::size_t, const void*, void*)>; -// compute kernel registry for forward and backward -class SqrtImplForward_cpu - : public Registrable<SqrtImplForward_cpu, std::tuple<DataType, DataType>, std::function<void(const std::size_t, const void*, void*)>> { -}; -class SqrtImplBackward_cpu - : public Registrable<SqrtImplBackward_cpu, std::tuple<DataType, DataType>, std::function<void(const std::size_t, const void*, void*)>> { -}; - -class SqrtImpl_cpu : public OperatorImpl { -public: - SqrtImpl_cpu(const Sqrt_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<SqrtImpl_cpu> create(const Sqrt_Op& op) { - return std::make_unique<SqrtImpl_cpu>(op); - } - - std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); }; - - void forward() override final; 
- - void backward() override final; -}; - -namespace { -static Registrar<Sqrt_Op> registrarSqrtImpl_cpu("cpu", Aidge::SqrtImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Sqrt_Op, "cpu", Aidge::SqrtImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp deleted file mode 100644 index 9cf5118a5ac81520d7a180b6aba22417ca512890..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SQRTIMPL_BACKWARD_KERNEL_H_ - -#include <cmath> // std::sqrt -#include <cstddef> // std::size_t - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/SqrtImpl.hpp" - -namespace Aidge { -template <class I, class O> -void SqrtImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = static_cast<O>(0.5/(std::sqrt(static_cast<float>(input[i])))); - } -} - -namespace { -static Registrar<SqrtImplBackward_cpu> registrarSqrtImplBackward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SqrtImpl_cpu_backward_kernel<float, float>); -static 
Registrar<SqrtImplBackward_cpu> registrarSqrtImplBackward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SqrtImpl_cpu_backward_kernel<int, int>); -static Registrar<SqrtImplBackward_cpu> registrarSqrtImplBackward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SqrtImpl_cpu_backward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_BACKWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp deleted file mode 100644 index 886b978c2345ce555d229d684ba83f952be9e00e..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ - -#include <cmath> // std::sqrt -#include <cstddef> // std::size_t - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/SqrtImpl.hpp" - -namespace Aidge { -template <class I, class O> -void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i]))); - } -} - -namespace { -static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SqrtImpl_cpu_forward_kernel<float, float>); -static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SqrtImpl_cpu_forward_kernel<int, int>); -static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SqrtImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0826251ea45583c8598ea0c320c5eda9672e5a25 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp @@ -0,0 +1,60 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * 
http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ + +#include <cmath> // std::sqrt +#include <cstddef> // std::size_t + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/SqrtImpl.hpp" + +namespace Aidge { +template <class I, class O> +void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i]))); + } +} + +template <class I, class O> +void SqrtImpl_cpu_backward_kernel(const std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = static_cast<O>(0.5/(std::sqrt(static_cast<float>(input[i])))); + } +} + +REGISTRAR(SqrtImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<float, float>, Aidge::SqrtImpl_cpu_backward_kernel<float, float>}); +REGISTRAR(SqrtImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<double, double>, Aidge::SqrtImpl_cpu_backward_kernel<double, double>}); +REGISTRAR(SqrtImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<int, int>, Aidge::SqrtImpl_cpu_backward_kernel<int, int>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SubImpl.hpp b/include/aidge/backend/cpu/operator/SubImpl.hpp index 6f9b9a6d5d3d18499e6a74a9139cee7253b5d95a..2bb22bda74edf7db09404fd5613b6714ddcdf513 100644 --- 
a/include/aidge/backend/cpu/operator/SubImpl.hpp +++ b/include/aidge/backend/cpu/operator/SubImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_SUBIMPL_H_ #define AIDGE_CPU_OPERATOR_SUBIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Sub.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,31 +21,12 @@ #include <vector> namespace Aidge { -// class Sub_Op; +// Operator implementation entry point for the backend +using SubImpl_cpu = OperatorImpl_cpu<Sub_Op, + void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>; -// compute kernel registry for forward and backward -class SubImplForward_cpu - : public Registrable<SubImplForward_cpu, std::tuple<DataType, DataType, DataType>, std::function<void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>> { -}; -class SubImplBackward_cpu - : public Registrable<SubImplBackward_cpu, std::tuple<DataType, DataType, DataType>, std::function<void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)>> { -}; - -class SubImpl_cpu : public OperatorImpl { -public: - SubImpl_cpu(const Sub_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<SubImpl_cpu> create(const Sub_Op& op) { - return std::make_unique<SubImpl_cpu>(op); - } - - std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); }; - void forward() override; -}; - -namespace { -static Registrar<Sub_Op> registrarSubImpl_cpu("cpu", Aidge::SubImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Sub_Op, "cpu", Aidge::SubImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SUBIMPL_H_ */ diff --git 
a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp similarity index 62% rename from include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/SubImpl_kernels.hpp index 10e6f58bb44b63f2d8712dc0aa64e0660f3356b2..0486ed2105b23e95f9cdfcda578e14900fcb2c8e 100644 --- a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ #include "aidge/utils/Registrar.hpp" @@ -49,20 +49,19 @@ void SubImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, } } -namespace { -static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::SubImpl_cpu_forward_kernel<float, float, float>); -static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::SubImpl_cpu_forward_kernel<double, double, double>); -static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>); -static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int64( - {DataType::Int64, DataType::Int64, DataType::Int64}, - Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(SubImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<float, float, float>, nullptr}); 
+REGISTRAR(SubImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<double, double, double>, nullptr}); +REGISTRAR(SubImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(SubImpl_cpu, + {DataType::Int64}, + {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/TanhImpl.hpp b/include/aidge/backend/cpu/operator/TanhImpl.hpp index 09864d3e50182df319762a2356c946c977b6253b..b1c2217bd29805eca2cf7b7906316756b75a74e0 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_TANHIMPL_H_ #define AIDGE_CPU_OPERATOR_TANHIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Tanh.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,34 +21,13 @@ #include <vector> namespace Aidge { -// class Tanh_Op; +// Operator implementation entry point for the backend +using TanhImpl_cpu = OperatorImpl_cpu<Tanh_Op, + void(const std::size_t, const void*, void*), + void(const std::size_t, const void*, const void*, void*)>; -// compute kernel registry for forward and backward -class TanhImplForward_cpu - : public Registrable<TanhImplForward_cpu, std::tuple<DataType, DataType>, std::function<void(const std::size_t, const void*, void*)>> { -}; -class TanhImplBackward_cpu - : public Registrable<TanhImplBackward_cpu, std::tuple<DataType, DataType, DataType>, std::function<void(const std::size_t, const void*, const void*, void*)>> { -}; - -class TanhImpl_cpu : public OperatorImpl { -public: - TanhImpl_cpu(const Tanh_Op& 
op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<TanhImpl_cpu> create(const Tanh_Op& op) { - return std::make_unique<TanhImpl_cpu>(op); - } - - std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); }; - - void forward() override final; - - void backward() override final; -}; - -namespace { -static Registrar<Tanh_Op> registrarTanhImpl_cpu("cpu", Aidge::TanhImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Tanh_Op, "cpu", Aidge::TanhImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_TANHIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp deleted file mode 100644 index 9e57b6dfcb0da322f5b21944fb10ec7a10cd0ab8..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_TANHIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/TanhImpl.hpp" - -namespace Aidge { -template <class I, class O> -void TanhImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = std::tanh(input[i]); - } -} - -namespace { -static Registrar<TanhImplForward_cpu> registrarTanhImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::TanhImpl_cpu_forward_kernel<float, float>); -static Registrar<TanhImplForward_cpu> registrarTanhImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::TanhImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp similarity index 51% rename from include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp rename to include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp index 3a13c2cad21c35822fc6248590550e4716ee046d..fdcac210484b11f2220dcc2a6813efed503d1913 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp @@ -9,15 +9,28 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ +#define 
AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ -#include <cstddef> // std::size_t +#include "aidge/utils/Registrar.hpp" #include "aidge/backend/cpu/operator/TanhImpl.hpp" -#include "aidge/utils/Registrar.hpp" namespace Aidge { +template <class I, class O> +void TanhImpl_cpu_forward_kernel(std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + +//#pragma omp parallel for if (inputLenght > 1024) + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = std::tanh(input[i]); + } +} + template <class O, class GI, class GO> void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght, const void* output_, const void* grad_output_, @@ -30,14 +43,13 @@ void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght, } } -namespace { -static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::TanhImpl_cpu_backward_kernel<float, float, float>); -static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::TanhImpl_cpu_backward_kernel<double, double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(TanhImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<float, float>, Aidge::TanhImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(TanhImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<double, double>, Aidge::TanhImpl_cpu_backward_kernel<double, double, double>}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ */ diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index 8df5934c4daa08a04aec0abf705c41252095d751..0f2a77a2a0a60be0f4e4f99bfd46be22e6ef7686 100644 --- 
a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -10,6 +10,7 @@ ********************************************************************************/ #include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/ConvImpl_kernels.hpp" #include <cassert> #include <chrono> // std::chrono::milliseconds diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp index 5f4170472cc99ee179faaf41853a5088c743eb2a..832f91aad347fc081439ec487d06b14b0e2fe8da 100644 --- a/src/operator/ReLUImpl.cpp +++ b/src/operator/ReLUImpl.cpp @@ -19,6 +19,7 @@ #include "aidge/utils/ErrorHandling.hpp" #include "aidge/backend/cpu/operator/ReLUImpl.hpp" +#include "aidge/backend/cpu/operator/ReLUImpl_kernels.hpp" template <> void Aidge::ReLUImpl_cpu::forward() { diff --git a/src/operator/SigmoidImpl.cpp b/src/operator/SigmoidImpl.cpp index 7e00f6f1944bb73c40324a9d5cb45a0f24a4626a..cdcbac85df3a38fea9b7100324e0618949262fc9 100644 --- a/src/operator/SigmoidImpl.cpp +++ b/src/operator/SigmoidImpl.cpp @@ -20,9 +20,9 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" -#include "aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp" +#include "aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp" +template <> void Aidge::SigmoidImpl_cpu::forward() { const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); @@ -30,16 +30,15 @@ void Aidge::SigmoidImpl_cpu::forward() { AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<SigmoidImplForward_cpu>::create({ - in0->dataType(), - out0->dataType()}); + const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(in0->size(), + impl.forward(in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } +template <> void 
Aidge::SigmoidImpl_cpu::backward() { const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp); std::shared_ptr<Tensor> out0 = op_.getOutput(0); @@ -48,12 +47,8 @@ void Aidge::SigmoidImpl_cpu::backward() { AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); // Find the correct kernel type - auto kernelFunc = Registrar<SigmoidImplBackward_cpu>::create({ - out0->dataType(), - gra_int0->dataType(), - gra_out0->dataType() - }); + const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); } diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp index a7664262a63e0e3b0e3ffdc775b7c11702b00e6a..945c1bc752feb8e6a194b1aff99b26f01a6a0e69 100644 --- a/src/operator/SliceImpl.cpp +++ b/src/operator/SliceImpl.cpp @@ -14,22 +14,21 @@ #include <vector> #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/SliceImpl_kernels.hpp" #include "aidge/operator/Slice.hpp" #include "aidge/utils/Log.hpp" #include "aidge/utils/Types.h" +template <> void Aidge::SliceImpl_cpu::forward() { const auto& op_ = dynamic_cast<const Slice_Op&>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Slice Operator."); // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu>::create({ - op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); + const auto impl = Registrar<SliceImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.starts(), + impl.forward(op_.starts(), op_.ends(), op_.axes(), op_.steps(), @@ -37,3 +36,8 @@ void Aidge::SliceImpl_cpu::forward() { getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } + +template <> +void Aidge::SliceImpl_cpu::backward() { 
+ AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Slice_Op on backend cpu"); +} diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp index 07eaec6cbb9ab0b10705d51b0749ac9ae5b83daa..8b6933f22f3673476f4a9f1e261fbcdc09857300 100644 --- a/src/operator/SoftmaxImpl.cpp +++ b/src/operator/SoftmaxImpl.cpp @@ -20,22 +20,25 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" -#include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp" +template <> void Aidge::SoftmaxImpl_cpu::forward() { const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp); AIDGE_ASSERT(!op_.getInput(0)->empty(), "Softmax input empty"); + std::int32_t axis = (op_.axis() >= 0) ? op_.axis() : op_.getInput(0)->nbDims() + op_.axis(); // Find the correct kernel type - auto kernelFunc = Registrar<SoftmaxImplForward_cpu>::create({ - op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); - - std::int32_t axis = (op_.axis() >= 0) ? 
op_.axis() : op_.getInput(0)->nbDims() + op_.axis(); + const auto impl = Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(static_cast<std::size_t>(axis), // axisIdx + impl.forward(static_cast<std::size_t>(axis), // axisIdx std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); } + +template <> +void Aidge::SoftmaxImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Softmax_Op on backend cpu"); +} diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp index 128135b2b5e415e3aaebcfd9975ec70950577ce9..25bdb42fd5140ef4f64d704fc3a5ccf237f17f81 100644 --- a/src/operator/SqrtImpl.cpp +++ b/src/operator/SqrtImpl.cpp @@ -19,25 +19,24 @@ #include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/SqrtImpl.hpp" -#include "aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp" +#include "aidge/backend/cpu/operator/SqrtImpl_kernels.hpp" +template <> void Aidge::SqrtImpl_cpu::forward() { std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0)); std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<SqrtImplForward_cpu>::create({ - in0->dataType(), - out0->dataType()}); + const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(in0->size(), + impl.forward(in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::SqrtImpl_cpu::backward() { // reversing in and out Data for backprop const Sqrt_Op& op_ = dynamic_cast<const Sqrt_Op&>(mOp); @@ -46,12 +45,10 @@ void 
Aidge::SqrtImpl_cpu::backward() { AIDGE_ASSERT(out0grad, "missing output #0"); // Find the correct kernel type - auto kernelFunc = Registrar<SqrtImplForward_cpu>::create({ - out0grad->dataType(), - in0grad->dataType()}); + const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(out0grad->size(), + impl.backward(out0grad->size(), getCPUPtr(out0grad), getCPUPtr(in0grad)); } \ No newline at end of file diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp index 24f2c982a4f305d4a27b579bbe6b61a41a96de41..d43771b967889183801cb93418c967ce9d9c8453 100644 --- a/src/operator/SubImpl.cpp +++ b/src/operator/SubImpl.cpp @@ -21,26 +21,28 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/SubImpl.hpp" -#include "aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/SubImpl_kernels.hpp" +template <> void Aidge::SubImpl_cpu::forward() { - - // Find the correct kernel type - auto kernelFunc = Registrar<SubImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); + // Find the correct kernel type + const auto impl = Registrar<SubImpl_cpu>::create(getBestMatch(getRequiredSpec())); + // Call kernel - kernelFunc(inputDims0, + impl.forward(inputDims0, inputDims1, std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), 
getCPUPtr(mOp.getRawOutput(0))); } + +template <> +void Aidge::SubImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Sub_Op on backend cpu"); +} diff --git a/src/operator/TanhImpl.cpp b/src/operator/TanhImpl.cpp index 9fe054f103a2e1c5500dd86bb70735455f316cb2..ed8dce08b9f710c9e5830b2c72ffef71013edb6e 100644 --- a/src/operator/TanhImpl.cpp +++ b/src/operator/TanhImpl.cpp @@ -20,9 +20,9 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/TanhImpl.hpp" -#include "aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp" +#include "aidge/backend/cpu/operator/TanhImpl_kernels.hpp" +template <> void Aidge::TanhImpl_cpu::forward() { const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); @@ -30,16 +30,15 @@ void Aidge::TanhImpl_cpu::forward() { AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<TanhImplForward_cpu>::create({ - in0->dataType(), - out0->dataType()}); + const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(in0->size(), + impl.forward(in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::TanhImpl_cpu::backward() { const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp); std::shared_ptr<Tensor> out0 = op_.getOutput(0); @@ -48,13 +47,9 @@ void Aidge::TanhImpl_cpu::backward() { AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); // Find the correct kernel type - auto kernelFunc = Registrar<TanhImplBackward_cpu>::create({ - out0->dataType(), - gra_int0->dataType(), - gra_out0->dataType() - }); + const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + 
impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); }