diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
index 801bd315f9e5058ffade574fc92179b1e3c513e4..720e331cafec570af73c355dda73ed85d435fa6c 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
@@ -12,15 +12,15 @@
 #ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstddef>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <cmath>
-#include <cstddef>
-#include <array>
-#include <algorithm>
 
 namespace Aidge {
 /**
@@ -30,14 +30,14 @@
  * @tparam B Bias data type.
  * @tparam O Output data type.
  * @param params tuple of Attributes from the Operator
- * @param dims Array of input dimensions.
+ * @param inputDims Array of input dimensions.
  * @param input_ const input Tensor.
  * @param weights_ const weight Tensor.
  * @param biases_ const Biais Tensor.
 * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
+void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &inputDims,
                        const void *input_, const void *weights_, const void *biases_, void *output_) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
@@ -48,11 +48,11 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
 
     // output H size
     const std::size_t oxSize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) /
+        static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) /
                             static_cast<float>(std::get<0>(attrs)[0])));
     // output W size
     const std::size_t oySize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) /
+        static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - std::get<2>(attrs)[1] + std::get<0>(attrs)[1]) /
                             static_cast<float>(std::get<0>(attrs)[1])));
 
     // TODO: kernel computation
@@ -61,40 +61,40 @@
     // weight (outCh, ch, kernelX, kernelY)
     // does not take Dilation attribute into account
     using signedsize = std::make_signed<std::size_t>::type;
-    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-        for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) {
-            const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize;
-            B biasVal = ((!std::get<4>(attrs)) && biases != nullptr) ? biases[ch] : B(0);
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
+            const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
+            B biasVal = ((!std::get<3>(attrs)) && biases != nullptr) ? biases[ch] : B(0);
             std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-            const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1];
+            const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+            const std::size_t wIndex = ch * std::get<2>(attrs)[0] * std::get<2>(attrs)[1];
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
                 const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
                 const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx);
+                const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > std::get<2>(attrs)[0] ? std::get<2>(attrs)[0] : inputDims[2] + difx);
                 for (std::size_t oy = 0; oy < oySize; ++oy) {
                     const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
                     const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify);
+                    const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > std::get<2>(attrs)[1] ? std::get<2>(attrs)[1] : inputDims[3] + dify);
                     const std::size_t oIndexFull = oIndex + ox*oySize + oy;
                     const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
                     const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
 
                     if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                        output[oIndexFull] += (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                               weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                               weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                               weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                               weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                               weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                               weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                               weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                               weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+                        output[oIndexFull] += (weights[wIndex + 0*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                               weights[wIndex + 0*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                               weights[wIndex + 0*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                               weights[wIndex + 1*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                               weights[wIndex + 1*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                               weights[wIndex + 1*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                               weights[wIndex + 2*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                               weights[wIndex + 2*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                               weights[wIndex + 2*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
                     } else {
                         for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                             for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] *
-                                                        input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+                                output[oIndexFull] += weights[wIndex + sx*std::get<2>(attrs)[1] + sy] *
+                                                        input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
                             }
                         }
                     }
diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
index e7ce0892a6241009a8e80821e341b3209a19faa4..12af5860316ba0bc9f6c3eafc551037f531da6d7 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -30,7 +30,7 @@ namespace Aidge {
 class ConvImpl2DForward_cpu
     : public Registrable<ConvImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
+                         void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, DimSize_t, const void *,
                               const void *, const void *, void *)> {};
 class ConvImpl2DBackward_cpu
     : public Registrable<ConvImpl2DBackward_cpu,
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
index 00d34f6596780f42aa5864058ea543f046f8edb1..0f171d79ae425bb83b5f3b8f0b67d69434a85355 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
@@ -30,14 +30,14 @@
  * @tparam B Bias data type.
  * @tparam O Output data type.
  * @param params tuple of Attributes from the Operator
- * @param dims Array of input dimensions.
+ * @param inputDims Array of input dimensions.
  * @param input_ const input Tensor.
  * @param weights_ const weight Tensor.
  * @param biases_ const Biais Tensor.
 * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
+void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &inputDims, DimSize_t outChannels,
                                    const void *input_, const void *weights_, const void *biases_, void *output_) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
@@ -47,11 +47,11 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
     /*
     // output H size
     const std::size_t oxSize =
-        static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
+        static_cast<std::size_t>(static_cast<float>(inputDims[0] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) /
                             static_cast<float>(std::get<0>(attrs)[0]));
     // output W size
     const std::size_t oySize =
-        static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
+        static_cast<std::size_t>(static_cast<float>(inputDims[1] - std::get<2>(attrs)[1] + std::get<0>(attrs)[1]) /
                             static_cast<float>(std::get<0>(attrs)[1]));
 
     // TODO: kernel computation
@@ -64,19 +64,19 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
             const std::size_t ix = ox * std::get<0>(attrs)[0];
             const std::size_t iy = oy * std::get<0>(attrs)[1];
 
-            for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
-                const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox));
+            for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+                const std::size_t oIndex = inputDims[3] * (outCh + outChannels * (oy + oySize * ox));
                 B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-                for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+                for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
                     output[oIndex + batch] = biasVal;
                 }
-                for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
-                    for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) {
-                        for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) {
+                for (std::size_t inCh = 0; inCh < inputDims[2]; ++inCh) {
+                    for (std::size_t sx = 0; sx < std::get<2>(attrs)[0]; ++sx) {
+                        for (std::size_t sy = 0; sy < std::get<2>(attrs)[1]; ++sy) {
                             const std::size_t wIndex =
-                                outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx));
-                            std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
-                            for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+                                outCh + outChannels * (inCh + inputDims[2] * (sy + std::get<2>(attrs)[1] * sx));
+                            std::size_t iIndex = inputDims[3] * (inCh + inputDims[2] * ((iy + sy) + inputDims[1] * (ix + sx)));
+                            for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
                                 output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
                             }
                         }
@@ -90,11 +90,11 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
 
     // output H size
     const std::size_t oxSize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
+        static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) /
                             static_cast<float>(std::get<0>(attrs)[0])));
     // output W size
     const std::size_t oySize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
+        static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - std::get<2>(attrs)[1] + std::get<0>(attrs)[1]) /
                             static_cast<float>(std::get<0>(attrs)[1])));
 
     // TODO: kernel computation
@@ -103,42 +103,42 @@
     // weight (outCh, inCh, kernelX, kernelY)
     // does not take Dilation attribute into account
    using signedsize = std::make_signed<std::size_t>::type;
-    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-        for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
-            const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
             // If NoBias or bias = nullptr, set B(0)
-            B biasVal = ((!std::get<5>(attrs)) && biases != nullptr) ? biases[outCh] : B(0);
+            B biasVal = ((!std::get<3>(attrs)) && biases != nullptr) ? biases[outCh] : B(0);
             std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
-            for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
-                const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
-                const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1];
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * std::get<2>(attrs)[0] * std::get<2>(attrs)[1];
                 for (std::size_t ox = 0; ox < oxSize; ++ox) {
                     const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
                     const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                    const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx);
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > std::get<2>(attrs)[0] ? std::get<2>(attrs)[0] : inputDims[2] + difx);
                     for (std::size_t oy = 0; oy < oySize; ++oy) {
                         const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
                         const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                        const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify);
+                        const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > std::get<2>(attrs)[1] ? std::get<2>(attrs)[1] : inputDims[3] + dify);
                         const std::size_t oIndexFull = oIndex + ox*oySize + oy;
                         const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
                         const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
 
                         if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                            output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+                            output[oIndexFull] += (weights[wIndex + 0*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 0*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 0*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 1*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 1*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 1*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 2*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 2*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 2*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
                         } else {
                             for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                                 for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                    output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] *
-                                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+                                    output[oIndexFull] += weights[wIndex + sx*std::get<2>(attrs)[1] + sy] *
+                                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
                                 }
                             }
                         }
diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp
index fedd8b38b2dbee9e5fd288a07d5cd722470723e5..db5f76834411925f3356a42bfb4bfda7da600e8e 100644
--- a/include/aidge/backend/cpu/operator/FCImpl.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl.hpp
@@ -31,6 +31,7 @@ class FCImplForward_cpu : public Registrable<FCImplForward_cpu,
                                                           DataType,
                                                           DataType>,
                                                void(const FC_Op::Attrs&,
+                                                    const DimSize_t,
                                                     const DimSize_t,
                                                     const DimSize_t,
                                                     const void *,
@@ -45,6 +46,7 @@ class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
                                                  void(const FC_Op::Attrs&,
                                                       const DimSize_t,
                                                       const DimSize_t,
+                                                      const DimSize_t,
                                                       const void *,
                                                       const void *,
                                                       const void *,
diff --git a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp
index 50fb5f49033cccd3c554d692bc336c7d5d677384..9dd91eb883902db907f5e5004dd0cf4db59bd6a2 100644
--- a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp
@@ -19,8 +19,17 @@
 
 namespace Aidge {
 template <class I, class O, class W, class B>
-void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
-                                const void* input_, const void* originalInput_, const void* weight_, void* output_, void* weightGrad_, void* biasesGrad_) {
+void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs,
+                                const DimSize_t batchSize,
+                                const DimSize_t inputFeatureSize,
+                                const DimSize_t outputFeatureSize,
+                                const void* input_,
+                                const void* originalInput_,
+                                const void* weight_,
+                                void* output_,
+                                void* weightGrad_,
+                                void* biasesGrad_)
+{
     // FIXME: missing FC attributes as arguments
     const I* input = static_cast<const I*>(input_);
     const I* originalInput = static_cast<const I*>(originalInput_);
@@ -31,37 +40,37 @@ void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batch
 
 
     // bias grad
-    if (std::get<1>(attrs)) { // no bias
-        std::fill(biasesGrad, biasesGrad + std::get<0>(attrs), B(0));
+    if (std::get<0>(attrs)) { // no bias
+        std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0));
     } else {
-        for (std::size_t o = 0; o < std::get<0>(attrs); ++o) { // nb outputs
+        for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
             B sum{0};
             for (std::size_t b = 0; b < batchSize; ++b) {
-                sum += input[b*std::get<0>(attrs) + o];
+                sum += input[b*outputFeatureSize + o];
             }
             biasesGrad[o] = sum;
         }
     }
 
     // weight grad
-    for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
-        for (std::size_t c = 0; c < oneInputSize; ++c) {
+    for (std::size_t o = 0; o < outputFeatureSize; ++o) {
+        for (std::size_t c = 0; c < inputFeatureSize; ++c) {
             W sum{0};
             for (std::size_t b = 0; b < batchSize; ++b) {
-                sum += originalInput[b*oneInputSize + c]*input[b*std::get<0>(attrs) + o];
+                sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o];
             }
-            weightGrad[o*oneInputSize + c] = sum;
+            weightGrad[o*inputFeatureSize + c] = sum;
         }
     }
 
     // input grad
     for (std::size_t b = 0; b < batchSize; ++b) {
-        for (std::size_t c = 0; c < oneInputSize; ++c) {
+        for (std::size_t c = 0; c < inputFeatureSize; ++c) {
             O sum{0};
-            for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
-                sum += weight[o*oneInputSize + c] * input[b*std::get<0>(attrs) + o];
+            for (std::size_t o = 0; o < outputFeatureSize; ++o) {
+                sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o];
             }
-            output[b*oneInputSize + c] = sum;
+            output[b*inputFeatureSize + c] = sum;
         }
     }
 }
diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
index 64f3b3e18f7255b74decad5137cbb5ccd6966123..2a1a86bac51106e23593a8dc5aa13d72511582b6 100644
--- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
@@ -27,9 +27,9 @@ namespace Aidge {
 //     const B* biases = static_cast<const B*>(biases_);
 //     O* output = static_cast<O*>(output_);
 
-//     for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
+//     for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
 //         std::size_t oIndex = outIdx * dims[3];
-//         const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
+//         const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
 //         for (std::size_t batch = 0; batch < dims[3]; ++batch) {
 //             output[oIndex + batch] = bias;
 //         }
@@ -39,10 +39,10 @@
 //     for (std::size_t iy = 0; iy < dims[1]; ++iy) {
 //         for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
 //             const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
-//             for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
+//             for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
 //                 const std::size_t oIndex = dims[3] * outCh;
-//                 const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) +
-//                                            outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
+//                 const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize +
+//                                            outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
 //                 for (std::size_t batch = 0; batch < dims[3]; ++batch) {
 //                     output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
 //                 }
@@ -63,9 +63,9 @@
 
 
 //     // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
-//     for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
+//     for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
 //         std::size_t oIndex = outIdx * dims[0];
-//         const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
+//         const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
 //         for (std::size_t batch = 0; batch < dims[0]; ++batch) {
 //             output[oIndex + batch] = bias;
 //         }
@@ -74,8 +74,8 @@
 //     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
 //         const std::size_t oIndex = dims[1] * batch;
 //         for (std::size_t i = 0; i < dims[1]; ++i) {
-//             for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
-//                 std::size_t wIndex = i * std::get<0>(attrs) + outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
+//             for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
+//                 std::size_t wIndex = i * outputFeatureSize + outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
 //                 output[oIndex + outCh] += weights[wIndex] * input[i + batch];
 //             }
 //         }
@@ -83,7 +83,8 @@
 // }
 
 template <class I, class W, class B, class O>
-void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
+void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t inputFeatureSize,
+                               const DimSize_t outputFeatureSize,
                                const void* input_, const void* weights_, const void* biases_, void* output_) {
     // FIXME: missing FC attributes as arguments
     const I* input = static_cast<const I*>(input_);
@@ -91,21 +92,21 @@ void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchS
     const B* biases = static_cast<const B*>(biases_);
     O* output = static_cast<O*>(output_);
 
-    if (std::get<1>(attrs)) {
-        std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0));
+    if (std::get<0>(attrs)) {
+        std::fill(output, output+(batchSize*outputFeatureSize), B(0));
     }
     else {
         for (std::size_t batch = 0; batch < batchSize; ++batch) {
-            std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs)));
+            std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize));
         }
     }
 
     for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
-            output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
-                                                        input + (batch + 1)*oneInputSize,
-                                                        weights + out*oneInputSize,
-                                                        output[out + batch*std::get<0>(attrs)]);
+        for (std::size_t out = 0; out < outputFeatureSize; ++out) {
+            output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize,
+                                                        input + (batch + 1)*inputFeatureSize,
+                                                        weights + out*inputFeatureSize,
+                                                        output[out + batch*outputFeatureSize]);
         }
     }
 }
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index 7457a1a0b75af1f922c5a65ac88aabc813d00069..27e2882c8ee7ddcc60d3d8521802debdcf4b9eb4 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -9,18 +9,18 @@
  *
  ********************************************************************************/
 
+#include "aidge/backend/cpu/operator/ConvImpl.hpp"
+
 #include <cassert>
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include "aidge/operator/Conv.hpp"
-
-#include "aidge/backend/cpu/operator/ConvImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp"
+#include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Types.h"
 
 Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -64,7 +64,12 @@ void Aidge::ConvImpl2D_cpu::forward() {
     const auto& input2 = opTensor.getInput(2)->refCastFrom(input2Fallback, *opTensor.getOutput(0));
 
     // Call kernel
-    kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), opTensor.getInput(0)->template dims<4>(),
-            input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
-            getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), // Conv attributes
+               opTensor.getInput(0)->template dims<4>(), // input dimensions
+               dynamic_cast<const Conv_Op<2>&>(mOp).outChannels(), // outChannels
+               input0.getImpl()->rawPtr(), // input
+               input1.getImpl()->rawPtr(), // weight
+               input2.getImpl()->rawPtr(), // bias
+               getCPUPtr(mOp.getRawOutput(0)) // output
+               );
 }
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index d9edf3a9959c1c80dbe85c93f7a1499260452c4c..9ade584146cd14b22dfb7a0e31147f136dd5fc8a 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -34,9 +34,9 @@ void Aidge::FCImpl_cpu::forward()
     // Find the correct kernel type
     const auto outputDataType = op_.getOutput(0)->dataType();
     const Registrar<FCImplForward_cpu>::registrar_key registrarKey = {
-        op_.getInput(0)->dataType(),
-        op_.getInput(1)->dataType(),
-        op_.getInput(2)->dataType(),
+        outputDataType,
+        outputDataType,
+        outputDataType,
         outputDataType};
 
     Registrar<FCImplForward_cpu>::registrar_type kernelFunc;
@@ -63,7 +63,8 @@ void Aidge::FCImpl_cpu::forward()
     const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
     kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
         batchSize,
-        input0.size() / batchSize,
+        input1.dims()[1], // nb input features
+        input1.dims()[0], // nb output features
         input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
         getCPUPtr(mOp.getRawOutput(0)));
 }
@@ -108,7 +109,8 @@ void Aidge::FCImpl_cpu::backward()
     const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
     kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
         batchSize,
-        input0grad.size() / batchSize,
+        input1grad.dims()[1], // nb input features
+        input1grad.dims()[0], // nb output features
         getCPUPtr(fc_grad),
         getCPUPtr(op_.getInput(0)),
         getCPUPtr(mOp.getRawInput(1)),