Commit 143a9693 authored by Maxence Naud

Change Conv, ConvDepthWise, FC kernels according to aidge_core changes

parent 87d07901
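Context for the diff below: in aidge_core, the static attribute tuples of Conv, ConvDepthWise and FC were slimmed down, so the `std::get<N>(attrs)` indices used by the CPU kernels shifted, and the channel/feature counts are now passed to the kernels as explicit arguments instead of being read from the attributes. A rough sketch of the layout this diff appears to assume for Conv (illustrative names, not the actual aidge_core definitions; ConvDepthWise and FC change analogously):

    // Hypothetical before/after of Conv_Op<2>::Attrs, inferred from the index
    // changes in this commit; the real aidge_core typedefs may differ.
    //
    // Before: std::tuple<StrideDims,    // std::get<0>
    //                    DilationDims,  // std::get<1>
    //                    InChannels,    // std::get<2>  (dropped)
    //                    OutChannels,   // std::get<3>  (dropped, now an explicit kernel argument)
    //                    KernelDims,    // std::get<4>  -> std::get<2> after the change
    //                    NoBias>;       // std::get<5>  -> std::get<3> after the change
    //
    // After:  std::tuple<StrideDims,    // std::get<0>
    //                    DilationDims,  // std::get<1>
    //                    KernelDims,    // std::get<2>
    //                    NoBias>;       // std::get<3>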
@@ -12,15 +12,15 @@
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <cmath>
#include <cstddef>
#include <array>
#include <algorithm>
namespace Aidge {
/**
@@ -30,14 +30,14 @@ namespace Aidge {
* @tparam B Bias data type.
* @tparam O Output data type.
* @param attrs tuple of Attributes from the Operator.
-* @param dims Array of input dimensions.
+* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const Bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
-void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
+void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &inputDims,
const void *input_, const void *weights_, const void *biases_, void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
@@ -48,11 +48,11 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
// output H size
const std::size_t oxSize =
-static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) /
+static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
// output W size
const std::size_t oySize =
-static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) /
+static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - std::get<2>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
// TODO: kernel computation
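Aside: the oxSize/oySize computation above is the standard output-size formula for a strided, no-padding convolution. A minimal standalone equivalent (a sketch, not part of the diff):

    #include <cstddef>

    // out = floor((in - kernel + stride) / stride); for non-negative operands,
    // integer division already floors, so the float/std::floor round-trip is optional.
    std::size_t convOutputSize(std::size_t in, std::size_t kernel, std::size_t stride) {
        return (in - kernel + stride) / stride;
    }
    // Example: in = 32, kernel = 3, stride = 1 -> (32 - 3 + 1) / 1 = 30.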
@@ -61,40 +61,40 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
-for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) {
-const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize;
-B biasVal = ((!std::get<4>(attrs)) && biases != nullptr) ? biases[ch] : B(0);
+for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
+const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
+B biasVal = ((!std::get<3>(attrs)) && biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
-const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1];
+const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+const std::size_t wIndex = ch * std::get<2>(attrs)[0] * std::get<2>(attrs)[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx);
+const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > std::get<2>(attrs)[0] ? std::get<2>(attrs)[0] : inputDims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify);
+const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > std::get<2>(attrs)[1] ? std::get<2>(attrs)[1] : inputDims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-output[oIndexFull] += (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+output[oIndexFull] += (weights[wIndex + 0*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+weights[wIndex + 0*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+weights[wIndex + 0*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+weights[wIndex + 1*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+weights[wIndex + 1*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+weights[wIndex + 1*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+weights[wIndex + 2*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+weights[wIndex + 2*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+weights[wIndex + 2*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
-output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] *
-input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+output[oIndexFull] += weights[wIndex + sx*std::get<2>(attrs)[1] + sy] *
+input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
}
}
}
......
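Aside on the `sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3` branch above: it is an unrolled fast path for a kernel window fully inside the input, and it hard-codes a 3x3 kernel. Assuming the kernel really is 3x3, it is equivalent to the generic fallback loop; a sketch of that equivalence (not part of the diff):

    #include <cstddef>

    // Generic form of the unrolled 3x3 accumulation (assumes
    // std::get<2>(attrs) == {3, 3}); names mirror the kernel's locals.
    template <class I, class W, class O>
    void accumulate3x3(O* output, std::size_t oIndexFull,
                       const W* weights, std::size_t wIndex,
                       const I* input, std::size_t iIndex,
                       std::size_t ix, std::size_t iy, std::size_t inputWidth) {
        for (std::size_t sx = 0; sx < 3; ++sx) {
            for (std::size_t sy = 0; sy < 3; ++sy) {
                output[oIndexFull] += weights[wIndex + sx*3 + sy]
                                    * input[iIndex + (ix + sx)*inputWidth + (iy + sy)];
            }
        }
    }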
@@ -30,7 +30,7 @@ namespace Aidge {
class ConvImpl2DForward_cpu
: public Registrable<ConvImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
-void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
+void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, DimSize_t, const void *,
const void *, const void *, void *)> {};
class ConvImpl2DBackward_cpu
: public Registrable<ConvImpl2DBackward_cpu,
......
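For context, the extra `DimSize_t` in the functor type is the new outChannels argument. A kernel is then registered against this signature roughly like so (a hypothetical registration following the Registrable pattern above; the exact names in aidge_backend_cpu may differ):

    // Hypothetical float32 registration for the new signature; illustrative only.
    namespace Aidge {
    namespace {
    static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForwardFloat32(
        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
        ConvImpl2D_cpu_forward_kernel<float, float, float, float>);
    }  // namespace
    }  // namespace Aidge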
@@ -30,14 +30,14 @@ namespace Aidge {
* @tparam B Bias data type.
* @tparam O Output data type.
* @param attrs tuple of Attributes from the Operator.
-* @param dims Array of input dimensions.
+* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const Bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
-void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
+void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &inputDims, DimSize_t outChannels,
const void *input_, const void *weights_, const void *biases_, void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
@@ -47,11 +47,11 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
/*
// output H size
const std::size_t oxSize =
-static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
+static_cast<std::size_t>(static_cast<float>(inputDims[0] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0]));
// output W size
const std::size_t oySize =
-static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
+static_cast<std::size_t>(static_cast<float>(inputDims[1] - std::get<2>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1]));
// TODO: kernel computation
@@ -64,19 +64,19 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
const std::size_t ix = ox * std::get<0>(attrs)[0];
const std::size_t iy = oy * std::get<0>(attrs)[1];
-for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
-const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox));
+for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+const std::size_t oIndex = inputDims[3] * (outCh + outChannels * (oy + oySize * ox));
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
output[oIndex + batch] = biasVal;
}
-for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
-for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) {
-for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) {
+for (std::size_t inCh = 0; inCh < inputDims[2]; ++inCh) {
+for (std::size_t sx = 0; sx < std::get<2>(attrs)[0]; ++sx) {
+for (std::size_t sy = 0; sy < std::get<2>(attrs)[1]; ++sy) {
const std::size_t wIndex =
-outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx));
-std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
-for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+outCh + outChannels * (inCh + inputDims[2] * (sy + std::get<2>(attrs)[1] * sx));
+std::size_t iIndex = inputDims[3] * (inCh + inputDims[2] * ((iy + sy) + inputDims[1] * (ix + sx)));
+for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
}
}
@@ -90,11 +90,11 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
// output H size
const std::size_t oxSize =
-static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
+static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
// output W size
const std::size_t oySize =
-static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
+static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - std::get<2>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
// TODO: kernel computation
@@ -103,42 +103,42 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
-for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
-const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize;
+for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
// If NoBias or bias = nullptr, set B(0)
-B biasVal = ((!std::get<5>(attrs)) && biases != nullptr) ? biases[outCh] : B(0);
+B biasVal = ((!std::get<3>(attrs)) && biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
-for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
-const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
-const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1];
+for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+const std::size_t wIndex = (inCh + outCh*inputDims[1]) * std::get<2>(attrs)[0] * std::get<2>(attrs)[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx);
+const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > std::get<2>(attrs)[0] ? std::get<2>(attrs)[0] : inputDims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify);
+const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > std::get<2>(attrs)[1] ? std::get<2>(attrs)[1] : inputDims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+output[oIndexFull] += (weights[wIndex + 0*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+weights[wIndex + 0*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+weights[wIndex + 0*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+weights[wIndex + 1*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+weights[wIndex + 1*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+weights[wIndex + 1*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+weights[wIndex + 2*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+weights[wIndex + 2*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+weights[wIndex + 2*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
-output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] *
-input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+output[oIndexFull] += weights[wIndex + sx*std::get<2>(attrs)[1] + sy] *
+input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
}
}
}
......
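The active (non-commented) kernel above indexes NCHW tensors; iIndex, wIndex and oIndex are precomputed slices of the usual row-major linearization. A minimal sketch of that layout rule (for intuition; not part of the diff):

    #include <cstddef>

    // Row-major NCHW offset: index(n, c, h, w) = ((n*C + c)*H + h)*W + w.
    // E.g. the kernel's iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]
    // equals nchwIndex(batch, inCh, 0, 0, C, H, W).
    std::size_t nchwIndex(std::size_t n, std::size_t c, std::size_t h, std::size_t w,
                          std::size_t C, std::size_t H, std::size_t W) {
        return ((n*C + c)*H + h)*W + w;
    }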
@@ -31,6 +31,7 @@ class FCImplForward_cpu : public Registrable<FCImplForward_cpu,
DataType,
DataType>,
void(const FC_Op::Attrs&,
const DimSize_t,
const DimSize_t,
+const DimSize_t,
const void *,
@@ -45,6 +46,7 @@ class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
void(const FC_Op::Attrs&,
const DimSize_t,
const DimSize_t,
+const DimSize_t,
const void *,
const void *,
const void *,
......
@@ -19,8 +19,17 @@
namespace Aidge {
template <class I, class O, class W, class B>
-void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
-const void* input_, const void* originalInput_, const void* weight_, void* output_, void* weightGrad_, void* biasesGrad_) {
+void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs,
+const DimSize_t batchSize,
+const DimSize_t inputFeatureSize,
+const DimSize_t outputFeatureSize,
+const void* input_,
+const void* originalInput_,
+const void* weight_,
+void* output_,
+void* weightGrad_,
+void* biasesGrad_)
+{
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
const I* originalInput = static_cast<const I*>(originalInput_);
@@ -31,37 +40,37 @@ void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batch
// bias grad
-if (std::get<1>(attrs)) { // no bias
-std::fill(biasesGrad, biasesGrad + std::get<0>(attrs), B(0));
+if (std::get<0>(attrs)) { // no bias
+std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0));
} else {
-for (std::size_t o = 0; o < std::get<0>(attrs); ++o) { // nb outputs
+for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
B sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
-sum += input[b*std::get<0>(attrs) + o];
+sum += input[b*outputFeatureSize + o];
}
biasesGrad[o] = sum;
}
}
// weight grad
-for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
-for (std::size_t c = 0; c < oneInputSize; ++c) {
+for (std::size_t o = 0; o < outputFeatureSize; ++o) {
+for (std::size_t c = 0; c < inputFeatureSize; ++c) {
W sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
-sum += originalInput[b*oneInputSize + c]*input[b*std::get<0>(attrs) + o];
+sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o];
}
-weightGrad[o*oneInputSize + c] = sum;
+weightGrad[o*inputFeatureSize + c] = sum;
}
}
// input grad
for (std::size_t b = 0; b < batchSize; ++b) {
-for (std::size_t c = 0; c < oneInputSize; ++c) {
+for (std::size_t c = 0; c < inputFeatureSize; ++c) {
O sum{0};
-for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
-sum += weight[o*oneInputSize + c] * input[b*std::get<0>(attrs) + o];
+for (std::size_t o = 0; o < outputFeatureSize; ++o) {
+sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o];
}
-output[b*oneInputSize + c] = sum;
+output[b*inputFeatureSize + c] = sum;
}
}
}
......
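In matrix form, the three loops of the backward kernel compute the usual fully-connected gradients. Writing G for the incoming gradient (the kernel's `input`, batchSize x outputFeatureSize), X for the forward input (batchSize x inputFeatureSize) and W for the (outputFeatureSize x inputFeatureSize) weight matrix, the element-wise reading is (an annotation for clarity, not part of the diff):

    // biasesGrad[o]    = sum over b of G[b][o]               // column sums of G
    // weightGrad[o][c] = sum over b of X[b][c] * G[b][o]     // G^T * X
    // output[b][c]     = sum over o of W[o][c] * G[b][o]     // G * W  (gradient w.r.t. the input)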
@@ -27,9 +27,9 @@ namespace Aidge {
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
-// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
+// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[3];
-// const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
+// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] = bias;
// }
@@ -39,10 +39,10 @@ namespace Aidge {
// for (std::size_t iy = 0; iy < dims[1]; ++iy) {
// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
-// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
+// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
// const std::size_t oIndex = dims[3] * outCh;
-// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) +
-// outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
+// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize +
+// outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
// }
@@ -63,9 +63,9 @@ namespace Aidge {
// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
-// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
+// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[0];
-// const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
+// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// output[oIndex + batch] = bias;
// }
@@ -74,8 +74,8 @@ namespace Aidge {
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// const std::size_t oIndex = dims[1] * batch;
// for (std::size_t i = 0; i < dims[1]; ++i) {
-// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
-// std::size_t wIndex = i * std::get<0>(attrs) + outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
+// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
+// std::size_t wIndex = i * outputFeatureSize + outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// output[oIndex + outCh] += weights[wIndex] * input[i + batch];
// }
// }
@@ -83,7 +83,8 @@ namespace Aidge {
// }
template <class I, class W, class B, class O>
-void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
+void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t inputFeatureSize,
+const DimSize_t outputFeatureSize,
const void* input_, const void* weights_, const void* biases_, void* output_) {
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
@@ -91,21 +92,21 @@ void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchS
const B* biases = static_cast<const B*>(biases_);
O* output = static_cast<O*>(output_);
-if (std::get<1>(attrs)) {
-std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0));
+if (std::get<0>(attrs)) {
+std::fill(output, output+(batchSize*outputFeatureSize), B(0));
}
else {
for (std::size_t batch = 0; batch < batchSize; ++batch) {
-std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs)));
+std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize));
}
}
for (std::size_t batch = 0; batch < batchSize; ++batch) {
-for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
-output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
-input + (batch + 1)*oneInputSize,
-weights + out*oneInputSize,
-output[out + batch*std::get<0>(attrs)]);
+for (std::size_t out = 0; out < outputFeatureSize; ++out) {
+output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize,
+input + (batch + 1)*inputFeatureSize,
+weights + out*inputFeatureSize,
+output[out + batch*outputFeatureSize]);
}
}
}
......
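The forward kernel above first seeds the output with the bias (or zero), then folds each input row against each weight row with std::inner_product, using the seeded output value as the accumulator's initial value. A self-contained sketch of one output element (assumed row-major layouts; illustrative, not the aidge API):

    #include <cstddef>
    #include <numeric>  // std::inner_product
    #include <vector>

    // output[b][o] = bias + <input row b, weight row o>
    float fcOutputElement(const std::vector<float>& input,    // batchSize x inSize, row-major
                          const std::vector<float>& weights,  // outSize x inSize, row-major
                          float bias, std::size_t b, std::size_t o, std::size_t inSize) {
        return std::inner_product(input.begin() + b*inSize, input.begin() + (b + 1)*inSize,
                                  weights.begin() + o*inSize, bias);
    }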
@@ -9,18 +9,18 @@
*
********************************************************************************/
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/operator/Conv.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/utils/Types.h"
Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
// this implementation can be in-place
@@ -64,7 +64,12 @@ void Aidge::ConvImpl2D_cpu::forward() {
const auto& input2 = opTensor.getInput(2)->refCastFrom(input2Fallback, *opTensor.getOutput(0));
// Call kernel
-kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), opTensor.getInput(0)->template dims<4>(),
-input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
-getCPUPtr(mOp.getRawOutput(0)));
+kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), // Conv attributes
+opTensor.getInput(0)->template dims<4>(), // input dimensions
+dynamic_cast<const Conv_Op<2>&>(mOp).outChannels(), // outChannels
+input0.getImpl()->rawPtr(), // input
+input1.getImpl()->rawPtr(), // weight
+input2.getImpl()->rawPtr(), // bias
+getCPUPtr(mOp.getRawOutput(0)) // output
+);
}
@@ -34,9 +34,9 @@ void Aidge::FCImpl_cpu::forward()
// Find the correct kernel type
const auto outputDataType = op_.getOutput(0)->dataType();
const Registrar<FCImplForward_cpu>::registrar_key registrarKey = {
-op_.getInput(0)->dataType(),
-op_.getInput(1)->dataType(),
-op_.getInput(2)->dataType(),
+outputDataType,
+outputDataType,
+outputDataType,
outputDataType};
Registrar<FCImplForward_cpu>::registrar_type kernelFunc;
@@ -63,7 +63,8 @@ void Aidge::FCImpl_cpu::forward()
const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
batchSize,
-input0.size() / batchSize,
+input1.dims()[1], // nb input features
+input1.dims()[0], // nb output features
input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
getCPUPtr(mOp.getRawOutput(0)));
}
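Note that both call sites now take the feature counts from the weight tensor rather than dividing the flattened input size, which implies a (outputFeatureSize, inputFeatureSize) weight layout (an inference from this diff, not a documented guarantee):

    #include <array>
    #include <cstddef>

    // Assumed FC weight layout: dims = {outputFeatureSize, inputFeatureSize}.
    // For a hypothetical 128 -> 10 layer:
    std::array<std::size_t, 2> weightDims{10, 128};
    std::size_t nbOutputFeatures = weightDims[0];  // 10; previously std::get<0>(attrs)
    std::size_t nbInputFeatures  = weightDims[1];  // 128; previously input0.size() / batchSize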
@@ -108,7 +109,8 @@ void Aidge::FCImpl_cpu::backward()
const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
batchSize,
-input0grad.size() / batchSize,
+input1grad.dims()[1], // nb input features
+input1grad.dims()[0], // nb output features
getCPUPtr(fc_grad),
getCPUPtr(op_.getInput(0)),
getCPUPtr(mOp.getRawInput(1)),
......