Compare revisions

Changes are shown as if the source revision was being merged into the target revision.
Commits on Source (35)
Showing with 808 additions and 175 deletions
......@@ -13,16 +13,17 @@ class test_scheduler(unittest.TestCase):
pass
def test_relu_forward(self):
values = np.arange(6) - 3
input_node = aidge_core.Producer(aidge_core.Tensor(values), "Input")
t = aidge_core.Tensor(np.arange(6, dtype=np.int32) - 3)
input_node = aidge_core.Producer(t)
relu = aidge_core.ReLU()
input_node.add_child(relu)
gv = aidge_core.GraphView()
gv.add(relu)
gv.add(input_node)
input_node.add_child(relu)
gv.set_datatype(aidge_core.dtype.int32)
gv.set_backend("cpu")
......@@ -34,7 +35,7 @@ class test_scheduler(unittest.TestCase):
out_tensor = relu.get_operator().get_output(0)
expected_out = [0,0,0,0,1,2]
for i in range(len(expected_out)):
self.assertEqual(expected_out[i], out_tensor[i])
self.assertEqual(expected_out[i], out_tensor[i], f"On idx {i}")
def test_sequential_scheduling(self):
input_data = np.array([0]).astype(np.float32)
......@@ -69,7 +70,7 @@ class test_scheduler(unittest.TestCase):
aidge_core.Producer(input_tensor, "X"),
aidge_core.FC(1, 50, name='0'),
aidge_core.parallel([aidge_core.FC(50, 50, name='1'), aidge_core.FC(50, 50, name='3')]),
aidge_core.Add(2, name='2'),
aidge_core.Add(name='2'),
])
EXPECTED_SCHEDULE = [['0', '1', '3', '2'], ['0', '3', '1', '2']] # Both schedules are valid!
......
......@@ -15,11 +15,14 @@
#include "aidge/backend/cpu/operator/AbsImpl.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
#include "aidge/backend/cpu/operator/AndImpl.hpp"
#include "aidge/backend/cpu/operator/AtanImpl.hpp"
#include "aidge/backend/cpu/operator/ArgMaxImpl.hpp"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
#include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
#include "aidge/backend/cpu/operator/ClipImpl.hpp"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
......@@ -37,6 +40,7 @@
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp"
#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
#include "aidge/backend/cpu/operator/RoundImpl.hpp"
#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ATAN_H_
#define AIDGE_CPU_OPERATOR_ATAN_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Atan.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using AtanImpl_cpu = OperatorImpl_cpu<Atan_Op,
void(const std::size_t, const void*, void*),
void(const std::size_t, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Atan_Op, "cpu", Aidge::AtanImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ATAN_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ATANIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ATANIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/AtanImpl.hpp"
#include <cmath> // for std::atan()
namespace Aidge {
template <class I, class O>
void AtanImpl_cpu_forward_kernel(std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = static_cast<O>(std::atan(input[i]));
}
}
template <class O, class GI, class GO>
void AtanImpl_cpu_backward_kernel(const std::size_t inputLength,
const void* output_, const void* grad_output_,
void* grad_input_) {
const O* output = static_cast<const O*>(output_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
// Apply the derivative of atan to each element: dx = dy * 1/(1 + x^2).
// Note: this kernel receives the forward *output* atan(x) and uses it in place of x below.
for (std::size_t i = 0; i < inputLength; ++i) {
grad_input[i] = grad_output[i] * static_cast<O>(1.0 / (1.0 + output[i] * output[i]));
}
}
// Kernels registration to implementation entry point
REGISTRAR(AtanImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::AtanImpl_cpu_forward_kernel<float, float>, Aidge::AtanImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(AtanImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::AtanImpl_cpu_forward_kernel<double, double>, Aidge::AtanImpl_cpu_backward_kernel<double, double, double>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ATANIMPL_KERNELS_H_ */
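
For reference, a minimal standalone sketch of the atan forward/backward math used by these kernels, in plain C++ with illustrative names (no Aidge types); note the analytic derivative is taken with respect to the forward input x:

#include <cmath>
#include <cstddef>
#include <cstdio>

// Illustrative reference for the atan kernels above; plain arrays, no Aidge API.
void atan_forward(std::size_t n, const float* x, float* y) {
    for (std::size_t i = 0; i < n; ++i)
        y[i] = std::atan(x[i]);                 // y = atan(x)
}

void atan_backward(std::size_t n, const float* x, const float* dy, float* dx) {
    for (std::size_t i = 0; i < n; ++i)
        dx[i] = dy[i] / (1.0f + x[i] * x[i]);   // d/dx atan(x) = 1/(1 + x^2)
}

int main() {
    const float x[3]  = {-1.0f, 0.0f, 1.0f};
    const float dy[3] = {1.0f, 1.0f, 1.0f};
    float y[3], dx[3];
    atan_forward(3, x, y);
    atan_backward(3, x, dy, dx);
    for (int i = 0; i < 3; ++i)
        std::printf("x=% .1f atan=% .6f datan=% .6f\n", x[i], y[i], dx[i]);
}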
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_H_
#define AIDGE_CPU_OPERATOR_CLIPIMPL_H_
#include <cstddef> // std::size_t
#include <memory>
#include <tuple> // std::tuple
#include <vector>
#include <algorithm>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Clip.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using ClipImpl_cpu = OperatorImpl_cpu<Clip_Op,
void(float, //Forward Types
float,
const void*,
const std::size_t,
void*),
void(float,//Backward Types
float,
const std::size_t,
const void*,
const void*,
void*)>;
REGISTRAR(Clip_Op,"cpu",Aidge::ClipImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ClipImpl.hpp"
namespace Aidge {
template <class I, class O>
void ClipImpl_cpu_forward_kernel(
float min_,
float max_,
const void* input_,
const std::size_t length,
void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < length; ++i) {
output[i] = std::min(std::max(static_cast<float>(input[i]), min_), max_);
}
}
template <class I, class GI, class GO>
void ClipImpl_cpu_backward_kernel(
float min_,
float max_,
const std::size_t length,
const void* input_,
const void* grad_output_,
void* grad_input_)
{
const I* input = static_cast<const I*>(input_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
for (std::size_t i = 0; i < length; ++i) {
grad_input[i] = ((input[i] > min_) && (input[i] < max_)) ? grad_output[i] : 0;
}
}
REGISTRAR(ClipImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<float,float>,
Aidge::ClipImpl_cpu_backward_kernel<float,float,float>});
REGISTRAR(ClipImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<double,double>,
Aidge::ClipImpl_cpu_backward_kernel<double,double,double>});
REGISTRAR(ClipImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<std::int32_t,std::int32_t>,
Aidge::ClipImpl_cpu_backward_kernel<std::int32_t,std::int32_t,std::int32_t>});
REGISTRAR(ClipImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<std::int64_t,std::int64_t>,
Aidge::ClipImpl_cpu_backward_kernel<std::int64_t,std::int64_t,std::int64_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_ */
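
For reference, the clip gradient rule implemented above in a minimal standalone form: the upstream gradient passes through strictly inside (min, max) and is zeroed elsewhere. Plain C++ sketch with illustrative names, independent of the Aidge kernel signatures:

#include <algorithm>
#include <cstddef>

// Illustrative reference for the clip kernels above; plain arrays, no Aidge API.
void clip_forward(float lo, float hi, std::size_t n, const float* x, float* y) {
    for (std::size_t i = 0; i < n; ++i)
        y[i] = std::min(std::max(x[i], lo), hi);
}

void clip_backward(float lo, float hi, std::size_t n,
                   const float* x, const float* dy, float* dx) {
    // Gradient passes through strictly inside (lo, hi), is zero elsewhere.
    for (std::size_t i = 0; i < n; ++i)
        dx[i] = (x[i] > lo && x[i] < hi) ? dy[i] : 0.0f;
}

int main() {
    const float x[4]  = {-2.0f, 0.5f, 1.0f, 3.0f};
    const float dy[4] = {1.0f, 1.0f, 1.0f, 1.0f};
    float y[4], dx[4];
    clip_forward(0.0f, 2.0f, 4, x, y);
    clip_backward(0.0f, 2.0f, 4, x, dy, dx);   // dx = {0, 1, 1, 0}
}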
......@@ -38,7 +38,7 @@ namespace Aidge {
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& /*dilationDims*/,
const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
const void *input_,
......@@ -53,10 +53,12 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, ch, Xin, Yin)
......@@ -71,15 +73,17 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = ch * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
// const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
// const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
// const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
}
}
}
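
The hunks above replace the plain kernel size with the dilated extent d*(k-1)+1 in the output-size formula and step through the input by s*d per kernel tap. A minimal standalone sketch of that arithmetic (illustrative names, single channel, no padding):

#include <cstddef>
#include <vector>

// Illustrative single-channel reference; assumes in.size() >= dilation*(k-1)+1.
std::vector<float> conv1d_dilated(const std::vector<float>& in,
                                  const std::vector<float>& w,
                                  std::size_t stride, std::size_t dilation) {
    const std::size_t k = w.size();
    const std::size_t dilated_k = dilation * (k - 1) + 1;     // effective kernel extent
    const std::size_t oSize = (in.size() - dilated_k) / stride + 1;
    std::vector<float> out(oSize, 0.0f);
    for (std::size_t ox = 0; ox < oSize; ++ox)
        for (std::size_t s = 0; s < k; ++s)
            out[ox] += w[s] * in[ox * stride + s * dilation]; // tap s reads at ox*stride + s*d
    return out;
}
// e.g. conv1d_dilated({1,2,3,4,5}, {1,1}, /*stride*/1, /*dilation*/2) == {4, 6, 8}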
......@@ -113,7 +117,7 @@ REGISTRAR(ConvDepthWiseImpl1D_cpu,
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& /*dilationDims*/,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4>& inputDims,
const void *input_,
......@@ -129,12 +133,15 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
......@@ -143,49 +150,106 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
const std::size_t outChannels_s = oxSize * oySize;
if (dilated_kernel_x == 3 && dilated_kernel_y == 3) {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * 9;
if (strideDims[0] == 1 && strideDims[1]==1) {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
}
}
} else {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=strideDims[0]*inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+strideDims[0]]+weights[wIndex+2]*input[iIndex+oy+strideDims[0]*2];
}
iIndex+=strideDims[0]*inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+strideDims[0]]+weights[wIndex+5]*input[iIndex+oy+strideDims[0]*2];
}
iIndex+=strideDims[0]*inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+strideDims[0]]+weights[wIndex+8]*input[iIndex+oy+strideDims[0]*2];
}
}
}
output += outChannels_s;
}
}
} else if (dilated_kernel_x == 1 && dilated_kernel_y == 1) {
std::size_t index = 0;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch;
if (strideDims[0] == 1 && strideDims[1] == 1) {
for (; index < iIndex + oxSize*oySize; ++index) {
output[index] = biasVal + weights[wIndex] * input[index];
}
} else {
std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
for (std::size_t ox = 0; ox < oxSize; ++ox, oIndex+=oySize) {
index = iIndex + strideDims[0]*inputDims[3];
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
output[oIndex + oy] += weights[wIndex]*input[index+iy];
}
}
}
}
}
} else {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output, output+outChannels_s, biasVal);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t oIndexFull = ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
for (std::size_t sx = 0; sx*dilationDims[0] < dilated_kernel_x; ++sx) {
for (std::size_t sy = 0; sy*dilationDims[1] < dilated_kernel_y; ++sy) {
output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))];
}
}
}
}
}
output += outChannels_s;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
......
......@@ -40,7 +40,7 @@ namespace Aidge {
*/
template <class I, class W, class B, class O>
void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& /*dilationDims*/,
const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
DimSize_t outChannels,
......@@ -57,8 +57,9 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) /
static_cast<float>(strideDims[0])));
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
......@@ -76,15 +77,17 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
// const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
// const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
// const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
}
}
}
......@@ -122,7 +125,7 @@ REGISTRAR(ConvImpl1D_cpu,
*/
template <class I, class W, class B, class O>
void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& /*dilationDims*/,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4> &inputDims,
DimSize_t outChannels,
......@@ -138,66 +141,124 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
O *output = static_cast<O *>(output_);
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, inCh, Xin, Yin)
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation attribute into account
const std::size_t outChannels_s = oxSize * oySize;
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
if (dilated_kernel_x == 3 && dilated_kernel_y == 3) {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output, output+outChannels_s, biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * 9;
if (strideDims[0] == 1 && strideDims[1]==1) {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
}
}
} else {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=strideDims[0]*inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+strideDims[0]]+weights[wIndex+2]*input[iIndex+oy+strideDims[0]*2];
}
iIndex+=strideDims[0]*inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+strideDims[0]]+weights[wIndex+5]*input[iIndex+oy+strideDims[0]*2];
}
iIndex+=strideDims[0]*inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+strideDims[0]]+weights[wIndex+8]*input[iIndex+oy+strideDims[0]*2];
}
}
}
}
output += outChannels_s;
}
}
} else if (dilated_kernel_x == 1 && dilated_kernel_y == 1) {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output, output+outChannels_s, biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]);
if (strideDims[0] == 1 && strideDims[1] == 1) {
for (std::size_t oIndex = 0; oIndex < oxSize*oySize; ++oIndex, ++iIndex) {
output[oIndex] += weights[wIndex] * input[iIndex];
}
} else {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=inputDims[3]*strideDims[0]) {
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+iy];
}
}
}
}
output += outChannels_s;
}
}
} else {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output, output+outChannels_s, biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
std::size_t iIndex_channel = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
// loop over each output line
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex_channel+=inputDims[3]*strideDims[0]) {
// loop over the associated input lines
for (std::size_t ky = 0, ix = 0; ky < kernelDims[0]; ++ky, ix += inputDims[3]*dilationDims[0]) {
// loop over the entire output line
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
const std::size_t iIndex = iIndex_channel + ix + iy;
// loop over the elements associated with one output
for (std::size_t kx = 0; kx < kernelDims[0]; ++kx) {
output[oIndex + oy] += weights[wIndex+kernelDims[0]*ky+kx]*input[iIndex+kx*dilationDims[1]];
}
}
}
}
}
output += outChannels_s;
}
}
}
}
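
The rewritten 2D kernel dispatches to specialized 3x3 and 1x1 paths and falls back to a generic dilated loop. For reference, an unoptimized single-channel version of the generic case, as a standalone sketch with illustrative names (no Aidge types, no padding):

#include <cstddef>
#include <vector>

// Illustrative single-channel reference for the generic dilated path above.
// in: H*W row-major, w: kH*kW row-major; returns oH*oW row-major.
std::vector<float> conv2d_dilated(const std::vector<float>& in,
                                  std::size_t H, std::size_t W,
                                  const std::vector<float>& w,
                                  std::size_t kH, std::size_t kW,
                                  std::size_t sH, std::size_t sW,
                                  std::size_t dH, std::size_t dW) {
    const std::size_t dkH = dH * (kH - 1) + 1;                // dilated kernel extents
    const std::size_t dkW = dW * (kW - 1) + 1;
    const std::size_t oH = (H - dkH) / sH + 1;
    const std::size_t oW = (W - dkW) / sW + 1;
    std::vector<float> out(oH * oW, 0.0f);
    for (std::size_t oy = 0; oy < oH; ++oy)
        for (std::size_t ox = 0; ox < oW; ++ox)
            for (std::size_t ky = 0; ky < kH; ++ky)
                for (std::size_t kx = 0; kx < kW; ++kx)
                    out[oy * oW + ox] += w[ky * kW + kx] *
                        in[(oy * sH + ky * dH) * W + (ox * sW + kx * dW)];
    return out;
}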
// Kernels registration to implementation entry point
REGISTRAR(ConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
......
......@@ -96,11 +96,11 @@ void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
const std::shared_ptr<Tensor>& in1,
const std::shared_ptr<Tensor>& out)
{
const I* const input = static_cast<const I * const>(in0->getImpl()->rawPtr());
const I* const input = static_cast<const I *>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
float* const grid = static_cast<float* const>(in1->getImpl()->rawPtr());
float* const grid = static_cast<float*>(in1->getImpl()->rawPtr());
float* grid_ptr = grid;
O* const output = static_cast<O* const>(out->getImpl()->rawPtr());
O* const output = static_cast<O*>(out->getImpl()->rawPtr());
O* output_ptr = output;
const std::size_t N = in0->dim(0);
......@@ -243,9 +243,9 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
{
const I* input = static_cast<const I *>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
float* const grid = static_cast<float* const>(in0->getImpl()->rawPtr());
float* const grid = static_cast<float*>(in0->getImpl()->rawPtr());
float* grid_ptr = grid;
O* const output = static_cast<O* const>(out->getImpl()->rawPtr());
O* const output = static_cast<O*>(out->getImpl()->rawPtr());
const std::size_t N = in0->dim(0);
const std::size_t C = in0->dim(1);
......
......@@ -38,8 +38,10 @@ public:
return impl.prodConso(mOp);
}
virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
return Registrar<OperatorImpl_cpu>::getKeys();
virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
// return Registrar<OperatorImpl_cpu>::getKeys(); // Note: cannot return set due to python binding
std::set<ImplSpec> implSpecsSet = Registrar<OperatorImpl_cpu>::getKeys();
return std::vector<ImplSpec>(implSpecsSet.begin(), implSpecsSet.end());
}
void forward() override;
......
......@@ -55,19 +55,19 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder
O outputValue = static_cast<O>(borderValue);
if (borderType == PadBorderType::Constant) {
int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[1]);
int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[0]);
if (ix >= 0 && ix < static_cast<int>(dims[2])) {
outputValue = input[iIndex + static_cast<std::size_t>(ix)];
}
}
else if (borderType == PadBorderType::Edge) {
int ix = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(ox) - static_cast<int>(beginEndBorders[1])));
int ix = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(ox) - static_cast<int>(beginEndBorders[0])));
outputValue = input[iIndex + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Reflect) {
int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[1]);
int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[0]);
if (ix < 0)
ix = 0 - ix;
......@@ -77,7 +77,7 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder
outputValue = input[iIndex + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Wrap) {
int ix = (static_cast<int>(dims[2]) + static_cast<int>(ox) - static_cast<int>(beginEndBorders[1])) % static_cast<int>(dims[2]);
int ix = (static_cast<int>(dims[2]) + static_cast<int>(ox) - static_cast<int>(beginEndBorders[0])) % static_cast<int>(dims[2]);
outputValue = input[iIndex + static_cast<std::size_t>(ix)];
}
......@@ -120,8 +120,8 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
const std::size_t oySize = dims[2] + beginEndBorders[0] + beginEndBorders[1];
const std::size_t oxSize = dims[3] + beginEndBorders[2] + beginEndBorders[3];
const std::size_t oySize = dims[2] + beginEndBorders[0] + beginEndBorders[2];
const std::size_t oxSize = dims[3] + beginEndBorders[1] + beginEndBorders[3];
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < dims[1]; ++ch) {
......@@ -135,22 +135,22 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
O outputValue = static_cast<O>(borderValue);
if (borderType == PadBorderType::Constant) {
std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]);
std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]);
std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1]);
std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0]);
if (ix >= 0 && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0 && iy < static_cast<std::int32_t>(dims[2])) {
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
}
else if (borderType == PadBorderType::Edge) {
std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])));
std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])));
std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1])));
std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0])));
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Reflect) {
std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]);
std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]);
std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1]);
std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0]);
if (ix < 0)
ix = 0 - ix;
......@@ -164,8 +164,8 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Wrap) {
std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])) % static_cast<std::int32_t>(dims[3]);
std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[2]);
std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[3]);
std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0])) % static_cast<std::int32_t>(dims[2]);
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
......
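
The hunk above swaps which beginEndBorders entries are treated as the begin borders; as read from the fixed code, the ordering is {beginH, beginW, endH, endW} (ONNX-style begins-then-ends). A minimal standalone sketch of constant 2D padding with that ordering, using illustrative names:

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative reference; b = {beginH, beginW, endH, endW}, ONNX-style.
std::vector<float> pad2d_constant(const std::vector<float>& in,
                                  std::size_t H, std::size_t W,
                                  const std::size_t b[4], float value) {
    const std::size_t oH = H + b[0] + b[2];
    const std::size_t oW = W + b[1] + b[3];
    std::vector<float> out(oH * oW, value);                   // pre-fill with border value
    for (std::size_t oy = 0; oy < oH; ++oy) {
        for (std::size_t ox = 0; ox < oW; ++ox) {
            // Map back to input coordinates; copy only in-range pixels.
            const std::int64_t iy = static_cast<std::int64_t>(oy) - static_cast<std::int64_t>(b[0]);
            const std::int64_t ix = static_cast<std::int64_t>(ox) - static_cast<std::int64_t>(b[1]);
            if (iy >= 0 && iy < static_cast<std::int64_t>(H) &&
                ix >= 0 && ix < static_cast<std::int64_t>(W))
                out[oy * oW + ox] =
                    in[static_cast<std::size_t>(iy) * W + static_cast<std::size_t>(ix)];
        }
    }
    return out;
}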
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ROUNDIMPL_H_
#define AIDGE_CPU_OPERATOR_ROUNDIMPL_H_
#include <cstddef> // std::size_t
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using RoundImpl_cpu = OperatorImpl_cpu<Round_Op,
void(const std::size_t, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Round_Op, "cpu", Aidge::RoundImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ROUNDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_
#include <cmath> // std::nearbyint
#include <cstddef> // std::size_t
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/RoundImpl.hpp"
namespace Aidge {
template <class I, class O>
void RoundImpl_cpu_forward_kernel(const std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLength; ++i) {
// std::round() rounds halves away from zero, whereas ONNX Round requires
// halves-to-even; std::nearbyint() follows the current rounding mode
// (round-to-nearest-even by default)
output[i] = static_cast<O>(std::nearbyint(static_cast<float>(input[i])));
}
}
REGISTRAR(RoundImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::RoundImpl_cpu_forward_kernel<float, float>,nullptr});
REGISTRAR(RoundImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::RoundImpl_cpu_forward_kernel<double, double>,nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_ */
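
A quick standalone check of the halves behaviour the comment above refers to: std::round rounds halves away from zero, while std::nearbyint under the default round-to-nearest-even mode matches ONNX Round:

#include <cmath>
#include <cstdio>

int main() {
    const float xs[] = {0.5f, 1.5f, 2.5f, -0.5f, -1.5f, -2.5f};
    for (float x : xs)
        std::printf("x=% .1f round=% .1f nearbyint=% .1f\n",
                    x, std::round(x), std::nearbyint(x));
    // round:     1  2  3  -1  -2  -3   (halves away from zero)
    // nearbyint: 0  2  2  -0  -2  -2   (halves to even, default FE_TONEAREST)
}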
......@@ -89,13 +89,13 @@ void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts,
}
REGISTRAR(SliceImpl_cpu,
{DataType::Float32},
{{DataType::Float32, DataType::Any}, {DataType::Float32}},
{ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(SliceImpl_cpu,
{DataType::Float64},
{{DataType::Float64, DataType::Any}, {DataType::Float64}},
{ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(SliceImpl_cpu,
{DataType::Int32},
{{DataType::Int32, DataType::Any}, {DataType::Int32}},
{ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/operator/Atan.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/AtanImpl.hpp"
#include "aidge/backend/cpu/operator/AtanImpl_kernels.hpp"
template <>
void Aidge::AtanImpl_cpu::forward() {
const Atan_Op& op_ = dynamic_cast<const Atan_Op&>(mOp);
std::shared_ptr<Tensor> in0 = op_.getInput(0);
std::shared_ptr<Tensor> out0 = op_.getOutput(0);
AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type
const auto impl = Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.forward(in0->size(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::AtanImpl_cpu::backward() {
const Atan_Op& op_ = dynamic_cast<const Atan_Op&>(mOp);
std::shared_ptr<Tensor> out0 = op_.getOutput(0);
std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
// Find the correct kernel type
const auto impl = Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <vector>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Clip.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/backend/cpu/operator/ClipImpl.hpp"
#include "aidge/backend/cpu/operator/ClipImpl_kernels.hpp"
template<>
void Aidge::ClipImpl_cpu::forward() {
const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
std::shared_ptr<Tensor> in0 = op_.getInput(0);
std::shared_ptr<Tensor> out0 = op_.getOutput(0);
AIDGE_ASSERT(in0, "missing input #0");
/*AIDGE_ASSERT(in1, "missing input #1 -> Min value empty shape Tensor");
AIDGE_ASSERT(in2, "missing input #2 -> Max value empty shape Tensor");*/
// Find the correct kernel type
const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.forward(
op_.min(),
op_.max(),
getCPUPtr(mOp.getRawInput(0)),
in0->size(),
getCPUPtr(mOp.getRawOutput(0))
);
}
template<>
void Aidge::ClipImpl_cpu::backward() {
const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
std::shared_ptr<Tensor> in0 = op_.getInput(0);
std::shared_ptr<Tensor> out0 = op_.getOutput(0);
std::shared_ptr<Tensor> gra_in0 = op_.getInput(0)->grad();
std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
// Find the correct kernel type
const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.backward(
op_.min(),
op_.max(),
gra_in0->size(),
getCPUPtr(in0),
getCPUPtr(gra_out0),
getCPUPtr(gra_in0)
);
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/operator/RoundImpl.hpp"
#include "aidge/backend/cpu/operator/RoundImpl_kernels.hpp"
template <>
void Aidge::RoundImpl_cpu::forward() {
std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0));
AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type
const auto impl = Registrar<RoundImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.forward(in0->size(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::RoundImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Round_Op on backend cpu");
}
\ No newline at end of file
......@@ -35,7 +35,7 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") {
std::uniform_int_distribution<int> boolDist(0,1);
// Create MatMul Operator
std::shared_ptr<Node> mySub = Add(2);
std::shared_ptr<Node> mySub = Add();
auto op = std::static_pointer_cast<OperatorTensor>(mySub-> getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
......@@ -154,6 +154,10 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") {
Tensor T4(T1->dims());
T4.setDataType(DataType::Float64);
REQUIRE_THROWS(*T0 + T4);
delete[] array0;
delete[] array1;
delete[] result;
}
}
......
......@@ -39,17 +39,6 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
} //
}); //
SECTION("One input") {
std::shared_ptr<Node> myAdd = Add(1);
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input1);
op->setBackend("cpu");
op->setDataType(DataType::Int32);
myAdd->forward();
REQUIRE(*(op->getOutput(0)) == *input1);
}
SECTION("Two inputs") {
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
{
......@@ -71,7 +60,7 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
}
});
std::shared_ptr<Node> myAdd = Add(2);
std::shared_ptr<Node> myAdd = Add();
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input1);
op->associateInput(1, input1);
......@@ -82,39 +71,6 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
}
SECTION("Three inputs") {
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
{
{
{{ 60, 141},{ 63, 144},{ 66, 147}},
{{ 69, 150},{ 72, 153},{ 75, 156}},
{{ 78, 159},{ 81, 162},{ 84, 165}}
},
{
{{ 87, 168},{ 90, 171},{ 93, 174}},
{{ 96, 177},{ 99, 180},{102, 183}},
{{105, 186},{108, 189},{111, 192}}
},
{
{{114, 195},{117, 198},{120, 201}},
{{123, 204},{126, 207},{129, 210}},
{{132, 213},{135, 216},{138, 219}}
}
}
});
std::shared_ptr<Node> myAdd = Add(3);
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input1);
op->associateInput(1, input1);
op->associateInput(2, input1);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
myAdd->forward();
REQUIRE(*op->getOutput(0) == *expectedOutput);
}
SECTION("Broadcasting") {
std::shared_ptr<Tensor> input_0 = std::make_shared<Tensor>(Array4D<int,3,1,3,2> {
{ //
......@@ -139,7 +95,7 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
} //
}); //
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{100,200}});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
{ //
{ //
......@@ -160,16 +116,23 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
} //
}); //
std::shared_ptr<Node> myAdd = Add(3);
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input_0);
op->associateInput(1, input_1);
op->associateInput(2, input_2);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
myAdd->forward();
op->getOutput(0)->print();
std::shared_ptr<Node> myAdd_0 = Add();
std::shared_ptr<Node> myAdd_1 = Add();
auto op_0 = std::static_pointer_cast<OperatorTensor>(myAdd_0 -> getOperator());
auto op_1 = std::static_pointer_cast<OperatorTensor>(myAdd_1 -> getOperator());
op_0->associateInput(0, input_0);
op_0->associateInput(1, input_1);
op_1->associateInput(0, input_2);
op_1->associateInput(1, op_0->getOutput(0));
op_0->setDataType(DataType::Int32);
op_1->setDataType(DataType::Int32);
op_0->setBackend("cpu");
op_1->setBackend("cpu");
myAdd_0->forward();
myAdd_1->forward();
op_1->getOutput(0)->print();
expectedOutput->print();
REQUIRE(*op->getOutput(0) == *expectedOutput);
REQUIRE(*op_1->getOutput(0) == *expectedOutput);
}
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Atan.hpp"
#include "aidge/backend/cpu.hpp"
#include <memory>
using namespace Aidge;
TEST_CASE("[cpu/operator] Atan(forward)") {
SECTION("1D Tensor") {
std::shared_ptr<Tensor> input0 =
std::make_shared<Tensor>(Array1D<float, 10>{
{0.41384590, 0.43120754, 0.93762982, 0.31049860, 0.77547199,
0.09514862, 0.16145366, 0.42776686, 0.43487436, 0.41170865}});
std::shared_ptr<Tensor> expectedOutput =
std::make_shared<Tensor>(Array1D<float, 10>{
{0.39238522, 0.40711672, 0.75322037, 0.30106049, 0.65960488,
0.09486303, 0.16007232, 0.40421187, 0.4102045, 0.39055911}});
std::shared_ptr<Node> myAtan = Atan();
auto op = std::static_pointer_cast<OperatorTensor>(myAtan->getOperator());
op->associateInput(0, input0);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
myAtan->forward();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr =
static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
}
}
SECTION("3D Tensor") {
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
Array3D<float, 2, 2, 3>{{{
{0.97037154, 0.86208081, 0.77767169},
{0.38160080, 0.11422747, 0.77284443},
},
{{0.51592529, 0.72543722, 0.54641193},
{0.93866944, 0.97767913, 0.34172094}}}});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
Array3D<float, 2, 2, 3>{{{{0.77036231, 0.71146592, 0.66097706},
{0.36454508, 0.11373451, 0.65796196}},
{{0.47630652, 0.62759472, 0.50008428},
{0.75377332, 0.77411225, 0.32928031}}}});
std::shared_ptr<Node> myAtan = Atan();
auto op = std::static_pointer_cast<OperatorTensor>(myAtan->getOperator());
op->associateInput(0, input0);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
myAtan->forward();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr =
static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
}
}
}