Commit ab8dd8d8 authored by Olivier BICHLER

Merge branch 'fix_operators' into 'dev'

fix failed onnx tests

See merge request !130
parents 8a669993 f3de3e10
3 merge requests: !166 Update 0.5.0 -> 0.6.0, !136 Add selection mechanism in graph, !130 fix failed onnx tests
Pipeline #66369 waiting for manual action
Showing changed files with 816 additions and 289 deletions
@@ -29,6 +29,7 @@
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
 #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
 #include "aidge/backend/cpu/operator/DivImpl.hpp"
+#include "aidge/backend/cpu/operator/EqualImpl.hpp"
 #include "aidge/backend/cpu/operator/ErfImpl.hpp"
 #include "aidge/backend/cpu/operator/ExpandImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
...
@@ -20,7 +20,7 @@ namespace Aidge {
 namespace {
 // suppose values are contiguous in memory
 template <class I, class O>
-void equal_contiguous_arrays(const std::size_t input1size,
+void and_contiguous_arrays(const std::size_t input1size,
                            const std::size_t input2size,
                            const std::size_t output1size,
                            const I* input1,
@@ -31,14 +31,14 @@ void equal_contiguous_arrays(const std::size_t input1size,
     {
         const std::size_t in1_id = (input1size != 1) ? i : 0;
         const std::size_t in2_id = (input2size != 1) ? i : 0;
-        output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]);
+        output[i] = static_cast<O>(input1[in1_id] && input2[in2_id]);
     }
 }
 }

 template <class I, class O>
-void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
+void AndImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
                                 std::vector<std::size_t> dims1,
                                 const std::vector<std::size_t>& outputDims,
                                 const void* input0_,
@@ -60,9 +60,8 @@ void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
     // special case for equal dimensions, the kernel is called with the entire arrays at once
     if (dims0 == dims1) {
         const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
-        for (std::size_t i = 0; i < input0_contiguous_size; ++i)
-        {
-            output[i] = static_cast<O>(input_0[i] == input_1[i]);
+        for (std::size_t i = 0; i < input0_contiguous_size; ++i) {
+            output[i] = static_cast<O>(input_0[i] && input_1[i]);
         }
         return;
     }
@@ -126,7 +125,7 @@ void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
     std::size_t dim = contiguousIdx - 1;
     const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
     for (std::size_t stack = 0; stack < nbStacks;) {
-        equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
+        and_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
                     input_0 + offsetIn0*input0_contiguous_size,
                     input_1 + offsetIn1*input1_contiguous_size,
                     output + offsetOut*output_contiguous_size);
@@ -146,17 +145,17 @@ void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
 // Kernels registration to implementation entry point
 REGISTRAR(AndImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr});
+    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
+    {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<float, float>, nullptr});
 REGISTRAR(AndImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr});
+    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
+    {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<double, double>, nullptr});
 REGISTRAR(AndImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
+    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
+    {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
 REGISTRAR(AndImpl_cpu,
-    {DataType::Int64},
-    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
+    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
+    {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
 } // namespace Aidge
...
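The rename above also changes the semantics: the And kernel previously reused the Equal comparison (==) and now computes a logical conjunction (&&). A minimal sketch of what the renamed kernel computes, assuming the header above is available as aidge/backend/cpu/operator/AndImpl_kernels.hpp in an Aidge source tree (the input values are made up for illustration):

#include "aidge/backend/cpu/operator/AndImpl_kernels.hpp"
#include <iostream>

int main() {
    const float a[4] = {1.f, 0.f, 1.f, 0.f};
    const float b[4] = {1.f, 1.f, 0.f, 0.f};
    float out[4] = {};
    // dims0 == dims1, so the kernel takes the fast elementwise path.
    Aidge::AndImpl_cpu_forward_kernel<float, float>({4}, {4}, {4}, a, b, out);
    for (float v : out) std::cout << v << ' ';  // prints: 1 0 0 0
    std::cout << '\n';
}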
@@ -28,8 +28,10 @@ namespace Aidge {
 using AvgPooling2D_Op = AvgPooling_Op<2>;
 using AvgPoolingImpl2D_cpu = OperatorImpl_cpu<AvgPooling_Op<2>,
     void(const std::array<DimSize_t, 2>&,
+         const std::array<DimSize_t, 2>&,
          const std::array<DimSize_t, 2>&,
          const std::array<DimSize_t, 4>&,
+         bool,
          const void *,
          void *)>;
...
@@ -35,66 +35,68 @@ namespace Aidge {
 template <class I, class O>
 void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
                                          const std::array<DimSize_t, 2>& kernelDims,
+                                         const std::array<DimSize_t, 2>& dilations,
                                          const std::array<DimSize_t, 4> &dims,
+                                         bool ceilMode,
                                          const void *input_,
                                          void *output_) {
-    // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);

     // output H size
     const std::size_t oxSize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
-                    static_cast<float>(strideDims[0])));
+        ceilMode
+            ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) /
+                                                 static_cast<float>(strideDims[0])))
+            : static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) /
+                                                  static_cast<float>(strideDims[0])));
     // output W size
     const std::size_t oySize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
-                    static_cast<float>(strideDims[1])));
-
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input (batch, ch, Xin, Yin)
-    // weight (outCh, ch, kernelX, kernelY)
-    // does not take Dilation attribute into account
+        ceilMode
+            ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) /
+                                                 static_cast<float>(strideDims[1])))
+            : static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) /
+                                                  static_cast<float>(strideDims[1])));

     using signedsize = std::make_signed<std::size_t>::type;

     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
-            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0);
+            const std::size_t oIndex = (ch + batch * dims[1]) * oxSize * oySize;
+            const std::size_t iIndex = (ch + batch * dims[1]) * dims[2] * dims[3];
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                const signedsize difx = static_cast<signedsize>(-ox * strideDims[0]);
                 const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
                 const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
                 for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
+                    const signedsize dify = static_cast<signedsize>(-oy * strideDims[1]);
                     const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
                     const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
-                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                    const std::size_t oIndexFull = oIndex + ox * oySize + oy;
                     const std::size_t ix = ox * strideDims[0];
                     const std::size_t iy = oy * strideDims[1];

-                    if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                        output[oIndexFull] += static_cast<O>(
-                            input[iIndex + (ix+0)*dims[3] + (iy+0)] +
-                            input[iIndex + (ix+0)*dims[3] + (iy+1)] +
-                            input[iIndex + (ix+0)*dims[3] + (iy+2)] +
-                            input[iIndex + (ix+1)*dims[3] + (iy+0)] +
-                            input[iIndex + (ix+1)*dims[3] + (iy+1)] +
-                            input[iIndex + (ix+1)*dims[3] + (iy+2)] +
-                            input[iIndex + (ix+2)*dims[3] + (iy+0)] +
-                            input[iIndex + (ix+2)*dims[3] + (iy+1)] +
-                            input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9);
-                    } else {
-                        for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
-                            for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
-                            }
-                        }
-                        // padding not used
-                        output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin);
-                    }
+                    O sum = static_cast<O>(0);
+                    std::size_t count = 0;
+
+                    for (unsigned int sy = syMin; sy < syMax; ++sy) {
+                        for (unsigned int sx = sxMin; sx < sxMax; ++sx) {
+                            // Apply dilation factor
+                            const std::size_t dilated_sx = sx * dilations[0];
+                            const std::size_t dilated_sy = sy * dilations[1];
+
+                            // Ensure within bounds
+                            if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) {
+                                sum += static_cast<O>(input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]);
+                                ++count;
+                            }
+                        }
+                    }
+
+                    output[oIndexFull] = count > 0 ? sum / static_cast<O>(count) : 0;
                 }
             }
         }
...
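Both pooling kernels now share the same output-size rule: out = floor-or-ceil((in - (kernel - 1) * dilation - 1 + stride) / stride). A standalone sketch reproducing that rule (hypothetical helper, not part of this commit):

#include <cmath>
#include <cstddef>
#include <iostream>

// out = floor_or_ceil((in - (k - 1) * d - 1 + s) / s)
std::size_t poolOutputSize(std::size_t in, std::size_t k, std::size_t s,
                           std::size_t d, bool ceilMode) {
    const float q = static_cast<float>(in - (k - 1) * d - 1 + s) /
                    static_cast<float>(s);
    return static_cast<std::size_t>(ceilMode ? std::ceil(q) : std::floor(q));
}

int main() {
    // 5x5 input, 2x2 kernel, stride 2, dilation 1: ceil mode keeps the
    // partial right/bottom windows (3), floor drops them (2) -- the 3x3 vs
    // 2x2 outputs checked in the tests below.
    std::cout << poolOutputSize(5, 2, 2, 1, true)  << '\n';  // 3
    std::cout << poolOutputSize(5, 2, 2, 1, false) << '\n';  // 2
}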
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_H_
#define AIDGE_CPU_OPERATOR_EQUALIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Equal.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using EqualImpl_cpu = OperatorImpl_cpu<Equal_Op,
void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Equal_Op, "cpu", Aidge::EqualImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_
#include "aidge/backend/cpu/operator/EqualImpl.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
namespace {
// suppose values are contiguous in memory
template <class I, class O>
void equal_contiguous_arrays(const std::size_t input1size,
const std::size_t input2size,
const std::size_t output1size,
const I* input1,
const I* input2,
O* output)
{
for (std::size_t i = 0; i < output1size; ++i)
{
const std::size_t in1_id = (input1size != 1) ? i : 0;
const std::size_t in2_id = (input2size != 1) ? i : 0;
output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]);
}
}
}
template <class I, class O>
void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
std::vector<std::size_t> dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
void* output_) {
const I* input_0 = static_cast<const I*>(input0_);
const I* input_1 = static_cast<const I*>(input1_);
O* output = static_cast<O*>(output_);
// [5,2,1,7] & [2,6,7]
// 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
// 2. Find the highest equal dimension -> 3
// Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
// 3. Compute the highest number of contiguous data -> 7
// 4. Compute stride and offset step for the broadcast mechanism
// 5. Call a simple kernel
// special case for equal dimensions, the kernel is called with the entire arrays at once
if (dims0 == dims1) {
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t i = 0; i < input0_contiguous_size; ++i)
{
output[i] = static_cast<O>(input_0[i] == input_1[i]);
}
return;
}
// set dimensions to be of equal size by filling the smallest one with ones.
if (dims0.size() > dims1.size()) {
dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
}
else if (dims1.size() > dims0.size()) {
dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
}
const std::size_t nbDims = dims0.size();
// Find the highest equal dimension
// std::size_t contiguousIdx = nbDims - 1;
std::size_t contiguousIdx = nbDims;
while (contiguousIdx-- > 0) {
// for (; contiguousIdx+1 > 0; --contiguousIdx) {
if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
--contiguousIdx;
}
}
break;
}
}
++contiguousIdx;
// Compute the highest number of contiguous data for each Tensor
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
// initialize strides to iterate through data because of broadcasting
std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
if (contiguousIdx > 0) {
stride_post0[contiguousIdx - 1] = 1;
stride_post1[contiguousIdx - 1] = 1;
for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
}
for (std::size_t i = 0; i != contiguousIdx; ++i) {
stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
}
}
// variables for arrays offsets
std::size_t offsetIn0 = 0;
std::size_t offsetIn1 = 0;
std::size_t offsetOut = 0;
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
input_0 + offsetIn0*input0_contiguous_size,
input_1 + offsetIn1*input1_contiguous_size,
output + offsetOut*output_contiguous_size);
if (++stack < nbStacks) {
std::size_t tmp_stack = stack;
while(tmp_stack % outputDims[dim] == 0) {
tmp_stack /= outputDims[dim];
dim--;
}
offsetIn0 += stride_step0[dim];
offsetIn1 += stride_step1[dim];
++offsetOut;
dim = contiguousIdx - 1;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(EqualImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(EqualImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(EqualImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
REGISTRAR(EqualImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
{ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ */
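A minimal sketch of the broadcasting path of this kernel, assuming the header builds inside an Aidge source tree (the input values are made up): a {2,3} input compared against a {3} input, so the second operand is re-read for every row, as in the [5,2,1,7] & [2,6,7] walk-through in the comments above.

#include "aidge/backend/cpu/operator/EqualImpl_kernels.hpp"
#include <iostream>

int main() {
    const float in0[6] = {1.f, 2.f, 3.f, 4.f, 2.f, 6.f};  // dims {2, 3}
    const float in1[3] = {1.f, 2.f, 6.f};                 // dims {3}
    float out[6] = {};
    Aidge::EqualImpl_cpu_forward_kernel<float, float>(
        {2, 3}, {3}, {2, 3}, in0, in1, out);
    for (float v : out) std::cout << v << ' ';  // prints: 1 1 0 0 1 1
    std::cout << '\n';
}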
@@ -28,6 +28,7 @@ namespace Aidge {
 using MaxPooling2D_Op = MaxPooling_Op<2>;
 using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>,
     void(const std::array<DimSize_t, 2>&,
+         const std::array<DimSize_t, 2>&,
          const std::array<DimSize_t, 2>&,
          const bool,
          const std::array<DimSize_t, 4> &,
...
@@ -16,6 +16,7 @@
 #include <cmath>
 #include <tuple>

 #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/data/Data.hpp"
@@ -35,28 +36,29 @@ namespace Aidge {
 template <class I, class O>
 void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
                                          const std::array<DimSize_t, 2>& kernelDims,
-                                         const bool /*ceilMode*/,
+                                         const std::array<DimSize_t, 2>& dilations,
+                                         const bool ceilMode,
                                          const std::array<DimSize_t, 4> &dims,
                                          const void *input_,
                                          void *output_) {
-    // FIXME: missing convolution parameters as arguments
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);

     // output H size
     const std::size_t oxSize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
-                    static_cast<float>(strideDims[0])));
+        ceilMode
+            ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) /
+                                                 static_cast<float>(strideDims[0])))
+            : static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) /
+                                                  static_cast<float>(strideDims[0])));
     // output W size
     const std::size_t oySize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
-                    static_cast<float>(strideDims[1])));
-
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input (batch, ch, Xin, Yin)
-    // weight (outCh, ch, kernelX, kernelY)
-    // does not take Dilation parameter into account
+        ceilMode
+            ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) /
+                                                 static_cast<float>(strideDims[1])))
+            : static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) /
+                                                  static_cast<float>(strideDims[1])));

     using signedsize = std::make_signed<std::size_t>::type;

     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
@@ -77,12 +79,15 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
                     I poolValue(0.0);
                     bool valid = false;

-                    for (unsigned int channel = 0; channel < dims[1];
-                         ++channel){
-                        for (unsigned int sy = syMin; sy < syMax; ++sy) {
-                            for (unsigned int sx = sxMin; sx < sxMax; ++sx)
-                            {
-                                const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
+                    for (unsigned int sy = syMin; sy < syMax; ++sy) {
+                        for (unsigned int sx = sxMin; sx < sxMax; ++sx) {
+                            // Apply dilation factor to kernel indices
+                            const std::size_t dilated_sx = sx * dilations[0];
+                            const std::size_t dilated_sy = sy * dilations[1];
+
+                            // Ensure indices are within bounds
+                            if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) {
+                                const I value = input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)];
                                 if (!valid || value > poolValue) {
                                     poolValue = value;
@@ -98,106 +103,6 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
         }
     }
//N2D2 version
/*
template <class T>
void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha,
const Tensor<T>&
inputs,
const Descriptor& desc,
const T* beta,
Tensor<T>& outputs,
Tensor<ArgMax>& argMax,
bool useArgMax,
const Tensor<bool>& maps)
{
const unsigned int size = inputs.dimB() * outputs.dimZ();
#if defined(_OPENMP) && _OPENMP >= 200805
#pragma omp parallel for collapse(2) if (size > 16)
#else
#pragma omp parallel for if (inputs.dimB() > 4 && size > 16)
#endif
for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) {
for (unsigned int output = 0; output < outputs.dimZ(); ++output) {
for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) {
for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) {
const unsigned int sxMin = (unsigned int)std::max(
desc.padding[0] - (int)(ox * desc.stride[0]), 0);
const unsigned int syMin = (unsigned int)std::max(
desc.padding[1] - (int)(oy * desc.stride[1]), 0);
const unsigned int sxMax = Utils::clamp
<int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0],
0,
desc.pool[0]);
const unsigned int syMax = Utils::clamp
<int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1],
0,
desc.pool[1]);
const int ix = (int)(ox * desc.stride[0]) - desc.padding[0];
const int iy = (int)(oy * desc.stride[1]) - desc.padding[1];
T poolValue(0.0);
// For each output, compute the pool value
if (useArgMax) {
const ArgMax inputMax
= argMax(ox, oy, output, batchPos);
if (inputMax.valid) {
poolValue = inputs(inputMax.ix,
inputMax.iy,
inputMax.channel,
batchPos);
}
}
else {
unsigned int ixMax = 0;
unsigned int iyMax = 0;
unsigned int channelMax = 0;
bool valid = false;
for (unsigned int channel = 0; channel < inputs.dimZ();
++channel)
{
if (!maps.empty() && !maps(output, channel))
continue;
for (unsigned int sy = syMin; sy < syMax; ++sy) {
for (unsigned int sx = sxMin; sx < sxMax; ++sx)
{
const T value = inputs(ix + sx,
iy + sy,
channel,
batchPos);
if (!valid || value > poolValue) {
poolValue = value;
valid = true;
ixMax = ix + sx;
iyMax = iy + sy;
channelMax = channel;
}
}
}
}
argMax(ox, oy, output, batchPos)
= ArgMax(ixMax, iyMax, channelMax, valid);
}
outputs(ox, oy, output, batchPos)
= (*alpha) * poolValue
+ (*beta) * outputs(ox, oy, output, batchPos);
}
}
}
}
}
*/
 // Kernels registration to implementation entry point
 REGISTRAR(MaxPoolingImpl2D_cpu,
...
@@ -32,7 +32,9 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() {
     // Call kernel
     impl.forward(op_.strideDims(),
                 op_.kernelDims(),
+                op_.dilations(),
                 op_.getInput(0)->template dims<4>(),
+                op_.ceilMode(),
                 getCPUPtr(op_.getInput(0)),
                 getCPUPtr(op_.getOutput(0)));
 }
...
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/operator/Equal.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/EqualImpl.hpp"
#include "aidge/backend/cpu/operator/EqualImpl_kernels.hpp"
template <>
void Aidge::EqualImpl_cpu::forward() {
const Equal_Op& op = static_cast<const Equal_Op&>(mOp);
// Check inputs
AIDGE_ASSERT(op.getInput(0), "missing input in Equal operator");
AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Equal forward because the 0-th input has no implementation.");
AIDGE_ASSERT(op.getInput(1), "missing input in Equal operator");
AIDGE_ASSERT(op.getInput(1)->hasImpl(), "cannot run Equal forward because the 1st input has no implementation.");
AIDGE_ASSERT(op.getInput(1)->dataType() == op.getInput(0)->dataType(), "Cannot Equal inputs with two different data types.");
// Find the correct kernel type
const auto impl = Registrar<EqualImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
// call to forward(). We might put the following shared_ptr as members of
// this class to avoid that.
std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
const auto& input0 = op.getInput(0)->refCastFrom(input0Fallback, *op.getInput(0));
const auto& input1 = op.getInput(1)->refCastFrom(input1Fallback, *op.getInput(1));
impl.forward(op.getInput(0)->dims(),
op.getInput(1)->dims(),
op.getOutput(0)->dims(),
input0.getImpl()->rawPtr(),
input1.getImpl()->rawPtr(),
getCPUPtr(op.getRawOutput(0)));
}
template <>
void Aidge::EqualImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Equal_Op on backend cpu");
}
@@ -30,6 +30,7 @@ void Aidge::MaxPoolingImpl2D_cpu::forward() {
     // Call kernel
     impl.forward(op_.strideDims(),
                 op_.kernelDims(),
+                op_.dilations(),
                 op_.ceilMode(),
                 op_.getInput(0)->template dims<4>(),
                 getCPUPtr(mOp.getRawInput(0)),
...
@@ -26,75 +26,92 @@
 using namespace Aidge;

 TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") {
-    SECTION("ForwardDims")
-    {
+    SECTION("ForwardDims") {
         constexpr std::uint16_t NBTRIALS = 10;
         // Create a random number generator
         std::random_device rd;
         std::mt19937 gen(rd());
-        std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-        std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
-        std::uniform_int_distribution<int> boolDist(0,1);
+        std::uniform_int_distribution<int> boolDist(0, 1); // Use 0 for false, 1 for true
+        std::uniform_int_distribution<std::size_t> dimSizeDist(2, 10);
+        std::uniform_int_distribution<std::size_t> nbDimsDist(1, 5);

         SECTION("Same dimensions") {
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 DimSize_t nbDims = nbDimsDist(gen);
                 std::vector<DimSize_t> dims(nbDims);
-                for (std::size_t i = 0; i < nbDims; i++) {
+                for (std::size_t i = 0; i < nbDims; ++i) {
                     dims[i] = dimSizeDist(gen);
                 }
+                const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                float* array0 = new float[nb_elements];
+                float* array1 = new float[nb_elements];
+                for (std::size_t i = 0; i < nb_elements; ++i) {
+                    array0[i] = boolDist(gen);
+                    array1[i] = boolDist(gen);
+                }

                 std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims);
-                myInput1->setBackend("cpu");
-                myInput1->setDataType(DataType::Float32);
-                myInput1->zeros();
                 std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims);
-                myInput2->setBackend("cpu");
-                myInput2->setDataType(DataType::Float32);
-                myInput2->zeros();
+                myInput1->setDataType(DataType::Float32);
+                myInput2->setDataType(DataType::Float32);
+                myInput1->setBackend("cpu");
+                myInput2->setBackend("cpu");
+                myInput1->getImpl()->setRawPtr(array0, nb_elements);
+                myInput2->getImpl()->setRawPtr(array1, nb_elements);

                 std::shared_ptr<Node> myAnd = And();
-                auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator());
-                op->associateInput(0,myInput1);
-                op->associateInput(1,myInput2);
+                auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator());
+                op->associateInput(0, myInput1);
+                op->associateInput(1, myInput2);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
                 op->forwardDims();

                 const auto outputDims = op->getOutput(0)->dims();
                 REQUIRE(outputDims == dims);
+
+                delete[] array0;
+                delete[] array1;
             }
         }

         SECTION("Broadcasting") {
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 DimSize_t nbDims = nbDimsDist(gen);
                 std::vector<DimSize_t> dims1(nbDims, 1);
                 std::vector<DimSize_t> dims2(nbDims, 1);
                 std::vector<DimSize_t> expectedOutDims;
-                for (std::size_t i = 0; i < nbDims; i++) {
+                for (std::size_t i = 0; i < nbDims; ++i) {
                     DimSize_t dim = dimSizeDist(gen);
-                    if (boolDist(gen)) {
-                        dims1[i] = dim;
-                    }
-                    if (boolDist(gen)) {
-                        dims2[i] = dim;
-                    }
-                    expectedOutDims.push_back(std::max(dims1[i],dims2[i]));
+                    if (boolDist(gen)) dims1[i] = dim;
+                    if (boolDist(gen)) dims2[i] = dim;
+                    expectedOutDims.push_back(std::max(dims1[i], dims2[i]));
                 }
+                const std::size_t nb_elements0 = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements1 = std::accumulate(dims2.cbegin(), dims2.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                float* array0 = new float[nb_elements0];
+                float* array1 = new float[nb_elements1];
+                for (std::size_t i = 0; i < nb_elements0; ++i) {
+                    array0[i] = boolDist(gen);
+                }
+                for (std::size_t i = 0; i < nb_elements1; ++i) {
+                    array1[i] = boolDist(gen);
+                }

                 std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1);
-                myInput1->setBackend("cpu");
-                myInput1->setDataType(DataType::Float32);
-                myInput1->zeros();
                 std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2);
-                myInput2->setBackend("cpu");
-                myInput2->setDataType(DataType::Float32);
-                myInput2->zeros();
+                myInput1->setDataType(DataType::Float32);
+                myInput2->setDataType(DataType::Float32);
+                myInput1->setBackend("cpu");
+                myInput2->setBackend("cpu");
+                myInput1->getImpl()->setRawPtr(array0, nb_elements0);
+                myInput2->getImpl()->setRawPtr(array1, nb_elements1);

                 std::shared_ptr<Node> myAnd = And();
-                auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator());
-                op->associateInput(0,myInput1);
-                op->associateInput(1,myInput2);
+                auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator());
+                op->associateInput(0, myInput1);
+                op->associateInput(1, myInput2);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
@@ -102,110 +119,67 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") {
                 const auto outputDims = op->getOutput(0)->dims();
                 REQUIRE(outputDims == expectedOutDims);
+
+                delete[] array0;
+                delete[] array1;
             }
         }
     }

     SECTION("Same size inputs") {
-        std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-            {
-                {
-                    {{20, 15},{31, 11},{22, 49}},
-                    {{41, 10},{24, 51},{27, 52}},
-                    {{26, 53},{27, 54},{28, 55}}
-                },
-                {
-                    {{29, 56},{30, 57},{31, 58}},
-                    {{32, 59},{33, 60},{34, 61}},
-                    {{35, 62},{36, 63},{37, 64}}
-                },
-                {
-                    {{38, 65},{39, 66},{40, 67}},
-                    {{41, 68},{42, 69},{43, 70}},
-                    {{44, 71},{45, 72},{46, 73}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-            {
-                {
-                    {{20, 47},{21, 48},{22, 49}},
-                    {{23, 50},{24, 51},{25, 52}},
-                    {{17, 53},{27, 26},{14, 33}}
-                },
-                {
-                    {{29, 56},{30, 57},{31, 58}},
-                    {{72, 44},{33, 20},{27, 55}},
-                    {{35, 24},{25, 63},{28, 64}}
-                },
-                {
-                    {{32, 65},{39, 66},{40, 70}},
-                    {{41, 53},{42, 60},{34, 70}},
-                    {{44, 71},{30, 12},{46, 73}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-            {
-                {
-                    {{1, 0},{0, 0},{1, 1}},
-                    {{0, 0},{1, 1},{0, 1}},
-                    {{0, 1},{1, 0},{0, 0}}
-                },
-                {
-                    {{1, 1},{1, 1},{1, 1}},
-                    {{0, 0},{1, 0},{0, 0}},
-                    {{1, 0},{0, 1},{0, 1}}
-                },
-                {
-                    {{0, 1},{1, 1},{1, 0}},
-                    {{1, 0},{1, 0},{0, 1}},
-                    {{1, 1},{0, 0},{1, 1}}
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{
+            {
+                {{{1, 0}, {0, 1}},
+                 {{1, 1}, {0, 0}}},
+                {{{0, 1}, {1, 0}},
+                 {{1, 0}, {0, 1}}}}
+        });
+        std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{
+            {
+                {{{1, 1}, {0, 0}},
+                 {{0, 1}, {1, 1}}},
+                {{{1, 1}, {0, 0}},
+                 {{0, 1}, {1, 0}}}}
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{
+            {
+                {{{1, 0}, {0, 0}},
+                 {{0, 1}, {0, 0}}},
+                {{{0, 1}, {0, 0}},
+                 {{0, 0}, {0, 0}}}}
+        });

         std::shared_ptr<Node> myAnd = And();
-        auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator());
+        auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator());
         op->associateInput(0, input1);
         op->associateInput(1, input2);
         op->setBackend("cpu");
-        op->setDataType(DataType::Int32);
+        op->setDataType(DataType::Float32);
         myAnd->forward();
-        op->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == *expectedOutput);
     }

     SECTION("Broadcasting") {
-        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
-            {
-                {
-                    {{10, 20},{22, 23},{20, 20}},
-                    {{10, 15},{10, 29},{20, 20}},
-                    {{26, 25},{33, 20},{10, 20}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}});
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
-            {
-                {
-                    {{ 1, 1},{ 0, 0},{ 0, 1}},
-                    {{ 1, 0},{ 1, 0},{ 0, 1}},
-                    {{ 0, 0},{ 0, 1},{ 1, 1}}
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float, 1, 2, 2, 2>{
+            {
+                {{{1, 0}, {1, 0}},
+                 {{1, 1}, {0, 0}}}}
+        });
+        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float, 2>{{1, 0}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float, 1, 2, 2, 2>{
+            {
+                {{{1, 0}, {1, 0}},
+                 {{1, 0}, {0, 0}}}}
+        });

         std::shared_ptr<Node> myAnd = And();
-        auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator());
+        auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator());
         op->associateInput(0, input_1);
         op->associateInput(1, input_2);
-        op->setDataType(DataType::Int32);
+        op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myAnd->forward();
-        op->getOutput(0)->print();
-        expectedOutput->print();
-        REQUIRE(*op->getOutput(0) == *expectedOutput);
+        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
     }
 }
\ No newline at end of file
@@ -110,5 +110,95 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") {
            REQUIRE(std::abs(outPtr[i] - expectedOutPtr[i]) < 0.00001);
        }
    }
-   // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] << std::endl;
    SECTION("Dilations") {
std::shared_ptr<Tensor> myInput3 = std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW
{
{
{{ 1, 2, 3, 4, 5},
{ 6, 7, 8, 9, 10},
{11, 12, 13, 14, 15},
{16, 17, 18, 19, 20},
{21, 22, 23, 24, 25}}
}
}
});
// Dilation of 2 means we take every second element in the window
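// With a 5x5 input, 2x2 kernel, stride 1 and dilation 2, the top-left window
// samples rows/cols {0, 2}: (1 + 3 + 11 + 13) / 4 = 7, the first value of
// myOutput3 below.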
std::shared_ptr<Node> myAvgPool = AvgPooling({2,2}, "mycdw", {1,1}, {2,2});
auto op = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool -> getOperator());
std::shared_ptr<Tensor> myOutput3 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> {
{
{
{{ 7, 8, 9},
{ 12, 13, 14},
{ 17, 18, 19}}
}
}
});
op->associateInput(0, myInput3);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
myAvgPool->forward();
op->getOutput(0)->print();
REQUIRE(*(op->getOutput(0)) == *myOutput3);
}
SECTION("Ceil Mode") {
std::shared_ptr<Tensor> myInput4 = std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW
{
{
{
{ 1, 2, 3, 4, 5},
{ 6, 7, 8, 9, 10},
{11, 12, 13, 14, 15},
{16, 17, 18, 19, 20},
{21, 22, 23, 24, 25}
}
}
}
});
// AvgPool with ceil_mode = true
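// Partial windows kept by ceil mode are averaged over the elements actually
// inside the input, e.g. the top-right window only covers column 4:
// (5 + 10) / 2 = 7.5.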
std::shared_ptr<Node> myAvgPool1 = AvgPooling({2,2}, "mycdw", {2,2}, {1,1}, true);
auto op1 = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool1 -> getOperator());
std::shared_ptr<Tensor> myOutput4 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> {
{
{
{
{ 4.0, 6.0, 7.5 },
{ 14.0, 16.0, 17.5 },
{ 21.5, 23.5, 25.0 }
}
}
}
});
op1->associateInput(0, myInput4);
op1->setDataType(DataType::Float32);
op1->setBackend("cpu");
myAvgPool1->forward();
op1->getOutput(0)->print();
REQUIRE(*(op1->getOutput(0)) == *myOutput4);
// AvgPool with ceil_mode = false
std::shared_ptr<Node> myAvgPool2 = AvgPooling({2,2}, "mycdw", {2,2}, {1,1}, false);
auto op2 = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool2 -> getOperator());
std::shared_ptr<Tensor> myOutput5 = std::make_shared<Tensor>(Array4D<float,1,1,2,2> {
{
{
{
{ 4.0, 6.0 },
{ 14.0, 16.0 }
}
}
}
});
op2->associateInput(0, myInput4);
op2->setDataType(DataType::Float32);
op2->setBackend("cpu");
myAvgPool2->forward();
op2->getOutput(0)->print();
REQUIRE(*(op2->getOutput(0)) == *myOutput5);
}
} }
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Equal.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] Equal(forwardDims)", "[Equal][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0.1 and 1.1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
std::uniform_int_distribution<int> boolDist(0,1);
SECTION("Same dimensions") {
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
DimSize_t nbDims = nbDimsDist(gen);
std::vector<DimSize_t> dims(nbDims);
for (std::size_t i = 0; i < nbDims; i++) {
dims[i] = dimSizeDist(gen);
}
std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims);
myInput1->setBackend("cpu");
myInput1->setDataType(DataType::Float32);
myInput1->zeros();
std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims);
myInput2->setBackend("cpu");
myInput2->setDataType(DataType::Float32);
myInput2->zeros();
std::shared_ptr<Node> myEqual = Equal();
auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator());
op->associateInput(0,myInput1);
op->associateInput(1,myInput2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->forwardDims();
const auto outputDims = op->getOutput(0)->dims();
REQUIRE(outputDims == dims);
}
}
SECTION("Broadcasting") {
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
DimSize_t nbDims = nbDimsDist(gen);
std::vector<DimSize_t> dims1(nbDims, 1);
std::vector<DimSize_t> dims2(nbDims, 1);
std::vector<DimSize_t> expectedOutDims;
for (std::size_t i = 0; i < nbDims; i++) {
DimSize_t dim = dimSizeDist(gen);
if (boolDist(gen)) {
dims1[i] = dim;
}
if (boolDist(gen)) {
dims2[i] = dim;
}
expectedOutDims.push_back(std::max(dims1[i],dims2[i]));
}
std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1);
myInput1->setBackend("cpu");
myInput1->setDataType(DataType::Float32);
myInput1->zeros();
std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2);
myInput2->setBackend("cpu");
myInput2->setDataType(DataType::Float32);
myInput2->zeros();
std::shared_ptr<Node> myEqual = Equal();
auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator());
op->associateInput(0,myInput1);
op->associateInput(1,myInput2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->forwardDims();
const auto outputDims = op->getOutput(0)->dims();
REQUIRE(outputDims == expectedOutDims);
}
}
}
TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") {
SECTION("Same size inputs") {
std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
{ //
{ //
{{20, 15},{31, 11},{22, 49}}, //
{{41, 10},{24, 51},{27, 52}}, //
{{26, 53},{27, 54},{28, 55}} //
}, //
{ //
{{29, 56},{30, 57},{31, 58}}, //
{{32, 59},{33, 60},{34, 61}}, //
{{35, 62},{36, 63},{37, 64}} //
}, //
{ //
{{38, 65},{39, 66},{40, 67}}, //
{{41, 68},{42, 69},{43, 70}}, //
{{44, 71},{45, 72},{46, 73}} //
} //
} //
}); //
std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
{ //
{ //
{{20, 47},{21, 48},{22, 49}}, //
{{23, 50},{24, 51},{25, 52}}, //
{{17, 53},{27, 26},{14, 33}} //
}, //
{ //
{{29, 56},{30, 57},{31, 58}}, //
{{72, 44},{33, 20},{27, 55}}, //
{{35, 24},{25, 63},{28, 64}} //
}, //
{ //
{{32, 65},{39, 66},{40, 70}}, //
{{41, 53},{42, 60},{34, 70}}, //
{{44, 71},{30, 12},{46, 73}} //
} //
} //
}); //
Tensor expectedOutput =Tensor(Array4D<int,3,3,3,2> {
{
{
{{1, 0},{0, 0},{1, 1}},
{{0, 0},{1, 1},{0, 1}},
{{0, 1},{1, 0},{0, 0}}
},
{
{{1, 1},{1, 1},{1, 1}},
{{0, 0},{1, 0},{0, 0}},
{{1, 0},{0, 1},{0, 1}}
},
{
{{0, 1},{1, 1},{1, 0}},
{{1, 0},{1, 0},{0, 1}},
{{1, 1},{0, 0},{1, 1}}
}
}
});
std::shared_ptr<Node> myEqual = Equal();
auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator());
op->associateInput(0, input1);
op->associateInput(1, input2);
op->setBackend("cpu");
op->setDataType(DataType::Int32);
myEqual->forward();
REQUIRE(*(op->getOutput(0)) == expectedOutput);
}
SECTION("Broadcasting") {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
{ //
{ //
{{10, 20},{22, 23},{20, 20}}, //
{{10, 15},{10, 29},{20, 20}}, //
{{26, 25},{33, 20},{10, 20}} //
} //
} //
}); //
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}});
Tensor expectedOutput = Tensor(Array4D<int,1,3,3,2> {
{ //
{ //
{{ 1, 1},{ 0, 0},{ 0, 1}}, //
{{ 1, 0},{ 1, 0},{ 0, 1}}, //
{{ 0, 0},{ 0, 1},{ 1, 1}} //
} //
} //
}); //
std::shared_ptr<Node> myEqual = Equal();
auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator());
op->associateInput(0, input_1);
op->associateInput(1, input_2);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
myEqual->forward();
op->getOutput(0)->print();
REQUIRE(*op->getOutput(0) == expectedOutput);
}
}
\ No newline at end of file
@@ -80,4 +80,96 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
        op->getOutput(0)->print();
        REQUIRE(*(op->getOutput(0)) == myOutput);
    }
SECTION("Dilation") {
std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2}, {2,2}); // Dilation 2x2
auto op = std::static_pointer_cast<OperatorTensor>(myMaxPool -> getOperator());
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> {
{
{
{
{0.71470, 0.52770},
{0.71470, 0.48740}
},
{
{2.23290, 0.48590},
{2.23290, 0.07000}
}
},
{
{
{1.76530, 1.20710},
{1.76530, 1.20710}
},
{
{1.04290, 0.67760},
{1.72170, 0.67760}
}
}
}
});
myMaxPool->getOperator()->associateInput(0,myInput);
myMaxPool->getOperator()->setDataType(DataType::Float32);
myMaxPool->getOperator()->setBackend("cpu");
myMaxPool->forward();
op->getOutput(0)->print();
REQUIRE(*(op->getOutput(0)) == *myOutput);
}
SECTION("Ceil Mode") {
std::shared_ptr<Tensor> myInput4 = std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW
{
{
{
{ 1, 2, 3, 4, 5},
{ 6, 7, 8, 9, 10},
{11, 12, 13, 14, 15},
{16, 17, 18, 19, 20},
{21, 22, 23, 24, 25}
}
}
}
});
// MaxPool with ceil_mode = true
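// With a 5x5 input, 2x2 kernel and stride 2, ceil mode keeps the partial
// right/bottom windows, giving a 3x3 output; e.g. the top-right window only
// covers column 4: max(5, 10) = 10.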
std::shared_ptr<Node> myMaxPool1 = MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, true);
auto op1 = std::static_pointer_cast<OperatorTensor>(myMaxPool1 -> getOperator());
std::shared_ptr<Tensor> myOutput4 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> {
{
{
{
{ 7.0, 9.0, 10.0 },
{ 17.0, 19.0, 20.0 },
{ 22.0, 24.0, 25.0 }
}
}
}
});
op1->associateInput(0, myInput4);
op1->setDataType(DataType::Float32);
op1->setBackend("cpu");
myMaxPool1->forward();
op1->getOutput(0)->print();
REQUIRE(*(op1->getOutput(0)) == *myOutput4);
// MaxPool with ceil_mode = false
std::shared_ptr<Node> myMaxPool2 = MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, false);
auto op2 = std::static_pointer_cast<OperatorTensor>(myMaxPool2 -> getOperator());
std::shared_ptr<Tensor> myOutput5 = std::make_shared<Tensor>(Array4D<float,1,1,2,2> {
{
{
{
{ 7.0, 9.0 },
{ 17.0, 19.0 }
}
}
}
});
op2->associateInput(0, myInput4);
op2->setDataType(DataType::Float32);
op2->setBackend("cpu");
myMaxPool2->forward();
op2->getOutput(0)->print();
REQUIRE(*(op2->getOutput(0)) == *myOutput5);
}
} }
\ No newline at end of file