diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 5db19a2b7a2f88dae13d8baf24cf95f961e730a0..ffc03ae5d6cb1d44637bc223ce4099af88f08070 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -29,6 +29,7 @@ #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" #include "aidge/backend/cpu/operator/DivImpl.hpp" +#include "aidge/backend/cpu/operator/EqualImpl.hpp" #include "aidge/backend/cpu/operator/ErfImpl.hpp" #include "aidge/backend/cpu/operator/ExpandImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp index 73b710e021ac5031923eb1e9a2492502c02a3633..d7c8ebcf19f64cb60aa2b62f312f4b46351e6ec2 100644 --- a/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp @@ -20,7 +20,7 @@ namespace Aidge { namespace { // suppose values are contiguous in memory template <class I, class O> -void equal_contiguous_arrays(const std::size_t input1size, +void and_contiguous_arrays(const std::size_t input1size, const std::size_t input2size, const std::size_t output1size, const I* input1, @@ -31,14 +31,14 @@ void equal_contiguous_arrays(const std::size_t input1size, { const std::size_t in1_id = (input1size != 1) ? i : 0; const std::size_t in2_id = (input2size != 1) ? i : 0; - output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]); + output[i] = static_cast<O>(input1[in1_id] && input2[in2_id]); } } } template <class I, class O> -void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, +void AndImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, std::vector<std::size_t> dims1, const std::vector<std::size_t>& outputDims, const void* input0_, @@ -60,9 +60,8 @@ void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, // special case for equal dimensions, the kernel is called with the entire arrays at once if (dims0 == dims1) { const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); - for (std::size_t i = 0; i < input0_contiguous_size; ++i) - { - output[i] = static_cast<O>(input_0[i] == input_1[i]); + for (std::size_t i = 0; i < input0_contiguous_size; ++i) { + output[i] = static_cast<O>(input_0[i] && input_1[i]); } return; } @@ -126,7 +125,7 @@ void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, std::size_t dim = contiguousIdx - 1; const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>()); for (std::size_t stack = 0; stack < nbStacks;) { - equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size, + and_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size, input_0 + offsetIn0*input0_contiguous_size, input_1 + offsetIn1*input1_contiguous_size, output + offsetOut*output_contiguous_size); @@ -146,17 +145,17 @@ void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, // Kernels registration to implementation entry point REGISTRAR(AndImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<float, float>, nullptr}); 
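+// Note: each ImplSpec below pairs two IOSpecs: the first constrains the inputs (DataType::Any accepts any input data type) and the second the output, whose type selects the templated kernel instantiation.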
REGISTRAR(AndImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<double, double>, nullptr}); REGISTRAR(AndImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); REGISTRAR(AndImpl_cpu, - {DataType::Int64}, - {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); } // namespace Aidge diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp index adea96ca43a1ad9d2a49777426913ca4676e4f32..7c76657f7d100255f49ab1e675672407c5fbbaf8 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp @@ -28,8 +28,10 @@ namespace Aidge { using AvgPooling2D_Op = AvgPooling_Op<2>; using AvgPoolingImpl2D_cpu = OperatorImpl_cpu<AvgPooling_Op<2>, void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, const std::array<DimSize_t, 2>&, const std::array<DimSize_t, 4>&, + bool, const void *, void *)>; diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp index f6da9dcb026101b93de862499d42ae8734532d52..78f8446a1acd86a8d569dc0afb22a2517bcfede0 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp @@ -35,66 +35,68 @@ namespace Aidge { template <class I, class O> void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, const std::array<DimSize_t, 2>& kernelDims, + const std::array<DimSize_t, 2>& dilations, const std::array<DimSize_t, 4> &dims, + bool ceilMode, const void *input_, void *output_) { - // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / - static_cast<float>(strideDims[0]))); + const std::size_t oxSize = + ceilMode + ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / + static_cast<float>(strideDims[0]))) + : static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / - static_cast<float>(strideDims[1]))); - - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, ch, Xin, Yin) - // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation attribute into account + const std::size_t oySize = + ceilMode + ? 
static_cast<std::size_t>(std::ceil(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / + static_cast<float>(strideDims[1]))) + : static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / + static_cast<float>(strideDims[1]))); + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { - const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0); + const std::size_t oIndex = (ch + batch * dims[1]) * oxSize * oySize; + const std::size_t iIndex = (ch + batch * dims[1]) * dims[2] * dims[3]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + const signedsize difx = static_cast<signedsize>(-ox * strideDims[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx); + for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); + const signedsize dify = static_cast<signedsize>(-oy * strideDims[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; + + const std::size_t oIndexFull = oIndex + ox * oySize + oy; const std::size_t ix = ox * strideDims[0]; const std::size_t iy = oy * strideDims[1]; - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += static_cast<O>( - input[iIndex + (ix+0)*dims[3] + (iy+0)] + - input[iIndex + (ix+0)*dims[3] + (iy+1)] + - input[iIndex + (ix+0)*dims[3] + (iy+2)] + - input[iIndex + (ix+1)*dims[3] + (iy+0)] + - input[iIndex + (ix+1)*dims[3] + (iy+1)] + - input[iIndex + (ix+1)*dims[3] + (iy+2)] + - input[iIndex + (ix+2)*dims[3] + (iy+0)] + - input[iIndex + (ix+2)*dims[3] + (iy+1)] + - input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9); - } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + O sum = static_cast<O>(0); + std::size_t count = 0; + + for (unsigned int sy = syMin; sy < syMax; ++sy) { + for (unsigned int sx = sxMin; sx < sxMax; ++sx) { + // Apply dilation factor + const std::size_t dilated_sx = sx * dilations[0]; + const std::size_t dilated_sy = sy * dilations[1]; + + // Ensure within bounds + if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) { + sum += static_cast<O>(input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]); + ++count; } } - // padding not used - output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin); } + + output[oIndexFull] = count > 0 ? 
sum / static_cast<O>(count) : 0; } } } diff --git a/include/aidge/backend/cpu/operator/EqualImpl.hpp b/include/aidge/backend/cpu/operator/EqualImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e2489096067a49139f6291898056d525a77db522 --- /dev/null +++ b/include/aidge/backend/cpu/operator/EqualImpl.hpp @@ -0,0 +1,32 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_H_ +#define AIDGE_CPU_OPERATOR_EQUALIMPL_H_ + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/Equal.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> + +namespace Aidge { +// Operator implementation entry point for the backend +using EqualImpl_cpu = OperatorImpl_cpu<Equal_Op, + void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(Equal_Op, "cpu", Aidge::EqualImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp b/include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3c8ff0f4742e0393efd8cbbf637822c443edffb3 --- /dev/null +++ b/include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp @@ -0,0 +1,163 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ + +#include "aidge/backend/cpu/operator/EqualImpl.hpp" +#include "aidge/utils/Registrar.hpp" + +namespace Aidge { + +namespace { +// suppose values are contiguous in memory +template <class I, class O> +void equal_contiguous_arrays(const std::size_t input1size, + const std::size_t input2size, + const std::size_t output1size, + const I* input1, + const I* input2, + O* output) +{ + for (std::size_t i = 0; i < output1size; ++i) + { + const std::size_t in1_id = (input1size != 1) ? i : 0; + const std::size_t in2_id = (input2size != 1) ? i : 0; + output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]); + } +} +} + + +template <class I, class O> +void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, + std::vector<std::size_t> dims1, + const std::vector<std::size_t>& outputDims, + const void* input0_, + const void* input1_, + void* output_) { + + const I* input_0 = static_cast<const I*>(input0_); + const I* input_1 = static_cast<const I*>(input1_); + O* output = static_cast<O*>(output_); + + // [5,2,1,7] & [2,6,7] + // 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7] + // 2. 
Find the highest equal dimension -> 3 + // Exception: if the first diverging dimension is the last one, then -> 4 (dims.size()) + // 3. Compute the highest number of contiguous data -> 7 + // 4. Compute stride and offset step for the broadcast mechanism + // 5. Call a simple kernel + + // special case for equal dimensions, the kernel is called with the entire arrays at once + if (dims0 == dims1) { + const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); + for (std::size_t i = 0; i < input0_contiguous_size; ++i) + { + output[i] = static_cast<O>(input_0[i] == input_1[i]); + } + return; + } + + // set dimensions to be of equal size by filling the smallest one with ones. + if (dims0.size() > dims1.size()) { + dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1)); + } + else if (dims1.size() > dims0.size()) { + dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1)); + } + + const std::size_t nbDims = dims0.size(); + + // Find the highest equal dimension + // std::size_t contiguousIdx = nbDims - 1; + std::size_t contiguousIdx = nbDims; + while (contiguousIdx-- > 0) { + // for (; contiguousIdx+1 > 0; --contiguousIdx) { + if (dims0[contiguousIdx] != dims1[contiguousIdx]) { + if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1 + const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1; + while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) { + --contiguousIdx; + } + } + break; + } + } + ++contiguousIdx; + + // Compute the highest number of contiguous data for each Tensor + const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + + // initialize strides to iterate through data because of broadcasting + std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx); + if (contiguousIdx > 0) { + stride_post0[contiguousIdx - 1] = 1; + stride_post1[contiguousIdx - 1] = 1; + for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) { + stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]); + stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]); + } + for (std::size_t i = 0; i != contiguousIdx; ++i) { + stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (dims1[i] == 1) ? 
1 - stride_post1[i] : 1; + } + } + + // variables for arrays offsets + std::size_t offsetIn0 = 0; + std::size_t offsetIn1 = 0; + std::size_t offsetOut = 0; + + + std::size_t dim = contiguousIdx - 1; + const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>()); + for (std::size_t stack = 0; stack < nbStacks;) { + equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size, + input_0 + offsetIn0*input0_contiguous_size, + input_1 + offsetIn1*input1_contiguous_size, + output + offsetOut*output_contiguous_size); + if (++stack < nbStacks) { + std::size_t tmp_stack = stack; + while(tmp_stack % outputDims[dim] == 0) { + tmp_stack /= outputDims[dim]; + dim--; + } + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = contiguousIdx - 1; + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp index 68cc3621514de97d9837e10bcf90218abe559aaa..062088a13c0fcca8116a08f71457e6b0b01d1a65 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp @@ -28,6 +28,7 @@ namespace Aidge { using MaxPooling2D_Op = MaxPooling_Op<2>; using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>, void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, const std::array<DimSize_t, 2>&, const bool, const std::array<DimSize_t, 4> &, diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp index 7b6f04f141eb701849a8d436561bcf9e37471cfa..027fc02a3e88d90082ade06bae53356f1a676094 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp @@ -16,6 +16,7 @@ #include <cmath> #include <tuple> + #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/data/Data.hpp" @@ -35,28 +36,29 @@ namespace Aidge { template <class I, class O> void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, const std::array<DimSize_t, 2>& kernelDims, - const bool /*ceilMode*/, + const std::array<DimSize_t, 2>& dilations, + const bool ceilMode, const std::array<DimSize_t, 4> &dims, const void *input_, void *output_) { - // FIXME: missing convolution parameters as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); // output H 
size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / - static_cast<float>(strideDims[0]))); + const std::size_t oxSize = + ceilMode + ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / + static_cast<float>(strideDims[0]))) + : static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / - static_cast<float>(strideDims[1]))); + const std::size_t oySize = + ceilMode + ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / + static_cast<float>(strideDims[1]))) + : static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / + static_cast<float>(strideDims[1]))); - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, ch, Xin, Yin) - // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { @@ -77,12 +79,15 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD I poolValue(0.0); bool valid = false; - for (unsigned int channel = 0; channel < dims[1]; - ++channel){ - for (unsigned int sy = syMin; sy < syMax; ++sy) { - for (unsigned int sx = sxMin; sx < sxMax; ++sx) - { - const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + for (unsigned int sy = syMin; sy < syMax; ++sy) { + for (unsigned int sx = sxMin; sx < sxMax; ++sx) { + // Apply dilation factor to kernel indices + const std::size_t dilated_sx = sx * dilations[0]; + const std::size_t dilated_sy = sy * dilations[1]; + + // Ensure indices are within bounds + if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) { + const I value = input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]; if (!valid || value > poolValue) { poolValue = value; @@ -98,106 +103,6 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD } } -//N2D2 version -/* -template <class T> -void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha, - const Tensor<T>& - inputs, - const Descriptor& desc, - const T* beta, - Tensor<T>& outputs, - Tensor<ArgMax>& argMax, - bool useArgMax, - const Tensor<bool>& maps) -{ - const unsigned int size = inputs.dimB() * outputs.dimZ(); - -#if defined(_OPENMP) && _OPENMP >= 200805 -#pragma omp parallel for collapse(2) if (size > 16) -#else -#pragma omp parallel for if (inputs.dimB() > 4 && size > 16) -#endif - for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) { - for (unsigned int output = 0; output < outputs.dimZ(); ++output) { - for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) { - for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) { - const unsigned int sxMin = (unsigned int)std::max( - desc.padding[0] - (int)(ox * desc.stride[0]), 0); - const unsigned int syMin = (unsigned int)std::max( - desc.padding[1] - (int)(oy * desc.stride[1]), 0); - const unsigned int sxMax = Utils::clamp - <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0], - 0, - desc.pool[0]); - const unsigned int syMax = 
Utils::clamp - <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1], - 0, - desc.pool[1]); - - const int ix = (int)(ox * desc.stride[0]) - desc.padding[0]; - const int iy = (int)(oy * desc.stride[1]) - desc.padding[1]; - - T poolValue(0.0); - - // For each output, compute the pool value - if (useArgMax) { - const ArgMax inputMax - = argMax(ox, oy, output, batchPos); - - if (inputMax.valid) { - poolValue = inputs(inputMax.ix, - inputMax.iy, - inputMax.channel, - batchPos); - } - } - else { - unsigned int ixMax = 0; - unsigned int iyMax = 0; - unsigned int channelMax = 0; - bool valid = false; - - for (unsigned int channel = 0; channel < inputs.dimZ(); - ++channel) - { - if (!maps.empty() && !maps(output, channel)) - continue; - - for (unsigned int sy = syMin; sy < syMax; ++sy) { - for (unsigned int sx = sxMin; sx < sxMax; ++sx) - { - const T value = inputs(ix + sx, - iy + sy, - channel, - batchPos); - - if (!valid || value > poolValue) { - poolValue = value; - valid = true; - - ixMax = ix + sx; - iyMax = iy + sy; - channelMax = channel; - } - } - } - } - - argMax(ox, oy, output, batchPos) - = ArgMax(ixMax, iyMax, channelMax, valid); - } - - outputs(ox, oy, output, batchPos) - = (*alpha) * poolValue - + (*beta) * outputs(ox, oy, output, batchPos); - } - } - } - } -} - -*/ // Kernels registration to implementation entry point REGISTRAR(MaxPoolingImpl2D_cpu, diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index 01a5e8cf1772161f5cf98d3a8bd52f43ac7a1d0d..eb5ef87bd16620cdef33330bd7b39ece1783ecfc 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -32,7 +32,9 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() { // Call kernel impl.forward(op_.strideDims(), op_.kernelDims(), + op_.dilations(), op_.getInput(0)->template dims<4>(), + op_.ceilMode(), getCPUPtr(op_.getInput(0)), getCPUPtr(op_.getOutput(0))); } diff --git a/src/operator/EqualImpl.cpp b/src/operator/EqualImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5926212e8453a32e54de7691343d44f9c6849a05 --- /dev/null +++ b/src/operator/EqualImpl.cpp @@ -0,0 +1,61 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> // std::accumulate +#include <thread> // std::this_thread::sleep_for +#include <vector> + +#include "aidge/operator/Equal.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +#include "aidge/backend/cpu/operator/EqualImpl.hpp" +#include "aidge/backend/cpu/operator/EqualImpl_kernels.hpp" + +template <> +void Aidge::EqualImpl_cpu::forward() { + const Equal_Op& op = static_cast<const Equal_Op&>(mOp); + // Check inputs + AIDGE_ASSERT(op.getInput(0), "missing input in Equal operator"); + AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Equal forward because the 0-th input has no implementation."); + + AIDGE_ASSERT(op.getInput(1), "missing input in Equal operator"); + AIDGE_ASSERT(op.getInput(1)->hasImpl(), "cannot run Equal forward because the 1st input has no implementation."); + + AIDGE_ASSERT(op.getInput(1)->dataType() == op.getInput(0)->dataType(), "Cannot Equal inputs with two different data types."); + + // Find the correct kernel type + const auto impl = Registrar<EqualImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback; + const auto& input0 = op.getInput(0)->refCastFrom(input0Fallback, *op.getInput(0)); + const auto& input1 = op.getInput(1)->refCastFrom(input1Fallback, *op.getInput(1)); + + + impl.forward(op.getInput(0)->dims(), + op.getInput(1)->dims(), + op.getOutput(0)->dims(), + input0.getImpl()->rawPtr(), + input1.getImpl()->rawPtr(), + getCPUPtr(op.getRawOutput(0))); +} + +template <> +void Aidge::EqualImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Equal_Op on backend cpu"); +} diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp index 90075a397be3f082ef95fd4df074c99d926fd385..13ef75b0eb92cea700e5b487a06046fc9285d5ad 100644 --- a/src/operator/MaxPoolingImpl.cpp +++ b/src/operator/MaxPoolingImpl.cpp @@ -30,6 +30,7 @@ void Aidge::MaxPoolingImpl2D_cpu::forward() { // Call kernel impl.forward(op_.strideDims(), op_.kernelDims(), + op_.dilations(), op_.ceilMode(), op_.getInput(0)->template dims<4>(), getCPUPtr(mOp.getRawInput(0)), diff --git a/unit_tests/operator/Test_AndImpl.cpp b/unit_tests/operator/Test_AndImpl.cpp index c2309dce5f32862ad9aeceaf98430b75ab7be6ef..148298d5f44b9f7744ab18bcfc5ce675f77a784c 100644 --- a/unit_tests/operator/Test_AndImpl.cpp +++ b/unit_tests/operator/Test_AndImpl.cpp @@ -26,75 +26,92 @@ using namespace Aidge; TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { - SECTION("ForwardDims") - { + SECTION("ForwardDims") { constexpr std::uint16_t NBTRIALS = 10; // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); - std::uniform_int_distribution<int> 
boolDist(0,1); + std::uniform_int_distribution<int> boolDist(0, 1); // Use 0 for false, 1 for true + std::uniform_int_distribution<std::size_t> dimSizeDist(2, 10); + std::uniform_int_distribution<std::size_t> nbDimsDist(1, 5); SECTION("Same dimensions") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { DimSize_t nbDims = nbDimsDist(gen); std::vector<DimSize_t> dims(nbDims); - for (std::size_t i = 0; i < nbDims; i++) { + for (std::size_t i = 0; i < nbDims; ++i) { dims[i] = dimSizeDist(gen); } - + const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + float* array0 = new float[nb_elements]; + float* array1 = new float[nb_elements]; + for (std::size_t i = 0; i < nb_elements; ++i) { + array0[i] = boolDist(gen); + array1[i] = boolDist(gen); + } std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); - myInput1->setBackend("cpu"); - myInput1->setDataType(DataType::Float32); - myInput1->zeros(); std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); - myInput2->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); myInput2->setDataType(DataType::Float32); - myInput2->zeros(); + myInput1->setBackend("cpu"); + myInput2->setBackend("cpu"); + + myInput1 -> getImpl() -> setRawPtr(array0, nb_elements); + myInput2 -> getImpl() -> setRawPtr(array1, nb_elements); + std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); - op->associateInput(0,myInput1); - op->associateInput(1,myInput2); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); + op->associateInput(0, myInput1); + op->associateInput(1, myInput2); op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(); const auto outputDims = op->getOutput(0)->dims(); REQUIRE(outputDims == dims); + delete[] array0; + delete[] array1; } } + SECTION("Broadcasting") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { DimSize_t nbDims = nbDimsDist(gen); std::vector<DimSize_t> dims1(nbDims, 1); std::vector<DimSize_t> dims2(nbDims, 1); std::vector<DimSize_t> expectedOutDims; - for (std::size_t i = 0; i < nbDims; i++) { + for (std::size_t i = 0; i < nbDims; ++i) { DimSize_t dim = dimSizeDist(gen); - if (boolDist(gen)) { - dims1[i] = dim; - } - if (boolDist(gen)) { - dims2[i] = dim; - } - expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + if (boolDist(gen)) dims1[i] = dim; + if (boolDist(gen)) dims2[i] = dim; + expectedOutDims.push_back(std::max(dims1[i], dims2[i])); } + const std::size_t nb_elements0 = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements1 = std::accumulate(dims2.cbegin(), dims2.cend(), std::size_t(1), std::multiplies<std::size_t>()); + float* array0 = new float[nb_elements0]; + float* array1 = new float[nb_elements1]; + for (std::size_t i = 0; i < nb_elements0; ++i) { + array0[i] = boolDist(gen); + } + for (std::size_t i = 0; i < nb_elements1; ++i) { + array1[i] = boolDist(gen); + } std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); - myInput1->setBackend("cpu"); - myInput1->setDataType(DataType::Float32); - myInput1->zeros(); std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); - myInput2->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); myInput2->setDataType(DataType::Float32); - myInput2->zeros(); + myInput1->setBackend("cpu"); + myInput2->setBackend("cpu"); + myInput1 -> getImpl() -> 
setRawPtr(array0, nb_elements0); + myInput2 -> getImpl() -> setRawPtr(array1, nb_elements1); + + std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); - op->associateInput(0,myInput1); - op->associateInput(1,myInput2); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); + op->associateInput(0, myInput1); + op->associateInput(1, myInput2); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -102,110 +119,67 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { const auto outputDims = op->getOutput(0)->dims(); REQUIRE(outputDims == expectedOutDims); + delete[] array0; + delete[] array1; } } } + SECTION("Same size inputs") { - std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { // - { // - {{20, 15},{31, 11},{22, 49}}, // - {{41, 10},{24, 51},{27, 52}}, // - {{26, 53},{27, 54},{28, 55}} // - }, // - { // - {{29, 56},{30, 57},{31, 58}}, // - {{32, 59},{33, 60},{34, 61}}, // - {{35, 62},{36, 63},{37, 64}} // - }, // - { // - {{38, 65},{39, 66},{40, 67}}, // - {{41, 68},{42, 69},{43, 70}}, // - {{44, 71},{45, 72},{46, 73}} // - } // - } // - }); // - std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { // - { // - {{20, 47},{21, 48},{22, 49}}, // - {{23, 50},{24, 51},{25, 52}}, // - {{17, 53},{27, 26},{14, 33}} // - }, // - { // - {{29, 56},{30, 57},{31, 58}}, // - {{72, 44},{33, 20},{27, 55}}, // - {{35, 24},{25, 63},{28, 64}} // - }, // - { // - {{32, 65},{39, 66},{40, 70}}, // - {{41, 53},{42, 60},{34, 70}}, // - {{44, 71},{30, 12},{46, 73}} // - } // - } // - }); // - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{ { - { - {{1, 0},{0, 0},{1, 1}}, - {{0, 0},{1, 1},{0, 1}}, - {{0, 1},{1, 0},{0, 0}} - }, - { - {{1, 1},{1, 1},{1, 1}}, - {{0, 0},{1, 0},{0, 0}}, - {{1, 0},{0, 1},{0, 1}} - }, - { - {{0, 1},{1, 1},{1, 0}}, - {{1, 0},{1, 0},{0, 1}}, - {{1, 1},{0, 0},{1, 1}} - } - } - }); + {{{1, 0}, {0, 1}}, + {{1, 1}, {0, 0}}}, + {{{0, 1}, {1, 0}}, + {{1, 0}, {0, 1}}}} + }); + std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{ + { + {{{1, 1}, {0, 0}}, + {{0, 1}, {1, 1}}}, + {{{1, 1}, {0, 0}}, + {{0, 1}, {1, 0}}}} + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{ + { + {{{1, 0}, {0, 0}}, + {{0, 1}, {0, 0}}}, + {{{0, 1}, {0, 0}}, + {{0, 0}, {0, 0}}}} + }); std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); op->associateInput(0, input1); op->associateInput(1, input2); op->setBackend("cpu"); - op->setDataType(DataType::Int32); + op->setDataType(DataType::Float32); myAnd->forward(); - + op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == *expectedOutput); } SECTION("Broadcasting") { - std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { - { // - { // - {{10, 20},{22, 23},{20, 20}}, // - {{10, 15},{10, 29},{20, 20}}, // - {{26, 25},{33, 20},{10, 20}} // - } // - } // - }); // - - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { - { // - { // - {{ 1, 1},{ 0, 0},{ 0, 1}}, // - {{ 1, 0},{ 1, 0},{ 0, 1}}, // - {{ 0, 0},{ 0, 1},{ 1, 1}} 
// - } // - } // - }); // + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float, 1, 2, 2, 2>{ + { + {{{1, 0}, {1, 0}}, + {{1, 1}, {0, 0}}}} + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float, 2>{{1, 0}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float, 1, 2, 2, 2>{ + { + {{{1, 0}, {1, 0}}, + {{1, 0}, {0, 0}}}} + }); std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); op->associateInput(0, input_1); op->associateInput(1, input_2); - op->setDataType(DataType::Int32); + op->setDataType(DataType::Float32); op->setBackend("cpu"); myAnd->forward(); - op->getOutput(0)->print(); - expectedOutput->print(); - REQUIRE(*op->getOutput(0) == *expectedOutput); + + REQUIRE(*(op->getOutput(0)) == *expectedOutput); } -} \ No newline at end of file +} diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp index 372febc61d04c2ba983dd33f009fe5bf1d2908a0..f116934cd61375d3498ecf4e9aeb54f1eea37d3f 100644 --- a/unit_tests/operator/Test_AvgPoolingImpl.cpp +++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp @@ -110,5 +110,95 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") { REQUIRE(std::abs(outPtr[i] - expectedOutPtr[i]) < 0.00001); } } - // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] << std::endl; + SECTION("Dilations") { + std::shared_ptr<Tensor> myInput3 = std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW + { + { + {{ 1, 2, 3, 4, 5}, + { 6, 7, 8, 9, 10}, + {11, 12, 13, 14, 15}, + {16, 17, 18, 19, 20}, + {21, 22, 23, 24, 25}} + } + } + }); + + // Dilation of 2 means we take every second element in the window + std::shared_ptr<Node> myAvgPool = AvgPooling({2,2}, "mycdw", {1,1}, {2,2}); + auto op = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool -> getOperator()); + + std::shared_ptr<Tensor> myOutput3 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> { + { + { + {{ 7, 8, 9}, + { 12, 13, 14}, + { 17, 18, 19}} + } + } + }); + + op->associateInput(0, myInput3); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myAvgPool->forward(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput3); + } + SECTION("Ceil Mode") { + std::shared_ptr<Tensor> myInput4 = std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW + { + { + { + { 1, 2, 3, 4, 5}, + { 6, 7, 8, 9, 10}, + {11, 12, 13, 14, 15}, + {16, 17, 18, 19, 20}, + {21, 22, 23, 24, 25} + } + } + } + }); + + // AvgPool with ceil_mode = true + std::shared_ptr<Node> myAvgPool1 = AvgPooling({2,2}, "mycdw", {2,2}, {1,1}, true); + auto op1 = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool1 -> getOperator()); + + std::shared_ptr<Tensor> myOutput4 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> { + { + { + { + { 4.0, 6.0, 7.5 }, + { 14.0, 16.0, 17.5 }, + { 21.5, 23.5, 25.0 } + } + } + } + }); + op1->associateInput(0, myInput4); + op1->setDataType(DataType::Float32); + op1->setBackend("cpu"); + myAvgPool1->forward(); + op1->getOutput(0)->print(); + REQUIRE(*(op1->getOutput(0)) == *myOutput4); + + // AvgPool with ceil_mode = false + std::shared_ptr<Node> myAvgPool2 = AvgPooling({2,2}, "mycdw", {2,2}, {1,1}, false); + auto op2 = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool2 -> getOperator()); + std::shared_ptr<Tensor> myOutput5 = std::make_shared<Tensor>(Array4D<float,1,1,2,2> { + { + { + { + { 4.0, 6.0 }, + { 14.0, 
16.0 } + } + } + } + }); + op2->associateInput(0, myInput4); + op2->setDataType(DataType::Float32); + op2->setBackend("cpu"); + myAvgPool2->forward(); + op2->getOutput(0)->print(); + REQUIRE(*(op2->getOutput(0)) == *myOutput5); + } } \ No newline at end of file diff --git a/unit_tests/operator/Test_EqualImpl.cpp b/unit_tests/operator/Test_EqualImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bd9fa94fdd09ef70af2e218b3b33636f3d83cc97 --- /dev/null +++ b/unit_tests/operator/Test_EqualImpl.cpp @@ -0,0 +1,202 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Equal.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Equal(forwardDims)", "[Equal][CPU]") { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0.1 and 1.1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("Same dimensions") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); + } + } + SECTION("Broadcasting") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims1(nbDims, 1); + std::vector<DimSize_t> dims2(nbDims, 1); + std::vector<DimSize_t> expectedOutDims; + for (std::size_t i = 0; i < nbDims; i++) { + DimSize_t dim = dimSizeDist(gen); + if (boolDist(gen)) { + dims1[i] = dim; + } + if (boolDist(gen)) { + dims2[i] = dim; + } + expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + } + + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myEqual = 
Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } +} +TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") { + SECTION("Same size inputs") { + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 15},{31, 11},{22, 49}}, // + {{41, 10},{24, 51},{27, 52}}, // + {{26, 53},{27, 54},{28, 55}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{32, 59},{33, 60},{34, 61}}, // + {{35, 62},{36, 63},{37, 64}} // + }, // + { // + {{38, 65},{39, 66},{40, 67}}, // + {{41, 68},{42, 69},{43, 70}}, // + {{44, 71},{45, 72},{46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 47},{21, 48},{22, 49}}, // + {{23, 50},{24, 51},{25, 52}}, // + {{17, 53},{27, 26},{14, 33}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{72, 44},{33, 20},{27, 55}}, // + {{35, 24},{25, 63},{28, 64}} // + }, // + { // + {{32, 65},{39, 66},{40, 70}}, // + {{41, 53},{42, 60},{34, 70}}, // + {{44, 71},{30, 12},{46, 73}} // + } // + } // + }); // + Tensor expectedOutput =Tensor(Array4D<int,3,3,3,2> { + { + { + {{1, 0},{0, 0},{1, 1}}, + {{0, 0},{1, 1},{0, 1}}, + {{0, 1},{1, 0},{0, 0}} + }, + { + {{1, 1},{1, 1},{1, 1}}, + {{0, 0},{1, 0},{0, 0}}, + {{1, 0},{0, 1},{0, 1}} + }, + { + {{0, 1},{1, 1},{1, 0}}, + {{1, 0},{1, 0},{0, 1}}, + {{1, 1},{0, 0},{1, 1}} + } + } + }); + + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0, input1); + op->associateInput(1, input2); + op->setBackend("cpu"); + op->setDataType(DataType::Int32); + myEqual->forward(); + + REQUIRE(*(op->getOutput(0)) == expectedOutput); + } + + SECTION("Broadcasting") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{10, 20},{22, 23},{20, 20}}, // + {{10, 15},{10, 29},{20, 20}}, // + {{26, 25},{33, 20},{10, 20}} // + } // + } // + }); // + + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); + Tensor expectedOutput = Tensor(Array4D<int,1,3,3,2> { + { // + { // + {{ 1, 1},{ 0, 0},{ 0, 1}}, // + {{ 1, 0},{ 1, 0},{ 0, 1}}, // + {{ 0, 0},{ 0, 1},{ 1, 1}} // + } // + } // + }); // + + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0, input_1); + op->associateInput(1, input_2); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + myEqual->forward(); + op->getOutput(0)->print(); + + REQUIRE(*op->getOutput(0) == expectedOutput); + } +} \ No newline at end of file diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp index de02df2b73bc461bbd76b089cd555d7c82bd173e..d480fc3004957e12f72584138bb1c9dd10ee969a 100644 --- a/unit_tests/operator/Test_MaxPoolingImpl.cpp +++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp @@ -80,4 +80,96 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") { op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == myOutput); } + SECTION("Dilation") { + std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2}, {2,2}); // Dilation 2x2 + auto op = 
std::static_pointer_cast<OperatorTensor>(myMaxPool -> getOperator()); + + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> { + { + { + { + {0.71470, 0.52770}, + {0.71470, 0.48740} + }, + { + {2.23290, 0.48590}, + {2.23290, 0.07000} + } + }, + { + { + {1.76530, 1.20710}, + {1.76530, 1.20710} + }, + { + {1.04290, 0.67760}, + {1.72170, 0.67760} + } + } + } + }); + myMaxPool->getOperator()->associateInput(0,myInput); + myMaxPool->getOperator()->setDataType(DataType::Float32); + myMaxPool->getOperator()->setBackend("cpu"); + myMaxPool->forward(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput); + } + SECTION("Ceil Mode") { + std::shared_ptr<Tensor> myInput4 = std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW + { + { + { + { 1, 2, 3, 4, 5}, + { 6, 7, 8, 9, 10}, + {11, 12, 13, 14, 15}, + {16, 17, 18, 19, 20}, + {21, 22, 23, 24, 25} + } + } + } + }); + + // MaxPool with ceil_mode = true + std::shared_ptr<Node> myMaxPool1 = MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, true); + auto op1 = std::static_pointer_cast<OperatorTensor>(myMaxPool1 -> getOperator()); + + std::shared_ptr<Tensor> myOutput4 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> { + { + { + { + { 7.0, 9.0, 10.0 }, + { 17.0, 19.0, 20.0 }, + { 22.0, 24.0, 25.0 } + } + } + } + }); + op1->associateInput(0, myInput4); + op1->setDataType(DataType::Float32); + op1->setBackend("cpu"); + myMaxPool1->forward(); + op1->getOutput(0)->print(); + REQUIRE(*(op1->getOutput(0)) == *myOutput4); + + // MaxPool with ceil_mode = false + std::shared_ptr<Node> myMaxPool2 = MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, false); + auto op2 = std::static_pointer_cast<OperatorTensor>(myMaxPool2 -> getOperator()); + std::shared_ptr<Tensor> myOutput5 = std::make_shared<Tensor>(Array4D<float,1,1,2,2> { + { + { + { + { 7.0, 9.0 }, + { 17.0, 19.0 } + } + } + } + }); + op2->associateInput(0, myInput4); + op2->setDataType(DataType::Float32); + op2->setBackend("cpu"); + myMaxPool2->forward(); + op2->getOutput(0)->print(); + REQUIRE(*(op2->getOutput(0)) == *myOutput5); + } } \ No newline at end of file
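
Two illustrative notes for reviewers; neither is part of the patch above.

1. Output-size computation: with the new dilations and ceilMode arguments, both pooling kernels compute, per spatial axis, out = round((in - (kernel - 1) * dilation - 1 + stride) / stride), where round is std::ceil when ceilMode is set and std::floor otherwise. For the ceil-mode tests above (in = 5, kernel = 2, stride = 2, dilation = 1) this gives (5 - 1 - 1 + 2) / 2 = 2.5, hence a 3x3 output with ceil_mode = true and a 2x2 output with ceil_mode = false, matching the expected tensors.

2. A minimal standalone usage sketch for the new Equal CPU operator, mirroring the calls made in Test_EqualImpl.cpp. It assumes a target linked against aidge_core and aidge_backend_cpu; the explicit OperatorTensor include path is an assumption (the unit tests get that header transitively):

#include <memory>

#include "aidge/backend/cpu.hpp"              // pulls in the CPU kernel registrations
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Equal.hpp"
#include "aidge/operator/OperatorTensor.hpp"  // assumed include path for OperatorTensor

int main() {
    // Two length-2 int tensors compared elementwise.
    auto in0 = std::make_shared<Aidge::Tensor>(Aidge::Array1D<int, 2>{{10, 20}});
    auto in1 = std::make_shared<Aidge::Tensor>(Aidge::Array1D<int, 2>{{10, 21}});

    std::shared_ptr<Aidge::Node> myEqual = Aidge::Equal();
    auto op = std::static_pointer_cast<Aidge::OperatorTensor>(myEqual->getOperator());
    op->associateInput(0, in0);
    op->associateInput(1, in1);
    op->setDataType(Aidge::DataType::Int32);  // selects the <int32_t, int32_t> kernel
    op->setBackend("cpu");
    myEqual->forward();

    op->getOutput(0)->print();  // expected: {1, 0}, since 10 == 10 and 20 != 21
    return 0;
}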