diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index b81145dec13d39ea701f2ae9dfcbd6b6d17bff39..4134f5c50220198840e078dc78b8f37ded09b78e 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -12,7 +12,10 @@ #ifndef AIDGE_CPU_IMPORTS_H_ #define AIDGE_CPU_IMPORTS_H_ +#include "aidge/backend/cpu/operator/AbsImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/backend/cpu/operator/AndImpl.hpp" +#include "aidge/backend/cpu/operator/ArgMaxImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" @@ -24,11 +27,13 @@ #include "aidge/backend/cpu/operator/FoldImpl.hpp" #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" +#include "aidge/backend/cpu/operator/LnImpl.hpp" #include "aidge/backend/cpu/operator/MatMulImpl.hpp" #include "aidge/backend/cpu/operator/MulImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/backend/cpu/operator/PowImpl.hpp" #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp" +#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/ScalingImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/AbsImpl.hpp b/include/aidge/backend/cpu/operator/AbsImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..faba3ef69ff27fbfb92393e1e0dacaebd5d69b07 --- /dev/null +++ b/include/aidge/backend/cpu/operator/AbsImpl.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ABSIMPL_H_ +#define AIDGE_CPU_OPERATOR_ABSIMPL_H_ + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include <memory> +#include <vector> + +namespace Aidge { +// class Abs_Op; + +// compute kernel registry for forward and backward +class AbsImplForward_cpu + : public Registrable<AbsImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { +}; +class AbsImplBackward_cpu + : public Registrable<AbsImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { +}; + +class AbsImpl_cpu : public OperatorImpl { +public: + AbsImpl_cpu(const Abs_Op& op) : OperatorImpl(op, "cpu") {} + + static std::unique_ptr<AbsImpl_cpu> create(const Abs_Op& op) { + return std::make_unique<AbsImpl_cpu>(op); + } + + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +static Registrar<Abs_Op> registrarAbsImpl_cpu("cpu", Aidge::AbsImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ABSIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/AbsImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AbsImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4922d6273b681515a4682b86c720b42381598042 --- /dev/null +++ b/include/aidge/backend/cpu/operator/AbsImpl_forward_kernels.hpp @@ -0,0 +1,45 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_ABSIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_ABSIMPL_FORWARD_KERNEL_H_
+
+#include <cmath>
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/AbsImpl.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void AbsImpl_cpu_forward_kernel(std::size_t inputLength,
+                                const void* input_,
+                                void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    for (std::size_t i = 0; i < inputLength; ++i) {
+        output[i] = std::abs(input[i]);
+    }
+}
+
+namespace {
+static Registrar<AbsImplForward_cpu> registrarAbsImplForward_cpu_Float32(
+    {DataType::Float32, DataType::Float32}, Aidge::AbsImpl_cpu_forward_kernel<float, float>);
+static Registrar<AbsImplForward_cpu> registrarAbsImplForward_cpu_Int32(
+    {DataType::Int32, DataType::Int32}, Aidge::AbsImpl_cpu_forward_kernel<int, int>);
+static Registrar<AbsImplForward_cpu> registrarAbsImplForward_cpu_Float64(
+    {DataType::Float64, DataType::Float64}, Aidge::AbsImpl_cpu_forward_kernel<double, double>);
+} // namespace
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_ABSIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/AndImpl.hpp b/include/aidge/backend/cpu/operator/AndImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..139b1f08e4c4e2900e07d2bb470cb27fb878807f
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/AndImpl.hpp
@@ -0,0 +1,49 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_ANDIMPL_H_
+#define AIDGE_CPU_OPERATOR_ANDIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/And.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+// compute kernel registry for forward and backward
+class AndImplForward_cpu
+    : public Registrable<AndImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> {
+};
+class AndImplBackward_cpu
+    : public Registrable<AndImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> {
+};
+
+class AndImpl_cpu : public OperatorImpl {
+public:
+    AndImpl_cpu(const And_Op& op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<AndImpl_cpu> create(const And_Op& op) {
+        return std::make_unique<AndImpl_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+static Registrar<And_Op> registrarAndImpl_cpu("cpu", Aidge::AndImpl_cpu::create);
+}
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_ANDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/AndImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AndImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c537863c83fcdd2f766911386467da8e74138253
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/AndImpl_forward_kernels.hpp
@@ -0,0 +1,66 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_ANDIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_ANDIMPL_FORWARD_KERNEL_H_
+
+#include <cstdint>  // std::int32_t, std::int64_t
+
+#include "aidge/backend/cpu/data/Broadcasting.hpp"
+#include "aidge/backend/cpu/operator/AndImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+namespace Aidge {
+template <class I1, class I2, class O>
+void AndImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
+                                const std::vector<std::size_t>& input2Dims,
+                                const std::vector<std::size_t>& outputDims,
+                                const void* input1_,
+                                const void* input2_,
+                                void* output_) {
+
+    const I1* input_1 = static_cast<const I1*>(input1_);
+    const I2* input_2 = static_cast<const I2*>(input2_);
+    O* output = static_cast<O*>(output_);
+
+    std::size_t totalElements = 1;
+    for (std::size_t dimSize : outputDims) {
+        totalElements *= dimSize;
+    }
+
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
+    {
+        std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+
+        std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
+        std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
+
+        output[oIndex] = static_cast<O>(input_1[idx1] == input_2[idx2]);
+    }
+}
+
+namespace {
+static Registrar<AndImplForward_cpu> registrarAndImplForward_cpu_Float32(
+    {DataType::Float32, DataType::Float32, DataType::Float32},
+    Aidge::AndImpl_cpu_forward_kernel<float, float, float>);
+static Registrar<AndImplForward_cpu> registrarAndImplForward_cpu_Float64(
+    {DataType::Float64, DataType::Float64, DataType::Float64},
+    Aidge::AndImpl_cpu_forward_kernel<double, double, double>);
+static Registrar<AndImplForward_cpu> registrarAndImplForward_cpu_Int32(
+    {DataType::Int32, DataType::Int32, DataType::Int32},
+    Aidge::AndImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<AndImplForward_cpu> registrarAndImplForward_cpu_Int64(
+    {DataType::Int64, DataType::Int64, DataType::Int64},
+    Aidge::AndImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>);
+} // namespace
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_ANDIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp b/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f93abbbca9630a8b60290bae7660f6428b41b0b3
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp
@@ -0,0 +1,60 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_ +#define AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +class ArgMaxImplForward_cpu + : public Registrable<ArgMaxImplForward_cpu, + std::tuple<DataType, DataType>, + void(std::int32_t, + DimSize_t, + const std::vector<DimSize_t>&, + const void *, + void *)> {}; +class ArgMaxImplBackward_cpu + : public Registrable<ArgMaxImplBackward_cpu, + std::tuple<DataType, DataType>, + void(std::int32_t, + DimSize_t, + const std::vector<DimSize_t>&, + const void *, + void *)> {}; + +class ArgMaxImpl_cpu : public OperatorImpl { + public: + ArgMaxImpl_cpu(const ArgMax_Op& op) : OperatorImpl(op, "cpu") {} + + static std::unique_ptr<ArgMaxImpl_cpu> create(const ArgMax_Op &op) { + return std::make_unique<ArgMaxImpl_cpu>(op); + } + + public: + void forward() override; +}; + +namespace { +static Registrar<ArgMax_Op> registrarArgMaxImpl_cpu("cpu", Aidge::ArgMaxImpl_cpu::create); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ArgMaxImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ArgMaxImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cea7d973fce947d980ffe497581c56cd6fe59a8e --- /dev/null +++ b/include/aidge/backend/cpu/operator/ArgMaxImpl_forward_kernels.hpp @@ -0,0 +1,85 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_ARGMAXIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_ARGMAXIMPL_FORWARD_KERNEL_H_
+
+#include <algorithm>   // std::for_each
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::int32_t
+#include <functional>  // std::multiplies
+#include <numeric>     // std::accumulate
+#include <vector>
+#include <limits>
+
+#include "aidge/backend/cpu/operator/ArgMaxImpl.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void ArgMaxImpl_cpu_forward_kernel(std::int32_t axis_,
+                                   DimSize_t select_last_index,
+                                   const std::vector<DimSize_t>& inputDims,
+                                   const void* input_,
+                                   void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    const std::size_t axis = static_cast<std::size_t>(axis_);
+
+    std::size_t stride_post = 1;
+    for (std::size_t i = axis + 1; i < inputDims.size(); ++i) {
+        stride_post *= inputDims[i];
+    }
+    std::size_t stride_pre = 1;
+    for (std::size_t i = 0; i < axis; ++i) {
+        stride_pre *= inputDims[i];
+    }
+    const std::size_t dim_i = inputDims[axis];
+    for (std::size_t pre = 0; pre < stride_pre; ++pre) {
+        for (std::size_t post = 0; post < stride_post; ++post) {
+            const std::size_t idx_i = pre * dim_i * stride_post + post;
+            const std::size_t idx_o = pre * stride_post + post;
+            I max = std::numeric_limits<I>::lowest();  // lowest(), not min(): for floating-point types min() is the smallest positive value
+            for (std::size_t i = 0; i < dim_i; ++i) {
+                I curr_value = input[idx_i + i*stride_post];
+                if (select_last_index) {
+                    if (curr_value >= max) {
+                        output[idx_o] = static_cast<O>(i);
+                        max = curr_value;
+                    }
+                }
+                else {
+                    if (curr_value > max) {
+                        output[idx_o] = static_cast<O>(i);
+                        max = curr_value;
+                    }
+                }
+            }
+        }
+    }
+
+}
+
+namespace {
+static Registrar<ArgMaxImplForward_cpu> registrarArgMaxImplForward_cpu_Float32(
+    {DataType::Float32, DataType::Float32}, Aidge::ArgMaxImpl_cpu_forward_kernel<float, float>);
+static Registrar<ArgMaxImplForward_cpu> registrarArgMaxImplForward_cpu_Int32(
+    {DataType::Int32, DataType::Int32}, Aidge::ArgMaxImpl_cpu_forward_kernel<int, int>);
+static Registrar<ArgMaxImplForward_cpu> registrarArgMaxImplForward_cpu_Float64(
+    {DataType::Float64, DataType::Float64}, Aidge::ArgMaxImpl_cpu_forward_kernel<double, double>);
+} // namespace
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
index bba355e16958bb1a22bde1d24304d992a658ade8..fb14893fdc96f9d91f1b8ee6375fd536a7a8a1c7 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
@@ -38,7 +38,10 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
     const std::size_t nb_dims = inputDims.size();
     const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
 
-    if (axes.size() == 1) {
+    if (axes.empty()) {
+        std::copy_n(input, totalElements, output);
+    }
+    else if (axes.size() == 1) {
         const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>());
         const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1,
std::multiplies<std::size_t>());
diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3b265e134e7282a81476b5aa562237ecbc93141e
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp
@@ -0,0 +1,60 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_
+#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_
+
+#include <array>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/ReduceSum.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+class ReduceSumImplForward_cpu
+    : public Registrable<ReduceSumImplForward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const std::vector<std::int32_t>&,
+                              DimSize_t,
+                              const std::vector<DimSize_t>&,
+                              const void *,
+                              void *)> {};
+class ReduceSumImplBackward_cpu
+    : public Registrable<ReduceSumImplBackward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const std::vector<std::int32_t>&,
+                              DimSize_t,
+                              const std::vector<DimSize_t>&,
+                              const void *,
+                              void *)> {};
+
+class ReduceSumImpl_cpu : public OperatorImpl {
+   public:
+    ReduceSumImpl_cpu(const ReduceSum_Op& op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<ReduceSumImpl_cpu> create(const ReduceSum_Op &op) {
+        return std::make_unique<ReduceSumImpl_cpu>(op);
+    }
+
+   public:
+    void forward() override;
+};
+
+namespace {
+static Registrar<ReduceSum_Op> registrarReduceSumImpl_cpu("cpu", Aidge::ReduceSumImpl_cpu::create);
+} // namespace
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f215065af459a886a34a43d958ecd9e09ada4041
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ReduceSumImpl_forward_kernels.hpp
@@ -0,0 +1,118 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_FORWARD_KERNEL_H_
+
+#include <algorithm>   // std::for_each
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::int32_t
+#include <functional>  // std::multiplies
+#include <numeric>     // std::accumulate
+#include <vector>
+
+#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/operator/ReduceSum.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
+                                      DimSize_t /*keepDims*/,
+                                      const std::vector<DimSize_t>& inputDims,
+                                      const void* input_,
+                                      void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    const std::size_t nb_dims = inputDims.size();
+    const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
+
+    if (axes.empty()) {
+        std::copy_n(input, totalElements, output);
+    }
+    else if (axes.size() == 1) {
+        const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>());
+        const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>());
+
+        const std::size_t dim_i = inputDims[axes[0]];
+        for (std::size_t pre = 0; pre < stride_pre; ++pre) {
+            for (std::size_t post = 0; post < stride_post; ++post) {
+                const std::size_t idx_i = pre * dim_i * stride_post + post;
+                const std::size_t idx_o = pre * stride_post + post;
+                O sum = 0;
+                for (std::size_t i = 0; i < dim_i; ++i) {
+                    sum += input[idx_i + i*stride_post];
+                }
+                output[idx_o] = sum;
+            }
+        }
+    } else {
+        std::size_t outputElements = totalElements;
+
+        auto stride_post = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
+        stride_post[nb_dims - 1] = 1;
+        for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) {
+            stride_post[i] = stride_post[i+1]*inputDims[i+1];
+        }
+        auto stride_pre = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
+        stride_pre[0] = 1;
+        for (std::size_t i = 1; i < nb_dims; ++i) {
+            stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
+        }
+
+        const I* inputAccumulation = input;
+        I* outputAccumulation = nullptr;
+
+        for (const auto& axisInt : axes) {
+            const std::size_t a = static_cast<std::size_t>(axisInt);
+            outputElements /= inputDims[a];
+            outputAccumulation = new I[outputElements];
+            const std::size_t dim_i = inputDims[a];
+            for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
+                for (std::size_t post = 0; post < stride_post[a]; ++post) {
+                    const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
+                    const std::size_t idx_o = pre * stride_post[a] + post;
+                    I sum = 0;
+                    for (std::size_t i = 0; i < dim_i; ++i) {
+                        sum += inputAccumulation[idx_i + i*stride_post[a]];
+                    }
+                    outputAccumulation[idx_o] = sum;
+                }
+            }
+            std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; });
+            if (inputAccumulation != input) {
+                delete[] inputAccumulation;
+            }
+            inputAccumulation = outputAccumulation;
+        }
+
+        // Copy the accumulated result into the output buffer
+        std::copy(inputAccumulation, inputAccumulation + outputElements, output);
+        if (outputAccumulation) {
+            delete[]
outputAccumulation; + } + } +} + +namespace { +static Registrar<ReduceSumImplForward_cpu> registrarReduceSumImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>); +static Registrar<ReduceSumImplForward_cpu> registrarReduceSumImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::ReduceSumImpl_cpu_forward_kernel<int, int>); +static Registrar<ReduceSumImplForward_cpu> registrarReduceSumImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp index cc384c38e34d01887fc328d11de383aeef39fb8e..6ff8b3ddf39412aa6febdc188b7c27e8bfdcc178 100644 --- a/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp @@ -39,17 +39,23 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi for (std::size_t i = 0; i < preAxisElems; ++i) { for (std::size_t j = 0; j < postAxisElems; ++j) { + I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j]; + for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) { + std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; + maxVal = std::max(maxVal, input[inIdx]); + } + // Calculate sum of exponentials within the axis I sumExp = 0; for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) { std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; - sumExp += std::exp(input[inIdx]); + sumExp += std::exp(input[inIdx] - maxVal); } // Calculate softmax for the current slice along the axis for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) { std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; - output[inIdx] = std::exp(input[inIdx]) / sumExp; + output[inIdx] = std::exp(input[inIdx] - maxVal) / sumExp; } } } diff --git a/src/operator/AbsImpl.cpp b/src/operator/AbsImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1eb86c91289d3000f9cc0792e5ca2da29d4d8c24 --- /dev/null +++ b/src/operator/AbsImpl.cpp @@ -0,0 +1,42 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include "aidge/backend/cpu/operator/AbsImpl.hpp"
+
+#include <memory>
+#include <vector>
+
+#include "aidge/backend/cpu/operator/AbsImpl_forward_kernels.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Abs.hpp"
+#include "aidge/utils/Types.h"
+
+Aidge::Elts_t Aidge::AbsImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::AbsImpl_cpu::forward() {
+    const Abs_Op& op = static_cast<const Abs_Op&>(mOp);
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<AbsImplForward_cpu>::create({
+        op.getInput(0)->dataType(),
+        op.getOutput(0)->dataType()
+    });
+
+    // Call kernel
+    kernelFunc(
+        op.getInput(0)->size(),
+        op.getInput(0)->getImpl()->rawPtr(),
+        op.getOutput(0)->getImpl()->rawPtr()
+    );
+}
diff --git a/src/operator/AndImpl.cpp b/src/operator/AndImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bc447e74a1af797a69c942eab9ff816bc195388a
--- /dev/null
+++ b/src/operator/AndImpl.cpp
@@ -0,0 +1,46 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <vector>
+
+#include "aidge/operator/And.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/Broadcasting.hpp"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+
+#include "aidge/backend/cpu/operator/AndImpl.hpp"
+#include "aidge/backend/cpu/operator/AndImpl_forward_kernels.hpp"
+
+Aidge::Elts_t Aidge::AndImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::AndImpl_cpu::forward() {
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<AndImplForward_cpu>::create({
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+
+    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
+    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
+
+    // Call kernel
+    kernelFunc(inputDims0,
+               inputDims1,
+               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+               getCPUPtr(mOp.getRawInput(0)),
+               getCPUPtr(mOp.getRawInput(1)),
+               getCPUPtr(mOp.getRawOutput(0)));
+}
diff --git a/src/operator/ArgMaxImpl.cpp b/src/operator/ArgMaxImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..eda3c0b2de62e8e170c2562945999273bdb921a7
--- /dev/null
+++ b/src/operator/ArgMaxImpl.cpp
@@ -0,0 +1,34 @@
+/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/ArgMaxImpl.hpp" + +#include <memory> +#include <vector> + +#include "aidge/utils/Types.h" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/backend/cpu/operator/ArgMaxImpl_forward_kernels.hpp" + +void Aidge::ArgMaxImpl_cpu::forward() { + const ArgMax_Op& op_ = dynamic_cast<const ArgMax_Op&>(mOp); + // Find the correct kernel type + auto kernelFunc = Registrar<ArgMaxImplForward_cpu>::create({ + op_.getInput(0)->dataType(), + op_.getOutput(0)->dataType()}); + + // Call kernel + kernelFunc(op_.axis(), + op_.selectLastIndex(), + op_.getInput(0)->dims(), + op_.getInput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->getImpl()->rawPtr()); +} diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index b57ffd5fc2557d1f01582360f27ff83b40928f4e..8df5934c4daa08a04aec0abf705c41252095d751 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -45,7 +45,7 @@ void Aidge::ConvImpl1D_cpu::forward() { op_.dilationDims(), op_.kernelDims(), op_.getInput(0)->template dims<3>(), // input dimensions - dynamic_cast<const Conv_Op<2>&>(mOp).outChannels(), // outChannels + dynamic_cast<const Conv_Op<1>&>(mOp).outChannels(), // outChannels input0.getImpl()->rawPtr(), // input input1.getImpl()->rawPtr(), // weight op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias diff --git a/src/operator/ReduceSumImpl.cpp b/src/operator/ReduceSumImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d9b7eea71c6f6bd078ad6e98f1058ca1dafd1c11 --- /dev/null +++ b/src/operator/ReduceSumImpl.cpp @@ -0,0 +1,34 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" + +#include <memory> +#include <vector> + +#include "aidge/utils/Types.h" +#include "aidge/operator/ReduceSum.hpp" +#include "aidge/backend/cpu/operator/ReduceSumImpl_forward_kernels.hpp" + +void Aidge::ReduceSumImpl_cpu::forward() { + const ReduceSum_Op& op_ = dynamic_cast<const ReduceSum_Op&>(mOp); + // Find the correct kernel type + auto kernelFunc = Registrar<ReduceSumImplForward_cpu>::create({ + op_.getInput(0)->dataType(), + op_.getOutput(0)->dataType()}); + + // Call kernel + kernelFunc(op_.axes(), + op_.keepDims(), + op_.getInput(0)->dims(), + op_.getInput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->getImpl()->rawPtr()); +} diff --git a/unit_tests/operator/Test_AndImpl.cpp b/unit_tests/operator/Test_AndImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..053bb3ea4ed913bd388f3ae049c4d6402ad58d59 --- /dev/null +++ b/unit_tests/operator/Test_AndImpl.cpp @@ -0,0 +1,205 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/And.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("Same dimensions") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myAnd = And(); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); + } + } + SECTION("Broadcasting") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims1(nbDims, 1); + std::vector<DimSize_t> dims2(nbDims, 1); + std::vector<DimSize_t> 
expectedOutDims; + for (std::size_t i = 0; i < nbDims; i++) { + DimSize_t dim = dimSizeDist(gen); + if (boolDist(gen)) { + dims1[i] = dim; + } + if (boolDist(gen)) { + dims2[i] = dim; + } + expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + } + + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myAnd = And(); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + } + SECTION("Same size inputs") { + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 15},{31, 11},{22, 49}}, // + {{41, 10},{24, 51},{27, 52}}, // + {{26, 53},{27, 54},{28, 55}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{32, 59},{33, 60},{34, 61}}, // + {{35, 62},{36, 63},{37, 64}} // + }, // + { // + {{38, 65},{39, 66},{40, 67}}, // + {{41, 68},{42, 69},{43, 70}}, // + {{44, 71},{45, 72},{46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 47},{21, 48},{22, 49}}, // + {{23, 50},{24, 51},{25, 52}}, // + {{17, 53},{27, 26},{14, 33}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{72, 44},{33, 20},{27, 55}}, // + {{35, 24},{25, 63},{28, 64}} // + }, // + { // + {{32, 65},{39, 66},{40, 70}}, // + {{41, 53},{42, 60},{34, 70}}, // + {{44, 71},{30, 12},{46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { + { + {{1, 0},{0, 0},{1, 1}}, + {{0, 0},{1, 1},{0, 1}}, + {{0, 1},{1, 0},{0, 0}} + }, + { + {{1, 1},{1, 1},{1, 1}}, + {{0, 0},{1, 0},{0, 0}}, + {{1, 0},{0, 1},{0, 1}} + }, + { + {{0, 1},{1, 1},{1, 0}}, + {{1, 0},{1, 0},{0, 1}}, + {{1, 1},{0, 0},{1, 1}} + } + } + }); + + std::shared_ptr<Node> myAnd = And(); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + op->associateInput(0, input1); + op->associateInput(1, input2); + op->setBackend("cpu"); + op->setDataType(DataType::Int32); + myAnd->forward(); + + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + } + + SECTION("Broadcasting") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{10, 20},{22, 23},{20, 20}}, // + {{10, 15},{10, 29},{20, 20}}, // + {{26, 25},{33, 20},{10, 20}} // + } // + } // + }); // + + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{ 1, 1},{ 0, 0},{ 0, 1}}, // + {{ 1, 0},{ 1, 0},{ 0, 1}}, // + {{ 0, 0},{ 0, 1},{ 1, 1}} // + } // + } // + }); // + + std::shared_ptr<Node> myAnd = And(); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + op->associateInput(0, input_1); + op->associateInput(1, input_2); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + myAnd->forward(); + op->getOutput(0)->print(); + expectedOutput->print(); + REQUIRE(*op->getOutput(0) == *expectedOutput); + } +} 
\ No newline at end of file diff --git a/unit_tests/operator/Test_ArgMaxImpl.cpp b/unit_tests/operator/Test_ArgMaxImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9915d90423e976db1bdd2a694a2cfd7beb380cee --- /dev/null +++ b/unit_tests/operator/Test_ArgMaxImpl.cpp @@ -0,0 +1,227 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <memory> +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/operator/Conv.hpp" + +#include "aidge/backend/cpu.hpp" +#include "aidge/utils/TensorUtils.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims(nbDims); + std::uniform_int_distribution<std::int32_t> axisDist(std::int32_t(0), std::int32_t(nbDims-1)); + std::int32_t axis = axisDist(gen); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + if (i == axis) { + expectedOutDims[i] = 1; + } + else { + expectedOutDims[i] = dims[i]; + } + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + myInput->zeros(); + std::shared_ptr<Node> myArgMax = ArgMax(axis); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("Not KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims; + std::uniform_int_distribution<std::int32_t> axisDist(std::int32_t(0), std::int32_t(nbDims-1)); + std::int32_t axis = axisDist(gen); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + if(i != axis) { + expectedOutDims.push_back(dims[i]); + } + } + if(expectedOutDims.empty()) { + expectedOutDims.push_back(1); + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myArgMax = ArgMax(axis, false); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax 
-> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + } + SECTION("3D Tensor") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,2,3,4> { + { + { + { 1.0, 2.0, 3.0, 4.0}, + { 8.0, 0.0, 17.0, 1.0}, + { 5.0, 10.0, 6.0, 0.0} + }, + { + { 7.0, 1.0, 9.0, 4.0}, + { 0.0, 8.0, 4.0, 2.0}, + { 9.0, 2.0, 0.0, 5.0} + } + } + }); + SECTION("Axis 2") { + + Tensor myOutput = Tensor(Array3D<float,2,3, 1> { + { + { + {3.0}, + {2.0}, + {1.0} + }, + { + {2.0}, + {1.0}, + {0.0} + } + } + }); + + std::shared_ptr<Node> myArgMax = ArgMax(2); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myArgMax->forward(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + SECTION("Axis 2 with keep_dims false") { + + Tensor myOutput = Tensor(Array2D<float,2,3> { + { + { 3.0, 2.0, 1.0 }, + { 2.0, 1.0, 0.0 } + } + }); + + std::shared_ptr<Node> myArgMax = ArgMax(2,0); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myArgMax->forward(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + SECTION("Axis 1") { + Tensor myOutput = Tensor(Array3D<float,2,1,4> { + { + { + { 1.0, 2.0, 1.0, 0.0 } + }, + { + { 2.0, 1.0, 0.0, 2.0 } + } + } + }); + + std::shared_ptr<Node> myArgMax = ArgMax(1); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myArgMax->forward(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + SECTION("Axis 0") { + Tensor myOutput = Tensor(Array3D<float,1,3,4> { + { + { + { 1.0, 0.0, 1.0, 0.0 }, + { 0.0, 1.0, 0.0, 1.0 }, + { 1.0, 0.0, 0.0, 1.0 } + } + } + }); + + std::shared_ptr<Node> myArgMax = ArgMax(0); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + std::cout << " ............... 
"<< std::endl; + myArgMax->forward(); + op->getOutput(0)->print(); + std::cout <<"------"<<std::endl; + myOutput.print(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + } + SECTION("Select_Last_Index") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array1D<float,10> { + { + 1.0, 5.0, 9.0, 0.0, 6.0, 2.0, 9.0, 4.0, 3.0, 9.0 + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {{9}}); + + std::shared_ptr<Node> myArgMax = ArgMax(0, 1, 1); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myArgMax->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myOutput); + + } +} \ No newline at end of file diff --git a/unit_tests/operator/Test_ReduceMeanImpl.cpp b/unit_tests/operator/Test_ReduceMeanImpl.cpp index 0269622740b5a0282a093d509d4b565f7acc3e76..dd647c7ba3f90fe7f3554aae7133e97ffa9c99ba 100644 --- a/unit_tests/operator/Test_ReduceMeanImpl.cpp +++ b/unit_tests/operator/Test_ReduceMeanImpl.cpp @@ -11,6 +11,8 @@ #include <catch2/catch_test_macros.hpp> #include <memory> +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution #include "aidge/data/Tensor.hpp" #include "aidge/operator/ReduceMean.hpp" @@ -22,6 +24,129 @@ using namespace Aidge; TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims(nbDims); + std::vector<std::int32_t> axes; + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + expectedOutDims[i] = dims[i]; + if(boolDist(gen)) { + axes.push_back(i); + expectedOutDims[i] = 1; + } + } + if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions + std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1); + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + myInput->zeros(); + std::shared_ptr<Node> myReduceMean = ReduceMean(axes, true); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("Not KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims; + std::vector<std::int32_t> axes; + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + if(boolDist(gen)) { + axes.push_back(i); + } + else { + expectedOutDims.push_back(dims[i]); + } + } + if 
(axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions + expectedOutDims = std::vector<DimSize_t>{1}; + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceMean = ReduceMean(axes, false); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("NoopWithEmptyAxes") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceMean = ReduceMean(std::vector<int32_t>{}, false, true); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); + } + } + SECTION("Not NoopWithEmptyAxes") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceMean = ReduceMean({}, false, false); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + REQUIRE(op->getOutput(0)->nbDims() == 1); + REQUIRE(op->getOutput(0)->size() == 1); + } + } + } SECTION("KeepDims") { SECTION("test 1") { std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { @@ -157,7 +282,7 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { {18.25} }); - std::shared_ptr<Node> myReduceMean = ReduceMean({0, 1, 2}, 0); + std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0); auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); op->associateInput(0,myInput); op->setDataType(DataType::Float32); @@ -179,15 +304,42 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { {0.1293547f} }); - std::shared_ptr<Node> myReduceMean = ReduceMean({0, 1}, 0); + std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0); auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); op->associateInput(0,myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceMean->forward(); - op->getOutput(0)->print(); - // approxEq<float>(*(op->getOutput(0)), *myOutput); + REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput)); } + SECTION("noop_with_empty_axes") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + 
}); + + std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0, 1); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceMean->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myInput); + } } } \ No newline at end of file diff --git a/unit_tests/operator/Test_ReduceSumImpl.cpp b/unit_tests/operator/Test_ReduceSumImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..49569d1f65ff6c51f9681632b16375605ab326e7 --- /dev/null +++ b/unit_tests/operator/Test_ReduceSumImpl.cpp @@ -0,0 +1,345 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <memory> +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/ReduceSum.hpp" +#include "aidge/operator/Conv.hpp" + +#include "aidge/backend/cpu.hpp" +#include "aidge/utils/TensorUtils.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims(nbDims); + std::vector<std::int32_t> axes; + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + expectedOutDims[i] = dims[i]; + if(boolDist(gen)) { + axes.push_back(i); + expectedOutDims[i] = 1; + } + } + if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions + std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1); + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + myInput->zeros(); + std::shared_ptr<Node> myReduceSum = ReduceSum(axes, true); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("Not KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims; + std::vector<std::int32_t> axes; + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + if(boolDist(gen)) { + 
axes.push_back(i); + } + else { + expectedOutDims.push_back(dims[i]); + } + } + if (axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions + expectedOutDims = std::vector<DimSize_t>{1}; + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceSum = ReduceSum(axes, false); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("NoopWithEmptyAxes") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceSum = ReduceSum(std::vector<int32_t>{}, false, true); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); + } + } + SECTION("Not NoopWithEmptyAxes") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceSum = ReduceSum({}, false, false); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + REQUIRE(op->getOutput(0)->nbDims() == 1); + REQUIRE(op->getOutput(0)->size() == 1); + } + } + } + SECTION("KeepDims") { + SECTION("test 1") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + }); + Tensor myOutput = Tensor(Array3D<float,3,1,2> { + { + + {{ 25.0, 3.0 }}, + {{ 70.0, 3.0 }}, + {{ 115.0, 3.0 }} + } + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 1); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + SECTION("test 2") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> { + { + { + { 0.0, 0.0 }, + { 1.0, 1.0 }, + { 2.0, 2.0 } + }, + { + { 3.0, 3.0 }, + { 4.0, 4.0 }, + { 5.0, 5.0 } + }, + { + { 6.0, 6.0 }, + { 7.0, 7.0 }, + { 8.0, 8.0 } + } + } + }); + Tensor myOutput = Tensor(Array3D<float,3,1,1> { + { + + {{ 6.0 }}, + {{ 24.0 }}, + {{ 42.0 }} + } + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({1, 2}, 1); + auto op = 
std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + myOutput.print(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == myOutput); + } + } + SECTION("not_KeepDims") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> { + { + { 25.0, 3.0 }, + { 70.0, 3.0 }, + { 115.0, 3.0 } + } + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 0); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myOutput); + + } + SECTION("all_axes") { + SECTION("1") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> { + {219.0} + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myOutput); + } + SECTION("2") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<float,5,4> { + {{ 0.004232f, 0.105120f, 0.045124f, 0.009205f}, + { 0.000766f, 0.272162f, 0.503560f, 0.044163f}, + { 0.049755f, 0.000305f, 0.143634f, 0.013253f}, + { 0.096258f, 0.311231f, 0.358143f, 0.000452f}, + { 0.468617f, 0.015693f, 0.145316f, 0.000105f}} + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> { + {2.587094f} + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({0, 1}, 0); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput)); + } + SECTION("noop_with_empty_axes") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0, 1); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myInput); + } + } +} \ No newline at end of file
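
Reviewer note: the And kernel relies on getMultiDimIndices() and getFlattenedIndex() from Broadcasting.hpp, whose implementations are not part of this patch. The standalone sketch below (hypothetical helper names, not Aidge code) shows the row-major index arithmetic those functions are assumed to perform: an output coordinate is unraveled, then re-flattened against each input's dimensions, with any size-1 dimension pinned to coordinate 0. That pinning is what makes broadcasting work once getBroadcastedDims() has padded both inputs to the output rank.

#include <cstddef>
#include <cstdio>
#include <vector>

// Unravel a flat output index into per-dimension coordinates (row-major).
std::vector<std::size_t> multiDimIndices(const std::vector<std::size_t>& dims, std::size_t flat) {
    std::vector<std::size_t> idx(dims.size());
    for (std::size_t d = dims.size(); d-- > 0;) {
        idx[d] = flat % dims[d];
        flat /= dims[d];
    }
    return idx;
}

// Re-flatten coordinates against an input's dims; a size-1 (broadcast)
// dimension always contributes coordinate 0.
std::size_t flattenedIndex(const std::vector<std::size_t>& dims, const std::vector<std::size_t>& idx) {
    std::size_t flat = 0;
    for (std::size_t d = 0; d < dims.size(); ++d) {
        flat = flat * dims[d] + (dims[d] == 1 ? 0 : idx[d]);
    }
    return flat;
}

int main() {
    // Output shape {2, 3}; one input broadcast from shape {1, 3}.
    const std::vector<std::size_t> outDims{2, 3};
    const std::vector<std::size_t> inDims{1, 3};
    for (std::size_t o = 0; o < 6; ++o) {
        std::printf("out %zu -> in %zu\n", o, flattenedIndex(inDims, multiDimIndices(outDims, o)));
    }
    return 0; // output indices 0..5 map to input indices 0,1,2,0,1,2
}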
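
Reviewer note: in the ArgMax kernel, select_last_index only switches the comparison from '>' to '>=', so on ties the last occurrence of the maximum wins instead of the first (matching ONNX ArgMax semantics). A minimal standalone sketch of that tie-breaking rule, reusing the data from the Select_Last_Index unit test (the maximum 9.0 occurs at indices 2, 6 and 9):

#include <cstddef>
#include <cstdio>
#include <vector>

// Mirrors the kernel's tie-breaking: ">=" keeps updating on ties (last
// occurrence wins), ">" keeps the first occurrence.
std::size_t argMax(const std::vector<float>& v, bool selectLastIndex) {
    std::size_t best = 0;
    for (std::size_t i = 1; i < v.size(); ++i) {
        if (selectLastIndex ? (v[i] >= v[best]) : (v[i] > v[best])) {
            best = i;
        }
    }
    return best;
}

int main() {
    const std::vector<float> data{1.f, 5.f, 9.f, 0.f, 6.f, 2.f, 9.f, 4.f, 3.f, 9.f};
    std::printf("first max index: %zu\n", argMax(data, false)); // 2
    std::printf("last max index:  %zu\n", argMax(data, true));  // 9
    return 0;
}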
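
Reviewer note: the SoftmaxImpl change applies the max-subtraction identity softmax(x) = softmax(x - max(x)); the common factor exp(-max) cancels between numerator and denominator, so the result is unchanged, but every exponent becomes <= 0 and exp() can no longer overflow. A minimal standalone sketch (plain C++, independent of the Aidge API) showing that the shifted form stays finite where the naive form would produce inf/inf = NaN:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Numerically stable softmax: shifting by the maximum keeps every
// exponent <= 0, so std::exp() cannot overflow.
std::vector<float> softmaxShifted(const std::vector<float>& x) {
    const float maxVal = *std::max_element(x.begin(), x.end());
    std::vector<float> out(x.size());
    float sumExp = 0.0f;
    for (std::size_t i = 0; i < x.size(); ++i) {
        out[i] = std::exp(x[i] - maxVal);
        sumExp += out[i];
    }
    for (float& v : out) { v /= sumExp; }
    return out;
}

int main() {
    // Naive exp(1000.f) overflows float to inf; the shifted form is finite
    // and the probabilities still sum to 1.
    for (float v : softmaxShifted({1000.0f, 1001.0f, 1002.0f})) {
        std::printf("%f\n", v); // ~0.090031, 0.244728, 0.665241
    }
    return 0;
}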