diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 694275067b8b9708bab868da83688716f34e4fae..0faca9651d8fa7688693df26aef3d815ab47b0ac 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -40,6 +40,7 @@ #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp" #include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl.hpp" +#include "aidge/backend/cpu/operator/RoundImpl.hpp" #include "aidge/backend/cpu/operator/ScalingImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" #include "aidge/backend/cpu/operator/SqrtImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/RoundImpl.hpp b/include/aidge/backend/cpu/operator/RoundImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c595e251cc18348b6f732f1c36a05de54f647204 --- /dev/null +++ b/include/aidge/backend/cpu/operator/RoundImpl.hpp @@ -0,0 +1,34 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ROUNDIMPL_H_ +#define AIDGE_CPU_OPERATOR_ROUNDIMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/Round.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using RoundImpl_cpu = OperatorImpl_cpu<Round_Op, + void(const std::size_t, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(Round_Op, "cpu", Aidge::RoundImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ROUNDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/RoundImpl_kernels.hpp b/include/aidge/backend/cpu/operator/RoundImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ba9c63bc3618ba81e238d7721147c894b54cf832 --- /dev/null +++ b/include/aidge/backend/cpu/operator/RoundImpl_kernels.hpp @@ -0,0 +1,46 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
/**
 * @brief Element-wise rounding kernel for the CPU backend.
 *
 * Rounds each of the first `inputLenght` elements of the input buffer to an
 * integral value with std::nearbyint and stores the result, converted to O,
 * in the output buffer.
 *
 * std::nearbyint is used instead of std::round because std::round sends
 * halfway cases away from zero, whereas std::nearbyint follows the current
 * floating-point rounding mode; under the default mode halfway cases go to
 * the nearest even value (2.5 -> 2, 3.5 -> 4), which is the halves rule of
 * the ONNX Round operator.
 *
 * @tparam I Element type read from the input buffer.
 * @tparam O Element type written to the output buffer.
 * @param inputLenght Number of elements to process; 0 is a no-op.
 * @param input_ Input buffer, accessed as `const I*`.
 * @param output_ Output buffer, accessed as `O*`.
 */
template <class I, class O>
void RoundImpl_cpu_forward_kernel(const std::size_t inputLenght,
                                  const void* input_,
                                  void* output_) {
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);

    for (std::size_t i = 0; i < inputLenght; ++i) {
        // Round in the precision of the input type. Narrowing to float first
        // would silently alter double inputs before rounding (e.g.
        // 0.5000000001 collapses to 0.5f and rounds to 0 instead of 1).
        output[i] = static_cast<O>(std::nearbyint(input[i]));
    }
}
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Round.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/operator/RoundImpl.hpp" +#include "aidge/backend/cpu/operator/RoundImpl_kernels.hpp" + +template <> +void Aidge::RoundImpl_cpu::forward() { + std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0)); + std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)); + AIDGE_ASSERT(in0, "missing input #0"); + + // Find the correct kernel type + const auto impl = Registrar<RoundImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(in0->size(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); +} +template <> +void Aidge::RoundImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Round_Op on backend cpu"); +} \ No newline at end of file diff --git a/unit_tests/operator/Test_RoundImpl.cpp b/unit_tests/operator/Test_RoundImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f6f3c2ac0cf1a1850315902ecd8c1a5b6e84e023 --- /dev/null +++ b/unit_tests/operator/Test_RoundImpl.cpp @@ -0,0 +1,115 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <chrono> +#include <iostream> +#include <memory> +#include <numeric> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <iomanip> +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Round.hpp" +#include "aidge/utils/TensorUtils.hpp" + +namespace Aidge { + +TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") { + constexpr std::uint16_t NBTRIALS = 15; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(-15, 15); + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3)); + + // Create BitShift Operator + std::shared_ptr<Node> myRound = Round(); + auto op = std::static_pointer_cast<OperatorTensor>(myRound-> getOperator()); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + // Create 2 input Tensors + std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); + op->associateInput(0,T0); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + // Create results Tensor + std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(); + Tres->setDataType(DataType::Float32); + Tres->setBackend("cpu"); + + // To measure execution time of 'BitShift_Op::forward()' member function call + std::chrono::time_point<std::chrono::system_clock> start; + std::chrono::time_point<std::chrono::system_clock> end; + std::chrono::duration<double, std::micro> duration{}; + + SECTION("Round [Forward]") { + SECTION("Test Forward Kernel") { + std::size_t number_of_operation = 0; + + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + + // generate 2 random 
Tensors + const std::size_t nbDims = nbDimsDist(gen); + std::vector<std::size_t> dims; + for (std::size_t i = 0; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); + } + const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + number_of_operation += nb_elements; + + // without broadcasting + float* array0 = new float[nb_elements]; + float* result = new float[nb_elements]; + + for (std::size_t i = 0; i < nb_elements; ++i) { + array0[i] = valueDist(gen); + result[i] = std::nearbyint(array0[i]); + + } + + // input0 + T0->resize(dims); + T0 -> getImpl() -> setRawPtr(array0, nb_elements); + + // results + Tres->resize(dims); + Tres -> getImpl() -> setRawPtr(result, nb_elements); + + op->forwardDims(); + start = std::chrono::system_clock::now(); + myRound->forward(); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + + bool is_eq = approxEq<float>(*(op->getOutput(0)), *Tres); + + auto Output = *(op->getOutput(0)); + + auto prt = Output.getImpl()->rawPtr(); + + REQUIRE(is_eq); + + + delete[] array0; + delete[] result; + + + } + std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; + std::cout << "total time: " << duration.count() << "μs" << std::endl; + } + } +} // namespace Aidge +} \ No newline at end of file