diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 5db19a2b7a2f88dae13d8baf24cf95f961e730a0..ffc03ae5d6cb1d44637bc223ce4099af88f08070 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -29,6 +29,7 @@ #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" #include "aidge/backend/cpu/operator/DivImpl.hpp" +#include "aidge/backend/cpu/operator/EqualImpl.hpp" #include "aidge/backend/cpu/operator/ErfImpl.hpp" #include "aidge/backend/cpu/operator/ExpandImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/EqualImpl.hpp b/include/aidge/backend/cpu/operator/EqualImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e2489096067a49139f6291898056d525a77db522 --- /dev/null +++ b/include/aidge/backend/cpu/operator/EqualImpl.hpp @@ -0,0 +1,32 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_H_ +#define AIDGE_CPU_OPERATOR_EQUALIMPL_H_ + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/Equal.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> + +namespace Aidge { +// Operator implementation entry point for the backend +using EqualImpl_cpu = OperatorImpl_cpu<Equal_Op, + void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(Equal_Op, "cpu", Aidge::EqualImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp b/include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3c8ff0f4742e0393efd8cbbf637822c443edffb3 --- /dev/null +++ b/include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp @@ -0,0 +1,163 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ +#include <cstdint> +#include "aidge/backend/cpu/operator/EqualImpl.hpp" +#include "aidge/utils/Registrar.hpp" +#include <numeric> +namespace Aidge { + +namespace { +// suppose values are contiguous in memory +template <class I, class O> +void equal_contiguous_arrays(const std::size_t input1size, + const std::size_t input2size, + const std::size_t output1size, + const I* input1, + const I* input2, + O* output) +{ + for (std::size_t i = 0; i < output1size; ++i) + { + const std::size_t in1_id = (input1size != 1) ? i : 0; + const std::size_t in2_id = (input2size != 1) ? i : 0; + output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]); + } +} +} + + +template <class I, class O> +void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, + std::vector<std::size_t> dims1, + const std::vector<std::size_t>& outputDims, + const void* input0_, + const void* input1_, + void* output_) { + + const I* input_0 = static_cast<const I*>(input0_); + const I* input_1 = static_cast<const I*>(input1_); + O* output = static_cast<O*>(output_); + + // [5,2,1,7] & [2,6,7] + // 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7] + // 2. Find the highest equal dimension -> 3 + // Exception: if the first diverging dimension is the last one, then -> 4 (dims.size()) + // 3. Compute the highest number of contiguous data -> 7 + // 4. Compute stride and offset step for the broadcast mechanism + // 5. 
Call a simple kernel + + // special case for equal dimensions, the kernel is called with the entire arrays at once + if (dims0 == dims1) { + const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); + for (std::size_t i = 0; i < input0_contiguous_size; ++i) + { + output[i] = static_cast<O>(input_0[i] == input_1[i]); + } + return; + } + + // set dimensions to be of equal size by filling the smallest one with ones. + if (dims0.size() > dims1.size()) { + dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1)); + } + else if (dims1.size() > dims0.size()) { + dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1)); + } + + const std::size_t nbDims = dims0.size(); + + // Find the highest equal dimension + // std::size_t contiguousIdx = nbDims - 1; + std::size_t contiguousIdx = nbDims; + while (contiguousIdx-- > 0) { + // for (; contiguousIdx+1 > 0; --contiguousIdx) { + if (dims0[contiguousIdx] != dims1[contiguousIdx]) { + if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1 + const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? 
dims0 : dims1; + while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) { + --contiguousIdx; + } + } + break; + } + } + ++contiguousIdx; + + // Compute the highest number of contiguous data for each Tensor + const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + + // initialize strides to iterate through data because of broadcasting + std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx); + if (contiguousIdx > 0) { + stride_post0[contiguousIdx - 1] = 1; + stride_post1[contiguousIdx - 1] = 1; + for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) { + stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]); + stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]); + } + for (std::size_t i = 0; i != contiguousIdx; ++i) { + stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (dims1[i] == 1) ? 
1 - stride_post1[i] : 1; + } + } + + // variables for arrays offsets + std::size_t offsetIn0 = 0; + std::size_t offsetIn1 = 0; + std::size_t offsetOut = 0; + + + std::size_t dim = contiguousIdx - 1; + const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>()); + for (std::size_t stack = 0; stack < nbStacks;) { + equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size, + input_0 + offsetIn0*input0_contiguous_size, + input_1 + offsetIn1*input1_contiguous_size, + output + offsetOut*output_contiguous_size); + if (++stack < nbStacks) { + std::size_t tmp_stack = stack; + while(tmp_stack % outputDims[dim] == 0) { + tmp_stack /= outputDims[dim]; + dim--; + } + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = contiguousIdx - 1; + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ */ diff --git a/src/operator/EqualImpl.cpp b/src/operator/EqualImpl.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..5926212e8453a32e54de7691343d44f9c6849a05 --- /dev/null +++ b/src/operator/EqualImpl.cpp @@ -0,0 +1,61 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <memory> // std::shared_ptr +#include <numeric> // std::accumulate +#include <stdexcept> // std::runtime_error +#include <vector> + +#include "aidge/operator/Equal.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +#include "aidge/backend/cpu/operator/EqualImpl.hpp" +#include "aidge/backend/cpu/operator/EqualImpl_kernels.hpp" + +template <> +void Aidge::EqualImpl_cpu::forward() { + const Equal_Op& op = static_cast<const Equal_Op&>(mOp); + // Check inputs + AIDGE_ASSERT(op.getInput(0), "missing input in Equal operator"); + AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Equal forward because the 0-th input has no implementation."); + + AIDGE_ASSERT(op.getInput(1), "missing input in Equal operator"); + AIDGE_ASSERT(op.getInput(1)->hasImpl(), "cannot run Equal forward because the 1st input has no implementation."); + + AIDGE_ASSERT(op.getInput(1)->dataType() == op.getInput(0)->dataType(), "Cannot Equal inputs with two different data types."); + + // Find the correct kernel type + const auto impl = Registrar<EqualImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). 
We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback; + const auto& input0 = op.getInput(0)->refCastFrom(input0Fallback, *op.getInput(0)); + const auto& input1 = op.getInput(1)->refCastFrom(input1Fallback, *op.getInput(1)); + + + impl.forward(op.getInput(0)->dims(), + op.getInput(1)->dims(), + op.getOutput(0)->dims(), + input0.getImpl()->rawPtr(), + input1.getImpl()->rawPtr(), + getCPUPtr(op.getRawOutput(0))); +} + +template <> +void Aidge::EqualImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Equal_Op on backend cpu"); +} diff --git a/unit_tests/operator/Test_EqualImpl.cpp b/unit_tests/operator/Test_EqualImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a229b8ce3ebcd7672323f2585e3a48343f544c3d --- /dev/null +++ b/unit_tests/operator/Test_EqualImpl.cpp @@ -0,0 +1,205 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Equal.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("Same dimensions") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); + } + } + SECTION("Broadcasting") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + 
DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims1(nbDims, 1); + std::vector<DimSize_t> dims2(nbDims, 1); + std::vector<DimSize_t> expectedOutDims; + for (std::size_t i = 0; i < nbDims; i++) { + DimSize_t dim = dimSizeDist(gen); + if (boolDist(gen)) { + dims1[i] = dim; + } + if (boolDist(gen)) { + dims2[i] = dim; + } + expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + } + + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + } + SECTION("Same size inputs") { + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 15},{31, 11},{22, 49}}, // + {{41, 10},{24, 51},{27, 52}}, // + {{26, 53},{27, 54},{28, 55}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{32, 59},{33, 60},{34, 61}}, // + {{35, 62},{36, 63},{37, 64}} // + }, // + { // + {{38, 65},{39, 66},{40, 67}}, // + {{41, 68},{42, 69},{43, 70}}, // + {{44, 71},{45, 72},{46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 47},{21, 48},{22, 49}}, // + {{23, 50},{24, 51},{25, 52}}, // + {{17, 53},{27, 26},{14, 33}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{72, 44},{33, 20},{27, 55}}, // + {{35, 24},{25, 63},{28, 64}} // + }, // + { // + {{32, 65},{39, 66},{40, 70}}, // + {{41, 53},{42, 
60},{34, 70}}, // + {{44, 71},{30, 12},{46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { + { + {{1, 0},{0, 0},{1, 1}}, + {{0, 0},{1, 1},{0, 1}}, + {{0, 1},{1, 0},{0, 0}} + }, + { + {{1, 1},{1, 1},{1, 1}}, + {{0, 0},{1, 0},{0, 0}}, + {{1, 0},{0, 1},{0, 1}} + }, + { + {{0, 1},{1, 1},{1, 0}}, + {{1, 0},{1, 0},{0, 1}}, + {{1, 1},{0, 0},{1, 1}} + } + } + }); + + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0, input1); + op->associateInput(1, input2); + op->setBackend("cpu"); + op->setDataType(DataType::Int32); + myEqual->forward(); + + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + } + + SECTION("Broadcasting") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{10, 20},{22, 23},{20, 20}}, // + {{10, 15},{10, 29},{20, 20}}, // + {{26, 25},{33, 20},{10, 20}} // + } // + } // + }); // + + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{ 1, 1},{ 0, 0},{ 0, 1}}, // + {{ 1, 0},{ 1, 0},{ 0, 1}}, // + {{ 0, 0},{ 0, 1},{ 1, 1}} // + } // + } // + }); // + + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0, input_1); + op->associateInput(1, input_2); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + myEqual->forward(); + op->getOutput(0)->print(); + expectedOutput->print(); + REQUIRE(*op->getOutput(0) == *expectedOutput); + } +} \ No newline at end of file