Skip to content
Snippets Groups Projects
Commit 45bedc95 authored by Houssem ROUIS's avatar Houssem ROUIS Committed by Maxence Naud
Browse files

add Equal operator

parent a30d9359
No related branches found
No related tags found
No related merge requests found
@@ -29,6 +29,7 @@
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
 #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
 #include "aidge/backend/cpu/operator/DivImpl.hpp"
+#include "aidge/backend/cpu/operator/EqualImpl.hpp"
 #include "aidge/backend/cpu/operator/ErfImpl.hpp"
 #include "aidge/backend/cpu/operator/ExpandImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_H_
#define AIDGE_CPU_OPERATOR_EQUALIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Equal.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend.
// Kernel signature, in order:
//   dims of input #0 (by value, may be padded by the kernel),
//   dims of input #1 (by value, may be padded by the kernel),
//   broadcast output dims,
//   raw pointer to input #0, raw pointer to input #1, raw output pointer.
using EqualImpl_cpu = OperatorImpl_cpu<Equal_Op,
    void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>;

// Implementation entry point registration to Operator
REGISTRAR(Equal_Op, "cpu", Aidge::EqualImpl_cpu::create);
}  // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_
#include <cstddef>     // std::size_t
#include <cstdint>     // std::int32_t
#include <functional>  // std::multiplies
#include <memory>      // std::unique_ptr, std::make_unique
#include <numeric>     // std::accumulate
#include <vector>      // std::vector

#include "aidge/backend/cpu/operator/EqualImpl.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
// NOTE(review): this helper previously sat in an unnamed namespace. Unnamed
// namespaces must not be used in headers: every including TU gets its own
// internal-linkage copy. A plain function template is header-safe (templates
// may be defined in multiple TUs) and keeps the same call sites working.
/// @brief Element-wise equality over contiguous arrays, with scalar broadcast.
///        Suppose values are contiguous in memory.
/// @tparam I scalar type of both inputs
/// @tparam O scalar type of the output (receives 0/1 values)
/// @param input1size number of elements readable from input1 (1 means broadcast)
/// @param input2size number of elements readable from input2 (1 means broadcast)
/// @param output1size number of output elements to write
/// @param input1 first input buffer
/// @param input2 second input buffer
/// @param output pre-allocated output buffer
template <class I, class O>
void equal_contiguous_arrays(const std::size_t input1size,
                             const std::size_t input2size,
                             const std::size_t output1size,
                             const I* input1,
                             const I* input2,
                             O* output)
{
    for (std::size_t i = 0; i < output1size; ++i)
    {
        // An input of size 1 is broadcast: its single element is compared
        // against every element of the other input.
        const std::size_t in1_id = (input1size != 1) ? i : 0;
        const std::size_t in2_id = (input2size != 1) ? i : 0;
        output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]);
    }
}
/// @brief Forward kernel for the Equal operator: element-wise `input0 == input1`
///        with NumPy-style broadcasting.
/// @tparam I scalar type of both inputs
/// @tparam O scalar type of the output (receives 0/1 values)
/// @param dims0 dimensions of input #0 (by value: locally padded with leading 1s)
/// @param dims1 dimensions of input #1 (by value: locally padded with leading 1s)
/// @param outputDims broadcast output dimensions — assumed consistent with
///        dims0/dims1 (computed upstream by forwardDims)
/// @param input0_ raw pointer to input #0 data
/// @param input1_ raw pointer to input #1 data
/// @param output_ raw pointer to pre-allocated output data
template <class I, class O>
void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
                                  std::vector<std::size_t> dims1,
                                  const std::vector<std::size_t>& outputDims,
                                  const void* input0_,
                                  const void* input1_,
                                  void* output_) {

    const I* input_0 = static_cast<const I*>(input0_);
    const I* input_1 = static_cast<const I*>(input1_);
    O* output = static_cast<O*>(output_);

    // Broadcast strategy, e.g. [5,2,1,7] & [2,6,7]:
    // 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
    // 2. Find the highest equal dimension -> 3
    //    Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
    // 3. Compute the highest number of contiguous data -> 7
    // 4. Compute stride and offset step for the broadcast mechanism
    // 5. Call a simple kernel

    // special case for equal dimensions, the kernel is called with the entire arrays at once
    if (dims0 == dims1) {
        const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
        for (std::size_t i = 0; i < input0_contiguous_size; ++i)
        {
            output[i] = static_cast<O>(input_0[i] == input_1[i]);
        }
        return;
    }

    // set dimensions to be of equal size by filling the smallest one with ones.
    if (dims0.size() > dims1.size()) {
        dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
    }
    else if (dims1.size() > dims0.size()) {
        dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
    }

    const std::size_t nbDims = dims0.size();

    // Find the highest equal dimension: scan from the innermost axis outwards
    // until dims0 and dims1 first diverge.
    // std::size_t contiguousIdx = nbDims - 1;
    std::size_t contiguousIdx = nbDims;
    while (contiguousIdx-- > 0) {
        // for (; contiguousIdx+1 > 0; --contiguousIdx) {
        if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
            if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
                // Skip over the trailing size-1 axes of the broadcast input so
                // they are folded into the contiguous (innermost) region.
                const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
                while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
                    --contiguousIdx;
                }
            }
            break;
        }
    }
    // contiguousIdx is now the first axis of the contiguous region
    // (0 when the whole shape is contiguous for at least one input).
    ++contiguousIdx;

    // Compute the highest number of contiguous data for each Tensor
    const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
    const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
    const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());

    // initialize strides to iterate through data because of broadcasting
    // stride_post*: number of contiguous chunks spanned by one step of axis i.
    // stride_step*: offset increment when axis i rolls over — 1 to advance
    //               normally, or (1 - stride_post) to rewind a broadcast
    //               (size-1) axis back to its start.
    std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
    std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
    std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
    std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
    if (contiguousIdx > 0) {
        stride_post0[contiguousIdx - 1] = 1;
        stride_post1[contiguousIdx - 1] = 1;
        // Loop ends when i wraps around to size_t(-1): i.e. after index 0.
        for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
            stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
            stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
        }
        for (std::size_t i = 0; i != contiguousIdx; ++i) {
            stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
            stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
        }
    }

    // variables for arrays offsets (counted in contiguous chunks, not elements)
    std::size_t offsetIn0 = 0;
    std::size_t offsetIn1 = 0;
    std::size_t offsetOut = 0;

    std::size_t dim = contiguousIdx - 1;
    // Total number of contiguous chunks to process.
    const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
    for (std::size_t stack = 0; stack < nbStacks;) {
        equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
                                     input_0 + offsetIn0*input0_contiguous_size,
                                     input_1 + offsetIn1*input1_contiguous_size,
                                     output + offsetOut*output_contiguous_size);
        if (++stack < nbStacks) {
            // Find the outermost axis that rolled over at this step, then
            // advance each input by that axis' step (which rewinds broadcast axes).
            std::size_t tmp_stack = stack;
            while(tmp_stack % outputDims[dim] == 0) {
                tmp_stack /= outputDims[dim];
                dim--;
            }
            offsetIn0 += stride_step0[dim];
            offsetIn1 += stride_step1[dim];
            ++offsetOut;
            dim = contiguousIdx - 1;
        }
    }
}
// Kernels registration to implementation entry point.
// Input data type is unconstrained (DataType::Any); the kernel instantiation is
// selected from the listed type, and the output mirrors that same scalar type
// (equality results are written as 0/1 in it).
REGISTRAR(EqualImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(EqualImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(EqualImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
REGISTRAR(EqualImpl_cpu,
    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
    {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/operator/Equal.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/EqualImpl.hpp"
#include "aidge/backend/cpu/operator/EqualImpl_kernels.hpp"
/// @brief Run the Equal operator forward pass on CPU.
///
/// Validates both inputs, selects the best-matching registered kernel, casts
/// the inputs to the kernel's expected data type if needed, and invokes the
/// kernel with raw data pointers.
///
/// Fix: dropped the unused `input2Fallback` local — Equal has exactly two
/// inputs, so only two fallback tensors are needed.
template <>
void Aidge::EqualImpl_cpu::forward() {
    const Equal_Op& op = static_cast<const Equal_Op&>(mOp);

    // Check inputs
    AIDGE_ASSERT(op.getInput(0), "missing input in Equal operator");
    AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Equal forward because the 0-th input has no implementation.");
    AIDGE_ASSERT(op.getInput(1), "missing input in Equal operator");
    AIDGE_ASSERT(op.getInput(1)->hasImpl(), "cannot run Equal forward because the 1st input has no implementation.");
    AIDGE_ASSERT(op.getInput(1)->dataType() == op.getInput(0)->dataType(), "Cannot Equal inputs with two differents data type.");

    // Find the correct kernel type
    const auto impl = Registrar<EqualImpl_cpu>::create(getBestMatch(getRequiredSpec()));

    // Convert input data (no overhead if not needed!)
    // TODO: right now, if needed, memory will be allocated/deallocated at each
    // call to forward(). We might put the following shared_ptr as members of
    // this class to avoid that.
    std::shared_ptr<Tensor> input0Fallback, input1Fallback;
    const auto& input0 = op.getInput(0)->refCastFrom(input0Fallback, *op.getInput(0));
    const auto& input1 = op.getInput(1)->refCastFrom(input1Fallback, *op.getInput(1));

    impl.forward(op.getInput(0)->dims(),
                 op.getInput(1)->dims(),
                 op.getOutput(0)->dims(),
                 input0.getImpl()->rawPtr(),
                 input1.getImpl()->rawPtr(),
                 getCPUPtr(op.getRawOutput(0)));
}
/// @brief Backward pass for Equal on CPU.
/// @throws std::runtime_error always — gradients are not implemented for this
///         operator on the cpu backend (equality is not differentiable anyway).
template <>
void Aidge::EqualImpl_cpu::backward() {
    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Equal_Op on backend cpu");
}
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Equal.hpp"
#include "aidge/backend/cpu.hpp"
using namespace Aidge;
// Unit tests for the CPU Equal operator:
//  - output shape inference (same-shape and broadcast cases, randomized),
//  - element-wise forward values (same-size and broadcast fixtures).
TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") {
    SECTION("ForwardDims")
    {
        constexpr std::uint16_t NBTRIALS = 10;
        // Create a random number generator
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
        std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
        std::uniform_int_distribution<int> boolDist(0,1);

        // Identical input shapes: the output must keep that shape unchanged.
        SECTION("Same dimensions") {
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                DimSize_t nbDims = nbDimsDist(gen);
                std::vector<DimSize_t> dims(nbDims);
                for (std::size_t i = 0; i < nbDims; i++) {
                    dims[i] = dimSizeDist(gen);
                }

                std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims);
                myInput1->setBackend("cpu");
                myInput1->setDataType(DataType::Float32);
                myInput1->zeros();
                std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims);
                myInput2->setBackend("cpu");
                myInput2->setDataType(DataType::Float32);
                myInput2->zeros();
                std::shared_ptr<Node> myEqual = Equal();
                auto op = std::static_pointer_cast<OperatorTensor>(myEqual->getOperator());
                op->associateInput(0,myInput1);
                op->associateInput(1,myInput2);
                op->setDataType(DataType::Float32);
                op->setBackend("cpu");
                op->forwardDims();

                const auto outputDims = op->getOutput(0)->dims();
                REQUIRE(outputDims == dims);
            }
        }

        // Each axis is randomly set to 1 on either input; the expected output
        // dim is the max of the two (standard broadcasting rule).
        SECTION("Broadcasting") {
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                DimSize_t nbDims = nbDimsDist(gen);
                std::vector<DimSize_t> dims1(nbDims, 1);
                std::vector<DimSize_t> dims2(nbDims, 1);
                std::vector<DimSize_t> expectedOutDims;
                for (std::size_t i = 0; i < nbDims; i++) {
                    DimSize_t dim = dimSizeDist(gen);
                    if (boolDist(gen)) {
                        dims1[i] = dim;
                    }
                    if (boolDist(gen)) {
                        dims2[i] = dim;
                    }
                    expectedOutDims.push_back(std::max(dims1[i],dims2[i]));
                }

                std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1);
                myInput1->setBackend("cpu");
                myInput1->setDataType(DataType::Float32);
                myInput1->zeros();
                std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2);
                myInput2->setBackend("cpu");
                myInput2->setDataType(DataType::Float32);
                myInput2->zeros();

                std::shared_ptr<Node> myEqual = Equal();
                auto op = std::static_pointer_cast<OperatorTensor>(myEqual->getOperator());
                op->associateInput(0,myInput1);
                op->associateInput(1,myInput2);
                op->setDataType(DataType::Float32);
                op->setBackend("cpu");

                op->forwardDims();

                const auto outputDims = op->getOutput(0)->dims();
                REQUIRE(outputDims == expectedOutDims);
            }
        }
    }

    // Element-wise comparison of two tensors of identical shape: each output
    // cell is 1 where the corresponding inputs match, 0 elsewhere.
    SECTION("Same size inputs") {
        std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
            {
                {
                    {{20, 15},{31, 11},{22, 49}},
                    {{41, 10},{24, 51},{27, 52}},
                    {{26, 53},{27, 54},{28, 55}}
                },
                {
                    {{29, 56},{30, 57},{31, 58}},
                    {{32, 59},{33, 60},{34, 61}},
                    {{35, 62},{36, 63},{37, 64}}
                },
                {
                    {{38, 65},{39, 66},{40, 67}},
                    {{41, 68},{42, 69},{43, 70}},
                    {{44, 71},{45, 72},{46, 73}}
                }
            }
        });
        std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
            {
                {
                    {{20, 47},{21, 48},{22, 49}},
                    {{23, 50},{24, 51},{25, 52}},
                    {{17, 53},{27, 26},{14, 33}}
                },
                {
                    {{29, 56},{30, 57},{31, 58}},
                    {{72, 44},{33, 20},{27, 55}},
                    {{35, 24},{25, 63},{28, 64}}
                },
                {
                    {{32, 65},{39, 66},{40, 70}},
                    {{41, 53},{42, 60},{34, 70}},
                    {{44, 71},{30, 12},{46, 73}}
                }
            }
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
            {
                {
                    {{1, 0},{0, 0},{1, 1}},
                    {{0, 0},{1, 1},{0, 1}},
                    {{0, 1},{1, 0},{0, 0}}
                },
                {
                    {{1, 1},{1, 1},{1, 1}},
                    {{0, 0},{1, 0},{0, 0}},
                    {{1, 0},{0, 1},{0, 1}}
                },
                {
                    {{0, 1},{1, 1},{1, 0}},
                    {{1, 0},{1, 0},{0, 1}},
                    {{1, 1},{0, 0},{1, 1}}
                }
            }
        });

        std::shared_ptr<Node> myEqual = Equal();
        auto op = std::static_pointer_cast<OperatorTensor>(myEqual->getOperator());
        op->associateInput(0, input1);
        op->associateInput(1, input2);
        op->setBackend("cpu");
        op->setDataType(DataType::Int32);
        myEqual->forward();

        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
    }

    // Broadcast case: a (1,3,3,2) tensor compared against a length-2 vector
    // broadcast along the last axis.
    SECTION("Broadcasting") {
        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
            {
                {
                    {{10, 20},{22, 23},{20, 20}},
                    {{10, 15},{10, 29},{20, 20}},
                    {{26, 25},{33, 20},{10, 20}}
                }
            }
        });
        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}});
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
            {
                {
                    {{ 1, 1},{ 0, 0},{ 0, 1}},
                    {{ 1, 0},{ 1, 0},{ 0, 1}},
                    {{ 0, 0},{ 0, 1},{ 1, 1}}
                }
            }
        });

        std::shared_ptr<Node> myEqual = Equal();
        auto op = std::static_pointer_cast<OperatorTensor>(myEqual->getOperator());
        op->associateInput(0, input_1);
        op->associateInput(1, input_2);
        op->setDataType(DataType::Int32);
        op->setBackend("cpu");
        myEqual->forward();
        op->getOutput(0)->print();
        expectedOutput->print();
        REQUIRE(*op->getOutput(0) == *expectedOutput);
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment