Skip to content
Snippets Groups Projects
Commit 34b83b0d authored by Noam Zerah's avatar Noam Zerah
Browse files

Implementation of clip operator as defined by ONNX standard (Min and Max value...

Implementation of the clip operator as defined by the ONNX standard (min and max values passed as tensors of empty dims).
parent 2e1f36e8
No related branches found
No related tags found
No related merge requests found
Pipeline #55109 failed
......@@ -28,13 +28,13 @@ namespace Aidge {
// compute kernel registry for forward and backward
// Registry key: (input, min, max) data types. Kernel signature:
// (min ptr, max ptr, input ptr, element count, output ptr) — min/max are
// passed as pointers because the ONNX Clip bounds arrive as scalar tensors.
class ClipImplForward_cpu
    : public Registrable <ClipImplForward_cpu,
        std::tuple<DataType, DataType, DataType>,
        void(const void*, const void*, const void*, const std::size_t, void*)> {};

// Backward kernel registry, disabled until the backward pass is implemented.
/*class ClipImplBackward_cpu
    : public Registrable <ClipImplBackward_cpu,
        std::tuple<DataType, DataType, DataType>,
        void(const float, const float, const std::size_t, const void*, const void*, void*)> {};*/
class ClipImpl_cpu : public OperatorImpl {
public:
......@@ -48,7 +48,7 @@ public:
void forward() override final;
void backward() override final;
//void backward() override final;
};
namespace {
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
......@@ -19,31 +20,34 @@
namespace Aidge {
// Clip forward kernel: output[i] = max(*min_, min(input[i], *max_)).
//
// min_/max_ each point to a single element (the ONNX Clip bounds come in as
// scalar tensors of empty dims — see the registrable's signature), input_
// holds `length` contiguous elements of type I, output_ receives `length`
// elements of type O.
template <class I, class O>
void ClipImpl_cpu_forward_kernel(
        const void* min_,
        const void* max_,
        const void* input_,
        const std::size_t length,
        void* output_)
{
    const I* min = static_cast<const I*>(min_);
    const I* max = static_cast<const I*>(max_);
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);
    //#pragma omp parallel for if (length > 1024)
    for (std::size_t i = 0; i < length; ++i) {
        // max(lower, min(x, upper)) matches the ONNX Clip definition; it is
        // deliberately not std::clamp, whose precondition (lo <= hi) would be
        // violated if *min > *max, whereas this form lets the lower bound win.
        output[i] = std::max(min[0], std::min(input[i], max[0]));
    }
}
namespace {
// Register the forward kernel for each supported (input, min, max) dtype
// triplet; all three inputs must share the same data type.
static Registrar<ClipImplForward_cpu> registrarClipImplForward_cpu_Float32(
    {DataType::Float32, DataType::Float32, DataType::Float32},
    Aidge::ClipImpl_cpu_forward_kernel<float, float>);
static Registrar<ClipImplForward_cpu> registrarClipImplForward_cpu_Int32(
    {DataType::Int32, DataType::Int32, DataType::Int32},
    Aidge::ClipImpl_cpu_forward_kernel<int, int>);
static Registrar<ClipImplForward_cpu> registrarClipImplForward_cpu_Float64(
    {DataType::Float64, DataType::Float64, DataType::Float64},
    Aidge::ClipImpl_cpu_forward_kernel<double, double>);
}  // namespace
} // namespace Aidge
......
......@@ -20,7 +20,7 @@
#include "aidge/backend/cpu/operator/ClipImpl.hpp"
#include "aidge/backend/cpu/operator/ClipImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/ClipImpl_backward_kernels.hpp"
//#include "aidge/backend/cpu/operator/ClipImpl_backward_kernels.hpp"
Aidge::Elts_t Aidge::ClipImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// this implementation can be in-place
......@@ -31,25 +31,29 @@ void Aidge::ClipImpl_cpu::forward() {
const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
std::shared_ptr<Tensor> in0 = op_.getInput(0);
std::shared_ptr<Tensor> in1 = op_.getInput(1);
std::shared_ptr<Tensor> in2 = op_.getInput(2);
std::shared_ptr<Tensor> out0 = op_.getOutput(0);
AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<ClipImplForward_cpu>::create({
in0->dataType(),
out0->dataType()});
in1->dataType(),
in2->dataType(),
});
// Call kernel
kernelFunc(
op_.min(),
op_.max(),
in0->size(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawOutput(0))
getCPUPtr(mOp.getRawInput(1)),
getCPUPtr(mOp.getRawInput(2)),
getCPUPtr(mOp.getRawInput(0)),
in0->size(),
getCPUPtr(mOp.getRawOutput(0))
);
}
void Aidge::ClipImpl_cpu::backward() {
/*void Aidge::ClipImpl_cpu::backward() {
const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
std::shared_ptr<Tensor> in0 = op_.getInput(0);
......@@ -74,4 +78,4 @@ void Aidge::ClipImpl_cpu::backward() {
getCPUPtr(gra_out0),
getCPUPtr(gra_in0)
);
}
}*/
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>

#include <algorithm>  // std::for_each, std::clamp
#include <chrono>
#include <cstddef>    // std::size_t
#include <cstdint>    // std::uint16_t
#include <iostream>
#include <memory>
#include <random>     // std::random_device, std::mt19937, std::uniform_real_distribution
#include <vector>

#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Clip.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu.hpp"
namespace Aidge {
// No-op helper that always returns 0. Apparently a hook for setting a manual
// debugger breakpoint on its call sites.
// NOTE(review): looks like leftover debug scaffolding — consider removing it
// (and its calls) before merge.
int azertBpoint()
{
    return 0;
}
// Checks the CPU Clip forward pass against a std::clamp reference on random
// 2-D float tensors, with min/max supplied as scalar (empty-dim) tensors.
TEST_CASE("[cpu/operator] Clip(forward)", "[Clip][CPU]") {
    const std::uint16_t NBTRIALS = 10;

    // Random generators: input values in [0, 10]; the min bound is drawn from
    // [0, 4.5] and the max bound from [5.5, 10], so min < max always holds.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(0.0, 10.0);
    std::uniform_real_distribution<float> dismin(0.0, 4.5);
    std::uniform_real_distribution<float> dismax(5.5, 10.0);
    std::uniform_int_distribution<std::size_t> distDims(5, 15);

    // Create Clip Operator
    std::shared_ptr<Node> myClip = Aidge::Clip("nop");
    auto op = std::static_pointer_cast<OperatorTensor>(myClip->getOperator());

    // To measure execution time of the 'forward()' member function call.
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    // Zero-initialize explicitly: a default-constructed duration has an
    // uninitialized representation, so the '+=' accumulation below would
    // otherwise be undefined behavior.
    std::chrono::duration<double, std::micro> duration =
        std::chrono::duration<double, std::micro>::zero();

    SECTION("2-D Tensors") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // Generate tensor dimensions.
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            totalComputation += dim0 * dim1;

            // Create and populate the input buffer with random float values.
            // NOTE(review): the buffer is handed to the tensor implementation
            // via setRawPtr() and never freed here (same pattern as the other
            // backend tests); confirm whether the impl takes ownership before
            // adding a delete[].
            float* inputArray = new float[dim0 * dim1];
            for (std::size_t i = 0; i < dim0 * dim1; ++i) {
                inputArray[i] = dis(gen);
            }
            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
            TInput->resize({dim0, dim1});
            TInput->setBackend("cpu");
            TInput->getImpl()->setRawPtr(inputArray, dim0 * dim1);

            // Min and max are scalar tensors (empty dims), per the ONNX Clip
            // standard this commit implements.
            float minVal = dismin(gen);
            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
            Tmin->resize({});
            Tmin->setBackend("cpu");
            Tmin->getImpl()->setRawPtr(&minVal, 1);

            float maxVal = dismax(gen);
            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
            Tmax->resize({});
            Tmax->setBackend("cpu");
            Tmax->getImpl()->setRawPtr(&maxVal, 1);

            // Ground truth: clamp every input value to [minVal, maxVal].
            std::vector<float> GT(inputArray, inputArray + (dim0 * dim1));
            std::for_each(GT.begin(), GT.end(), [minVal, maxVal](float& value) {
                value = std::clamp(value, minVal, maxVal);
            });
            float* gt_raw = GT.data();

            op->associateInput(0, TInput);
            op->associateInput(1, Tmin);
            op->associateInput(2, Tmax);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->forwardDims();

            start = std::chrono::system_clock::now();
            myClip->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            // NOTE(review): verify that approxEq has an overload accepting a
            // raw float* as ground truth — the other suites pass a Tensor.
            REQUIRE(approxEq<float>(*(op->getOutput(0)), gt_raw));
        }
        std::cout << "clipped elements over time spent: " << totalComputation / duration.count() << '\n';
        std::cout << "total time: " << duration.count() << '\n';
    }
    // NOTE(review): the commented-out 3-D / 4-D / broadcasting sections that
    // previously lived here were copied from the MatMul test suite (they still
    // referenced 'myMatMul') and never exercised Clip; they have been removed.
    // TODO: add Clip-specific multi-dimensional test sections.
}
} // namespace Aidge
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment