Commit 0c6edb52 authored by Maxence Naud

[Fix] ReduceMean operator forward kernel with refactor

parent d87663c3
2 merge requests: !50 version 0.2.0, !20 Vit operators
Pipeline #38688 failed
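
The refactor replaces the previous scheme, which copied the whole tensor into temporary std::vectors and divided after every axis, with a stride-based walk that accumulates raw sums and divides a single time at the end. Below is a minimal standalone sketch of the same stride arithmetic for a single axis; the names (reduceMeanAxis, shape) are hypothetical and do not match the kernel's actual signature:

    #include <cstddef>
    #include <vector>

    // Mean over one axis of a row-major tensor. For each (pre, post) pair,
    // the reduced elements sit stride_post apart, starting at
    // pre * dim * stride_post + post.
    std::vector<float> reduceMeanAxis(const std::vector<float>& in,
                                      const std::vector<std::size_t>& shape,
                                      std::size_t axis) {
        std::size_t stride_post = 1;   // number of elements after 'axis'
        for (std::size_t d = axis + 1; d < shape.size(); ++d) { stride_post *= shape[d]; }
        std::size_t stride_pre = 1;    // number of elements before 'axis'
        for (std::size_t d = 0; d < axis; ++d) { stride_pre *= shape[d]; }

        const std::size_t dim = shape[axis];
        std::vector<float> out(stride_pre * stride_post);
        for (std::size_t pre = 0; pre < stride_pre; ++pre) {
            for (std::size_t post = 0; post < stride_post; ++post) {
                float acc = 0.0f;
                for (std::size_t i = 0; i < dim; ++i) {
                    acc += in[pre * dim * stride_post + i * stride_post + post];
                }
                out[pre * stride_post + post] = acc / static_cast<float>(dim);
            }
        }
        return out;  // e.g. reduceMeanAxis(data, {3, 2, 2}, 1) reproduces "test 1" below
    }

The committed kernel applies this walk once per reduced axis but keeps raw sums between passes; dividing only once, by the product of the reduced dimensions, avoids compounding truncation when I is an integer type.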
@@ -13,7 +13,7 @@
 #define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
 #include <cstddef>
-#include <algorithm>
+#include <algorithm>   // std::copy, std::for_each
 #include <numeric>     //std::accumulate
 #include <functional>  //std::multiplies
@@ -32,57 +32,56 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op<DIM>::Attrs&
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
 
-    const DimSize_t keepDims = std::get<1>(attrs);
-    // Calculate the total number of elements in the input array
-    const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
-
-    // Create temporary arrays to store intermediate input/output for each Reduce op
-    std::vector<I> tempInArray(input, input + totalElements);
-    std::vector<I> tempOutArray(input, input + totalElements);
-    std::vector<std::size_t> currentDims = inputDims;
-
-    std::size_t addedElems = 0;
-    for (std::size_t i = 0; i < DIM; ++i) {
-        addedElems = 0;
-        const std::size_t axis = static_cast<std::size_t>(std::get<0>(attrs)[i]);
-        I* tempOutArrayPtr = tempOutArray.data();
-
-        std::size_t postAxisElems = 1;
-        for (std::size_t d = axis + 1; d < inputDims.size(); ++d) {
-            postAxisElems *= inputDims[d];
-        }
-        std::size_t preAxisElems = 1;
-        for (std::size_t d = 0; d < axis; ++d) {
-            preAxisElems *= inputDims[d];
-        }
-
-        for (std::size_t j = 0; j < preAxisElems; ++j) {
-            for (std::size_t k = 0; k < postAxisElems; ++k) {
-                // Compute the mean value for the element k of each stride
-                I mean = 0;
-                for (std::size_t l = 0; l < currentDims[axis]; ++l) {
-                    std::size_t idx = j * (postAxisElems * currentDims[axis]) + l * postAxisElems + k;
-                    mean += tempInArray[idx];
-                }
-                tempOutArrayPtr[addedElems] = mean / currentDims[axis];
-                addedElems++;
-            }
-        }
-
-        // Update the input for the next reduce operation
-        tempInArray.assign(tempOutArray.begin(), tempOutArray.begin() + addedElems);
-        if (keepDims)
-            currentDims[axis] = 1;
-        else if (currentDims.size() > 1)
-            currentDims.erase(currentDims.begin() + axis);
-    }
-    std::copy_n(tempInArray.cbegin(), addedElems, output);
+    const std::size_t nb_dims = inputDims.size();
+    const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
+    std::size_t outputElements = totalElements;
+
+    // Number of elements after (stride_post) and before (stride_pre) each dimension
+    std::size_t *stride_post = new std::size_t[nb_dims];
+    stride_post[nb_dims - 1] = 1;
+    for (std::size_t i = nb_dims - 2; i != static_cast<std::size_t>(-1); --i) {
+        stride_post[i] = stride_post[i + 1] * inputDims[i + 1];
+    }
+    std::size_t *stride_pre = new std::size_t[nb_dims];
+    stride_pre[0] = 1;
+    for (std::size_t i = 1; i < nb_dims; ++i) {
+        stride_pre[i] = stride_pre[i - 1] * inputDims[i - 1];
+    }
+
+    const I* inputAccumulation = input;
+    I* outputAccumulation = nullptr;
+
+    // Accumulate raw sums along each axis to reduce, one axis at a time
+    for (const std::size_t& a : std::get<0>(attrs)) {
+        outputElements /= inputDims[a];
+        outputAccumulation = new I[outputElements];
+        const std::size_t dim_i = inputDims[a];
+        for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
+            for (std::size_t post = 0; post < stride_post[a]; ++post) {
+                const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
+                const std::size_t idx_o = pre * stride_post[a] + post;
+                outputAccumulation[idx_o] = inputAccumulation[idx_i];
+                for (std::size_t i = 1; i < dim_i; ++i) {
+                    outputAccumulation[idx_o] += inputAccumulation[idx_i + i * stride_post[a]];
+                }
+            }
+        }
+        // The reduced dimension disappears: rescale the pre-strides of later axes
+        std::for_each(stride_pre + a + 1, stride_pre + nb_dims, [dim_i](std::size_t& val) { val /= dim_i; });
+        if (inputAccumulation != input) {
+            delete[] inputAccumulation;
+        }
+        inputAccumulation = outputAccumulation;
+    }
+
+    // Copy elements from inputAccumulation to output while dividing by the divisor
+    I divisor = totalElements / outputElements;
+    std::transform(inputAccumulation, inputAccumulation + outputElements, output,
+                   [divisor](I element) { return element / divisor; });
+    if (outputAccumulation) {
+        delete[] outputAccumulation;
+    }
+    delete[] stride_post;
+    delete[] stride_pre;
 }
 namespace {
 // DIM = 1
...
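A quick worked example of the stride bookkeeping above, using the shape from "test 2" below (a {3, 3, 2} tensor reduced over axes {1, 2}): initially stride_pre = {1, 3, 9} and stride_post = {6, 2, 1}. Reducing axis 1 (dim_i = 3) rescales stride_pre[2] to 9 / 3 = 3, which is exactly the number of elements preceding axis 2 once that dimension has collapsed; stride_post needs no update because each entry only depends on dimensions after its axis, none of which have been reduced yet. This bookkeeping is consistent only when the axes attribute is sorted in increasing order. The final divisor is totalElements / outputElements = 18 / 3 = 6, the product of the two reduced dimensions.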
@@ -22,41 +22,83 @@ using namespace Aidge;
 TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
     SECTION("KeepDims") {
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-            {
-                {
-                    { 5.0, 1.0 },
-                    { 20.0, 2.0 }
-                },
-                {
-                    { 30.0, 1.0 },
-                    { 40.0, 2.0 }
-                },
-                {
-                    { 55.0, 1.0 },
-                    { 60.0, 2.0 }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array3D<float,3,1,2> {
-            {
-                {{ 12.5, 1.5 }},
-                {{ 35.0, 1.5 }},
-                {{ 57.5, 1.5 }}
-            }
-        });
-
-        std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 1);
-        auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-        op->associateInput(0,myInput);
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
-        op->computeOutputDims();
-        myReduceMean->forward();
-        op->getOutput(0)->print();
-
-        REQUIRE(*(op->getOutput(0)) == *myOutput);
+        SECTION("test 1") {
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
+                {
+                    {
+                        { 5.0, 1.0 },
+                        { 20.0, 2.0 }
+                    },
+                    {
+                        { 30.0, 1.0 },
+                        { 40.0, 2.0 }
+                    },
+                    {
+                        { 55.0, 1.0 },
+                        { 60.0, 2.0 }
+                    }
+                }
+            });
+            Tensor myOutput = Tensor(Array3D<float,3,1,2> {
+                {
+                    {{ 12.5, 1.5 }},
+                    {{ 35.0, 1.5 }},
+                    {{ 57.5, 1.5 }}
+                }
+            });
+
+            std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 1);
+            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
+            op->associateInput(0,myInput);
+            op->setDataType(DataType::Float32);
+            op->setBackend("cpu");
+            op->computeOutputDims();
+            myReduceMean->forward();
+            op->getOutput(0)->print();
+
+            REQUIRE(*(op->getOutput(0)) == myOutput);
+        }
+        SECTION("test 2") {
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> {
+                {
+                    {
+                        { 0.0, 0.0 },
+                        { 1.0, 1.0 },
+                        { 2.0, 2.0 }
+                    },
+                    {
+                        { 3.0, 3.0 },
+                        { 4.0, 4.0 },
+                        { 5.0, 5.0 }
+                    },
+                    {
+                        { 6.0, 6.0 },
+                        { 7.0, 7.0 },
+                        { 8.0, 8.0 }
+                    }
+                }
+            });
+            Tensor myOutput = Tensor(Array3D<float,3,1,1> {
+                {
+                    {{ 1.0 }},
+                    {{ 4.0 }},
+                    {{ 7.0 }}
+                }
+            });
+
+            std::shared_ptr<Node> myReduceMean = ReduceMean({1, 2}, 1);
+            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
+            op->associateInput(0,myInput);
+            op->setDataType(DataType::Float32);
+            op->setBackend("cpu");
+            op->computeOutputDims();
+            myReduceMean->forward();
+            myOutput.print();
+            op->getOutput(0)->print();
+
+            REQUIRE(*(op->getOutput(0)) == myOutput);
+        }
     }
     SECTION("not_KeepDims") {
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
...
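As a quick sanity check on the expected tensors: in "test 1", reducing axis 1 of the 3x2x2 input averages the two rows of each 2x2 slice, so the first slice {5, 1}, {20, 2} yields {(5 + 20) / 2, (1 + 2) / 2} = {12.5, 1.5}. In "test 2", reducing axes {1, 2} averages all six entries of each 3x2 slice: the first slice holds 0, 0, 1, 1, 2, 2, whose mean is 6 / 6 = 1.0, matching the expected {{ 1.0 }}.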