Skip to content
Snippets Groups Projects
Commit a424079c authored by Olivier BICHLER's avatar Olivier BICHLER
Browse files

Merge branch 'fix_meanint' into 'patch_v0-5-1'

Fix bug eclipse/aidge/aidge_backend_cpu#40

See merge request eclipse/aidge/aidge_backend_cpu!133
parents b2e93d76 9eadee68
Branches main
Tags v0.5.1
No related merge requests found
...@@ -25,6 +25,40 @@ ...@@ -25,6 +25,40 @@
namespace Aidge { namespace Aidge {
/// Single-pass, numerically stable mean of `size` contiguous values.
/// Uses the incremental update mean += (x - mean) / (i + 1), fused into a
/// single rounding step with std::fma.
/// @param vec  pointer to the first of `size` values
/// @param size number of values to average (returns 0 when size == 0)
/// @return the running mean, in the same floating-point type T
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value, T>::type
stableMean(const T* vec, size_t size) {
    T mean = 0;
    for (size_t i = 0; i < size; ++i) {
        // Reciprocal in T's own precision: the previous 1.0f literal forced a
        // float step even when T is double. Plain std::fma (no explicit <T>)
        // resolves to the proper overload portably.
        mean = std::fma(vec[i] - mean, static_cast<T>(1) / static_cast<T>(i + 1), mean);
    }
    return mean;
}
// Specialization for integers: accumulate in double and RETURN double so the
// fractional part of the mean survives until the caller rounds it with
// castFromFloat. (Returning T here truncated the mean toward zero before any
// rounding could happen — e.g. mean(1, 2) became 1 instead of 1.5.)
template <typename T>
typename std::enable_if<!std::is_floating_point<T>::value, double>::type
stableMean(const T* vec, size_t size) {
    double mean = 0;
    for (size_t i = 0; i < size; ++i) {
        // 1.0 (double), not 1.0f: a float reciprocal discards precision
        // inside an otherwise double computation.
        mean = std::fma(static_cast<double>(vec[i]) - mean,
                        1.0 / static_cast<double>(i + 1), mean);
    }
    return mean;
}
// Floating-point outputs need no conversion from the accumulator type:
// the value is forwarded unchanged.
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value, T>::type
castFromFloat(T value) {
    return value;
}
// Integral outputs: round the double accumulator to the nearest integer
// (std::nearbyint honours the current FP rounding mode, round-to-nearest-even
// by default) before narrowing to T.
template <typename T>
typename std::enable_if<!std::is_floating_point<T>::value, T>::type
castFromFloat(double value) {
    const double rounded = std::nearbyint(value);
    return static_cast<T>(rounded);
}
template <class I, class O> template <class I, class O>
void GlobalAveragePoolingImpl_cpu_forward_kernel( void GlobalAveragePoolingImpl_cpu_forward_kernel(
const std::vector<DimSize_t> &dims, const void *input_, void *output_) { const std::vector<DimSize_t> &dims, const void *input_, void *output_) {
...@@ -49,12 +83,7 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel( ...@@ -49,12 +83,7 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel(
for (DimSize_t channel = 0; channel < dims[1]; ++channel) { for (DimSize_t channel = 0; channel < dims[1]; ++channel) {
const I *filter_start = std::next( const I *filter_start = std::next(
input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems)); input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
I mean = 0; output[batch * out_batch_nb_elems + channel] = castFromFloat<O>(stableMean<I>(filter_start, in_channel_nb_elems));
for (size_t i = 0; i < in_channel_nb_elems; ++i) {
// Single pass numerically stable mean, using the fmaf
mean = fmaf(filter_start[i] - mean, 1.0f/(i+1), mean);
}
output[batch * out_batch_nb_elems + channel] = mean;
} }
} }
} }
......
...@@ -25,6 +25,40 @@ ...@@ -25,6 +25,40 @@
#include "aidge/utils/Registrar.hpp" #include "aidge/utils/Registrar.hpp"
namespace Aidge { namespace Aidge {
/// Single-pass, numerically stable mean of `len` values read with a constant
/// `stride` (element i is vec[i * stride]). Uses the incremental update
/// mean += (x - mean) / (i + 1), fused into one rounding step with std::fma.
/// @param vec    pointer to the first value
/// @param len    number of values to average (returns 0 when len == 0)
/// @param stride distance, in elements, between consecutive values
/// @return the running mean, in the same floating-point type T
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value, T>::type
stableMean(const T* vec, size_t len, size_t stride) {
    T mean = 0;
    for (size_t i = 0; i < len; ++i) {
        // Reciprocal in T's own precision: the previous 1.0f literal forced a
        // float step even when T is double. Plain std::fma (no explicit <T>)
        // resolves to the proper overload portably.
        mean = std::fma(vec[i * stride] - mean,
                        static_cast<T>(1) / static_cast<T>(i + 1), mean);
    }
    return mean;
}
// Specialization for integers: accumulate in double and RETURN double so the
// fractional part of the mean survives until the caller rounds it with
// castFromFloat. (Returning T here truncated the mean toward zero before any
// rounding could happen.) The double return type is what the accumulation
// buffers declared via decltype(stableMean<I>(...)) rely on.
template <typename T>
typename std::enable_if<!std::is_floating_point<T>::value, double>::type
stableMean(const T* vec, size_t len, size_t stride) {
    double mean = 0;
    for (size_t i = 0; i < len; ++i) {
        // 1.0 (double), not 1.0f: a float reciprocal discards precision
        // inside an otherwise double computation.
        mean = std::fma(static_cast<double>(vec[i * stride]) - mean,
                        1.0 / static_cast<double>(i + 1), mean);
    }
    return mean;
}
// Floating-point outputs need no conversion from the accumulator type:
// the value is forwarded unchanged.
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value, T>::type
castFromFloat(T value) {
    return value;
}
// Integral outputs: round the double accumulator to the nearest integer
// (std::nearbyint honours the current FP rounding mode, round-to-nearest-even
// by default) before narrowing to T.
template <typename T>
typename std::enable_if<!std::is_floating_point<T>::value, T>::type
castFromFloat(double value) {
    const double rounded = std::nearbyint(value);
    return static_cast<T>(rounded);
}
template <class I, class O> template <class I, class O>
void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
DimSize_t /*keepDims*/, DimSize_t /*keepDims*/,
...@@ -50,12 +84,7 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, ...@@ -50,12 +84,7 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
for (std::size_t post = 0; post < stride_post; ++post) { for (std::size_t post = 0; post < stride_post; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post + post; const std::size_t idx_i = pre * dim_i * stride_post + post;
const std::size_t idx_o = pre * stride_post + post; const std::size_t idx_o = pre * stride_post + post;
O mean = 0; output[idx_o] = castFromFloat<O>(stableMean(input + idx_i, dim_i, stride_post));
for (std::size_t i = 0; i < dim_i; ++i) {
// Single pass numerically stable mean, using the fmaf
mean = fmaf(input[idx_i + i*stride_post] - mean, 1.0f/(i+1), mean);
}
output[idx_o] = mean;
} }
} }
} else { } else {
...@@ -72,8 +101,9 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, ...@@ -72,8 +101,9 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
stride_pre[i] = stride_pre[i-1]*inputDims[i-1]; stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
} }
const I* inputAccumulation = input; // Type should be the return type of stableMean<I>(), which is always floating point
I* outputAccumulation = nullptr; const decltype(stableMean<I>(input, 0, 0))* inputAccumulation = nullptr;
decltype(stableMean<I>(input, 0, 0))* outputAccumulation = nullptr;
for (const auto& axisInt : axes) { for (const auto& axisInt : axes) {
const std::size_t a = static_cast<std::size_t>(axisInt); const std::size_t a = static_cast<std::size_t>(axisInt);
...@@ -84,23 +114,23 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, ...@@ -84,23 +114,23 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
for (std::size_t post = 0; post < stride_post[a]; ++post) { for (std::size_t post = 0; post < stride_post[a]; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post[a] + post; const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
const std::size_t idx_o = pre * stride_post[a] + post; const std::size_t idx_o = pre * stride_post[a] + post;
I mean = 0; if (inputAccumulation == nullptr) {
for (std::size_t i = 0; i < dim_i; ++i) { outputAccumulation[idx_o] = stableMean<I>(input + idx_i, dim_i, stride_post[a]);
// Single pass numerically stable mean, using the fmaf }
mean = fmaf(inputAccumulation[idx_i + i*stride_post[a]] - mean, 1.0f/(i+1), mean); else {
outputAccumulation[idx_o] = stableMean<I>(inputAccumulation + idx_i, dim_i, stride_post[a]);
} }
outputAccumulation[idx_o] = mean;
} }
} }
std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; }); std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; });
if (inputAccumulation != input) { if (inputAccumulation != nullptr) {
delete[] inputAccumulation; delete[] inputAccumulation;
} }
inputAccumulation = outputAccumulation; inputAccumulation = outputAccumulation;
} }
// Copy elements from inputAccumulation to output while dividing by divisor std::transform(inputAccumulation, inputAccumulation + outputElements, output,
std::copy(inputAccumulation, inputAccumulation + outputElements, output); [](auto value) { return castFromFloat<O>(value); });
if (outputAccumulation) { if (outputAccumulation) {
delete[] outputAccumulation; delete[] outputAccumulation;
} }
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment