Skip to content
Snippets Groups Projects
Commit c7b0cbe7 authored by Olivier BICHLER's avatar Olivier BICHLER
Browse files

Fix rounding

parent 886a9f95
No related branches found
No related tags found
2 merge requests: !79 (0.6.1), !78 (Removed wrong assertations)
Pipeline #73114 passed
......@@ -21,11 +21,12 @@
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/utils/Types.h"
#include "aidge/utils/Rounding.hpp"
namespace Aidge {
/**
 * @brief Round every element of @p input into @p output.
 *
 * @tparam T     Element type.
 * @param input  Source buffer holding @p size elements.
 * @param output Destination buffer with room for @p size elements.
 * @param size   Number of elements to process.
 * @param mode   Rounding rule applied to each element.
 *
 * NOTE(review): both pointers are presumably device pointers, since the CUDA
 * implementation hands them to a kernel -- confirm against the caller.
 */
template <class T>
void roundForward(const T* input, T* output, int size, RoundingMode mode);
}
#endif /* AIDGE_CUDA_OPERATOR_ROUNDIMPL_KERNELS_H_ */
......
......@@ -51,5 +51,5 @@ void Aidge::RoundImpl_cuda::forward_()
int size = op.getInput(0)->size();
const T* inputPtr = static_cast<T*>(op.getInput(0)->getImpl()->rawPtr());
T* outputPtr = static_cast<T*>(op.getOutput(0)->getImpl()->rawPtr());
Aidge::roundForward<T>(inputPtr,outputPtr,size);
Aidge::roundForward<T>(inputPtr,outputPtr,size,op.roundingMode());
}
......@@ -14,67 +14,163 @@
// Helper function for Round
#include <math.h>
/**
 * @brief Round one value to an integral value following @p mode.
 *
 * Generic device-side implementation based on the <cmath> overloads.
 *
 * @tparam T   Floating-point element type.
 * @param x    Value to round.
 * @param mode Rounding rule to apply.
 * @return The rounded value; @p x unchanged if @p mode matches no case.
 */
template <class T>
__device__ T round_util(T x, Aidge::RoundingMode mode) {
    switch (mode) {
        // Directed rounding
        case Aidge::RoundingMode::Down:
            return std::floor(x);
        case Aidge::RoundingMode::Up:
            return std::ceil(x);
        case Aidge::RoundingMode::TowardZero:
            return std::trunc(x);
        case Aidge::RoundingMode::AwayFromZero:
            // Negative values go down, non-negative values go up.
            return std::signbit(x) ? std::floor(x) : std::ceil(x);

        // Round to nearest, differing only in how exact halves are resolved
        case Aidge::RoundingMode::HalfDown:
            return std::ceil(x - 0.5);
        case Aidge::RoundingMode::HalfUp:
            return std::floor(x + 0.5);
        case Aidge::RoundingMode::HalfTowardZero:
            // Work on the magnitude, then put the original sign back.
            return std::copysign(std::ceil(std::abs(x) - 0.5), x);
        case Aidge::RoundingMode::HalfAwayFromZero:
            return std::round(x);
        case Aidge::RoundingMode::HalfToEven:
            return std::rint(x); // In CUDA: "halfway cases rounded to the nearest even integer value."
        case Aidge::RoundingMode::HalfToOdd: {
            const T r = std::round(x); // Round-half-away-from-zero candidate
            const T d = r - x;         // Signed distance from x to the candidate

            // Not an exact half: every round-to-nearest rule agrees with r.
            if ((d != T(0.5)) && (d != -T(0.5))) {
                return r;
            }
            // Exact half and the candidate is even: take the neighbour on the
            // other side of x (x - d), which is odd.
            if (std::fmod(r, T(2.0)) == T(0.0)) {
                return x - d;
            }
            // Exact half and the candidate is already odd.
            return r;
        }
    }
    // Unknown mode: leave the value untouched.
    return x;
}
/**
 * @brief Single-precision specialization of round_util.
 *
 * Uses the float-suffixed math functions and float literals throughout so
 * that no intermediate is promoted to double.
 *
 * @param x    Value to round.
 * @param mode Rounding rule to apply.
 * @return The rounded value; @p x unchanged if @p mode matches no case.
 */
template <>
__device__ float round_util<float>(float x, Aidge::RoundingMode mode) {
    switch (mode) {
        // Directed rounding
        case Aidge::RoundingMode::Down:
            return floorf(x);
        case Aidge::RoundingMode::Up:
            return ceilf(x);
        case Aidge::RoundingMode::TowardZero:
            return truncf(x);
        case Aidge::RoundingMode::AwayFromZero:
            // Negative values go down, non-negative values go up.
            return signbit(x) ? floorf(x) : ceilf(x);

        // Round to nearest, differing only in how exact halves are resolved
        case Aidge::RoundingMode::HalfDown:
            return ceilf(x - 0.5f);
        case Aidge::RoundingMode::HalfUp:
            return floorf(x + 0.5f);
        case Aidge::RoundingMode::HalfTowardZero:
            // Work on the magnitude, then put the original sign back.
            return copysignf(ceilf(fabsf(x) - 0.5f), x);
        case Aidge::RoundingMode::HalfAwayFromZero:
            return roundf(x);
        case Aidge::RoundingMode::HalfToEven:
            return rintf(x); // In CUDA: "halfway cases rounded to the nearest even integer value."
        case Aidge::RoundingMode::HalfToOdd: {
            const float r = roundf(x); // Round-half-away-from-zero candidate
            const float d = r - x;     // Signed distance from x to the candidate

            // Not an exact half: every round-to-nearest rule agrees with r.
            if ((d != 0.5f) && (d != -0.5f)) {
                return r;
            }
            // Exact half and the candidate is even: take the neighbour on the
            // other side of x (x - d), which is odd.
            if (fmodf(r, 2.0f) == 0.0f) {
                return x - d;
            }
            // Exact half and the candidate is already odd.
            return r;
        }
    }
    // Unknown mode: leave the value untouched.
    return x;
}
/**
 * @brief Half-precision specialization of round_util.
 *
 * Directed modes use the native half intrinsics (hfloor / hceil / htrunc /
 * hrint). Modes that have no half intrinsic go through float with explicit
 * conversions.
 *
 * NOTE(review): the half arithmetic operators and intrinsics used here are
 * documented for compute capability 5.3+; the previous implementation had a
 * __CUDA_ARCH__ guard for that -- confirm the supported targets.
 *
 * @param x    Value to round.
 * @param mode Rounding rule to apply.
 * @return The rounded value; @p x unchanged if @p mode matches no case.
 */
template <>
__device__ half round_util<half>(half x, Aidge::RoundingMode mode) {
    switch (mode) {
        // Directed rounding
        case Aidge::RoundingMode::Down:
            return hfloor(x);
        case Aidge::RoundingMode::Up:
            return hceil(x);
        case Aidge::RoundingMode::TowardZero:
            return htrunc(x);
        case Aidge::RoundingMode::AwayFromZero:
            // Non-negative values must go up and negative values down, as in
            // the float/generic versions (signbit ? floor : ceil).
            return (x >= half(0.0f)) ? hceil(x) : hfloor(x);

        // Round to nearest, differing only in how exact halves are resolved
        case Aidge::RoundingMode::HalfDown:
            return hceil(x - half(0.5f));
        case Aidge::RoundingMode::HalfUp:
            return hfloor(x + half(0.5f));
        case Aidge::RoundingMode::HalfTowardZero: {
            // Round the magnitude in half precision, then restore the sign in
            // float (there is no half copysign intrinsic).
            const half magnitude = hceil(__habs(x) - half(0.5f));
            return __float2half(copysignf(__half2float(magnitude), __half2float(x)));
        }
        case Aidge::RoundingMode::HalfAwayFromZero:
            return __float2half(roundf(__half2float(x)));
        case Aidge::RoundingMode::HalfToEven:
            return hrint(x); // In CUDA: "halfway cases rounded to the nearest even integer value."
        case Aidge::RoundingMode::HalfToOdd: {
            const half r = __float2half(roundf(__half2float(x))); // Round-half-away-from-zero candidate
            const half d = r - x;                                  // Signed distance from x to the candidate

            // Not an exact half: every round-to-nearest rule agrees with r.
            if ((d != half(0.5f)) && (d != half(-0.5f))) {
                return r;
            }
            // Exact half and the candidate is even: take the neighbour on the
            // other side of x (x - d), which is odd.
            if (fmodf(__half2float(r), 2.0f) == 0.0f) {
                return x - d;
            }
            // Exact half and the candidate is already odd.
            return r;
        }
    }
    // Unknown mode: leave the value untouched.
    return x;
}
/**
 * @brief Element-wise rounding kernel: output[i] = round_util(input[i], mode).
 *
 * Each thread handles the single element at its flat 1D global index
 * (blockIdx.x * blockDim.x + threadIdx.x); threads whose index falls at or
 * beyond @p size return without touching memory.
 *
 * @param input  Source elements.
 * @param output Destination elements.
 * @param size   Number of elements in both buffers.
 * @param mode   Rounding rule forwarded to round_util.
 */
template <class T>
__global__ void roundKernel(const T* input, T* output, int size, Aidge::RoundingMode mode) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= size) return; // tail guard: the grid may overshoot size
    output[idx] = round_util(input[idx], mode);
}
/**
 * @brief Launch roundKernel over @p size elements and wait for completion.
 *
 * Uses 256-thread blocks and enough blocks to cover @p size. Launch errors
 * and execution errors are both reported through CHECK_CUDA_STATUS.
 *
 * @param input  Source buffer, passed to the kernel as-is.
 * @param output Destination buffer, passed to the kernel as-is.
 * @param size   Number of elements; nothing is launched when it is <= 0.
 * @param mode   Rounding rule applied to each element.
 */
template <class T>
void Aidge::roundForward(const T* input, T* output, int size, Aidge::RoundingMode mode)
{
    // An empty tensor would yield a zero-block grid, which is rejected as an
    // invalid launch configuration; there is nothing to do anyway.
    if (size <= 0) {
        return;
    }

    const int blockSize = 256;
    // Ceil-div written so it cannot overflow int when size is close to INT_MAX.
    const int numBlocks = (size - 1) / blockSize + 1;

    roundKernel<T><<<numBlocks, blockSize>>>(input, output, size, mode);

    CHECK_CUDA_STATUS(cudaGetLastError());      // launch-configuration errors
    CHECK_CUDA_STATUS(cudaDeviceSynchronize()); // errors raised while the kernel runs
}
// Explicit instantiations of roundForward for the supported element types.
template void Aidge::roundForward<double>(const double* input, double* output, int size, Aidge::RoundingMode mode);
template void Aidge::roundForward<float>(const float* input, float* output, int size, Aidge::RoundingMode mode);
template void Aidge::roundForward<half>(const half* input, half* output, int size, Aidge::RoundingMode mode);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment