Commit f517018e authored by Houssem ROUIS

add half_precision support

parent ac87b48e
3 merge requests: !15 version 0.2.0, !12 Lenetop, !10 Lenet operators
FCImpl_CUDA_kernels.hpp
@@ -25,12 +25,18 @@ namespace Aidge {
 template <class T>
 cublasStatus_t cublasGemm(cublasHandle_t handle,
                           cublasOperation_t transa, cublasOperation_t transb,
                           int m, int n, int k,
                           const T *alpha,
                           const T *A, int lda,
                           const T *B, int ldb,
                           const T *beta,
                           T *C, int ldc);
+// cublasGemm(cublasContext*&, cublasOperation_t, cublasOperation_t, int&, int&, int&,
+//            const type*,
+//            const __half*&, int&,
+//            const __half*&, int&,
+//            const type*,
+//            __half*&, int&)
 }
 #endif /* AIDGE_CUDA_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */
\ No newline at end of file
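cuBLAS has no type-generic GEMM entry point; it exposes cublasHgemm, cublasSgemm and cublasDgemm separately per precision. The templated cublasGemm declared above lets the FC implementation pick the right one from the element type T at compile time. A minimal usage sketch, assuming a valid handle and densely packed device buffers (names here are illustrative, not part of the commit):

    #include <cublas_v2.h>
    #include "aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp"

    // C = A * B for any supported element type T. cuBLAS is column-major,
    // so the leading dimensions assume packed m x k and k x n inputs.
    template <class T>
    cublasStatus_t gemm_example(cublasHandle_t handle,
                                const T* d_A, const T* d_B, T* d_C,
                                int m, int n, int k) {
        const T alpha = T(1.0);  // for T = __half this uses the host-side float ctor
        const T beta  = T(0.0);
        return Aidge::cublasGemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                                 m, n, k,
                                 &alpha, d_A, m, d_B, k,
                                 &beta, d_C, m);
    }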
CudaUtils.hpp
@@ -11,6 +11,8 @@
 #include <cuda.h>
 #include <cudnn.h>
+#include "aidge/data/half.hpp"
 #define CHECK_CUDNN_STATUS(status) \
     do { \
         const cudnnStatus_t e = (status); \
@@ -62,6 +64,29 @@
 namespace Aidge {
 namespace Cuda {
+// CuDNN scaling parameters are typically "alpha" and "beta".
+// Their type must be "float" for HALF and FLOAT (default template)
+// and "double" for DOUBLE (specialized template)
+template <class T>
+struct cudnn_scaling_type {
+    typedef float type;
+};
+
+template <>
+struct cudnn_scaling_type<double> {
+    typedef double type;
+};
+
+template <class T>
+struct cuda_type {
+    typedef T type;
+};
+
+template <>
+struct cuda_type<half_float::half> {
+    typedef __half type;
+};
 const char* cublasGetErrorString(cublasStatus_t error);

 // Enable Peer-to-Peer communications between devices
...
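Two type traits drive the half-precision support. cudnn_scaling_type encodes the cuDNN rule quoted in the comment (float scaling factors for HALF and FLOAT data, double for DOUBLE data), and cuda_type maps the host-side half_float::half to CUDA's __half. That mapping rests on the assumption that both types are plain 16-bit IEEE-754 binary16 values with identical layout, which is what makes the reinterpret_cast in FCImpl below legal. A compile-time sanity check, as a sketch:

    #include <type_traits>
    #include <cuda_fp16.h>
    #include "aidge/backend/cuda/utils/CudaUtils.hpp"

    // Layout assumption behind cuda_type<half_float::half>:
    static_assert(sizeof(half_float::half) == sizeof(__half),
                  "half types must match in size for reinterpret_cast to be safe");

    // How the traits resolve for each supported precision:
    static_assert(std::is_same<Aidge::Cuda::cudnn_scaling_type<half_float::half>::type, float>::value, "");
    static_assert(std::is_same<Aidge::Cuda::cudnn_scaling_type<float>::type,  float>::value,  "");
    static_assert(std::is_same<Aidge::Cuda::cudnn_scaling_type<double>::type, double>::value, "");
    static_assert(std::is_same<Aidge::Cuda::cuda_type<half_float::half>::type, __half>::value, "");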
AvgPoolingImpl.cpp
@@ -10,17 +10,14 @@
 ********************************************************************************/

 #include <cassert>
-#include <chrono>   // std::chrono::milliseconds
-#include <numeric>  // std::accumulate
-#include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/AvgPooling.hpp"
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/AvgPoolingImpl.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
+#include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/AvgPooling.hpp"
+#include "aidge/utils/Types.h"

 template <Aidge::DimIdx_t DIM>
 void Aidge::AvgPoolingImpl_cuda<DIM>::forward() {
@@ -49,11 +46,18 @@ void Aidge::AvgPoolingImpl_cuda<DIM>::forward() {
                                            &strides[0]));
     }

-    if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
-        forward_<double>(input);
-    }
-    else {
-        forward_<float>(input);
+    switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
+        case DataType::Float64:
+            forward_<double>(input);
+            break;
+        case DataType::Float32:
+            forward_<float>(input);
+            break;
+        case DataType::Float16:
+            forward_<half>(input);
+            break;
+        default:
+            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
     }
 }

@@ -61,8 +65,8 @@ template <Aidge::DimIdx_t DIM>
 template <class T>
 void Aidge::AvgPoolingImpl_cuda<DIM>::forward_(const Tensor& input) {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
-    const T alpha = 1.0f;
-    const T beta = 0.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
     CHECK_CUDNN_STATUS(
         cudnnPoolingForward(
             CudaContext::cudnnHandle(),
@@ -83,6 +87,5 @@ Aidge::AvgPoolingImpl_cuda<DIM>::~AvgPoolingImpl_cuda() {
     cudnnDestroyPoolingDescriptor(mAvgPoolingDesc);
 }

 // Template declarations
 template class Aidge::AvgPoolingImpl_cuda<2>;
ConvImpl.cpp
@@ -10,17 +10,13 @@
 ********************************************************************************/

 #include <cassert>
-#include <chrono>   // std::chrono::milliseconds
-#include <numeric>  // std::accumulate
-#include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/Conv.hpp"
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/ConvImpl.hpp"
-#include "aidge/backend/cuda/utils/CudaContext.hpp"
+#include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Types.h"

 template <Aidge::DimIdx_t DIM>
 void Aidge::ConvImpl_cuda<DIM>::forward() {
@@ -106,11 +102,18 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
     // Do the actual forward computation
     // Template is only for scaling parameters, which are always in float
     // except when the convolution is performed in double precision.
-    if (op.getOutput(0)->dataType() == DataType::Float64) {
-        forward_<double>(input0, input1, input2);
-    }
-    else {
-        forward_<float>(input0, input1, input2);
+    switch(op.getOutput(0)->dataType()) {
+        case DataType::Float64:
+            forward_<double>(input0, input1, input2);
+            break;
+        case DataType::Float32:
+            forward_<float>(input0, input1, input2);
+            break;
+        case DataType::Float16:
+            forward_<half>(input0, input1, input2);
+            break;
+        default:
+            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
     }
 }

@@ -118,9 +121,8 @@ template <Aidge::DimIdx_t DIM>
 template <class T>
 void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2) {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
-    const T alpha = 1.0f;
-    const T beta = 0.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
     CHECK_CUDNN_STATUS(cudnnConvolutionForward(CudaContext::cudnnHandle(),
                                                &alpha,
                                                std::dynamic_pointer_cast<TensorImpl_cuda_>(input0.getImpl())->getCudnnTensorDesc(input0),
...
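cudnnConvolutionForward computes y = alpha * conv(x, w) + beta * y, and cuDNN expects the alpha/beta pointers to reference float when the tensor data type is HALF or FLOAT and double when it is DOUBLE, which is exactly what cudnn_scaling_type selects. A reduced sketch of the pattern (illustrative function name, not from the commit):

    // Scaling factors resolved through the trait rather than through T itself;
    // with T = half_float::half this yields float, as cuDNN requires.
    template <class T>
    void scaled_call_sketch() {
        using scaling_t = typename Aidge::Cuda::cudnn_scaling_type<T>::type;
        const scaling_t alpha = 1.0f;  // y = alpha * conv(x, w) + beta * y
        const scaling_t beta  = 0.0f;  // beta = 0 discards any prior contents of y
        // ... &alpha and &beta are then passed to cudnnConvolutionForward(...)
    }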
FCImpl.cpp
@@ -15,15 +15,13 @@
 #include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/FC.hpp"
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/FCImpl.hpp"
 #include "aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
 #include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/FC.hpp"
+#include "aidge/utils/Types.h"

 void Aidge::FCImpl_cuda::forward() {
     assert(mOp.getRawInput(0) && "missing input #0");
@@ -39,11 +37,18 @@ void Aidge::FCImpl_cuda::forward() {
     bool noBias = fcOp.template getAttr<FCAttr::NoBias>();
     std::size_t outChannels = static_cast<std::size_t>(fcOp.template getAttr<FCAttr::OutChannels>());

-    if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
-        forward_<double>(input0, input1, input2, noBias, outChannels);
-    }
-    else {
-        forward_<float>(input0, input1, input2, noBias, outChannels);
+    switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
+        case DataType::Float64:
+            forward_<double>(input0, input1, input2, noBias, outChannels);
+            break;
+        case DataType::Float32:
+            forward_<float>(input0, input1, input2, noBias, outChannels);
+            break;
+        case DataType::Float16:
+            forward_<half>(input0, input1, input2, noBias, outChannels);
+            break;
+        default:
+            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
     }
 }

@@ -61,26 +66,26 @@ void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, co
     int lda = k;
     int ldb = k;
     int ldc = n;
-    const T alpha = T(1.0);
-    const T beta = T(0.0);
+    const T alpha = 1.0f;
+    const T beta = 0.0f;
     CHECK_CUBLAS_STATUS(cublasGemm(CudaContext::cublasHandle(),
                                    CUBLAS_OP_T,
                                    CUBLAS_OP_N,
                                    n,
                                    m,
                                    k,
-                                   &alpha,
+                                   reinterpret_cast<const typename Cuda::cuda_type<T>::type*>(&alpha),
                                    weights,
                                    ldb,
                                    input,
                                    lda,
-                                   &beta,
+                                   reinterpret_cast<const typename Cuda::cuda_type<T>::type*>(&beta),
                                    output,
                                    ldc));

     if(!noBias){
         T* onesVector;
-        cudaMalloc((void**)&onesVector, m * sizeof(T));
+        CHECK_CUDA_STATUS(cudaMalloc((void**)&onesVector, m * sizeof(T)));
         // Fill the vector with ones
         std::vector<T> onesVec(m, T(1.0));
         CHECK_CUDA_STATUS(cudaMemcpy(onesVector,
@@ -95,12 +100,12 @@ void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, co
                                        n,
                                        m,
                                        1,
-                                       &alpha,
+                                       reinterpret_cast<const typename Cuda::cuda_type<T>::type*>(&alpha),
                                        biases,
                                        n,
                                        onesVector,
                                        1,
-                                       &alpha,
+                                       reinterpret_cast<const typename Cuda::cuda_type<T>::type*>(&alpha),
                                        output,
                                        n));
...
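The bias path above avoids a custom broadcast kernel: allocating a device vector of m ones and issuing a second GEMM with k = 1 turns bias addition into the rank-1 update output(n x m) += biases(n x 1) * ones(1 x m), one column per batch sample. Note that both scaling slots receive &alpha, i.e. 1, so the existing GEMM result is kept and the bias is accumulated on top. A host-side reference of the same arithmetic, as a sketch with illustrative names:

    #include <vector>

    // Adds biases[row] to every column of a column-major n x m output,
    // mirroring the ones-vector GEMM trick used in FCImpl.
    void add_bias_reference(std::vector<float>& output,        // n * m values
                            const std::vector<float>& biases,  // n values
                            int n, int m) {
        for (int col = 0; col < m; ++col)        // one column per batch sample
            for (int row = 0; row < n; ++row)    // one row per output channel
                output[col * n + row] += biases[row] * 1.0f;  // ones-vector entry
    }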
FCImpl_CUDA_kernels.cu
@@ -16,61 +16,61 @@ namespace Aidge{
 template <>
 cublasStatus_t cublasGemm<__half>(cublasHandle_t handle,
                                   cublasOperation_t transa, cublasOperation_t transb,
                                   int m, int n, int k,
                                   const __half *alpha,
                                   const __half *A, int lda,
                                   const __half *B, int ldb,
                                   const __half *beta,
                                   __half *C, int ldc)
 {
     return cublasHgemm(handle,
                        transa, transb,
                        m, n, k,
                        alpha,
                        A, lda,
                        B, ldb,
                        beta,
                        C, ldc);
 }

 template <>
 cublasStatus_t cublasGemm<float>(cublasHandle_t handle,
                                  cublasOperation_t transa, cublasOperation_t transb,
                                  int m, int n, int k,
                                  const float *alpha,
                                  const float *A, int lda,
                                  const float *B, int ldb,
                                  const float *beta,
                                  float *C, int ldc)
 {
     return cublasSgemm(handle,
                        transa, transb,
                        m, n, k,
                        alpha,
                        A, lda,
                        B, ldb,
                        beta,
                        C, ldc);
 }

 template <>
 cublasStatus_t cublasGemm<double>(cublasHandle_t handle,
                                   cublasOperation_t transa, cublasOperation_t transb,
                                   int m, int n, int k,
                                   const double *alpha,
                                   const double *A, int lda,
                                   const double *B, int ldb,
                                   const double *beta,
                                   double *C, int ldc)
 {
     return cublasDgemm(handle,
                        transa, transb,
                        m, n, k,
                        alpha,
                        A, lda,
                        B, ldb,
                        beta,
                        C, ldc);
 }
 }
\ No newline at end of file
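A hypothetical call site for the __half specialization, assuming device buffers that hold half_float::half data and relying on the binary16-layout equivalence noted in CudaUtils.hpp (names here are illustrative, not from the commit):

    #include <cublas_v2.h>
    #include <cuda_fp16.h>
    #include "aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp"
    #include "aidge/data/half.hpp"

    cublasStatus_t half_gemm(cublasHandle_t handle,
                             const half_float::half* d_A,
                             const half_float::half* d_B,
                             half_float::half* d_C,
                             int m, int n, int k) {
        const __half alpha = __float2half(1.0f);
        const __half beta  = __float2half(0.0f);
        // Resolves to the cublasHgemm specialization above; the casts are
        // valid only because both half types share the binary16 layout.
        return Aidge::cublasGemm<__half>(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                                         m, n, k,
                                         &alpha,
                                         reinterpret_cast<const __half*>(d_A), m,
                                         reinterpret_cast<const __half*>(d_B), k,
                                         &beta,
                                         reinterpret_cast<__half*>(d_C), m);
    }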
MaxPoolingImpl.cpp
@@ -10,17 +10,14 @@
 ********************************************************************************/

 #include <cassert>
-#include <chrono>   // std::chrono::milliseconds
-#include <numeric>  // std::accumulate
-#include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/MaxPooling.hpp"
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/MaxPoolingImpl.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
+#include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/MaxPooling.hpp"
+#include "aidge/utils/Types.h"

 template <Aidge::DimIdx_t DIM>
 void Aidge::MaxPoolingImpl_cuda<DIM>::forward() {
@@ -49,11 +46,18 @@ void Aidge::MaxPoolingImpl_cuda<DIM>::forward() {
                                            &strides[0]));
     }

-    if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
-        forward_<double>(input);
-    }
-    else {
-        forward_<float>(input);
+    switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
+        case DataType::Float64:
+            forward_<double>(input);
+            break;
+        case DataType::Float32:
+            forward_<float>(input);
+            break;
+        case DataType::Float16:
+            forward_<half>(input);
+            break;
+        default:
+            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
     }
 }

@@ -61,8 +65,8 @@ template <Aidge::DimIdx_t DIM>
 template <class T>
 void Aidge::MaxPoolingImpl_cuda<DIM>::forward_(const Tensor& input) {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
-    const T alpha = 1.0f;
-    const T beta = 0.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
     CHECK_CUDNN_STATUS(
         cudnnPoolingForward(
             CudaContext::cudnnHandle(),
...
ReLUImpl.cpp
@@ -10,17 +10,14 @@
 ********************************************************************************/

 #include <cassert>
-#include <chrono>   // std::chrono::milliseconds
-#include <numeric>  // std::accumulate
-#include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/ReLU.hpp"
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/ReLUImpl.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
+#include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/ReLU.hpp"
+#include "aidge/utils/Types.h"

 void Aidge::ReLUImpl_cuda::forward() {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
@@ -52,8 +49,8 @@ void Aidge::ReLUImpl_cuda::forward() {
 template <class T>
 void Aidge::ReLUImpl_cuda::forward_(const Tensor& input) {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
-    const T alpha = 1.0f;
-    const T beta = 0.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
     CHECK_CUDNN_STATUS(
         cudnnActivationForward(CudaContext::cudnnHandle(),
                                mReLUDesc,
...
Test_AvgPoolingImpl.cpp
@@ -12,9 +12,11 @@
 #include <array>

 #include <catch2/catch_test_macros.hpp>
+#include <cuda_fp16.h>

 #include "Test_cuda.hpp"

+#include "aidge/data/half.hpp"
 #include "aidge/data/Tensor.hpp"

 #include "aidge/backend/cpu.hpp"
@@ -122,4 +124,38 @@ TEST_CASE("[gpu/operator] AvgPooling(forward)", "[AvgPooling][GPU]") {
         delete[] computedOutput;
     }

+    SECTION("half") {
+        std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(Array4D<half_float::half,1,1,3,3> { // NCHW
+            {
+                {
+                    {{half_float::half(0.3745), half_float::half(0.9507), half_float::half(0.7320)},
+                     {half_float::half(0.5987), half_float::half(0.1560), half_float::half(0.1560)},
+                     {half_float::half(0.0581), half_float::half(0.8662), half_float::half(0.6011)}}
+                }
+            }
+        });
+        myInput2->setBackend("cuda");
+
+        std::shared_ptr<Node> myAvgPool = AvgPooling({3,3}, "mycdw", {3,3});
+        auto op = std::static_pointer_cast<OperatorTensor>(myAvgPool -> getOperator());
+
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<half_float::half,1,1,1,1> {
+            {{{{(half_float::half(0.3745) + half_float::half(0.9507) + half_float::half(0.7320) + half_float::half(0.5987) + half_float::half(0.1560) + half_float::half(0.1560) + half_float::half(0.0581) + half_float::half(0.8662) + half_float::half(0.6011)) / half_float::half(9.0)}}}}
+        });
+
+        op->associateInput(0, myInput2);
+        op->setDataType(DataType::Float16);
+        op->setBackend("cuda");
+        op->computeOutputDims();
+        myAvgPool->forward();
+
+        half* computedOutput = new half[myOutput->size()]();
+        cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(half) * myOutput->size(), cudaMemcpyDeviceToHost);
+
+        for (int i = 0; i < myOutput->size(); i++) {
+            const half_float::half targetOutput = *(static_cast<half_float::half*>(myOutput->getImpl()->rawPtr()) + i);
+            REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
+        }
+
+        delete[] computedOutput;
+    }
 }
\ No newline at end of file
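For reference, the expected value in the half test is simply the mean of the nine inputs, accumulated in half precision exactly as myOutput is constructed:

    (0.3745 + 0.9507 + 0.7320 + 0.5987 + 0.1560 + 0.1560 + 0.0581 + 0.8662 + 0.6011) / 9 ≈ 0.4993

Since one binary16 ULP near 0.5 is about 4.9e-4, a 1e-6 tolerance between two half values effectively requires the GPU result to round to the identical half as the host-side reference.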