Commit f517018e authored by Houssem ROUIS

add half_precision support

parent ac87b48e
Part of merge requests: !15 (version 0.2.0), !12 (Lenetop), !10 (Lenet operators)
@@ -25,12 +25,18 @@ namespace Aidge {
 template <class T>
 cublasStatus_t cublasGemm(cublasHandle_t handle,
-         cublasOperation_t transa, cublasOperation_t transb,
-         int m, int n, int k,
-         const T *alpha,
-         const T *A, int lda,
-         const T *B, int ldb,
-         const T *beta,
-         T *C, int ldc);
+                          cublasOperation_t transa, cublasOperation_t transb,
+                          int m, int n, int k,
+                          const T *alpha,
+                          const T *A, int lda,
+                          const T *B, int ldb,
+                          const T *beta,
+                          T *C, int ldc);
+// cublasGemm(cublasContext*&, cublasOperation_t, cublasOperation_t, int&, int&, int&,
+//            const type*,
+//            const __half*&, int&,
+//            const __half*&, int&,
+//            const type*,
+//            __half*&, int&)
 }

 #endif /* AIDGE_CUDA_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */
\ No newline at end of file
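
Review note: the templated cublasGemm wrapper is what lets FCImpl.cpp below stay element-type-agnostic; the explicit specializations further down in this commit route to cublasHgemm, cublasSgemm or cublasDgemm. A minimal usage sketch, assuming the CHECK_CUBLAS_STATUS macro from the CUDA utils; denseForward is a hypothetical name, not part of the commit:

    // Hypothetical caller: the same body compiles for T = float, double or
    // __half, because cublasGemm<T> resolves to the matching cuBLAS routine.
    template <class T>
    void denseForward(cublasHandle_t handle, int batch, int outCh, int inCh,
                      const T* weights, const T* input, T* output,
                      const T* alpha, const T* beta) {
        // output^T (outCh x batch) = weights^T (outCh x inCh) * input^T (inCh x batch)
        CHECK_CUBLAS_STATUS(Aidge::cublasGemm(handle, CUBLAS_OP_T, CUBLAS_OP_N,
                                              outCh, batch, inCh,
                                              alpha, weights, inCh,
                                              input, inCh,
                                              beta, output, outCh));
    }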
@@ -11,6 +11,8 @@
 #include <cuda.h>
 #include <cudnn.h>

+#include "aidge/data/half.hpp"
+
 #define CHECK_CUDNN_STATUS(status)                                  \
     do {                                                            \
         const cudnnStatus_t e = (status);                           \
@@ -62,6 +64,29 @@
 namespace Aidge {
 namespace Cuda {
+// CuDNN scaling parameters are typically "alpha" and "beta".
+// Their type must be "float" for HALF and FLOAT (default template)
+// and "double" for DOUBLE (specialized template)
+template <class T>
+struct cudnn_scaling_type {
+    typedef float type;
+};
+
+template <>
+struct cudnn_scaling_type<double> {
+    typedef double type;
+};
+
+template <class T>
+struct cuda_type {
+    typedef T type;
+};
+
+template <>
+struct cuda_type<half_float::half> {
+    typedef __half type;
+};
+
 const char* cublasGetErrorString(cublasStatus_t error);

 // Enable Peer-to-Peer communications between devices
......
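
Review note: a quick illustration of what the two traits resolve to (a sketch, not part of the commit). cudnn_scaling_type selects the host type cuDNN expects for alpha/beta, and cuda_type bridges Aidge's host-side half_float::half to CUDA's device-side __half, which is what makes the reinterpret_cast in FCImpl.cpp below well-defined (both are 16-bit types with the same layout):

    #include <type_traits>

    // HALF and FLOAT tensors scale with float; DOUBLE tensors with double.
    static_assert(std::is_same<Aidge::Cuda::cudnn_scaling_type<half_float::half>::type, float>::value, "");
    static_assert(std::is_same<Aidge::Cuda::cudnn_scaling_type<float>::type, float>::value, "");
    static_assert(std::is_same<Aidge::Cuda::cudnn_scaling_type<double>::type, double>::value, "");

    // Host half maps to the CUDA device half; other types map to themselves.
    static_assert(std::is_same<Aidge::Cuda::cuda_type<half_float::half>::type, __half>::value, "");
    static_assert(sizeof(half_float::half) == sizeof(__half), "reinterpret_cast relies on identical size");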
@@ -10,17 +10,14 @@
  ********************************************************************************/

 #include <cassert>
-#include <chrono>   // std::chrono::milliseconds
-#include <numeric>  // std::accumulate
-#include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/AvgPooling.hpp"
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/AvgPoolingImpl.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
 #include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/AvgPooling.hpp"
+#include "aidge/utils/Types.h"

 template <Aidge::DimIdx_t DIM>
 void Aidge::AvgPoolingImpl_cuda<DIM>::forward() {
@@ -49,11 +46,18 @@ void Aidge::AvgPoolingImpl_cuda<DIM>::forward() {
             &strides[0]));
     }

-    if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
-        forward_<double>(input);
-    }
-    else {
-        forward_<float>(input);
+    switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
+        case DataType::Float64:
+            forward_<double>(input);
+            break;
+        case DataType::Float32:
+            forward_<float>(input);
+            break;
+        case DataType::Float16:
+            forward_<half>(input);
+            break;
+        default:
+            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
     }
 }
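
Review note: this DataType switch is duplicated verbatim in ConvImpl.cpp, FCImpl.cpp and MaxPoolingImpl.cpp below. A possible factoring, sketched under the assumption of C++14 generic lambdas (dispatchByDataType is hypothetical, not in the commit):

    // Map an Aidge DataType to a C++ tag type once; each operator's forward()
    // then passes a generic lambda instead of repeating the switch.
    template <class F>
    void dispatchByDataType(Aidge::DataType dt, F&& f) {
        switch (dt) {
            case Aidge::DataType::Float64: f(double{});            break;
            case Aidge::DataType::Float32: f(float{});             break;
            case Aidge::DataType::Float16: f(half_float::half{});  break;
            default:
                AIDGE_THROW_OR_ABORT(std::runtime_error,
                                     "Data type is not supported by Backend Cuda");
        }
    }

    // Usage inside AvgPoolingImpl_cuda<DIM>::forward():
    //   dispatchByDataType(output->dataType(),
    //                      [&](auto tag) { forward_<decltype(tag)>(input); });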
@@ -61,8 +65,8 @@ template <Aidge::DimIdx_t DIM>
 template <class T>
 void Aidge::AvgPoolingImpl_cuda<DIM>::forward_(const Tensor& input) {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
-    const T alpha = 1.0f;
-    const T beta = 0.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
     CHECK_CUDNN_STATUS(
         cudnnPoolingForward(
             CudaContext::cudnnHandle(),
@@ -83,6 +87,5 @@ Aidge::AvgPoolingImpl_cuda<DIM>::~AvgPoolingImpl_cuda() {
     cudnnDestroyPoolingDescriptor(mAvgPoolingDesc);
 }

-
 // Template declarations
 template class Aidge::AvgPoolingImpl_cuda<2>;
@@ -10,17 +10,13 @@
  ********************************************************************************/

 #include <cassert>
-#include <chrono>   // std::chrono::milliseconds
-#include <numeric>  // std::accumulate
-#include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/Conv.hpp"
-
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/ConvImpl.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
 #include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Types.h"

 template <Aidge::DimIdx_t DIM>
 void Aidge::ConvImpl_cuda<DIM>::forward() {
@@ -106,11 +102,18 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
     // Do the actual forward computation
     // Template is only for scaling parameters, which are always in float
     // except when the convolution is performed in double precision.
-    if (op.getOutput(0)->dataType() == DataType::Float64) {
-        forward_<double>(input0, input1, input2);
-    }
-    else {
-        forward_<float>(input0, input1, input2);
+    switch(op.getOutput(0)->dataType()) {
+        case DataType::Float64:
+            forward_<double>(input0, input1, input2);
+            break;
+        case DataType::Float32:
+            forward_<float>(input0, input1, input2);
+            break;
+        case DataType::Float16:
+            forward_<half>(input0, input1, input2);
+            break;
+        default:
+            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
     }
 }
@@ -118,9 +121,8 @@ template <Aidge::DimIdx_t DIM>
 template <class T>
 void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2) {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
-    const T alpha = 1.0f;
-    const T beta = 0.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
     CHECK_CUDNN_STATUS(cudnnConvolutionForward(CudaContext::cudnnHandle(),
         &alpha,
         std::dynamic_pointer_cast<TensorImpl_cuda_>(input0.getImpl())->getCudnnTensorDesc(input0),
......
@@ -15,15 +15,13 @@
 #include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/FC.hpp"
-
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/FCImpl.hpp"
 #include "aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
 #include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/FC.hpp"
+#include "aidge/utils/Types.h"

 void Aidge::FCImpl_cuda::forward() {
     assert(mOp.getRawInput(0) && "missing input #0");
@@ -39,11 +37,18 @@ void Aidge::FCImpl_cuda::forward() {
     bool noBias = fcOp.template getAttr<FCAttr::NoBias>();
     std::size_t outChannels = static_cast<std::size_t>(fcOp.template getAttr<FCAttr::OutChannels>());

-    if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
-        forward_<double>(input0, input1, input2, noBias, outChannels);
-    }
-    else {
-        forward_<float>(input0, input1, input2, noBias, outChannels);
+    switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
+        case DataType::Float64:
+            forward_<double>(input0, input1, input2, noBias, outChannels);
+            break;
+        case DataType::Float32:
+            forward_<float>(input0, input1, input2, noBias, outChannels);
+            break;
+        case DataType::Float16:
+            forward_<half>(input0, input1, input2, noBias, outChannels);
+            break;
+        default:
+            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
     }
 }
@@ -61,26 +66,26 @@ void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, co
     int lda = k;
     int ldb = k;
     int ldc = n;
-    const T alpha = T(1.0);
-    const T beta = T(0.0);
+    const T alpha = 1.0f;
+    const T beta = 0.0f;
     CHECK_CUBLAS_STATUS(cublasGemm(CudaContext::cublasHandle(),
                                    CUBLAS_OP_T,
                                    CUBLAS_OP_N,
                                    n,
                                    m,
                                    k,
-                                   &alpha,
+                                   reinterpret_cast<const typename Cuda::cuda_type<T>::type*>(&alpha),
                                    weights,
                                    ldb,
                                    input,
                                    lda,
-                                   &beta,
+                                   reinterpret_cast<const typename Cuda::cuda_type<T>::type*>(&beta),
                                    output,
                                    ldc));

     if(!noBias){
         T* onesVector;
-        cudaMalloc((void**)&onesVector, m * sizeof(T));
+        CHECK_CUDA_STATUS(cudaMalloc((void**)&onesVector, m * sizeof(T)));
         // Fill the vector with ones
         std::vector<T> onesVec(m, T(1.0));
         CHECK_CUDA_STATUS(cudaMemcpy(onesVector,
@@ -95,12 +100,12 @@ void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, co
                                        n,
                                        m,
                                        1,
-                                       &alpha,
+                                       reinterpret_cast<const typename Cuda::cuda_type<T>::type*>(&alpha),
                                        biases,
                                        n,
                                        onesVector,
                                        1,
-                                       &alpha,
+                                       reinterpret_cast<const typename Cuda::cuda_type<T>::type*>(&alpha),
                                        output,
                                        n));
......
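
Review note on what the two GEMM calls compute (an interpretation consistent with the arguments above, not text from the commit): Aidge tensors are row-major while cuBLAS is column-major, so a row-major matrix is exactly what cuBLAS sees as its transpose. With input X (m×k, batch × inChannels), weights W (n×k, outChannels × inChannels) and bias b (length n):

    The first call, cublasGemm(OP_T, OP_N, n, m, k, alpha=1, W, k, X, k, beta=0, Y, n),
    computes Yᵀ (n×m) = Wᵀ (n×k) · Xᵀ (k×m), i.e. Y = X · Wᵀ in row-major terms.

    The second call, cublasGemm(n, m, 1, alpha=1, b, n, ones, 1, alpha=1, Y, n),
    presumably with both operands untransposed, is the rank-1 update
    Yᵀ += b (n×1) · 1ᵀ (1×m): passing &alpha (1.0) in the beta slot makes the
    broadcast bias *add* to the existing output instead of overwriting it.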
@@ -16,61 +16,61 @@ namespace Aidge{
 template <>
 cublasStatus_t cublasGemm<__half>(cublasHandle_t handle,
-         cublasOperation_t transa, cublasOperation_t transb,
-         int m, int n, int k,
-         const __half *alpha,
-         const __half *A, int lda,
-         const __half *B, int ldb,
-         const __half *beta,
-         __half *C, int ldc)
+                                  cublasOperation_t transa, cublasOperation_t transb,
+                                  int m, int n, int k,
+                                  const __half *alpha,
+                                  const __half *A, int lda,
+                                  const __half *B, int ldb,
+                                  const __half *beta,
+                                  __half *C, int ldc)
 {
     return cublasHgemm(handle,
-            transa, transb,
-            m, n, k,
-            alpha,
-            A, lda,
-            B, ldb,
-            beta,
-            C, ldc);
+                       transa, transb,
+                       m, n, k,
+                       alpha,
+                       A, lda,
+                       B, ldb,
+                       beta,
+                       C, ldc);
 }

 template <>
 cublasStatus_t cublasGemm<float>(cublasHandle_t handle,
-         cublasOperation_t transa, cublasOperation_t transb,
-         int m, int n, int k,
-         const float *alpha,
-         const float *A, int lda,
-         const float *B, int ldb,
-         const float *beta,
-         float *C, int ldc)
+                                 cublasOperation_t transa, cublasOperation_t transb,
+                                 int m, int n, int k,
+                                 const float *alpha,
+                                 const float *A, int lda,
+                                 const float *B, int ldb,
+                                 const float *beta,
+                                 float *C, int ldc)
 {
     return cublasSgemm(handle,
-            transa, transb,
-            m, n, k,
-            alpha,
-            A, lda,
-            B, ldb,
-            beta,
-            C, ldc);
+                       transa, transb,
+                       m, n, k,
+                       alpha,
+                       A, lda,
+                       B, ldb,
+                       beta,
+                       C, ldc);
 }

 template <>
 cublasStatus_t cublasGemm<double>(cublasHandle_t handle,
-         cublasOperation_t transa, cublasOperation_t transb,
-         int m, int n, int k,
-         const double *alpha,
-         const double *A, int lda,
-         const double *B, int ldb,
-         const double *beta,
-         double *C, int ldc)
+                                  cublasOperation_t transa, cublasOperation_t transb,
+                                  int m, int n, int k,
+                                  const double *alpha,
+                                  const double *A, int lda,
+                                  const double *B, int ldb,
+                                  const double *beta,
+                                  double *C, int ldc)
 {
     return cublasDgemm(handle,
-            transa, transb,
-            m, n, k,
-            alpha,
-            A, lda,
-            B, ldb,
-            beta,
-            C, ldc);
+                       transa, transb,
+                       m, n, k,
+                       alpha,
+                       A, lda,
+                       B, ldb,
+                       beta,
+                       C, ldc);
 }
 }
\ No newline at end of file
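
Review note (an assumption based on CUDA documentation, not stated in the commit): cublasHgemm performs native FP16 arithmetic and is only supported on GPUs of compute capability 5.3 or higher; on older devices the Float16 path would fail at runtime. A guard one might add around half tests or dispatch:

    #include <cuda_runtime.h>

    // Returns true if device 0 supports native FP16 arithmetic (cc >= 5.3).
    bool deviceSupportsFp16() {
        cudaDeviceProp prop;
        CHECK_CUDA_STATUS(cudaGetDeviceProperties(&prop, 0));
        return prop.major > 5 || (prop.major == 5 && prop.minor >= 3);
    }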
@@ -10,17 +10,14 @@
  ********************************************************************************/

 #include <cassert>
-#include <chrono>   // std::chrono::milliseconds
-#include <numeric>  // std::accumulate
-#include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/MaxPooling.hpp"
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/MaxPoolingImpl.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
 #include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/MaxPooling.hpp"
+#include "aidge/utils/Types.h"

 template <Aidge::DimIdx_t DIM>
 void Aidge::MaxPoolingImpl_cuda<DIM>::forward() {
@@ -49,11 +46,18 @@ void Aidge::MaxPoolingImpl_cuda<DIM>::forward() {
             &strides[0]));
     }

-    if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
-        forward_<double>(input);
-    }
-    else {
-        forward_<float>(input);
+    switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
+        case DataType::Float64:
+            forward_<double>(input);
+            break;
+        case DataType::Float32:
+            forward_<float>(input);
+            break;
+        case DataType::Float16:
+            forward_<half>(input);
+            break;
+        default:
+            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
     }
 }
@@ -61,8 +65,8 @@ template <Aidge::DimIdx_t DIM>
 template <class T>
 void Aidge::MaxPoolingImpl_cuda<DIM>::forward_(const Tensor& input) {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
-    const T alpha = 1.0f;
-    const T beta = 0.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
     CHECK_CUDNN_STATUS(
         cudnnPoolingForward(
             CudaContext::cudnnHandle(),
......
@@ -10,17 +10,14 @@
  ********************************************************************************/

 #include <cassert>
-#include <chrono>   // std::chrono::milliseconds
-#include <numeric>  // std::accumulate
-#include <thread>   // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
-#include "aidge/operator/ReLU.hpp"
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
 #include "aidge/backend/cuda/operator/ReLUImpl.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
 #include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/ReLU.hpp"
+#include "aidge/utils/Types.h"

 void Aidge::ReLUImpl_cuda::forward() {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
@@ -52,8 +49,8 @@ void Aidge::ReLUImpl_cuda::forward() {
 template <class T>
 void Aidge::ReLUImpl_cuda::forward_(const Tensor& input) {
     const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
-    const T alpha = 1.0f;
-    const T beta = 0.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
     CHECK_CUDNN_STATUS(
         cudnnActivationForward(CudaContext::cudnnHandle(),
                                mReLUDesc,
......
@@ -12,9 +12,11 @@
 #include <array>

 #include <catch2/catch_test_macros.hpp>
+#include <cuda_fp16.h>

 #include "Test_cuda.hpp"

+#include "aidge/data/half.hpp"
 #include "aidge/data/Tensor.hpp"

 #include "aidge/backend/cpu.hpp"
@@ -122,4 +124,38 @@ TEST_CASE("[gpu/operator] AvgPooling(forward)", "[AvgPooling][GPU]") {
         delete[] computedOutput;
     }

+    SECTION("half") {
+        std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(Array4D<half_float::half,1,1,3,3> { // NCHW
+            {
+                {
+                    {{half_float::half(0.3745), half_float::half(0.9507), half_float::half(0.7320)},
+                     {half_float::half(0.5987), half_float::half(0.1560), half_float::half(0.1560)},
+                     {half_float::half(0.0581), half_float::half(0.8662), half_float::half(0.6011)}}
+                }
+            }
+        });
+        myInput2->setBackend("cuda");
+
+        std::shared_ptr<Node> myAvgPool = AvgPooling({3,3}, "mycdw", {3,3});
+        auto op = std::static_pointer_cast<OperatorTensor>(myAvgPool->getOperator());
+
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<half_float::half,1,1,1,1> {
+            {{{{(half_float::half(0.3745) + half_float::half(0.9507) + half_float::half(0.7320)
+              + half_float::half(0.5987) + half_float::half(0.1560) + half_float::half(0.1560)
+              + half_float::half(0.0581) + half_float::half(0.8662) + half_float::half(0.6011)) / half_float::half(9.0)}}}}
+        });
+
+        op->associateInput(0, myInput2);
+        op->setDataType(DataType::Float16);
+        op->setBackend("cuda");
+        op->computeOutputDims();
+        myAvgPool->forward();
+
+        half* computedOutput = new half[myOutput->size()]();
+        cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(half) * myOutput->size(), cudaMemcpyDeviceToHost);
+
+        for(int i = 0; i < myOutput->size(); i++){
+            const half_float::half targetOutput = *(static_cast<half_float::half*>(myOutput->getImpl()->rawPtr()) + i);
+            REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
+        }
+
+        delete[] computedOutput;
+    }
 }
\ No newline at end of file
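
Review note on the tolerance (an observation, not a change in this commit): half carries roughly three significant decimal digits (machine epsilon ≈ 9.77e-4), so REQUIRE(fabs(...) < 1e-6) only passes because the GPU result and the host-side reference round to the very same half value for this single 3×3 average. For larger kernels, a bound scaled to half precision would be more robust, e.g.:

    // Sketch: compare in float with a half-scale tolerance instead of 1e-6.
    const float got      = __half2float(computedOutput[i]);   // CUDA half -> float
    const float expected = static_cast<float>(targetOutput);  // half_float -> float
    REQUIRE(std::fabs(got - expected) < 1e-3f);                // needs <cmath>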