Commit 6180818c authored by Houssem ROUIS

fix BatchNorm for cases where the second inputs are 1D tensors

parent 0f6fc784
2 merge requests: !32 version 0.2.1, !14 MobileNet operators
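
The core of the fix: cuDNN's cudnnBatchNormalizationForwardInference expects the scale/bias/mean/variance parameter descriptor to be 4-D with shape (1, C, 1, 1), but when these inputs arrive as 1-D tensors of length C the descriptor derived from the tensor itself does not have that layout, so the commit builds one explicitly. A minimal standalone sketch of the idea in plain cuDNN (status checks and the Aidge wrappers omitted; makeBnParamDesc is a hypothetical helper, not part of this commit):

#include <cudnn.h>
#include <vector>

// Describe a 1-D per-channel tensor of length `channels` with the
// (1, C, 1, 1) layout expected for the batch-norm parameter descriptor.
cudnnTensorDescriptor_t makeBnParamDesc(cudnnDataType_t dataType, int channels) {
    cudnnTensorDescriptor_t desc;
    cudnnCreateTensorDescriptor(&desc);                    // status check omitted
    const std::vector<int> dims    = {1, channels, 1, 1};
    const std::vector<int> strides = {channels, 1, 1, 1};  // fully packed strides
    cudnnSetTensorNdDescriptor(desc, dataType,
                               static_cast<int>(dims.size()),
                               dims.data(), strides.data());
    return desc;
}
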
@@ -10,19 +10,15 @@
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include <iostream>
#include "aidge/utils/Types.h"
#include "aidge/operator/BatchNorm.hpp"
#include <cuda_runtime.h>
#include <cudnn.h>
#include <vector>
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/BatchNormImpl.hpp"
#include "aidge/backend/cuda/utils/CudaContext.hpp"
#include "aidge/operator/BatchNorm.hpp"
#include "aidge/utils/Types.h"
template <Aidge::DimIdx_t DIM>
void Aidge::BatchNormImpl_cuda<DIM>::forward() {
@@ -53,7 +49,6 @@ void Aidge::BatchNormImpl_cuda<DIM>::forward() {
}
CHECK_CUDNN_STATUS(cudnnCreateTensorDescriptor(&mBNDesc));
// auto tst = dynamic_cast<TensorImpl_cuda_*>(input0.getImpl().get())->getCudnnTensorDesc();
CHECK_CUDNN_STATUS(cudnnDeriveBNTensorDescriptor(
mBNDesc, std::dynamic_pointer_cast<TensorImpl_cuda_>(input0.getImpl())->getCudnnTensorDesc(input0), mMode));
@@ -73,11 +68,18 @@ void Aidge::BatchNormImpl_cuda<DIM>::forward() {
strides.resize(nbDims);
}
if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
forward_<double>(input0, input1, input2, input3, input4);
}
else {
forward_<float>(input0, input1, input2, input3, input4);
switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
case DataType::Float64:
forward_<double>(input0, input1, input2, input3, input4);
break;
case DataType::Float32:
forward_<float>(input0, input1, input2, input3, input4);
break;
case DataType::Float16:
forward_<half>(input0, input1, input2, input3, input4);
break;
default:
AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
}
}
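
The new Float16 case works because cuDNN takes the alpha/beta scaling factors as float even when the tensor data is half precision (and as double only for double-precision data), which is presumably what Cuda::cudnn_scaling_type<T> in the next hunk encodes. A rough sketch of such a trait, assuming the actual Aidge definition is along these lines:

// Hypothetical illustration of a cudnn_scaling_type-style trait: cuDNN
// expects float scaling factors for half- and single-precision tensors
// and double scaling factors for double-precision tensors.
template <typename T>
struct cudnn_scaling_type { using type = float; };

template <>
struct cudnn_scaling_type<double> { using type = double; };
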
@@ -88,24 +90,55 @@ void Aidge::BatchNormImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor
const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
CHECK_CUDNN_STATUS(
cudnnBatchNormalizationForwardInference(
CudaContext::cudnnHandle(),
mMode,
&alpha,
&beta,
std::dynamic_pointer_cast<TensorImpl_cuda_>(input0.getImpl())->getCudnnTensorDesc(input0),
input0.getImpl()->rawPtr(),
std::dynamic_pointer_cast<TensorImpl_cuda_>(op.getOutput(0)->getImpl())->getCudnnTensorDesc(*op.getOutput(0)),
std::static_pointer_cast<Tensor>(op.getRawOutput(0))->getImpl()->rawPtr(),
std::dynamic_pointer_cast<TensorImpl_cuda_>(input1.getImpl())->getCudnnTensorDesc(input1),//scaleBiasMeanVarDesc,
input1.getImpl()->rawPtr(),
input2.getImpl()->rawPtr(),
input3.getImpl()->rawPtr(),
input4.getImpl()->rawPtr(),
mEpsilon)
);
// For the scale, bias, mean and variance inputs: if the tensor is 1D (length C), build an
// explicit 4D descriptor (1, C, 1, 1) so its single dimension maps to the channel axis.
if (input1.nbDims() == 1)
{
cudnnTensorDescriptor_t tensorDesc;
CHECK_CUDNN_STATUS(cudnnCreateTensorDescriptor(&tensorDesc));
const std::vector<int> dims = {1, static_cast<int>(input1.size()),1, 1};
const std::vector<int> strides = {static_cast<int>(input1.size()), 1, 1, 1};
CHECK_CUDNN_STATUS(cudnnSetTensorNdDescriptor(tensorDesc, CudaContext::data_type<T>::value, dims.size(), dims.data(), strides.data()));
CHECK_CUDNN_STATUS(
cudnnBatchNormalizationForwardInference(
CudaContext::cudnnHandle(),
mMode,
&alpha,
&beta,
std::dynamic_pointer_cast<TensorImpl_cuda_>(input0.getImpl())->getCudnnTensorDesc(input0),
input0.getImpl()->rawPtr(),
std::dynamic_pointer_cast<TensorImpl_cuda_>(op.getOutput(0)->getImpl())->getCudnnTensorDesc(*op.getOutput(0)),
std::static_pointer_cast<Tensor>(op.getRawOutput(0))->getImpl()->rawPtr(),
tensorDesc,
input1.getImpl()->rawPtr(),
input2.getImpl()->rawPtr(),
input3.getImpl()->rawPtr(),
input4.getImpl()->rawPtr(),
mEpsilon)
);
CHECK_CUDNN_STATUS(cudnnDestroyTensorDescriptor(tensorDesc));
}
else
{
CHECK_CUDNN_STATUS(
cudnnBatchNormalizationForwardInference(
CudaContext::cudnnHandle(),
mMode,
&alpha,
&beta,
std::dynamic_pointer_cast<TensorImpl_cuda_>(input0.getImpl())->getCudnnTensorDesc(input0),
input0.getImpl()->rawPtr(),
std::dynamic_pointer_cast<TensorImpl_cuda_>(op.getOutput(0)->getImpl())->getCudnnTensorDesc(*op.getOutput(0)),
std::static_pointer_cast<Tensor>(op.getRawOutput(0))->getImpl()->rawPtr(),
std::dynamic_pointer_cast<TensorImpl_cuda_>(input1.getImpl())->getCudnnTensorDesc(input1),
input1.getImpl()->rawPtr(),
input2.getImpl()->rawPtr(),
input3.getImpl()->rawPtr(),
input4.getImpl()->rawPtr(),
mEpsilon)
);
}
}
template <Aidge::DimIdx_t DIM>
Aidge::BatchNormImpl_cuda<DIM>::~BatchNormImpl_cuda() {
if(mBNDesc != nullptr)
@@ -114,6 +147,5 @@ Aidge::BatchNormImpl_cuda<DIM>::~BatchNormImpl_cuda() {
}
}
// Explicit template instantiations
template class Aidge::BatchNormImpl_cuda<2>;
@@ -15,7 +15,6 @@
#include "Test_cuda.hpp"
#include <iostream>
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu.hpp"
@@ -28,10 +27,10 @@ TEST_CASE("[gpu/operator] BatchNorm(forward)") {
auto op = std::static_pointer_cast<OperatorTensor>(myBatchNorm -> getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cuda");
std::shared_ptr<Tensor> myWeights= std::make_shared<Tensor>(Array2D<float,1,3> {{{0.9159252643585205, 0.18772238492965698, 0.4479946792125702}}});
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array2D<float,1,3> {{{0.33898890018463135, 0.3167555630207062, 0.7047033309936523}}});
std::shared_ptr<Tensor> myMean = std::make_shared<Tensor>(Array2D<float,1,3> {{{0.45547693967819214, 0.22650663554668427, 0.6612948179244995}}});
std::shared_ptr<Tensor> myVar = std::make_shared<Tensor>(Array2D<float,1,3> {{{0.02570258639752865, 0.026536229997873306, 0.15111008286476135}}});
std::shared_ptr<Tensor> myWeights= std::make_shared<Tensor>(Array1D<float,3> {{0.9159252643585205, 0.18772238492965698, 0.4479946792125702}});
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float,3> {{0.33898890018463135, 0.3167555630207062, 0.7047033309936523}});
std::shared_ptr<Tensor> myMean = std::make_shared<Tensor>(Array1D<float,3> {{0.45547693967819214, 0.22650663554668427, 0.6612948179244995}});
std::shared_ptr<Tensor> myVar = std::make_shared<Tensor>(Array1D<float,3> {{0.02570258639752865, 0.026536229997873306, 0.15111008286476135}});
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { //NCHW
{
{
@@ -113,7 +112,6 @@ TEST_CASE("[gpu/operator] BatchNorm(forward)") {
for(int i = 0; i < myOutput->size(); i++){
const float targetOutput = *(static_cast<float*>(myOutput->getImpl()->rawPtr()) + i);
std::cout << "Computed : " << computedOutput[i] << " , target: " << targetOutput << std::endl;
REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-5);
}