New proposal for handling tensor views

Merged: Olivier BICHLER requested to merge view into dev
1 file changed: +34 −32
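
At its core, the proposal replaces the repeated raw-accessor pattern with the typed OperatorTensor accessors: mOp is cast to OperatorTensor once per function, and the inputs and outputs are then reached through op.getInput(i) / op.getOutput(0). A minimal before/after sketch of the substitution, distilled from the hunks below (an illustration of the pattern, not an additional change in this MR):

    // Before: every access re-derives the typed tensor from the raw accessor.
    const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))
        ->refCastFrom(mInput0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));

    // After: cast mOp to OperatorTensor once, then use its typed accessors.
    const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
    const auto& input0 = op.getInput(0)->refCastFrom(mInput0Fallback, *op.getOutput(0));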
@@ -24,14 +24,16 @@
 template <Aidge::DimIdx_t DIM>
 void Aidge::ConvImpl_cuda<DIM>::forward() {
+    const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
+
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getRawInput(0) && "missing input #0");
     assert(mOp.getRawInput(1) && "missing input #1");

     // Convert input data (no overhead if not needed!)
-    const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(mInput0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
-    const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(mInput1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
-    const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(mInput2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
+    const auto& input0 = op.getInput(0)->refCastFrom(mInput0Fallback, *op.getOutput(0));
+    const auto& input1 = op.getInput(1)->refCastFrom(mInput1Fallback, *op.getOutput(0));
+    const auto& input2 = op.getInput(2)->refCastFrom(mInput2Fallback, *op.getOutput(0));

     // Lazy-initialize CuDNN convolution descriptor
     if (mConvDesc == nullptr) {
@@ -41,14 +43,13 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
         const std::vector<int> upscales(convOp.template getAttr<ConvAttr::DilationDims>().begin(), convOp.template getAttr<ConvAttr::DilationDims>().end());

         CHECK_CUDNN_STATUS(cudnnCreateConvolutionDescriptor(&mConvDesc));
-        CHECK_CUDNN_STATUS(
-            cudnnSetConvolutionNdDescriptor(mConvDesc,
-                DIM,
-                &paddings[0],
-                &strides[0],
-                &upscales[0],
-                CUDNN_CROSS_CORRELATION,
-                DataTypeToCudnn(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType())));
+        CHECK_CUDNN_STATUS(cudnnSetConvolutionNdDescriptor(mConvDesc,
+            DIM,
+            &paddings[0],
+            &strides[0],
+            &upscales[0],
+            CUDNN_CROSS_CORRELATION,
+            DataTypeToCudnn(op.getOutput(0)->dataType())));
     }

     // Lazy-initialize CuDNN filter descriptor
@@ -57,10 +58,10 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
         CHECK_CUDNN_STATUS(cudnnCreateFilterDescriptor(&mFilterDesc));
         CHECK_CUDNN_STATUS(cudnnSetFilterNdDescriptor(mFilterDesc,
-                                                      DataTypeToCudnn(input1.dataType()),
-                                                      CUDNN_TENSOR_NCHW,
-                                                      kernels.size(),
-                                                      &kernels[0]));
+            DataTypeToCudnn(input1.dataType()),
+            CUDNN_TENSOR_NCHW,
+            kernels.size(),
+            &kernels[0]));
     }

     // Set forward algorithm and allocate the required workspace
@@ -76,14 +77,14 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
         std::vector<cudnnConvolutionFwdAlgoPerf_t> returnFwdAlgo(maxAlgoIterations);
         CHECK_CUDNN_STATUS(cudnnFindConvolutionForwardAlgorithm(
-            CudaContext::cudnnHandle(),
-            dynamic_cast<TensorImpl_cuda_*>(input0.getImpl().get())->getCudnnTensorDesc(input0),
-            mFilterDesc,
-            mConvDesc,
-            dynamic_cast<TensorImpl_cuda_*>(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl().get())->getCudnnTensorDesc(*std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))),
-            maxAlgoIterations,
-            &returnAlgoCounts,
-            &returnFwdAlgo[0]));
+            CudaContext::cudnnHandle(),
+            std::dynamic_pointer_cast<TensorImpl_cuda_>(input0.getImpl())->getCudnnTensorDesc(input0),
+            mFilterDesc,
+            mConvDesc,
+            std::dynamic_pointer_cast<TensorImpl_cuda_>(op.getOutput(0)->getImpl())->getCudnnTensorDesc(*op.getOutput(0)),
+            maxAlgoIterations,
+            &returnAlgoCounts,
+            &returnFwdAlgo[0]));

         mFwdAlgo = returnFwdAlgo[0].algo;

         // Allocate the workspace required by the chosen CuDNN forward algorithm
@@ -91,10 +92,10 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
         CHECK_CUDNN_STATUS(cudnnGetConvolutionForwardWorkspaceSize(
             CudaContext::cudnnHandle(),
-            dynamic_cast<TensorImpl_cuda_*>(input0.getImpl().get())->getCudnnTensorDesc(input0),
+            std::dynamic_pointer_cast<TensorImpl_cuda_>(input0.getImpl())->getCudnnTensorDesc(input0),
             mFilterDesc,
             mConvDesc,
-            dynamic_cast<TensorImpl_cuda_*>(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl().get())->getCudnnTensorDesc(*std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))),
+            std::dynamic_pointer_cast<TensorImpl_cuda_>(op.getOutput(0)->getImpl())->getCudnnTensorDesc(*op.getOutput(0)),
             mFwdAlgo,
             &workspaceSize));
@@ -105,7 +106,7 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
     // Do the actual forward computation
     // Template is only for scaling parameters, which are always in float
     // excepted when the convolution is performed in double precision.
-    if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
+    if (op.getOutput(0)->dataType() == DataType::Float64) {
         forward_<double>(input0, input1, input2);
     }
     else {
@@ -116,12 +117,13 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
 template <Aidge::DimIdx_t DIM>
 template <class T>
 void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2) {
+    const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
     const T alpha = 1.0f;
     const T beta = 0.0f;
     CHECK_CUDNN_STATUS(cudnnConvolutionForward(CudaContext::cudnnHandle(),
         &alpha,
-        dynamic_cast<TensorImpl_cuda_*>(input0.getImpl().get())->getCudnnTensorDesc(input0),
+        std::dynamic_pointer_cast<TensorImpl_cuda_>(input0.getImpl())->getCudnnTensorDesc(input0),
         input0.getImpl()->rawPtr(),
         mFilterDesc,
         input1.getImpl()->rawPtr(),
@@ -130,8 +132,8 @@ void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& inp
         mFwdWorkspace,
         mWorkspaceSize,
         &beta,
-        dynamic_cast<TensorImpl_cuda_*>(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl().get())->getCudnnTensorDesc(*std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()));
+        std::dynamic_pointer_cast<TensorImpl_cuda_>(op.getOutput(0)->getImpl())->getCudnnTensorDesc(*op.getOutput(0)),
+        op.getOutput(0)->getImpl()->rawPtr()));

     // Add bias (if there is any)
     if (mOp.getRawInput(2) && input2.size() > 0) {
@@ -147,11 +149,11 @@ void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& inp
         CHECK_CUDNN_STATUS(cudnnAddTensor(CudaContext::cudnnHandle(),
             &alpha,
-            dynamic_cast<TensorImpl_cuda_*>(bias.getImpl().get())->getCudnnTensorDesc(bias),
+            std::dynamic_pointer_cast<TensorImpl_cuda_>(bias.getImpl())->getCudnnTensorDesc(bias),
             input2.getImpl()->rawPtr(),
             &alpha,
-            dynamic_cast<TensorImpl_cuda_*>(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl().get())->getCudnnTensorDesc(*std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))),
-            std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()));
+            std::dynamic_pointer_cast<TensorImpl_cuda_>(op.getOutput(0)->getImpl())->getCudnnTensorDesc(*op.getOutput(0)),
+            op.getOutput(0)->getImpl()->rawPtr()));
     }
 }
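
The other recurring substitution in the diff swaps dynamic_cast<TensorImpl_cuda_*>(impl.get()) for std::dynamic_pointer_cast<TensorImpl_cuda_>(impl). A self-contained sketch of what that changes; the TensorImpl types here are stand-ins for illustration, not the actual Aidge classes:

    #include <cassert>
    #include <memory>

    struct TensorImpl { virtual ~TensorImpl() = default; };
    struct TensorImpl_cuda_ : TensorImpl {};

    int main() {
        std::shared_ptr<TensorImpl> impl = std::make_shared<TensorImpl_cuda_>();

        // Raw-pointer downcast (the removed form): the result shares no
        // ownership with impl and dangles once the last shared_ptr is gone.
        TensorImpl_cuda_* raw = dynamic_cast<TensorImpl_cuda_*>(impl.get());
        assert(raw != nullptr);

        // shared_ptr downcast (the added form): the result co-owns the object
        // and is simply empty when the dynamic type does not match.
        auto cuda = std::dynamic_pointer_cast<TensorImpl_cuda_>(impl);
        assert(cuda != nullptr);
        return 0;
    }

Note that both forms yield null when the implementation is not the CUDA one, and both call sites in the diff dereference the result unchecked, so the practical difference here is ownership semantics and brevity rather than added safety.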