Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
aidge_backend_cuda
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Eclipse Projects
aidge
aidge_backend_cuda
Commits
4d17363d
Commit
4d17363d
authored
1 year ago
by
Olivier BICHLER
Committed by
Maxence Naud
1 year ago
Browse files
Options
Downloads
Patches
Plain Diff
Improved style
parent
0c47d7d3
No related branches found
No related tags found
2 merge requests
!15
version 0.2.0
,
!5
New proposal for handling tensor views
Pipeline
#38129
failed
1 year ago
Stage: build
Stage: test
Stage: coverage
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/operator/ConvImpl.cpp
+34
-32
34 additions, 32 deletions
src/operator/ConvImpl.cpp
with
34 additions
and
32 deletions
src/operator/ConvImpl.cpp
+
34
−
32
View file @
4d17363d
...
@@ -24,14 +24,16 @@
...
@@ -24,14 +24,16 @@
template
<
Aidge
::
DimIdx_t
DIM
>
template
<
Aidge
::
DimIdx_t
DIM
>
void
Aidge
::
ConvImpl_cuda
<
DIM
>::
forward
()
{
void
Aidge
::
ConvImpl_cuda
<
DIM
>::
forward
()
{
const
OperatorTensor
&
op
=
static_cast
<
const
OperatorTensor
&>
(
mOp
);
// FIXME: uncomment the following code once memory handling will work
// FIXME: uncomment the following code once memory handling will work
assert
(
mOp
.
getRawInput
(
0
)
&&
"missing input #0"
);
assert
(
mOp
.
getRawInput
(
0
)
&&
"missing input #0"
);
assert
(
mOp
.
getRawInput
(
1
)
&&
"missing input #1"
);
assert
(
mOp
.
getRawInput
(
1
)
&&
"missing input #1"
);
// Convert input data (no overhead if not needed!)
// Convert input data (no overhead if not needed!)
const
auto
&
input0
=
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Input
(
0
)
)
->
refCastFrom
(
mInput0Fallback
,
*
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
))
)
;
const
auto
&
input0
=
o
p
.
getInput
(
0
)
->
refCastFrom
(
mInput0Fallback
,
*
o
p
.
getOutput
(
0
));
const
auto
&
input1
=
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Input
(
1
)
)
->
refCastFrom
(
mInput1Fallback
,
*
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
))
)
;
const
auto
&
input1
=
o
p
.
getInput
(
1
)
->
refCastFrom
(
mInput1Fallback
,
*
o
p
.
getOutput
(
0
));
const
auto
&
input2
=
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Input
(
2
)
)
->
refCastFrom
(
mInput2Fallback
,
*
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
))
)
;
const
auto
&
input2
=
o
p
.
getInput
(
2
)
->
refCastFrom
(
mInput2Fallback
,
*
o
p
.
getOutput
(
0
));
// Lazy-initialize CuDNN convolution descriptor
// Lazy-initialize CuDNN convolution descriptor
if
(
mConvDesc
==
nullptr
)
{
if
(
mConvDesc
==
nullptr
)
{
...
@@ -41,14 +43,13 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
...
@@ -41,14 +43,13 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
const
std
::
vector
<
int
>
upscales
(
convOp
.
template
getAttr
<
ConvAttr
::
DilationDims
>().
begin
(),
convOp
.
template
getAttr
<
ConvAttr
::
DilationDims
>().
end
());
const
std
::
vector
<
int
>
upscales
(
convOp
.
template
getAttr
<
ConvAttr
::
DilationDims
>().
begin
(),
convOp
.
template
getAttr
<
ConvAttr
::
DilationDims
>().
end
());
CHECK_CUDNN_STATUS
(
cudnnCreateConvolutionDescriptor
(
&
mConvDesc
));
CHECK_CUDNN_STATUS
(
cudnnCreateConvolutionDescriptor
(
&
mConvDesc
));
CHECK_CUDNN_STATUS
(
CHECK_CUDNN_STATUS
(
cudnnSetConvolutionNdDescriptor
(
mConvDesc
,
cudnnSetConvolutionNdDescriptor
(
mConvDesc
,
DIM
,
DIM
,
&
paddings
[
0
],
&
paddings
[
0
],
&
strides
[
0
],
&
strides
[
0
],
&
upscales
[
0
],
&
upscales
[
0
],
CUDNN_CROSS_CORRELATION
,
CUDNN_CROSS_CORRELATION
,
DataTypeToCudnn
(
op
.
getOutput
(
0
)
->
dataType
())));
DataTypeToCudnn
(
std
::
static_pointer_cast
<
Tensor
>
(
mOp
.
getRawOutput
(
0
))
->
dataType
())));
}
}
// Lazy-initialize CuDNN filter descriptor
// Lazy-initialize CuDNN filter descriptor
...
@@ -57,10 +58,10 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
...
@@ -57,10 +58,10 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
CHECK_CUDNN_STATUS
(
cudnnCreateFilterDescriptor
(
&
mFilterDesc
));
CHECK_CUDNN_STATUS
(
cudnnCreateFilterDescriptor
(
&
mFilterDesc
));
CHECK_CUDNN_STATUS
(
cudnnSetFilterNdDescriptor
(
mFilterDesc
,
CHECK_CUDNN_STATUS
(
cudnnSetFilterNdDescriptor
(
mFilterDesc
,
DataTypeToCudnn
(
input1
.
dataType
()),
DataTypeToCudnn
(
input1
.
dataType
()),
CUDNN_TENSOR_NCHW
,
CUDNN_TENSOR_NCHW
,
kernels
.
size
(),
kernels
.
size
(),
&
kernels
[
0
]));
&
kernels
[
0
]));
}
}
// Set forward algorithm and allocate the required workspace
// Set forward algorithm and allocate the required workspace
...
@@ -76,14 +77,14 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
...
@@ -76,14 +77,14 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
std
::
vector
<
cudnnConvolutionFwdAlgoPerf_t
>
returnFwdAlgo
(
maxAlgoIterations
);
std
::
vector
<
cudnnConvolutionFwdAlgoPerf_t
>
returnFwdAlgo
(
maxAlgoIterations
);
CHECK_CUDNN_STATUS
(
cudnnFindConvolutionForwardAlgorithm
(
CHECK_CUDNN_STATUS
(
cudnnFindConvolutionForwardAlgorithm
(
CudaContext
::
cudnnHandle
(),
CudaContext
::
cudnnHandle
(),
dynamic
_cast
<
TensorImpl_cuda_
*
>
(
input0
.
getImpl
()
.
get
()
)
->
getCudnnTensorDesc
(
input0
),
std
::
dynamic_pointer
_cast
<
TensorImpl_cuda_
>
(
input0
.
getImpl
())
->
getCudnnTensorDesc
(
input0
),
mFilterDesc
,
mFilterDesc
,
mConvDesc
,
mConvDesc
,
dynamic
_cast
<
TensorImpl_cuda_
*
>
(
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
)
)
->
getImpl
()
.
get
()
)
->
getCudnnTensorDesc
(
*
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
))
)
,
std
::
dynamic_pointer
_cast
<
TensorImpl_cuda_
>
(
o
p
.
getOutput
(
0
)
->
getImpl
())
->
getCudnnTensorDesc
(
*
o
p
.
getOutput
(
0
)),
maxAlgoIterations
,
maxAlgoIterations
,
&
returnAlgoCounts
,
&
returnAlgoCounts
,
&
returnFwdAlgo
[
0
]));
&
returnFwdAlgo
[
0
]));
mFwdAlgo
=
returnFwdAlgo
[
0
].
algo
;
mFwdAlgo
=
returnFwdAlgo
[
0
].
algo
;
// Allocate the workspace required by the chosen CuDNN forward algorithm
// Allocate the workspace required by the chosen CuDNN forward algorithm
...
@@ -91,10 +92,10 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
...
@@ -91,10 +92,10 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
CHECK_CUDNN_STATUS
(
cudnnGetConvolutionForwardWorkspaceSize
(
CHECK_CUDNN_STATUS
(
cudnnGetConvolutionForwardWorkspaceSize
(
CudaContext
::
cudnnHandle
(),
CudaContext
::
cudnnHandle
(),
dynamic_cast
<
TensorImpl_cuda_
*
>
(
input0
.
getImpl
()
.
get
()
)
->
getCudnnTensorDesc
(
input0
),
std
::
dynamic_
pointer_
cast
<
TensorImpl_cuda_
>
(
input0
.
getImpl
())
->
getCudnnTensorDesc
(
input0
),
mFilterDesc
,
mFilterDesc
,
mConvDesc
,
mConvDesc
,
dynamic_cast
<
TensorImpl_cuda_
*
>
(
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
)
)
->
getImpl
()
.
get
()
)
->
getCudnnTensorDesc
(
*
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
))
)
,
std
::
dynamic_
pointer_
cast
<
TensorImpl_cuda_
>
(
o
p
.
getOutput
(
0
)
->
getImpl
())
->
getCudnnTensorDesc
(
*
o
p
.
getOutput
(
0
)),
mFwdAlgo
,
mFwdAlgo
,
&
workspaceSize
));
&
workspaceSize
));
...
@@ -105,7 +106,7 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
...
@@ -105,7 +106,7 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
// Do the actual forward computation
// Do the actual forward computation
// Template is only for scaling parameters, which are always in float
// Template is only for scaling parameters, which are always in float
// excepted when the convolution is performed in double precision.
// excepted when the convolution is performed in double precision.
if
(
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
)
)
->
dataType
()
==
DataType
::
Float64
)
{
if
(
o
p
.
getOutput
(
0
)
->
dataType
()
==
DataType
::
Float64
)
{
forward_
<
double
>
(
input0
,
input1
,
input2
);
forward_
<
double
>
(
input0
,
input1
,
input2
);
}
}
else
{
else
{
...
@@ -116,12 +117,13 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
...
@@ -116,12 +117,13 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
template
<
Aidge
::
DimIdx_t
DIM
>
template
<
Aidge
::
DimIdx_t
DIM
>
template
<
class
T
>
template
<
class
T
>
void
Aidge
::
ConvImpl_cuda
<
DIM
>::
forward_
(
const
Tensor
&
input0
,
const
Tensor
&
input1
,
const
Tensor
&
input2
)
{
void
Aidge
::
ConvImpl_cuda
<
DIM
>::
forward_
(
const
Tensor
&
input0
,
const
Tensor
&
input1
,
const
Tensor
&
input2
)
{
const
OperatorTensor
&
op
=
static_cast
<
const
OperatorTensor
&>
(
mOp
);
const
T
alpha
=
1.0
f
;
const
T
alpha
=
1.0
f
;
const
T
beta
=
0.0
f
;
const
T
beta
=
0.0
f
;
CHECK_CUDNN_STATUS
(
cudnnConvolutionForward
(
CudaContext
::
cudnnHandle
(),
CHECK_CUDNN_STATUS
(
cudnnConvolutionForward
(
CudaContext
::
cudnnHandle
(),
&
alpha
,
&
alpha
,
dynamic_cast
<
TensorImpl_cuda_
*
>
(
input0
.
getImpl
()
.
get
()
)
->
getCudnnTensorDesc
(
input0
),
std
::
dynamic_
pointer_
cast
<
TensorImpl_cuda_
>
(
input0
.
getImpl
())
->
getCudnnTensorDesc
(
input0
),
input0
.
getImpl
()
->
rawPtr
(),
input0
.
getImpl
()
->
rawPtr
(),
mFilterDesc
,
mFilterDesc
,
input1
.
getImpl
()
->
rawPtr
(),
input1
.
getImpl
()
->
rawPtr
(),
...
@@ -130,8 +132,8 @@ void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& inp
...
@@ -130,8 +132,8 @@ void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& inp
mFwdWorkspace
,
mFwdWorkspace
,
mWorkspaceSize
,
mWorkspaceSize
,
&
beta
,
&
beta
,
dynamic_cast
<
TensorImpl_cuda_
*
>
(
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
)
)
->
getImpl
()
.
get
()
)
->
getCudnnTensorDesc
(
*
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
))
)
,
std
::
dynamic_
pointer_
cast
<
TensorImpl_cuda_
>
(
o
p
.
getOutput
(
0
)
->
getImpl
())
->
getCudnnTensorDesc
(
*
o
p
.
getOutput
(
0
)),
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
)
)
->
getImpl
()
->
rawPtr
()));
o
p
.
getOutput
(
0
)
->
getImpl
()
->
rawPtr
()));
// Add bias (if there is any)
// Add bias (if there is any)
if
(
mOp
.
getRawInput
(
2
)
&&
input2
.
size
()
>
0
)
{
if
(
mOp
.
getRawInput
(
2
)
&&
input2
.
size
()
>
0
)
{
...
@@ -147,11 +149,11 @@ void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& inp
...
@@ -147,11 +149,11 @@ void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& inp
CHECK_CUDNN_STATUS
(
cudnnAddTensor
(
CudaContext
::
cudnnHandle
(),
CHECK_CUDNN_STATUS
(
cudnnAddTensor
(
CudaContext
::
cudnnHandle
(),
&
alpha
,
&
alpha
,
dynamic_cast
<
TensorImpl_cuda_
*
>
(
bias
.
getImpl
()
.
get
()
)
->
getCudnnTensorDesc
(
bias
),
std
::
dynamic_
pointer_
cast
<
TensorImpl_cuda_
>
(
bias
.
getImpl
())
->
getCudnnTensorDesc
(
bias
),
input2
.
getImpl
()
->
rawPtr
(),
input2
.
getImpl
()
->
rawPtr
(),
&
alpha
,
&
alpha
,
dynamic_cast
<
TensorImpl_cuda_
*
>
(
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
)
)
->
getImpl
()
.
get
()
)
->
getCudnnTensorDesc
(
*
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
))
)
,
std
::
dynamic_
pointer_
cast
<
TensorImpl_cuda_
>
(
o
p
.
getOutput
(
0
)
->
getImpl
())
->
getCudnnTensorDesc
(
*
o
p
.
getOutput
(
0
)),
std
::
static_pointer_cast
<
Tensor
>
(
mO
p
.
get
Raw
Output
(
0
)
)
->
getImpl
()
->
rawPtr
()));
o
p
.
getOutput
(
0
)
->
getImpl
()
->
rawPtr
()));
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment