Skip to content
Snippets Groups Projects
Commit 8e4c52ea authored by Cyril Moineau's avatar Cyril Moineau
Browse files

Add an implementation for Sub operator.

parent 2fa582bb
No related branches found
No related tags found
2 merge requests!32version 0.2.1,!27Add Sub impl
Pipeline #47807 passed
...@@ -14,6 +14,7 @@
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/AddImpl.hpp"
#include "aidge/backend/cuda/operator/SubImpl.hpp"
#include "aidge/backend/cuda/operator/AvgPoolingImpl.hpp"
#include "aidge/backend/cuda/operator/BatchNormImpl.hpp"
#include "aidge/backend/cuda/operator/ConvImpl.hpp"
...@@ -26,4 +27,4 @@
#include "aidge/backend/cuda/operator/SigmoidImpl.hpp"
#include "aidge/backend/cuda/operator/TanhImpl.hpp"
#endif /* AIDGE_BACKEND_CUDA_IMPORTS_H_ */
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_SUBIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_SUBIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Sub.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
/**
 * @brief CUDA backend implementation of the Sub operator.
 *
 * Wraps a Sub_Op and executes the element-wise subtraction on the GPU
 * via cuDNN (see SubImpl.cpp). Registered in the Sub_Op registry under
 * the "cuda" backend name.
 */
class SubImpl_cuda : public OperatorImpl {
public:
    /// Bind this implementation to an existing Sub_Op, targeting the "cuda" backend.
    SubImpl_cuda(const Sub_Op &op) : OperatorImpl(op, "cuda") {}

    /// Factory function used when registering this implementation for Sub_Op.
    static std::unique_ptr<SubImpl_cuda> create(const Sub_Op &op) {
        return std::make_unique<SubImpl_cuda>(op);
    }

    /// Run the forward pass (input checks, broadcasting, then the typed kernel).
    void forward();

private:
    /**
     * Typed kernel: performs the subtraction for element type T.
     *
     * @param inputs        Inputs already cast/copied to the output's backend and dtype.
     * @param inputsDims    Per-input dims, left-padded with 1s to the output rank (broadcast).
     * @param inputsStrides Per-input packed (row-major) strides matching inputsDims.
     */
    template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides);
};
namespace {
// Register the cuda backend in the Sub_Op implementation registry so that
// Sub_Op::setBackend("cuda") can find SubImpl_cuda::create.
static Registrar<Sub_Op> registrarSubImpl_cuda("cuda", Aidge::SubImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_SUBIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/SubImpl.hpp"
#include "aidge/backend/cuda/utils/CudaContext.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/operator/Sub.hpp"
#include "aidge/utils/Types.h"
/**
 * Forward pass of the Sub operator on the cuda backend.
 *
 * Validates the inputs, casts them to the output's backend/dtype, computes
 * broadcast-compatible dims and strides for each input, then dispatches to
 * the typed kernel forward_<T>() based on the output data type.
 */
void Aidge::SubImpl_cuda::forward() {
    const Sub_Op& op = static_cast<const Sub_Op&>(mOp);

    // Check inputs: each one must exist, have a backend implementation,
    // and share the data type of the first input.
    AIDGE_ASSERT(op.getInput(0), "missing input in Sub operator");
    AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Sub forward because the 0-th input has no implementation.");
    DataType datatypeFirstInput = op.getInput(0)->dataType();
    for (IOIndex_t i = 1; i < op.nbInputs(); ++i) {
        AIDGE_ASSERT(op.getInput(i), "missing input in Sub operator");
        AIDGE_ASSERT(op.getInput(i)->hasImpl(), "cannot run Sub forward because the {}-th input has no implementation.", i);
        // Message fixed: this is the Sub operator (was "add"), and the typo
        // "two differents data type" is corrected.
        AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput, "Cannot sub inputs with two different data types.");
    }

    std::vector<std::shared_ptr<Tensor>> inputFallbacks(op.nbInputs());
    std::vector<Tensor> inputs(op.nbInputs());
    std::vector<std::vector<int>> dims(op.nbInputs());    // broadcasted dims of each input
    std::vector<std::vector<int>> strides(op.nbInputs()); // corresponding strides

    for (IOIndex_t i = 0; i < op.nbInputs(); ++i) {
        // Cast/copy the input onto the output's backend and dtype if needed.
        inputs[i] = op.getInput(i)->refCastFrom(inputFallbacks[i], *op.getOutput(0));

        // Left-pad the input dims with 1s so every input has the output's
        // rank (numpy-style broadcasting handled by cuDNN).
        std::copy(inputs[i].dims().begin(), inputs[i].dims().end(), std::back_inserter(dims[i]));
        dims[i].insert(dims[i].cbegin(), op.getOutput(0)->nbDims() - dims[i].size(), int(1));

        // Packed row-major strides for the padded dims (innermost stride is 1).
        std::vector<int> tensorStrides(dims[i].size());
        int product = 1;
        for (size_t j = dims[i].size(); j > 0; --j) {
            tensorStrides[j - 1] = product;
            product *= dims[i][j - 1];
        }
        strides[i] = tensorStrides;
    }

    // Dispatch on the output data type.
    switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
        case DataType::Float64:
            forward_<double>(inputs, dims, strides);
            break;
        case DataType::Float32:
            forward_<float>(inputs, dims, strides);
            break;
        case DataType::Float16:
            forward_<half>(inputs, dims, strides);
            break;
        default:
            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
    }
}
template <class T>
void Aidge::SubImpl_cuda::forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides) {
const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
const typename Cuda::cudnn_scaling_type<T>::type gamma = -1.0f;
// Create a Tensor descriptor with the broadcasted dims and strides
cudnnTensorDescriptor_t tensorDesc;
CHECK_CUDNN_STATUS(cudnnSetTensorNdDescriptor(tensorDesc, CudaContext::data_type<T>::value, inputsDims[0].size(), inputsDims[0].data(), inputsStrides[0].data()));
// Add first input
CHECK_CUDNN_STATUS(
cudnnAddTensor(CudaContext::cudnnHandle(),
&alpha,
tensorDesc,
inputs[0].getImpl()->rawPtr(),
&beta,
std::dynamic_pointer_cast<TensorImpl_cuda_>(op.getOutput(0)->getImpl())->getCudnnTensorDesc(*op.getOutput(0)),
std::static_pointer_cast<Tensor>(op.getRawOutput(0))->getImpl()->rawPtr())
);
// Add other inputs if there are any
for (size_t i = 1; i < op.nbInputs(); ++i)
{
CHECK_CUDNN_STATUS(cudnnSetTensorNdDescriptor(tensorDesc, CudaContext::data_type<T>::value, inputsDims[i].size(), inputsDims[i].data(), inputsStrides[i].data()));
CHECK_CUDNN_STATUS(
cudnnAddTensor(CudaContext::cudnnHandle(),
&gamma,
tensorDesc,
inputs[i].getImpl()->rawPtr(),
&alpha,
std::dynamic_pointer_cast<TensorImpl_cuda_>(op.getOutput(0)->getImpl())->getCudnnTensorDesc(*op.getOutput(0)),
std::static_pointer_cast<Tensor>(op.getRawOutput(0))->getImpl()->rawPtr())
);
}
CHECK_CUDNN_STATUS(cudnnDestroyTensorDescriptor(tensorDesc));
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment