Commit 96a9b1c8 authored by Grégoire Kubler

Merge branch 'dev' into feat/release_pip

parents dfe4925e f5968e18
2 merge requests: !38 version 0.3.0, !19 feat : release_pip
Pipeline #54952 failed
Showing with 897 additions and 1 deletion
@@ -14,13 +14,21 @@
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/AddImpl.hpp"
#include "aidge/backend/cuda/operator/AndImpl.hpp"
#include "aidge/backend/cuda/operator/ArgMaxImpl.hpp"
#include "aidge/backend/cuda/operator/AvgPoolingImpl.hpp"
#include "aidge/backend/cuda/operator/BatchNormImpl.hpp"
#include "aidge/backend/cuda/operator/ConvImpl.hpp"
#include "aidge/backend/cuda/operator/DivImpl.hpp"
#include "aidge/backend/cuda/operator/FCImpl.hpp"
#include "aidge/backend/cuda/operator/GlobalAveragePoolingImpl.hpp"
#include "aidge/backend/cuda/operator/LnImpl.hpp"
#include "aidge/backend/cuda/operator/MaxPoolingImpl.hpp"
#include "aidge/backend/cuda/operator/MulImpl.hpp"
#include "aidge/backend/cuda/operator/PadImpl.hpp"
#include "aidge/backend/cuda/operator/PowImpl.hpp"
#include "aidge/backend/cuda/operator/ReduceMeanImpl.hpp"
#include "aidge/backend/cuda/operator/ReduceSumImpl.hpp"
#include "aidge/backend/cuda/operator/ReLUImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftMaxImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftGELUImpl.hpp"
......
@@ -221,7 +221,39 @@ public:
&strides[0]));
}
}
else {
// Check whether the tensor shape has changed
cudnnDataType_t currentDataType;
int currentNbDims;
// The number of dimensions of the current descriptor is unknown, so query with CUDNN_DIM_MAX and strip the trailing zeros afterwards
std::vector<int> currentDims(CUDNN_DIM_MAX);
std::vector<int> currentStrides(CUDNN_DIM_MAX);
CHECK_CUDNN_STATUS(cudnnGetTensorNdDescriptor(mCudnnTensor, CUDNN_DIM_MAX, &currentDataType, &currentNbDims, currentDims.data(), currentStrides.data()));
// Remove the trailing zeros
currentDims.erase(std::find_if(currentDims.rbegin(), currentDims.rend(), [](int x) { return x != 0; }).base(),
currentDims.end());
std::vector<int> dims(tensor.dims().cbegin(), tensor.dims().cend());
if (dims.size() < 4) {
dims.resize(4, 1);
}
// Update descriptor if shape has changed
if (dims != currentDims) {
std::vector<int> strides(tensor.strides().cbegin(), tensor.strides().cend());
if (strides.size() < 4) {
strides.resize(4, 1);
}
CHECK_CUDNN_STATUS(cudnnSetTensorNdDescriptor(mCudnnTensor,
CudaContext::data_type<T>::value,
dims.size(),
&dims[0],
&strides[0]));
}
}
return mCudnnTensor;
}
......
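The shape comparison above queries the existing descriptor with CUDNN_DIM_MAX entries and then trims the zero padding before comparing against the tensor's broadcast dims. A minimal standalone sketch of that trailing-zero trimming idiom, using a plain std::vector and no cuDNN:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
    // Simulate a descriptor query padded to CUDNN_DIM_MAX (8 here): real dims followed by zeros.
    std::vector<int> queried = {2, 3, 4, 1, 0, 0, 0, 0};

    // Erase everything after the last non-zero entry, as done in the descriptor-refresh code above.
    queried.erase(std::find_if(queried.rbegin(), queried.rend(),
                               [](int x) { return x != 0; }).base(),
                  queried.end());

    assert((queried == std::vector<int>{2, 3, 4, 1}));
    return 0;
}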
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_ANDIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_ANDIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/And.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
class AndImpl_cuda : public OperatorImpl {
private:
public:
AndImpl_cuda(const And_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<AndImpl_cuda> create(const And_Op &op) {
return std::make_unique<AndImpl_cuda>(op);
}
public:
void forward();
private:
template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides);
};
namespace {
// add cuda backend to And_Op implementation registry
static Registrar<And_Op> registrarAndImpl_cuda("cuda", Aidge::AndImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_ANDIMPL_H_ */
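Every operator header in this commit follows the same registration pattern: a file-scope static Registrar object in an anonymous namespace binds the "cuda" backend name to the implementation's create() at static-initialization time. A hypothetical, self-contained sketch of that idiom (this is not Aidge's actual Registrar API, only the general mechanism it relies on):

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>

// Hypothetical operator and implementation base class, for illustration only.
struct OperatorImplBase { virtual ~OperatorImplBase() = default; };
struct FakeOp {};

// Minimal registry: backend name -> factory function.
using Creator = std::function<std::unique_ptr<OperatorImplBase>(const FakeOp&)>;

std::map<std::string, Creator>& registry() {
    static std::map<std::string, Creator> r;
    return r;
}

// Registering is just inserting into the map from a constructor.
struct Registrar {
    Registrar(const std::string& backend, Creator c) { registry()[backend] = std::move(c); }
};

struct FakeOpImpl_cuda : OperatorImplBase {
    static std::unique_ptr<FakeOpImpl_cuda> create(const FakeOp&) {
        return std::make_unique<FakeOpImpl_cuda>();
    }
};

namespace {
// Runs at static-initialization time, like the registrars in these headers.
static Registrar registrarFakeOpImpl_cuda("cuda",
    [](const FakeOp& op) -> std::unique_ptr<OperatorImplBase> { return FakeOpImpl_cuda::create(op); });
} // namespace

int main() {
    FakeOp op;
    auto impl = registry().at("cuda")(op);  // resolve the backend registered above
    return impl ? 0 : 1;
}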
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CUDA_OPERATOR_ANDIMPL_FORWARD_KERNEL_H_
#define AIDGE_CUDA_OPERATOR_ANDIMPL_FORWARD_KERNEL_H_
#include <stdexcept>
#include <cfloat>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
template <class T>
void AndForward(const T* input1, const T* input2, T* output,
const std::vector<int>& input1Dims, const std::vector<int>& input2Dims,
const std::vector<int>& input1Strides, const std::vector<int>& input2Strides, const std::vector<int>& outputStrides,
int outSize);
}
#endif /* AIDGE_CUDA_OPERATOR_ANDIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_ARGMAXIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_ARGMAXIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/ArgMax.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
class ArgMaxImpl_cuda : public OperatorImpl {
private:
// CuDNN specific variables
std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback;
public:
ArgMaxImpl_cuda(const ArgMax_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<ArgMaxImpl_cuda> create(const ArgMax_Op &op) {
return std::make_unique<ArgMaxImpl_cuda>(op);
}
public:
void forward();
private:
template <class T> void forward_(const Tensor& input, std::int32_t axis, DimSize_t selectLastIdx);
};
namespace {
// add cuda backend to ArgMax_Op implementation registry
static Registrar<ArgMax_Op> registrarArgMaxImpl_cuda("cuda", Aidge::ArgMaxImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_ARGMAXIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CUDA_OPERATOR_ARGMAXIMPL_KERNEL_H_
#define AIDGE_CUDA_OPERATOR_ARGMAXIMPL_KERNEL_H_
#include <stdexcept>
#include <cfloat>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge
{
template <class T>
void ArgMax_cuda_forward_kernel(const T* input, T* output,
const std::vector<int>& inputDims, const std::vector<int>& inputStrides,
int axis, int total_elems, std::size_t selectLastIdx);
}
#endif /* AIDGE_CUDA_OPERATOR_ARGMAXIMPL_KERNEL_H_ */
\ No newline at end of file
@@ -35,7 +35,7 @@ private:
// CuDNN specific variables
cudnnConvolutionDescriptor_t mConvDesc = nullptr;
cudnnFilterDescriptor_t mFilterDesc = nullptr;
cudnnConvolutionFwdAlgo_t mFwdAlgo;
cudnnConvolutionFwdAlgo_t mFwdAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
cudnnConvolutionBwdFilterAlgo_t mBwdFilterAlgo;
cudnnConvolutionBwdDataAlgo_t mBwdDataAlgo;
size_t mWorkspaceSize = 0;
......
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_DIVIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_DIVIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Div.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
class DivImpl_cuda : public OperatorImpl {
private:
public:
DivImpl_cuda(const Div_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<DivImpl_cuda> create(const Div_Op &op) {
return std::make_unique<DivImpl_cuda>(op);
}
public:
void forward();
void backward();
// ~DivImpl_cuda();
private:
template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides);
template <class T> void backward_(const Tensor& outGrad);
};
namespace {
// add cuda backend to Div_Op implementation registry
static Registrar<Div_Op> registrarDivImpl_cuda("cuda", Aidge::DivImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_DIVIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CUDA_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_
#define AIDGE_CUDA_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_
#include <stdexcept>
#include <cfloat>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class T>
void divForward(const T* input1, T* output, const T* input2,
const std::vector<int>& input1Dims, const std::vector<int>& input2Dims, const std::vector<int>& outputDims,
const std::vector<int>& input1Strides, const std::vector<int>& input2Strides, const std::vector<int>& outputStrides,
int outSize);
}
#endif /* AIDGE_CUDA_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_LNIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_LNIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Ln.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
class LnImpl_cuda : public OperatorImpl {
private:
std::shared_ptr<Tensor> mInputFallback;
std::shared_ptr<Tensor> mOutputGradFallback;
public:
LnImpl_cuda(const Ln_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<LnImpl_cuda> create(const Ln_Op &op) {
return std::make_unique<LnImpl_cuda>(op);
}
public:
void forward();
void backward();
// ~LnImpl_cuda();
private:
template <class T> void forward_(const Tensor& input);
template <class T> void backward_(const Tensor& output_grad);
};
namespace {
// add cuda backend to Ln_Op implementation registry
static Registrar<Ln_Op> registrarLnImpl_cuda("cuda", Aidge::LnImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_LNIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CUDA_OPERATOR_LNIMPL_FORWARD_KERNEL_H_
#define AIDGE_CUDA_OPERATOR_LNIMPL_FORWARD_KERNEL_H_
#include <stdexcept>
#include <cfloat>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class T>
void lnForward(const T* input, T* output, int size);
}
#endif /* AIDGE_CUDA_OPERATOR_LNIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_MULIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_MULIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Mul.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
class MulImpl_cuda : public OperatorImpl {
private:
public:
MulImpl_cuda(const Mul_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<MulImpl_cuda> create(const Mul_Op &op) {
return std::make_unique<MulImpl_cuda>(op);
}
public:
void forward();
void backward();
private:
template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides);
template <class T> void backward_(const Tensor& outputGrad, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides);
};
namespace {
// add cuda backend to Mul_Op implementation registry
static Registrar<Mul_Op> registrarMulImpl_cuda("cuda", Aidge::MulImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_MULIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_POWIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_POWIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Pow.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
class PowImpl_cuda : public OperatorImpl {
private:
public:
PowImpl_cuda(const Pow_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<PowImpl_cuda> create(const Pow_Op &op) {
return std::make_unique<PowImpl_cuda>(op);
}
public:
void forward();
void backward();
// ~PowImpl_cuda();
private:
template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides);
template <class T> void backward_(const Tensor& outGrad);
};
namespace {
// add cuda backend to Pow_Op implementation registry
static Registrar<Pow_Op> registrarPowImpl_cuda("cuda", Aidge::PowImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_POWIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CUDA_OPERATOR_POWIMPL_FORWARD_KERNEL_H_
#define AIDGE_CUDA_OPERATOR_POWIMPL_FORWARD_KERNEL_H_
#include <stdexcept>
#include <cfloat>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
template <class T>
void powForward(const T* input, T* output, const T* exponent,
const std::vector<int>& inputDims, const std::vector<int>& exponentDims, const std::vector<int>& outputDims,
const std::vector<int>& inputStrides, const std::vector<int>& exponentStrides, const std::vector<int>& outputStrides,
int outSize);
}
#endif /* AIDGE_CUDA_OPERATOR_POWIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CUDA_OPERATOR_REDUCEIMPL_KERNEL_H_
#define AIDGE_CUDA_OPERATOR_REDUCEIMPL_KERNEL_H_
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge
{
template <class T>
void ReduceBackward(const T* input,
T* output,
const std::vector<std::size_t>& inputDims,
const std::vector<std::size_t>& outputDims,
const std::vector<int>& axes,
const std::vector<std::size_t>& factors,
int outSize);
}
#endif /* AIDGE_CUDA_OPERATOR_REDUCEIMPL_KERNEL_H_ */
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_REDUCEMEANIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_REDUCEMEANIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
class ReduceMeanImpl_cuda : public OperatorImpl {
private:
// CuDNN specific variables
std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback;
public:
ReduceMeanImpl_cuda(const ReduceMean_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<ReduceMeanImpl_cuda> create(const ReduceMean_Op &op) {
return std::make_unique<ReduceMeanImpl_cuda>(op);
}
public:
void forward();
void backward();
// ~ReduceMeanImpl_cuda();
private:
template <class T> void forward_(const Tensor& input, const std::vector<int>& axes, bool keepDims);
template <class T> void backward_(const Tensor& output_grad, const std::vector<int>& axes);
};
namespace {
// add cuda backend to ReduceMean_Op implementation registry
static Registrar<ReduceMean_Op> registrarReduceMeanImpl_cuda("cuda", Aidge::ReduceMeanImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_REDUCEMEANIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_REDUCESUMIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_REDUCESUMIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/ReduceSum.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
class ReduceSumImpl_cuda : public OperatorImpl {
private:
// CuDNN specific variables
std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback;
public:
ReduceSumImpl_cuda(const ReduceSum_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<ReduceSumImpl_cuda> create(const ReduceSum_Op &op) {
return std::make_unique<ReduceSumImpl_cuda>(op);
}
public:
void forward();
void backward();
private:
template <class T> void forward_(const Tensor& input, const std::vector<int>& axes, bool keepDims);
template <class T> void backward_(const Tensor& output_grad, const std::vector<int>& axes);
};
namespace {
// add cuda backend to ReduceSum_Op implementation registry
static Registrar<ReduceSum_Op> registrarReduceSumImpl_cuda("cuda", Aidge::ReduceSumImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_REDUCESUMIMPL_H_ */
@@ -44,6 +44,10 @@ void Aidge::AddImpl_cuda::forward() {
std::copy(inputs[i].dims().begin(), inputs[i].dims().end(), std::back_inserter(dims[i]));
dims[i].insert(dims[i].cbegin(), op.getOutput(0)->nbDims() - dims[i].size(), int(1));
if (dims[i].size() < 4) {
dims[i].resize(4, 1);
}
// Compute the corresponding strides
std::vector<int> tensorStrides(dims[i].size());
int product = 1;
......
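The stride computation started in this hunk (and repeated in full in AndImpl_cuda::forward() below) derives contiguous row-major strides from the broadcast, 4-padded dims. A standalone sketch of that step, assuming a contiguous layout:

#include <cassert>
#include <cstddef>
#include <vector>

// Row-major (contiguous) strides for a given shape, mirroring the loop used in
// AddImpl_cuda::forward() and AndImpl_cuda::forward().
std::vector<int> rowMajorStrides(const std::vector<int>& dims) {
    std::vector<int> strides(dims.size());
    int product = 1;
    for (std::size_t j = dims.size(); j > 0; --j) {
        strides[j - 1] = product;
        product *= dims[j - 1];
    }
    return strides;
}

int main() {
    // A broadcast input of shape {1, 3, 1, 5} gets strides {15, 5, 5, 1}.
    assert((rowMajorStrides({1, 3, 1, 5}) == std::vector<int>{15, 5, 5, 1}));
    return 0;
}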
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/AndImpl.hpp"
#include "aidge/backend/cuda/operator/AndImpl_CUDA_kernels.hpp"
#include "aidge/backend/cuda/utils/CudaContext.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/operator/And.hpp"
#include "aidge/utils/Types.h"
void Aidge::AndImpl_cuda::forward() {
const And_Op& op = static_cast<const And_Op&>(mOp);
// Check inputs
AIDGE_ASSERT(op.getInput(0), "missing input in And operator");
AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run And forward because the 0-th input has no implementation.");
DataType datatypeFirstInput = op.getInput(0)->dataType();
for (IOIndex_t i = 1; i < op.nbInputs(); ++i) {
AIDGE_ASSERT(op.getInput(i), "missing input in And operator");
AIDGE_ASSERT(op.getInput(i)->hasImpl(), "cannot run And forward because the {}-th input has no implementation.", i);
AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput, "Cannot compute And on inputs with two different data types.");
}
std::vector<std::shared_ptr<Tensor>> inputFallbacks(op.nbInputs());
std::vector<Tensor> inputs(op.nbInputs());
std::vector<std::vector<int>> dims(op.nbInputs()); // For broadcasted dims
std::vector<std::vector<int>> strides(op.nbInputs()); // For the corresponding strides
for (IOIndex_t i = 0; i < op.nbInputs(); ++i) {
inputs[i] = op.getInput(i)->refCastFrom(inputFallbacks[i], *op.getOutput(0));
// Get tensor dims and broadcast them
std::copy(inputs[i].dims().begin(), inputs[i].dims().end(), std::back_inserter(dims[i]));
dims[i].insert(dims[i].cbegin(), op.getOutput(0)->nbDims() - dims[i].size(), int(1));
if (dims[i].size() < 4) {
dims[i].resize(4, 1);
}
// Compute the corresponding strides
std::vector<int> tensorStrides(dims[i].size());
int product = 1;
for (size_t j = dims[i].size(); j > 0; --j) {
tensorStrides[j - 1] = product;
product *= dims[i][j - 1];
}
strides[i] = tensorStrides;
}
switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
case DataType::Float64:
forward_<double>(inputs, dims, strides);
break;
case DataType::Float32:
forward_<float>(inputs, dims, strides);
break;
case DataType::Float16:
forward_<half>(inputs, dims, strides);
break;
default:
AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
}
}
template <class T>
void Aidge::AndImpl_cuda::forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides) {
const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
const T * input1Ptr = static_cast<const T*>(inputs[0].getImpl()->rawPtr());
const T * input2Ptr = static_cast<const T*>(inputs[1].getImpl()->rawPtr());
T * outputPtr = static_cast<T*>(op.getOutput(0)->getImpl()->rawPtr());
std::vector<int> outputStrides(op.getOutput(0)->nbDims(), 1);
if(op.getOutput(0)->nbDims()>1) {
for (int i = op.getOutput(0)->nbDims()-2; i >= 0; i--) {
outputStrides[i] = outputStrides[i+1] * op.getOutput(0)->dims()[i+1];
}
}
Aidge::AndForward<T>(input1Ptr, input2Ptr, outputPtr,
inputsDims[0], inputsDims[1],
inputsStrides[0], inputsStrides[1], outputStrides,
static_cast<int>(op.getOutput(0)->size()));
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cuda_fp16.h>
#include "aidge/backend/cuda/operator/AndImpl_CUDA_kernels.hpp"
// Helper function for comparison
template <typename T>
__device__ bool compareE(T a, T b) {
return a == b;
}
template <>
__device__ bool compareE<half>(half a, half b) {
return __half2float(a) == __half2float(b);
}
template <typename T>
__global__ void and_cuda_Kernel(const T* input1, const T* input2, T* output,
int* input1_shape, int* input2_shape,
int* input1_strides, int* input2_strides, int* output_strides,
int num_dims, int size) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= size) return;
int input1_idx = 0, input2_idx = 0;
int temp_idx = idx;
for (int i = 0; i < num_dims; ++i) {
int dim = temp_idx / output_strides[i];
temp_idx %= output_strides[i];
input1_idx += (input1_shape[i] == 1 ? 0 : dim) * input1_strides[i];
input2_idx += (input2_shape[i] == 1 ? 0 : dim) * input2_strides[i];
}
output[idx] = static_cast<T>(compareE(input1[input1_idx], input2[input2_idx]));
}
template <typename T>
void Aidge::AndForward(const T* input1, const T* input2, T* output,
const std::vector<int>& input1Dims, const std::vector<int>& input2Dims,
const std::vector<int>& input1Strides, const std::vector<int>& input2Strides, const std::vector<int>& outputStrides,
int outSize)
{
int *d_input1_strides, *d_input2_strides, *d_output_strides, *d_input1_shape, *d_input2_shape;
// Allocate device memory
CHECK_CUDA_STATUS(cudaMalloc(&d_input1_shape, input1Dims.size() * sizeof(int)));
CHECK_CUDA_STATUS(cudaMalloc(&d_input2_shape, input2Dims.size() * sizeof(int)));
CHECK_CUDA_STATUS(cudaMalloc(&d_input1_strides, input1Strides.size() * sizeof(int)));
CHECK_CUDA_STATUS(cudaMalloc(&d_input2_strides, input2Strides.size() * sizeof(int)));
CHECK_CUDA_STATUS(cudaMalloc(&d_output_strides, outputStrides.size() * sizeof(int)));
// Copy data from host to device
CHECK_CUDA_STATUS(cudaMemcpy(d_input1_shape, input1Dims.data(), input1Dims.size() * sizeof(int), cudaMemcpyHostToDevice));
CHECK_CUDA_STATUS(cudaMemcpy(d_input2_shape, input2Dims.data(), input2Dims.size() * sizeof(int), cudaMemcpyHostToDevice));
CHECK_CUDA_STATUS(cudaMemcpy(d_input1_strides, input1Strides.data(), input1Strides.size() * sizeof(int), cudaMemcpyHostToDevice));
CHECK_CUDA_STATUS(cudaMemcpy(d_input2_strides, input2Strides.data(), input2Strides.size() * sizeof(int), cudaMemcpyHostToDevice));
CHECK_CUDA_STATUS(cudaMemcpy(d_output_strides, outputStrides.data(), outputStrides.size() * sizeof(int), cudaMemcpyHostToDevice));
int blockSize = 256;
int numBlocks = (outSize + blockSize - 1) / blockSize;
int num_dims = input1Dims.size();
// Launch the kernel
and_cuda_Kernel<<<numBlocks, blockSize>>>(input1, input2, output,
d_input1_shape, d_input2_shape,
d_input1_strides, d_input2_strides, d_output_strides,
num_dims, outSize);
CHECK_CUDA_STATUS(cudaFree(d_input1_shape));
CHECK_CUDA_STATUS(cudaFree(d_input2_shape));
CHECK_CUDA_STATUS(cudaFree(d_input1_strides));
CHECK_CUDA_STATUS(cudaFree(d_input2_strides));
CHECK_CUDA_STATUS(cudaFree(d_output_strides));
}
template void Aidge::AndForward(const double* input1, const double* input2, double* output,
const std::vector<int>& input1Dims, const std::vector<int>& input2Dims,
const std::vector<int>& input1Strides, const std::vector<int>& input2Strides, const std::vector<int>& outputStrides,
int outSize);
template void Aidge::AndForward(const float* input1, const float* input2, float* output,
const std::vector<int>& input1Dims, const std::vector<int>& input2Dims,
const std::vector<int>& input1Strides, const std::vector<int>& input2Strides, const std::vector<int>& outputStrides,
int outSize);
template void Aidge::AndForward(const half* input1, const half* input2, half* output,
const std::vector<int>& input1Dims, const std::vector<int>& input2Dims,
const std::vector<int>& input1Strides, const std::vector<int>& input2Strides, const std::vector<int>& outputStrides,
int outSize);
\ No newline at end of file
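For reference, the per-thread index arithmetic in and_cuda_Kernel can be reproduced on the host: each flat output index is decomposed along the output strides, and any input dimension of size 1 is pinned to coordinate 0 so that it broadcasts. A host-side sketch of that mapping (plain C++, no CUDA):

#include <cassert>
#include <cstddef>
#include <vector>

// Host-side mirror of the index mapping done per-thread in and_cuda_Kernel.
int broadcastIndex(int flatOutIdx,
                   const std::vector<int>& inShape,
                   const std::vector<int>& inStrides,
                   const std::vector<int>& outStrides) {
    int inIdx = 0;
    int tmp = flatOutIdx;
    for (std::size_t i = 0; i < outStrides.size(); ++i) {
        int coord = tmp / outStrides[i];
        tmp %= outStrides[i];
        // Size-1 dimensions are broadcast: they always contribute coordinate 0.
        inIdx += (inShape[i] == 1 ? 0 : coord) * inStrides[i];
    }
    return inIdx;
}

int main() {
    // Output shape {2, 3}, input shape {1, 3} broadcast along the first axis.
    std::vector<int> outStrides = {3, 1};
    std::vector<int> inShape    = {1, 3};
    std::vector<int> inStrides  = {3, 1};
    // Output element (1, 2) has flat index 5 and maps to input element (0, 2), i.e. flat index 2.
    assert(broadcastIndex(5, inShape, inStrides, outStrides) == 2);
    return 0;
}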