Skip to content
Snippets Groups Projects
Commit ccb400be authored by Grégoire Kubler
Browse files

chore: formatted cpp files

parent dbff9526
No related branches found
No related tags found
1 merge request!44Feat formatting
Pipeline #58484 canceled
Showing
with 514 additions and 326 deletions
...@@ -27,19 +27,18 @@ ...@@ -27,19 +27,18 @@
#include "aidge/backend/cuda/operator/MulImpl.hpp" #include "aidge/backend/cuda/operator/MulImpl.hpp"
#include "aidge/backend/cuda/operator/PadImpl.hpp" #include "aidge/backend/cuda/operator/PadImpl.hpp"
#include "aidge/backend/cuda/operator/PowImpl.hpp" #include "aidge/backend/cuda/operator/PowImpl.hpp"
#include "aidge/backend/cuda/operator/ReLUImpl.hpp"
#include "aidge/backend/cuda/operator/ReduceMeanImpl.hpp" #include "aidge/backend/cuda/operator/ReduceMeanImpl.hpp"
#include "aidge/backend/cuda/operator/ReduceSumImpl.hpp" #include "aidge/backend/cuda/operator/ReduceSumImpl.hpp"
#include "aidge/backend/cuda/operator/ReLUImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftMaxImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftGELUImpl.hpp"
#include "aidge/backend/cuda/operator/ReshapeImpl.hpp" #include "aidge/backend/cuda/operator/ReshapeImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftGELUImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftMaxImpl.hpp"
#include "aidge/backend/cuda/operator/SigmoidImpl.hpp" #include "aidge/backend/cuda/operator/SigmoidImpl.hpp"
#include "aidge/backend/cuda/operator/SubImpl.hpp" #include "aidge/backend/cuda/operator/SubImpl.hpp"
#include "aidge/backend/cuda/operator/TanhImpl.hpp" #include "aidge/backend/cuda/operator/TanhImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftMaxImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftGELUImpl.hpp"
#include "aidge/backend/cuda/operator/ILayerNormImpl.hpp" #include "aidge/backend/cuda/operator/ILayerNormImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftGELUImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftMaxImpl.hpp"
#endif /* AIDGE_BACKEND_CUDA_IMPORTS_H_ */ #endif /* AIDGE_BACKEND_CUDA_IMPORTS_H_ */
This diff is collapsed.
...@@ -29,10 +29,10 @@ ...@@ -29,10 +29,10 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
class AddImpl_cuda : public OperatorImpl { class AddImpl_cuda : public OperatorImpl {
public: public:
AddImpl_cuda(const Add_Op& op) : OperatorImpl(op, "cuda") {} AddImpl_cuda(const Add_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<AddImpl_cuda> create(const Add_Op& op) { static std::unique_ptr<AddImpl_cuda> create(const Add_Op &op) {
return std::make_unique<AddImpl_cuda>(op); return std::make_unique<AddImpl_cuda>(op);
} }
...@@ -47,13 +47,19 @@ public: ...@@ -47,13 +47,19 @@ public:
void forward() override; void forward() override;
void backward() override; void backward() override;
private: private:
template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides); template <class T>
template <class T> void backward_(const Tensor& outGrad, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides); void forward_(const std::vector<Tensor> &inputs,
const std::vector<std::vector<int>> &inputsDims,
const std::vector<std::vector<int>> &inputsStrides);
template <class T>
void backward_(const Tensor &outGrad,
const std::vector<std::vector<int>> &inputsDims,
const std::vector<std::vector<int>> &inputsStrides);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
REGISTRAR(Add_Op, "cuda", Aidge::AddImpl_cuda::create); REGISTRAR(Add_Op, "cuda", Aidge::AddImpl_cuda::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_ADDIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_ADDIMPL_H_ */
...@@ -29,10 +29,10 @@ ...@@ -29,10 +29,10 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
class AndImpl_cuda : public OperatorImpl { class AndImpl_cuda : public OperatorImpl {
public: public:
AndImpl_cuda(const And_Op& op) : OperatorImpl(op, "cuda") {} AndImpl_cuda(const And_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<AndImpl_cuda> create(const And_Op& op) { static std::unique_ptr<AndImpl_cuda> create(const And_Op &op) {
return std::make_unique<AndImpl_cuda>(op); return std::make_unique<AndImpl_cuda>(op);
} }
...@@ -46,12 +46,15 @@ public: ...@@ -46,12 +46,15 @@ public:
void forward() override; void forward() override;
private: private:
template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides); template <class T>
void forward_(const std::vector<Tensor> &inputs,
const std::vector<std::vector<int>> &inputsDims,
const std::vector<std::vector<int>> &inputsStrides);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
REGISTRAR(And_Op, "cuda", Aidge::AndImpl_cuda::create); REGISTRAR(And_Op, "cuda", Aidge::AndImpl_cuda::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_ANDIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_ANDIMPL_H_ */
...@@ -12,26 +12,26 @@ ...@@ -12,26 +12,26 @@
#ifndef AIDGE_CUDA_OPERATOR_ANDIMPL_KERNELS_H_ #ifndef AIDGE_CUDA_OPERATOR_ANDIMPL_KERNELS_H_
#define AIDGE_CUDA_OPERATOR_ANDIMPL_KERNELS_H_ #define AIDGE_CUDA_OPERATOR_ANDIMPL_KERNELS_H_
#include <stdexcept>
#include <cfloat> #include <cfloat>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include <cuda_runtime_api.h>
#include <stdexcept>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp" #include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/data/Data.hpp"
namespace Aidge { namespace Aidge {
template <class T> template <class T>
void AndForward(const T* input1, const T* input2, T* output, void AndForward(const T *input1,
const std::vector<int>& input1Dims,const std::vector<int>& input2Dims, const T *input2,
const std::vector<int>& inputStrides, const std::vector<int>& input2Strides,const std::vector<int>& outputStrides, T *output,
const std::vector<int> &input1Dims,
const std::vector<int> &input2Dims,
const std::vector<int> &inputStrides,
const std::vector<int> &input2Strides,
const std::vector<int> &outputStrides,
int outSize); int outSize);
} }
#endif /* AIDGE_CUDA_OPERATOR_ANDIMPL_KERNELS_H_ */ #endif /* AIDGE_CUDA_OPERATOR_ANDIMPL_KERNELS_H_ */
...@@ -29,10 +29,10 @@ ...@@ -29,10 +29,10 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
class ArgMaxImpl_cuda : public OperatorImpl { class ArgMaxImpl_cuda : public OperatorImpl {
public: public:
ArgMaxImpl_cuda(const ArgMax_Op& op) : OperatorImpl(op, "cuda") {} ArgMaxImpl_cuda(const ArgMax_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<ArgMaxImpl_cuda> create(const ArgMax_Op& op) { static std::unique_ptr<ArgMaxImpl_cuda> create(const ArgMax_Op &op) {
return std::make_unique<ArgMaxImpl_cuda>(op); return std::make_unique<ArgMaxImpl_cuda>(op);
} }
...@@ -46,15 +46,17 @@ public: ...@@ -46,15 +46,17 @@ public:
void forward() override; void forward() override;
private: private:
// CuDNN specific variables // CuDNN specific variables
std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback; std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback;
template <class T> void forward_(const Tensor& input, std::int32_t axis, DimSize_t selectLastIdx); template <class T>
void
forward_(const Tensor &input, std::int32_t axis, DimSize_t selectLastIdx);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
REGISTRAR(ArgMax_Op, "cuda", Aidge::ArgMaxImpl_cuda::create); REGISTRAR(ArgMax_Op, "cuda", Aidge::ArgMaxImpl_cuda::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_ARGMAXIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_ARGMAXIMPL_H_ */
...@@ -12,20 +12,23 @@ ...@@ -12,20 +12,23 @@
#ifndef AIDGE_CUDA_OPERATOR_ARGMAXIMPL_KERNEL_H_ #ifndef AIDGE_CUDA_OPERATOR_ARGMAXIMPL_KERNEL_H_
#define AIDGE_CUDA_OPERATOR_ARGMAXIMPL_KERNEL_H_ #define AIDGE_CUDA_OPERATOR_ARGMAXIMPL_KERNEL_H_
#include <stdexcept>
#include <cfloat> #include <cfloat>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include <cuda_runtime_api.h>
#include <stdexcept>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp" #include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/data/Data.hpp"
namespace Aidge namespace Aidge {
{ template <class T>
template <class T> void ArgMax_cuda_forward_kernel(const T *input,
void ArgMax_cuda_forward_kernel(const T* input, T* output, T *output,
const std::vector<int>& inputDims, const std::vector<int>& inputStrides, const std::vector<int> &inputDims,
int axis, int total_elems, std::size_t selectLastIdx); const std::vector<int> &inputStrides,
int axis,
int total_elems,
std::size_t selectLastIdx);
} }
#endif /* AIDGE_CUDA_OPERATOR_ARGMAXIMPL_KERNEL_H_ */ #endif /* AIDGE_CUDA_OPERATOR_ARGMAXIMPL_KERNEL_H_ */
\ No newline at end of file
...@@ -28,12 +28,13 @@ ...@@ -28,12 +28,13 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
template <DimIdx_t DIM> template <DimIdx_t DIM> class AvgPoolingImpl_cuda : public OperatorImpl {
class AvgPoolingImpl_cuda : public OperatorImpl { public:
public: AvgPoolingImpl_cuda(const AvgPooling_Op<DIM> &op)
AvgPoolingImpl_cuda(const AvgPooling_Op<DIM>& op) : OperatorImpl(op, "cuda") {} : OperatorImpl(op, "cuda") {}
static std::unique_ptr<AvgPoolingImpl_cuda> create(const AvgPooling_Op<DIM>& op) { static std::unique_ptr<AvgPoolingImpl_cuda>
create(const AvgPooling_Op<DIM> &op) {
return std::make_unique<AvgPoolingImpl_cuda>(op); return std::make_unique<AvgPoolingImpl_cuda>(op);
} }
...@@ -49,19 +50,19 @@ public: ...@@ -49,19 +50,19 @@ public:
void backward() override; void backward() override;
~AvgPoolingImpl_cuda(); ~AvgPoolingImpl_cuda();
private: private:
// CuDNN specific variables // CuDNN specific variables
cudnnPoolingDescriptor_t mAvgPoolingDesc = nullptr; cudnnPoolingDescriptor_t mAvgPoolingDesc = nullptr;
cudnnPoolingMode_t mMode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; cudnnPoolingMode_t mMode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback; std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback;
template <class T> void forward_(const Tensor& input); template <class T> void forward_(const Tensor &input);
template <class T> void backward_(const Tensor& output_grad); template <class T> void backward_(const Tensor &output_grad);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
using AvgPooling2D_Op = AvgPooling_Op<2>; using AvgPooling2D_Op = AvgPooling_Op<2>;
REGISTRAR(AvgPooling2D_Op, "cuda", Aidge::AvgPoolingImpl_cuda<2>::create); REGISTRAR(AvgPooling2D_Op, "cuda", Aidge::AvgPoolingImpl_cuda<2>::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_AVGPOOLINGIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_AVGPOOLINGIMPL_H_ */
...@@ -28,12 +28,13 @@ ...@@ -28,12 +28,13 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
template <DimIdx_t DIM> template <DimIdx_t DIM> class BatchNormImpl_cuda : public OperatorImpl {
class BatchNormImpl_cuda : public OperatorImpl { public:
public: BatchNormImpl_cuda(const BatchNorm_Op<DIM> &op)
BatchNormImpl_cuda(const BatchNorm_Op<DIM>& op) : OperatorImpl(op, "cuda") {} : OperatorImpl(op, "cuda") {}
static std::unique_ptr<BatchNormImpl_cuda> create(const BatchNorm_Op<DIM>& op) { static std::unique_ptr<BatchNormImpl_cuda>
create(const BatchNorm_Op<DIM> &op) {
return std::make_unique<BatchNormImpl_cuda>(op); return std::make_unique<BatchNormImpl_cuda>(op);
} }
...@@ -49,19 +50,27 @@ public: ...@@ -49,19 +50,27 @@ public:
void backward() override; void backward() override;
~BatchNormImpl_cuda(); ~BatchNormImpl_cuda();
private: private:
// CuDNN specific variables // CuDNN specific variables
cudnnTensorDescriptor_t mBNDesc = nullptr; cudnnTensorDescriptor_t mBNDesc = nullptr;
cudnnBatchNormMode_t mMode; cudnnBatchNormMode_t mMode;
double mEpsilon; double mEpsilon;
template <class T> void forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, const Tensor& input3, const Tensor& input4); template <class T>
template <class T> void backward_(const Tensor& input0, const Tensor& input1, const Tensor& input2); void forward_(const Tensor &input0,
const Tensor &input1,
const Tensor &input2,
const Tensor &input3,
const Tensor &input4);
template <class T>
void backward_(const Tensor &input0,
const Tensor &input1,
const Tensor &input2);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
using BatchNorm2D_Op = BatchNorm_Op<2>; using BatchNorm2D_Op = BatchNorm_Op<2>;
REGISTRAR(BatchNorm2D_Op, "cuda", Aidge::BatchNormImpl_cuda<2>::create); REGISTRAR(BatchNorm2D_Op, "cuda", Aidge::BatchNormImpl_cuda<2>::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_BATCHNORMIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_BATCHNORMIMPL_H_ */
...@@ -27,49 +27,53 @@ ...@@ -27,49 +27,53 @@
#include "aidge/backend/cuda/utils/CudaUtils.hpp" #include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
template <DimIdx_t DIM> template <DimIdx_t DIM> class ConvImpl_cuda : public OperatorImpl {
class ConvImpl_cuda : public OperatorImpl { public:
public: ConvImpl_cuda(const Operator &op, bool depthWise = false)
ConvImpl_cuda(const Operator&op, bool depthWise = false) : OperatorImpl(op, "cuda"), mDepthWise(depthWise) {} : OperatorImpl(op, "cuda"), mDepthWise(depthWise) {}
static std::unique_ptr<ConvImpl_cuda<DIM>> create(const Conv_Op<DIM>& op) { static std::unique_ptr<ConvImpl_cuda<DIM>> create(const Conv_Op<DIM> &op) {
return std::make_unique<ConvImpl_cuda<DIM>>(op); return std::make_unique<ConvImpl_cuda<DIM>>(op);
} }
static std::unique_ptr<ConvImpl_cuda<DIM>> createDW(const ConvDepthWise_Op<DIM> &op) { static std::unique_ptr<ConvImpl_cuda<DIM>>
createDW(const ConvDepthWise_Op<DIM> &op) {
return std::make_unique<ConvImpl_cuda<DIM>>(op, true); return std::make_unique<ConvImpl_cuda<DIM>>(op, true);
} }
virtual std::vector<ImplSpec> getAvailableImplSpecs() const override { virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return { return {{DataType::Any}};
{DataType::Any}
};
} }
void forward() override; void forward() override;
void backward() override; void backward() override;
~ConvImpl_cuda(); ~ConvImpl_cuda();
private: private:
// CuDNN specific variables // CuDNN specific variables
cudnnConvolutionDescriptor_t mConvDesc = nullptr; cudnnConvolutionDescriptor_t mConvDesc = nullptr;
cudnnFilterDescriptor_t mFilterDesc = nullptr; cudnnFilterDescriptor_t mFilterDesc = nullptr;
cudnnConvolutionFwdAlgo_t mFwdAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; cudnnConvolutionFwdAlgo_t mFwdAlgo =
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
cudnnConvolutionBwdFilterAlgo_t mBwdFilterAlgo; cudnnConvolutionBwdFilterAlgo_t mBwdFilterAlgo;
cudnnConvolutionBwdDataAlgo_t mBwdDataAlgo; cudnnConvolutionBwdDataAlgo_t mBwdDataAlgo;
size_t mWorkspaceSize = 0; size_t mWorkspaceSize = 0;
void* mFwdWorkspace = nullptr; void *mFwdWorkspace = nullptr;
void* mBwdWorkspace = nullptr; void *mBwdWorkspace = nullptr;
std::shared_ptr<Tensor> mInput0Fallback; std::shared_ptr<Tensor> mInput0Fallback;
std::shared_ptr<Tensor> mInput1Fallback; std::shared_ptr<Tensor> mInput1Fallback;
std::shared_ptr<Tensor> mInput2Fallback; std::shared_ptr<Tensor> mInput2Fallback;
bool mDepthWise = false; bool mDepthWise = false;
template <class T> void forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2); template <class T>
template <class T> void backward_(const Tensor& input0, const Tensor& input1, const Tensor& input2); void
forward_(const Tensor &input0, const Tensor &input1, const Tensor &input2);
template <class T>
void backward_(const Tensor &input0,
const Tensor &input1,
const Tensor &input2);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
...@@ -77,6 +81,6 @@ using Conv2D_Op = Conv_Op<2>; ...@@ -77,6 +81,6 @@ using Conv2D_Op = Conv_Op<2>;
using ConvDepthWise2D_Op = ConvDepthWise_Op<2>; using ConvDepthWise2D_Op = ConvDepthWise_Op<2>;
REGISTRAR(Conv2D_Op, "cuda", Aidge::ConvImpl_cuda<2>::create); REGISTRAR(Conv2D_Op, "cuda", Aidge::ConvImpl_cuda<2>::create);
REGISTRAR(ConvDepthWise2D_Op, "cuda", Aidge::ConvImpl_cuda<2>::createDW); REGISTRAR(ConvDepthWise2D_Op, "cuda", Aidge::ConvImpl_cuda<2>::createDW);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_CONVIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_CONVIMPL_H_ */
...@@ -29,10 +29,10 @@ ...@@ -29,10 +29,10 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
class DivImpl_cuda : public OperatorImpl { class DivImpl_cuda : public OperatorImpl {
public: public:
DivImpl_cuda(const Div_Op& op) : OperatorImpl(op, "cuda") {} DivImpl_cuda(const Div_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<DivImpl_cuda> create(const Div_Op& op) { static std::unique_ptr<DivImpl_cuda> create(const Div_Op &op) {
return std::make_unique<DivImpl_cuda>(op); return std::make_unique<DivImpl_cuda>(op);
} }
...@@ -47,13 +47,16 @@ public: ...@@ -47,13 +47,16 @@ public:
void forward() override; void forward() override;
void backward() override; void backward() override;
private: private:
template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides); template <class T>
template <class T> void backward_(const Tensor& outGrad); void forward_(const std::vector<Tensor> &inputs,
const std::vector<std::vector<int>> &inputsDims,
const std::vector<std::vector<int>> &inputsStrides);
template <class T> void backward_(const Tensor &outGrad);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
REGISTRAR(Div_Op, "cuda", Aidge::DivImpl_cuda::create); REGISTRAR(Div_Op, "cuda", Aidge::DivImpl_cuda::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_DIVIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_DIVIMPL_H_ */
...@@ -12,28 +12,29 @@ ...@@ -12,28 +12,29 @@
#ifndef AIDGE_CUDA_OPERATOR_DIVIMPL_KERNELS_H_ #ifndef AIDGE_CUDA_OPERATOR_DIVIMPL_KERNELS_H_
#define AIDGE_CUDA_OPERATOR_DIVIMPL_KERNELS_H_ #define AIDGE_CUDA_OPERATOR_DIVIMPL_KERNELS_H_
#include <stdexcept>
#include <cfloat> #include <cfloat>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include <cuda_runtime_api.h>
#include <stdexcept>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp" #include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h" #include "aidge/utils/Types.h"
namespace Aidge { namespace Aidge {
template <class T> template <class T>
void divForward(const T* input1, T* output, const T* intput2, void divForward(const T *input1,
const std::vector<int>& input1Dims,const std::vector<int>& input2Dims, const std::vector<int>& outputDims, T *output,
const std::vector<int>& input1Strides, const std::vector<int>& input2Strides,const std::vector<int>& outputStrides, const T *intput2,
const std::vector<int> &input1Dims,
const std::vector<int> &input2Dims,
const std::vector<int> &outputDims,
const std::vector<int> &input1Strides,
const std::vector<int> &input2Strides,
const std::vector<int> &outputStrides,
int outSize); int outSize);
} }
#endif /* AIDGE_CUDA_OPERATOR_DIVIMPL_KERNELS_H_ */ #endif /* AIDGE_CUDA_OPERATOR_DIVIMPL_KERNELS_H_ */
...@@ -29,10 +29,10 @@ ...@@ -29,10 +29,10 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
class FCImpl_cuda : public OperatorImpl { class FCImpl_cuda : public OperatorImpl {
public: public:
FCImpl_cuda(const FC_Op& op) : OperatorImpl(op, "cuda") {} FCImpl_cuda(const FC_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<FCImpl_cuda> create(const FC_Op& op) { static std::unique_ptr<FCImpl_cuda> create(const FC_Op &op) {
return std::make_unique<FCImpl_cuda>(op); return std::make_unique<FCImpl_cuda>(op);
} }
...@@ -47,17 +47,25 @@ public: ...@@ -47,17 +47,25 @@ public:
void forward() override; void forward() override;
void backward() override; void backward() override;
private: private:
std::shared_ptr<Tensor> mInput0Fallback; std::shared_ptr<Tensor> mInput0Fallback;
std::shared_ptr<Tensor> mInput1Fallback; std::shared_ptr<Tensor> mInput1Fallback;
std::shared_ptr<Tensor> mInput2Fallback; std::shared_ptr<Tensor> mInput2Fallback;
template <class T> void forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, std::size_t outChannels); template <class T>
template <class T> void backward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, std::size_t outChannels); void forward_(const Tensor &input0,
const Tensor &input1,
const Tensor &input2,
std::size_t outChannels);
template <class T>
void backward_(const Tensor &input0,
const Tensor &input1,
const Tensor &input2,
std::size_t outChannels);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
REGISTRAR(FC_Op, "cuda", Aidge::FCImpl_cuda::create); REGISTRAR(FC_Op, "cuda", Aidge::FCImpl_cuda::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_FCIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_FCIMPL_H_ */
...@@ -12,34 +12,45 @@ ...@@ -12,34 +12,45 @@
#ifndef AIDGE_CUDA_OPERATOR_FCIMPL_KERNELS_H_ #ifndef AIDGE_CUDA_OPERATOR_FCIMPL_KERNELS_H_
#define AIDGE_CUDA_OPERATOR_FCIMPL_KERNELS_H_ #define AIDGE_CUDA_OPERATOR_FCIMPL_KERNELS_H_
#include <stdexcept>
#include <cfloat> #include <cfloat>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include <cuda_runtime_api.h>
#include <stdexcept>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp" #include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/data/Data.hpp"
namespace Aidge { namespace Aidge {
template <class T> template <class T>
cublasStatus_t cublasGemm(cublasHandle_t handle, cublasStatus_t cublasGemm(cublasHandle_t handle,
cublasOperation_t transa, cublasOperation_t transb, cublasOperation_t transa,
int m, int n, int k, cublasOperation_t transb,
int m,
int n,
int k,
const T *alpha, const T *alpha,
const T *A, int lda, const T *A,
const T *B, int ldb, int lda,
const T *B,
int ldb,
const T *beta, const T *beta,
T *C, int ldc); T *C,
int ldc);
template <class T> template <class T>
cublasStatus_t cublasGemv(cublasHandle_t handle, cublasOperation_t trans, cublasStatus_t cublasGemv(cublasHandle_t handle,
int m, int n, cublasOperation_t trans,
const T *alpha, int m,
const T *A, int lda, int n,
const T *x, int incx, const T *alpha,
const T *A,
int lda,
const T *x,
int incx,
const T *beta, const T *beta,
T *y, int incy); T *y,
} int incy);
} // namespace Aidge
#endif /* AIDGE_CUDA_OPERATOR_FCIMPL_KERNELS_H_ */ #endif /* AIDGE_CUDA_OPERATOR_FCIMPL_KERNELS_H_ */
\ No newline at end of file
...@@ -29,35 +29,37 @@ ...@@ -29,35 +29,37 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
class GlobalAveragePoolingImpl_cuda : public OperatorImpl { class GlobalAveragePoolingImpl_cuda : public OperatorImpl {
public: public:
GlobalAveragePoolingImpl_cuda(const GlobalAveragePooling_Op& op) : OperatorImpl(op, "cuda") {} GlobalAveragePoolingImpl_cuda(const GlobalAveragePooling_Op &op)
: OperatorImpl(op, "cuda") {}
static std::unique_ptr<GlobalAveragePoolingImpl_cuda> create(const GlobalAveragePooling_Op& op) { static std::unique_ptr<GlobalAveragePoolingImpl_cuda>
create(const GlobalAveragePooling_Op &op) {
return std::make_unique<GlobalAveragePoolingImpl_cuda>(op); return std::make_unique<GlobalAveragePoolingImpl_cuda>(op);
} }
virtual std::vector<ImplSpec> getAvailableImplSpecs() const override { virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return { return {{DataType::Any}};
{DataType::Any}
};
} }
void forward() override; void forward() override;
void backward() override; void backward() override;
~GlobalAveragePoolingImpl_cuda(); ~GlobalAveragePoolingImpl_cuda();
private: private:
// CuDNN specific variables // CuDNN specific variables
cudnnPoolingDescriptor_t mGlobalAveragePoolingDesc = nullptr; cudnnPoolingDescriptor_t mGlobalAveragePoolingDesc = nullptr;
cudnnPoolingMode_t mMode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; cudnnPoolingMode_t mMode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback; std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback;
template <class T> void forward_(const Tensor& input); template <class T> void forward_(const Tensor &input);
template <class T> void backward_(const Tensor& output_grad); template <class T> void backward_(const Tensor &output_grad);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
REGISTRAR(GlobalAveragePooling_Op, "cuda", Aidge::GlobalAveragePoolingImpl_cuda::create); REGISTRAR(GlobalAveragePooling_Op,
} // namespace Aidge "cuda",
Aidge::GlobalAveragePoolingImpl_cuda::create);
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */
...@@ -30,10 +30,11 @@ ...@@ -30,10 +30,11 @@
namespace Aidge { namespace Aidge {
class ILayerNormImpl_cuda : public OperatorImpl { class ILayerNormImpl_cuda : public OperatorImpl {
public: public:
ILayerNormImpl_cuda(const ILayerNorm_Op &op) : OperatorImpl(op, "cuda") {} ILayerNormImpl_cuda(const ILayerNorm_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<ILayerNormImpl_cuda> create(const ILayerNorm_Op &op) { static std::unique_ptr<ILayerNormImpl_cuda>
create(const ILayerNorm_Op &op) {
return std::make_unique<ILayerNormImpl_cuda>(op); return std::make_unique<ILayerNormImpl_cuda>(op);
} }
...@@ -48,18 +49,20 @@ public: ...@@ -48,18 +49,20 @@ public:
void forward() override; void forward() override;
void backward() override; void backward() override;
private: private:
std::shared_ptr<Tensor> mInput0Fallback; std::shared_ptr<Tensor> mInput0Fallback;
std::shared_ptr<Tensor> mInput1Fallback; std::shared_ptr<Tensor> mInput1Fallback;
std::shared_ptr<Tensor> mInput2Fallback; std::shared_ptr<Tensor> mInput2Fallback;
std::shared_ptr<Tensor> mOutputGradFallback; std::shared_ptr<Tensor> mOutputGradFallback;
template <class T> void forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2); template <class T>
template <class T> void backward_(const Tensor& output_grad); void
forward_(const Tensor &input0, const Tensor &input1, const Tensor &input2);
template <class T> void backward_(const Tensor &output_grad);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
REGISTRAR(ILayerNorm_Op, "cuda", Aidge::ILayerNormImpl_cuda::create); REGISTRAR(ILayerNorm_Op, "cuda", Aidge::ILayerNormImpl_cuda::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_ILAYERNORMIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_ILAYERNORMIMPL_H_ */
...@@ -14,79 +14,113 @@ ...@@ -14,79 +14,113 @@
#ifndef AIDGE_CUDA_OPERATOR_ILAYERNORMIMPL_FORWARD_KERNEL_H_ #ifndef AIDGE_CUDA_OPERATOR_ILAYERNORMIMPL_FORWARD_KERNEL_H_
#define AIDGE_CUDA_OPERATOR_ILAYERNORMIMPL_FORWARD_KERNEL_H_ #define AIDGE_CUDA_OPERATOR_ILAYERNORMIMPL_FORWARD_KERNEL_H_
#include <stdexcept>
#include <cfloat> #include <cfloat>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include <cuda_runtime_api.h>
#include <stdexcept>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp" #include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/data/Data.hpp"
namespace Aidge { namespace Aidge {
/** /**
* @brief Compute the forward for ILayerNorm * @brief Compute the forward for ILayerNorm
* @param input: Input tensor * @param input: Input tensor
* @param SF: Scaling factor of input tensor * @param SF: Scaling factor of input tensor
* @param dims: Dimensions of input tensor * @param dims: Dimensions of input tensor
* @param quantized_tensor: Quantized output tensor * @param quantized_tensor: Quantized output tensor
* @param square_tensor: Tensor use for computation * @param square_tensor: Tensor use for computation
* @param weight: weight of ILayerNorm layer * @param weight: weight of ILayerNorm layer
* @param bias: bias of ILayerNorm layer * @param bias: bias of ILayerNorm layer
* @param new_SF: Scaling factor of output that can be use to dequantify * @param new_SF: Scaling factor of output that can be use to dequantify
*/ */
template <class T> template <class T>
__global__ void ILayerNormforward_(T* input, double SF, int* dims, int* quantized_tensor,long long int* square_tensor, T* weight, T* biase, double new_SF); __global__ void ILayerNormforward_(T *input,
double SF,
int *dims,
int *quantized_tensor,
long long int *square_tensor,
T *weight,
T *biase,
double new_SF);
/** /**
* @brief Wrapper function to execute ILayerNormforward_ * @brief Wrapper function to execute ILayerNormforward_
* @note Output correspond to the non-quantized tensor, to obtain the quantized tensor we need to copy quantized_tensor and not input_cuda_tensor * @note Output correspond to the non-quantized tensor, to obtain the quantized
* @param input: Input tensor * tensor we need to copy quantized_tensor and not input_cuda_tensor
* @param output: Output tensor (not quantized) * @param input: Input tensor
* @param SF: Scaling factor of input tensor * @param output: Output tensor (not quantized)
* @param weight_raw: weight of ILayerNorm layer * @param SF: Scaling factor of input tensor
* @param bias_raw: bias of ILayerNorm layer * @param weight_raw: weight of ILayerNorm layer
* @param size: Number of elements in the input tensor * @param bias_raw: bias of ILayerNorm layer
* @param dims: Dimensions of input tensor * @param size: Number of elements in the input tensor
*/ * @param dims: Dimensions of input tensor
*/
template <class T> template <class T>
void ILayerNormforward(const T* input, T* output, double SF, const T* weight_raw, const T* bias_raw, size_t size, std::vector<long unsigned int> dims_input); void ILayerNormforward(const T *input,
T *output,
double SF,
const T *weight_raw,
const T *bias_raw,
size_t size,
std::vector<long unsigned int> dims_input);
/** /**
* @brief Compute the backward for ILayerNorm * @brief Compute the backward for ILayerNorm
* @param output_grad: Gradient of output tensor * @param output_grad: Gradient of output tensor
* @param input_tensor: Input tensor * @param input_tensor: Input tensor
* @param output_tensor: Output tensor obtained after forward * @param output_tensor: Output tensor obtained after forward
* @param mean: Arithmetic mean of input tensor * @param mean: Arithmetic mean of input tensor
* @param var: Arithmetic variance of input tensor * @param var: Arithmetic variance of input tensor
* @param weight: weight of ILayerNorm layer * @param weight: weight of ILayerNorm layer
* @param bias: bias of ILayerNorm layer * @param bias: bias of ILayerNorm layer
* @param input_grad: Gradient of input tensor * @param input_grad: Gradient of input tensor
* @param weight_grad: Gradient of ILayerNorm weight * @param weight_grad: Gradient of ILayerNorm weight
* @param bias_grad: Gradient of ILayerNorm bias * @param bias_grad: Gradient of ILayerNorm bias
* @param size: Number of elements in the input tensor * @param size: Number of elements in the input tensor
*/ */
template <class T> template <class T>
__global__ void ILayerNormbackward_(T* output_grad, T* input_tensor, T* output_tensor, T* mean, T* var, T* weight, T* bias, T* input_grad, T* weight_grad, T* bias_grad, int size); __global__ void ILayerNormbackward_(T *output_grad,
T *input_tensor,
T *output_tensor,
T *mean,
T *var,
T *weight,
T *bias,
T *input_grad,
T *weight_grad,
T *bias_grad,
int size);
/** /**
* @brief Wrapper function to execute ILayerNormbackward_ * @brief Wrapper function to execute ILayerNormbackward_
* @param input_tensor: Input tensor * @param input_tensor: Input tensor
* @param output_grad: Gradient of output tensor * @param output_grad: Gradient of output tensor
* @param output_tensor: Output tensor obtained after forward * @param output_tensor: Output tensor obtained after forward
* @param mean: Arithmetic mean of input tensor * @param mean: Arithmetic mean of input tensor
* @param var: Arithmetic variance of input tensor * @param var: Arithmetic variance of input tensor
* @param weight: weight of ILayerNorm layer * @param weight: weight of ILayerNorm layer
* @param bias: bias of ILayerNorm layer * @param bias: bias of ILayerNorm layer
* @param input_grad: Gradient of input tensor * @param input_grad: Gradient of input tensor
* @param weight_grad: Gradient of ILayerNorm weight * @param weight_grad: Gradient of ILayerNorm weight
* @param bias_grad: Gradient of ILayerNorm bias * @param bias_grad: Gradient of ILayerNorm bias
* @param size: Number of elements in the input tensor * @param size: Number of elements in the input tensor
*/ */
template <class T> template <class T>
void ILayerNormbackward(const T* input_tensor, const T* output_grad, const T* output_tensor,const T* mean,const T* var, const T* weight, const T* bias, T* input_grad, T* weight_grad, T* bias_grad, size_t size); void ILayerNormbackward(const T *input_tensor,
const T *output_grad,
const T *output_tensor,
const T *mean,
const T *var,
const T *weight,
const T *bias,
T *input_grad,
T *weight_grad,
T *bias_grad,
size_t size);
} } // namespace Aidge
#endif /* AIDGE_CUDA_OPERATOR_ILAYERNORMIMPL_FORWARD_KERNEL_H_ */ #endif /* AIDGE_CUDA_OPERATOR_ILAYERNORMIMPL_FORWARD_KERNEL_H_ */
\ No newline at end of file
...@@ -29,10 +29,10 @@ ...@@ -29,10 +29,10 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
class LnImpl_cuda : public OperatorImpl { class LnImpl_cuda : public OperatorImpl {
public: public:
LnImpl_cuda(const Ln_Op& op) : OperatorImpl(op, "cuda") {} LnImpl_cuda(const Ln_Op &op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<LnImpl_cuda> create(const Ln_Op& op) { static std::unique_ptr<LnImpl_cuda> create(const Ln_Op &op) {
return std::make_unique<LnImpl_cuda>(op); return std::make_unique<LnImpl_cuda>(op);
} }
...@@ -47,16 +47,16 @@ public: ...@@ -47,16 +47,16 @@ public:
void forward() override; void forward() override;
void backward() override; void backward() override;
private: private:
std::shared_ptr<Tensor> mInputFallback; std::shared_ptr<Tensor> mInputFallback;
std::shared_ptr<Tensor> mOutputGradFallback; std::shared_ptr<Tensor> mOutputGradFallback;
template <class T> void forward_(const Tensor& input); template <class T> void forward_(const Tensor &input);
template <class T> void backward_(const Tensor& output_grad); template <class T> void backward_(const Tensor &output_grad);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
REGISTRAR(Ln_Op, "cuda", Aidge::LnImpl_cuda::create); REGISTRAR(Ln_Op, "cuda", Aidge::LnImpl_cuda::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_LNIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_LNIMPL_H_ */
...@@ -12,25 +12,19 @@ ...@@ -12,25 +12,19 @@
#ifndef AIDGE_CUDA_OPERATOR_LNIMPL_KERNELS_H_ #ifndef AIDGE_CUDA_OPERATOR_LNIMPL_KERNELS_H_
#define AIDGE_CUDA_OPERATOR_LNIMPL_KERNELS_H_ #define AIDGE_CUDA_OPERATOR_LNIMPL_KERNELS_H_
#include <stdexcept>
#include <cfloat> #include <cfloat>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include <cuda_runtime_api.h>
#include <stdexcept>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp" #include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h" #include "aidge/utils/Types.h"
namespace Aidge { namespace Aidge {
template <class T> template <class T> void lnForward(const T *input, T *output, int size);
void lnForward(const T* input, T* output, int size);
} }
#endif /* AIDGE_CUDA_OPERATOR_LNIMPL_KERNELS_H_ */ #endif /* AIDGE_CUDA_OPERATOR_LNIMPL_KERNELS_H_ */
...@@ -28,38 +28,37 @@ ...@@ -28,38 +28,37 @@
namespace Aidge { namespace Aidge {
// Operator implementation entry point for the backend // Operator implementation entry point for the backend
template <DimIdx_t DIM> template <DimIdx_t DIM> class MaxPoolingImpl_cuda : public OperatorImpl {
class MaxPoolingImpl_cuda : public OperatorImpl { public:
public: MaxPoolingImpl_cuda(const MaxPooling_Op<DIM> &op)
MaxPoolingImpl_cuda(const MaxPooling_Op<DIM>& op) : OperatorImpl(op, "cuda") {} : OperatorImpl(op, "cuda") {}
static std::unique_ptr<MaxPoolingImpl_cuda> create(const MaxPooling_Op<DIM>& op) { static std::unique_ptr<MaxPoolingImpl_cuda>
create(const MaxPooling_Op<DIM> &op) {
return std::make_unique<MaxPoolingImpl_cuda>(op); return std::make_unique<MaxPoolingImpl_cuda>(op);
} }
virtual std::vector<ImplSpec> getAvailableImplSpecs() const override { virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return { return {{DataType::Any}};
{DataType::Any}
};
} }
void forward() override; void forward() override;
void backward() override; void backward() override;
~MaxPoolingImpl_cuda(); ~MaxPoolingImpl_cuda();
private: private:
// CuDNN specific variables // CuDNN specific variables
cudnnPoolingDescriptor_t mMaxPoolingDesc = nullptr; cudnnPoolingDescriptor_t mMaxPoolingDesc = nullptr;
cudnnPoolingMode_t mMode = CUDNN_POOLING_MAX; cudnnPoolingMode_t mMode = CUDNN_POOLING_MAX;
std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback; std::shared_ptr<Tensor> mInputFallback, mOutputGradFallback;
template <class T> void forward_(const Tensor& input); template <class T> void forward_(const Tensor &input);
template <class T> void backward_(const Tensor& output_grad); template <class T> void backward_(const Tensor &output_grad);
}; };
// Implementation entry point registration to Operator // Implementation entry point registration to Operator
using MaxPooling2D_Op = MaxPooling_Op<2>; using MaxPooling2D_Op = MaxPooling_Op<2>;
REGISTRAR(MaxPooling2D_Op, "cuda", Aidge::MaxPoolingImpl_cuda<2>::create); REGISTRAR(MaxPooling2D_Op, "cuda", Aidge::MaxPoolingImpl_cuda<2>::create);
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_MAXPOOLINGIMPL_H_ */ #endif /* AIDGE_BACKEND_CUDA_OPERATOR_MAXPOOLINGIMPL_H_ */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment