Commit 26d8afbd authored by Houssem ROUIS

add Relu operator

parent 27aa2357
Merge requests: !15 version 0.2.0, !12 Lenetop, !10 Lenet operators
@@ -15,5 +15,6 @@
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/ConvImpl.hpp"
#include "aidge/backend/cuda/operator/ProducerImpl.hpp"
#include "aidge/backend/cuda/operator/ReLUImpl.hpp"
#endif /* AIDGE_BACKEND_CUDA_IMPORTS_H_ */
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_RELUIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_RELUIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/ReLU.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
class ReLUImpl_cuda : public OperatorImpl {
private:
    // CuDNN specific variables
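    // cuDNN 5.0 (CUDNN_VERSION 5000) introduced the opaque activation descriptor;
    // older releases only expose the activation mode enum, hence the two member types below.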
#if CUDNN_VERSION >= 5000
    cudnnActivationDescriptor_t mReLUDesc = nullptr;
#else
    cudnnActivationMode_t mReLUDesc = nullptr;
#endif

public:
    ReLUImpl_cuda(const ReLU_Op &op) : OperatorImpl(op) {}

    static std::unique_ptr<ReLUImpl_cuda> create(const ReLU_Op &op) {
        return std::make_unique<ReLUImpl_cuda>(op);
    }

public:
    void forward();
    ~ReLUImpl_cuda();

private:
    template <class T> void forward_(const Tensor& input);
};
namespace {
// add cuda backend to ReLU_Op implementation registry
static Registrar<ReLU_Op> registrarReLUImpl_cuda("cuda", Aidge::ReLUImpl_cuda::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_RELUIMPL_H_ */
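For reference, a minimal sketch of how this registration is typically exercised from user code. It mirrors the unit test further below; the variable names and tensor values are illustrative only, and it assumes the Aidge core API already used elsewhere in this commit (ReLU(), associateInput, setBackend, computeOutputDims):

#include "aidge/backend/cuda.hpp"   // pulls in ReLUImpl.hpp, so the "cuda" registrar above is instantiated
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ReLU.hpp"

std::shared_ptr<Aidge::Node> relu = Aidge::ReLU();
auto op = std::static_pointer_cast<Aidge::OperatorTensor>(relu->getOperator());
op->associateInput(0, std::make_shared<Aidge::Tensor>(Aidge::Array1D<float,4>{{-1, 0, 2, -3}}));
op->setDataType(Aidge::DataType::Float32);
op->setBackend("cuda");      // resolves the "cuda" entry of Registrar<ReLU_Op> and builds ReLUImpl_cuda via create()
op->computeOutputDims();
relu->forward();             // dispatches to ReLUImpl_cuda::forward()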
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/utils/Types.h"
#include "aidge/operator/ReLU.hpp"
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/ReLUImpl.hpp"
#include "aidge/backend/cuda/utils/CudaContext.hpp"
void Aidge::ReLUImpl_cuda::forward() {
    assert(mOp.getRawInput(0) && "missing input #0");

    std::shared_ptr<Tensor> inputFallback;
    const auto& input = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(inputFallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));

    // Lazy-initialize CuDNN ReLU descriptor
    if (mReLUDesc == nullptr) {
#if CUDNN_VERSION >= 5000
        CHECK_CUDNN_STATUS(cudnnCreateActivationDescriptor(&mReLUDesc));
        CHECK_CUDNN_STATUS(cudnnSetActivationDescriptor(
            mReLUDesc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0));
#else
        mReLUDesc = CUDNN_ACTIVATION_RELU;
#endif
    }
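    // Dispatch on the output data type; anything other than Float64 currently
    // falls back to the single-precision kernel.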
    if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
        forward_<double>(input);
    }
    else {
        forward_<float>(input);
    }
}
template <class T>
void Aidge::ReLUImpl_cuda::forward_(const Tensor& input) {
    const T alpha = 1.0f;
    const T beta = 0.0f;
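    // cudnnActivationForward computes output = alpha * ReLU(input) + beta * output,
    // so alpha = 1 and beta = 0 simply overwrite the output tensor.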
    CHECK_CUDNN_STATUS(
        cudnnActivationForward(CudaContext::cudnnHandle(),
                               mReLUDesc,
                               &alpha,
                               dynamic_cast<TensorImpl_cuda_*>(input.getImpl().get())->getCudnnTensorDesc(),
                               input.getImpl()->rawPtr(),
                               &beta,
                               dynamic_cast<TensorImpl_cuda_*>(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl().get())->getCudnnTensorDesc(),
                               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()));
}
Aidge::ReLUImpl_cuda::~ReLUImpl_cuda() {
    if (mReLUDesc != nullptr) {
        cudnnDestroyActivationDescriptor(mReLUDesc);
    }
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <array>
#include <cmath>  // fabs, used in the result comparisons below
#include <catch2/catch_test_macros.hpp>
#include "Test_cuda.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"
using namespace Aidge;
TEST_CASE("[gpu/operator] ReLU(forward)", "[ReLU][GPU]") {
    SECTION("1D Tensor") {
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> {
            {0, 1, 2,-3, 4,-5,-6, 7, 8, 9}
        });
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,10> {
            {0, 1, 2, 0, 4, 0, 0, 7, 8, 9}
        });

        std::shared_ptr<Node> myReLU = ReLU();
        auto op = std::static_pointer_cast<OperatorTensor>(myReLU->getOperator());
        op->associateInput(0, input0);
        op->setDataType(DataType::Float32);
        op->setBackend("cuda");
        op->computeOutputDims();
        myReLU->forward();
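        // The cuda backend leaves the result in device memory, so copy it back
        // to the host before comparing it against the expected values.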
        float* computedOutput = new float[myOutput->size()]();
        cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * myOutput->size(), cudaMemcpyDeviceToHost);

        for (int i = 0; i < myOutput->size(); i++) {
            const float targetOutput = *(static_cast<float*>(myOutput->getImpl()->rawPtr()) + i);
            REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
        }

        delete[] computedOutput;
    }
    SECTION("2D Tensor") {
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<float,2,10> {
            {
                { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
            }
        });
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,2,10> {
            {
                { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
                { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
            }
        });

        std::shared_ptr<Node> myReLU = ReLU();
        auto op = std::static_pointer_cast<OperatorTensor>(myReLU->getOperator());
        op->associateInput(0, input0);
        op->setDataType(DataType::Float32);
        op->setBackend("cuda");
        op->computeOutputDims();
        myReLU->forward();

        float* computedOutput = new float[myOutput->size()]();
        cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * myOutput->size(), cudaMemcpyDeviceToHost);

        for (int i = 0; i < myOutput->size(); i++) {
            const float targetOutput = *(static_cast<float*>(myOutput->getImpl()->rawPtr()) + i);
            REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
        }

        delete[] computedOutput;
    }
    SECTION("3D Tensor") {
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<float,2,2,10> {
            {
                {
                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                },
                {
                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                }
            }
        });
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array3D<float,2,2,10> {
            {
                {
                    { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
                    { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
                },
                {
                    { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
                    { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
                }
            }
        });

        std::shared_ptr<Node> myReLU = ReLU();
        auto op = std::static_pointer_cast<OperatorTensor>(myReLU->getOperator());
        op->associateInput(0, input0);
        op->setDataType(DataType::Float32);
        op->setBackend("cuda");
        op->computeOutputDims();
        myReLU->forward();

        float* computedOutput = new float[myOutput->size()]();
        cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * myOutput->size(), cudaMemcpyDeviceToHost);

        for (int i = 0; i < myOutput->size(); i++) {
            const float targetOutput = *(static_cast<float*>(myOutput->getImpl()->rawPtr()) + i);
            REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
        }

        delete[] computedOutput;
    }
    SECTION("4D Tensor") {
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<float,2,2,2,10> {
            {
                {
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    },
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    }
                },
                {
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    },
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    }
                }
            }
        });
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,10> {
            {
                {
                    {
                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
                    },
                    {
                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
                    }
                },
                {
                    {
                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
                    },
                    {
                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
                    }
                }
            }
        });

        std::shared_ptr<Node> myReLU = ReLU();
        auto op = std::static_pointer_cast<OperatorTensor>(myReLU->getOperator());
        op->associateInput(0, input0);
        op->setDataType(DataType::Float32);
        op->setBackend("cuda");
        op->computeOutputDims();
        op->forward();

        float* computedOutput = new float[myOutput->size()]();
        cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * myOutput->size(), cudaMemcpyDeviceToHost);

        for (int i = 0; i < myOutput->size(); i++) {
            const float targetOutput = *(static_cast<float*>(myOutput->getImpl()->rawPtr()) + i);
            REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
        }

        delete[] computedOutput;
    }
}