diff --git a/include/aidge/backend/cuda.hpp b/include/aidge/backend/cuda.hpp index e8b8772dc92a4a7b5cd8849fa08c62606149d8cc..55ad6a12fe892c3ae7716391be7cf0b843283447 100644 --- a/include/aidge/backend/cuda.hpp +++ b/include/aidge/backend/cuda.hpp @@ -12,6 +12,8 @@ #ifndef AIDGE_BACKEND_CUDA_IMPORTS_H_ #define AIDGE_BACKEND_CUDA_IMPORTS_H_ +#include "aidge/backend/cuda/operator/OperatorImpl.hpp" + #include "aidge/backend/cuda/data/TensorImpl.hpp" #include "aidge/backend/cuda/operator/AddImpl.hpp" #include "aidge/backend/cuda/operator/AndImpl.hpp" diff --git a/include/aidge/backend/cuda/operator/OperatorImpl.hpp b/include/aidge/backend/cuda/operator/OperatorImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4cbd29a617e6f89c73939f214e727b2f96e78149 --- /dev/null +++ b/include/aidge/backend/cuda/operator/OperatorImpl.hpp @@ -0,0 +1,53 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CUDA_OPERATOR_IMPL_H_ +#define AIDGE_CUDA_OPERATOR_IMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +template <class Op, class FwdFunc, class BwdFunc = void()> +class OperatorImpl_cuda : public OperatorImpl, + public Registrable<OperatorImpl_cuda<Op, FwdFunc, BwdFunc>, ImplSpec, Impl<FwdFunc, BwdFunc>> +{ +public: + OperatorImpl_cuda(const Op& op) : OperatorImpl(op, "cuda") {} + + static std::unique_ptr<OperatorImpl_cuda<Op, FwdFunc, BwdFunc>> create(const Op& op) { + return std::make_unique<OperatorImpl_cuda<Op, FwdFunc, BwdFunc>>(op); + } + + virtual std::shared_ptr<ProdConso> getProdConso() const override { + const auto impl = Registrar<OperatorImpl_cuda>::create(getBestMatch(getRequiredSpec())); + return impl.prodConso(mOp); + } + + virtual std::vector<ImplSpec> getAvailableImplSpecs() const override { + // return Registrar<OperatorImpl_cuda>::getKeys(); // Note: cannot return set due to python binding + std::set<ImplSpec> implSpecsSet = Registrar<OperatorImpl_cuda>::getKeys(); + return std::vector<ImplSpec>(implSpecsSet.begin(), implSpecsSet.end()); + } + + void forward() override; + void backward() override; +}; +} // namespace Aidge + +#endif /* AIDGE_CUDA_OPERATOR_IMPL_H_ */ diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp index 1a7bb8edb51312d08467354e20723ad19176bfee..6fdd16317efe71b98558db4160f91613e050f675 100644 --- a/src/operator/FCImpl.cpp +++ b/src/operator/FCImpl.cpp @@ -116,6 +116,7 @@ void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, co } void Aidge::FCImpl_cuda::backward() { + AIDGE_ASSERT(mOp.getRawInput(0), "missing input #0"); AIDGE_ASSERT(mOp.getRawInput(1), "missing input #1"); AIDGE_ASSERT(mOp.getRawInput(2), "missing input #2"); @@ -154,6 +155,9 @@ void Aidge::FCImpl_cuda::backward_(const Tensor& input0, const Tensor& input1, c const T * outputGrad = static_cast<const T*>(op.getOutput(0)->grad()->getImpl()->rawPtr()); T * weightsGrad = static_cast<T*>(op.getInput(1)->grad()->getImpl()->rawPtr()); + + + // Performing weightsGrad = (input) * T(outputGrad) // [n x m] = [n x k] * [k x m] int m = input1.dims()[1]; @@ -200,6 +204,18 @@ void Aidge::FCImpl_cuda::backward_(const Tensor& input0, const Tensor& input1, c 1)); CHECK_CUDA_STATUS(cudaFree(onesVector)); } + + // XXX XXX XXX +/* + op.getOutput(0)->grad()->setBackend("cpu"); + float * test_ptr = static_cast<float *> (op.getOutput(0)->grad()->getImpl()->rawPtr()); + float acc = 0; + for (int i = 0; i < op.getOutput(0)->grad()->size(); i++) + acc += test_ptr[i]; + printf(" FC OUT GRAD = %f \n", 1000 * acc); + op.getOutput(0)->grad()->setBackend("cuda"); +*/ + // Performing inputGrad = (weights) * (outputGrad) CHECK_CUBLAS_STATUS(cublasGemm( CudaContext::cublasHandle(), @@ -217,4 +233,14 @@ void Aidge::FCImpl_cuda::backward_(const Tensor& input0, const Tensor& input1, c static_cast<T*>(op.getInput(0)->grad()->getImpl()->rawPtr()),//dX op.getInput(1)->grad()->size()/outChannels)); + // XXX XXX XXX +/* + op.getInput(1)->grad()->setBackend("cpu"); + test_ptr = static_cast<float *> (op.getInput(1)->grad()->getImpl()->rawPtr()); + acc = 0; + for (int i = 0; i < op.getInput(1)->grad()->size(); i++) + acc += test_ptr[i]; + printf(" FC IN GRAD = %f \n", 1000 * acc); + op.getInput(1)->grad()->setBackend("cuda"); +*/ }