Skip to content
Snippets Groups Projects
Commit e9e8c07e authored by Maxence Naud's avatar Maxence Naud
Browse files

Update ReLU, LeakyReLU and FC backward functions

parent e61adf11
No related branches found
No related tags found
2 merge requests!50version 0.2.0,!39Scheduler backprop
Pipeline #42431 canceled
...@@ -48,6 +48,8 @@ class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu, ...@@ -48,6 +48,8 @@ class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
const void *, const void *,
const void *, const void *,
const void *, const void *,
void *,
void *,
void *)> {}; void *)> {};
class FCImpl_cpu : public OperatorImpl { class FCImpl_cpu : public OperatorImpl {
...@@ -58,7 +60,8 @@ public: ...@@ -58,7 +60,8 @@ public:
return std::make_unique<FCImpl_cpu>(op); return std::make_unique<FCImpl_cpu>(op);
} }
void forward() override; void forward() override final;
void backward() override final;
}; };
namespace { namespace {
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_BACKWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_BACKWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>
#include "aidge/backend/cpu/operator/FCImpl.hpp"
namespace Aidge {
template <class I, class O, class W, class B>
void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
                                const void* input_, const void* originalInput_, const void* weight_, void* output_, void* weightGrad_, void* biasesGrad_) {
    // Backward pass of a fully-connected (dense) layer.
    //
    // Buffer layout (row-major):
    //   input_         : gradient w.r.t. the layer output, shape (batchSize, nbOutputs)
    //   originalInput_ : forward-pass input,               shape (batchSize, oneInputSize)
    //   weight_        : forward-pass weights,             shape (nbOutputs, oneInputSize)
    //   output_        : gradient w.r.t. the layer input,  shape (batchSize, oneInputSize)
    //   weightGrad_    : gradient w.r.t. the weights,      shape (nbOutputs, oneInputSize)
    //   biasesGrad_    : gradient w.r.t. the biases,       shape (nbOutputs)
    //
    // attrs<0> = number of outputs, attrs<1> = "no bias" flag.
    // FIXME: missing FC attributes as arguments
    const I* input = static_cast<const I*>(input_);
    const I* originalInput = static_cast<const I*>(originalInput_);
    const W* weight = static_cast<const W*>(weight_);
    O* output = static_cast<O*>(output_);
    W* weightGrad = static_cast<W*>(weightGrad_);
    B* biasesGrad = static_cast<B*>(biasesGrad_);

    // Hoist the attribute accesses: they are loop-invariant and were
    // previously re-read in every loop bound and index expression.
    const std::size_t nbOutputs = static_cast<std::size_t>(std::get<0>(attrs));
    const bool noBias = std::get<1>(attrs);

    // bias grad: dL/db[o] = sum over the batch of dL/dy[b, o]
    if (noBias) {
        // NOTE(review): this still writes nbOutputs zeros through biasesGrad
        // even though the layer has no bias — assumes the caller always
        // provides valid storage here; confirm it is never null.
        std::fill(biasesGrad, biasesGrad + nbOutputs, B(0));
    } else {
        for (std::size_t o = 0; o < nbOutputs; ++o) {
            B sum{0};
            for (std::size_t b = 0; b < batchSize; ++b) {
                sum += input[b * nbOutputs + o];
            }
            biasesGrad[o] = sum;
        }
    }

    // weight grad: dL/dW[o, c] = sum over the batch of x[b, c] * dL/dy[b, o]
    for (std::size_t o = 0; o < nbOutputs; ++o) {
        for (std::size_t c = 0; c < oneInputSize; ++c) {
            W sum{0};
            for (std::size_t b = 0; b < batchSize; ++b) {
                sum += originalInput[b * oneInputSize + c] * input[b * nbOutputs + o];
            }
            weightGrad[o * oneInputSize + c] = sum;
        }
    }

    // input grad: dL/dx[b, c] = sum over outputs of W[o, c] * dL/dy[b, o]
    for (std::size_t b = 0; b < batchSize; ++b) {
        for (std::size_t c = 0; c < oneInputSize; ++c) {
            O sum{0};
            for (std::size_t o = 0; o < nbOutputs; ++o) {
                sum += weight[o * oneInputSize + c] * input[b * nbOutputs + o];
            }
            output[b * oneInputSize + c] = sum;
        }
    }
}
namespace {
// Register the backward kernel for each supported data-type combination.
// The registrar key is {gradOutput, inputGrad, weightGrad, biasGrad} dtypes;
// FCImpl_cpu::backward() looks the kernel up through this registry at runtime.
static Registrar<FCImplBackward_cpu> registrarFCImpl2DBackward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>);
static Registrar<FCImplBackward_cpu> registrarFCImpl2DBackward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::FCImpl_cpu_backward_kernel<int, int, int, int>);
static Registrar<FCImplBackward_cpu> registrarFCImpl2DBackward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_BACKWARD_KERNEL_H_ */
...@@ -28,7 +28,7 @@ void LeakyReLUImpl_cpu_backward_kernel(const LeakyReLU_Op::Attrs& attrs, ...@@ -28,7 +28,7 @@ void LeakyReLUImpl_cpu_backward_kernel(const LeakyReLU_Op::Attrs& attrs,
I negativeSlope = static_cast<I>(std::get<0>(attrs)); I negativeSlope = static_cast<I>(std::get<0>(attrs));
for (std::size_t i = 0; i < inputLenght; ++i) { for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = input[i] > 0 ? 1 : negativeSlope; output[i] = input[i] > 0 ? input[i] : negativeSlope*input[i];
} }
} }
......
...@@ -28,7 +28,7 @@ void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght, ...@@ -28,7 +28,7 @@ void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght,
O* output = static_cast<O*>(output_); O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLenght; ++i) { for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = (input[i] > I(0)) ? O(1) : O(0); output[i] = (input[i] > I(0)) ? static_cast<O>(input[i]) : O(0);
} }
} }
......
...@@ -9,25 +9,27 @@ ...@@ -9,25 +9,27 @@
* *
********************************************************************************/ ********************************************************************************/
#include <cassert> #include "aidge/backend/cpu/operator/FCImpl.hpp"
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate #include <cstddef> // std::size_t
#include <thread> // std::this_thread::sleep_for #include <functional>
#include <vector> #include <memory>
#include <tuple>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp"
#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
#include "aidge/operator/FC.hpp" #include "aidge/operator/FC.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Types.h" #include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
void Aidge::FCImpl_cpu::forward() void Aidge::FCImpl_cpu::forward()
{ {
const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp); const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
assert((op_.getInput(0)) && "missing input #0"); AIDGE_ASSERT(op_.getInput(0), "missing input #0");
assert((op_.getInput(1)) && "missing input #1"); AIDGE_ASSERT(op_.getInput(1), "missing input #1");
assert((op_.getInput(2)) && "missing input #2"); AIDGE_ASSERT(op_.getInput(2), "missing input #2");
// Find the correct kernel type // Find the correct kernel type
const auto outputDataType = op_.getOutput(0)->dataType(); const auto outputDataType = op_.getOutput(0)->dataType();
...@@ -66,44 +68,48 @@ void Aidge::FCImpl_cpu::forward() ...@@ -66,44 +68,48 @@ void Aidge::FCImpl_cpu::forward()
getCPUPtr(mOp.getRawOutput(0))); getCPUPtr(mOp.getRawOutput(0)));
} }
// void Aidge::FCImpl_cpu::backward() void Aidge::FCImpl_cpu::backward()
// { {
// const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp); const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
// const auto& fc_grad = op_.getOutput(0)->grad(); const auto& fc_grad = op_.getOutput(0)->grad();
// assert(fc_grad && "missing ouput #0 gradient"); assert(fc_grad && "missing ouput #0 gradient");
// // Find the correct kernel type // Find the correct kernel type
// const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = { const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = {
// op_.getInput(0)->grad()->dataType(), fc_grad->dataType(),
// op_.getInput(1)->grad()->dataType(), op_.getInput(0)->grad()->dataType(),
// op_.getInput(2)->grad()->dataType(), op_.getInput(1)->grad()->dataType(),
// fc_grad->dataType()}; op_.getInput(2)->grad()->dataType()};
// Registrar<FCImplBackward_cpu>::registrar_type kernelFunc; Registrar<FCImplBackward_cpu>::registrar_type kernelFunc;
// if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) { if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) {
// // One exists with the right inputs/output types // One exists with the right inputs/output types
// kernelFunc = Registrar<FCImplBackward_cpu>::create(registrarKey); kernelFunc = Registrar<FCImplBackward_cpu>::create(registrarKey);
// } }
// else { else {
// // Otherwise, fallback to the kernel with all types matching output type // Otherwise, fallback to the kernel with all types matching output type
// kernelFunc = Registrar<FCImplBackward_cpu>::create({ kernelFunc = Registrar<FCImplBackward_cpu>::create({
// fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType()}); fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType()});
// } }
// // Convert input data (no overhead if not needed!) // Convert input data (no overhead if not needed!)
// // TODO: right now, if needed, memory will be allocated/deallocated at each // TODO: right now, if needed, memory will be allocated/deallocated at each
// // call to forward(). We might put the following shared_ptr as members of // call to forward(). We might put the following shared_ptr as members of
// // this class to avoid that. // this class to avoid that.
// std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback; std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
// const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0))); const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
// const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0))); const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
// const auto& input2grad = op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0))); const auto& input2grad = op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0)));
// // Call kernel // Call kernel
// const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1; const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
// kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
// batchSize, batchSize,
// input0.size() / batchSize, input0grad.size() / batchSize,
// input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(), getCPUPtr(fc_grad),
// getCPUPtr(mOp.getRawOutput(0))); getCPUPtr(op_.getInput(0)),
// } getCPUPtr(mOp.getRawInput(1)),
input0grad.getImpl()->rawPtr(),
input1grad.getImpl()->rawPtr(),
input2grad.getImpl()->rawPtr());
}
...@@ -28,8 +28,9 @@ Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IO ...@@ -28,8 +28,9 @@ Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IO
} }
void Aidge::LeakyReLUImpl_cpu::forward() { void Aidge::LeakyReLUImpl_cpu::forward() {
std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0)); const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp);
std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)); std::shared_ptr<Tensor> in0 = op_.getInput(0);
std::shared_ptr<Tensor> out0 = op_.getOutput(0);
AIDGE_ASSERT(in0, "missing input #0"); AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type // Find the correct kernel type
...@@ -46,8 +47,9 @@ void Aidge::LeakyReLUImpl_cpu::forward() { ...@@ -46,8 +47,9 @@ void Aidge::LeakyReLUImpl_cpu::forward() {
void Aidge::LeakyReLUImpl_cpu::backward() { void Aidge::LeakyReLUImpl_cpu::backward() {
// reversing in and out Data for backprop // reversing in and out Data for backprop
std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)); const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp);
std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0)); std::shared_ptr<Tensor> in0 = op_.getOutput(0)->grad();
std::shared_ptr<Tensor> out0 = op_.getInput(0)->grad();
AIDGE_ASSERT(in0, "missing input #0"); AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type // Find the correct kernel type
......
...@@ -44,9 +44,10 @@ void Aidge::ReLUImpl_cpu::forward() { ...@@ -44,9 +44,10 @@ void Aidge::ReLUImpl_cpu::forward() {
void Aidge::ReLUImpl_cpu::backward() { void Aidge::ReLUImpl_cpu::backward() {
// reversing in and out Tensors // reversing in and out Tensors
std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad(); const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp);
std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad(); std::shared_ptr<Tensor> in0 = op_.getOutput(0)->grad();
AIDGE_ASSERT(out0, "missing input #0"); std::shared_ptr<Tensor> out0 = op_.getInput(0)->grad();
AIDGE_ASSERT(out0, "current {} operator output#0 has not gradient Tensor.", op_.type());
// Find the correct kernel type // Find the correct kernel type
auto kernelFunc = Registrar<ReLUImplBackward_cpu>::create({ auto kernelFunc = Registrar<ReLUImplBackward_cpu>::create({
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment