Commit e9e8c07e authored by Maxence Naud

Upd ReLU, LeakyReLU and FC backward functions

parent e61adf11
2 merge requests: !50 version 0.2.0, !39 Scheduler backprop
Pipeline #42431 canceled
@@ -48,6 +48,8 @@ class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
    const void *,
    const void *,
    const void *,
+   void *,
+   void *,
    void *)> {};
class FCImpl_cpu : public OperatorImpl {
@@ -58,7 +60,8 @@ public:
        return std::make_unique<FCImpl_cpu>(op);
    }
-   void forward() override;
+   void forward() override final;
+   void backward() override final;
};
namespace {
......
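Together with the kernel template in the new header below, the extended registrable above now takes three read-only buffers followed by three writable gradient buffers. Here is a sketch of the registered kernel type with each pointer's role spelled out; the alias name and the comments are annotations inferred from the kernel definition below, not code from the repository:

// Assumed annotation of the FCImplBackward_cpu kernel signature above.
using FCBackwardKernelSketch = void (*)(
    const FC_Op::Attrs&,  // static attributes: {outChannels, noBias}
    const DimSize_t,      // batchSize
    const DimSize_t,      // oneInputSize: flattened size of one input sample
    const void*,          // incoming gradient of output #0 (dL/dY)
    const void*,          // original forward input #0 (X)
    const void*,          // weights, input #1 (W)
    void*,                // written: gradient of input #0 (dL/dX)
    void*,                // written: gradient of weights (dL/dW)
    void*);               // written: gradient of biases (dL/db)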
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_BACKWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_BACKWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>
#include "aidge/backend/cpu/operator/FCImpl.hpp"
namespace Aidge {
template <class I, class O, class W, class B>
void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
                                const void* input_, const void* originalInput_, const void* weight_,
                                void* output_, void* weightGrad_, void* biasesGrad_) {
    const I* input = static_cast<const I*>(input_);
    const I* originalInput = static_cast<const I*>(originalInput_);
    const W* weight = static_cast<const W*>(weight_);
    O* output = static_cast<O*>(output_);
    W* weightGrad = static_cast<W*>(weightGrad_);
    B* biasesGrad = static_cast<B*>(biasesGrad_);

    // bias grad
    if (std::get<1>(attrs)) { // NoBias attribute set: zero-fill the bias gradient
        std::fill(biasesGrad, biasesGrad + std::get<0>(attrs), B(0));
    } else {
        for (std::size_t o = 0; o < std::get<0>(attrs); ++o) { // nb outputs
            B sum{0};
            for (std::size_t b = 0; b < batchSize; ++b) {
                sum += input[b*std::get<0>(attrs) + o];
            }
            biasesGrad[o] = sum;
        }
    }

    // weight grad
    for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
        for (std::size_t c = 0; c < oneInputSize; ++c) {
            W sum{0};
            for (std::size_t b = 0; b < batchSize; ++b) {
                sum += originalInput[b*oneInputSize + c]*input[b*std::get<0>(attrs) + o];
            }
            weightGrad[o*oneInputSize + c] = sum;
        }
    }

    // input grad
    for (std::size_t b = 0; b < batchSize; ++b) {
        for (std::size_t c = 0; c < oneInputSize; ++c) {
            O sum{0};
            for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
                sum += weight[o*oneInputSize + c] * input[b*std::get<0>(attrs) + o];
            }
            output[b*oneInputSize + c] = sum;
        }
    }
}
namespace {
static Registrar<FCImplBackward_cpu> registrarFCImpl2DBackward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>);
static Registrar<FCImplBackward_cpu> registrarFCImpl2DBackward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::FCImpl_cpu_backward_kernel<int, int, int, int>);
static Registrar<FCImplBackward_cpu> registrarFCImpl2DBackward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_BACKWARD_KERNEL_H_ */
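The three loops in FCImpl_cpu_backward_kernel are a direct, unvectorized implementation of the standard dense-layer gradients. With the indexing conventions visible above — originalInput X of shape [batchSize, oneInputSize], weight W of shape [outChannels, oneInputSize], and input holding the output gradient dL/dY of shape [batchSize, outChannels], which implies a forward pass of the form Y = X Wᵀ + b — the kernel computes:

\frac{\partial L}{\partial b_o} = \sum_{b} \frac{\partial L}{\partial Y_{b,o}}, \qquad
\frac{\partial L}{\partial W_{o,c}} = \sum_{b} X_{b,c} \, \frac{\partial L}{\partial Y_{b,o}}, \qquad
\frac{\partial L}{\partial X_{b,c}} = \sum_{o} W_{o,c} \, \frac{\partial L}{\partial Y_{b,o}}

When the NoBias attribute (std::get<1>(attrs)) is set, the bias gradient is zero-filled instead of accumulated.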
@@ -28,7 +28,7 @@ void LeakyReLUImpl_cpu_backward_kernel(const LeakyReLU_Op::Attrs& attrs,
    I negativeSlope = static_cast<I>(std::get<0>(attrs));
    for (std::size_t i = 0; i < inputLenght; ++i) {
-       output[i] = input[i] > 0 ? 1 : negativeSlope;
+       output[i] = input[i] > 0 ? input[i] : negativeSlope*input[i];
    }
}
......
@@ -28,7 +28,7 @@ void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght,
    O* output = static_cast<O*>(output_);
    for (std::size_t i = 0; i < inputLenght; ++i) {
-       output[i] = (input[i] > I(0)) ? O(1) : O(0);
+       output[i] = (input[i] > I(0)) ? static_cast<O>(input[i]) : O(0);
    }
}
......
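Both hunks above make the same correction: the old kernels wrote only the local derivative (1 or negativeSlope for LeakyReLU, 1 or 0 for ReLU) and discarded the incoming value, whereas the updated kernels scale the incoming value through. In the kernels' own terms (the backward() methods further below hand them the output gradient as input), each element becomes

\text{output}_i = \begin{cases} \text{input}_i & \text{if } \text{input}_i > 0 \\ \alpha \cdot \text{input}_i & \text{otherwise} \end{cases}

with \alpha = negativeSlope for LeakyReLU and \alpha = 0 for ReLU; note that the sign test is applied to the buffer the kernel receives.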
@@ -9,25 +9,27 @@
 *
 ********************************************************************************/
#include <cassert>
-#include <chrono>   // std::chrono::milliseconds
-#include <numeric>  // std::accumulate
-#include <thread>   // std::this_thread::sleep_for
-#include <vector>
+#include "aidge/backend/cpu/operator/FCImpl.hpp"
+#include <cstddef>  // std::size_t
+#include <functional>
+#include <memory>
+#include <tuple>
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp"
+#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
#include "aidge/operator/FC.hpp"
+#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include "aidge/backend/cpu/operator/FCImpl.hpp"
-#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
void Aidge::FCImpl_cpu::forward()
{
    const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
-   assert((op_.getInput(0)) && "missing input #0");
-   assert((op_.getInput(1)) && "missing input #1");
-   assert((op_.getInput(2)) && "missing input #2");
+   AIDGE_ASSERT(op_.getInput(0), "missing input #0");
+   AIDGE_ASSERT(op_.getInput(1), "missing input #1");
+   AIDGE_ASSERT(op_.getInput(2), "missing input #2");
// Find the correct kernel type
const auto outputDataType = op_.getOutput(0)->dataType();
@@ -66,44 +68,48 @@ void Aidge::FCImpl_cpu::forward()
getCPUPtr(mOp.getRawOutput(0)));
}
-// void Aidge::FCImpl_cpu::backward()
-// {
-//     const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
-//     const auto& fc_grad = op_.getOutput(0)->grad();
-//     assert(fc_grad && "missing ouput #0 gradient");
+void Aidge::FCImpl_cpu::backward()
+{
+    const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
+    const auto& fc_grad = op_.getOutput(0)->grad();
+    assert(fc_grad && "missing output #0 gradient");

-//     // Find the correct kernel type
-//     const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = {
-//         op_.getInput(0)->grad()->dataType(),
-//         op_.getInput(1)->grad()->dataType(),
-//         op_.getInput(2)->grad()->dataType(),
-//         fc_grad->dataType()};
+    // Find the correct kernel type
+    const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = {
+        fc_grad->dataType(),
+        op_.getInput(0)->grad()->dataType(),
+        op_.getInput(1)->grad()->dataType(),
+        op_.getInput(2)->grad()->dataType()};

-//     Registrar<FCImplBackward_cpu>::registrar_type kernelFunc;
-//     if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) {
-//         // One exists with the right inputs/output types
-//         kernelFunc = Registrar<FCImplBackward_cpu>::create(registrarKey);
-//     }
-//     else {
-//         // Otherwise, fallback to the kernel with all types matching output type
-//         kernelFunc = Registrar<FCImplBackward_cpu>::create({
-//             fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType()});
-//     }
+    Registrar<FCImplBackward_cpu>::registrar_type kernelFunc;
+    if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) {
+        // One exists with the right input/output types
+        kernelFunc = Registrar<FCImplBackward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fall back to the kernel with all types matching the output type
+        kernelFunc = Registrar<FCImplBackward_cpu>::create({
+            fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType()});
+    }

-//     // Convert input data (no overhead if not needed!)
-//     // TODO: right now, if needed, memory will be allocated/deallocated at each
-//     // call to forward(). We might put the following shared_ptr as members of
-//     // this class to avoid that.
-//     std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
-//     const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
-//     const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
-//     const auto& input2grad = op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0)));
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to backward(). We might keep the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
+    const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
+    const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
+    const auto& input2grad = op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0)));

-//     // Call kernel
-//     const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
-//     kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
-//         batchSize,
-//         input0.size() / batchSize,
-//         input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
-//         getCPUPtr(mOp.getRawOutput(0)));
-// }
+    // Call kernel
+    const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
+    kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
+        batchSize,
+        input0grad.size() / batchSize,
+        getCPUPtr(fc_grad),
+        getCPUPtr(op_.getInput(0)),
+        getCPUPtr(mOp.getRawInput(1)),
+        input0grad.getImpl()->rawPtr(),
+        input1grad.getImpl()->rawPtr(),
+        input2grad.getImpl()->rawPtr());
+}
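For readability, the kernel invocation above lines up with the parameters of FCImpl_cpu_backward_kernel as follows; this is an annotation only, the comments name the kernel's own parameters and are not part of the repository:

kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), // attrs
    batchSize,                        // batchSize
    input0grad.size() / batchSize,    // oneInputSize
    getCPUPtr(fc_grad),               // input_:         incoming gradient dL/dY
    getCPUPtr(op_.getInput(0)),       // originalInput_: forward input X
    getCPUPtr(mOp.getRawInput(1)),    // weight_:        weights W
    input0grad.getImpl()->rawPtr(),   // output_:        dL/dX, written
    input1grad.getImpl()->rawPtr(),   // weightGrad_:    dL/dW, written
    input2grad.getImpl()->rawPtr());  // biasesGrad_:    dL/db, written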
@@ -28,8 +28,9 @@ Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IO
}

void Aidge::LeakyReLUImpl_cpu::forward() {
-   std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
-   std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0));
+   const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp);
+   std::shared_ptr<Tensor> in0 = op_.getInput(0);
+   std::shared_ptr<Tensor> out0 = op_.getOutput(0);
AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type
@@ -46,8 +47,9 @@ void Aidge::LeakyReLUImpl_cpu::forward() {

void Aidge::LeakyReLUImpl_cpu::backward() {
    // reversing in and out Data for backprop
-   std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0));
-   std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
+   const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp);
+   std::shared_ptr<Tensor> in0 = op_.getOutput(0)->grad();
+   std::shared_ptr<Tensor> out0 = op_.getInput(0)->grad();
AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type
......
@@ -44,9 +44,10 @@ void Aidge::ReLUImpl_cpu::forward() {

void Aidge::ReLUImpl_cpu::backward() {
    // reversing in and out Tensors
-   std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad();
-   std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad();
-   AIDGE_ASSERT(out0, "missing input #0");
+   const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp);
+   std::shared_ptr<Tensor> in0 = op_.getOutput(0)->grad();
+   std::shared_ptr<Tensor> out0 = op_.getInput(0)->grad();
+   AIDGE_ASSERT(out0, "current {} operator input #0 has no gradient Tensor.", op_.type());
// Find the correct kernel type
auto kernelFunc = Registrar<ReLUImplBackward_cpu>::create({
......