/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include "aidge/operator/FC.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
void Aidge::FCImpl_cpu::forward()
{
    const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
    assert((op_.getInput(0)) && "missing input #0");
    assert((op_.getInput(1)) && "missing input #1");
    assert((op_.getInput(2)) && "missing input #2");

    // Find the correct kernel type
    const auto outputDataType = op_.getOutput(0)->dataType();
    const Registrar<FCImplForward_cpu>::registrar_key registrarKey = {
        op_.getInput(0)->dataType(),
        op_.getInput(1)->dataType(),
        op_.getInput(2)->dataType(),
        outputDataType};

    Registrar<FCImplForward_cpu>::registrar_type kernelFunc;
    if (Registrar<FCImplForward_cpu>::exists(registrarKey)) {
        // One exists with the right inputs/output types
        kernelFunc = Registrar<FCImplForward_cpu>::create(registrarKey);
    }
    else {
        // Otherwise, fallback to the kernel with all types matching output type
        kernelFunc = Registrar<FCImplForward_cpu>::create({
            outputDataType, outputDataType, outputDataType, outputDataType});
    }
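    // For example: with Float32 inputs and a Float64 output (an illustrative
    // combination, not necessarily registered), the exact key
    // {Float32, Float32, Float32, Float64} is tried first; failing that, the
    // all-Float64 kernel {Float64, Float64, Float64, Float64} is selected and
    // the inputs are cast below.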
    // Convert input data (no overhead if not needed!)
    // TODO: right now, if needed, memory will be allocated/deallocated at each
    // call to forward(). We might put the following shared_ptr as members of
    // this class to avoid that.
    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0)));
    const auto& input2 = op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0)));
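    // At this point input0/input1/input2 reference tensors matching the output
    // data type: either the original inputs (when no cast was needed) or the
    // fallback tensors holding a converted copy.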

    // Call kernel
    const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
    kernelFunc(op_.getStaticAttributes(),
               batchSize,
               input0.size() / batchSize,
               input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
               getCPUPtr(mOp.getRawOutput(0)));
}
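
/*
 * A possible follow-up to the TODO above, as a minimal sketch (the member
 * names below are assumptions, not the current FCImpl_cpu definition):
 * keeping the fallback tensors as class members so that the converted
 * buffers persist across forward() calls instead of being reallocated
 * on every call.
 *
 *   // In FCImpl.hpp:
 *   private:
 *       std::shared_ptr<Tensor> mInput0Fallback, mInput1Fallback, mInput2Fallback;
 *
 *   // In forward():
 *   const auto& input0 = op_.getInput(0)->refCastFrom(mInput0Fallback, *(op_.getOutput(0)));
 */
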
// void Aidge::FCImpl_cpu::backward()
// {
//     const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
//     const auto& fc_grad = op_.getOutput(0)->grad();
//     assert(fc_grad && "missing output #0 gradient");

//     // Find the correct kernel type
//     const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = {
//         op_.getInput(0)->grad()->dataType(),
//         op_.getInput(1)->grad()->dataType(),
//         op_.getInput(2)->grad()->dataType(),
//         fc_grad->dataType()};

//     Registrar<FCImplBackward_cpu>::registrar_type kernelFunc;
//     if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) {
//         // One exists with the right inputs/output types
//         kernelFunc = Registrar<FCImplBackward_cpu>::create(registrarKey);
//     }
//     else {
//         // Otherwise, fallback to the kernel with all types matching output type
//         kernelFunc = Registrar<FCImplBackward_cpu>::create({
//             fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType()});
//     }

//     // Convert input data (no overhead if not needed!)
//     // TODO: right now, if needed, memory will be allocated/deallocated at each
//     // call to backward(). We might put the following shared_ptr as members of
//     // this class to avoid that.
//     std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
//     const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
//     const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
//     const auto& input2grad = op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0)));

//     // Call kernel
//     const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
//     kernelFunc(op_.getStaticAttributes(),
//                batchSize,
//                input0grad.size() / batchSize,
//                input0grad.getImpl()->rawPtr(), input1grad.getImpl()->rawPtr(), input2grad.getImpl()->rawPtr(),
//                getCPUPtr(mOp.getRawOutput(0)));
// }