/********************************************************************************
 * Copyright (c) 2023 CEA-List
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 ********************************************************************************/

#include <cassert>
#include <chrono>  // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread>  // std::this_thread::sleep_for
#include <vector>

#include "aidge/operator/FC.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"

void Aidge::FCImpl_cpu::forward()
{
    const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
    assert((op_.getInput(0)) && "missing input #0");
    assert((op_.getInput(1)) && "missing input #1");
    assert((op_.getInput(2)) && "missing input #2");

    // Find the correct kernel type
    const auto outputDataType = op_.getOutput(0)->dataType();
    const Registrar<FCImplForward_cpu>::registrar_key registrarKey = {
        op_.getInput(0)->dataType(),
        op_.getInput(1)->dataType(),
        op_.getInput(2)->dataType(),
        outputDataType};

    Registrar<FCImplForward_cpu>::registrar_type kernelFunc;
    if (Registrar<FCImplForward_cpu>::exists(registrarKey)) {
        // One exists with the right inputs/output types
        kernelFunc = Registrar<FCImplForward_cpu>::create(registrarKey);
    }
    else {
        // Otherwise, fallback to the kernel with all types matching output type
        kernelFunc = Registrar<FCImplForward_cpu>::create({
            outputDataType, outputDataType, outputDataType, outputDataType});
    }

    // Convert input data (no overhead if not needed!)
    // TODO: right now, if needed, memory will be allocated/deallocated at each
    // call to forward(). We might put the following shared_ptr as members of
    // this class to avoid that.
    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0)));
    const auto& input2 = op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0)));
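    // Note: the kernel views the input as a 2-D [batchSize, inputFeatureSize]
    // matrix. An input with more than one dimension is flattened along its
    // trailing dimensions (inputFeatureSize = input0.size() / batchSize), and
    // a 1-D input is treated as a single batch element.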
    // Call kernel
    const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
    kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
        batchSize,
        input0.size() / batchSize,
        input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
        getCPUPtr(mOp.getRawOutput(0)));
}

// void Aidge::FCImpl_cpu::backward()
// {
//     const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
//     const auto& fc_grad = op_.getOutput(0)->grad();
//     assert(fc_grad && "missing output #0 gradient");
//
//     // Find the correct kernel type
//     const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = {
//         op_.getInput(0)->grad()->dataType(),
//         op_.getInput(1)->grad()->dataType(),
//         op_.getInput(2)->grad()->dataType(),
//         fc_grad->dataType()};
//
//     Registrar<FCImplBackward_cpu>::registrar_type kernelFunc;
//     if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) {
//         // One exists with the right inputs/output types
//         kernelFunc = Registrar<FCImplBackward_cpu>::create(registrarKey);
//     }
//     else {
//         // Otherwise, fallback to the kernel with all types matching output type
//         kernelFunc = Registrar<FCImplBackward_cpu>::create({
//             fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType()});
//     }
//
//     // Convert input data (no overhead if not needed!)
//     // TODO: right now, if needed, memory will be allocated/deallocated at each
//     // call to backward(). We might put the following shared_ptr as members of
//     // this class to avoid that.
//     std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
//     const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
//     const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
//     const auto& input2grad = op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0)));
//
//     // Call kernel
//     const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
//     kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
//         batchSize,
//         input0grad.size() / batchSize,
//         input0grad.getImpl()->rawPtr(), input1grad.getImpl()->rawPtr(), input2grad.getImpl()->rawPtr(),
//         getCPUPtr(mOp.getRawOutput(0)));
// }
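// For reference, the exists()/create() lookup in forward() resolves kernels
// registered against a key of {input0, input1, input2, output} data types.
// A minimal registration sketch (assuming the pattern used in
// FCImpl_forward_kernels.hpp; exact registrar and kernel names may differ):
//
//     namespace {
//     static Registrar<FCImplForward_cpu> registrarFCImplForward_cpu_Float32(
//             {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
//             Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>);
//     }
//
// With such an entry in place, a Float32 FC node dispatches to this kernel
// directly; otherwise forward() falls back to the all-output-type variant.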