/******************************************************************************** * Copyright (c) 2023 CEA-List * * This program and the accompanying materials are made available under the * terms of the Eclipse Public License 2.0 which is available at * http://www.eclipse.org/legal/epl-2.0. * * SPDX-License-Identifier: EPL-2.0 * ********************************************************************************/ #include <cassert> #include <chrono> // std::chrono::milliseconds #include <numeric> // std::accumulate #include <thread> // std::this_thread::sleep_for #include <vector> #include "aidge/operator/FC.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp" void Aidge::FCImpl_cpu::forward() { assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1"); assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(2)) && "missing input #2"); // Find the correct kernel type const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType(); const Registrar<FCImplForward_cpu>::registrar_key registrarKey = { std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), outputDataType}; Registrar<FCImplForward_cpu>::registrar_type kernelFunc; if (Registrar<FCImplForward_cpu>::exists(registrarKey)) { // One exists with the right inputs/output types kernelFunc = Registrar<FCImplForward_cpu>::create(registrarKey); } else { // Otherwise, fallback to the kernel with all types matching output type kernelFunc = Registrar<FCImplForward_cpu>::create({ outputDataType, outputDataType, outputDataType, outputDataType}); } // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each // call to forward(). We might put the following shared_ptr as members of // this class to avoid that. std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(input0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); // Call kernel kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0], std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(), input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); }