/********************************************************************************
 * Copyright (c) 2023 CEA-List
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 ********************************************************************************/

#include <cassert>
#include <cstdio>
#include <numeric>
#include <chrono>
#include <thread>
#include <vector>

#include "operator/Add.hpp"
#include "operator/AddImpl.hpp"
#include "operator/AddImpl_forward_kernels.hpp"
#include "utils/Types.h"

//////////////////////////////////
// AddImpl_cpu<1>
//////////////////////////////////

Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
    assert(mOp.getInput(0) && "requires valid input");

    // Requires the whole tensor
    return static_cast<NbElts_t>(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size());
}

Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // This operator can run in-place: no amount of input data needs to be protected
    return 0;
}

Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/,
                                                         const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
    // Requires the whole tensor, regardless of the amount of data available on the inputs
    return std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size();
}

Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const {
    return mNbConsumedData[0];
}

Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
    return mNbProducedData[0];
}

void Aidge::AddImpl_cpu<1>::forward() {
    // FIXME: uncomment the following code once memory handling works
    assert(mOp.getInput(0) && "missing input #0");

    // Find the kernel matching the input/output data types
    auto kernelFunc = Registrar<AddImplForward_cpu<1>>::create({
        mOp.getInput(0)->dataType(),
        mOp.getOutput(0)->dataType()});

    // Call kernel
    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
               mOp.getInput(0)->getImpl()->rawPtr(),
               mOp.getOutput(0)->getImpl()->rawPtr());

    // Update producer-consumer data
    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) {
        // each input is consumed by the minimum amount for a forward pass
        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));
    }
    mNbProducedData[0] += getRequiredMemory(0, {});
}

void Aidge::AddImpl_cpu<1>::backward() {
    printf("Not implemented yet.\n");
}

//////////////////////////////////
// AddImpl_cpu<2>
//////////////////////////////////

Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
    assert(mOp.getInput(inputIdx) && "requires valid input");

    // Requires the whole tensor
    const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
    return std::accumulate(inputDims.begin(), inputDims.end(),
                           NbElts_t(1), std::multiplies<NbElts_t>());
}

Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // This operator can run in-place: no amount of input data needs to be protected
    return 0;
}

Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
                                                         __attribute__((unused)) const std::vector<Aidge::DimSize_t>& inputsSize) const {
    // Requires the whole tensor, regardless of the amount of data available on the inputs
    assert(outputIdx == 0 && "operator has only one output");

    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
    return std::accumulate(outputDims.begin(), outputDims.end(),
                           NbElts_t(1), std::multiplies<NbElts_t>());
}

Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
}

Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
    return mNbProducedData[0];
}

void Aidge::AddImpl_cpu<2>::forward() {
    // FIXME: uncomment the following code once memory handling works
    assert(mOp.getInput(0) && "missing input #0");
    assert(mOp.mInputs[1] && "missing input #1");

    // Find the kernel matching the input/output data types
    auto kernelFunc = Registrar<AddImplForward_cpu<2>>::create({
        mOp.getInput(0)->dataType(),
        mOp.mInputs[1]->dataType(),
        mOp.getOutput(0)->dataType()});

    // Call kernel
    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
               mOp.getInput(0)->getImpl()->rawPtr(),
               mOp.mInputs[1]->getImpl()->rawPtr(),
               mOp.getOutput(0)->getImpl()->rawPtr());

    // Update producer-consumer data
    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) {
        // each input is consumed by the minimum amount for a forward pass
        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));
    }
    mNbProducedData[0] += getRequiredMemory(0, {});
}

void Aidge::AddImpl_cpu<2>::backward() {
    printf("Not implemented yet.\n");
}

//////////////////////////////////
// AddImpl_cpu<3>
//////////////////////////////////

Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
    assert(mOp.getInput(inputIdx) && "requires valid input");

    // Requires the whole tensor
    const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
    return std::accumulate(inputDims.begin(), inputDims.end(),
                           Aidge::NbElts_t(1), std::multiplies<Aidge::NbElts_t>());
}

Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // This operator can run in-place: no amount of input data needs to be protected
    return 0;
}

Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
                                                         const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
    // Requires the whole tensor, regardless of the amount of data available on the inputs
    assert(outputIdx == 0 && "operator has only one output");

    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
    return std::accumulate(outputDims.begin(), outputDims.end(),
                           NbElts_t(1), std::multiplies<NbElts_t>());
}

Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
}

Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
    assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size());
    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
}

void Aidge::AddImpl_cpu<3>::forward() {
    // FIXME: uncomment the following code once memory handling works
    assert(mOp.getInput(0) && "missing input #0");
    assert(mOp.mInputs[1] && "missing input #1");
    assert(mOp.mInputs[2] && "missing input #2");

    // Find the kernel matching the input/output data types
    auto kernelFunc = Registrar<AddImplForward_cpu<3>>::create({
        mOp.getInput(0)->dataType(),
        mOp.mInputs[1]->dataType(),
        mOp.mInputs[2]->dataType(),
        mOp.getOutput(0)->dataType()});

    // Call kernel
    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
               mOp.getInput(0)->getImpl()->rawPtr(),
               mOp.mInputs[1]->getImpl()->rawPtr(),
               mOp.mInputs[2]->getImpl()->rawPtr(),
               mOp.getOutput(0)->getImpl()->rawPtr());

    // Update producer-consumer data
    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) {
        // each input is consumed by the minimum amount for a forward pass
        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));
    }
    mNbProducedData[0] += getRequiredMemory(0, {});
}

void Aidge::AddImpl_cpu<3>::backward() {
    printf("Not implemented yet.\n");
}
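
// Note on kernel selection: Registrar<AddImplForward_cpu<N>>::create() looks up a
// kernel from the tuple of input and output DataTypes passed in the forward()
// methods above. Assuming the registrations live alongside the kernels in
// AddImpl_forward_kernels.hpp, a two-input float32 entry could look roughly like
// the hypothetical sketch below (the registrar variable and kernel function names
// are illustrative assumptions, not taken from this file):
//
//   static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32(
//       {DataType::Float32, DataType::Float32, DataType::Float32},
//       AddImpl2I_cpu_forward_kernel<float, float, float>);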