// Forked from Eclipse Projects / aidge / aidge_backend_cpu
// (715 commits behind the upstream repository at time of fork.)
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/AddImpl.hpp"
#include "aidge/operator/AddImpl_forward_kernels.hpp"
#include "aidge/utils/Types.h"
//////////////////////////////////
// AddImpl_cpu<1>
//////////////////////////////////
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
    assert(mOp.getInput(0) && "requires valid input");

    // Requires the whole input tensor before the operator can run.
    // Bug fix: the previous code narrowed the size through static_cast<int>,
    // which could truncate element counts larger than INT_MAX before the
    // implicit widening back to NbElts_t. Return the size directly instead.
    return std::static_pointer_cast<Tensor>(mOp.getInput(0))->size();
}
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // No input data needs to be protected from being overwritten by the
    // output: per the original note, the operation can run in place.
    return NbElts_t(0);
}
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
    // Requires room for the whole output tensor, regardless of how much
    // input data is currently available.
    // Consistency fix: the 2- and 3-input specializations assert that the
    // only valid output index is 0; do the same here.
    assert(outputIdx == 0 && "operator has only one output");
    (void) outputIdx; // silence unused-parameter warning in NDEBUG builds
    return std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size();
}
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
    // Consistency fix: mirror the 2-input specialization — bounds-check the
    // index and use it for the lookup. For this single-input operator the
    // only valid index is 0, so behavior is unchanged for valid callers.
    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
}
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
    // Single-output operator: the produced-data counter is always slot 0.
    return mNbProducedData[0];
}
void Aidge::AddImpl_cpu<1>::updateConsummerProducer(){
    // Scheduler bookkeeping: each input counter advances by the amount one
    // forward pass consumes; the output counter advances by the amount one
    // forward pass produces.
    for (std::size_t i = 0; i < mNbConsumedData.size(); ++i) {
        mNbConsumedData[i] += getNbRequiredData(static_cast<IOIndex_t>(i));
    }
    mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::AddImpl_cpu<1>::forward() {
    // FIXME: uncomment the following code once memory handling will work
    assert(mOp.getInput(0) && "missing input #0");

    const auto input0 = std::static_pointer_cast<Tensor>(mOp.getInput(0));

    // Look up the kernel registered for this (input, output) datatype pair.
    const auto kernel = Registrar<AddImplForward_cpu<1>>::create({
        input0->dataType(),
        mOp.getOutput(0)->dataType()});

    // Execute the kernel on the raw data buffers.
    kernel(input0->size(),
           input0->getImpl()->rawPtr(),
           mOp.getOutput(0)->getImpl()->rawPtr());
}
void Aidge::AddImpl_cpu<1>::backward() {
    // Backward pass is not implemented for this operator yet; this only
    // reports the fact at runtime instead of failing hard.
    printf("Not implemented yet.\n");
}
//////////////////////////////////
// AddImpl_cpu<2>
//////////////////////////////////
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
    assert(mOp.getInput(inputIdx) && "requires valid input");

    // The whole input tensor is required: its element count is the product
    // of all of its dimensions.
    const auto& dims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
    return std::accumulate(dims.cbegin(), dims.cend(), NbElts_t(1),
                           std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // No input data needs to be protected from being overwritten by the
    // output: per the original note, the operation can run in place.
    return NbElts_t(0);
}
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
    // The whole output tensor must fit, regardless of how much input data
    // is currently available.
    assert(outputIdx == 0 && "operator has only one output");
    const auto& outDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
    return std::accumulate(outDims.cbegin(), outDims.cend(), NbElts_t(1),
                           std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
    // Bounds-checked lookup of the per-input consumption counter.
    const auto idx = static_cast<std::size_t>(inputIdx);
    assert(idx < mNbConsumedData.size());
    return mNbConsumedData[idx];
}
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
    // Consistency fix: mirror the 3-input specialization — bounds-check the
    // output index and use it for the lookup. For this single-output
    // operator the only valid index is 0, so behavior is unchanged for
    // valid callers.
    assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size());
    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
}
void Aidge::AddImpl_cpu<2>::updateConsummerProducer(){
    // Scheduler bookkeeping: each input counter advances by the amount one
    // forward pass consumes; the output counter advances by the amount one
    // forward pass produces.
    for (std::size_t i = 0; i < mNbConsumedData.size(); ++i) {
        mNbConsumedData[i] += getNbRequiredData(static_cast<IOIndex_t>(i));
    }
    mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::AddImpl_cpu<2>::forward() {
    // FIXME: uncomment the following code once memory handling will work
    assert(mOp.getInput(0) && "missing input #0");
    assert(mOp.mInputs[1] && "missing input #1");

    const auto input0 = std::static_pointer_cast<Tensor>(mOp.getInput(0));

    // Look up the kernel registered for this combination of datatypes.
    const auto kernel = Registrar<AddImplForward_cpu<2>>::create({
        input0->dataType(),
        mOp.mInputs[1]->dataType(),
        mOp.getOutput(0)->dataType()});

    // Execute the kernel on the raw data buffers.
    kernel(input0->size(),
           input0->getImpl()->rawPtr(),
           mOp.mInputs[1]->getImpl()->rawPtr(),
           mOp.getOutput(0)->getImpl()->rawPtr());
}
void Aidge::AddImpl_cpu<2>::backward() {
    // Backward pass is not implemented for this operator yet; this only
    // reports the fact at runtime instead of failing hard.
    printf("Not implemented yet.\n");
}
//////////////////////////////////
// AddImpl_cpu<3>
//////////////////////////////////
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
    assert(mOp.getInput(inputIdx) && "requires valid input");

    // The whole input tensor is required: its element count is the product
    // of all of its dimensions.
    const auto& dims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
    return std::accumulate(dims.cbegin(), dims.cend(), NbElts_t(1),
                           std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // No input data needs to be protected from being overwritten by the
    // output: per the original note, the operation can run in place.
    return NbElts_t(0);
}
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
    // The whole output tensor must fit, regardless of how much input data
    // is currently available.
    assert(outputIdx == 0 && "operator has only one output");
    const auto& outDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
    return std::accumulate(outDims.cbegin(), outDims.cend(), NbElts_t(1),
                           std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
    // Bounds-checked lookup of the per-input consumption counter.
    const auto idx = static_cast<std::size_t>(inputIdx);
    assert(idx < mNbConsumedData.size());
    return mNbConsumedData[idx];
}
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
    // Bounds-checked lookup of the per-output production counter.
    const auto idx = static_cast<std::size_t>(outputIdx);
    assert(idx < mNbProducedData.size());
    return mNbProducedData[idx];
}
void Aidge::AddImpl_cpu<3>::updateConsummerProducer(){
    // Scheduler bookkeeping: each input counter advances by the amount one
    // forward pass consumes; the output counter advances by the amount one
    // forward pass produces.
    for (std::size_t i = 0; i < mNbConsumedData.size(); ++i) {
        mNbConsumedData[i] += getNbRequiredData(static_cast<IOIndex_t>(i));
    }
    mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::AddImpl_cpu<3>::forward() {
    // FIXME: uncomment the following code once memory handling will work
    assert(mOp.getInput(0) && "missing input #0");
    assert(mOp.mInputs[1] && "missing input #1");
    assert(mOp.mInputs[2] && "missing input #2");

    const auto input0 = std::static_pointer_cast<Tensor>(mOp.getInput(0));

    // Look up the kernel registered for this combination of datatypes.
    const auto kernel = Registrar<AddImplForward_cpu<3>>::create({
        input0->dataType(),
        mOp.mInputs[1]->dataType(),
        mOp.mInputs[2]->dataType(),
        mOp.getOutput(0)->dataType()});

    // Execute the kernel on the raw data buffers.
    kernel(input0->size(),
           input0->getImpl()->rawPtr(),
           mOp.mInputs[1]->getImpl()->rawPtr(),
           mOp.mInputs[2]->getImpl()->rawPtr(),
           mOp.getOutput(0)->getImpl()->rawPtr());
}
void Aidge::AddImpl_cpu<3>::backward() {
    // Backward pass is not implemented for this operator yet; this only
    // reports the fact at runtime instead of failing hard.
    printf("Not implemented yet.\n");
}