Newer
Older
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include "aidge/operator/FC.hpp"
#include "aidge/operator/FCImpl.hpp"
#include "aidge/operator/FCImpl_forward_kernels.hpp"
#include "aidge/utils/Types.h"
// Number of elements required on input `inputIdx` before a forward pass.
//
// The whole input tensor is required, so the result is the product of all
// of its dimensions (1 when the dimension list is empty).
//
// inputIdx: index of the operator input to query. The input must be set;
//           this is only verified by an assert.
// Returns the product of the dimensions of that input tensor.
Aidge::NbElts_t Aidge::FCImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const
{
    assert(mOp.getInput(inputIdx) && "requires valid input");

    // Keep a named handle on the tensor while its dimensions are walked.
    const auto inputTensor = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx));

    // Every element of the tensor is needed: multiply the dimensions together.
    NbElts_t nbElts = 1;
    for (const auto dim : inputTensor->dims()) {
        nbElts *= static_cast<NbElts_t>(dim);
    }
    return nbElts;
}
// Number of input elements that must be protected for the given input.
//
// This implementation reports none: the result is always 0, whatever the
// input index (the parameter is intentionally unnamed and unused).
//
// NOTE(review): the comment previously attached to this function talked about
// the direct convolution algorithm being usable in place when there is no
// padding; it looks carried over from a convolution implementation. Confirm
// that 0 is really the intended value for a fully-connected layer.
Aidge::NbElts_t
Aidge::FCImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const
{
    const NbElts_t nbProtectedElts = 0;
    return nbProtectedElts;
}
// Number of elements required to hold output #0.
//
// The whole output tensor is accounted for, regardless of the data available
// on the inputs: the result is the product of the dimensions of output #0
// (1 when the dimension list is empty).
//
// outputIdx:  must be 0, the operator has a single output (assert only, hence
//             the `unused` attribute for builds where asserts are disabled).
// inputsSize: ignored.
Aidge::NbElts_t Aidge::FCImpl_cpu::getRequiredMemory(
    __attribute__((unused)) const IOIndex_t outputIdx,
    __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const
{
    assert(outputIdx == 0 && "operator has only one output");

    // Keep a named handle on the tensor while its dimensions are walked.
    const auto outputTensor = std::static_pointer_cast<Tensor>(mOp.getOutput(0));

    // Multiply all the dimensions together to get the element count.
    NbElts_t nbElts = 1;
    for (const auto dim : outputTensor->dims()) {
        nbElts *= static_cast<NbElts_t>(dim);
    }
    return nbElts;
}
// Number of elements consumed so far on input `inputIdx`, as recorded in
// mNbConsumedData (the counter is incremented by forward()).
//
// inputIdx: index of the input to query. It must differ from
//           gk_IODefaultIndex and be a valid position in mNbConsumedData;
//           both conditions are only verified by an assert.
// Returns the counter stored for that input.
Aidge::NbElts_t Aidge::FCImpl_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const
{
    // Convert once so the bounds check and the subscript use the same
    // unsigned index, as getNbProducedData() does.
    const auto slot = static_cast<std::size_t>(inputIdx);
    assert((inputIdx != gk_IODefaultIndex) && (slot < mNbConsumedData.size()));
    return mNbConsumedData[slot];
}
// Number of elements produced so far on output `outputIdx`, as recorded in
// mNbProducedData.
//
// outputIdx: index of the output to query; it must be a valid position in
//            mNbProducedData (assert only).
// Returns the counter stored for that output.
Aidge::NbElts_t Aidge::FCImpl_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const
{
    // Single conversion shared by the bounds check and the lookup.
    const auto slot = static_cast<std::size_t>(outputIdx);
    assert(slot < mNbProducedData.size());
    return mNbProducedData[slot];
}
void Aidge::FCImpl_cpu::forward()
{
// FIXME: uncomment the following code once memory handling will work
assert(mOp.getInput(0) && "missing input #0");
assert(mOp.mInputs[1] && "missing input #1");
assert(mOp.mInputs[2] && "missing input #2");
// Find the correct kernel type
auto kernelFunc = Registrar<FCImplForward_cpu>::create(
{mOp.getInput(0)->dataType(),
mOp.mInputs[1]->dataType(),
mOp.mInputs[2]->dataType(),
mOp.getOutput(0)->dataType()});
// Call kernel
// if (mOp.getInput(0)->nbDims() == 4) {
// kernelFunc(
// mOp.getParams(),
// std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
// mOp.getInput(0)->getImpl()->rawPtr(),
// mOp.mInputs[1]->getImpl()->rawPtr(),
// mOp.mInputs[2]->getImpl()->rawPtr(),
// mOp.getOutput(0)->getImpl()->rawPtr());
// }
// else
kernelFunc(
mOp.getParams(),
mOp.getInput(0)->dims()[0],
mOp.getInput(0)->sizeM1(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.mInputs[1]->getImpl()->rawPtr(),
mOp.mInputs[2]->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
// FIXME: Dummy wait for some earlier scheduler tests
std::this_thread::sleep_for(std::chrono::milliseconds(mOp.get<FCParam::OutChannels>()));
// Update producer-consumer data
for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
mNbConsumedData[inputIdx]
+= getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum
// amount for a forward pass
mNbProducedData[0] += getRequiredMemory(0, {});
}
// Backward pass placeholder: no computation is performed, a notice is
// written to the standard output and the function returns normally.
void Aidge::FCImpl_cpu::backward()
{
    const char notice[] = "Not implemented yet.\n";
    printf("%s", notice);
}