/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <numeric>
#include <chrono>
#include <thread>
#include <vector>
#include "operator/Conv.hpp"
#include "operator/AddImpl.hpp"
#include "operator/AddImpl_forward_kernels.hpp"
#include "utils/Types.h"
//////////////////////////////////
// AddImpl_cpu<1>
//////////////////////////////////
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
    // A forward pass consumes the single input tensor in its entirety.
    assert(mOp.getInput(0) && "requires valid input");
    // Bug fix: the size was previously narrowed through static_cast<int>,
    // which could truncate the element count of a large tensor before it was
    // widened back to NbElts_t. Return the size directly instead.
    return std::static_pointer_cast<Tensor>(mOp.getInput(0))->size();
}
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // Element-wise addition can run in place, so no portion of the input
    // needs to be protected from being overwritten by the output.
    return 0;
}
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
    // The whole output tensor is produced in one pass, regardless of the
    // amount of data currently available on the inputs.
    // NOTE(review): removed stray line-number artifacts ("40".."82") that had
    // been pasted into this function body and made the file uncompilable.
    return std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size();
}
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const {
    // Single-input specialization: the counter for input #0 is the only one.
    return mNbConsumedData[0];
}
Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
    // The operator has a single output, so only counter #0 exists.
    return mNbProducedData[0];
}
void Aidge::AddImpl_cpu<1>::forward() {
    // FIXME: uncomment the following code once memory handling will work
    assert(mOp.getInput(0) && "missing input #0");

    // Find the kernel registered for this input/output data-type pair.
    auto kernelFunc = Registrar<AddImplForward_cpu<1>>::create({
        mOp.getInput(0)->dataType(),
        mOp.getOutput(0)->dataType()});

    // Call kernel over the whole input tensor.
    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
        mOp.getInput(0)->getImpl()->rawPtr(),
        mOp.getOutput(0)->getImpl()->rawPtr());

    // Update producer-consumer bookkeeping: each input is consumed by the
    // minimum amount for a forward pass. The loop now uses std::size_t
    // (consistent with AddImpl_cpu<2>::forward()) instead of the previous
    // IOIndex_t counter compared against size() through a NbElts_t cast.
    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));
    mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::AddImpl_cpu<1>::backward() {
    // Backward pass has not been implemented for this operator yet.
    printf("Not implemented yet.\n");
}
//////////////////////////////////
// AddImpl_cpu<2>
//////////////////////////////////
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
    // A forward pass needs the requested input tensor in its entirety: its
    // element count is the product of all of its dimensions.
    assert(mOp.getInput(inputIdx) && "requires valid input");
    const auto& dims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
    NbElts_t nbElts = 1;
    for (const auto dim : dims) {
        nbElts *= static_cast<NbElts_t>(dim);
    }
    return nbElts;
}
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // In-place computation is possible for element-wise addition, so nothing
    // needs to be protected.
    return 0;
}
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
    // Requires memory for the whole output tensor, regardless of the data
    // currently available on the inputs.
    // NOTE(review): removed stray line-number artifacts ("99".."149") pasted
    // into this body, and replaced the non-portable __attribute__((unused))
    // on the unused parameter with the /*name*/ comment-out convention used
    // by every other overload in this file.
    assert(outputIdx == 0 && "operator has only one output");
    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
    return std::accumulate(outputDims.begin(), outputDims.end(),
                           NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
    // Bounds-check the requested input index, then return its counter.
    const auto idx = static_cast<std::size_t>(inputIdx);
    assert(idx < mNbConsumedData.size());
    return mNbConsumedData[idx];
}
Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
    // Only one output exists, so the produced-data counter is always #0.
    return mNbProducedData[0];
}
void Aidge::AddImpl_cpu<2>::forward() {
    // FIXME: uncomment the following code once memory handling will work
    assert(mOp.getInput(0) && "missing input #0");
    assert(mOp.mInputs[1] && "missing input #1");

    // Pick the kernel registered for this combination of data types.
    auto forwardKernel = Registrar<AddImplForward_cpu<2>>::create({
        mOp.getInput(0)->dataType(),
        mOp.mInputs[1]->dataType(),
        mOp.getOutput(0)->dataType()});

    // Run the kernel over the whole input tensors.
    forwardKernel(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
                  mOp.getInput(0)->getImpl()->rawPtr(),
                  mOp.mInputs[1]->getImpl()->rawPtr(),
                  mOp.getOutput(0)->getImpl()->rawPtr());

    // Each input is consumed by the minimum amount for a forward pass.
    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));
    mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::AddImpl_cpu<2>::backward() {
    // Backward pass has not been implemented for this operator yet.
    printf("Not implemented yet.\n");
}
//////////////////////////////////
// AddImpl_cpu<3>
//////////////////////////////////
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
    // A forward pass needs the requested input tensor in its entirety: its
    // element count is the product of all of its dimensions.
    assert(mOp.getInput(inputIdx) && "requires valid input");
    const auto& dims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
    NbElts_t nbElts = 1;
    for (const auto dim : dims) {
        nbElts *= static_cast<NbElts_t>(dim);
    }
    return nbElts;
}
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // In-place computation is possible for element-wise addition, so nothing
    // needs to be protected.
    return 0;
}
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
    // Requires memory for the whole output tensor, regardless of the data
    // currently available on the inputs.
    // NOTE(review): removed stray line-number artifacts ("166".."213") that
    // had been pasted into this function body and made the file uncompilable.
    assert(outputIdx == 0 && "operator has only one output");
    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
    return std::accumulate(outputDims.begin(), outputDims.end(),
                           NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
    // Bounds-check the requested input index, then return its counter.
    const auto idx = static_cast<std::size_t>(inputIdx);
    assert(idx < mNbConsumedData.size());
    return mNbConsumedData[idx];
}
Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
    // Bounds-check the requested output index, then return its counter.
    const auto idx = static_cast<std::size_t>(outputIdx);
    assert(idx < mNbProducedData.size());
    return mNbProducedData[idx];
}
void Aidge::AddImpl_cpu<3>::forward() {
    // FIXME: uncomment the following code once memory handling will work
    assert(mOp.getInput(0) && "missing input #0");
    assert(mOp.mInputs[1] && "missing input #1");
    assert(mOp.mInputs[2] && "missing input #2");

    // Find the kernel registered for this combination of data types.
    auto kernelFunc = Registrar<AddImplForward_cpu<3>>::create({
        mOp.getInput(0)->dataType(),
        mOp.mInputs[1]->dataType(),
        mOp.mInputs[2]->dataType(),
        mOp.getOutput(0)->dataType()});

    // Call kernel over the whole input tensors.
    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
        mOp.getInput(0)->getImpl()->rawPtr(),
        mOp.mInputs[1]->getImpl()->rawPtr(),
        mOp.mInputs[2]->getImpl()->rawPtr(),
        mOp.getOutput(0)->getImpl()->rawPtr());

    // Update producer-consumer bookkeeping: each input is consumed by the
    // minimum amount for a forward pass. The loop now uses std::size_t
    // (consistent with AddImpl_cpu<2>::forward()) instead of the previous
    // IOIndex_t counter compared against size() through a NbElts_t cast.
    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));
    mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::AddImpl_cpu<3>::backward() {
    // Backward pass has not been implemented for this operator yet.
    printf("Not implemented yet.\n");
}