Skip to content
Snippets Groups Projects
Commit b5ed7853 authored by Grégoire Kubler's avatar Grégoire Kubler
Browse files

feat : speed up pad execution

parent 4e576581
Branches feat_speed_up_pad
No related tags found
2 merge requests!44feat_operator_reducemean,!42Feat speed up pad
Pipeline #72626 passed
......@@ -3,44 +3,83 @@
#include "network/typedefs.hpp"
#include "network/utils.hpp"
#include <cstdio>
// Todo add border value and border type (Reflect, Constant, Wrap...) and add the two missing pad value (bottom and right)
template<int NB_BATCHES, int NB_CHANNELS,
int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
int PADDING_TOP,
int PADDING_LEFT,
int PADDING_BOTTOM,
int PADDING_RIGHT,
typename Input_T, typename Output_T>
__attribute__((always_inline)) inline
void pad_forward(
double borderValue,
const Input_T* __restrict inputs,
Output_T* __restrict outputs
)
{
const unsigned int oySize = CHANNELS_HEIGHT + PADDING_TOP + PADDING_BOTTOM;
const unsigned int oxSize = CHANNELS_WIDTH + PADDING_LEFT + PADDING_RIGHT;
for (unsigned int batch = 0; batch < NB_BATCHES; ++batch) {
for (unsigned int ch = 0; ch < NB_CHANNELS; ++ch) {
const unsigned int preIndex = batch * NB_CHANNELS * CHANNELS_HEIGHT * CHANNELS_WIDTH + ch * CHANNELS_HEIGHT * CHANNELS_WIDTH;
for (unsigned int oy = 0; oy < oySize; ++oy) {
for (unsigned int ox = 0; ox < oxSize; ++ox) {
const unsigned int outIndex = batch * NB_CHANNELS * oySize * oxSize + ch * oySize * oxSize + oy * oxSize + ox;
outputs[outIndex] = borderValue;
const unsigned int inputX = ox - PADDING_LEFT;
const unsigned int inputY = oy - PADDING_TOP;
if (inputY >= 0 and inputY < CHANNELS_HEIGHT and inputX >= 0 and inputX < CHANNELS_WIDTH)
{
outputs[outIndex] = inputs[preIndex + inputY * CHANNELS_WIDTH + inputX];
// TODO : add border value and border type (Reflect, Constant, Wrap...) and add
// the two missing pad value (bottom and right)
template <int NB_BATCHES,
int NB_CHANNELS,
int CHANNELS_HEIGHT,
int CHANNELS_WIDTH,
int NB_OUTPUTS,
int OUTPUTS_HEIGHT,
int OUTPUTS_WIDTH,
int PADDING_TOP,
int PADDING_LEFT,
int PADDING_BOTTOM,
int PADDING_RIGHT,
typename Input_T,
typename Output_T>
__attribute__((always_inline)) inline void
pad_forward(double borderValue,
const Input_T *__restrict inputs,
Output_T *__restrict outputs) {
constexpr unsigned int oySize =
CHANNELS_HEIGHT + PADDING_TOP + PADDING_BOTTOM;
constexpr unsigned int oxSize =
CHANNELS_WIDTH + PADDING_LEFT + PADDING_RIGHT;
constexpr unsigned int inputStrides[3] = {
NB_CHANNELS * CHANNELS_HEIGHT * CHANNELS_WIDTH,
CHANNELS_WIDTH * CHANNELS_HEIGHT,
CHANNELS_WIDTH};
constexpr unsigned int outputStrides[3] = {
NB_CHANNELS * oySize * oxSize,
oySize * oxSize,
oxSize,
};
for (unsigned int batch = 0, inBatchOffset = 0, outBatchOffset = 0;
batch < NB_BATCHES;
++batch,
inBatchOffset += inputStrides[0],
outBatchOffset += outputStrides[0]) {
for (unsigned int ch = 0,
inChannelOffset = inBatchOffset,
outChannelOffset = outBatchOffset;
ch < NB_CHANNELS;
++ch,
inChannelOffset += inputStrides[1],
outChannelOffset += outputStrides[1]) {
for (int oY = 0,
oDimYOffset = outChannelOffset,
iY = oY - PADDING_TOP,
// iDimOffset won't be used unless iY >= 0 hence no risk
// of negative idx
iDimYOffset = inChannelOffset + iY * inputStrides[2];
static_cast<unsigned int>(oY) < oySize;
++oY,
++iY,
iDimYOffset += inputStrides[2],
oDimYOffset += outputStrides[2]) {
if (iY < 0 or iY >= CHANNELS_HEIGHT) {
for (Output_T *o = outputs + oDimYOffset;
o != outputs + oDimYOffset + outputStrides[2];
++o) {
*o = borderValue;
}
continue;
}
for (unsigned int oX = 0; oX < oxSize; ++oX) {
const int iX = static_cast<int>(oX - PADDING_LEFT);
if (iX < 0 or iX >= CHANNELS_WIDTH) {
outputs[oDimYOffset + oX] = borderValue;
} else {
outputs[oDimYOffset + oX] = inputs[iDimYOffset + iX];
}
}
}
......@@ -48,4 +87,4 @@ void pad_forward(
}
}
#endif // __AIDGE_EXPORT_CPP_KERNELS_PAD2D__
#endif // __AIDGE_EXPORT_CPP_KERNELS_PAD2D__
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment