diff --git a/aidge_export_cpp/kernels/conv1D.hpp b/aidge_export_cpp/kernels/conv1D.hpp deleted file mode 100644 index f7d2b001634851c21b11a6243c892325f55f8355..0000000000000000000000000000000000000000 --- a/aidge_export_cpp/kernels/conv1D.hpp +++ /dev/null @@ -1,130 +0,0 @@ -#ifndef __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ -#define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ - -#include "network/typedefs.hpp" -#include "network/rescaling_utils.hpp" -#include "network/utils.hpp" -#include "network/macs.hpp" -#include "network/activation_utils.hpp" - - -template<int NB_CHANNELS, - int CHANNELS_WIDTH, - int NB_OUTPUTS, - int OUTPUTS_WIDTH, - int PADDING_X, - int STRIDE_X, - int DILATION_X, - int KERNEL_WIDTH, - ActivationFunction_T ACTIVATION, - typename Input_T, typename Output_T, - typename Weight_T, typename Bias_T, - typename Rescaling_T> -__attribute__((always_inline)) inline -void conv1D_forward( - const Input_T* __restrict inputs, - Output_T* __restrict outputs, - const Weight_T* __restrict weights, - const Bias_T* __restrict biases, - const Rescaling_T& __restrict rescaling) -{ - constexpr int DILATED_KERNEL_WIDTH - = KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1); - - constexpr int OUTPUTS_WIDTH_NOPAD - = (CHANNELS_WIDTH - DILATION_X * (KERNEL_WIDTH - 1) - 1 + STRIDE_X) / STRIDE_X; - -#ifdef _OPENMP -#pragma omp parallel for collapse(2) -#endif - for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) { - for (int output = 0; output < NB_OUTPUTS; ++output) { - // moved to inner loop for collapsing --> - const int sxMin = (PADDING_X == 0) ? 0 - : max(PADDING_X - (ox * STRIDE_X), 0); - const int sxMax = (PADDING_X == 0 - && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) - ? DILATED_KERNEL_WIDTH - : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X), - 0, DILATED_KERNEL_WIDTH); - const int ix = (ox * STRIDE_X) - PADDING_X; - - const int oOffset = NB_OUTPUTS * ox; - - // <-- - // Check if the biases are defined - Bias_T weightedSum = biases ? 
biases[output] : 0; - - const int iOffset = NB_CHANNELS * ix; - - const int wOffset = output * KERNEL_WIDTH * NB_CHANNELS; - - if (DILATION_X == 1 && ((PADDING_X == 0 && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) - || sxMax - sxMin == KERNEL_WIDTH)) - { - macsOnRange<KERNEL_WIDTH * NB_CHANNELS>( - inputs + iOffset, - weights + wOffset, - weightedSum); - } - else { - for (int sx = 0; sx < KERNEL_WIDTH; ++sx) { - if ((PADDING_X != 0 - || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD) - && ((sx*DILATION_X < sxMin) || (sx*DILATION_X >= sxMax))) - { - continue; - } - - const int iOffsetInRange = iOffset - + sx * DILATION_X * NB_CHANNELS; - - macsOnRange<NB_CHANNELS>( - // same input line so no wrapping can occur - inputs + iOffsetInRange, - weights + wOffset + sx * NB_CHANNELS, - weightedSum); - } - } - - outputs[oOffset + output] = activation_forward_value<Output_T>(weightedSum, output, ACTIVATION, rescaling); - } - } -} - -// Template specialization when biases are not given to the convolution -template<int NB_CHANNELS, - int CHANNELS_WIDTH, - int NB_OUTPUTS, - int OUTPUTS_WIDTH, - int PADDING_X, - int STRIDE_X, - int DILATION_X, - int KERNEL_WIDTH, - ActivationFunction_T ACTIVATION, - typename Input_T, typename Output_T, - typename Weight_T, - typename Rescaling_T> -__attribute__((always_inline)) inline -void conv1D_forward( - const Input_T* __restrict inputs, - Output_T* __restrict outputs, - const Weight_T* __restrict weights, - std::nullptr_t __restrict, - const Rescaling_T& __restrict rescaling) -{ - const float* b = nullptr; - - conv1D_forward<NB_CHANNELS, - CHANNELS_WIDTH, - NB_OUTPUTS, - OUTPUTS_WIDTH, - PADDING_X, - STRIDE_X, - DILATION_X, - KERNEL_WIDTH, - ACTIVATION> - (inputs, outputs, weights, b, rescaling); -} - -#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ diff --git a/aidge_export_cpp/kernels/convolution.hpp b/aidge_export_cpp/kernels/convolution.hpp index ed62401e69ff8d53b23ba9f88917bb54acd3740a..f002bb3cddae0cb8d4a27f249d41895bc0a9baa3 100644 --- a/aidge_export_cpp/kernels/convolution.hpp +++ b/aidge_export_cpp/kernels/convolution.hpp @@ -17,6 +17,18 @@ template<int NB_CHANNELS, int DILATION_Y, int DILATION_X, int KERNEL_HEIGHT, int KERNEL_WIDTH, ActivationFunction_T ACTIVATION, + // Memory mapping: inputs + int INPUT_MEM_CONT_OFFSET, + int INPUT_MEM_CONT_SIZE, + int INPUT_MEM_WRAP_OFFSET, + int INPUT_MEM_WRAP_SIZE, + int INPUT_MEM_STRIDE, + // Memory mapping: outputs + int OUTPUT_MEM_CONT_OFFSET, + int OUTPUT_MEM_CONT_SIZE, + int OUTPUT_MEM_WRAP_OFFSET, + int OUTPUT_MEM_WRAP_SIZE, + int OUTPUT_MEM_STRIDE, typename Input_T, typename Output_T, typename Weight_T, typename Bias_T, typename Rescaling_T> @@ -64,7 +76,12 @@ void convolution_forward( const int ix = (ox * STRIDE_X) - PADDING_X; const int oPos = (ox + OUTPUTS_WIDTH * oy); - const int oOffset = NB_OUTPUTS * oPos; + int oOffset = (OUTPUT_MEM_STRIDE / sizeof(Output_T)) * oPos; + + if (OUTPUT_MEM_WRAP_SIZE > 0 && oOffset >= (OUTPUT_MEM_CONT_SIZE / sizeof(Output_T))) { + oOffset += (OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET + - OUTPUT_MEM_CONT_SIZE) / sizeof(Output_T); + } // <-- // Check if the biases are defined @@ -78,12 +95,33 @@ void convolution_forward( continue; } - const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y); - const int iOffset = NB_CHANNELS * iPos; + const int iPos = ((sxMin + ix) + + CHANNELS_WIDTH * (iy + syMin + sy * DILATION_Y)); + int iOffset = (INPUT_MEM_STRIDE / sizeof(Input_T)) * iPos; - const int wOffset = (output*KERNEL_HEIGHT + sy) * KERNEL_WIDTH * NB_CHANNELS; + // Wrapping cannot occur 
in the middle of a line, except if + // there is only one line (1D)! + bool wrapInRange = false; - if (DILATION_X == 1 && ((PADDING_X == 0 && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) + if (INPUT_MEM_WRAP_SIZE > 0 + && iOffset >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + iOffset += (INPUT_MEM_WRAP_OFFSET - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } + else if (INPUT_MEM_WRAP_SIZE > 0 && KERNEL_WIDTH > 1 + && CHANNELS_HEIGHT == 1 // single line (1D)! + && iOffset + KERNEL_WIDTH * NB_CHANNELS + > (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + wrapInRange = true; + } + + const int wOffset = NB_CHANNELS * (sxMin + + KERNEL_WIDTH * (syMin + sy + KERNEL_HEIGHT * output)); + + if (!wrapInRange && NB_CHANNELS == (INPUT_MEM_STRIDE / sizeof(Input_T)) + && DILATION_X == 1 && ((PADDING_X == 0 && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) || sxMax - sxMin == KERNEL_WIDTH)) { macsOnRange<KERNEL_WIDTH * NB_CHANNELS>( @@ -100,8 +138,16 @@ void convolution_forward( continue; } - const int iOffsetInRange = iOffset - + sx * DILATION_X * NB_CHANNELS; + int iOffsetInRange = iOffset + + sx * DILATION_X * (INPUT_MEM_STRIDE / sizeof(Input_T)); + + if (wrapInRange + && iOffsetInRange >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + iOffsetInRange += (INPUT_MEM_WRAP_OFFSET + - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } macsOnRange<NB_CHANNELS>( // same input line so no wrapping can occur @@ -128,6 +174,18 @@ template<int NB_CHANNELS, int DILATION_Y, int DILATION_X, int KERNEL_HEIGHT, int KERNEL_WIDTH, ActivationFunction_T ACTIVATION, + // Memory mapping: inputs + int INPUT_MEM_CONT_OFFSET, + int INPUT_MEM_CONT_SIZE, + int INPUT_MEM_WRAP_OFFSET, + int INPUT_MEM_WRAP_SIZE, + int INPUT_MEM_STRIDE, + // Memory mapping: outputs + int OUTPUT_MEM_CONT_OFFSET, + int OUTPUT_MEM_CONT_SIZE, + int OUTPUT_MEM_WRAP_OFFSET, + int OUTPUT_MEM_WRAP_SIZE, + int OUTPUT_MEM_STRIDE, typename Input_T, typename Output_T, typename Weight_T, typename Rescaling_T> @@ -155,7 +213,19 @@ void convolution_forward( DILATION_X, KERNEL_HEIGHT, KERNEL_WIDTH, - ACTIVATION> + ACTIVATION, + // Memory mapping: inputs + INPUT_MEM_CONT_OFFSET, + INPUT_MEM_CONT_SIZE, + INPUT_MEM_WRAP_OFFSET, + INPUT_MEM_WRAP_SIZE, + INPUT_MEM_STRIDE, + // Memory mapping: outputs + OUTPUT_MEM_CONT_OFFSET, + OUTPUT_MEM_CONT_SIZE, + OUTPUT_MEM_WRAP_OFFSET, + OUTPUT_MEM_WRAP_SIZE, + OUTPUT_MEM_STRIDE> (inputs, outputs, weights, b, rescaling); } diff --git a/aidge_export_cpp/kernels/convolution_depthwise.hpp b/aidge_export_cpp/kernels/convolution_depthwise.hpp index 244dd86bc01be7142474380f1e3393ce32446aaf..86353847c1ef2ac4ee5682ebd07622f09fc80d11 100644 --- a/aidge_export_cpp/kernels/convolution_depthwise.hpp +++ b/aidge_export_cpp/kernels/convolution_depthwise.hpp @@ -16,6 +16,18 @@ template<int NB_CHANNELS, int DILATION_Y, int DILATION_X, int KERNEL_HEIGHT, int KERNEL_WIDTH, ActivationFunction_T ACTIVATION, + // Memory mapping: inputs + int INPUT_MEM_CONT_OFFSET, + int INPUT_MEM_CONT_SIZE, + int INPUT_MEM_WRAP_OFFSET, + int INPUT_MEM_WRAP_SIZE, + int INPUT_MEM_STRIDE, + // Memory mapping: outputs + int OUTPUT_MEM_CONT_OFFSET, + int OUTPUT_MEM_CONT_SIZE, + int OUTPUT_MEM_WRAP_OFFSET, + int OUTPUT_MEM_WRAP_SIZE, + int OUTPUT_MEM_STRIDE, typename Input_T, typename Output_T, typename Weight_T, typename Bias_T, typename Rescaling_T> @@ -66,7 +78,12 @@ void convolution_depthwise_forward( const int ix = (ox * STRIDE_X) - PADDING_X; const int oPos = (ox + OUTPUTS_WIDTH * oy); - const int oOffset = NB_OUTPUTS * oPos; + int oOffset =
(OUTPUT_MEM_STRIDE / sizeof(Output_T)) * oPos; + + if (OUTPUT_MEM_WRAP_SIZE > 0 && oOffset >= (OUTPUT_MEM_CONT_SIZE / sizeof(Output_T))) { + oOffset += (OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET + - OUTPUT_MEM_CONT_SIZE) / sizeof(Output_T); + } // <-- const int channel = (output * NB_CHANNELS) / NB_OUTPUTS; @@ -82,12 +99,31 @@ void convolution_depthwise_forward( } const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y); - const int iOffset = NB_CHANNELS * iPos; + int iOffset = (INPUT_MEM_STRIDE / sizeof(Input_T)) * iPos; + + // Wrapping cannot occur in the middle of a line, except if + // there is only one line (1D)! + bool wrapInRange = false; + + if (INPUT_MEM_WRAP_SIZE > 0 + && iOffset >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + iOffset += (INPUT_MEM_WRAP_OFFSET - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } + else if (INPUT_MEM_WRAP_SIZE > 0 && KERNEL_WIDTH > 1 + && CHANNELS_HEIGHT == 1 // single line (1D)! + && iOffset + KERNEL_WIDTH * NB_CHANNELS + > (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + wrapInRange = true; + } const int wOffset = (output*KERNEL_HEIGHT + sy) * KERNEL_WIDTH; - if (DILATION_X == 1 && ((PADDING_X == 0 + if (!wrapInRange && NB_CHANNELS == (INPUT_MEM_STRIDE / sizeof(Input_T)) + && DILATION_X == 1 && ((PADDING_X == 0 && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) || sxMax - sxMin == KERNEL_WIDTH)) { @@ -105,8 +141,16 @@ void convolution_depthwise_forward( continue; } - const int iOffsetInRange = iOffset - + sx * DILATION_X * NB_CHANNELS; + int iOffsetInRange = iOffset + + sx * DILATION_X * (INPUT_MEM_STRIDE / sizeof(Input_T)); + + if (wrapInRange + && iOffsetInRange >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + iOffsetInRange += (INPUT_MEM_WRAP_OFFSET + - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } weightedSum += inputs[iOffsetInRange + channel] * weights[wOffset + sx]; @@ -130,6 +174,18 @@ template<int NB_CHANNELS, int DILATION_Y, int DILATION_X, int KERNEL_HEIGHT, int KERNEL_WIDTH, ActivationFunction_T ACTIVATION, + // Memory mapping: inputs + int INPUT_MEM_CONT_OFFSET, + int INPUT_MEM_CONT_SIZE, + int INPUT_MEM_WRAP_OFFSET, + int INPUT_MEM_WRAP_SIZE, + int INPUT_MEM_STRIDE, + // Memory mapping: outputs + int OUTPUT_MEM_CONT_OFFSET, + int OUTPUT_MEM_CONT_SIZE, + int OUTPUT_MEM_WRAP_OFFSET, + int OUTPUT_MEM_WRAP_SIZE, + int OUTPUT_MEM_STRIDE, typename Input_T, typename Output_T, typename Weight_T, typename Rescaling_T> @@ -157,7 +213,19 @@ void convolution_depthwise_forward( DILATION_X, KERNEL_HEIGHT, KERNEL_WIDTH, - ACTIVATION> + ACTIVATION, + // Memory mapping: inputs + INPUT_MEM_CONT_OFFSET, + INPUT_MEM_CONT_SIZE, + INPUT_MEM_WRAP_OFFSET, + INPUT_MEM_WRAP_SIZE, + INPUT_MEM_STRIDE, + // Memory mapping: outputs + OUTPUT_MEM_CONT_OFFSET, + OUTPUT_MEM_CONT_SIZE, + OUTPUT_MEM_WRAP_OFFSET, + OUTPUT_MEM_WRAP_SIZE, + OUTPUT_MEM_STRIDE> (inputs, outputs, weights, b, rescaling); } diff --git a/aidge_export_cpp/kernels/elemwise.hpp b/aidge_export_cpp/kernels/elemwise.hpp index f486c27f1a120db45a9be00e0995eeb1b066e649..03a0d06de52f196921390ceacc5089f8f85d7189 100644 --- a/aidge_export_cpp/kernels/elemwise.hpp +++ b/aidge_export_cpp/kernels/elemwise.hpp @@ -8,6 +8,23 @@ template<int NB_MAT, ElemWise_T ELEM_OP, int INPUT1_CONT_SIZE, int INPUT2_CONT_SIZE, int OUTPUT_CONT_SIZE, const int OFFSET_IN1[], const int OFFSET_IN2[], ActivationFunction_T ACTIVATION, + // Memory mapping: inputs + int INPUT1_MEM_CONT_OFFSET, + int INPUT1_MEM_CONT_SIZE, + int INPUT1_MEM_WRAP_OFFSET, + int INPUT1_MEM_WRAP_SIZE, + int
INPUT1_MEM_STRIDE, + int INPUT2_MEM_CONT_OFFSET, + int INPUT2_MEM_CONT_SIZE, + int INPUT2_MEM_WRAP_OFFSET, + int INPUT2_MEM_WRAP_SIZE, + int INPUT2_MEM_STRIDE, + // Memory mapping: outputs + int OUTPUT_MEM_CONT_OFFSET, + int OUTPUT_MEM_CONT_SIZE, + int OUTPUT_MEM_WRAP_OFFSET, + int OUTPUT_MEM_WRAP_SIZE, + int OUTPUT_MEM_STRIDE, typename Input_T, typename Output_T, typename Rescaling_T> __attribute__((always_inline)) inline void elemwise_forward( @@ -16,6 +33,10 @@ void elemwise_forward( const Input_T* __restrict inputs1, const Input_T* __restrict inputs2) { + static_assert(INPUT1_MEM_WRAP_SIZE == 0, "Incompatible input memory wrapping"); + static_assert(INPUT2_MEM_WRAP_SIZE == 0, "Incompatible input memory wrapping"); + static_assert(OUTPUT_MEM_CONT_SIZE % OUTPUT_CONT_SIZE == 0, "Incompatible output memory wrapping"); + auto apply_op = [](auto a, auto b) -> Output_T { switch (ELEM_OP) { case Add: return a + b; @@ -29,7 +50,12 @@ void elemwise_forward( for (int stack = 0; stack < NB_MAT; ++stack) { const int offset_in1 = OFFSET_IN1[stack] * INPUT1_CONT_SIZE; const int offset_in2 = OFFSET_IN2[stack] * INPUT2_CONT_SIZE; - const int out_offset = stack * OUTPUT_CONT_SIZE; + int out_offset = stack * OUTPUT_CONT_SIZE; + + if (OUTPUT_MEM_WRAP_SIZE > 0 && out_offset >= (OUTPUT_MEM_CONT_SIZE / sizeof(Output_T))) { + out_offset += (OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET + - OUTPUT_MEM_CONT_SIZE) / sizeof(Output_T); + } for (int i = 0; i < OUTPUT_CONT_SIZE; ++i) { const int in0_id = (INPUT1_CONT_SIZE != 1) ? i : 0; diff --git a/aidge_export_cpp/kernels/fullyconnected.hpp b/aidge_export_cpp/kernels/fullyconnected.hpp index 33840559408b5bca5a92106dc80446c1d328b0e0..4311700cce5d88094a511af667c8cddc37e5733f 100644 --- a/aidge_export_cpp/kernels/fullyconnected.hpp +++ b/aidge_export_cpp/kernels/fullyconnected.hpp @@ -12,6 +12,18 @@ template<int NB_CHANNELS, int NB_OUTPUTS, int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH, ActivationFunction_T ACTIVATION, + // Memory mapping: inputs + int INPUT_MEM_CONT_OFFSET, + int INPUT_MEM_CONT_SIZE, + int INPUT_MEM_WRAP_OFFSET, + int INPUT_MEM_WRAP_SIZE, + int INPUT_MEM_STRIDE, + // Memory mapping: outputs + int OUTPUT_MEM_CONT_OFFSET, + int OUTPUT_MEM_CONT_SIZE, + int OUTPUT_MEM_WRAP_OFFSET, + int OUTPUT_MEM_WRAP_SIZE, + int OUTPUT_MEM_STRIDE, typename Input_T, typename Output_T, typename Weight_T, typename Bias_T, typename Rescaling_T> @@ -30,14 +42,52 @@ void fullyconnected_forward ( Bias_T weightedSum = (biases) ? biases[och] : Bias_T(0); for (int iy = 0; iy < CHANNELS_HEIGHT; ++iy) { - const int iOffset = (NB_CHANNELS * CHANNELS_WIDTH * iy); + const int iPos = (CHANNELS_WIDTH * iy); + int iOffset = (INPUT_MEM_STRIDE / sizeof(Input_T)) * iPos; + + // Wrapping cannot occur in the middle of a line, except if + // there is only one line (1D)! + bool wrapInRange = false; + + if (INPUT_MEM_WRAP_SIZE > 0 && iOffset >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) { + iOffset += (INPUT_MEM_WRAP_OFFSET - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } + else if (INPUT_MEM_WRAP_SIZE > 0 && CHANNELS_WIDTH > 1 + && CHANNELS_HEIGHT == 1 // single line (1D)! 
+ && iOffset + CHANNELS_WIDTH * NB_CHANNELS + > (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + wrapInRange = true; + } + const int wOffset = NB_CHANNELS * CHANNELS_WIDTH * (iy + CHANNELS_HEIGHT * och); - macsOnRange<NB_CHANNELS * CHANNELS_WIDTH>( - inputs + iOffset, - weights + wOffset, - weightedSum); + if (!wrapInRange && (INPUT_MEM_STRIDE / sizeof(Input_T)) == NB_CHANNELS) { + macsOnRange<NB_CHANNELS * CHANNELS_WIDTH>( + inputs + iOffset, + weights + wOffset, + weightedSum); + } + else { + for (int ix = 0; ix < CHANNELS_WIDTH; ++ix) { + int iOffsetInRange = iOffset + ix * (INPUT_MEM_STRIDE / sizeof(Input_T)); + + if (wrapInRange + && iOffsetInRange >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + iOffsetInRange += (INPUT_MEM_WRAP_OFFSET + - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } + + macsOnRange<NB_CHANNELS>( + inputs + iOffsetInRange, + weights + wOffset + ix * NB_CHANNELS, + weightedSum); + } + } } outputs[och] = activation_forward_value<Output_T>(weightedSum, och, ACTIVATION, rescaling); @@ -50,6 +100,18 @@ template<int NB_CHANNELS, int NB_OUTPUTS, int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH, ActivationFunction_T ACTIVATION, + // Memory mapping: inputs + int INPUT_MEM_CONT_OFFSET, + int INPUT_MEM_CONT_SIZE, + int INPUT_MEM_WRAP_OFFSET, + int INPUT_MEM_WRAP_SIZE, + int INPUT_MEM_STRIDE, + // Memory mapping: outputs + int OUTPUT_MEM_CONT_OFFSET, + int OUTPUT_MEM_CONT_SIZE, + int OUTPUT_MEM_WRAP_OFFSET, + int OUTPUT_MEM_WRAP_SIZE, + int OUTPUT_MEM_STRIDE, typename Input_T, typename Output_T, typename Weight_T, typename Bias_T, typename Rescaling_T> diff --git a/aidge_export_cpp/kernels/pooling.hpp b/aidge_export_cpp/kernels/pooling.hpp index 12ac69ffcf30e72c6d854753d4d2a22b1ce4419c..56f10c229d5ae9ee3769df23010b6c82ae77c90f 100644 --- a/aidge_export_cpp/kernels/pooling.hpp +++ b/aidge_export_cpp/kernels/pooling.hpp @@ -17,6 +17,18 @@ template<int NB_CHANNELS, int POOL_HEIGHT, int POOL_WIDTH, Pooling_T POOLING_TYPE, ActivationFunction_T ACTIVATION, + // Memory mapping: inputs + int INPUT_MEM_CONT_OFFSET, + int INPUT_MEM_CONT_SIZE, + int INPUT_MEM_WRAP_OFFSET, + int INPUT_MEM_WRAP_SIZE, + int INPUT_MEM_STRIDE, + // Memory mapping: outputs + int OUTPUT_MEM_CONT_OFFSET, + int OUTPUT_MEM_CONT_SIZE, + int OUTPUT_MEM_WRAP_OFFSET, + int OUTPUT_MEM_WRAP_SIZE, + int OUTPUT_MEM_STRIDE, typename Input_T, typename Output_T> __attribute__((always_inline)) inline void pooling_forward( @@ -53,7 +65,12 @@ void pooling_forward( const int ix = (ox * STRIDE_X) - PADDING_X; const int oPos = (ox + OUTPUTS_WIDTH * oy); - int oOffset = NB_OUTPUTS * oPos; + int oOffset = (OUTPUT_MEM_STRIDE / sizeof(Output_T)) * oPos; + + if (OUTPUT_MEM_WRAP_SIZE > 0 && oOffset >= (OUTPUT_MEM_CONT_SIZE / sizeof(Output_T))) { + oOffset += (OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET + - OUTPUT_MEM_CONT_SIZE) / sizeof(Output_T); + } // <-- if (POOLING_TYPE == Max) { @@ -69,7 +86,25 @@ void pooling_forward( const int iPos = ((sxMin + ix) + CHANNELS_WIDTH * (iy + syMin + sy)); - int iOffset = NB_CHANNELS * iPos; + int iOffset = (INPUT_MEM_STRIDE / sizeof(Input_T)) * iPos; + + // Wrapping cannot occur in the middle of a line, except if + // there is only one line (1D)! + bool wrapInRange = false; + + if (INPUT_MEM_WRAP_SIZE > 0 + && iOffset >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + iOffset += (INPUT_MEM_WRAP_OFFSET - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } + else if (INPUT_MEM_WRAP_SIZE > 0 && POOL_WIDTH > 1 + && CHANNELS_HEIGHT == 1 // single line (1D)! 
+ && iOffset + POOL_WIDTH * (INPUT_MEM_STRIDE / sizeof(Input_T)) + > (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + wrapInRange = true; + } for (int sx = 0; sx < POOL_WIDTH; ++sx) { if ((PADDING_X != 0 @@ -79,7 +114,16 @@ void pooling_forward( break; } - int iOffsetInRange = iOffset + output + sx * NB_CHANNELS; + int iOffsetInRange = iOffset + output + + sx * (INPUT_MEM_STRIDE / sizeof(Input_T)); + + if (wrapInRange && + iOffsetInRange >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + iOffsetInRange += (INPUT_MEM_WRAP_OFFSET + - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } if (inputs[iOffsetInRange] > maxVal) maxVal = inputs[iOffsetInRange]; @@ -101,7 +145,25 @@ void pooling_forward( const int iPos = ((sxMin + ix) + CHANNELS_WIDTH * (iy + syMin + sy)); - int iOffset = NB_CHANNELS * iPos; + int iOffset = (INPUT_MEM_STRIDE / sizeof(Input_T)) * iPos; + + // Wrapping cannot occur in the middle of a line, except if + // there is only one line (1D)! + bool wrapInRange = false; + + if (INPUT_MEM_WRAP_SIZE > 0 + && iOffset >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + iOffset += (INPUT_MEM_WRAP_OFFSET - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } + else if (INPUT_MEM_WRAP_SIZE > 0 && POOL_WIDTH > 1 + && CHANNELS_HEIGHT == 1 // single line (1D)! + && iOffset + POOL_WIDTH * (INPUT_MEM_STRIDE / sizeof(Input_T)) + > (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + wrapInRange = true; + } for (int sx = 0; sx < POOL_WIDTH; ++sx) { if ((PADDING_X != 0 @@ -111,7 +173,17 @@ void pooling_forward( break; } - int iOffsetInRange = iOffset + output + sx * NB_CHANNELS; + int iOffsetInRange = iOffset + output + + sx * (INPUT_MEM_STRIDE / sizeof(Input_T)); + + if (wrapInRange && + iOffsetInRange >= (INPUT_MEM_CONT_SIZE / sizeof(Input_T))) + { + iOffsetInRange += (INPUT_MEM_WRAP_OFFSET + - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE) / sizeof(Input_T); + } + sum += inputs[iOffsetInRange]; } } diff --git a/aidge_export_cpp/operators/BatchNorm.py b/aidge_export_cpp/operators/BatchNorm.py index 8676449723c36b8184f72f6a90ff982ee579a05d..4552a009a2bc46726e5b8acc6473dfde467c2cf2 100644 --- a/aidge_export_cpp/operators/BatchNorm.py +++ b/aidge_export_cpp/operators/BatchNorm.py @@ -3,7 +3,9 @@ from aidge_core.export_utils import ExportNodeCpp from aidge_export_cpp import ROOT from aidge_export_cpp import ExportLibCpp -@ExportLibCpp.register("BatchNorm2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32, aidge_core.dformat.nchw))) +@ExportLibCpp.register("BatchNorm2D", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32, aidge_core.dformat.nchw)), + aidge_core.ProdConso.in_place_model) class BatchNorm(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) diff --git a/aidge_export_cpp/operators/Conv.py b/aidge_export_cpp/operators/Conv.py index c2c8f69fec97046d20f937a5e093b734ffcc3add..a813c2f6c82743b0e8983894dab62450eeb8d7c9 100644 --- a/aidge_export_cpp/operators/Conv.py +++ b/aidge_export_cpp/operators/Conv.py @@ -2,6 +2,56 @@ import aidge_core from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes +@ExportLibCpp.register("Conv1D", + aidge_core.ImplSpec( + [ # Input specifications + aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nwc), + aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nwc), + aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.any) + ], + [ # Output specifications + 
aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nwc) + ], + ), + aidge_core.ProdConso.in_place_model) +class Conv(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + # Initialize kernel attributes + self.attributes["padding"] = [0, 0, 0, 0] + self.attributes["activation"] = "Linear" + self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp") + + ## Scaling + self.attributes["rescaling"] = "NoScaling" + self.attributes["shift_value"] = 0 + + # Browse the metaop to update kernel attributes + ConvNode = get_node_from_metaop(node, "Conv1D") + self.attributes["kernel_dims"] = ConvNode[0].get_operator().attr.kernel_dims + self.attributes["stride_dims"] = ConvNode[0].get_operator().attr.stride_dims + self.attributes["dilation_dims"] = ConvNode[0].get_operator().attr.dilation_dims + + # Template for layer configutation file generation + self.config_template = str(ROOT / "templates" / "configuration" / "convolution_config.jinja") + + # Template layer call function generation within the forward file + self.forward_template = str(ROOT / "templates" / "kernel_forward" / "convolution_forward.jinja") + + # Files to include within the generated forward.cpp file + self.include_list = [] + + # Path to the kernel(s) files to copy + self.add_kernel_to_copy(ROOT / "kernels" / "convolution.hpp") + self.add_kernel_to_copy(ROOT / "static" / "macs.hpp", "include/network", fwd_include=False) + + # Include aidge outputs within the fwd file + if self.attributes["aidge_cmp"]: + self.include_list.append("network/utils.hpp") # aidge_cmp function + self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp") + + @ExportLibCpp.register("Conv2D", aidge_core.ImplSpec( [ # Input specifications @@ -12,7 +62,8 @@ from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes [ # Output specifications aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc) ], - )) + ), + aidge_core.ProdConso.in_place_model) class Conv(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -61,7 +112,8 @@ class Conv(ExportNodeCpp): [ # Output specifications aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc) ], - )) + ), + aidge_core.ProdConso.in_place_model) class QConv(Conv): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -104,7 +156,8 @@ class PadConv(QConv): [ # Output specifications aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc) ], - )) + ), + aidge_core.ProdConso.in_place_model) class ConvAct(QConv): def __init__(self, node, mem_info): super().__init__(node, mem_info) diff --git a/aidge_export_cpp/operators/Conv1D.py b/aidge_export_cpp/operators/Conv1D.py deleted file mode 100644 index 66c23d18672504d0667e45fbfad32ccf065b838f..0000000000000000000000000000000000000000 --- a/aidge_export_cpp/operators/Conv1D.py +++ /dev/null @@ -1,51 +0,0 @@ -import aidge_core -from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop -from aidge_export_cpp import ROOT, ExportLibCpp - -@ExportLibCpp.register("Conv1D", - aidge_core.ImplSpec( - [ # Input specifications - aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nwc), - aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nwc), - aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.any) - ], - [ # Output specifications - aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nwc) - ], - )) -class Conv1D(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, 
mem_info) - - # Initialize kernel attributes - self.attributes["padding"] = [0, 0] - self.attributes["activation"] = "Linear" - self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp") - - ## Scaling - self.attributes["rescaling"] = "NoScaling" - self.attributes["shift_value"] = 0 - - # Browse the metaop to update kernel attributes - ConvNode = get_node_from_metaop(node, "Conv1D") - self.attributes["kernel_dims"] = ConvNode[0].get_operator().attr.kernel_dims - self.attributes["stride_dims"] = ConvNode[0].get_operator().attr.stride_dims - self.attributes["dilation_dims"] = ConvNode[0].get_operator().attr.dilation_dims - - # Template for layer configutation file generation - self.config_template = str(ROOT / "templates" / "configuration" / "conv1D_config.jinja") - - # Template layer call function generation within the forward file - self.forward_template = str(ROOT / "templates" / "kernel_forward" / "conv1D_forward.jinja") - - # Files to include within the generated forward.cpp file - self.include_list = [] - - # Path to the kernel(s) files to copy - self.add_kernel_to_copy(ROOT / "kernels" / "conv1D.hpp") - self.add_kernel_to_copy(ROOT / "static" / "macs.hpp", "include/network", fwd_include=False) - - # Include aidge outputs within the fwd file - if self.attributes["aidge_cmp"]: - self.include_list.append("network/utils.hpp") # aidge_cmp function - self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp") diff --git a/aidge_export_cpp/operators/ConvDw.py b/aidge_export_cpp/operators/ConvDw.py index dff7fd6de6c0427396e519b30585a121ef912025..a38030e362a015f520119e6322fb230d8599764d 100644 --- a/aidge_export_cpp/operators/ConvDw.py +++ b/aidge_export_cpp/operators/ConvDw.py @@ -12,7 +12,8 @@ from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes [ # Output specifications aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc) ], - )) + ), + aidge_core.ProdConso.in_place_model) class ConvDw(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -62,7 +63,8 @@ class ConvDw(ExportNodeCpp): [ # Output specifications aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc) ], - )) + ), + aidge_core.ProdConso.in_place_model) class QConvDw(ConvDw): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -105,7 +107,8 @@ class PadConvDw(QConvDw): [ # Output specifications aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc) ], - )) + ), + aidge_core.ProdConso.in_place_model) class ConvDwAct(QConvDw): def __init__(self, node, mem_info): super().__init__(node, mem_info) diff --git a/aidge_export_cpp/operators/ElemWise.py b/aidge_export_cpp/operators/ElemWise.py index 9f87f6745d23a133862b94a73749f829dd891cb9..751fb3a7ec89d1f6e27daa3a1aae478709d715ac 100644 --- a/aidge_export_cpp/operators/ElemWise.py +++ b/aidge_export_cpp/operators/ElemWise.py @@ -121,20 +121,26 @@ class QElemWise(ElemWise): self.attributes["rescaling"] = "SingleShiftScaling" -@ExportLibCpp.register("Add", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register("Add", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class Add(ElemWise): def __init__(self, node, mem_info): super().__init__(node, mem_info) self.attributes["elemwise_op"] = "Add" -@ExportLibCpp.register_metaop("QAdd", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register_metaop("QAdd", + 
aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class QAdd(QElemWise, Add): def __init__(self, node, mem_info): super().__init__(node, mem_info) -@ExportLibCpp.register_metaop("AddAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register_metaop("AddAct", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class AddAct(QAdd): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -146,20 +152,26 @@ class AddAct(QAdd): aidge_core.Log.error(f"{node.type()} activation is not yet supported.") -@ExportLibCpp.register("Sub", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register("Sub", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class Sub(ElemWise): def __init__(self, node, mem_info): super().__init__(node, mem_info) self.attributes["elemwise_op"] = "Sub" -@ExportLibCpp.register_metaop("QSub", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register_metaop("QSub", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class QSub(QElemWise, Sub): def __init__(self, node, mem_info): super().__init__(node, mem_info) -@ExportLibCpp.register_metaop("SubAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register_metaop("SubAct", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class SubAct(QSub): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -171,14 +183,18 @@ class SubAct(QSub): aidge_core.Log.error(f"{node.type()} activation is not yet supported.") -@ExportLibCpp.register("Mul", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register("Mul", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class Mul(QElemWise): def __init__(self, node, mem_info): super().__init__(node, mem_info) self.attributes["elemwise_op"] = "Mul" -@ExportLibCpp.register_metaop("MulAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register_metaop("MulAct", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class MulAct(Mul): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -189,14 +205,18 @@ class MulAct(Mul): else: aidge_core.Log.error(f"{node.type()} activation is not yet supported.") -@ExportLibCpp.register("Div", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register("Div", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class Div(QElemWise): def __init__(self, node, mem_info): super().__init__(node, mem_info) self.attributes["elemwise_op"] = "Div" -@ExportLibCpp.register_metaop("DivAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register_metaop("DivAct", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class DivAct(Div): def __init__(self, node, mem_info): super().__init__(node, mem_info) diff --git a/aidge_export_cpp/operators/Erf.py b/aidge_export_cpp/operators/Erf.py index c92275e636ade5e5ed1dbb9fdcd22d0e37052c86..b754484ab56856277a5f24eb018f35b8f94b64bb 100644 --- a/aidge_export_cpp/operators/Erf.py +++ 
b/aidge_export_cpp/operators/Erf.py @@ -2,7 +2,9 @@ import aidge_core from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes -@ExportLibCpp.register("Erf", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +@ExportLibCpp.register("Erf", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)), + aidge_core.ProdConso.in_place_model) class ErfCPP(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) diff --git a/aidge_export_cpp/operators/Pool.py b/aidge_export_cpp/operators/Pool.py index a1eaef634e590a2c464d2c3981e68d45174049f9..d6e9bfc82b4db19d9a800928c728e4c0209aa61a 100644 --- a/aidge_export_cpp/operators/Pool.py +++ b/aidge_export_cpp/operators/Pool.py @@ -52,7 +52,9 @@ class PoolAct(Pool): aidge_core.Log.error(f"{node.type()} activation is not yet supported.") -@ExportLibCpp.register("MaxPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc))) +@ExportLibCpp.register("MaxPooling2D", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc)), + aidge_core.ProdConso.in_place_model) class MaxPool(Pool): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -70,7 +72,9 @@ class PadMaxPool(MaxPool, PadPool): super().__init__(node, mem_info) -@ExportLibCpp.register_metaop("MaxPoolAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc))) +@ExportLibCpp.register_metaop("MaxPoolAct", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc)), + aidge_core.ProdConso.in_place_model) class MaxPoolAct(MaxPool, PoolAct): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -82,7 +86,9 @@ class PadMaxPoolAct(PadMaxPool, MaxPoolAct): super().__init__(node, mem_info) -@ExportLibCpp.register("AvgPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc))) +@ExportLibCpp.register("AvgPooling2D", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc)), + aidge_core.ProdConso.in_place_model) class AvgPool(Pool): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -100,7 +106,9 @@ class PadAvgPool(AvgPool, PadPool): super().__init__(node, mem_info) -@ExportLibCpp.register_metaop("AvgPoolAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc))) +@ExportLibCpp.register_metaop("AvgPoolAct", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc)), + aidge_core.ProdConso.in_place_model) class AvgPoolAct(AvgPool, PoolAct): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -112,7 +120,9 @@ class PadAvgPoolAct(PadAvgPool, AvgPoolAct): super().__init__(node, mem_info) -@ExportLibCpp.register("GlobalAveragePooling", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc))) +@ExportLibCpp.register("GlobalAveragePooling", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc)), + aidge_core.ProdConso.in_place_model) class GlobalAvgPool(Pool): def __init__(self, node, mem_info): super().__init__(node, mem_info) @@ -127,7 +137,9 @@ class PadGlobalAvgPool(GlobalAvgPool, PadPool): super().__init__(node, mem_info) -@ExportLibCpp.register_metaop("GlobalAvgPoolAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc))) 
+@ExportLibCpp.register_metaop("GlobalAvgPoolAct", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc)), + aidge_core.ProdConso.in_place_model) class GlobalAvgPoolAct(GlobalAvgPool, PoolAct): def __init__(self, node, mem_info): super().__init__(node, mem_info) diff --git a/aidge_export_cpp/operators/ReLU.py b/aidge_export_cpp/operators/ReLU.py index 55e7e19425e0a5b61790b58a2d36a8f233f75228..cbb0f64a7c7e56605f1b862193c35191137d53d3 100644 --- a/aidge_export_cpp/operators/ReLU.py +++ b/aidge_export_cpp/operators/ReLU.py @@ -2,7 +2,9 @@ import aidge_core from aidge_core.export_utils import ExportNodeCpp from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes -@ExportLibCpp.register("ReLU", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register("ReLU", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class ReLU(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) diff --git a/aidge_export_cpp/operators/Reshape.py b/aidge_export_cpp/operators/Reshape.py index dd7d2c1b162a0fb2e5a61d050ed76a011ebf4025..8ce7b273b86f5a916de3411ec2eeb33b26c89274 100644 --- a/aidge_export_cpp/operators/Reshape.py +++ b/aidge_export_cpp/operators/Reshape.py @@ -3,7 +3,9 @@ from aidge_core.export_utils import ExportNodeCpp from aidge_export_cpp import ROOT from aidge_export_cpp import ExportLibCpp -@ExportLibCpp.register("Reshape", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +@ExportLibCpp.register("Reshape", + aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)), + aidge_core.ProdConso.in_place_model) class ReshapeCPP(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) diff --git a/aidge_export_cpp/templates/configuration/conv1D_config.jinja b/aidge_export_cpp/templates/configuration/conv1D_config.jinja deleted file mode 100644 index b8b6ee6b6a5fa016461038e972e8d5e126ed612a..0000000000000000000000000000000000000000 --- a/aidge_export_cpp/templates/configuration/conv1D_config.jinja +++ /dev/null @@ -1,23 +0,0 @@ -{#- For name header -#} -#ifndef {{ name|upper }}_LAYER_H -#define {{ name|upper }}_LAYER_H -#include "network/rescaling_utils.hpp" -{# For layer configuration -#} -{% include "./_def_io.jinja" %} -{% include "./_meminfo.jinja" %} -#define {{ name|upper }}_PADDING_X {{ padding[0] }} -#define {{ name|upper }}_STRIDE_X {{ stride_dims[0] }} -#define {{ name|upper }}_DILATION_X {{ dilation_dims[0] }} -#define {{ name|upper }}_KERNEL_WIDTH {{ kernel_dims[0] }} -#define {{ name|upper }}_ACTIVATION {{ activation }} -{% include "./_rescaling.jinja" %} - -{#- Calculate sizes #} -{#%- set weights_size = out_chan[0] * kernel_dims[0] if depthwise is defined - else out_chan[0] * in_chan[0] * kernel_dims[0] %#} -{%- set weights_size = out_chan[0] * in_chan[0] * kernel_dims[0] %} -#define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }} -#define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }} - - -#endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/configuration/convolution_config.jinja b/aidge_export_cpp/templates/configuration/convolution_config.jinja index b72df4d10f5342f661e921f4b2a7dbaf79d32e85..7cdaf05e59caf20f5b702db664fdd79dcd7e15e4 100644 --- a/aidge_export_cpp/templates/configuration/convolution_config.jinja +++ b/aidge_export_cpp/templates/configuration/convolution_config.jinja @@ -5,20 +5,20 @@ {# For layer configuration -#} {% include "./_def_io.jinja" %} {% 
include "./_meminfo.jinja" %} -#define {{ name|upper }}_PADDING_Y {{ padding[0] }} -#define {{ name|upper }}_PADDING_X {{ padding[1] }} -#define {{ name|upper }}_STRIDE_Y {{ stride_dims[0] }} -#define {{ name|upper }}_STRIDE_X {{ stride_dims[1] }} -#define {{ name|upper }}_DILATION_Y {{ dilation_dims[0] }} -#define {{ name|upper }}_DILATION_X {{ dilation_dims[1] }} -#define {{ name|upper }}_KERNEL_HEIGHT {{ kernel_dims[0] }} -#define {{ name|upper }}_KERNEL_WIDTH {{ kernel_dims[1] }} +#define {{ name|upper }}_PADDING_Y {{ padding[0] if padding|length > 1 else 0 }} +#define {{ name|upper }}_PADDING_X {{ padding[1] if padding|length > 1 else padding[0] }} +#define {{ name|upper }}_STRIDE_Y {{ stride_dims[0] if stride_dims|length > 1 else 1 }} +#define {{ name|upper }}_STRIDE_X {{ stride_dims[1] if stride_dims|length > 1 else stride_dims[0] }} +#define {{ name|upper }}_DILATION_Y {{ dilation_dims[0] if dilation_dims|length > 1 else 1 }} +#define {{ name|upper }}_DILATION_X {{ dilation_dims[1] if dilation_dims|length > 1 else dilation_dims[0] }} +#define {{ name|upper }}_KERNEL_HEIGHT {{ kernel_dims[0] if kernel_dims|length > 1 else 1 }} +#define {{ name|upper }}_KERNEL_WIDTH {{ kernel_dims[1] if kernel_dims|length > 1 else kernel_dims[0] }} #define {{ name|upper }}_ACTIVATION {{ activation }} {% include "./_rescaling.jinja" %} {#- Calculate sizes #} -{%- set weights_size = out_chan[0] * kernel_dims[1] * kernel_dims[0] if depthwise is defined - else out_chan[0] * in_chan[0] * kernel_dims[1] * kernel_dims[0] %} +{%- set weights_size = out_chan[0] * (kernel_dims[1] if kernel_dims|length > 1 else 1) * kernel_dims[0] if depthwise is defined + else out_chan[0] * in_chan[0] * (kernel_dims[1] if kernel_dims|length > 1 else 1) * kernel_dims[0] %} #define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }} #define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }} diff --git a/aidge_export_cpp/templates/kernel_forward/conv1D_forward.jinja b/aidge_export_cpp/templates/kernel_forward/conv1D_forward.jinja deleted file mode 100644 index b11875ef2dcd10f784810b255d217118dd3101b0..0000000000000000000000000000000000000000 --- a/aidge_export_cpp/templates/kernel_forward/conv1D_forward.jinja +++ /dev/null @@ -1,15 +0,0 @@ -{% filter indent(width=4, first=False) %} -{% include "./_mem_offset.jinja" %} -conv1D{{ "_depthwise" if depthwise is defined else "" }}_forward<{{ in_name[0]|upper }}_NB_CHANNELS, - {{ in_name[0]|upper }}_IN_WIDTH, - {{ out_name[0]|upper }}_NB_OUTPUTS, - {{ out_name[0]|upper }}_OUT_WIDTH, - {{name|upper}}_PADDING_X, - {{name|upper}}_STRIDE_X, - {{name|upper}}_DILATION_X, - {{name|upper}}_KERNEL_WIDTH, - {{name|upper}}_ACTIVATION> - ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{name|upper}}_RESCALING); -{% include "./_save_outputs.jinja" %} -{% include "./_aidge_cmp.jinja" %} -{% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja b/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja index bdde325707eeb497a93ba2084c0672bd7f7e5daa..e4a5642f9f10057eac426a01ae2dadab186bf248 100644 --- a/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja @@ -14,7 +14,17 @@ convolution{{ "_depthwise" if depthwise is defined else "" }}_forward<{{ in_name {{name|upper}}_DILATION_X, {{name|upper}}_KERNEL_HEIGHT, {{name|upper}}_KERNEL_WIDTH, - {{name|upper}}_ACTIVATION> + {{name|upper}}_ACTIVATION, + {{ in_name[0]|upper }}_MEM_CONT_OFFSET, + {{ 
in_name[0]|upper }}_MEM_CONT_SIZE, + {{ in_name[0]|upper }}_MEM_WRAP_OFFSET, + {{ in_name[0]|upper }}_MEM_WRAP_SIZE, + {{ in_name[0]|upper }}_MEM_STRIDE, + {{ out_name[0]|upper }}_MEM_CONT_OFFSET, + {{ out_name[0]|upper }}_MEM_CONT_SIZE, + {{ out_name[0]|upper }}_MEM_WRAP_OFFSET, + {{ out_name[0]|upper }}_MEM_WRAP_SIZE, + {{ out_name[0]|upper }}_MEM_STRIDE> ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{name|upper}}_RESCALING); {% include "./_save_outputs.jinja" %} {% include "./_aidge_cmp.jinja" %} diff --git a/aidge_export_cpp/templates/kernel_forward/elemwise_forward.jinja b/aidge_export_cpp/templates/kernel_forward/elemwise_forward.jinja index 95c48ae5be92d30fdd10e3ddf27cb19d169b473b..4d3c8d023dbb13264f3da72944aff2bc789d4d0e 100644 --- a/aidge_export_cpp/templates/kernel_forward/elemwise_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/elemwise_forward.jinja @@ -7,7 +7,22 @@ elemwise_forward<{{name|upper}}_NB_MAT, {{name|upper}}_OUTPUT_CONT_SIZE, {{name|upper}}_OFFSET_IN1, {{name|upper}}_OFFSET_IN2, - {{name|upper}}_ACTIVATION> + {{name|upper}}_ACTIVATION, + {{ in_name[0]|upper }}_MEM_CONT_OFFSET, + {{ in_name[0]|upper }}_MEM_CONT_SIZE, + {{ in_name[0]|upper }}_MEM_WRAP_OFFSET, + {{ in_name[0]|upper }}_MEM_WRAP_SIZE, + {{ in_name[0]|upper }}_MEM_STRIDE, + {{ in_name[1]|upper }}_MEM_CONT_OFFSET, + {{ in_name[1]|upper }}_MEM_CONT_SIZE, + {{ in_name[1]|upper }}_MEM_WRAP_OFFSET, + {{ in_name[1]|upper }}_MEM_WRAP_SIZE, + {{ in_name[1]|upper }}_MEM_STRIDE, + {{ out_name[0]|upper }}_MEM_CONT_OFFSET, + {{ out_name[0]|upper }}_MEM_CONT_SIZE, + {{ out_name[0]|upper }}_MEM_WRAP_OFFSET, + {{ out_name[0]|upper }}_MEM_WRAP_SIZE, + {{ out_name[0]|upper }}_MEM_STRIDE> ({{out_name[0]}}, {{name|upper}}_RESCALING, {{in_name[0]}}, {{in_name[1]}}); {% include "./_save_outputs.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/fullyconnected_forward.jinja b/aidge_export_cpp/templates/kernel_forward/fullyconnected_forward.jinja index 198e83bd531065352c07468a97d06c463a7f4ea1..2415ce44bdbb35443dd4195fffa2cf1f4e7db0fd 100644 --- a/aidge_export_cpp/templates/kernel_forward/fullyconnected_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/fullyconnected_forward.jinja @@ -6,7 +6,17 @@ fullyconnected{{ "_nhwc" if nhwc is defined else "" }}_forward<{{ in_name[0]|upp {{ out_name[0]|upper }}_NB_OUTPUTS, {{ out_name[0]|upper }}_OUT_HEIGHT, {{ out_name[0]|upper }}_OUT_WIDTH, - {{name|upper}}_ACTIVATION> + {{name|upper}}_ACTIVATION, + {{ in_name[0]|upper }}_MEM_CONT_OFFSET, + {{ in_name[0]|upper }}_MEM_CONT_SIZE, + {{ in_name[0]|upper }}_MEM_WRAP_OFFSET, + {{ in_name[0]|upper }}_MEM_WRAP_SIZE, + {{ in_name[0]|upper }}_MEM_STRIDE, + {{ out_name[0]|upper }}_MEM_CONT_OFFSET, + {{ out_name[0]|upper }}_MEM_CONT_SIZE, + {{ out_name[0]|upper }}_MEM_WRAP_OFFSET, + {{ out_name[0]|upper }}_MEM_WRAP_SIZE, + {{ out_name[0]|upper }}_MEM_STRIDE> ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{name|upper}}_RESCALING); {% include "./_save_outputs.jinja" %} {% include "./_aidge_cmp.jinja" %} diff --git a/aidge_export_cpp/templates/kernel_forward/pooling_forward.jinja b/aidge_export_cpp/templates/kernel_forward/pooling_forward.jinja index fb1f2b7e0a1b33602c93b96856533a93eeec9023..42bed98cb99aaec9f72af2ebdc15e0c46b5331f3 100644 --- a/aidge_export_cpp/templates/kernel_forward/pooling_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/pooling_forward.jinja @@ -13,7 +13,17 @@ pooling_forward<{{ in_name[0]|upper }}_NB_CHANNELS, 
{{name|upper}}_KERNEL_HEIGHT, {{name|upper}}_KERNEL_WIDTH, {{name|upper}}_POOLING_TYPE, - {{name|upper}}_ACTIVATION> + {{name|upper}}_ACTIVATION, + {{ in_name[0]|upper }}_MEM_CONT_OFFSET, + {{ in_name[0]|upper }}_MEM_CONT_SIZE, + {{ in_name[0]|upper }}_MEM_WRAP_OFFSET, + {{ in_name[0]|upper }}_MEM_WRAP_SIZE, + {{ in_name[0]|upper }}_MEM_STRIDE, + {{ out_name[0]|upper }}_MEM_CONT_OFFSET, + {{ out_name[0]|upper }}_MEM_CONT_SIZE, + {{ out_name[0]|upper }}_MEM_WRAP_OFFSET, + {{ out_name[0]|upper }}_MEM_WRAP_SIZE, + {{ out_name[0]|upper }}_MEM_STRIDE> ({{in_name[0]}}, {{out_name[0]}}); {% include "./_save_outputs.jinja" %} {% include "./_aidge_cmp.jinja" %} diff --git a/aidge_export_cpp/unit_tests/test_examples.py b/aidge_export_cpp/unit_tests/test_examples.py index 77b69762e34196571755b9b64c279cbfad89c7ef..98b5e749a44d2e9e5642b625045ed4790f165890 100644 --- a/aidge_export_cpp/unit_tests/test_examples.py +++ b/aidge_export_cpp/unit_tests/test_examples.py @@ -11,6 +11,7 @@ EXAMPLES_DIR = CURRENT_DIR / "../../examples" TEST_CASES = { "lenet-no-args": ("export_LeNet/lenet.py", [], ["MODEL ACCURACY = 100.0 %", "Prediction out#0: 7 (1)"]), "lenet-int8": ("export_LeNet/lenet.py", ["--dtype=int8"], ["MODEL ACCURACY = 100.0 %", "MODEL ACCURACY = 100.0 %", "QUANTIZED ACCURACY = 100.0 %", "Prediction out#0: 7 (119)"]), + "lenet-int8-wrap": ("export_LeNet/lenet.py", ["--dtype=int8", "--mem_wrap"], ["MODEL ACCURACY = 100.0 %", "MODEL ACCURACY = 100.0 %", "QUANTIZED ACCURACY = 100.0 %", "Prediction out#0: 7 (119)"]), "resnet18-no-args": ("export_ResNet18/resnet18.py", ["--mock_db"], []), "resnet18-int8": ("export_ResNet18/resnet18.py", ["--mock_db", "--dtype=int8"], []) } diff --git a/examples/export_LeNet/lenet.py b/examples/export_LeNet/lenet.py index 672b0f6231d1194f2533f77201e66ccb90b94d06..1d7960b0837cae7fe46aa23037b4712935579811 100644 --- a/examples/export_LeNet/lenet.py +++ b/examples/export_LeNet/lenet.py @@ -39,6 +39,7 @@ supported_types = ["float32", "int8"] parser = argparse.ArgumentParser(description="Export the LeNet model with the aidge_export_cpp module.") parser.add_argument("--dev", action="store_true", help="Export in dev mode") +parser.add_argument("--mem_wrap", action="store_true", help="Use memory wrapping") parser.add_argument("--no_cuda", action="store_true", help="Disable USE_CUDA usage to perform inferences and training.") parser.add_argument("--dtype", type=str, choices=supported_types, default="float32", help="Specify the targeted datatype : [int8, float32]") parser.add_argument("--aidge_cmp", action="store_true", help="Use aidge tensor results as reference.") @@ -489,7 +490,8 @@ aidge_export_cpp.export(EXPORT_FOLDER, # tensors[0], labels = aidge_core.Tensor(labels[0]), dev_mode = DEV_MODE, - aidge_cmp = AIDGE_CMP) + aidge_cmp = AIDGE_CMP, + memory_manager_args = {"wrapping": True} if args.mem_wrap else {}) print("\n### Compiling the export ###") try: for std_line in aidge_core.utils.run_command(["make"], cwd=EXPORT_FOLDER):
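For reference, every kernel touched by this patch applies the same wrap-offset remapping when a tensor's memory is split into a contiguous region and a wrapped region. A minimal standalone C++ sketch of that remapping follows; the remap_offset helper and the layout constants (64 contiguous bytes at offset 0, 32 wrapped bytes at offset 128) are hypothetical and only the formula mirrors the kernels:

// Illustrative sketch only: exercises the wrap-offset remapping pattern used by the kernels.
// The template parameters stand in for the per-tensor *_MEM_CONT_OFFSET, *_MEM_CONT_SIZE,
// *_MEM_WRAP_OFFSET and *_MEM_WRAP_SIZE macros (all expressed in bytes).
#include <cassert>
#include <cstdio>

template <int MEM_CONT_OFFSET, int MEM_CONT_SIZE,
          int MEM_WRAP_OFFSET, int MEM_WRAP_SIZE, typename T>
int remap_offset(int elemOffset) {
    // The base pointer addresses the contiguous region; once the element offset passes the end
    // of that region, shift it so it lands in the wrapped region instead.
    if (MEM_WRAP_SIZE > 0 && elemOffset >= MEM_CONT_SIZE / static_cast<int>(sizeof(T))) {
        elemOffset += (MEM_WRAP_OFFSET - MEM_CONT_OFFSET - MEM_CONT_SIZE)
                        / static_cast<int>(sizeof(T));
    }
    return elemOffset;
}

int main() {
    using T = float; // 4-byte elements -> 16 elements fit in the 64-byte contiguous region
    // Element 10 stays in the contiguous region.
    assert((remap_offset<0, 64, 128, 32, T>(10) == 10));
    // Element 20 is past the 16 contiguous elements: 20 + (128 - 0 - 64) / 4 = 36.
    assert((remap_offset<0, 64, 128, 32, T>(20) == 36));
    std::printf("wrap-offset remapping ok\n");
    return 0;
}

In the kernels this shift is applied to the per-pixel offsets (iOffset, oOffset) and, for single-line (1D) inputs only, to each in-kernel offset (iOffsetInRange), since that is the only case where the wrap point can fall inside a line.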