diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Activation/Relu/aidge_relu_float32.c b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Activation/Relu/aidge_relu_float32.c index 4b35bdc35db8f1085d773e908cccebbc09693ceb..5e1bb6bca480aff223484653e2f08702299a33e3 100644 --- a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Activation/Relu/aidge_relu_float32.c +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Activation/Relu/aidge_relu_float32.c @@ -5,7 +5,6 @@ void aidge_relu_float32 (float* inputs, unsigned int size) { for (unsigned int i = 0; i < size; ++i) { - if (inputs[i] < 0.0f) - outputs[i] = 0.0f; + outputs[i] = (inputs[i] < 0.0f) ? 0.0f : inputs[i]; } } \ No newline at end of file diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Pooling/aidge_maxpool2d_float32.c b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Pooling/aidge_maxpool2d_float32.c index 31cbdd532da44d85b6764dd240542ed09cace6d5..ebfb8f7ebdbb060e75f56cdfb4e9abe78aa810aa 100644 --- a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Pooling/aidge_maxpool2d_float32.c +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Pooling/aidge_maxpool2d_float32.c @@ -10,55 +10,66 @@ void aidge_maxpool2d_float32(float* inputs, const int padding_width, const int padding_height, const int stride_width, const int stride_height) { - int outputOffset = 0; + const int OUTPUTS_HEIGHT_NOPAD + = (channel_height - kernel_height + stride_height) / stride_height; + const int OUTPUTS_WIDTH_NOPAD + = (channel_width - kernel_width + stride_width) / stride_width; - int iy = 0; for (int oy = 0; oy < output_height; ++oy) { + const int syMin = (padding_height == 0) ? 0 + : max(padding_height - (oy * stride_height), 0); + const int syMax = (padding_height == 0 + && output_height == OUTPUTS_HEIGHT_NOPAD) ? kernel_height + : clamp(channel_height + padding_height - (oy * stride_height), + 0, kernel_height); + const int iy = (oy * stride_height) - padding_height; - const int syMin = (padding_height == 0) - ? 0 : max(padding_height - iy, 0); - const int syMax = (padding_height == 0) - ? kernel_height - : clamp(channel_height + padding_height - iy, - 0, kernel_height); - - int ix = 0; for (int ox = 0; ox < output_width; ++ox) { + for (int output = 0; output < nb_outputs; ++output) { + + const int sxMin = (padding_width == 0) ? 0 + : max(padding_width - (ox * stride_width), 0); + const int sxMax = (padding_width == 0 + && output_width == OUTPUTS_WIDTH_NOPAD) + ? kernel_width + : clamp(channel_width + padding_width - (ox * stride_width), + 0, kernel_width); + const int ix = (ox * stride_width) - padding_width; - const int sxMin = (padding_width == 0) - ? 0 : max(padding_width - ix, 0); - const int sxMax = (padding_width == 0) - ? kernel_width - : clamp(channel_width + padding_width - ix, - 0, kernel_width); - - for (int och = 0; och < nb_outputs; ++och) { - // Not the best system to init this value... - float max_val = -100000; + const int oPos = (ox + output_width * oy); + int oOffset = nb_outputs * oPos; + + float maxVal = -1000.f; for (int sy = 0; sy < kernel_height; ++sy) { - if (padding_height != 0 && (sy < syMin || sy >= syMax)) { - continue; + if ((padding_height != 0 + || output_height != OUTPUTS_HEIGHT_NOPAD) + && sy >= syMax - syMin) + { + break; } - const int inputsOffset = (iy + sy - padding_height)*channel_width*nb_channels + - (ix - padding_width)*nb_channels + och; + const int iPos = ((sxMin + ix) + + channel_width * (iy + syMin + sy)); + int iOffset = nb_channels * iPos; for (int sx = 0; sx < kernel_width; ++sx) { - if(sx < sxMin || sx >= sxMax) { - continue; + if ((padding_width != 0 + || output_width != OUTPUTS_WIDTH_NOPAD) + && sx >= sxMax - sxMin) + { + break; } - if (inputs[inputsOffset + sx*nb_channels] > max_val) { - max_val = inputs[inputsOffset + sx*nb_channels]; - } + int iOffsetInRange = iOffset + output + sx * nb_channels; + + if (inputs[iOffsetInRange] > maxVal) + maxVal = inputs[iOffsetInRange]; } } - outputs[outputOffset] = max_val; - ++outputOffset; + + outputs[oOffset + output] = maxVal; } - ix += stride_width; } - iy += stride_height; } }