diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Convolution/ConvDW.hpp b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Convolution/ConvDW.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cefe933057f0e858e54dbf45e6f0b548407bd593 --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/kernels/Convolution/ConvDW.hpp @@ -0,0 +1,177 @@ +/* + (C) Copyright 2017 CEA LIST. All Rights Reserved. + Contributor(s): N2D2 Team + + This software is governed by the CeCILL-C license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL-C + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited + liability. + + The fact that you are presently reading this means that you have had + knowledge of the CeCILL-C license and that you accept its terms. 
+*/ + +#ifndef __N2D2_EXPORT_CPP_CONV_DW_HPP__ +#define __N2D2_EXPORT_CPP_CONV_DW_HPP__ + +#include "typedefs.h" +#include "assert.h" +#include "utils.hpp" +#include "kernels/Macs.hpp" + +namespace N2D2_Export { + +template<int NB_CHANNELS, + int CHANNELS_HEIGHT, int CHANNELS_WIDTH, + int NB_OUTPUTS, + int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH, + int PADDING_Y, int PADDING_X, + int STRIDE_Y, int STRIDE_X, + int KERNEL_HEIGHT, int KERNEL_WIDTH, + ActivationFunction_T ACTIVATION, + // Memory mapping: inputs + int INPUT_MEM_CONT_OFFSET, + int INPUT_MEM_CONT_SIZE, + int INPUT_MEM_WRAP_OFFSET, + int INPUT_MEM_WRAP_SIZE, + int INPUT_MEM_STRIDE, + // Memory mapping: outputs + int OUTPUT_MEM_CONT_OFFSET, + int OUTPUT_MEM_CONT_SIZE, + int OUTPUT_MEM_WRAP_OFFSET, + int OUTPUT_MEM_WRAP_SIZE, + int OUTPUT_MEM_STRIDE, + typename Input_T, typename Output_T, + typename Weight_T, typename Bias_T, + typename Rescaling_T> +__attribute__((always_inline)) inline void convcellDWPropagate( + const Input_T* __restrict inputs, + Output_T* __restrict outputs, + const Bias_T* __restrict biasses, + const Weight_T* __restrict weights, + const Rescaling_T& __restrict rescaling) +{ + static_assert(NB_OUTPUTS % NB_CHANNELS == 0, + "NB_OUTPUTS should be a multiple of NB_CHANNELS."); + + constexpr int OUTPUTS_HEIGHT_NOPAD + = (CHANNELS_HEIGHT - KERNEL_HEIGHT + STRIDE_Y) / STRIDE_Y; + constexpr int OUTPUTS_WIDTH_NOPAD + = (CHANNELS_WIDTH - KERNEL_WIDTH + STRIDE_X) / STRIDE_X; + + for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) { + const int syMin = (PADDING_Y == 0) ? 0 + : max(PADDING_Y - (oy * STRIDE_Y), 0); + const int syMax = (PADDING_Y == 0 + && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? KERNEL_HEIGHT + : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y), + 0, KERNEL_HEIGHT); + const int iy = (oy * STRIDE_Y) - PADDING_Y; + + for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) { + const int sxMin = (PADDING_X == 0) ? 
0 + : max(PADDING_X - (ox * STRIDE_X), 0); + const int sxMax = (PADDING_X == 0 + && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) + ? KERNEL_WIDTH + : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X), + 0, KERNEL_WIDTH); + const int ix = (ox * STRIDE_X) - PADDING_X; + + const int oPos = (ox + OUTPUTS_WIDTH * oy); + int oOffset = OUTPUT_MEM_STRIDE * oPos; + + if (OUTPUT_MEM_WRAP_SIZE > 0 && oOffset >= OUTPUT_MEM_CONT_SIZE) { + oOffset += OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET + - OUTPUT_MEM_CONT_SIZE; + } + + for (int output = 0; output < NB_OUTPUTS; ++output) { + const int channel = (output * NB_CHANNELS) / NB_OUTPUTS; + + SUM_T weightedSum = biasses[output]; + + for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) { + if ((PADDING_Y != 0 + || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD) + && sy >= syMax - syMin) + { + break; + } + + const int iPos = ((sxMin + ix) + + CHANNELS_WIDTH * (iy + syMin + sy)); + int iOffset = INPUT_MEM_STRIDE * iPos; + + // Wrapping cannot occur in the middle of a line, except if + // there is only one line (1D)! + bool wrapInRange = false; + + if (INPUT_MEM_WRAP_SIZE > 0 + && iOffset >= INPUT_MEM_CONT_SIZE) + { + iOffset += INPUT_MEM_WRAP_OFFSET - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE; + } + else if (INPUT_MEM_WRAP_SIZE > 0 && KERNEL_WIDTH > 1 + && CHANNELS_HEIGHT == 1 // single line (1D)! 
+ && iOffset + KERNEL_WIDTH * INPUT_MEM_STRIDE + > INPUT_MEM_CONT_SIZE) + { + wrapInRange = true; + } + + const int wOffset = (sxMin + + KERNEL_WIDTH * (syMin + sy + KERNEL_HEIGHT * output)); + + if (!wrapInRange && ((PADDING_X == 0 + && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) + || sxMax - sxMin == KERNEL_WIDTH)) + { + macsOnRange<KERNEL_WIDTH, INPUT_MEM_STRIDE>( + inputs + iOffset + channel, + weights + wOffset, + weightedSum); + } + else { + for (int sx = 0; sx < KERNEL_WIDTH; ++sx) { + if ((PADDING_X != 0 + || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD) + && sx >= sxMax - sxMin) + { + break; + } + + int iOffsetInRange = iOffset + + sx * INPUT_MEM_STRIDE; + + if (wrapInRange && + iOffsetInRange >= INPUT_MEM_CONT_SIZE) + { + iOffsetInRange += INPUT_MEM_WRAP_OFFSET + - INPUT_MEM_CONT_OFFSET + - INPUT_MEM_CONT_SIZE; + } + + weightedSum += inputs[channel + iOffsetInRange] + * weights[wOffset + sx]; + } + } + } + + outputs[output + oOffset] + = sat<Output_T>(weightedSum, output, ACTIVATION, rescaling); + } + } + } +} +} // N2D2_Export + +#endif // __N2D2_EXPORT_CPP_CONV_DW_HPP__ diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/templates/configuration/softmax.jinja b/aidge_export_arm_cortexm/_Aidge_Arm/templates/configuration/softmax.jinja new file mode 100644 index 0000000000000000000000000000000000000000..9f1eb503251117c70c07e81e478bdd84d6beb566 --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/templates/configuration/softmax.jinja @@ -0,0 +1,13 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H + +{# For layer configuration -#} +#define {{ name|upper }}_INPUTS_SIZE {{ in_size[0] }} +#define {{ name|upper }}_OUTPUTS_SIZE {{ out_size[0] }} +#define {{ name|upper }}_DIMS {{ in_dims[0] }} +#define {{ name|upper }}_AXIS {{ axis }} +#define {{ name|upper }}_INPUT_DIMS_SIZE {{ in_dims[0]|length }} + + +#endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_arm_cortexm/_Aidge_Arm/templates/forward_call/softmax.jinja
b/aidge_export_arm_cortexm/_Aidge_Arm/templates/forward_call/softmax.jinja new file mode 100644 index 0000000000000000000000000000000000000000..0b2a30bfed9d85c9ec2e7766b2e9565c3f154c69 --- /dev/null +++ b/aidge_export_arm_cortexm/_Aidge_Arm/templates/forward_call/softmax.jinja @@ -0,0 +1,4 @@ +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +aidge_softmax_chw_float32({{in_name[0]}}, {{out_name[0]}}, {{name|upper}}_DIMS, {{name|upper}}_AXIS, {{name|upper}}_INPUT_DIMS_SIZE, {{name|upper}}_OUTPUTS_SIZE); +{% endfilter %} diff --git a/aidge_export_arm_cortexm/operators.py b/aidge_export_arm_cortexm/operators.py index 23c1dcf3651c5d9373d643b54068fdef4f42631a..64eb57c5cb6d0780e33d3986b481384b484d4b2e 100644 --- a/aidge_export_arm_cortexm/operators.py +++ b/aidge_export_arm_cortexm/operators.py @@ -224,6 +224,49 @@ class Conv_ARMCortexM(ExportNodeCpp): str(ROOT / "_Aidge_Arm" / "kernels" / "Convolution" / "Conv.hpp") ] +@ExportLibAidgeARM.register("ConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class ConvDW_ARMCortexM(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + self.attributes["activation"] = "Linear" + self.attributes.update(Scaling()("no_scaling")) + # No padding with Conv + # Use PaddedConv to add padding attribute + self.attributes["padding"] = [0, 0] + + self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "conv_config.jinja") + self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "conv_kernel.jinja") + self.include_list = [] + self.kernels_to_copy = [ + str(ROOT / "_Aidge_Arm" / "kernels" / "Convolution" / "ConvDW.hpp") + ] + +@ExportLibAidgeARM.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class PaddedConvDW_ARMCortexM(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + 
self.attributes["activation"] = "Linear" + self.attributes.update(Scaling()("no_scaling")) + for n in self.operator.get_micro_graph().get_nodes(): + if n.type() == "Pad2D": + self.attributes["padding"] = n.get_operator( + ).attr.begin_end_borders + if n.type() == "ConvDepthWise2D": + self.attributes["kernel_dims"] = n.get_operator( + ).attr.kernel_dims + self.attributes["stride_dims"] = n.get_operator( + ).attr.stride_dims + self.attributes["dilation_dims"] = n.get_operator( + ).attr.dilation_dims + + self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "conv_config.jinja") + self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "conv_kernel.jinja") + self.include_list = [] + self.kernels_to_copy = [ + str(ROOT / "_Aidge_Arm" / "kernels" / "Convolution" / "ConvDW.hpp") + ] + + @ExportLibAidgeARM.register_metaop("PaddedConv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) class PaddedConv_ARMCortexM(ExportNodeCpp): @@ -231,7 +274,6 @@ class PaddedConv_ARMCortexM(ExportNodeCpp): super().__init__(node, mem_info) self.attributes["activation"] = "Linear" self.attributes.update(Scaling()("no_scaling")) - print(self.attributes) for n in self.operator.get_micro_graph().get_nodes(): if n.type() == "Pad2D": self.attributes["padding"] = n.get_operator( @@ -361,10 +403,8 @@ class Mul_ARMCortexM(ExportNodeCpp): class Softmax_ARMCortexM(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) - self.attributes["activation_type"] = "\"SOFTMAX\"" - - self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "activation.jinja") - self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "activation_chw.jinja") + self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "softmax.jinja") + self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "softmax.jinja") self.include_list = [] 
self.kernels_to_copy = [ str(ROOT / "_Aidge_Arm" / "kernels" / "Softmax" / "aidge_softmax_chw_float32.h"),