Commit 10868393 authored by Cyril Moineau

Add ConvDW and Softmax to export.

parent 2ae66838
3 merge requests: !17 v0.1.0, !12 v0.4.0, !11 Export refactor
/*
(C) Copyright 2017 CEA LIST. All Rights Reserved.
Contributor(s): N2D2 Team
This software is governed by the CeCILL-C license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL-C
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
*/
#ifndef __N2D2_EXPORT_CPP_CONV_DW_HPP__
#define __N2D2_EXPORT_CPP_CONV_DW_HPP__
#include "typedefs.h"
#include "assert.h"
#include "utils.hpp"
#include "kernels/Macs.hpp"
namespace N2D2_Export {
template<int NB_CHANNELS,
         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
         int NB_OUTPUTS,
         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         // Memory mapping: inputs
         int INPUT_MEM_CONT_OFFSET,
         int INPUT_MEM_CONT_SIZE,
         int INPUT_MEM_WRAP_OFFSET,
         int INPUT_MEM_WRAP_SIZE,
         int INPUT_MEM_STRIDE,
         // Memory mapping: outputs
         int OUTPUT_MEM_CONT_OFFSET,
         int OUTPUT_MEM_CONT_SIZE,
         int OUTPUT_MEM_WRAP_OFFSET,
         int OUTPUT_MEM_WRAP_SIZE,
         int OUTPUT_MEM_STRIDE,
         typename Input_T, typename Output_T,
         typename Weight_T, typename Bias_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline void convcellDWPropagate(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Bias_T* __restrict biasses,
    const Weight_T* __restrict weights,
    const Rescaling_T& __restrict rescaling)
{
    static_assert(NB_OUTPUTS % NB_CHANNELS == 0,
        "NB_OUTPUTS should be a multiple of NB_CHANNELS.");

    constexpr int OUTPUTS_HEIGHT_NOPAD
        = (CHANNELS_HEIGHT - KERNEL_HEIGHT + STRIDE_Y) / STRIDE_Y;
    constexpr int OUTPUTS_WIDTH_NOPAD
        = (CHANNELS_WIDTH - KERNEL_WIDTH + STRIDE_X) / STRIDE_X;

    for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
        const int syMin = (PADDING_Y == 0) ? 0
            : max(PADDING_Y - (oy * STRIDE_Y), 0);
        const int syMax = (PADDING_Y == 0
                && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? KERNEL_HEIGHT
            : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
                    0, KERNEL_HEIGHT);
        const int iy = (oy * STRIDE_Y) - PADDING_Y;

        for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
            const int sxMin = (PADDING_X == 0) ? 0
                : max(PADDING_X - (ox * STRIDE_X), 0);
            const int sxMax = (PADDING_X == 0
                    && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                        ? KERNEL_WIDTH
                : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
                        0, KERNEL_WIDTH);
            const int ix = (ox * STRIDE_X) - PADDING_X;

            const int oPos = (ox + OUTPUTS_WIDTH * oy);
            int oOffset = OUTPUT_MEM_STRIDE * oPos;

            if (OUTPUT_MEM_WRAP_SIZE > 0 && oOffset >= OUTPUT_MEM_CONT_SIZE) {
                oOffset += OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET
                            - OUTPUT_MEM_CONT_SIZE;
            }

            for (int output = 0; output < NB_OUTPUTS; ++output) {
                const int channel = (output * NB_CHANNELS) / NB_OUTPUTS;

                SUM_T weightedSum = biasses[output];

                for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
                    if ((PADDING_Y != 0
                            || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD)
                        && sy >= syMax - syMin)
                    {
                        break;
                    }

                    const int iPos = ((sxMin + ix)
                        + CHANNELS_WIDTH * (iy + syMin + sy));
                    int iOffset = INPUT_MEM_STRIDE * iPos;

                    // Wrapping cannot occur in the middle of a line, except if
                    // there is only one line (1D)!
                    bool wrapInRange = false;

                    if (INPUT_MEM_WRAP_SIZE > 0
                        && iOffset >= INPUT_MEM_CONT_SIZE)
                    {
                        iOffset += INPUT_MEM_WRAP_OFFSET - INPUT_MEM_CONT_OFFSET
                                    - INPUT_MEM_CONT_SIZE;
                    }
                    else if (INPUT_MEM_WRAP_SIZE > 0 && KERNEL_WIDTH > 1
                        && CHANNELS_HEIGHT == 1 // single line (1D)!
                        && iOffset + KERNEL_WIDTH * INPUT_MEM_STRIDE
                            > INPUT_MEM_CONT_SIZE)
                    {
                        wrapInRange = true;
                    }

                    const int wOffset = (sxMin
                        + KERNEL_WIDTH * (syMin + sy + KERNEL_HEIGHT * output));

                    if (!wrapInRange && ((PADDING_X == 0
                            && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                        || sxMax - sxMin == KERNEL_WIDTH))
                    {
                        macsOnRange<KERNEL_WIDTH, INPUT_MEM_STRIDE>(
                            inputs + iOffset + channel,
                            weights + wOffset,
                            weightedSum);
                    }
                    else {
                        for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
                            if ((PADDING_X != 0
                                    || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD)
                                && sx >= sxMax - sxMin)
                            {
                                break;
                            }

                            int iOffsetInRange = iOffset
                                + sx * INPUT_MEM_STRIDE;

                            if (wrapInRange &&
                                iOffsetInRange >= INPUT_MEM_CONT_SIZE)
                            {
                                iOffsetInRange += INPUT_MEM_WRAP_OFFSET
                                            - INPUT_MEM_CONT_OFFSET
                                            - INPUT_MEM_CONT_SIZE;
                            }

                            weightedSum += inputs[channel + iOffsetInRange]
                                * weights[wOffset + sx];
                        }
                    }
                }

                outputs[output + oOffset]
                    = sat<Output_T>(weightedSum, output, ACTIVATION, rescaling);
            }
        }
    }
}
} // N2D2_Export
#endif // __N2D2_EXPORT_CPP_CONV_DW_HPP__
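
For reference, a minimal, hypothetical instantiation sketch of the kernel above (not part of the commit): a 3x3 depthwise convolution, stride 1, no padding, 8 channels, 16x16 input, packed HWC buffers (memory stride equal to the channel count) and no memory wrapping. The buffer names, the `Linear` activation value and the `NoScaling_T` rescaling type are assumptions about typedefs.h and utils.hpp, not verified against the export headers.

// Hypothetical usage sketch, assuming typedefs.h provides a Linear
// ActivationFunction_T value and utils.hpp provides a pass-through
// NoScaling_T rescaling type.
#include "ConvDW.hpp"

using namespace N2D2_Export;

static float inputs[16 * 16 * 8];    // HWC layout, INPUT_MEM_STRIDE = 8
static float outputs[14 * 14 * 8];   // HWC layout, OUTPUT_MEM_STRIDE = 8
static float weights[8 * 3 * 3];     // one 3x3 kernel per channel
static float biasses[8];

void forward_convdw_example()
{
    const NoScaling_T rescaling;  // assumed pass-through rescaling type

    convcellDWPropagate<
        8, 16, 16,       // NB_CHANNELS, CHANNELS_HEIGHT, CHANNELS_WIDTH
        8, 14, 14,       // NB_OUTPUTS, OUTPUTS_HEIGHT, OUTPUTS_WIDTH
        0, 0,            // PADDING_Y, PADDING_X
        1, 1,            // STRIDE_Y, STRIDE_X
        3, 3,            // KERNEL_HEIGHT, KERNEL_WIDTH
        Linear,          // assumed ActivationFunction_T value
        0, 16 * 16 * 8, 0, 0, 8,   // input mapping: contiguous, no wrap
        0, 14 * 14 * 8, 0, 0, 8    // output mapping: contiguous, no wrap
        >(inputs, outputs, biasses, weights, rescaling);
}
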
{#- For name header -#}
#ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H
{# For layer configuration -#}
#define {{ name|upper }}_INPUTS_SIZE {{ in_size[0] }}
#define {{ name|upper }}_OUTPUTS_SIZE {{ out_size[0] }}
#define {{ name|upper }}_DIMS {{ in_dims[0] }}
#define {{ name|upper }}_AXIS {{ axis }}
#define {{ name|upper }}_INPUT_DIMS_SIZE {{ in_dims[0]|length }}
#endif /* {{ name|upper }}_LAYER_H */
{% filter indent(width=4, first=False) %}
{% include "./_mem_offset.jinja" %}
aidge_softmax_chw_float32({{in_name[0]}}, {{out_name[0]}}, {{name|upper}}_DIMS, {{name|upper}}_AXIS, {{name|upper}}_INPUT_DIMS_SIZE, {{name|upper}}_OUTPUTS_SIZE);
{% endfilter %}
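
To illustrate how these two Softmax templates are used, here is a rough, hypothetical render (not taken from the commit) for a layer named softmax0 with in_size[0] = 10, out_size[0] = 10, in_dims[0] = [1, 10] and axis = 1. The buffer names softmax0_input / softmax0_output are placeholders, and the output of _mem_offset.jinja is elided since it is not shown in this commit.

/* Configuration template render (hypothetical values) */
#ifndef SOFTMAX0_LAYER_H
#define SOFTMAX0_LAYER_H
#define SOFTMAX0_INPUTS_SIZE 10
#define SOFTMAX0_OUTPUTS_SIZE 10
#define SOFTMAX0_DIMS [1, 10]   /* in_dims[0] rendered verbatim; actual formatting depends on the export */
#define SOFTMAX0_AXIS 1
#define SOFTMAX0_INPUT_DIMS_SIZE 2
#endif /* SOFTMAX0_LAYER_H */

/* Forward-call template render (hypothetical names) */
    /* ...memory offset definitions emitted by _mem_offset.jinja... */
    aidge_softmax_chw_float32(softmax0_input, softmax0_output, SOFTMAX0_DIMS, SOFTMAX0_AXIS, SOFTMAX0_INPUT_DIMS_SIZE, SOFTMAX0_OUTPUTS_SIZE);
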
@@ -224,6 +224,49 @@ class Conv_ARMCortexM(ExportNodeCpp):
             str(ROOT / "_Aidge_Arm" / "kernels" / "Convolution" / "Conv.hpp")
         ]
@ExportLibAidgeARM.register("ConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class ConvDW_ARMCortexM(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["activation"] = "Linear"
self.attributes.update(Scaling()("no_scaling"))
# No padding with Conv
# Use PaddedConv to add padding attribute
self.attributes["padding"] = [0, 0]
self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "conv_config.jinja")
self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "conv_kernel.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "_Aidge_Arm" / "kernels" / "Convolution" / "ConvDW.hpp")
]
@ExportLibAidgeARM.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class PaddedConvDW_ARMCortexM(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["activation"] = "Linear"
self.attributes.update(Scaling()("no_scaling"))
for n in self.operator.get_micro_graph().get_nodes():
if n.type() == "Pad2D":
self.attributes["padding"] = n.get_operator(
).attr.begin_end_borders
if n.type() == "ConvDepthWise2D":
self.attributes["kernel_dims"] = n.get_operator(
).attr.kernel_dims
self.attributes["stride_dims"] = n.get_operator(
).attr.stride_dims
self.attributes["dilation_dims"] = n.get_operator(
).attr.dilation_dims
self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "conv_config.jinja")
self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "conv_kernel.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "_Aidge_Arm" / "kernels" / "Convolution" / "ConvDW.hpp")
]
@ExportLibAidgeARM.register_metaop("PaddedConv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) @ExportLibAidgeARM.register_metaop("PaddedConv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class PaddedConv_ARMCortexM(ExportNodeCpp): class PaddedConv_ARMCortexM(ExportNodeCpp):
...@@ -231,7 +274,6 @@ class PaddedConv_ARMCortexM(ExportNodeCpp): ...@@ -231,7 +274,6 @@ class PaddedConv_ARMCortexM(ExportNodeCpp):
super().__init__(node, mem_info) super().__init__(node, mem_info)
self.attributes["activation"] = "Linear" self.attributes["activation"] = "Linear"
self.attributes.update(Scaling()("no_scaling")) self.attributes.update(Scaling()("no_scaling"))
print(self.attributes)
for n in self.operator.get_micro_graph().get_nodes(): for n in self.operator.get_micro_graph().get_nodes():
if n.type() == "Pad2D": if n.type() == "Pad2D":
self.attributes["padding"] = n.get_operator( self.attributes["padding"] = n.get_operator(

@@ -361,10 +403,8 @@ class Mul_ARMCortexM(ExportNodeCpp):
 class Softmax_ARMCortexM(ExportNodeCpp):
     def __init__(self, node, mem_info):
         super().__init__(node, mem_info)
-        self.attributes["activation_type"] = "\"SOFTMAX\""
-        self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "activation.jinja")
-        self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "activation_chw.jinja")
+        self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "softmax.jinja")
+        self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "softmax.jinja")
         self.include_list = []
         self.kernels_to_copy = [
             str(ROOT / "_Aidge_Arm" / "kernels" / "Softmax" / "aidge_softmax_chw_float32.h"),
...