Commit 2a06f059 authored by Matthew Newson

Add new parameter groups for convolution

parent 81d728ac
Merge request !33: Add and modify operators to run the ConvNeXt onnx model
#ifndef __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__
#define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__

#include "network/typedefs.hpp"
#include "kernels/rescaling.hpp"
#include "network/utils.hpp"
#include "kernels/macs.hpp"
#include "kernels/activation.hpp"

#include <omp.h>
#include <stdexcept>
#include <iostream>

// Weights index in NHWC layout
constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) {
    return n * (H * W * C) +
           h * (W * C) +
           w * C +
           c;
}

// Image index in CHW layout
constexpr int inds_pos(int c, int h, int w, int C, int H, int W) {
    return c * (H * W) +
           h * W +
           w;
}
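// Example (illustrative values only): with a 3x3 kernel and 4 weight channels per
// group, the weight for output channel 2, group-local channel 1, kernel position
// (h=0, w=2) sits at inds_pos(2, 1, 0, 2, N, 4, 3, 3) = 2*36 + 0*12 + 2*4 + 1 = 81,
// i.e. the channel index varies fastest, then kernel width, then kernel height.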

template<int NB_CHANNELS,
         int IN_HEIGHT, int IN_WIDTH,
         int NB_OUTPUTS, int GROUPS,
         int OUT_HEIGHT, int OUT_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int DILATION_Y, int DILATION_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T,
         typename Weight_T, typename Bias_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline
void convolution_forward(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Weight_T* __restrict weights,
    const Bias_T* __restrict biases,
    const Rescaling_T& __restrict rescaling)
{
    if (NB_CHANNELS % GROUPS != 0 || NB_OUTPUTS % GROUPS != 0) {
        throw std::invalid_argument("Groups must be a divisor of both NB_CHANNELS and NB_OUTPUTS!");
    }

    // Number of input/output channels handled by each group
    constexpr int c_in_g = NB_CHANNELS / GROUPS;
    constexpr int c_out_g = NB_OUTPUTS / GROUPS;

    // Parallelise over output channels only: the inner loops accumulate into
    // `value`, so they must remain sequential to avoid data races.
#pragma omp parallel for
    for (int oc = 0; oc < NB_OUTPUTS; oc++) {
        const int g_oc = oc / c_out_g;  // group this output channel belongs to

        for (int i = 0; i < OUT_HEIGHT; ++i) {
            for (int j = 0; j < OUT_WIDTH; ++j) {
                Output_T value = biases[oc];

                // Only input channels of the same group contribute to this output channel
                for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) {
                    for (int m = 0; m < KERNEL_HEIGHT; ++m) {
                        for (int n = 0; n < KERNEL_WIDTH; ++n) {
                            const int i_p = i * STRIDE_X - PADDING_X + m * DILATION_X;
                            const int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y;

                            if (i_p >= 0 && i_p < IN_HEIGHT && j_p >= 0 && j_p < IN_WIDTH) {
                                value += weights[inds_pos(oc, ic % c_in_g, m, n, NB_OUTPUTS, c_in_g, KERNEL_HEIGHT, KERNEL_WIDTH)]
                                       * inputs[inds_pos(ic, i_p, j_p, NB_CHANNELS, IN_HEIGHT, IN_WIDTH)];
                            }
                        }
                    }
                }

                outputs[inds_pos(oc, i, j, NB_OUTPUTS, OUT_HEIGHT, OUT_WIDTH)] =
                    activation_forward_value<Output_T>(value, oc, ACTIVATION, rescaling);
            }
        }
    }
}
#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__
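For reference, here is a minimal usage sketch of the kernel with the new GROUPS parameter. The function name, sizes and tensor names are hypothetical (in a real export the template parameters come from the macros generated by the configuration template); Linear and NoScaling are the activation and rescaling types the Python side plugs in via kernels/activation.hpp and kernels/rescaling.hpp. It shows a depthwise 3x3 convolution, where GROUPS equals the channel count so each output channel reads a single input channel.

// Hypothetical instantiation (not generated code): 8-channel depthwise 3x3
// convolution on a 16x16 map, stride 1, padding 1, no dilation.
// GROUPS == NB_CHANNELS == NB_OUTPUTS, hence c_in_g == c_out_g == 1.
inline void depthwise_example(const float* in,   // [8][16][16], CHW
                              float* out,        // [8][16][16], CHW
                              const float* w,    // [8][3][3][1], NHWC
                              const float* b)    // [8]
{
    static const NoScaling rescaling = {};       // mirrors the generated *_RESCALING constant
    convolution_forward<8, 16, 16,   // NB_CHANNELS, IN_HEIGHT, IN_WIDTH
                        8, 8,        // NB_OUTPUTS, GROUPS (depthwise)
                        16, 16,      // OUT_HEIGHT, OUT_WIDTH
                        1, 1,        // PADDING_Y, PADDING_X
                        1, 1,        // STRIDE_Y, STRIDE_X
                        1, 1,        // DILATION_Y, DILATION_X
                        3, 3,        // KERNEL_HEIGHT, KERNEL_WIDTH
                        Linear>      // ACTIVATION
        (in, out, w, b, rescaling);
}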
@@ -4,14 +4,27 @@ from pathlib import Path
import aidge_core
from aidge_core.export_utils import ExportNode, ExportNodeCpp, generate_file
from aidge_export_cpp.utils import ROOT
from aidge_export_cpp import ExportLibCpp
##############################################
############## Export functions ##############
##############################################

def numpy_dtype2ctype(dtype):
    if dtype == np.int8:
        return "int8_t"
    elif dtype == np.int16:
        return "int16_t"
    elif dtype == np.int32:
        return "int32_t"
    elif dtype == np.int64:
        return "int64_t"
    elif dtype == np.float32:
        return "float"
    elif dtype == np.float64:
        return "double"
    # Add more dtype mappings as needed
    else:
        raise ValueError(f"Unsupported {dtype} dtype")

def export_params(name: str,
                  array: np.ndarray,
@@ -43,7 +56,7 @@ class ProducerCPP(ExportNode):
        super().__init__(node, mem_info)
        self.values = np.array(self.operator.get_output(0))

        if len(self.values.shape) == 4:  # Note: export in HWC
            self.values = np.transpose(self.values, (0, 2, 3, 1))

    def export(self, export_folder: Path):
@@ -130,6 +143,24 @@ def _setup_conv2D(conv):
        str(ROOT / "kernels" / "rescaling.hpp")
    ]

def _setup_elemwise_op(elemwise, op):
    """Common code (template and kernel setup) shared across the different elementwise operators (Add, Sub, ...)."""
    elemwise.attributes["elemwise_op"] = op
    elemwise.attributes["activation"] = "Linear"
    elemwise.attributes["rescaling"] = "NoScaling"
    elemwise.config_template = str(
        ROOT / "templates" / "configuration" / "elemwise_config.jinja")
    elemwise.forward_template = str(
        ROOT / "templates" / "kernel_forward" / "elemwise_forward.jinja")
    elemwise.include_list = []
    elemwise.kernels_to_copy = [
        str(ROOT / "kernels" / "elemwise.hpp"),
        str(ROOT / "kernels" / "activation.hpp"),
        str(ROOT / "kernels" / "rescaling.hpp")
    ]

@ExportLibCpp.register("Conv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class ConvCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
@@ -137,7 +168,7 @@ class ConvCPP(ExportNodeCpp):
        # No padding with Conv
        # Use PaddedConv to add padding attribute
        self.attributes["padding"] = [0, 0]
        self.attributes["groups"] = 1
        _setup_conv2D(self)

@ExportLibCpp.register_metaop("PaddedConv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
@@ -156,25 +187,28 @@ class PaddedConvCPP(ExportNodeCpp):
                ).attr.stride_dims
                self.attributes["dilation_dims"] = n.get_operator(
                ).attr.dilation_dims

        self.attributes["groups"] = 1
        _setup_conv2D(self)

@ExportLibCpp.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class PaddedConvDepthWiseCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
        # TODO: find a way to retrieve attributes for a meta operator
        for n in self.operator.get_micro_graph().get_nodes():
            if n.type() == "Pad2D":
                self.attributes["padding"] = n.get_operator(
                ).attr.begin_end_borders
            if n.type() == "ConvDepthWise2D":
                self.attributes["kernel_dims"] = n.get_operator(
                ).attr.kernel_dims
                self.attributes["stride_dims"] = n.get_operator(
                ).attr.stride_dims
                self.attributes["dilation_dims"] = n.get_operator(
                ).attr.dilation_dims

        # Depthwise convolution: one group per output channel
        self.attributes["groups"] = self.attributes["out_chan"][0]
        _setup_conv2D(self)

@ExportLibCpp.register("Add", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class AddCPP(ExportNodeCpp):
@@ -197,6 +231,14 @@ class MulCPP(ExportNodeCpp):
        _setup_elemwise_op(self, "Mul")

@ExportLibCpp.register("Div", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class DivCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
        _setup_elemwise_op(self, "Div")

def _setup_pooling(pooling):
    """Common code (template and kernel setup) shared across the different pooling operators."""
@@ -211,25 +253,6 @@ def _setup_pooling(pooling):
        str(ROOT / "kernels" / "rescaling.hpp")
    ]

@ExportLibCpp.register("Div", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class DivCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["div_op"] = "Div"
self.attributes["activation"] = "Linear"
self.attributes["rescaling"] = "NoScaling"
self.config_template = str(
ROOT / "templates" / "configuration" / "div_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "div_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "div.hpp"),
str(ROOT / "kernels" / "activation.hpp"),
str(ROOT / "kernels" / "rescaling.hpp")
]
@ExportLibCpp.register("MaxPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class MaxPoolCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
@@ -297,23 +320,20 @@ class FcCPP(ExportNodeCpp):
            str(ROOT / "kernels" / "rescaling.hpp")
        ]

@ExportLibCpp.register("MatMul", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class MatMulCPP(ExportNodeCpp):
@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class TransposeCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["activation"] = "Linear"
self.attributes["rescaling"] = "NoScaling"
self.config_template = str(
ROOT / "templates" / "configuration" / "matmul_config.jinja")
ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "matmul_forward.jinja")
ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "matmul.hpp"),
str(ROOT / "kernels" / "activation.hpp"),
str(ROOT / "kernels" / "rescaling.hpp")
str(ROOT / "kernels" / "transpose.hpp")
]
@ExportLibCpp.register("Erf", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class ErfCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
@@ -331,26 +351,6 @@ class ErfCPP(ExportNodeCpp):
            str(ROOT / "kernels" / "rescaling.hpp")
        ]

@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class TransposeCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
# Get parameter permutation from transpose
self.attributes["output_dims_order"] = self.operator.attr.get_attr("output_dims_order")
self.attributes["activation"] = "Linear"
self.attributes["rescaling"] = "NoScaling"
self.config_template = str(
ROOT / "templates" / "configuration" / "transpose_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "transpose_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "transpose.hpp"),
str(ROOT / "kernels" / "activation.hpp"),
str(ROOT / "kernels" / "rescaling.hpp")
]
@ExportLibCpp.register("BatchNorm2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class BatchNorm2DCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
@@ -366,16 +366,4 @@ class BatchNorm2DCPP(ExportNodeCpp):
            str(ROOT / "kernels" / "batchnorm.hpp"),
            str(ROOT / "kernels" / "activation.hpp"),
            str(ROOT / "kernels" / "rescaling.hpp")
        ]

@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class TransposeCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.config_template = str(
ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "transpose.hpp")
]
@@ -23,5 +23,4 @@ static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
#define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }}
#define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }}
#endif /* {{ name|upper }}_LAYER_H */
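For orientation, the tail of a rendered configuration header, say for a hypothetical 128-channel depthwise 3x3 layer named conv1, would look roughly like this (names and values are illustrative, and only the portion of the template shown above is rendered):

// Hypothetical rendering of the excerpt above (layer "conv1", 128-ch depthwise 3x3):
static const NoScaling CONV1_RESCALING = {};
#define CONV1_WEIGHTS_SIZE 1152   // {{ weights_size }}: 128 * 1 * 3 * 3
#define CONV1_BIASES_SIZE 128     // {{ out_chan[0] }}
#endif /* CONV1_LAYER_H */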
@@ -19,4 +19,4 @@ convolution_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{name|upper}}_RESCALING);
{% include "./_save_outputs.jinja" %}
{% endfilter %}
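Rendered for a concrete layer, the forward template emits a call of roughly this shape (identifiers are invented for the illustration; the real names are substituted from name, in_name and out_name, and only the first and last lines of the template are shown above, so the middle template arguments are elided):

// Hypothetical rendering of the generated forward call:
convolution_forward<CONV1_INPUT_NB_CHANNELS /* , ...remaining generated parameters... */>
    (conv1_input, conv1_output, conv1_weights, conv1_biases, CONV1_RESCALING);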