Commit fd867582 authored by Maxence Naud

Merge branch 'dev' into 'main'

v0.2.0

See merge request !27
parents 0d5c0581 05400e59
Tags v0.2.0
Showing 552 additions and 1179 deletions
# Version 0.2.0 (December 6, 2024)
# Version 0.0.1 (January 23, 2024)
Initial release
@@ -2,6 +2,7 @@ r"""
Aidge Export for CPP standalone projects
"""
from .export_registry import ExportLibCpp
from .operators import *
from collections import defaultdict
......
import re
import os
from pathlib import Path
import shutil
import numpy as np
from typing import List, Union
from jinja2 import Environment, FileSystemLoader
import aidge_core
from aidge_core.export_utils.code_generation import *
from aidge_export_cpp.utils import (ROOT, OPERATORS_REGISTRY, supported_operators)
from aidge_export_cpp.utils.converter import aidge_datatype2ctype, numpy_dtype2ctype
import aidge_export_cpp.operators
from aidge_core.mem_info import compute_default_mem_info
from aidge_export_cpp.utils import ROOT
from aidge_export_cpp.utils.converter import numpy_dtype2ctype
from aidge_export_cpp import ExportLibCpp
from aidge_export_cpp.utils.generation import *
from aidge_export_cpp.memory import *
# from aidge_export_cpp.memory import *
def generate_input_file(export_folder:str,
@@ -33,86 +32,100 @@ def generate_input_file(export_folder:str,
)
def export(export_folder_name, graphview, scheduler):
export_folder = Path().absolute() / export_folder_name
os.makedirs(str(export_folder), exist_ok=True)
dnn_folder = export_folder / "dnn"
os.makedirs(str(dnn_folder), exist_ok=True)
list_actions = []
list_configs = []
list_forward_nodes = scheduler.get_static_scheduling()
for node in list_forward_nodes:
if node.type() in supported_operators():
op = OPERATORS_REGISTRY[node.type()](node)
# For configuration files
list_configs = op.export(dnn_folder, list_configs)
# For forward file
list_actions = op.forward(list_actions)
# Memory management
mem_size, mem_info = compute_default_mem_info(scheduler)
# Generate the memory file
generate_file(
str(dnn_folder / "memory" / "mem_info.h"),
str(ROOT / "templates" / "memory" / "mem_info.jinja"),
mem_size = mem_size,
mem_info_legends = MEMORY_INFO_TEMPLATE,
mem_info = mem_info
)
list_configs.append("memory/mem_info.h")
# Get entry nodes
# It assumes the entry nodes are Producers with constant=false
# Store the datatype & name
list_inputs_name = []
for node in graphview.get_nodes():
if node.type() == "Producer":
if not node.get_operator().attr.constant:
export_type = aidge_datatype2ctype(node.get_operator().get_output(0).dtype())
list_inputs_name.append((export_type, node.name()))
# Get output nodes
# Store the datatype & name, like entry nodes
list_outputs_name = []
for node in graphview.get_nodes():
if len(node.get_children()) == 0:
export_type = aidge_datatype2ctype(node.get_operator().get_output(0).dtype())
list_outputs_name.append((export_type, node.name()))
# Generate forward file
# TODO: for now the same mem type is used for all intermediate results; this should change.
# Note that we may have all inputs constants, hence select output type
assert len(list_outputs_name) >= 1, f"TODO: requires some output to determine mem type"
mem_ctype = list_outputs_name[0][0]
generate_file(
str(dnn_folder / "src" / "forward.cpp"),
str(ROOT / "templates" / "network" / "network_forward.jinja"),
headers=list_configs,
actions=list_actions,
inputs= list_inputs_name,
outputs=list_outputs_name,
mem_ctype=mem_ctype,
)
# Generate dnn API
generate_file(
str(dnn_folder / "include" / "dnn.hpp"),
str(ROOT / "templates" / "network" / "dnn_header.jinja"),
libraries=[],
functions=get_functions_from_c_file(str(dnn_folder / "src" / "forward.cpp")),
def export(export_folder_name, graphview, scheduler, mem_wrapping=False):
aidge_core.export_utils.scheduler_export(
scheduler,
export_folder_name,
ExportLibCpp,
memory_manager=compute_default_mem_info
)
# Copy all static files in the export
shutil.copy(str(ROOT / "static" / "main.cpp"), str(export_folder))
shutil.copy(str(ROOT / "static" / "Makefile"), str(export_folder))
shutil.copytree(str(ROOT / "static" / "include"), str(dnn_folder / "include"), dirs_exist_ok=True)
# export_folder = Path().absolute() / export_folder_name
# os.makedirs(str(export_folder), exist_ok=True)
# dnn_folder = export_folder / "dnn"
# os.makedirs(str(dnn_folder), exist_ok=True)
# list_actions = []
# list_configs = []
# peak_mem, mem_info = compute_default_mem_info(scheduler)
# list_forward_nodes = scheduler.get_static_scheduling()
# for node in list_forward_nodes:
# if ExportLibCpp.exportable(node):
# op = ExportLibCpp.get_export_node(node)(node, mem_info[node])
# # For configuration files
# list_configs = op.export(dnn_folder, list_configs)
# # For forward file
# list_actions = op.forward(list_actions)
# else:
# raise RuntimeError(f"Operator not supported: {node.type()} !")
# # Memory management
# # stats_folder = export_folder / "statistics"
# # os.makedirs(str(stats_folder), exist_ok=True)
# # mem_size, mem_info = generate_optimized_memory_info(stats_folder, scheduler, mem_wrapping)
# # peak_mem, mem_info = compute_default_mem_info(scheduler)
# # Generate the memory file
# # generate_file(
# # str(dnn_folder / "memory" / "mem_info.h"),
# # str(ROOT / "templates" / "memory" / "mem_info.jinja"),
# # mem_size = mem_size,
# # mem_info_legends = MEMORY_INFO_TEMPLATE,
# # mem_info = mem_info
# # )
# # list_configs.append("memory/mem_info.h")
# # Get entry nodes
# # Store the datatype & name
# list_inputs_name = []
# for node in graphview.get_input_nodes():
# for idx, node_input_tuple in enumerate(node.inputs()):
# node_input, _ = node_input_tuple
# if node_input is None:
# export_type = aidge2c(node.get_operator().get_output(0).dtype())
# list_inputs_name.append((export_type, f"{node.name()}_input_{idx}"))
# elif node_input not in graphview.get_nodes():
# export_type = aidge2c(node_input.get_operator().get_output(0).dtype())
# list_inputs_name.append((export_type, node_input.name()))
# # Get output nodes
# # Store the datatype & name, like entry nodes
# list_outputs_name = []
# for node in graphview.get_nodes():
# if len(node.get_children()) == 0:
# export_type = aidge2c(node.get_operator().get_output(0).dtype())
# list_outputs_name.append((export_type, f"{node.name()}_output_0"))
# # Generate forward file
# # TODO: for now the mem type is bound for all intermediate results, should change.
# # Note that we may have all inputs constants, hence select output type
# assert len(list_outputs_name) >= 1, f"TODO: requires some output to determine mem type"
# mem_ctype = list_outputs_name[0][0]
# generate_file(
# str(dnn_folder / "src" / "forward.cpp"),
# str(ROOT / "templates" / "network" / "network_forward.jinja"),
# headers=set(list_configs),
# actions=list_actions,
# inputs= list_inputs_name,
# outputs=list_outputs_name,
# mem_ctype=mem_ctype,
# peak_mem=peak_mem
# )
# # Generate dnn API
# generate_file(
# str(dnn_folder / "include" / "dnn.hpp"),
# str(ROOT / "templates" / "network" / "dnn_header.jinja"),
# libraries=[],
# functions=get_functions_from_c_file(str(dnn_folder / "src" / "forward.cpp")),
# )
# # Copy all static files in the export
# shutil.copy(str(ROOT / "static" / "main.cpp"), str(export_folder))
# shutil.copy(str(ROOT / "static" / "Makefile"), str(export_folder))
# shutil.copytree(str(ROOT / "static" / "include"), str(dnn_folder / "include"), dirs_exist_ok=True)
from aidge_core.export_utils import ExportLib
from aidge_export_cpp.utils import ROOT
class ExportLibCpp(ExportLib):
_name="export_cpp"
static_files={
str(ROOT / "static" / "Makefile"): "",
str(ROOT / "static" / "include" / "network" / "typedefs.hpp"): "dnn/include/network",
str(ROOT / "static" / "include" / "network" / "utils.hpp"): "dnn/include/network",
}
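For context, a minimal usage sketch of the new export flow built on ExportLibCpp and aidge_core.export_utils.scheduler_export. The model, its dimensions, and the import path are illustrative assumptions, not part of this merge request:

# Hypothetical end-to-end usage of the refactored export (assumes the usual
# aidge_core graph-building API; names and shapes are made up).
import aidge_core
import aidge_backend_cpu  # registers the "cpu" backend used by compile()
from aidge_export_cpp import export  # assumed to be re-exported at package level

model = aidge_core.sequential([
    aidge_core.FC(32, 10, name="fc1"),
    aidge_core.ReLU(name="relu1"),
])
model.compile("cpu", aidge_core.dtype.float32, dims=[[1, 32]])

scheduler = aidge_core.SequentialScheduler(model)
scheduler.generate_scheduling()

export("export_fc", model, scheduler)  # writes dnn/ sources, Makefile and static headers

The heavy lifting (memory layout, file generation, static files) is delegated to scheduler_export and to the static_files mapping declared above.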
@@ -4,36 +4,36 @@
#include <type_traits>
#include "network/typedefs.hpp"
#include "network/utils.hpp"
#include "network/rescaling.hpp"
#include "kernels/rescaling.hpp"
template<typename Output_T, typename T,
template<typename Output_T, typename T,
typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
__attribute__((always_inline)) inline
Output_T saturate (T value, int32_t /*sat*/)
Output_T saturate (T value, int32_t /*sat*/)
{
return value;
}
template<typename Output_T, typename T,
template<typename Output_T, typename T,
typename std::enable_if<!std::is_floating_point<T>::value>::type* = nullptr>
__attribute__((always_inline)) inline
Output_T saturate (T value, uint32_t sat)
Output_T saturate (T value, uint32_t sat)
{
if (std::is_unsigned<Output_T>::value) {
return clamp(value, T(0), (T(1) << sat) - 1);
} else {
return clamp(value, -(T(1) << (sat - 1)), (T(1) << (sat - 1)) - 1);
}
}
}
template<typename Output_T,
typename Sum_T,
template<typename Output_T,
typename Sum_T,
typename Rescaling_T>
__attribute__((always_inline)) inline
Output_T activation_forward_value (Sum_T weightedSum,
int output,
ActivationFunction_T func,
const Rescaling_T& __restrict rescaling)
__attribute__((always_inline)) inline
Output_T activation_forward_value (Sum_T weightedSum,
int output,
ActivationFunction_T func,
const Rescaling_T& __restrict rescaling)
{
switch(func) {
case Linear:
@@ -49,7 +49,7 @@ Output_T activation_forward_value (Sum_T weightedSum,
break;
}
// Value fixed here for now but it should be generated by
// Value fixed here for now but it should be generated by
// the export module or determined by the type of Output_T
// For now only works for int8_t and uint8_t
const uint32_t NB_BITS = 8;
@@ -60,7 +60,7 @@ Output_T activation_forward_value (Sum_T weightedSum,
template<int NB_DATA,
ActivationFunction_T ACTIVATION,
typename Input_T, typename Output_T, typename Rescaling_T>
__attribute__((always_inline)) inline
__attribute__((always_inline)) inline
void activation_forward (
const Input_T* __restrict inputs,
Output_T* __restrict outputs,
......
@@ -2,17 +2,17 @@
#define __AIDGE_EXPORT_CPP_KERNELS_BATCHNORM__
#include "network/typedefs.hpp"
#include "network/rescaling.hpp"
#include "kernels/rescaling.hpp"
#include <math.h>
// WARNING: this kernel only works for 32-bits floating point values
template<int NB_OUTPUTS,
template<int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
ActivationFunction_T ACTIVATION,
typename Input_T, typename Output_T,
typename Input_T, typename Output_T,
typename Param_T>
__attribute__((always_inline)) inline
__attribute__((always_inline)) inline
void batchnorm_forward (
const Input_T* __restrict inputs,
Output_T* __restrict outputs,
......
@@ -2,13 +2,13 @@
#define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__
#include "network/typedefs.hpp"
#include "network/rescaling.hpp"
#include "kernels/rescaling.hpp"
#include "network/utils.hpp"
#include "kernels/macs.hpp"
#include "kernels/activation.hpp"
template<int NB_CHANNELS,
template<int NB_CHANNELS,
int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
@@ -17,10 +17,10 @@ template<int NB_CHANNELS,
int DILATION_Y, int DILATION_X,
int KERNEL_HEIGHT, int KERNEL_WIDTH,
ActivationFunction_T ACTIVATION,
typename Input_T, typename Output_T,
typename Input_T, typename Output_T,
typename Weight_T, typename Bias_T,
typename Rescaling_T>
__attribute__((always_inline)) inline
__attribute__((always_inline)) inline
void convolution_forward(
const Input_T* __restrict inputs,
Output_T* __restrict outputs,
@@ -28,10 +28,10 @@ void convolution_forward(
const Bias_T* __restrict biases,
const Rescaling_T& __restrict rescaling)
{
constexpr int DILATED_KERNEL_HEIGHT
constexpr int DILATED_KERNEL_HEIGHT
= KERNEL_HEIGHT + (DILATION_Y - 1) * (KERNEL_HEIGHT - 1);
constexpr int DILATED_KERNEL_WIDTH
constexpr int DILATED_KERNEL_WIDTH
= KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1);
constexpr int OUTPUTS_HEIGHT_NOPAD
@@ -44,7 +44,7 @@ void convolution_forward(
: max(PADDING_Y - (oy * STRIDE_Y), 0);
const int syMax = (PADDING_Y == 0
&& OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? DILATED_KERNEL_HEIGHT
: clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
: clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
0, DILATED_KERNEL_HEIGHT);
const int iy = (oy * STRIDE_Y) - PADDING_Y;
@@ -57,7 +57,7 @@ void convolution_forward(
const int sxMax = (PADDING_X == 0
&& OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
? DILATED_KERNEL_WIDTH
: clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
: clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
0, DILATED_KERNEL_WIDTH);
const int ix = (ox * STRIDE_X) - PADDING_X;
@@ -85,8 +85,8 @@ void convolution_forward(
|| sxMax - sxMin == KERNEL_WIDTH))
{
macsOnRange<KERNEL_WIDTH * NB_CHANNELS>(
inputs + iOffset,
weights + wOffset,
inputs + iOffset,
weights + wOffset,
weightedSum);
}
else {
@@ -100,11 +100,11 @@ void convolution_forward(
int iOffsetInRange = iOffset
+ sx * DILATION_X * NB_CHANNELS;
macsOnRange<NB_CHANNELS>(
// same input line so no wrapping can occur
inputs + iOffsetInRange,
weights + wOffset + sx * NB_CHANNELS,
inputs + iOffsetInRange,
weights + wOffset + sx * NB_CHANNELS,
weightedSum);
}
}
......
@@ -2,20 +2,20 @@
#define __AIDGE_EXPORT_CPP_KERNELS_FULLYCONNECTED__
#include "network/typedefs.hpp"
#include "network/rescaling.hpp"
#include "kernels/rescaling.hpp"
#include "network/utils.hpp"
#include "kernels/macs.hpp"
#include "kernels/activation.hpp"
template<int NB_CHANNELS,
template<int NB_CHANNELS,
int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
ActivationFunction_T ACTIVATION,
typename Input_T, typename Output_T,
typename Input_T, typename Output_T,
typename Weight_T, typename Bias_T,
typename Rescaling_T>
__attribute__((always_inline)) inline
__attribute__((always_inline)) inline
void fullyconnected_forward (
const Input_T* __restrict inputs,
Output_T* __restrict outputs,
@@ -35,7 +35,7 @@ void fullyconnected_forward (
for (int iy = 0; iy < CHANNELS_HEIGHT; ++iy) {
for (int ix = 0; ix < CHANNELS_WIDTH; ++ix) {
for (int ch = 0; ch < NB_CHANNELS; ++ch) {
weightedSum += inputs[CHANNELS_WIDTH*NB_CHANNELS*iy + NB_CHANNELS*ix + ch]
weightedSum += inputs[CHANNELS_WIDTH*NB_CHANNELS*iy + NB_CHANNELS*ix + ch]
* weights[CHANNELS_HEIGHT*CHANNELS_WIDTH*NB_CHANNELS*och + CHANNELS_HEIGHT*CHANNELS_WIDTH*ch + CHANNELS_HEIGHT*iy + ix];
}
}
@@ -58,8 +58,8 @@ Here the kernel to use with inputs in NHWC and weights in NHWC
* (iy + CHANNELS_HEIGHT * och);
macsOnRange<NB_CHANNELS * CHANNELS_WIDTH>(
inputs + iOffset,
weights + wOffset,
inputs + iOffset,
weights + wOffset,
weightedSum);
}
@@ -69,4 +69,4 @@ Here the kernel to use with inputs in NHWC and weights in NHWC
}
#endif // __AIDGE_EXPORT_CPP_KERNELS_FULLYCONNECTED__
\ No newline at end of file
#endif // __AIDGE_EXPORT_CPP_KERNELS_FULLYCONNECTED__
import aidge_core
import aidge_backend_cpu
from typing import List
# for each layer, name: [size, offset start]
# Example:
#define ENV_MEM_SIZE 3
#define ENV_OFFSET 0
MEMORY_INFO_TEMPLATE = ["layer_name", "size", "offset"]
# Default memory management, which can be used for development
def compute_default_mem_info(scheduler: aidge_core.Scheduler):
list_forward_nodes = scheduler.get_static_scheduling()
mem_info = []
mem_size = 0
# Exclude Producers and the last layers (because the results are stored outside the export)
for i, node in enumerate(list_forward_nodes):
if node.type() != "Producer":
if len(node.get_children()) != 0:
dims = node.get_operator().get_output(0).dims()
mem = 1
for dim in dims:
mem *= dim
# Add memory info
mem_info.append([node.name(), mem, mem_size])
# Increment offset for the next layer
mem_size += mem
return mem_size, mem_info
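The allocation above is a simple linear packing: every kept node gets a buffer placed right after the previous one, and the running offset becomes the total scratch size. A standalone sketch of the same idea, with made-up layer names and sizes (not tied to any aidge_core graph):

# Illustration of the back-to-back allocation performed by compute_default_mem_info
# (layer names and sizes are hypothetical).
def linear_offsets(layer_sizes):
    """Return (total_size, [[name, size, offset], ...]) with buffers packed contiguously."""
    mem_info, offset = [], 0
    for name, size in layer_sizes:
        mem_info.append([name, size, offset])
        offset += size
    return offset, mem_info

total, info = linear_offsets([("conv1", 16 * 32 * 32), ("relu1", 16 * 32 * 32)])
# total == 32768
# info  == [['conv1', 16384, 0], ['relu1', 16384, 16384]]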
def generate_optimized_memory_info(scheduler: aidge_core.Scheduler,
wrapping:bool = False):
# The forward dims have to be computed outside this function
# Generate the memory manager
mem_manager = scheduler.generate_memory(inc_producers=False, wrap_around_buffer=wrapping)
mem_size = 0
mem_info = []
return mem_size, mem_info
\ No newline at end of file
This diff is collapsed.
from aidge_core import ExportNode
import numpy as np
from jinja2 import Environment, FileSystemLoader
import os
import shutil
from aidge_export_cpp.register import export_cpp_register
dirpath = os.path.dirname(__file__)
class KERNELS:
ACTIVATION = dirpath + "/kernels/activation.hpp"
BATCHNORM = dirpath + "/kernels/batchnorm.hpp"
CONV = dirpath + "/kernels/convolution.hpp"
ADD = dirpath + "/kernels/elemwise.hpp"
FC = dirpath + "/kernels/fullyconnected.hpp"
POOLING = dirpath + "/kernels/pooling.hpp"
LEAKYRELU = dirpath + "/kernels/leakyrelu.hpp"
class KERNELS_FORWARD:
ACTIVATION = dirpath + "/templates/kernel_forward/activation_forward.jinja"
BATCHNORM = dirpath + "/templates/kernel_forward/batchnorm_forward.jinja"
CONV = dirpath + "/templates/kernel_forward/convolution_forward.jinja"
ADD = dirpath + "/templates/kernel_forward/elemwise_forward.jinja"
FC = dirpath + "/templates/kernel_forward/fullyconnected_forward.jinja"
POOLING = dirpath + "/templates/kernel_forward/pooling_forward.jinja"
LEAKYRELU = dirpath + "/templates/kernel_forward/leakyrelu_forward.jinja"
class CONFIGURATIONS:
ACTIVATION = dirpath + "/templates/configuration/activation_config.jinja"
BATCHNORM = dirpath + "/templates/configuration/batchnorm_config.jinja"
CONV = dirpath + "/templates/configuration/convolution_config.jinja"
ADD = dirpath + "/templates/configuration/elemwise_config.jinja"
FC = dirpath + "/templates/configuration/fullyconnected_config.jinja"
POOLING = dirpath + "/templates/configuration/pooling_config.jinja"
LEAKYRELU = dirpath + "/templates/configuration/leakyrelu_config.jinja"
##############################################
############## Export functions ##############
##############################################
def generate_file(filename, templatename, **kwargs):
# Get directory name of the file
dirname = os.path.dirname(filename)
# If directory doesn't exist, create it
if not os.path.exists(dirname):
os.makedirs(dirname)
# Get directory name and name of the template
template_dir = os.path.dirname(templatename)
template_name = os.path.basename(templatename)
# Select template
template = Environment(loader=FileSystemLoader(template_dir)).get_template(template_name)
# Generate file
content = template.render(kwargs)
with open(filename, mode="w", encoding="utf-8") as message:
message.write(content)
def generate_action(template_path, **kwargs):
dirname = os.path.dirname(template_path)
filename = os.path.basename(template_path)
template = Environment(loader=FileSystemLoader(dirname)).get_template(filename)
return template.render(kwargs)
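Both helpers are thin wrappers around jinja2: generate_file renders a template to disk, generate_action returns the rendered string for the forward file. A minimal sketch of the underlying rendering step, using an inline string template instead of a .jinja file (purely illustrative):

# Inline-template equivalent of the rendering done by generate_file/generate_action
# (the real helpers load .jinja files from the templates/ directory).
from jinja2 import Template

line = Template("#define {{ name|upper }}_NB_DATA {{ nb_data }}").render(
    name="relu1", nb_data=1024)
print(line)  # -> #define RELU1_NB_DATA 1024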
def copyfile(filename, dst_folder):
# If directory doesn't exist, create it
if not os.path.exists(dst_folder):
os.makedirs(dst_folder)
shutil.copy(filename, dst_folder)
def export_to_static(name, array, filepath):
# Get directory name of the file
dirname = os.path.dirname(filepath)
# If directory doesn't exist, create it
if not os.path.exists(dirname):
os.makedirs(dirname)
generate_file(
filepath,
dirpath + "/templates/data/data_static.jinja",
dims = array.shape,
data_t = "float",
name = name,
values = array.tolist()
)
##############################################
################### Utils ####################
##############################################
def get_node_parents(node):
parents = []
for parent in node.get_parents():
if parent.type() != "Producer":
parents.append(parent)
return parents
def get_producer_parents(node):
parents = []
for parent in node.get_parents():
if parent.type() == "Producer":
parents.append(parent)
return parents
##############################################
################### Actions ##################
##############################################
def set_up_output(name, datatype):
return f"{datatype}* {name} = ({datatype}*) mem + {name.upper()}_OFFSET;"
##############################################
############## Operators helper ##############
##############################################
@export_cpp_register("Conv")
class ConvCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.kernel = node.get_operator().get_attr("KernelDims")
self.stride = node.get_operator().get_attr("StrideDims")
# Not working anymore because Padding is a standalone operator
# self.padding = node.get_operator().get_attr("PaddingDims")
self.padding = [1, 1]
self.dilation = node.get_operator().get_attr("DilationDims")
self.nb_channels = node.get_operator().get_attr("InChannels")
self.nb_outputs = node.get_operator().get_attr("OutChannels")
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.CONV, f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/macs.hpp", f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/activation.hpp", f"{export_folder}/include/kernels/")
list_configs.append("kernels/convolution.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.CONV,
name=self.name,
input_dims=self.inputs_dims[0][1:],
output_dims=self.outputs_dims[0][1:],
kernel=self.kernel,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
activation="Linear",
rescaling="NoScaling")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.CONV,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name,
weights_name=self.inputs[1].name(),
biases_name=self.inputs[2].name()
))
return list_actions
class BatchNormCPP:
def __init__(self, node):
self.name = node.name()
self.epsilon = node.get_operator().get_attr("Epsilon")
self.producers = get_producer_parents(node)
self.scales = np.array(self.producers[0].get_operator().get_output(0)).reshape(-1).tolist()
self.biases = np.array(self.producers[1].get_operator().get_output(0)).reshape(-1).tolist()
self.means = np.array(self.producers[2].get_operator().get_output(0)).reshape(-1).tolist()
self.vars = np.array(self.producers[3].get_operator().get_output(0)).reshape(-1).tolist()
parents = get_node_parents(node)
if len(parents) == 0:
self.input_name = "in"
else :
self.input_name = parents[0].name()
def export(self, export_folder:str, list_configs:list):
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
"tensorrt/templates/configuration/batchnorm_config.jinja",
name=self.name,
input_dims=[0, 0, 0],
output_dims=[0, 0, 0],
activation="Linear",
epsilon=self.epsilon)
# export the batchnorm parameters
return list_configs
def forward(self, list_actions:list):
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
"cpp/templates/kernel_forward/batchnorm_forward.jinja",
name=self.name,
input_name=self.input_name,
output_name=self.name,
biases_name=self.producers[0].name(),
variances_name=self.producers[1].name(),
means_name=self.producers[2].name(),
scales_name=self.producers[3].name()
))
return list_actions
@export_cpp_register("ReLU")
class ReLUCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.nb_data = 1
for i in self.inputs_dims[0]:
self.nb_data *= i
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.ACTIVATION, f"{export_folder}/include/kernels/")
list_configs.append("kernels/activation.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.ACTIVATION,
name=self.name,
nb_data=self.nb_data,
activation="Rectifier",
rescaling="NoScaling")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.ACTIVATION,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name
))
return list_actions
@export_cpp_register("LeakyReLU")
class LeakyReLUCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.alpha = node.get_operator().get_attr("NegativeSlope")
self.nb_data = 1
for i in self.inputs_dims[0]:
self.nb_data *= i
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.LEAKYRELU, f"{export_folder}/include/kernels/")
list_configs.append("kernels/activation.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.LEAKYRELU,
name=self.name,
nb_data=self.nb_data,
alpha = self.alpha)
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.LEAKYRELU,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name
))
return list_actions
class AddCPP:
def __init__(self, node):
self.name = node.name()
self.parents = get_node_parents(node)
def export(self, export_folder:str, list_configs:list):
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.ADD,
name=self.name,
input_dims=[0, 0, 0],
output_dims=[0, 0, 0],
activation="Linear",
elemwise_op="Sum")
return list_configs
def forward(self, list_actions:list):
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
"cpp/templates/kernel_forward/elemwise_forward.jinja",
name=self.name,
input1_name=self.parents[0].name(),
input2_name=self.parents[1].name(),
output_name=self.name
))
return list_actions
@export_cpp_register("MaxPooling")
class MaxPoolCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.kernel = node.get_operator().get_attr("KernelDims")
self.stride = node.get_operator().get_attr("StrideDims")
# Not supported by the core...
# self.padding = node.get_operator().get_attr("PaddingDims")
self.padding = [0, 0]
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.POOLING, f"{export_folder}/include/kernels/")
list_configs.append("kernels/pooling.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.POOLING,
name=self.name,
input_dims=self.inputs_dims[0],
output_dims=self.outputs_dims[0],
kernel=self.kernel,
stride=self.stride,
padding=self.padding,
pool_type="Max",
activation="Linear")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.POOLING,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name
))
return list_actions
class GlobalAvgPoolCPP:
def __init__(self, node):
# node.get_operator().set_compute_output_dims(lambda x: [[x[0][0], x[0][1], 1, 1]])
pass
def export(self, export_folder:str, list_configs:list):
return list_configs
def forward(self, list_actions:list):
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
"cpp/templates/kernel_forward/pooling_forward.jinja",
name=self.name,
input_name=self.input_name,
output_name=self.name
))
return list_actions
@export_cpp_register("FC")
class FcCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
if len(self.inputs_dims[0]) == 2:
self.inputs_dims[0] = [self.inputs_dims[0][1], 1, 1]
elif len(self.inputs_dims[0]) == 4:
self.inputs_dims[0] = self.inputs_dims[0][1:]
if len(self.outputs_dims[0]) == 2:
self.outputs_dims[0] = [self.outputs_dims[0][1], 1, 1]
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.FC, f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/macs.hpp", f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/activation.hpp", f"{export_folder}/include/kernels/")
list_configs.append("kernels/fullyconnected.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.FC,
name=self.name,
input_dims=self.inputs_dims[0],
output_dims=self.outputs_dims[0],
activation="Linear",
rescaling="NoScaling")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.FC,
name=self.name,
input_name=self.inputs[0].name() if self.inputs[0] else self.name + "_input",
output_name=self.name,
weights_name=self.inputs[1].name(),
biases_name=self.inputs[2].name()
))
return list_actions
@export_cpp_register("Producer")
class ProducerCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.values = np.array(self.operator.get_output(0))
def export(self, export_folder:str, list_configs:list):
list_configs.append(f"parameters/{self.name}.h")
export_to_static(self.name,
self.values.reshape(-1),
f"{export_folder}/parameters/{self.name}.h")
return list_configs
def forward(self, list_actions:list):
return list_actions
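Supporting another operator follows the same pattern as the classes above: register an ExportNode subclass, emit its configuration header in export(), and append its forward call in forward(). A minimal skeleton, assuming a hypothetical "Softmax" kernel and templates that are not part of this repository:

# Hypothetical skeleton for a new operator export; the kernel and template
# files referenced here are placeholders, not files shipped with the export.
@export_cpp_register("Softmax")
class SoftmaxCPP(ExportNode):
    def __init__(self, node):
        super().__init__(node)
        self.nb_data = 1
        for dim in self.inputs_dims[0]:
            self.nb_data *= dim

    def export(self, export_folder: str, list_configs: list):
        # copyfile(dirpath + "/kernels/softmax.hpp", f"{export_folder}/include/kernels/")  # hypothetical kernel
        list_configs.append(f"layers/{self.name}.h")
        return list_configs

    def forward(self, list_actions: list):
        if not self.is_last:
            list_actions.append(set_up_output(self.name, "float"))
        # list_actions.append(generate_action(<softmax forward template>, ...))  # hypothetical template
        return list_actions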
#ifndef __AIDGE_EXPORT_CPP_NETWORK_UTILS__
#define __AIDGE_EXPORT_CPP_NETWORK_UTILS__
#ifdef SAVE_OUTPUTS
#include <sys/types.h>
#include <sys/stat.h>
#include <cstdio> // fprintf
#include <type_traits> // std::is_floating_point
#endif
/**
* @brief Integer clamping
* @param[in] v Value to be clamped
* @param[in] lo Saturating lower bound
* @param[in] hi Saturating higher bound
* @returns Value clamped between lo and hi
*
*
*/
__attribute__((always_inline)) static inline
int clamp (int v, int lo, int hi)
int clamp (int v, int lo, int hi)
{
if(v < lo) {
return lo;
@@ -27,7 +34,7 @@ int clamp (int v, int lo, int hi)
* @brief Maximum of two integer values
*/
__attribute__((always_inline)) static inline
int max (int lhs, int rhs)
int max (int lhs, int rhs)
{
return (lhs >= rhs) ? lhs : rhs;
}
@@ -36,9 +43,107 @@ int max (int lhs, int rhs)
* @brief Minimum of two integer values
*/
__attribute__((always_inline)) static inline
int min (int lhs, int rhs)
int min (int lhs, int rhs)
{
return (lhs <= rhs) ? lhs : rhs;
}
#endif // __AIDGE_EXPORT_CPP_NETWORK_UTILS__
#ifdef SAVE_OUTPUTS
enum class Format {
Default,
NCHW,
NHWC,
CHWN,
NCDHW,
NDHWC,
CDHWN
};
template<typename Output_T>
inline void saveOutputs(
int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
int OUTPUT_MEM_CONT_OFFSET,
int OUTPUT_MEM_CONT_SIZE,
int OUTPUT_MEM_WRAP_OFFSET,
int OUTPUT_MEM_WRAP_SIZE,
int OUTPUT_MEM_STRIDE,
const Output_T* __restrict outputs,
FILE* pFile,
Format format)
{
// default memory layout is NHWC !
if (format == Format::NHWC) {
fprintf(pFile, "(");
for(int oy = 0; oy < OUTPUTS_HEIGHT; oy++) {
fprintf(pFile, "(");
for(int ox = 0; ox < OUTPUTS_WIDTH; ox++) {
fprintf(pFile, "(");
const int oPos = (ox + OUTPUTS_WIDTH * oy);
int oOffset = OUTPUT_MEM_STRIDE * oPos;
if (OUTPUT_MEM_WRAP_SIZE > 0
&& oOffset >= OUTPUT_MEM_CONT_SIZE)
{
oOffset += OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET
- OUTPUT_MEM_CONT_SIZE;
}
for (int output = 0; output < NB_OUTPUTS; output++) {
if (std::is_floating_point<Output_T>::value)
fprintf(pFile, "%f", static_cast<float>(outputs[oOffset + output]));
else
fprintf(pFile, "%d", static_cast<int>(outputs[oOffset + output]));
fprintf(pFile, ", ");
}
fprintf(pFile, "), \n");
}
fprintf(pFile, "), \n");
}
fprintf(pFile, ")\n");
}
else if (format == Format::NCHW || format == Format::Default) {
for(int output = 0; output < NB_OUTPUTS; output++) {
fprintf(pFile, "%d:\n", output);
for(int oy = 0; oy < OUTPUTS_HEIGHT; oy++) {
for(int ox = 0; ox < OUTPUTS_WIDTH; ox++) {
const int oPos = (ox + OUTPUTS_WIDTH * oy);
int oOffset = OUTPUT_MEM_STRIDE * oPos;
if (OUTPUT_MEM_WRAP_SIZE > 0
&& oOffset >= OUTPUT_MEM_CONT_SIZE)
{
oOffset += OUTPUT_MEM_WRAP_OFFSET
- OUTPUT_MEM_CONT_OFFSET - OUTPUT_MEM_CONT_SIZE;
}
if (std::is_floating_point<Output_T>::value)
fprintf(pFile, "%f", static_cast<float>(outputs[oOffset + output]));
else
fprintf(pFile, "%d", static_cast<int>(outputs[oOffset + output]));
fprintf(pFile, " ");
}
fprintf(pFile, "\n");
}
fprintf(pFile, "\n");
}
fprintf(pFile, "\n");
}
else {
printf("Warning unsupported dataformat.\n");
}
}
#endif // SAVE_OUTPUTS
#endif // __AIDGE_EXPORT_CPP_NETWORK_UTILS__
#include <iostream>
#include "dnn.hpp"
#include "inputs.h"
int main()
{
// Example for MNIST dataset
// Feel free to change this file for your own projects
const unsigned int nb_classes = 10;
float results[nb_classes];
model_forward(inputs, results);
for (unsigned int i = 0; i < nb_classes; ++i)
{
std::cout << i << ": " << results[i] << std::endl;
}
return 0;
}
\ No newline at end of file
{# NOTE: Suppose input is first #}
// INPUT CONF
{% for inidx in range(nb_in) -%}
#define {{ in_name[inidx]|upper }}_NB_CHANNELS {{ in_chan[inidx] }}
#define {{ in_name[inidx]|upper }}_IN_HEIGHT {{ in_height[inidx] }}
#define {{ in_name[inidx]|upper }}_IN_WIDTH {{ in_width[inidx] }}
{% endfor %}
// OUTPUT CONF
{% for outidx in range(nb_out) -%}
#define {{ out_name[outidx]|upper }}_NB_OUTPUTS {{ out_chan[outidx] }}
#define {{ out_name[outidx]|upper }}_OUT_HEIGHT {{ out_height[outidx] }}
#define {{ out_name[outidx]|upper }}_OUT_WIDTH {{ out_width[outidx] }}
{% endfor %}
// MEMINFO CONF
{% for outidx in range(nb_out) -%}
#define {{ out_name[outidx]|upper }}_SIZE {{ mem_info_size[outidx]}}
#define {{ out_name[outidx]|upper }}_OFFSET {{ mem_info_offset[outidx]}}
#define {{ out_name[outidx]|upper }}_STRIDE {{ mem_info_stride[outidx]}}
#define {{ out_name[outidx]|upper }}_LENGTH {{ mem_info_length[outidx]}}
#define {{ out_name[outidx]|upper }}_CONT_SIZE {{ mem_info_cont_size[outidx]}}
#define {{ out_name[outidx]|upper }}_CONT_OFFSET {{ mem_info_cont_offset[outidx]}}
#define {{ out_name[outidx]|upper }}_WRAP_OFFSET {{ mem_info_wrap_offset[outidx]}}
#define {{ out_name[outidx]|upper }}_WRAP_SIZE {{ mem_info_wrap_size[outidx]}}
{% endfor %}
{#- For name header -#}
#ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H
#include "kernels/rescaling.hpp"
{# For layer configuration -#}
{%- set nb_data = in_chan[0] * in_height[0] * in_width[0] %}
#define {{ name|upper }}_NB_DATA {{ nb_data }}
#define {{ name|upper }}_ACTIVATION {{ activation }}
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
#endif /* {{ name|upper }}_LAYER_H */
@@ -3,12 +3,8 @@
#define {{ name|upper }}_LAYER_H
{# For layer configuration -#}
#define {{ name|upper }}_NB_CHANNELS {{ input_dims[0] }}
#define {{ name|upper }}_CHANNELS_HEIGHT {{ input_dims[1] }}
#define {{ name|upper }}_CHANNELS_WIDTH {{ input_dims[2] }}
#define {{ name|upper }}_NB_OUTPUTS {{ output_dims[0] }}
#define {{ name|upper }}_OUTPUTS_HEIGHT {{ output_dims[1] }}
#define {{ name|upper }}_OUTPUTS_WIDTH {{ output_dims[2] }}
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
#define {{ name|upper }}_ACTIVATION {{ activation }}
#define {{ name|upper }}_EPSILON {{ epsilon }}
......
{#- For name header -#}
#ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H
#include "kernels/rescaling.hpp"
{# For layer configuration -#}
#define {{ name|upper }}_NB_CHANNELS {{ input_dims[0] }}
#define {{ name|upper }}_CHANNELS_HEIGHT {{ input_dims[1] }}
#define {{ name|upper }}_CHANNELS_WIDTH {{ input_dims[2] }}
#define {{ name|upper }}_NB_OUTPUTS {{ output_dims[0] }}
#define {{ name|upper }}_OUTPUTS_HEIGHT {{ output_dims[1] }}
#define {{ name|upper }}_OUTPUTS_WIDTH {{ output_dims[2] }}
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
#define {{ name|upper }}_PADDING_Y {{ padding[1] }}
#define {{ name|upper }}_PADDING_X {{ padding[0] }}
#define {{ name|upper }}_STRIDE_Y {{ stride[1] }}
#define {{ name|upper }}_STRIDE_X {{ stride[0] }}
#define {{ name|upper }}_DILATION_Y {{ dilation[1] }}
#define {{ name|upper }}_DILATION_X {{ dilation[0] }}
#define {{ name|upper }}_KERNEL_HEIGHT {{ kernel[1] }}
#define {{ name|upper }}_KERNEL_WIDTH {{ kernel[0] }}
#define {{ name|upper }}_STRIDE_Y {{ stride_dims[1] }}
#define {{ name|upper }}_STRIDE_X {{ stride_dims[0] }}
#define {{ name|upper }}_DILATION_Y {{ dilation_dims[1] }}
#define {{ name|upper }}_DILATION_X {{ dilation_dims[0] }}
#define {{ name|upper }}_KERNEL_HEIGHT {{ kernel_dims[1] }}
#define {{ name|upper }}_KERNEL_WIDTH {{ kernel_dims[0] }}
#define {{ name|upper }}_ACTIVATION {{ activation }}
static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
{#- Calculate sizes #}
{%- set weights_size = output_dims[0] * input_dims[0] * kernel[1] * kernel[0] %}
{%- set weights_size = out_chan[0] * in_chan[0] * kernel_dims[1] * kernel_dims[0] %}
#define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }}
#define {{ name|upper }}_BIASES_SIZE {{ output_dims[0] }}
#define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }}
#endif /* {{ name|upper }}_LAYER_H */
{#- For name header -#}
#ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H
#include "kernels/rescaling.hpp"
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
{# For layer configuration -#}
#define {{ name|upper }}_NB_ELTS {{ nb_elts }}
#define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }}
#define {{ name|upper }}_ACTIVATION {{ activation }}
#define {{ name|upper }}_ELEM_OP {{ elemwise_op }}
static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
......