AdaptToBackend / ConstantFolding cannot generate scheduling
What commit version of aidge do you use
-
aidge_core
: I'm using a version of aidge_core that is not merged yet, which fixes some segfaults (SIGSEGV) — !289 (comment 2943375)
Problem description
In the context of using cmsis_nn for the arm_cortex_m export, I need to register the operators I need for export in accordance with the cmsis_nn specs, in particular the data format. Note that I'm using a meta-operator because cmsis_nn performs PaddedConvScalingRelu in a single kernel.
# Register the fused PaddedConvScalingRelu meta-operator on the CMSIS-NN export
# library. The ImplSpec requires NHWC data format (any dtype), as mandated by
# the CMSIS-NN convolution kernels.
# NOTE: indentation restored — it was lost in the original paste.
@ExportLibCMSISNN.register_metaop("PaddedConvScalingRelu", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any, aidge_core.dformat.nhwc)))
class Cmsis_Nn_Conv(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
        # Defaults describe a plain convolution: linear (no) activation,
        # no extra padding, single input batch.
        self.attributes["activation"] = "Linear"
        self.attributes["next_padding"] = [0, 0]
        self.attributes["input_batches"] = 1
        ...
The dformat of cmsis_nn is aidge_core.dformat.nhwc as specified in the lib function call:
/**
* @brief Basic s8 convolution function
* @param[in, out] ctx Function context that contains the additional buffer if required by the function.
* arm_convolve_s8_get_buffer_size will return the buffer_size if required.
* The caller is expected to clear the buffer, if applicable, for security reasons.
* @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
* Range of conv_params->input_offset : [-127, 128]
* Range of conv_params->output_offset : [-128, 127]
* @param[in] quant_params Per-channel quantization info.
* It contains the multiplier and shift values to be applied to each output channel
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] input_data Input (activation) data pointer. Data type: int8
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
* spatial filter dimensions
* @param[in] filter_data Filter data pointer. Data type: int8
* @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
* @param[in] bias_data Optional bias data pointer. Data type: int32
* @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[out] output_data Output data pointer. Data type: int8
* @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
*
* @details
* 1. Supported framework: TensorFlow Lite micro
* 2. Additional memory is required for optimization. Refer to argument 'ctx' for details.
*
*/
arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
...
To adapt the graph to the requested specs I first use the adapt_to_backend() function. Then the constant_folding() function, which is supposed to remove transpositions.
For a simple use case, here is a simple model:
%%{init: {'flowchart': { 'curve': 'monotoneY'}, 'fontFamily': 'Verdana' } }%%
flowchart TB
Conv2D_0("conv<br/><sub><em>(Conv2D#0)</em></sub>"):::rootCls
Producer_0("conv_w<br/><sub><em>(Producer#0)</em></sub>"):::producerCls
Producer_1("conv_b<br/><sub><em>(Producer#1)</em></sub>"):::producerCls
Quantizer_0("scale<br/><sub><em>(Quantizer#0)</em></sub>"):::metaCls
Conv2D_0-->|"0<br/>↓<br/>0"|Quantizer_0
Producer_0-->|"0 [2, 2, 1, 1] Float32<br/>↓<br/>1"|Conv2D_0
Producer_1-->|"0 [2] Float32<br/>↓<br/>2"|Conv2D_0
input0((in#0)):::inputCls--->|"↓<br/>0"|Conv2D_0
input1((in#1)):::inputCls--->|"↓<br/>1"|Quantizer_0
input2((in#2)):::inputCls--->|"↓<br/>2"|Quantizer_0
Quantizer_0--->|"0<br/>↓"|output0((out#0)):::outputCls
classDef inputCls fill:#afa
classDef outputCls fill:#ffa
classDef externalCls fill:#ccc
classDef producerCls fill:#ccf
classDef genericCls fill:#f9f9ff,stroke-width:1px,stroke-dasharray: 5 5
classDef metaCls stroke-width:5px
classDef rootCls stroke:#f00
classDef producerCls_rootCls stroke:#f00,fill:#ccf
classDef genericCls_rootCls stroke:#f00,fill:#f9f9ff,stroke-width:1px,stroke-dasharray: 5 5
classDef metaCls_rootCls stroke:#f00,stroke-width:5px
In the context of cmsis_nn I fuse the Quantizer with the Conv aidge_core.fuse_to_metaops(model, "Conv2D->Quantizer", "PaddedConvScalingRelu")
:
%%{init: {'flowchart': { 'curve': 'monotoneY'}, 'fontFamily': 'Verdana' } }%%
flowchart TB
Producer_0("conv_w<br/><sub><em>(Producer#0)</em></sub>"):::producerCls_rootCls
Producer_1("conv_b<br/><sub><em>(Producer#1)</em></sub>"):::producerCls
PaddedConvScalingRelu_0(<em>PaddedConvScalingRelu#0</em>):::metaCls
Producer_0-->|"0 [2, 2, 1, 1] Float32<br/>↓<br/>1"|PaddedConvScalingRelu_0
Producer_1-->|"0 [2] Float32<br/>↓<br/>2"|PaddedConvScalingRelu_0
input0((in#0)):::inputCls--->|" [1, 2, 5, 5] Float32<br/>↓<br/>0"|PaddedConvScalingRelu_0
input1((in#1)):::inputCls--->|"↓<br/>3"|PaddedConvScalingRelu_0
input2((in#2)):::inputCls--->|"↓<br/>4"|PaddedConvScalingRelu_0
PaddedConvScalingRelu_0--->|"0 [1, 2, 5, 5] Float32<br/>↓"|output0((out#0)):::outputCls
classDef inputCls fill:#afa
classDef outputCls fill:#ffa
classDef externalCls fill:#ccc
classDef producerCls fill:#ccf
classDef genericCls fill:#f9f9ff,stroke-width:1px,stroke-dasharray: 5 5
classDef metaCls stroke-width:5px
classDef rootCls stroke:#f00
classDef producerCls_rootCls stroke:#f00,fill:#ccf
classDef genericCls_rootCls stroke:#f00,fill:#f9f9ff,stroke-width:1px,stroke-dasharray: 5 5
classDef metaCls_rootCls stroke:#f00,stroke-width:5px
Then setting backend cmsis_nn with adapt_to_backend() :
%%{init: {'flowchart': { 'curve': 'monotoneY'}, 'fontFamily': 'Verdana' } }%%
flowchart TB
Adapted_Producer_0(<em>Adapted_Producer#0</em>):::metaCls_rootCls
Adapted_Producer_1(<em>Adapted_Producer#1</em>):::metaCls
Adapted_PaddedConvScalingRelu_0(<em>Adapted_PaddedConvScalingRelu#0</em>):::metaCls
Adapted_Producer_0-->|"0 [2, 1, 1, 2] Int8<br/>↓<br/>1"|Adapted_PaddedConvScalingRelu_0
Adapted_Producer_1-->|"0 [2] Int32<br/>↓<br/>2"|Adapted_PaddedConvScalingRelu_0
input0((in#0)):::inputCls--->|"↓<br/>0"|Adapted_PaddedConvScalingRelu_0
input1((in#1)):::inputCls--->|"↓<br/>3"|Adapted_PaddedConvScalingRelu_0
input2((in#2)):::inputCls--->|"↓<br/>4"|Adapted_PaddedConvScalingRelu_0
Adapted_PaddedConvScalingRelu_0--->|"0<br/>↓"|output0((out#0)):::outputCls
classDef inputCls fill:#afa
classDef outputCls fill:#ffa
classDef externalCls fill:#ccc
classDef producerCls fill:#ccf
classDef genericCls fill:#f9f9ff,stroke-width:1px,stroke-dasharray: 5 5
classDef metaCls stroke-width:5px
classDef rootCls stroke:#f00
classDef producerCls_rootCls stroke:#f00,fill:#ccf
classDef genericCls_rootCls stroke:#f00,fill:#f9f9ff,stroke-width:1px,stroke-dasharray: 5 5
classDef metaCls_rootCls stroke:#f00,stroke-width:5px
Here we're faced with the first "issue": adapt_to_backend() transforms my operators and my meta-operators into new meta-operators named Adapted_&lt;opname&gt;. But these metaOPs are not registered on my cmsis_nn backend. Solution -> expand_metaops, which gives us:
%%{init: {'flowchart': { 'curve': 'monotoneY'}, 'fontFamily': 'Verdana' } }%%
flowchart TB
Producer_0(<em>Producer#0</em>):::producerCls_rootCls
Producer_1(<em>Producer#1</em>):::producerCls
PaddedConvScalingRelu_0(<em>PaddedConvScalingRelu#0</em>):::metaCls
Transpose_0(<em>Transpose#0</em>)
Producer_0-->|"0 [2, 1, 1, 2] Int8<br/>↓<br/>1"|PaddedConvScalingRelu_0
Producer_1-->|"0 [2] Int32<br/>↓<br/>2"|PaddedConvScalingRelu_0
Transpose_0-->|"0<br/>↓<br/>0"|PaddedConvScalingRelu_0
input0((in#0)):::inputCls--->|"↓<br/>0"|Transpose_0
input1((in#1)):::inputCls--->|"↓<br/>3"|PaddedConvScalingRelu_0
input2((in#2)):::inputCls--->|"↓<br/>4"|PaddedConvScalingRelu_0
PaddedConvScalingRelu_0--->|"0<br/>↓"|output0((out#0)):::outputCls
classDef inputCls fill:#afa
classDef outputCls fill:#ffa
classDef externalCls fill:#ccc
classDef producerCls fill:#ccf
classDef genericCls fill:#f9f9ff,stroke-width:1px,stroke-dasharray: 5 5
classDef metaCls stroke-width:5px
classDef rootCls stroke:#f00
classDef producerCls_rootCls stroke:#f00,fill:#ccf
classDef genericCls_rootCls stroke:#f00,fill:#f9f9ff,stroke-width:1px,stroke-dasharray: 5 5
classDef metaCls_rootCls stroke:#f00,stroke-width:5px
Then I use constant_folding(); I thought its purpose was to remove the transposes from the graph by transposing the weights instead, but in our case nothing happens. Is this because I'm using a metaOP and not the Conv2D OP?
Calling scheduler.generate_scheduling()
after all of that raises the error below — note that without adapt_to_backend, expand_metaops and constant_folding, scheduler.generate_scheduling()
works fine:
Assertion failed: exists(key) in /local2/is148265/wb274724/STM32_dev/dev/aidge/env_aidge/lib/libAidge/include/aidge/utils/Registrar.hpp:79
missing or invalid registrar key: [Any, NHWC, [], Int8, NHWC, [(2, 2), (1, 1), (1, 1), (2, 2)], Int32, NHWC, [(2, 2)]], [Float32, Default, []] for registrable object N5Aidge16OperatorImpl_cpuINS_7Conv_OpILh2EEEFvRKSt5arrayImLm2EES6_S6_RKS3_ImLm4EEmPKvSB_SB_PvEFvvEEE
Did you include/import the corresponding module?
If so, it is possible that the object is not yet supported.
Traceback (most recent call last):
File "/local2/is148265/wb274724/STM32_dev/dev/superpoint/aidge/aidge/aidge_export_arm_cortexm/uni_tests/test_cmsis_nn_conv.py", line 114, in <module>
scheduler.generate_scheduling()
RuntimeError: missing or invalid registrar key: [Any, NHWC, [], Int8, NHWC, [(2, 2), (1, 1), (1, 1), (2, 2)], Int32, NHWC, [(2, 2)]], [Float32, Default, []] for registrable object N5Aidge16OperatorImpl_cpuINS_7Conv_OpILh2EEEFvRKSt5arrayImLm2EES6_S6_RKS3_ImLm4EEmPKvSB_SB_PvEFvvEEE
Did you include/import the corresponding module?
If so, it is possible that the object is not yet supported.
Finally, an open question: how do we go about exporting models that need specific metaOPs, as in the case of cmsis_nn with PaddedConvScalingRelu
? Is it up to the user to do this, or should it be done during the export?
Reproducible example code
import aidge_core
import aidge_backend_cpu
import aidge_export_arm_cortexm
import aidge_export_cpp
import aidge_onnx
import aidge_quantization
import numpy as np
np.random.seed(123)
input_dims = [1, 2, 5, 5]

# Build the model: Conv2D followed by a Quantizer (scaling) meta-operator.
# NOTE: indentation restored — it was lost in the original paste.
model = aidge_core.sequential([
    aidge_core.Conv2D(in_channels=2, out_channels=2, kernel_dims=[1, 1], name='conv', stride_dims=[1, 1], dilation_dims=[1, 1], no_bias=False),
    aidge_quantization.Quantizer(scalingFactor=0.005, clip_min=-128.0, clip_max=127.0, name="scale")
])
model.save("init")

# Initialize the conv weight (conv_w) and bias (conv_b) Producers with random
# int8 values; their data format is tagged NCHW so adapt_to_backend can later
# insert the NHWC transpositions required by CMSIS-NN.
for n in model.get_nodes():
    print("Node : " + str(n))
    if n.name() == "conv_b" or n.name() == "conv_w":
        # not for the scaling-factor producers
        dims = n.get_operator().get_output(0).dims()
        array = np.random.randint(-128, 127, size=dims, dtype=np.int8)
        n.get_operator().set_output(0, aidge_core.Tensor(array, backend="cpu"))
        n.get_operator().get_output(0).set_data_format(aidge_core.dformat.nchw)
        print(str(n.get_operator().get_output(0).dformat()))
        print("Data in :")
        print(array)
    if n.type() == "Quantizer":
        print(str(n.get_parents()))
        print("parents ")
model.set_datatype(aidge_core.dtype.float32)
model.set_backend("cpu")
# Init Producer
def propagate(model, scheduler, tensor):
    """Run one forward pass of *model* on *tensor* and return the output as a NumPy array.

    NOTE: indentation restored — it was lost in the original paste.
    Assumes the model has a single output node — TODO confirm.
    """
    # Setup the input
    input_tensor = aidge_core.Tensor(tensor)
    # Tensor backend must be set again ...
    input_tensor.set_backend("cpu")
    input_tensor.set_datatype(aidge_core.dtype.float32)
    # Run the inference
    scheduler.forward(True, [input_tensor])
    # Gather the results
    output_node = model.get_output_nodes().pop()
    output_tensor = output_node.get_operator().get_output(0)
    return np.array(output_tensor)
# Reference inference on the float/cpu model before any transformation.
# NOTE: indentation restored throughout — it was lost in the original paste.
scheduler = aidge_core.SequentialScheduler(model)
input_array = np.random.randint(-128, 127, size=input_dims, dtype=np.int8)
output_array = propagate(model, scheduler, input_array)
print(output_array)

# Fuse Conv2D->Quantizer into a single meta-operator.
model.save("PreFuse")
print("Fuse ConvScaling")
aidge_core.fuse_to_metaops(model, "Conv2D->Quantizer", "PaddedConvScalingRelu")
model.compile('cpu', aidge_core.dtype.float32, dims=[input_dims])
model.save("PostFuse")
scheduler = aidge_core.SequentialScheduler(model)
scheduler.generate_scheduling()
print("Generate Scheduling ok ")

# Switch the data types to the int8/int32 mix expected by the CMSIS-NN kernels
# (int8 activations and weights, int32 bias).
for node in model.get_nodes():
    if node.type() != "Producer":
        if node.type() in ["PaddedConvScalingRelu"]:
            node.set_name("CSgo")
            node.get_operator().get_input(0).set_datatype(aidge_core.dtype.int8)
            node.get_operator().get_input(1).set_datatype(aidge_core.dtype.int8)
            node.get_operator().get_input(2).set_datatype(aidge_core.dtype.int32)
            node.get_operator().get_output(0).set_datatype(aidge_core.dtype.int8)
        else:
            node.get_operator().set_datatype(aidge_core.dtype.int8)

from aidge_export_arm_cortexm.export_registry import ExportLibAidgeARM, ExportLibCMSISNN

# Use the ExportLibCMSISNN "cmsis_nn" backend for every operator.
for node in model.get_nodes():
    print(str(node))
    aidge_core.Log.debug(f"Setting backend {ExportLibCMSISNN._name} to {node.name()}[{node.type()}].")
    node.get_operator().set_backend(ExportLibCMSISNN._name)

# Adapt the graph to the backend specs (inserts Transpose adapters), expand the
# resulting Adapted_* meta-operators, then try to fold the constant transposes.
print("bf adapt")
aidge_core.adapt_to_backend(model)
model.save("adapt")
aidge_core.expand_metaops(model)
model.save("expand")
aidge_core.constant_folding(model)
model.save("constant_folding")
scheduler = aidge_core.SequentialScheduler(model)
scheduler.generate_scheduling()