Skip to content
Snippets Groups Projects

Add and modify operators to run the ConvNeXt onnx model

Open Matthew Newson requested to merge mnewson/aidge_export_cpp:main into dev
Files
35
+ 99
0
#ifndef __AIDGE_EXPORT_CPP_KERNELS_ADD__
#define __AIDGE_EXPORT_CPP_KERNELS_ADD__
#include "network/typedefs.hpp"
#include "kernels/activation.hpp"
/**
 * @brief Element-wise addition of two tensors with broadcasting.
 *
 * Both input shapes are left-padded with 1s up to the output rank. The
 * trailing run of dimensions on which the two padded shapes agree is
 * processed as one contiguous chunk; the remaining leading dimensions are
 * walked chunk by chunk, with per-dimension offset steps that rewind an input
 * whose dimension is 1 (i.e. is broadcast).
 *
 * The inputs and the output are indexed as flat arrays in which the last
 * dimension varies fastest.
 *
 * NOTE(review): the shapes are presumed to be broadcast-compatible (for every
 * aligned dimension the sizes are equal or one of them is 1); this is not
 * checked here.
 * NOTE(review): NB_ELTS, OUT_SIZE and ACTIVATION are not used by this body, so
 * no activation is applied to the sum — confirm this is intended.
 *
 * @tparam NB_ELTS        Unused by this implementation.
 * @tparam INPUT_A_DIMS   Shape of the first input (SIZE_DIM_IN_A entries).
 * @tparam INPUT_B_DIMS   Shape of the second input (SIZE_DIM_IN_B entries).
 * @tparam OUTPUT_DIMS    Shape of the output (SIZE_DIM_OUT entries).
 * @tparam SIZE_DIM_IN_A  Rank of the first input.
 * @tparam SIZE_DIM_IN_B  Rank of the second input.
 * @tparam SIZE_DIM_OUT   Rank of the output.
 * @tparam OUT_SIZE       Unused by this implementation.
 * @tparam ACTIVATION     Unused by this implementation.
 * @tparam Input_T        Element type of both inputs.
 * @tparam Output_T       Element type of the output.
 *
 * @param outputs  Destination buffer for the result.
 * @param inputs1  First operand.
 * @param inputs2  Second operand.
 */
template<int NB_ELTS,
         int INPUT_A_DIMS[], int INPUT_B_DIMS[], int OUTPUT_DIMS[],
         int SIZE_DIM_IN_A, int SIZE_DIM_IN_B, int SIZE_DIM_OUT, int OUT_SIZE,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T>
__attribute__((always_inline)) inline
void add_forward (
    Output_T* __restrict outputs,
    const Input_T* __restrict inputs1,
    const Input_T* __restrict inputs2)
{
    // Compile-time capacity for every per-dimension scratch array. Using the
    // output rank (at least 1) avoids runtime-sized and zero-length arrays.
    constexpr int RANK = (SIZE_DIM_OUT > 0) ? SIZE_DIM_OUT : 1;

    // Input shapes left-padded with 1s so that both have the output rank.
    int ndim_a[RANK];
    int ndim_b[RANK];
    const int pad_a = SIZE_DIM_OUT - SIZE_DIM_IN_A;
    const int pad_b = SIZE_DIM_OUT - SIZE_DIM_IN_B;
    for (int i = 0; i < SIZE_DIM_OUT; ++i) {
        ndim_a[i] = (i < pad_a) ? 1 : INPUT_A_DIMS[i - pad_a];
        ndim_b[i] = (i < pad_b) ? 1 : INPUT_B_DIMS[i - pad_b];
    }

    // Find the first index of the trailing run of equal dimensions. If the
    // last dimensions already differ, the last dimension alone forms the
    // chunk. The `i >= 0` bound stops the scan at the first dimension when
    // both padded shapes are identical (otherwise index -1 would be read).
    int contiguousidx = (SIZE_DIM_OUT > 0) ? SIZE_DIM_OUT - 1 : 0;
    for (int i = SIZE_DIM_OUT - 1; i >= 0 && ndim_a[i] == ndim_b[i]; --i) {
        contiguousidx = i;
    }

    // Number of elements in one contiguous chunk of each tensor.
    int input0_contiguous_size = 1;
    int input1_contiguous_size = 1;
    int output_contiguous_size = 1;
    for (int i = contiguousidx; i < SIZE_DIM_OUT; ++i) {
        input0_contiguous_size *= ndim_a[i];
        input1_contiguous_size *= ndim_b[i];
        output_contiguous_size *= OUTPUT_DIMS[i];
    }

    // Strides (in chunks) over the leading dimensions [0, contiguousidx).
    // stride_post*: number of chunks spanned by one step along a dimension.
    // stride_step*: chunk-offset increment applied when that dimension
    // advances; a broadcast dimension (size 1) rewinds instead of advancing.
    int stride_post0[RANK] = {};
    int stride_post1[RANK] = {};
    int stride_step0[RANK] = {};
    int stride_step1[RANK] = {};
    if (contiguousidx > 0) {
        stride_post0[contiguousidx - 1] = 1;
        stride_post1[contiguousidx - 1] = 1;
        for (int i = contiguousidx - 2; i >= 0; --i) {
            stride_post0[i] = stride_post0[i + 1] * ndim_a[i + 1];
            stride_post1[i] = stride_post1[i + 1] * ndim_b[i + 1];
        }
        for (int i = 0; i < contiguousidx; ++i) {
            stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1;
            stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1;
        }
    }

    // Number of output chunks to produce.
    int nbMatrices = 1;
    for (int i = 0; i < contiguousidx; ++i) {
        nbMatrices *= OUTPUT_DIMS[i];
    }

    // Current chunk offsets into each tensor.
    int offsetIn0 = 0;
    int offsetIn1 = 0;
    int offsetOut = 0;

    for (int stack = 0; stack < nbMatrices;) {
        // Add one chunk; an input whose chunk holds a single element is
        // broadcast across the whole output chunk.
        for (int i = 0; i < output_contiguous_size; ++i) {
            const int in0_id = (input0_contiguous_size != 1) ? i : 0;
            const int in1_id = (input1_contiguous_size != 1) ? i : 0;
            outputs[i + offsetOut * output_contiguous_size] =
                inputs1[in0_id + offsetIn0 * input0_contiguous_size]
                + inputs2[in1_id + offsetIn1 * input1_contiguous_size];
        }

        if (++stack < nbMatrices) {
            // Locate the leading dimension that advances for the next chunk:
            // walk up from the innermost leading dimension while the chunk
            // counter wraps around that dimension.
            int tmp_stack = stack;
            int dim = contiguousidx - 1;
            while (tmp_stack % OUTPUT_DIMS[dim] == 0) {
                tmp_stack /= OUTPUT_DIMS[dim];
                dim--;
            }
            offsetIn0 += stride_step0[dim];
            offsetIn1 += stride_step1[dim];
            ++offsetOut;
        }
    }
}
#endif // __AIDGE_EXPORT_CPP_KERNELS_ADD__
\ No newline at end of file
Loading