Skip to content
Snippets Groups Projects
Commit 9a8a1216 authored by Vincent Baudelet's avatar Vincent Baudelet
Browse files

Kernel refactoring : Intermediate commit

parent b920a41f
No related tags found
No related merge requests found
Pipeline #64531 failed
Showing
with 171 additions and 0 deletions
template <unsigned int SIZE, typename Input_T, typename Output_T>
__attribute__((always_inline)) inline static
void aidge_add(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
    // Element-wise sum over SIZE elements.
    // No explicit cast: an implicit lossy conversion to Output_T should
    // trigger a compiler warning rather than be silently hidden.
    for (unsigned int idx = 0u; idx != SIZE; ++idx) {
        const auto elementSum = input_a[idx] + input_b[idx];
        output[idx] = elementSum;
    }
}
\ No newline at end of file
#include <cmath>
template <
    typename Input_T,
    typename Output_T,
    typename MeanVar_T,
    typename ScaleBias_T,
    typename SpatialDims_T,
    unsigned int NB_Channels,
    unsigned int NB_SpatialDims
>
__attribute__((always_inline)) inline static
void aidge_batchnorm(Input_T* __restrict inputs,
                        Output_T* __restrict outputs,
                        MeanVar_T* __restrict input_mean,
                        MeanVar_T* __restrict input_var,
                        ScaleBias_T* __restrict scale,
                        ScaleBias_T* __restrict bias,
                        SpatialDims_T* __restrict spatial_dims,
                        float epsilon)
{
    // Batch normalization (inference):
    //   out = bias[c] + scale[c] * (x - mean[c]) / sqrt(var[c] + epsilon)
    // Data layout: channel-major, featureMapSize contiguous values per channel.

    // Number of elements per channel = product of the spatial dimensions.
    // Unsigned counters: NB_SpatialDims / NB_Channels are unsigned template
    // constants, so this avoids signed/unsigned comparison warnings.
    unsigned int featureMapSize = 1;
    for (unsigned int dim = 0; dim < NB_SpatialDims; ++dim) {
        featureMapSize *= spatial_dims[dim];
    }
    for (unsigned int channel = 0; channel < NB_Channels; ++channel) {
        const unsigned int ioIndex = channel * featureMapSize;
        // Hoist all per-channel constants out of the inner loop.
        // std::sqrt (from <cmath>) instead of unqualified sqrt.
        const float stdDev = std::sqrt(input_var[channel] + epsilon);
        const ScaleBias_T channelScale = scale[channel];
        const ScaleBias_T channelBias = bias[channel];
        const MeanVar_T channelMean = input_mean[channel];
        // Single pass: the original first stored the bias into the whole
        // output slice, then added the normalized term in a second loop.
        for (unsigned int i = 0; i < featureMapSize; ++i) {
            outputs[ioIndex + i] = channelBias
                + channelScale * (inputs[ioIndex + i] - channelMean) / stdDev;
        }
    }
}
\ No newline at end of file
/*
Broadcasts an input tensor onto a larger output shape (right-aligned,
NumPy-style broadcasting rules).
@param inputs       Input tensor values to broadcast
@param output       Output tensor values to fill
@param dim_input    Array of input dimensions
@param dim_output   Array of output dimensions
@param nb_dimInput  Number of input dimensions
@param nb_dimOutput Number of output dimensions
@param input_size   Total number of input elements
@param output_size  Total number of output elements to produce
*/
#include <iostream>
#include <vector>
#include <stdexcept>
void broadcast(
    const float* inputs,
    float* output,
    const int dim_input[],
    const int dim_output[],
    int nb_dimInput,
    int nb_dimOutput,
    int input_size,
    int output_size) {
    // Row-major strides of the input tensor (innermost axis has stride 1).
    std::vector<int> in_strides(nb_dimInput, 1);
    for (int axis = nb_dimInput - 2; axis >= 0; --axis) {
        in_strides[axis] = in_strides[axis + 1] * dim_input[axis + 1];
    }
    for (int out_flat = 0; out_flat < output_size; ++out_flat) {
        // Decompose the flat output index into per-axis coordinates,
        // rightmost axis first, and accumulate the matching input offset.
        // Input axes are right-aligned against output axes.
        int in_flat = 0;
        int remainder = out_flat;
        int in_axis = nb_dimInput - 1;
        for (int out_axis = nb_dimOutput - 1; out_axis >= 0; --out_axis) {
            const int coord = remainder % dim_output[out_axis];
            remainder /= dim_output[out_axis];
            if (in_axis >= 0) {
                // A broadcast (non-matching) input axis is pinned to coordinate 0.
                if (dim_input[in_axis] == dim_output[out_axis]) {
                    in_flat += coord * in_strides[in_axis];
                }
                --in_axis;
            }
        }
        if (in_flat >= input_size) {
            throw std::out_of_range("Index out of range in input tensor.");
        }
        output[out_flat] = inputs[in_flat];
    }
}
/*
Returns true when either input tensor must be broadcast to reach the output
dimensions; throws std::invalid_argument when a tensor cannot be broadcast
(a dimension differs from the output dimension and is not 1). Dimensions
are right-aligned (NumPy convention): missing leading dims count as 1.
tensor_a/tensor_b/size_a/size_b are unused but kept for interface
compatibility.
*/
bool should_broadcast(
    const float* tensor_a, const float* tensor_b,
    const int dim_a[], const int dim_b[], const int dim_output[],
    int nb_dimA, int nb_dimB, int nb_dimOutput,
    int size_a, int size_b) {
    (void)tensor_a; (void)tensor_b; (void)size_a; (void)size_b;
    for (int i = 0; i < nb_dimOutput; ++i) {
        // Right-align each tensor's dimensions against the output dimensions.
        const int a_idx = i - (nb_dimOutput - nb_dimA);
        const int b_idx = i - (nb_dimOutput - nb_dimB);
        const int a_val = (a_idx >= 0) ? dim_a[a_idx] : 1;
        const int b_val = (b_idx >= 0) ? dim_b[b_idx] : 1;
        if (a_val != dim_output[i] && a_val != 1) {
            throw std::invalid_argument("Tensor A cannot be broadcasted to output dimensions.");
        }
        // Bug fix: the original computed dim_b_val but never examined it,
        // so a tensor B that needed broadcasting was reported as "no
        // broadcast needed" (and an incompatible B was never rejected).
        if (b_val != dim_output[i] && b_val != 1) {
            throw std::invalid_argument("Tensor B cannot be broadcasted to output dimensions.");
        }
        if (a_val != dim_output[i] || b_val != dim_output[i]) {
            return true;
        }
    }
    return false;
}
template <unsigned int SIZE, typename Input_T, typename Output_T>
__attribute__((always_inline)) inline static
void aidge_div(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
    // Element-wise quotient (a / b) over SIZE elements.
    // No explicit cast so the compiler can warn on precision loss.
    // TODO: behaviour when input_b[i] == 0 is still an open question
    // (inherited from the original kernel).
    for (unsigned int idx = 0u; idx < SIZE; ++idx) {
        const auto numerator   = input_a[idx];
        const auto denominator = input_b[idx];
        output[idx] = numerator / denominator;
    }
}
\ No newline at end of file
template <unsigned int SIZE, typename Input_T, typename Output_T>
__attribute__((always_inline)) inline static
void aidge_mul(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
    // Element-wise product over SIZE elements.
    // The product is assigned without an explicit cast so the compiler
    // can warn about lossy implicit conversions to Output_T.
    for (unsigned int idx = 0u; idx != SIZE; ++idx) {
        const auto product = input_a[idx] * input_b[idx];
        output[idx] = product;
    }
}
\ No newline at end of file
template <unsigned int SIZE, typename Input_T, typename Output_T>
__attribute__((always_inline)) inline static
void aidge_relu(Input_T* __restrict input, Output_T* __restrict output) {
    // ReLU: out[i] = max(0, in[i]), element-wise over SIZE elements.
    for (unsigned int i = 0; i < SIZE; ++i) {
        // Compare against Input_T(0) instead of the 0.0f literal: the
        // original comparison silently promoted integer inputs to float.
        // Result is implicitly converted to Output_T (no cast, so the
        // compiler can warn on precision loss).
        output[i] = (input[i] < Input_T(0)) ? Input_T(0) : input[i];
    }
}
\ No newline at end of file
template <unsigned int SIZE, typename Input_T, typename Output_T>
__attribute__((always_inline)) inline static
void aidge_reshape(Input_T* __restrict input, Output_T* __restrict output) {
    // Reshape is a plain element copy: the underlying memory layout is
    // unchanged, only the logical dimensions differ.
    const Input_T* src = input;
    Output_T* dst = output;
    for (unsigned int remaining = SIZE; remaining != 0u; --remaining) {
        *dst++ = *src++;
    }
}
\ No newline at end of file
#include <math.h>
template <unsigned int SIZE, typename Input_T, typename Output_T>
__attribute__((always_inline)) inline static
void aidge_sigmoid(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
    // Logistic sigmoid applied element-wise to input_a: 1 / (1 + e^-x).
    // input_b is unused; the parameter is kept so the signature matches
    // the other binary element-wise kernels (add/sub/mul/div).
    (void)input_b;
    for (unsigned int i = 0; i < SIZE; ++i) {
        // Bug fix: the original read from an undeclared identifier
        // `inputs` (the parameter is `input_a`), so it did not compile.
        // Note: no cast to get compiler warning if we lose precision during auto cast!
        output[i] = 1 / (1 + exp(-input_a[i]));
    }
}
\ No newline at end of file
template <unsigned int SIZE, typename Input_T, typename Output_T>
__attribute__((always_inline)) inline static
void aidge_sub(Input_T* __restrict input_a, Input_T* __restrict input_b, Output_T* __restrict output) {
    // Element-wise difference (a - b) over SIZE elements.
    // No explicit cast: implicit conversion to Output_T lets the compiler
    // flag any precision loss.
    unsigned int idx = 0;
    while (idx < SIZE) {
        output[idx] = input_a[idx] - input_b[idx];
        ++idx;
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment