Skip to content
Snippets Groups Projects
Commit bb81ff6e authored by Vincent Baudelet's avatar Vincent Baudelet
Browse files

[issue 24] Refactoring kernels

parent b920a41f
No related branches found
No related tags found
No related merge requests found
Pipeline #67334 failed
Showing
with 239 additions and 2 deletions
/**
 * Element-wise sum of two buffers: output[i] = input_a[i] + input_b[i].
 *
 * @tparam SIZE     Number of elements processed in each buffer.
 * @tparam T        Element type shared by both inputs and the output.
 * @param input_a   First operand, SIZE elements.
 * @param input_b   Second operand, SIZE elements.
 * @param output    Destination, SIZE elements. All three pointers are
 *                  declared __restrict, so the buffers must not overlap.
 */
template <unsigned int SIZE, typename T>
__attribute__((always_inline)) inline static
void aidge_add(T* __restrict input_a, T* __restrict input_b, T* __restrict output) {
    unsigned int pos = 0;
    while (pos < SIZE) {
        // The sum is stored without an explicit cast on purpose: if the
        // promoted result does not fit in T the compiler can warn about it.
        output[pos] = input_a[pos] + input_b[pos];
        ++pos;
    }
}
\ No newline at end of file
#include <cmath>
template <unsigned int SIZE, typename Input_T, typename Output_T>
template <unsigned int SIZE, typename T>
__attribute__((always_inline)) inline static
void aidge_atan(Input_T* __restrict input, Output_T* __restrict output) {
void aidge_atan(T* __restrict input, T* __restrict output) {
for (unsigned int i = 0; i < SIZE; ++i) {
// Note : no cast to get compiler warning if we lose precision during auto cast!
output[i] = std::atan(input[i]);
......
#include <cmath>
/**
 * Batch normalization over a channel-major buffer.
 *
 * For every channel c and every position f inside that channel's feature map:
 *   outputs[c * featureMapSize + f] =
 *       bias[c] + scale[c] * (inputs[c * featureMapSize + f] - input_mean[c])
 *                 / sqrt(input_var[c] + epsilon)
 *
 * @tparam T               Element type of inputs and outputs.
 * @tparam MeanVar_T       Element type of input_mean and input_var.
 * @tparam ScaleBias_T     Element type of scale and bias.
 * @tparam SpatialDims_T   Element type of spatial_dims.
 * @tparam NB_Channels     Number of channels.
 * @tparam NB_SpatialDims  Number of entries read from spatial_dims.
 * @param inputs        Input buffer, NB_Channels * featureMapSize elements.
 * @param outputs       Output buffer of the same size; must not alias inputs
 *                      (both are __restrict).
 * @param input_mean    Per-channel mean, NB_Channels elements.
 * @param input_var     Per-channel variance, NB_Channels elements.
 * @param scale         Per-channel scale, NB_Channels elements.
 * @param bias          Per-channel bias, NB_Channels elements.
 * @param spatial_dims  Extents whose product is the per-channel feature-map size.
 * @param epsilon       Added to the variance before taking the square root.
 */
template <
    typename T,
    typename MeanVar_T,
    typename ScaleBias_T,
    typename SpatialDims_T,
    unsigned int NB_Channels,
    unsigned int NB_SpatialDims
>
__attribute__((always_inline)) inline static
void aidge_batchnorm(T* __restrict inputs,
                     T* __restrict outputs,
                     MeanVar_T* __restrict input_mean,
                     MeanVar_T* __restrict input_var,
                     ScaleBias_T* __restrict scale,
                     ScaleBias_T* __restrict bias,
                     SpatialDims_T* __restrict spatial_dims,
                     float epsilon)
{
    // Number of elements in one channel's feature map.
    int featureMapSize = 1;
    for (unsigned int dimIdx = 0; dimIdx < NB_SpatialDims; ++dimIdx) {
        featureMapSize *= spatial_dims[dimIdx];
    }

    for (unsigned int channel = 0; channel < NB_Channels; ++channel) {
        const int channelOffset = static_cast<int>(channel) * featureMapSize;

        // Loop-invariant per channel: standard deviation, not variance.
        const float stdDev = std::sqrt(input_var[channel] + epsilon);

        for (int feature = 0; feature < featureMapSize; ++feature) {
            const int idx = channelOffset + feature;

            // Start from the bias converted to T, then add the normalized
            // term: this is the same arithmetic as the former two-pass
            // "fill with bias, then +=" sequence, done in a single pass.
            T normalized = bias[channel];
            normalized += scale[channel] * (inputs[idx] - input_mean[channel]) / stdDev;
            outputs[idx] = normalized;
        }
    }
}
\ No newline at end of file
/**
 * Element-wise quotient of two buffers: output[i] = input_a[i] / input_b[i].
 *
 * @tparam SIZE     Number of elements processed in each buffer.
 * @tparam T        Element type shared by both inputs and the output.
 * @param input_a   Numerators, SIZE elements.
 * @param input_b   Denominators, SIZE elements.
 * @param output    Destination, SIZE elements; must not overlap the inputs
 *                  (all pointers are __restrict).
 */
template <unsigned int SIZE, typename T>
__attribute__((always_inline)) inline static
void aidge_div(T* __restrict input_a, T* __restrict input_b, T* __restrict output) {
    for (unsigned int n = 0; n != SIZE; ++n) {
        const T& numerator = input_a[n];
        const T& denominator = input_b[n];
        // No explicit cast on the result, so the compiler can warn when the
        // automatic conversion back to T would lose precision.
        // [TODO] : a zero denominator (input_b[n] == 0) is not handled.
        output[n] = numerator / denominator;
    }
}
\ No newline at end of file
/**
 * Batched matrix product with broadcasting over the leading (batch) dimensions.
 *
 * Both operand shapes are right-aligned against the output rank and padded
 * with leading 1s. The last two padded dimensions of input_a are (n, k) and
 * the last padded dimension of input_b is m; every batch entry computes an
 * (n x k) * (k x m) product into an (n x m) output matrix. A batch dimension
 * of extent 1 in an operand is broadcast, i.e. the same matrix is reused.
 *
 * NOTE(review): the previous implementation contained special cases for 1-D
 * operands (a -> [1, K], b -> [K, 1]) that were immediately overwritten by
 * the generic right-aligned padding, so they never had any effect. The
 * effective behaviour (right-aligned padding for both operands) is kept here;
 * confirm whether a 1-D input_b was meant to be treated as a column vector.
 *
 * @tparam T       Element type of the operands and of the output.
 * @tparam Dim_T   Element type of the shape arrays.
 * @tparam Size_T  Integral type of the rank arguments.
 * @param input_a         First operand, row-major.
 * @param input_b         Second operand, row-major.
 * @param output          Result buffer, row-major; must not alias the inputs.
 * @param dim_a           Shape of input_a, size_aDim entries.
 * @param dim_b           Shape of input_b, size_bDim entries.
 * @param dim_output      Shape of output, size_outputDim entries; only the
 *                        leading size_outputDim - 2 (batch) entries are read.
 * @param size_aDim       Rank of input_a.
 * @param size_bDim       Rank of input_b.
 * @param size_outputDim  Rank of output.
 */
template <
    typename T,
    typename Dim_T,
    typename Size_T
>
__attribute__((always_inline)) inline static
void aidge_matmul(T* __restrict input_a,
                  T* __restrict input_b,
                  T* __restrict output,
                  Dim_T* __restrict dim_a,
                  Dim_T* __restrict dim_b,
                  Dim_T* __restrict dim_output,
                  Size_T size_aDim,
                  Size_T size_bDim,
                  Size_T size_outputDim)
{
    // Work on signed ranks so that "rank - 2" style expressions cannot wrap.
    const int rankA = static_cast<int>(size_aDim);
    const int rankB = static_cast<int>(size_bDim);
    const int rankOut = static_cast<int>(size_outputDim);

    // Number of leading 1s needed to right-align each operand with the output.
    const int padA = rankOut - rankA;
    const int padB = rankOut - rankB;

    // Number of batch dimensions (may be <= 0, in which case there is one matrix).
    const int nbBatchDims = rankOut - 2;

    // Matrix extents, taken from the right-aligned shapes.
    const int n = (rankA >= 2) ? static_cast<int>(dim_a[rankA - 2]) : 1;
    const int k = (rankA >= 1) ? static_cast<int>(dim_a[rankA - 1]) : 1;
    const int m = (rankB >= 1) ? static_cast<int>(dim_b[rankB - 1]) : 1;

    const int matrix0Size = n * k;
    const int matrix1Size = k * m;
    const int matrixOutSize = n * m;

    int nbMatrices = 1;
    for (int d = nbBatchDims - 1; d >= 0; --d) {
        nbMatrices *= static_cast<int>(dim_output[d]);
    }

    for (int stack = 0; stack < nbMatrices; ++stack) {
        // Decompose the flat batch index into per-dimension coordinates
        // (innermost batch dimension first) and accumulate the matching
        // matrix index in each operand. A broadcast dimension (extent 1)
        // contributes nothing to the operand's offset.
        int remaining = stack;
        int offsetIn0 = 0;
        int offsetIn1 = 0;
        int strideIn0 = 1;
        int strideIn1 = 1;
        for (int d = nbBatchDims - 1; d >= 0; --d) {
            const int outExtent = static_cast<int>(dim_output[d]);
            const int coord = remaining % outExtent;
            remaining /= outExtent;

            const int extentA = (d >= padA) ? static_cast<int>(dim_a[d - padA]) : 1;
            const int extentB = (d >= padB) ? static_cast<int>(dim_b[d - padB]) : 1;

            if (extentA != 1) {
                offsetIn0 += coord * strideIn0;
            }
            if (extentB != 1) {
                offsetIn1 += coord * strideIn1;
            }
            strideIn0 *= extentA;
            strideIn1 *= extentB;
        }

        T* matA = input_a + offsetIn0 * matrix0Size;
        T* matB = input_b + offsetIn1 * matrix1Size;
        T* matOut = output + stack * matrixOutSize;

        for (int i = 0; i < n; ++i) {
            for (int j = 0; j < m; ++j) {
                // Accumulator kept in float, as in the previous implementation.
                float sum = 0;
                for (int l = 0; l < k; ++l) {
                    sum += (matA[i * k + l] * matB[l * m + j]);
                }
                matOut[i * m + j] = sum;
            }
        }
    }
}
\ No newline at end of file
/**
 * Element-wise product of two buffers: output[i] = input_a[i] * input_b[i].
 *
 * @tparam SIZE     Number of elements processed in each buffer.
 * @tparam T        Element type shared by both inputs and the output.
 * @param input_a   First factor, SIZE elements.
 * @param input_b   Second factor, SIZE elements.
 * @param output    Destination, SIZE elements; must not overlap the inputs
 *                  (all pointers are __restrict).
 */
template <unsigned int SIZE, typename T>
__attribute__((always_inline)) inline static
void aidge_mul(T* __restrict input_a, T* __restrict input_b, T* __restrict output) {
    T* lhs = input_a;
    T* rhs = input_b;
    T* const outputEnd = output + SIZE;
    for (T* dst = output; dst != outputEnd; ++dst, ++lhs, ++rhs) {
        // Stored without an explicit cast so the compiler can warn if the
        // implicit conversion of the product back to T loses precision.
        *dst = *lhs * *rhs;
    }
}
\ No newline at end of file
/**
 * Rectified linear unit: output[i] = input[i] if input[i] >= 0, else 0.
 *
 * The comparison and the zero are expressed in T. Using float literals here
 * would make the conditional expression a float for integer T, silently
 * rounding values that a float cannot represent exactly (above 2^24).
 *
 * @tparam SIZE    Number of elements processed.
 * @tparam T       Element type of input and output.
 * @param input    Source buffer, SIZE elements.
 * @param output   Destination buffer, SIZE elements; must not overlap input
 *                 (both pointers are __restrict).
 */
template <unsigned int SIZE, typename T>
__attribute__((always_inline)) inline static
void aidge_relu(T* __restrict input, T* __restrict output) {
    const T zero = static_cast<T>(0);
    for (unsigned int i = 0; i < SIZE; ++i) {
        output[i] = (input[i] < zero) ? zero : input[i];
    }
}
\ No newline at end of file
/**
 * Reshape kernel: copies SIZE elements from input to output in order.
 * The element order in memory is left unchanged.
 *
 * @tparam SIZE    Number of elements copied.
 * @tparam T       Element type of input and output.
 * @param input    Source buffer, SIZE elements.
 * @param output   Destination buffer, SIZE elements; must not overlap input
 *                 (both pointers are __restrict).
 */
template <unsigned int SIZE, typename T>
__attribute__((always_inline)) inline static
void aidge_reshape(T* __restrict input, T* __restrict output) {
    T* src = input;
    T* dst = output;
    unsigned int remaining = SIZE;
    while (remaining != 0) {
        *dst = *src;
        ++dst;
        ++src;
        --remaining;
    }
}
\ No newline at end of file
#include <math.h>
/**
 * Logistic sigmoid applied element-wise:
 *   outputs[i] = 1 / (1 + exp(-inputs[i]))
 *
 * @tparam SIZE     Number of elements processed.
 * @tparam T        Element type of inputs and outputs.
 * @param inputs    Source buffer, SIZE elements.
 * @param outputs   Destination buffer, SIZE elements; must not overlap inputs
 *                  (both pointers are __restrict).
 */
template <unsigned int SIZE, typename T>
__attribute__((always_inline)) inline static
void aidge_sigmoid(T* __restrict inputs, T* __restrict outputs) {
    unsigned int pos = 0;
    while (pos < SIZE) {
        // Result is stored without an explicit cast so the compiler can warn
        // if the implicit conversion back to T loses precision.
        outputs[pos] = 1 / (1 + exp(-inputs[pos]));
        ++pos;
    }
}
\ No newline at end of file
#include <math.h>
// Legacy scratch storage. aidge_softmax below no longer reads or writes it
// (the output buffer is used as scratch instead, which removes the 128-element
// limit on the softmax axis). The macro and the buffer are retained only so
// that any code outside this file that still references them keeps building.
// NOTE(review): remove both once confirmed unused.
#define MAX_DIMS_AXIS_SIZE 128 /** TODO : is 128 enough or too big ? | Other possibility is to use a shared buffer as param, but this could have a side effect on Aidge's overall mechanics **/
float exps[MAX_DIMS_AXIS_SIZE];

/**
 * Softmax along one axis of a row-major tensor.
 *
 * For every 1-D slice taken along `axis`:
 *   output[x] = exp(input[x] - max) / sum(exp(input[y] - max))
 * where max is the largest input value of the slice. Subtracting the maximum
 * leaves the mathematical result unchanged but keeps exp() from overflowing
 * for large inputs.
 *
 * The exponentials are staged in `output` before being normalized, so the
 * length of the softmax axis is not bounded by any fixed scratch buffer.
 * Assumes T is a floating-point type.
 *
 * @tparam T       Element type of input and output.
 * @tparam Dim_T   Element type of the shape array.
 * @tparam Size_T  Integral type of the rank argument.
 * @param input     Source tensor, row-major.
 * @param output    Destination tensor of the same shape; must not alias input.
 * @param dims      Tensor shape, dim_size entries.
 * @param dim_size  Number of dimensions (rank).
 * @param axis      Axis to normalize over; a negative value counts from the
 *                  last dimension (dim_size is added to it).
 */
template <
    typename T,
    typename Dim_T,
    typename Size_T
>
__attribute__((always_inline)) inline static
void aidge_softmax(T* __restrict input,
                   T* __restrict output,
                   Dim_T* __restrict dims,
                   Size_T dim_size,
                   int axis)
{
    const int rank = static_cast<int>(dim_size);
    if (axis < 0) {
        axis += rank;
    }

    // Number of elements spanned by the dimensions after / before the axis.
    int postAxisElems = 1;
    for (int index = axis + 1; index < rank; ++index) {
        postAxisElems *= static_cast<int>(dims[index]);
    }
    int preAxisElems = 1;
    for (int index = 0; index < axis; ++index) {
        preAxisElems *= static_cast<int>(dims[index]);
    }

    const int axisSize = static_cast<int>(dims[axis]);
    if (axisSize <= 0) {
        return; // Nothing to normalize.
    }

    for (int i = 0; i < preAxisElems; ++i) {
        for (int j = 0; j < postAxisElems; ++j) {
            // First element of the slice; consecutive elements along the
            // axis are postAxisElems apart.
            const int baseIdx = i * axisSize * postAxisElems + j;

            // Pass 1: maximum of the slice.
            T maxVal = input[baseIdx];
            for (int k = 1; k < axisSize; ++k) {
                const T candidate = input[baseIdx + k * postAxisElems];
                if (candidate > maxVal) {
                    maxVal = candidate;
                }
            }

            // Pass 2: shifted exponentials, staged in the output buffer.
            float sumExp = 0.0f;
            for (int k = 0; k < axisSize; ++k) {
                const int idx = baseIdx + k * postAxisElems;
                output[idx] = exp(input[idx] - maxVal);
                sumExp += output[idx];
            }

            // Pass 3: normalize.
            for (int k = 0; k < axisSize; ++k) {
                const int idx = baseIdx + k * postAxisElems;
                output[idx] = output[idx] / sumExp;
            }
        }
    }
}
\ No newline at end of file
/**
 * Element-wise difference of two buffers: output[i] = input_a[i] - input_b[i].
 *
 * @tparam SIZE     Number of elements processed in each buffer.
 * @tparam T        Element type shared by both inputs and the output.
 * @param input_a   Minuends, SIZE elements.
 * @param input_b   Subtrahends, SIZE elements.
 * @param output    Destination, SIZE elements; must not overlap the inputs
 *                  (all pointers are __restrict).
 */
template <unsigned int SIZE, typename T>
__attribute__((always_inline)) inline static
void aidge_sub(T* __restrict input_a, T* __restrict input_b, T* __restrict output) {
    for (unsigned int pos = 0, remaining = SIZE; remaining != 0; ++pos, --remaining) {
        // Stored without an explicit cast so the compiler can warn if the
        // implicit conversion of the difference back to T loses precision.
        output[pos] = input_a[pos] - input_b[pos];
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment