Skip to content
Snippets Groups Projects
Commit 56506f56 authored by Cyril Moineau's avatar Cyril Moineau
Browse files

Merge branch 'allowNoInputProducer' into 'dev'

Export refactor

See merge request !11
parents 6f3ee49b 10868393
No related branches found
No related tags found
No related merge requests found
Showing
with 231 additions and 331 deletions
#include <math.h>
/**
 * Element-wise arctangent of a float buffer.
 *
 * @param input  Source buffer, `size` elements are read.
 * @param output Destination buffer, `size` elements are written.
 * @param size   Number of elements to process; 0 leaves `output` untouched.
 */
void aidge_atan_float32 (float* input,
                         float* output,
                         unsigned int size)
{
    const float* src = input;
    float* dst = output;

    // Walk both buffers front to back, one element per step.
    for (unsigned int remaining = size; remaining > 0; --remaining) {
        *dst++ = atanf(*src++);
    }
}
void aidge_add_float32(float* input_a, void aidge_add_float32(const float* input_a,
float* input_b, const float* input_b,
float* output, float* output,
int dim_a[], const int dim_a[],
int dim_b[], const int dim_b[],
int output_Dim[], const int output_Dim[],
int size_dima, int size_dima,
int size_dimb, int size_dimb,
int size_outputDim, int size_outputDim,
int output_size) int output_size)
{ {
// Broadcast dims // Broadcast dims
int ndim_a[size_outputDim]; int ndim_a[size_outputDim];
int ndim_b[size_outputDim]; int ndim_b[size_outputDim];
for (int i= 0; i<size_outputDim; i++){ for (int i= 0; i<size_outputDim; i++){
int idx = size_outputDim-size_dima; int idx = size_outputDim-size_dima;
...@@ -96,4 +96,4 @@ void aidge_add_float32(float* input_a, ...@@ -96,4 +96,4 @@ void aidge_add_float32(float* input_a,
} }
} }
} }
\ No newline at end of file
#include <cmath>
/**
 * Element-wise arctangent over a compile-time sized buffer.
 *
 * @tparam SIZE     Number of elements to process.
 * @tparam Input_T  Element type read from `input`.
 * @tparam Output_T Element type written to `output`.
 * @param input     Source buffer (must not alias `output`, see __restrict).
 * @param output    Destination buffer.
 */
template <unsigned int SIZE, typename Input_T, typename Output_T>
__attribute__((always_inline)) inline static
void aidge_atan(Input_T* __restrict input, Output_T* __restrict output) {
    unsigned int idx = 0;
    while (idx < SIZE) {
        // The result of std::atan is assigned without an explicit cast on
        // purpose: a lossy implicit conversion then shows up as a compiler
        // warning instead of being silenced.
        output[idx] = std::atan(input[idx]);
        ++idx;
    }
}
#include <stdarg.h>
/**
 * Copies two float buffers back to back into `output`.
 *
 * @param axis    Accepted for interface compatibility; never read here.
 * @param input1  First source buffer (`size1` elements).
 * @param size1   Element count of `input1`.
 * @param input2  Second source buffer (`size2` elements).
 * @param size2   Element count of `input2`.
 * @param output  Destination, receives `size1 + size2` elements.
 */
void aidge_concat2_float32 (unsigned int axis,
                            float* input1,
                            unsigned int size1,
                            float* input2,
                            unsigned int size2,
                            float* output)
{
    // Single write cursor that keeps advancing across both sources.
    float* dst = output;

    for (unsigned int k = 0; k < size1; ++k) {
        *dst++ = input1[k];
    }
    for (unsigned int k = 0; k < size2; ++k) {
        *dst++ = input2[k];
    }
}
/**
 * Copies three float buffers back to back into `output`.
 *
 * @param axis    Accepted for interface compatibility; never read here.
 * @param input1  First source buffer (`size1` elements).
 * @param size1   Element count of `input1`.
 * @param input2  Second source buffer (`size2` elements).
 * @param size2   Element count of `input2`.
 * @param input3  Third source buffer (`size3` elements).
 * @param size3   Element count of `input3`.
 * @param output  Destination, receives `size1 + size2 + size3` elements.
 */
void aidge_concat3_float32 (unsigned int axis,
                            float* input1,
                            unsigned int size1,
                            float* input2,
                            unsigned int size2,
                            float* input3,
                            unsigned int size3,
                            float* output)
{
    // Single write cursor that keeps advancing across all sources.
    float* dst = output;

    for (unsigned int k = 0; k < size1; ++k) {
        *dst++ = input1[k];
    }
    for (unsigned int k = 0; k < size2; ++k) {
        *dst++ = input2[k];
    }
    for (unsigned int k = 0; k < size3; ++k) {
        *dst++ = input3[k];
    }
}
/**
 * Copies four float buffers back to back into `output`.
 *
 * @param axis           Accepted for interface compatibility; never read here.
 * @param input1..input4 Source buffers, copied in that order.
 * @param size1..size4   Element count of the matching source buffer.
 * @param output         Destination, receives the sum of all sizes.
 */
void aidge_concat4_float32 (unsigned int axis,
                            float* input1,
                            unsigned int size1,
                            float* input2,
                            unsigned int size2,
                            float* input3,
                            unsigned int size3,
                            float* input4,
                            unsigned int size4,
                            float* output)
{
    // Single write cursor that keeps advancing across all sources.
    float* dst = output;

    for (unsigned int k = 0; k < size1; ++k) {
        *dst++ = input1[k];
    }
    for (unsigned int k = 0; k < size2; ++k) {
        *dst++ = input2[k];
    }
    for (unsigned int k = 0; k < size3; ++k) {
        *dst++ = input3[k];
    }
    for (unsigned int k = 0; k < size4; ++k) {
        *dst++ = input4[k];
    }
}
/**
 * Copies five float buffers back to back into `output`.
 *
 * @param axis           Accepted for interface compatibility; never read here.
 * @param input1..input5 Source buffers, copied in that order.
 * @param size1..size5   Element count of the matching source buffer.
 * @param output         Destination, receives the sum of all sizes.
 */
void aidge_concat5_float32 (unsigned int axis,
                            float* input1,
                            unsigned int size1,
                            float* input2,
                            unsigned int size2,
                            float* input3,
                            unsigned int size3,
                            float* input4,
                            unsigned int size4,
                            float* input5,
                            unsigned int size5,
                            float* output)
{
    // Single write cursor that keeps advancing across all sources.
    float* dst = output;

    for (unsigned int k = 0; k < size1; ++k) {
        *dst++ = input1[k];
    }
    for (unsigned int k = 0; k < size2; ++k) {
        *dst++ = input2[k];
    }
    for (unsigned int k = 0; k < size3; ++k) {
        *dst++ = input3[k];
    }
    for (unsigned int k = 0; k < size4; ++k) {
        *dst++ = input4[k];
    }
    for (unsigned int k = 0; k < size5; ++k) {
        *dst++ = input5[k];
    }
}
/**
 * Copies NB_INPUTS buffers back to back into `output`.
 *
 * @tparam T         Element type.
 * @tparam NB_INPUTS Number of source buffers.
 * @param axis       Accepted for interface compatibility; never read here.
 * @param inputs     Array of NB_INPUTS source pointers.
 * @param sizes      Array of NB_INPUTS element counts, one per source.
 * @param output     Destination, receives the sum of all `sizes` entries.
 */
template<typename T, unsigned int NB_INPUTS>
__attribute__((always_inline)) inline static
void aidge_concat(
    const unsigned int axis,
    const T* const * __restrict inputs,
    const unsigned int* __restrict sizes,
    T* __restrict output)
{
    // Write cursor shared by all sources; it ends one past the last element.
    T* cursor = output;

    for (unsigned int src = 0; src < NB_INPUTS; ++src) {
        const T* const chunk = inputs[src];
        const unsigned int count = sizes[src];

        for (unsigned int k = 0; k < count; ++k) {
            *cursor++ = chunk[k];
        }
    }
}
/*
(C) Copyright 2017 CEA LIST. All Rights Reserved.
Contributor(s): N2D2 Team
This software is governed by the CeCILL-C license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL-C
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
*/
#ifndef __N2D2_EXPORT_CPP_CONV_DW_HPP__
#define __N2D2_EXPORT_CPP_CONV_DW_HPP__
#include "typedefs.h"
#include "assert.h"
#include "utils.hpp"
#include "kernels/Macs.hpp"
namespace N2D2_Export {
// Depthwise-style convolution: every output map reads exactly one input
// channel (channel = output * NB_CHANNELS / NB_OUTPUTS) and owns one
// KERNEL_HEIGHT x KERNEL_WIDTH weight window.
//
// Template parameters
//   NB_CHANNELS, CHANNELS_HEIGHT, CHANNELS_WIDTH : input dimensions.
//   NB_OUTPUTS, OUTPUTS_HEIGHT, OUTPUTS_WIDTH    : output dimensions;
//       NB_OUTPUTS must be a multiple of NB_CHANNELS (static_assert below).
//   PADDING_Y/X, STRIDE_Y/X, KERNEL_HEIGHT/WIDTH : window geometry.
//   ACTIVATION : forwarded untouched to sat<>().
//   INPUT_MEM_* / OUTPUT_MEM_* : buffer mapping. *_STRIDE is the element step
//       between two consecutive spatial positions. When *_WRAP_SIZE > 0, an
//       offset at or beyond *_CONT_SIZE is shifted by
//       (*_WRAP_OFFSET - *_CONT_OFFSET - *_CONT_SIZE).
//
// Arguments
//   inputs    : read at inputs[channel + offset].
//   outputs   : written at outputs[output + offset].
//   biasses   : one starting accumulator value per output.
//   weights   : indexed as x + KERNEL_WIDTH * (y + KERNEL_HEIGHT * output).
//   rescaling : forwarded untouched to sat<>().
//
// NOTE(review): SUM_T, ActivationFunction_T, sat<>(), macsOnRange<>(), max()
// and clamp() are not defined in this header; presumably they come from
// typedefs.h / utils.hpp / kernels/Macs.hpp -- confirm.
template<int NB_CHANNELS,
int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
int PADDING_Y, int PADDING_X,
int STRIDE_Y, int STRIDE_X,
int KERNEL_HEIGHT, int KERNEL_WIDTH,
ActivationFunction_T ACTIVATION,
// Memory mapping: inputs
int INPUT_MEM_CONT_OFFSET,
int INPUT_MEM_CONT_SIZE,
int INPUT_MEM_WRAP_OFFSET,
int INPUT_MEM_WRAP_SIZE,
int INPUT_MEM_STRIDE,
// Memory mapping: outputs
int OUTPUT_MEM_CONT_OFFSET,
int OUTPUT_MEM_CONT_SIZE,
int OUTPUT_MEM_WRAP_OFFSET,
int OUTPUT_MEM_WRAP_SIZE,
int OUTPUT_MEM_STRIDE,
typename Input_T, typename Output_T,
typename Weight_T, typename Bias_T,
typename Rescaling_T>
__attribute__((always_inline)) inline void convcellDWPropagate(
const Input_T* __restrict inputs,
Output_T* __restrict outputs,
const Bias_T* __restrict biasses,
const Weight_T* __restrict weights,
const Rescaling_T& __restrict rescaling)
{
static_assert(NB_OUTPUTS % NB_CHANNELS == 0,
"NB_OUTPUTS should be a multiple of NB_CHANNELS.");
// Output extent produced by the kernel/stride geometry alone (no padding).
// Used below to detect at compile time whether any window can be clipped.
constexpr int OUTPUTS_HEIGHT_NOPAD
= (CHANNELS_HEIGHT - KERNEL_HEIGHT + STRIDE_Y) / STRIDE_Y;
constexpr int OUTPUTS_WIDTH_NOPAD
= (CHANNELS_WIDTH - KERNEL_WIDTH + STRIDE_X) / STRIDE_X;
for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
// Kernel rows [syMin, syMax) are the ones kept for this output row:
// syMin skips rows that land in the top padding, syMax is clamped against
// the bottom edge of the input.
const int syMin = (PADDING_Y == 0) ? 0
: max(PADDING_Y - (oy * STRIDE_Y), 0);
const int syMax = (PADDING_Y == 0
&& OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? KERNEL_HEIGHT
: clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
0, KERNEL_HEIGHT);
// Input row of the window's first kernel row (negative inside the padding).
const int iy = (oy * STRIDE_Y) - PADDING_Y;
for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
// Same clipping as above, along the horizontal axis.
const int sxMin = (PADDING_X == 0) ? 0
: max(PADDING_X - (ox * STRIDE_X), 0);
const int sxMax = (PADDING_X == 0
&& OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
? KERNEL_WIDTH
: clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
0, KERNEL_WIDTH);
const int ix = (ox * STRIDE_X) - PADDING_X;
// Linear output position, then its element offset in the output buffer.
const int oPos = (ox + OUTPUTS_WIDTH * oy);
int oOffset = OUTPUT_MEM_STRIDE * oPos;
// Offsets past the contiguous area are redirected into the wrap area.
if (OUTPUT_MEM_WRAP_SIZE > 0 && oOffset >= OUTPUT_MEM_CONT_SIZE) {
oOffset += OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET
- OUTPUT_MEM_CONT_SIZE;
}
for (int output = 0; output < NB_OUTPUTS; ++output) {
// Single input channel feeding this output map.
const int channel = (output * NB_CHANNELS) / NB_OUTPUTS;
// The accumulator starts from the bias of this output.
SUM_T weightedSum = biasses[output];
// sy counts kept rows only; the actual kernel row is syMin + sy.
for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
// Stop once every kept row has been consumed (only possible when the
// window can be clipped at all).
if ((PADDING_Y != 0
|| OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD)
&& sy >= syMax - syMin)
{
break;
}
// Position of the first kept input sample of this kernel row.
const int iPos = ((sxMin + ix)
+ CHANNELS_WIDTH * (iy + syMin + sy));
int iOffset = INPUT_MEM_STRIDE * iPos;
// Wrapping cannot occur in the middle of a line, except if
// there is only one line (1D)!
bool wrapInRange = false;
if (INPUT_MEM_WRAP_SIZE > 0
&& iOffset >= INPUT_MEM_CONT_SIZE)
{
// The whole kernel row starts inside the wrap area.
iOffset += INPUT_MEM_WRAP_OFFSET - INPUT_MEM_CONT_OFFSET
- INPUT_MEM_CONT_SIZE;
}
else if (INPUT_MEM_WRAP_SIZE > 0 && KERNEL_WIDTH > 1
&& CHANNELS_HEIGHT == 1 // single line (1D)!
&& iOffset + KERNEL_WIDTH * INPUT_MEM_STRIDE
> INPUT_MEM_CONT_SIZE)
{
// The kernel row starts in the contiguous area but runs past its end:
// each sample must be wrapped individually in the slow path below.
wrapInRange = true;
}
// Weight of the first kept column of the current kernel row.
const int wOffset = (sxMin
+ KERNEL_WIDTH * (syMin + sy + KERNEL_HEIGHT * output));
// Fast path: the full kernel row is kept and no wrap splits it.
// NOTE(review): macsOnRange<>() presumably accumulates KERNEL_WIDTH
// input*weight products into weightedSum -- confirm in kernels/Macs.hpp.
if (!wrapInRange && ((PADDING_X == 0
&& OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
|| sxMax - sxMin == KERNEL_WIDTH))
{
macsOnRange<KERNEL_WIDTH, INPUT_MEM_STRIDE>(
inputs + iOffset + channel,
weights + wOffset,
weightedSum);
}
else {
// Slow path: accumulate sample by sample over the kept columns.
for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
if ((PADDING_X != 0
|| OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD)
&& sx >= sxMax - sxMin)
{
break;
}
int iOffsetInRange = iOffset
+ sx * INPUT_MEM_STRIDE;
// Per-sample redirection into the wrap area (1D case only).
if (wrapInRange &&
iOffsetInRange >= INPUT_MEM_CONT_SIZE)
{
iOffsetInRange += INPUT_MEM_WRAP_OFFSET
- INPUT_MEM_CONT_OFFSET
- INPUT_MEM_CONT_SIZE;
}
weightedSum += inputs[channel + iOffsetInRange]
* weights[wOffset + sx];
}
}
}
// sat<>() receives the raw sum together with the output index, the
// activation and the rescaling object, and yields the stored value.
outputs[output + oOffset]
= sat<Output_T>(weightedSum, output, ACTIVATION, rescaling);
}
}
}
}
} // N2D2_Export
#endif // __N2D2_EXPORT_CPP_CONV_DW_HPP__
#include "include/aidge_supportfunctions.h"
/**
 * 2D convolution with bias, padding, stride and dilation.
 *
 * Indexing used by this kernel:
 *   inputs  : ((y * channel_width) + x) * nb_channels + ch
 *   weights : ((och * kernel_height + ky) * kernel_width + kx) * nb_channels + ch
 *   outputs : written sequentially in (oy, ox, och) order
 *
 * Kernel taps that fall into the padding are skipped, so they add nothing to
 * the sum.
 *
 * @param inputs   Input feature map.
 * @param weights  Kernel weights.
 * @param biases   One bias per output channel; starting value of each sum.
 * @param outputs  Output feature map.
 * @param nb_channels                    Input channel count.
 * @param channel_width, channel_height  Input spatial size.
 * @param kernel_width, kernel_height    Kernel size (taps, before dilation).
 * @param nb_outputs                     Output channel count.
 * @param output_width, output_height    Output spatial size.
 * @param padding_width, padding_height  Padding subtracted from input coordinates.
 * @param stride_width, stride_height    Step between two windows.
 * @param dilation_width, dilation_height Spacing between two kernel taps.
 */
void aidge_conv2d_hwc_float32(float* inputs,
                              float* weights,
                              float* biases,
                              float* outputs,
                              const int nb_channels,
                              const int channel_width, const int channel_height,
                              const int kernel_width, const int kernel_height,
                              const int nb_outputs,
                              const int output_width, const int output_height,
                              const int padding_width, const int padding_height,
                              const int stride_width, const int stride_height,
                              const int dilation_width, const int dilation_height)
{
    // Footprint of the kernel once the gaps introduced by dilation are counted.
    const int dilated_kernel_width
        = kernel_width + (dilation_width - 1) * (kernel_width - 1);
    const int dilated_kernel_height
        = kernel_height + (dilation_height - 1) * (kernel_height - 1);

    const int padded_rows = (padding_height != 0);
    const int padded_cols = (padding_width != 0);

    // Outputs are produced strictly in order, so a write cursor is enough.
    float* dst = outputs;

    for (int oy = 0; oy < output_height; ++oy) {
        // Window origin in padded coordinates.
        const int top = oy * stride_height;

        // Dilated row offsets in [rowBegin, rowEnd) land inside the input.
        int rowBegin = 0;
        int rowEnd = dilated_kernel_height;
        if (padded_rows) {
            rowBegin = max(padding_height - top, 0);
            rowEnd = clamp(channel_height + padding_height - top,
                           0, dilated_kernel_height);
        }

        for (int ox = 0; ox < output_width; ++ox) {
            const int left = ox * stride_width;

            // Dilated column offsets in [colBegin, colEnd) land inside the input.
            int colBegin = 0;
            int colEnd = dilated_kernel_width;
            if (padded_cols) {
                colBegin = max(padding_width - left, 0);
                colEnd = clamp(channel_width + padding_width - left,
                               0, dilated_kernel_width);
            }

            for (int och = 0; och < nb_outputs; ++och) {
                float acc = biases[och];

                for (int ky = 0; ky < kernel_height; ++ky) {
                    const int dy = ky * dilation_height;

                    // Row filtering is only applied when vertical padding exists.
                    if (padded_rows && (dy < rowBegin || dy >= rowEnd)) {
                        continue;
                    }

                    const int inRow = (top + dy - padding_height) * channel_width * nb_channels
                                      + (left - padding_width) * nb_channels;
                    const int wRow = och * kernel_height * kernel_width * nb_channels
                                     + ky * kernel_width * nb_channels;

                    for (int kx = 0; kx < kernel_width; ++kx) {
                        const int dx = kx * dilation_width;

                        if (dx >= colBegin && dx < colEnd) {
                            const int inBase = inRow + dx * nb_channels;
                            const int wBase = wRow + kx * nb_channels;

                            for (int ch = 0; ch < nb_channels; ++ch) {
                                acc += inputs[inBase + ch]
                                       * weights[wBase + ch];
                            }
                        }
                    }
                }

                *dst++ = acc;
            }
        }
    }
}
/**
 * Fully-connected layer whose input is laid out HWC and whose weights are
 * laid out CHW.
 *
 * Indexing:
 *   inputs  : (iy * channels_width + ix) * nb_channels + ch
 *   weights : ((out * nb_channels + ch) * channels_height + iy) * channels_width + ix
 *
 * The weight row stride is `channels_width`. The previous code used
 * `channels_height`, which only gives the same index for square inputs; for
 * any other shape some weights were read twice and others never.
 *
 * @param inputs          Input feature map (HWC order).
 * @param weights         Weights, nb_outputs blocks of C*H*W values (CHW order).
 * @param biases          One bias per output; starting value of each sum.
 * @param outputs         Receives `nb_outputs` values.
 * @param nb_channels     Input channel count (C).
 * @param channels_height Input height (H).
 * @param channels_width  Input width (W).
 * @param nb_outputs      Number of output neurons.
 */
void aidge_fc_chw_float32 (float* inputs,
                           float* weights,
                           float* biases,
                           float* outputs,
                           unsigned int nb_channels,
                           unsigned int channels_height,
                           unsigned int channels_width,
                           unsigned int nb_outputs)
{
    // Number of weights in one channel plane and in one full output block.
    const unsigned int plane_size = channels_height * channels_width;
    const unsigned int block_size = plane_size * nb_channels;

    for (unsigned int out = 0; out < nb_outputs; ++out) {
        // Init with bias
        float accum = biases[out];

        for (unsigned int iy = 0; iy < channels_height; ++iy) {
            for (unsigned int ix = 0; ix < channels_width; ++ix) {
                for (unsigned int ch = 0; ch < nb_channels; ++ch) {
                    const unsigned int in_idx
                        = channels_width * nb_channels * iy + nb_channels * ix + ch;
                    // Row stride inside a CHW plane is the width, not the height.
                    const unsigned int w_idx
                        = block_size * out + plane_size * ch + channels_width * iy + ix;

                    accum += inputs[in_idx] * weights[w_idx];
                }
            }
        }

        // Store result
        outputs[out] = accum;
    }
}
/**
 * Batched fully-connected layer: outputs = inputs * weights^T + biases.
 *
 * Indexing:
 *   inputs  : batch * nb_inputs + in
 *   weights : out * nb_inputs + in
 *   outputs : batch * nb_outputs + out
 *
 * @param inputs     `batch_size` rows of `nb_inputs` values.
 * @param weights    `nb_outputs` rows of `nb_inputs` values.
 * @param biases     One bias per output; starting value of each sum.
 * @param outputs    Receives `batch_size` rows of `nb_outputs` values.
 * @param batch_size Number of samples.
 * @param nb_inputs  Features per sample.
 * @param nb_outputs Neurons in the layer.
 */
void aidge_fc_float32 (float* inputs,
                       float* weights,
                       float* biases,
                       float* outputs,
                       unsigned int batch_size,
                       unsigned int nb_inputs,
                       unsigned int nb_outputs)
{
    for (unsigned int b = 0; b < batch_size; ++b) {
        // Rows of the current sample in the input and output matrices.
        const float* sample = inputs + b * nb_inputs;
        float* result = outputs + b * nb_outputs;

        for (unsigned int o = 0; o < nb_outputs; ++o) {
            const float* row = weights + o * nb_inputs;

            // Dot product, seeded with the bias of this neuron.
            float sum = biases[o];
            for (unsigned int k = 0; k < nb_inputs; ++k) {
                sum += sample[k] * row[k];
            }

            result[o] = sum;
        }
    }
}
void aidge_mul_float32(float* input_a, void aidge_mul_float32(const float* input_a,
float* input_b, const float* input_b,
float* output, float* output,
int dim_a[], const int dim_a[],
int dim_b[], const int dim_b[],
int output_Dim[], const int output_Dim[],
int size_dima, int size_dima,
int size_dimb, int size_dimb,
int size_outputDim, int size_outputDim,
int output_size) int output_size)
{ {
// Broadcast dims // Broadcast dims
int ndim_a[size_outputDim]; int ndim_a[size_outputDim];
int ndim_b[size_outputDim]; int ndim_b[size_outputDim];
for (int i= 0; i<size_outputDim; i++){ for (int i= 0; i<size_outputDim; i++){
int idx = size_outputDim-size_dima; int idx = size_outputDim-size_dima;
...@@ -96,4 +96,4 @@ void aidge_mul_float32(float* input_a, ...@@ -96,4 +96,4 @@ void aidge_mul_float32(float* input_a,
} }
} }
} }
\ No newline at end of file
#include "include/aidge_supportfunctions.h"
/**
 * 2D max pooling.
 *
 * Indexing:
 *   inputs  : (y * channel_width + x) * nb_channels + output
 *   outputs : (oy * output_width + ox) * nb_outputs + output
 *
 * Window positions that fall into the padding are skipped, so they never take
 * part in the maximum.
 *
 * The running maximum used to start at -1000.f, so a window whose samples
 * were all below -1000 returned -1000 instead of its real maximum. The first
 * usable sample of a window is now always accepted. The -1000.f value is kept
 * only as the result of a window with no usable sample, which is what the old
 * code returned in that case. NaN samples are still ignored, as before.
 *
 * @param inputs   Input feature map.
 * @param outputs  Output feature map.
 * @param nb_channels                    Channel step of the input buffer.
 * @param channel_width, channel_height  Input spatial size.
 * @param kernel_width, kernel_height    Pooling window size.
 * @param nb_outputs                     Number of pooled channels.
 * @param output_width, output_height    Output spatial size.
 * @param padding_width, padding_height  Padding subtracted from input coordinates.
 * @param stride_width, stride_height    Step between two windows.
 */
void aidge_maxpool2d_float32(float* inputs,
                             float* outputs,
                             const int nb_channels,
                             const int channel_width, const int channel_height,
                             const int kernel_width, const int kernel_height,
                             const int nb_outputs,
                             const int output_width, const int output_height,
                             const int padding_width, const int padding_height,
                             const int stride_width, const int stride_height)
{
    // Output extent produced by the kernel/stride geometry alone (no padding).
    const int OUTPUTS_HEIGHT_NOPAD
        = (channel_height - kernel_height + stride_height) / stride_height;
    const int OUTPUTS_WIDTH_NOPAD
        = (channel_width - kernel_width + stride_width) / stride_width;

    // A window can only be clipped when padding exists or the requested
    // output is not the natural no-padding size.
    const int clipRows = (padding_height != 0
                          || output_height != OUTPUTS_HEIGHT_NOPAD);
    const int clipCols = (padding_width != 0
                          || output_width != OUTPUTS_WIDTH_NOPAD);

    for (int oy = 0; oy < output_height; ++oy) {
        // Kernel rows [syMin, syMax) overlap the input for this output row.
        const int syMin = (padding_height == 0) ? 0
            : max(padding_height - (oy * stride_height), 0);
        const int syMax = !clipRows ? kernel_height
            : clamp(channel_height + padding_height - (oy * stride_height),
                    0, kernel_height);
        const int iy = (oy * stride_height) - padding_height;

        for (int ox = 0; ox < output_width; ++ox) {
            // Column bounds do not depend on the channel: compute them once.
            const int sxMin = (padding_width == 0) ? 0
                : max(padding_width - (ox * stride_width), 0);
            const int sxMax = !clipCols ? kernel_width
                : clamp(channel_width + padding_width - (ox * stride_width),
                        0, kernel_width);
            const int ix = (ox * stride_width) - padding_width;

            const int oOffset = nb_outputs * (ox + output_width * oy);

            for (int output = 0; output < nb_outputs; ++output) {
                // Result for a window with no usable sample (legacy value).
                float maxVal = -1000.f;
                // Becomes 1 as soon as one sample has been accepted.
                int hasValue = 0;

                // sy / sx count kept rows / columns only.
                for (int sy = 0; sy < kernel_height; ++sy) {
                    if (clipRows && sy >= syMax - syMin) {
                        break;
                    }

                    const int iPos = (sxMin + ix)
                        + channel_width * (iy + syMin + sy);
                    const int iOffset = nb_channels * iPos;

                    for (int sx = 0; sx < kernel_width; ++sx) {
                        if (clipCols && sx >= sxMax - sxMin) {
                            break;
                        }

                        const float value
                            = inputs[iOffset + output + sx * nb_channels];

                        // Before the first accepted sample, any non-NaN value
                        // is taken; afterwards only a strictly larger one.
                        // NaN fails both comparisons and is skipped.
                        if (value > maxVal
                            || (!hasValue && value <= maxVal))
                        {
                            maxVal = value;
                            hasValue = 1;
                        }
                    }
                }

                outputs[oOffset + output] = maxVal;
            }
        }
    }
}
/**
 * Element-wise ReLU: negative values become 0, everything else is copied.
 *
 * @param inputs  Source buffer, `size` elements are read.
 * @param outputs Destination buffer, `size` elements are written.
 * @param size    Number of elements to process.
 */
void aidge_relu_float32 (float* inputs,
                         float* outputs,
                         unsigned int size)
{
    for (unsigned int i = 0; i < size; ++i) {
        outputs[i] = (inputs[i] < 0.0f) ? 0.0f : inputs[i];
    }
}
\ No newline at end of file
void aidge_slice_float32 (float* inputs,
float* outputs,
void aidge_slice_float32 (float* inputs, const int* axes,
float* outputs, const int* starts,
int* axes, const int* ends,
int* starts,
int* ends,
unsigned int input_dims, unsigned int input_dims,
unsigned int nb_axes) unsigned int nb_axes)
{ {
...@@ -13,4 +11,4 @@ void aidge_slice_float32 (float* inputs, ...@@ -13,4 +11,4 @@ void aidge_slice_float32 (float* inputs,
for (int i = starts[axes[0] - 1]; i < ends[axes[0] - 1]; ++i) { for (int i = starts[axes[0] - 1]; i < ends[axes[0] - 1]; ++i) {
outputs[out_index++] = inputs[i]; outputs[out_index++] = inputs[i];
} }
} }
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment