diff --git a/aidge_export_cpp/kernels/add.hpp b/aidge_export_cpp/kernels/add.hpp index eb8a93b87080ee6651fd018839082059104a247a..52b58f5a09591781357f64f27090a7e347850d35 100644 --- a/aidge_export_cpp/kernels/add.hpp +++ b/aidge_export_cpp/kernels/add.hpp @@ -55,12 +55,10 @@ void add_forward ( if (contiguousidx > 0) { stride_post0[contiguousidx - 1] = 1; stride_post1[contiguousidx - 1] = 1; - #pragma omp parallel for for (int i = contiguousidx -2; i != -1; --i) { stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; } - #pragma omp parallel for for (int i = 0; i < contiguousidx ; ++i) { stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp index 201ef16fb11991a872c86a732c96e406c2854d19..4100e6d1ef0d07ff16ad25b33cb497e1143f0c0b 100644 --- a/aidge_export_cpp/kernels/batchnorm.hpp +++ b/aidge_export_cpp/kernels/batchnorm.hpp @@ -25,15 +25,21 @@ void batchnorm_forward ( { int featureMapSize = OUTPUTS_HEIGHT * OUTPUTS_WIDTH; +#ifdef _OPENMP #pragma omp parallel for +#endif for (int ch = 0; ch < NB_OUTPUTS; ++ch) { int ioIndex = ch * featureMapSize; +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = ioIndex; i < ioIndex + featureMapSize; i++) { outputs[i] = biases[ch]; } float var = sqrt(variances[ch] + epsilon); +#ifdef _OPENMP #pragma omp parallel for +#endif for (int feature = 0; feature < featureMapSize; ++feature) { outputs[ioIndex + feature] += (scales[ch] * (inputs[ioIndex + feature] - means[ch]) / var); } diff --git a/aidge_export_cpp/kernels/convolution_groups.hpp b/aidge_export_cpp/kernels/convolution_groups.hpp index 321ffc7693b3b2931e20c888765fe2fc43779668..17cb1bfa7962063c59fca2395b1110c526e68015 100644 --- a/aidge_export_cpp/kernels/convolution_groups.hpp +++ b/aidge_export_cpp/kernels/convolution_groups.hpp @@ -6,7 +6,6 @@ #include "network/utils.hpp" #include "kernels/macs.hpp" #include "kernels/activation.hpp" -#include <omp.h> // Weights index en NHWC constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) { @@ -52,19 +51,22 @@ void convolution_forward( int c_in_g = NB_CHANNELS / GROUPS; int c_out_g = NB_OUTPUTS / GROUPS; - #pragma omp parallel for +#ifdef _OPENMP + #pragma omp parallel for collapse(3) +#endif for (int oc = 0; oc < NB_OUTPUTS; oc++) { - int g_oc = oc / c_out_g; - #pragma omp parallel for for (int i = 0; i < OUT_HEIGHT; ++i) { - #pragma omp parallel for for (int j = 0; j < OUT_WIDTH; ++j) { + int g_oc = oc / c_out_g; Output_T value = biases[oc]; - #pragma omp parallel for for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int m = 0; m < KERNEL_HEIGHT; ++m) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int n = 0; n < KERNEL_WIDTH; ++n) { int i_p = i * STRIDE_X - PADDING_X + m * DILATION_X; int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y; diff --git a/aidge_export_cpp/kernels/erf.hpp b/aidge_export_cpp/kernels/erf.hpp index 768f3b9de6ea861726f8293de63d634716773ff2..88aafe2407c4a1106b1e05acf2406a1337fa5ac6 100644 --- a/aidge_export_cpp/kernels/erf.hpp +++ b/aidge_export_cpp/kernels/erf.hpp @@ -19,8 +19,9 @@ void erf_forward ( double a5 = 1.061405429; double p = 0.3275911; - +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 0; i < _NB_ELTS; ++i) { int sign = 1; if (inputs[i] < 0) diff --git a/aidge_export_cpp/kernels/mul.hpp b/aidge_export_cpp/kernels/mul.hpp index b3ff9e1b34eb023527f2e7000d701999a4b96556..5c1ba622ef5971dbac8979d89931dc1f4e13cce5 100644 --- a/aidge_export_cpp/kernels/mul.hpp +++ b/aidge_export_cpp/kernels/mul.hpp @@ -54,12 +54,10 @@ void mul_forward ( if (contiguousidx > 0) { stride_post0[contiguousidx - 1] = 1; stride_post1[contiguousidx - 1] = 1; - #pragma omp parallel for for (int i = contiguousidx -2; i != -1; --i) { stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; } - #pragma omp parallel for for (int i = 0; i < contiguousidx ; ++i) { stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1;