From f74ec2c3a86cfb5783036d9abee25c7baa9289c7 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Thu, 3 Apr 2025 12:10:41 +0000 Subject: [PATCH] Add ifdef pragma or delete unneeded pragma --- aidge_export_cpp/kernels/add.hpp | 2 -- aidge_export_cpp/kernels/batchnorm.hpp | 6 ++++++ aidge_export_cpp/kernels/convolution_groups.hpp | 14 ++++++++------ aidge_export_cpp/kernels/erf.hpp | 3 ++- aidge_export_cpp/kernels/mul.hpp | 2 -- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/aidge_export_cpp/kernels/add.hpp b/aidge_export_cpp/kernels/add.hpp index eb8a93b..52b58f5 100644 --- a/aidge_export_cpp/kernels/add.hpp +++ b/aidge_export_cpp/kernels/add.hpp @@ -55,12 +55,10 @@ void add_forward ( if (contiguousidx > 0) { stride_post0[contiguousidx - 1] = 1; stride_post1[contiguousidx - 1] = 1; - #pragma omp parallel for for (int i = contiguousidx -2; i != -1; --i) { stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; } - #pragma omp parallel for for (int i = 0; i < contiguousidx ; ++i) { stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp index 201ef16..4100e6d 100644 --- a/aidge_export_cpp/kernels/batchnorm.hpp +++ b/aidge_export_cpp/kernels/batchnorm.hpp @@ -25,15 +25,21 @@ void batchnorm_forward ( { int featureMapSize = OUTPUTS_HEIGHT * OUTPUTS_WIDTH; +#ifdef _OPENMP #pragma omp parallel for +#endif for (int ch = 0; ch < NB_OUTPUTS; ++ch) { int ioIndex = ch * featureMapSize; +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = ioIndex; i < ioIndex + featureMapSize; i++) { outputs[i] = biases[ch]; } float var = sqrt(variances[ch] + epsilon); +#ifdef _OPENMP #pragma omp parallel for +#endif for (int feature = 0; feature < featureMapSize; ++feature) { outputs[ioIndex + feature] += (scales[ch] * (inputs[ioIndex + feature] - means[ch]) / var); } diff --git a/aidge_export_cpp/kernels/convolution_groups.hpp b/aidge_export_cpp/kernels/convolution_groups.hpp index 321ffc7..17cb1bf 100644 --- a/aidge_export_cpp/kernels/convolution_groups.hpp +++ b/aidge_export_cpp/kernels/convolution_groups.hpp @@ -6,7 +6,6 @@ #include "network/utils.hpp" #include "kernels/macs.hpp" #include "kernels/activation.hpp" -#include <omp.h> // Weights index en NHWC constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) { @@ -52,19 +51,22 @@ void convolution_forward( int c_in_g = NB_CHANNELS / GROUPS; int c_out_g = NB_OUTPUTS / GROUPS; - #pragma omp parallel for +#ifdef _OPENMP + #pragma omp parallel for collapse(3) +#endif for (int oc = 0; oc < NB_OUTPUTS; oc++) { - int g_oc = oc / c_out_g; - #pragma omp parallel for for (int i = 0; i < OUT_HEIGHT; ++i) { - #pragma omp parallel for for (int j = 0; j < OUT_WIDTH; ++j) { + int g_oc = oc / c_out_g; Output_T value = biases[oc]; - #pragma omp parallel for for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int m = 0; m < KERNEL_HEIGHT; ++m) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int n = 0; n < KERNEL_WIDTH; ++n) { int i_p = i * STRIDE_X - PADDING_X + m * DILATION_X; int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y; diff --git a/aidge_export_cpp/kernels/erf.hpp b/aidge_export_cpp/kernels/erf.hpp index 768f3b9..88aafe2 100644 --- a/aidge_export_cpp/kernels/erf.hpp +++ b/aidge_export_cpp/kernels/erf.hpp @@ -19,8 +19,9 @@ void erf_forward ( double a5 = 1.061405429; double p = 0.3275911; - +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 0; i < _NB_ELTS; ++i) { int sign = 1; if (inputs[i] < 0) diff --git a/aidge_export_cpp/kernels/mul.hpp b/aidge_export_cpp/kernels/mul.hpp index b3ff9e1..5c1ba62 100644 --- a/aidge_export_cpp/kernels/mul.hpp +++ b/aidge_export_cpp/kernels/mul.hpp @@ -54,12 +54,10 @@ void mul_forward ( if (contiguousidx > 0) { stride_post0[contiguousidx - 1] = 1; stride_post1[contiguousidx - 1] = 1; - #pragma omp parallel for for (int i = contiguousidx -2; i != -1; --i) { stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; } - #pragma omp parallel for for (int i = 0; i < contiguousidx ; ++i) { stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; -- GitLab