From e37444e768679c5e43f21c73f8587fcfd8d1500a Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Mon, 28 Apr 2025 15:06:20 +0200 Subject: [PATCH] Added _OPENMP guards --- aidge_export_cpp/kernels/convolution.hpp | 4 ++++ aidge_export_cpp/kernels/fullyconnected.hpp | 5 +++++ aidge_export_cpp/kernels/leakyrelu.hpp | 2 ++ aidge_export_cpp/kernels/pooling.hpp | 2 ++ 4 files changed, 13 insertions(+) diff --git a/aidge_export_cpp/kernels/convolution.hpp b/aidge_export_cpp/kernels/convolution.hpp index 40f22c6..7df55ab 100644 --- a/aidge_export_cpp/kernels/convolution.hpp +++ b/aidge_export_cpp/kernels/convolution.hpp @@ -48,7 +48,9 @@ void convolution_forward( 0, DILATED_KERNEL_HEIGHT); const int iy = (oy * STRIDE_Y) - PADDING_Y; +#ifdef _OPENMP #pragma omp parallel for collapse(2) +#endif for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) { for (int output = 0; output < NB_OUTPUTS; ++output) { // moved to inner loop for collapsing --> @@ -200,7 +202,9 @@ void convolution_depthwise_forward( 0, DILATED_KERNEL_HEIGHT); const int iy = (oy * STRIDE_Y) - PADDING_Y; +#ifdef _OPENMP #pragma omp parallel for collapse(2) +#endif for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) { for (int output = 0; output < NB_OUTPUTS; ++output) { // moved to inner loop for collapsing --> diff --git a/aidge_export_cpp/kernels/fullyconnected.hpp b/aidge_export_cpp/kernels/fullyconnected.hpp index 2780de2..60805e7 100644 --- a/aidge_export_cpp/kernels/fullyconnected.hpp +++ b/aidge_export_cpp/kernels/fullyconnected.hpp @@ -28,6 +28,9 @@ void fullyconnected_forward ( // It is only an issue if the FC was after a flatten layer. // Otherwise it is not an issue for the other FC because CHANNELS_WIDTH = CHANNELS_HEIGHT = 1 // Solution: Add a system to check dataformat +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int och = 0; och < NB_OUTPUTS; och++) { Bias_T weightedSum = (biases) ? biases[och] : Bias_T(0); @@ -45,7 +48,9 @@ void fullyconnected_forward ( } /* Here the kernel to use with inputs in NHWC and weights in NHWC +#ifdef _OPENMP #pragma omp parallel for +#endif for (int och = 0; och < NB_OUTPUTS; och++) { Bias_T weightedSum = (biases) ? biases[och] : Bias_T(0); diff --git a/aidge_export_cpp/kernels/leakyrelu.hpp b/aidge_export_cpp/kernels/leakyrelu.hpp index 07352cd..5e6598d 100644 --- a/aidge_export_cpp/kernels/leakyrelu.hpp +++ b/aidge_export_cpp/kernels/leakyrelu.hpp @@ -11,7 +11,9 @@ void leakyrelu_forward ( Output_T* __restrict outputs, const float negative_slope) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 0; i < NB_DATA; ++i) { if (inputs[i] >= 0) { outputs[i] = inputs[i]; diff --git a/aidge_export_cpp/kernels/pooling.hpp b/aidge_export_cpp/kernels/pooling.hpp index a86fd41..30fa766 100644 --- a/aidge_export_cpp/kernels/pooling.hpp +++ b/aidge_export_cpp/kernels/pooling.hpp @@ -36,7 +36,9 @@ void pooling_forward( 0, POOL_HEIGHT); const int iy = (oy * STRIDE_Y) - PADDING_Y; +#ifdef _OPENMP #pragma omp parallel for collapse(2) +#endif for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) { for (int output = 0; output < NB_OUTPUTS; ++output) { // moved to inner loop for collapsing --> -- GitLab