Skip to content
Snippets Groups Projects
Commit f74ec2c3 authored by Matthew Newson
Browse files

Add ifdef pragma or delete unneeded pragma

parent dc0225cb
No related branches found
No related tags found
No related merge requests found
Pipeline #72696 failed
...@@ -55,12 +55,10 @@ void add_forward ( ...@@ -55,12 +55,10 @@ void add_forward (
if (contiguousidx > 0) { if (contiguousidx > 0) {
stride_post0[contiguousidx - 1] = 1; stride_post0[contiguousidx - 1] = 1;
stride_post1[contiguousidx - 1] = 1; stride_post1[contiguousidx - 1] = 1;
#pragma omp parallel for
for (int i = contiguousidx -2; i != -1; --i) { for (int i = contiguousidx -2; i != -1; --i) {
stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; stride_post0[i] = stride_post0[i+1]*ndim_a[i+1];
stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; stride_post1[i] = stride_post1[i+1]*ndim_b[i+1];
} }
#pragma omp parallel for
for (int i = 0; i < contiguousidx ; ++i) { for (int i = 0; i < contiguousidx ; ++i) {
stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1;
stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1;
......
...@@ -25,15 +25,21 @@ void batchnorm_forward ( ...@@ -25,15 +25,21 @@ void batchnorm_forward (
{ {
int featureMapSize = OUTPUTS_HEIGHT * OUTPUTS_WIDTH; int featureMapSize = OUTPUTS_HEIGHT * OUTPUTS_WIDTH;
#ifdef _OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif
for (int ch = 0; ch < NB_OUTPUTS; ++ch) { for (int ch = 0; ch < NB_OUTPUTS; ++ch) {
int ioIndex = ch * featureMapSize; int ioIndex = ch * featureMapSize;
#ifdef _OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif
for (int i = ioIndex; i < ioIndex + featureMapSize; i++) { for (int i = ioIndex; i < ioIndex + featureMapSize; i++) {
outputs[i] = biases[ch]; outputs[i] = biases[ch];
} }
float var = sqrt(variances[ch] + epsilon); float var = sqrt(variances[ch] + epsilon);
#ifdef _OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif
for (int feature = 0; feature < featureMapSize; ++feature) { for (int feature = 0; feature < featureMapSize; ++feature) {
outputs[ioIndex + feature] += (scales[ch] * (inputs[ioIndex + feature] - means[ch]) / var); outputs[ioIndex + feature] += (scales[ch] * (inputs[ioIndex + feature] - means[ch]) / var);
} }
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
#include "network/utils.hpp" #include "network/utils.hpp"
#include "kernels/macs.hpp" #include "kernels/macs.hpp"
#include "kernels/activation.hpp" #include "kernels/activation.hpp"
#include <omp.h>
// Weights index en NHWC // Weights index en NHWC
constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) { constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) {
...@@ -52,19 +51,22 @@ void convolution_forward( ...@@ -52,19 +51,22 @@ void convolution_forward(
int c_in_g = NB_CHANNELS / GROUPS; int c_in_g = NB_CHANNELS / GROUPS;
int c_out_g = NB_OUTPUTS / GROUPS; int c_out_g = NB_OUTPUTS / GROUPS;
#pragma omp parallel for #ifdef _OPENMP
#pragma omp parallel for collapse(3)
#endif
for (int oc = 0; oc < NB_OUTPUTS; oc++) { for (int oc = 0; oc < NB_OUTPUTS; oc++) {
int g_oc = oc / c_out_g;
#pragma omp parallel for
for (int i = 0; i < OUT_HEIGHT; ++i) { for (int i = 0; i < OUT_HEIGHT; ++i) {
#pragma omp parallel for
for (int j = 0; j < OUT_WIDTH; ++j) { for (int j = 0; j < OUT_WIDTH; ++j) {
int g_oc = oc / c_out_g;
Output_T value = biases[oc]; Output_T value = biases[oc];
#pragma omp parallel for
for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) { for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) {
#ifdef _OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif
for (int m = 0; m < KERNEL_HEIGHT; ++m) { for (int m = 0; m < KERNEL_HEIGHT; ++m) {
#ifdef _OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif
for (int n = 0; n < KERNEL_WIDTH; ++n) { for (int n = 0; n < KERNEL_WIDTH; ++n) {
int i_p = i * STRIDE_X - PADDING_X + m * DILATION_X; int i_p = i * STRIDE_X - PADDING_X + m * DILATION_X;
int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y; int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y;
......
...@@ -19,8 +19,9 @@ void erf_forward ( ...@@ -19,8 +19,9 @@ void erf_forward (
double a5 = 1.061405429; double a5 = 1.061405429;
double p = 0.3275911; double p = 0.3275911;
#ifdef _OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif
for (int i = 0; i < _NB_ELTS; ++i) { for (int i = 0; i < _NB_ELTS; ++i) {
int sign = 1; int sign = 1;
if (inputs[i] < 0) if (inputs[i] < 0)
......
...@@ -54,12 +54,10 @@ void mul_forward ( ...@@ -54,12 +54,10 @@ void mul_forward (
if (contiguousidx > 0) { if (contiguousidx > 0) {
stride_post0[contiguousidx - 1] = 1; stride_post0[contiguousidx - 1] = 1;
stride_post1[contiguousidx - 1] = 1; stride_post1[contiguousidx - 1] = 1;
#pragma omp parallel for
for (int i = contiguousidx -2; i != -1; --i) { for (int i = contiguousidx -2; i != -1; --i) {
stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; stride_post0[i] = stride_post0[i+1]*ndim_a[i+1];
stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; stride_post1[i] = stride_post1[i+1]*ndim_b[i+1];
} }
#pragma omp parallel for
for (int i = 0; i < contiguousidx ; ++i) { for (int i = 0; i < contiguousidx ; ++i) {
stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1;
stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1;
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment