diff --git a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp index b77f749f9d81af7ab2b94d078eca941218b3cd6f..ca4d5def783a2fae87ee55ae0b5007c795c8b599 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp @@ -96,21 +96,16 @@ void FCImpl_cpu_forward_kernel(const DimSize_t batchSize, const B* biases = static_cast<const B*>(biases_); O* output = static_cast<O*>(output_); - if (biases == nullptr) { - std::fill(output, output+(batchSize*outputFeatureSize), B(0)); - } - else { - for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize)); - } - } - - for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t out = 0; out < outputFeatureSize; ++out) { +#ifdef _OPENMP + #pragma omp parallel for collapse(2) if (batchSize * outputFeatureSize > 32) +#endif + for (int batch = 0; batch < static_cast<int>(batchSize); ++batch) { + for (int out = 0; out < static_cast<int>(outputFeatureSize); ++out) { + const auto biasVal = (biases) ? biases[out] : B(0); output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize, input + (batch + 1)*inputFeatureSize, weights + out*inputFeatureSize, - output[out + batch*outputFeatureSize]); + biasVal); } } }