From 472e2c62c9ba5c76f1dc6bbe55686d85976aa28f Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Mon, 7 Apr 2025 14:23:28 +0200 Subject: [PATCH] Optimized Fc Impl --- .../backend/cpu/operator/FCImpl_kernels.hpp | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp index b77f749f..ca4d5def 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp @@ -96,21 +96,16 @@ void FCImpl_cpu_forward_kernel(const DimSize_t batchSize, const B* biases = static_cast<const B*>(biases_); O* output = static_cast<O*>(output_); - if (biases == nullptr) { - std::fill(output, output+(batchSize*outputFeatureSize), B(0)); - } - else { - for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize)); - } - } - - for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t out = 0; out < outputFeatureSize; ++out) { +#ifdef _OPENMP + #pragma omp parallel for collapse(2) if (batchSize * outputFeatureSize > 32) +#endif + for (int batch = 0; batch < static_cast<int>(batchSize); ++batch) { + for (int out = 0; out < static_cast<int>(outputFeatureSize); ++out) { + const auto biasVal = (biases) ? biases[out] : B(0); output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize, input + (batch + 1)*inputFeatureSize, weights + out*inputFeatureSize, - output[out + batch*outputFeatureSize]); + biasVal); } } } -- GitLab