Skip to content
Snippets Groups Projects
Commit 472e2c62 authored by Olivier BICHLER
Browse files

Optimized Fc Impl

parent c08a3fa7
No related branches found
No related tags found
2 merge requests: !166 Update 0.5.0 -> 0.6.0, !158 Added OpenMP
...@@ -96,21 +96,16 @@ void FCImpl_cpu_forward_kernel(const DimSize_t batchSize, ...@@ -96,21 +96,16 @@ void FCImpl_cpu_forward_kernel(const DimSize_t batchSize,
const B* biases = static_cast<const B*>(biases_); const B* biases = static_cast<const B*>(biases_);
O* output = static_cast<O*>(output_); O* output = static_cast<O*>(output_);
if (biases == nullptr) { #ifdef _OPENMP
std::fill(output, output+(batchSize*outputFeatureSize), B(0)); #pragma omp parallel for collapse(2) if (batchSize * outputFeatureSize > 32)
} #endif
else { for (int batch = 0; batch < static_cast<int>(batchSize); ++batch) {
for (std::size_t batch = 0; batch < batchSize; ++batch) { for (int out = 0; out < static_cast<int>(outputFeatureSize); ++out) {
std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize)); const auto biasVal = (biases) ? biases[out] : B(0);
}
}
for (std::size_t batch = 0; batch < batchSize; ++batch) {
for (std::size_t out = 0; out < outputFeatureSize; ++out) {
output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize, output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize,
input + (batch + 1)*inputFeatureSize, input + (batch + 1)*inputFeatureSize,
weights + out*inputFeatureSize, weights + out*inputFeatureSize,
output[out + batch*outputFeatureSize]); biasVal);
} }
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment