Added OpenMP for Resize and TopK operators

a6373db1 · Olivier BICHLER · 7d8a52b4 · a6373db1 · a6373db1 · a6373db1
Commit a6373db1 authored 2 months ago by Olivier BICHLER
--- a/include/aidge/backend/cpu/operator/ResizeImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ResizeImpl_kernels.hpp
@@ -50,12 +50,13 @@ void ResizeImpl_cpu_forward_kernel(
                                          outputDims.cend(),
                                          1,
                                          std::multiplies<DimSize_t>());
-    std::vector<float> coordInApprox(inputDims.size());
-    std::vector<std::size_t> coordIn(inputDims.size());
+#ifdef _OPENMP
-    std::vector<DimSize_t> coordOut;
+    #pragma omp parallel for if (outputLen >= 16)
+#endif
    for (DimSize_t idxFlatOut = 0; idxFlatOut < outputLen; ++idxFlatOut) {
-        coordOut = Tensor::toCoord(outputDims, idxFlatOut);
+        const auto coordOut = Tensor::toCoord(outputDims, idxFlatOut);
-        coordInApprox =
+        auto coordInApprox =
            Interpolation::untransformCoordinates(coordOut,
                                                  inputDims,
                                                  outputDims,
@@ -72,6 +73,7 @@ void ResizeImpl_cpu_forward_kernel(
                    coordInApprox[i] = std::ceil(coordInApprox[i] - 0.5f);
                }
            }
+            std::vector<std::size_t> coordIn(inputDims.size());
            if (Tensor::isInBounds<float>(inputDims, coordInApprox)) {
                for (std::size_t i = 0; i < coordInApprox.size(); ++i) {
                    coordIn[i] = static_cast<std::size_t>(coordInApprox[i]);

--- a/include/aidge/backend/cpu/operator/TopKImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/TopKImpl_kernels.hpp
@@ -47,6 +47,9 @@ void TopKImpl_cpu_forward_kernel(int64_t axis,
    const std::size_t dim_i = inputDims[axis];
    std::vector<std::pair<I, int64_t>> buffer(dim_i);
+#ifdef _OPENMP
+    #pragma omp parallel for collapse(2) if (stride_pre * stride_post >= 16)
+#endif
    for (std::size_t pre = 0; pre < stride_pre; ++pre) {
        for (std::size_t post = 0; post < stride_post; ++post) {
            const std::size_t idx_i = pre * dim_i * stride_post + post;

--- a/src/data/Interpolation.cpp
+++ b/src/data/Interpolation.cpp
@@ -79,10 +79,10 @@ InterpolationCPU::linearRecurse(const std::vector<float> &coordToInterpolate,
            pointsCoords,
            alongDim);
    }
-    Log::debug("\nEntering linear recurse with {} points.", points.size());
+    //Log::debug("\nEntering linear recurse with {} points.", points.size());
-    Log::debug("Points : {}", extractPtCoords(points));
+    //Log::debug("Points : {}", extractPtCoords(points));
-    Log::debug("coordsToInterpolate : {}", coordToInterpolate);
+    //Log::debug("coordsToInterpolate : {}", coordToInterpolate);
-    Log::debug("alongDim : {}", alongDim);
+    //Log::debug("alongDim : {}", alongDim);
    ///////////////////
    // COMPUTATION
@@ -98,9 +98,9 @@ InterpolationCPU::linearRecurse(const std::vector<float> &coordToInterpolate,
            upperPoints.insert(point);
        }
    }
-    Log::debug("alongDim : {}", alongDim);
+    //Log::debug("alongDim : {}", alongDim);
-    Log::debug("lowerPoints : {}", extractPtCoords(lowerPoints));
+    //Log::debug("lowerPoints : {}", extractPtCoords(lowerPoints));
-    Log::debug("upperPoints : {}", extractPtCoords(upperPoints));
+    //Log::debug("upperPoints : {}", extractPtCoords(upperPoints));
    // Here are 3 cases
    // 1. upper/lowerPoints.size() == 0
@@ -174,7 +174,7 @@ InterpolationCPU::linearRecurse(const std::vector<float> &coordToInterpolate,
    // 0 is just a sanity check to ensure later that all dims have been
    // interpolated
    interpolatedPoint.first[alongDim] = 0;
-    Log::debug("successfully returned from alongDim : {}", alongDim);
+    //Log::debug("successfully returned from alongDim : {}", alongDim);
    return std::set<Point<T>>({interpolatedPoint});
 }