From e93fe6403d47529d087531899255dcf34c6e9c5c Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Sun, 30 Jun 2024 17:34:05 +0200
Subject: [PATCH] Initial commit

---
 include/aidge/backend/cpu.hpp                 |   1 +
 .../aidge/backend/cpu/operator/FoldImpl.hpp   |  50 +++++
 .../cpu/operator/FoldImpl_forward_kernels.hpp |  82 ++++++++
 src/operator/FoldImpl.cpp                     |  37 ++++
 unit_tests/operator/Test_FoldImpl.cpp         | 179 ++++++++++++++++++
 5 files changed, 349 insertions(+)
 create mode 100644 include/aidge/backend/cpu/operator/FoldImpl.hpp
 create mode 100644 include/aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp
 create mode 100644 src/operator/FoldImpl.cpp
 create mode 100644 unit_tests/operator/Test_FoldImpl.cpp

diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
index 11f9c264..b81145de 100644
--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -21,6 +21,7 @@
 #include "aidge/backend/cpu/operator/DivImpl.hpp"
 #include "aidge/backend/cpu/operator/ErfImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
+#include "aidge/backend/cpu/operator/FoldImpl.hpp"
 #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
diff --git a/include/aidge/backend/cpu/operator/FoldImpl.hpp b/include/aidge/backend/cpu/operator/FoldImpl.hpp
new file mode 100644
index 00000000..b258745e
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/FoldImpl.hpp
@@ -0,0 +1,50 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_H_
+#define AIDGE_CPU_OPERATOR_FOLDIMPL_H_
+
+#include <array>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Fold.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+
+namespace Aidge {
+class FoldImpl2DForward_cpu
+    : public Registrable<FoldImpl2DForward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const Fold_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
+                              void *)> {};
+
+class FoldImpl2D_cpu : public OperatorImpl {
+public:
+    FoldImpl2D_cpu(const Fold_Op<2> &op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<FoldImpl2D_cpu> create(const Fold_Op<2> &op) {
+        return std::make_unique<FoldImpl2D_cpu>(op);
+    }
+
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to Fold_Op<2> implementation registry
+static Registrar<Fold_Op<2>> registrarFoldImpl2D_cpu("cpu", Aidge::FoldImpl2D_cpu::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp
new file mode 100644
index 00000000..5caad147
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp
@@ -0,0 +1,82 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_FOLDIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/FoldImpl.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include <cmath>
+#include <array>
+#include <algorithm>
+
+namespace Aidge {
+template <class I, class O>
+void FoldImpl2D_cpu_forward_kernel(const Fold_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
+                                       const void *input_, void *output_)
+{
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+
+    const auto kernelDims = std::get<3>(attrs);
+    const auto dilationDims = std::get<2>(attrs);
+    const auto strideDims = std::get<1>(attrs);
+    const DimSize_t inHeight = std::get<0>(attrs)[0];
+    const DimSize_t inWidth = std::get<0>(attrs)[1];
+
+    const DimSize_t kernelExtentHeight = dilationDims[0] *
+                                            (kernelDims[0] - 1) + 1;
+    const DimSize_t outHeight = 1 + static_cast<DimSize_t>(
+                    floor(static_cast<float>(inHeight - kernelExtentHeight) /
+                            static_cast<float>(strideDims[0])));
+    const DimSize_t kernelExtentWidth = dilationDims[1] *
+                                            (kernelDims[1] - 1) + 1;
+    const DimSize_t outWidth = 1 + static_cast<DimSize_t>(
+                    floor(static_cast<float>(inWidth - kernelExtentWidth) /
+                            static_cast<float>(strideDims[1])));
+    const DimSize_t outChannels = dims[1];
+
+    std::fill_n(output, inHeight * inWidth * (outChannels / (kernelDims[0] * kernelDims[1])), O(0));
+
+    for (DimSize_t outC = 0; outC < outChannels; ++outC) {
+        const auto inOffsetW = outC % kernelDims[1];
+        const auto inOffsetH = (outC / kernelDims[1]) % kernelDims[0];
+        const auto inC = outC / kernelDims[0] / kernelDims[1];
+
+        for (DimSize_t outH = 0; outH < outHeight; ++outH) {
+            const auto inH = outH * strideDims[0] + inOffsetH * dilationDims[0];
+
+            for (DimSize_t outW = 0; outW < outWidth; ++outW) {
+                const auto inW = outW * strideDims[1] + inOffsetW * dilationDims[1];
+
+                output[(inC * inHeight + inH) * inWidth + inW] += input[(outC * outHeight + outH) * outWidth + outW];
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<FoldImpl2DForward_cpu> registrarFoldImpl2DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32},
+        Aidge::FoldImpl2D_cpu_forward_kernel<float, float>);
+static Registrar<FoldImpl2DForward_cpu> registrarFoldImpl2DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32},
+        Aidge::FoldImpl2D_cpu_forward_kernel<int, int>);
+static Registrar<FoldImpl2DForward_cpu> registrarFoldImpl2DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64},
+        Aidge::FoldImpl2D_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_FORWARD_KERNEL_H_ */
diff --git a/src/operator/FoldImpl.cpp b/src/operator/FoldImpl.cpp
new file mode 100644
index 00000000..a885db4c
--- /dev/null
+++ b/src/operator/FoldImpl.cpp
@@ -0,0 +1,37 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/operator/Conv.hpp"
+
+#include "aidge/backend/cpu/operator/FoldImpl.hpp"
+#include "aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp"
+
+void Aidge::FoldImpl2D_cpu::forward() {
+    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc =
+            Registrar<FoldImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+
+    // Call kernel
+    kernelFunc(dynamic_cast<const Fold_Op<2>&>(mOp).getStaticAttributes(),
+                        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
+                        getCPUPtr(mOp.getRawInput(0)),
+                        getCPUPtr(mOp.getRawOutput(0)));
+}
diff --git a/unit_tests/operator/Test_FoldImpl.cpp b/unit_tests/operator/Test_FoldImpl.cpp
new file mode 100644
index 00000000..079fca62
--- /dev/null
+++ b/unit_tests/operator/Test_FoldImpl.cpp
@@ -0,0 +1,179 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <cstdlib>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/graph/GraphView.hpp"
+#include "aidge/scheduler/SequentialScheduler.hpp"
+#include "aidge/operator/Fold.hpp"
+#include "aidge/operator/Unfold.hpp"
+#include "aidge/operator/MatMul.hpp"
+#include "aidge/operator/Reshape.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Fold(forward)", "[Fold][CPU]") {
+    std::shared_ptr<Node> myUnfold = Unfold({3,3}, "myunfold");
+    std::shared_ptr<Node> myReshape = Reshape({9, 12}, "myreshape");
+    std::shared_ptr<Node> myMatMul = MatMul("mymatmul");
+    std::shared_ptr<Node> myFold = Fold({3,3}, {3,3}, "myfold");
+    myUnfold->addChild(myMatMul, 0, 1);
+    myReshape->addChild(myMatMul, 0, 0);
+    myMatMul->addChild(myFold, 0, 0);
+
+    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
+        {
+            {
+                {{  0,   1,   2},
+                {  3,   4,   5},
+                {  6,   7,   8}},
+                {{  9,  10,  11},
+                { 12,  13,  14},
+                { 15,  16,  17}},
+                {{ 18,  19,  20},
+                { 21,  22,  23},
+                { 24,  25,  26}}
+            },
+            {
+                {{ 27,  28,  29},
+                { 30,  31,  32},
+                { 33,  34,  35}},
+                {{ 36,  37,  38},
+                { 39,  40,  41},
+                { 42,  43,  44}},
+                {{ 45,  46,  47},
+                { 48,  49,  50},
+                { 51,  52,  53}}
+            },
+            {
+                {{ 54,  55,  56},
+                { 57,  58,  59},
+                { 60,  61,  62}},
+                {{ 63,  64,  65},
+                { 66,  67,  68},
+                { 69,  70,  71}},
+                {{ 72,  73,  74},
+                { 75,  76,  77},
+                { 78,  79,  80}}
+            },
+            {
+                {{ 81,  82,  83},
+                { 84,  85,  86},
+                { 87,  88,  89}},
+                {{ 90,  91,  92},
+                { 93,  94,  95},
+                { 96,  97,  98}},
+                {{ 99, 100, 101},
+                {102, 103, 104},
+                {105, 106, 107}}
+            }
+        }
+    });
+    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
+        {
+            {
+                {{  0,   1,   2,   3,   4},
+                {  5,   6,   7,   8,   9},
+                { 10,  11,  12,  13,  14},
+                { 15,  16,  17,  18,  19},
+                { 20,  21,  22,  23,  24}},
+
+                {{ 25,  26,  27,  28,  29},
+                { 30,  31,  32,  33,  34},
+                { 35,  36,  37,  38,  39},
+                { 40,  41,  42,  43,  44},
+                { 45,  46,  47,  48,  49}},
+
+                {{ 50,  51,  52,  53,  54},
+                { 55,  56,  57,  58,  59},
+                { 60,  61,  62,  63,  64},
+                { 65,  66,  67,  68,  69},
+                { 70,  71,  72,  73,  74}}
+            },
+            {
+                {{ 75,  76,  77,  78,  79},
+                { 80,  81,  82,  83,  84},
+                { 85,  86,  87,  88,  89},
+                { 90,  91,  92,  93,  94},
+                { 95,  96,  97,  98,  99}},
+
+                {{100, 101, 102, 103, 104},
+                {105, 106, 107, 108, 109},
+                {110, 111, 112, 113, 114},
+                {115, 116, 117, 118, 119},
+                {120, 121, 122, 123, 124}},
+
+                {{125, 126, 127, 128, 129},
+                {130, 131, 132, 133, 134},
+                {135, 136, 137, 138, 139},
+                {140, 141, 142, 143, 144},
+                {145, 146, 147, 148, 149}}
+            }
+        }
+    });
+    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
+        {
+            {
+                {{ 15226,  15577,  15928},
+                { 16981,  17332,  17683},
+                { 18736,  19087,  19438}},
+                {{ 37818,  38898,  39978},
+                { 43218,  44298,  45378},
+                { 48618,  49698,  50778}},
+                {{ 60426,  62235,  64044},
+                { 69471,  71280,  73089},
+                { 78516,  80325,  82134}},
+                {{ 83016,  85554,  88092},
+                { 95706,  98244, 100782},
+                {108396, 110934, 113472}}
+            },
+            {
+                {{ 41551,  41902,  42253},
+                { 43306,  43657,  44008},
+                { 45061,  45412,  45763}},
+                {{118818, 119898, 120978},
+                {124218, 125298, 126378},
+                {129618, 130698, 131778}},
+                {{196101, 197910, 199719},
+                {205146, 206955, 208764},
+                {214191, 216000, 217809}},
+                {{273366, 275904, 278442},
+                {286056, 288594, 291132},
+                {298746, 301284, 303822}}
+            }
+        }
+    });
+
+    auto opUnfold = std::static_pointer_cast<OperatorTensor>(myUnfold -> getOperator());
+    auto opReshape = std::static_pointer_cast<OperatorTensor>(myReshape -> getOperator());
+    auto opMatMul = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
+    auto opFold = std::static_pointer_cast<OperatorTensor>(myFold -> getOperator());
+    opUnfold->associateInput(0,myInput);
+    opReshape->associateInput(0,myWeights);
+
+    auto g = getConnectedGraphView(myMatMul);
+    g->setDataType(DataType::Int32);
+    g->setBackend("cpu");
+
+    g->save("unfold_matmul_fold");
+    g->forwardDims();
+    g->save("unfold_matmul_fold");
+
+    SequentialScheduler scheduler(g);
+    scheduler.forward();
+    // op->getOutput(0)->print();
+    REQUIRE(*(opFold->getOutput(0)) == *myOutput);
+}
\ No newline at end of file
-- 
GitLab