From b30d090173956f60b4dd3fe4a6df12d7bdc457e5 Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Thu, 7 Nov 2024 16:01:55 +0100
Subject: [PATCH] Working prototype: use REGISTRAR macro for cpu TensorImpl, improve MatMul dim-mismatch message, rework MatMul tiling to negative-axis int64 Slice inputs

---
 include/aidge/backend/cpu/data/TensorImpl.hpp | 39 +++++----------
 src/operator/MatMul.cpp                       |  2 +-
 src/recipes/MatMulTiling.cpp                  | 26 +++++-----
 unit_tests/recipes/Test_MatMulTiling.cpp      | 48 -------------------
 4 files changed, 25 insertions(+), 90 deletions(-)
 delete mode 100644 unit_tests/recipes/Test_MatMulTiling.cpp

diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp
index 234bd0ab7..fd2a0b3f4 100644
--- a/include/aidge/backend/cpu/data/TensorImpl.hpp
+++ b/include/aidge/backend/cpu/data/TensorImpl.hpp
@@ -119,34 +119,17 @@ private:
 template <typename T>
 const std::string TensorImpl_cpu<T>::Backend = "cpu";
 
-namespace {
-static Registrar<Tensor> registrarTensorImpl_cpu_Float64(
-        {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_Float32(
-        {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_Float16(
-        {"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_Int64(
-        {"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<int64_t>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_UInt64(
-        {"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
-        {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int32_t>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_UInt32(
-        {"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_Int16(
-        {"cpu", DataType::Int16}, Aidge::TensorImpl_cpu<int16_t>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_Int8(
-        {"cpu", DataType::Int8}, Aidge::TensorImpl_cpu<int8_t>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_UInt64(
-        {"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_UInt32(
-        {"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_UInt16(
-        {"cpu", DataType::UInt16}, Aidge::TensorImpl_cpu<uint16_t>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_UInt8(
-        {"cpu", DataType::UInt8}, Aidge::TensorImpl_cpu<uint8_t>::create);
-}  // namespace
+REGISTRAR(Tensor, {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
+REGISTRAR(Tensor, {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
+REGISTRAR(Tensor, {"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create);
+REGISTRAR(Tensor, {"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<int64_t>::create);
+REGISTRAR(Tensor, {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int32_t>::create);
+REGISTRAR(Tensor, {"cpu", DataType::Int16}, Aidge::TensorImpl_cpu<int16_t>::create);
+REGISTRAR(Tensor, {"cpu", DataType::Int8}, Aidge::TensorImpl_cpu<int8_t>::create);
+REGISTRAR(Tensor, {"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create);
+REGISTRAR(Tensor, {"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
+REGISTRAR(Tensor, {"cpu", DataType::UInt16}, Aidge::TensorImpl_cpu<uint16_t>::create);
+REGISTRAR(Tensor, {"cpu", DataType::UInt8}, Aidge::TensorImpl_cpu<uint8_t>::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */
diff --git a/src/operator/MatMul.cpp b/src/operator/MatMul.cpp
index 668ffd04b..8fd2aa068 100644
--- a/src/operator/MatMul.cpp
+++ b/src/operator/MatMul.cpp
@@ -71,7 +71,7 @@ bool Aidge::MatMul_Op::forwardDims(bool /*allowDataDependency*/) {
 
             std::vector<std::size_t> outDims = std::vector<std::size_t>(dims_size-2, 1);
             for (std::size_t i = 0; i < dims_size-2; ++i) {
-                AIDGE_ASSERT((dims0[i] == dims1[i]) || (dims0[i] == 1) || (dims1[i] == 1), "Bad vector dimension.");
+                AIDGE_ASSERT((dims0[i] == dims1[i]) || (dims0[i] == 1) || (dims1[i] == 1), "Bad dimension {}: {} != {} for input #0 {} and #1 {}.", i, dims0[i], dims1[i], dims0, dims1);
                 outDims[i] = std::max(dims0[i], dims1[i]);
             }
 
diff --git a/src/recipes/MatMulTiling.cpp b/src/recipes/MatMulTiling.cpp
index b3a3c18d4..1e6858dff 100644
--- a/src/recipes/MatMulTiling.cpp
+++ b/src/recipes/MatMulTiling.cpp
@@ -45,29 +45,29 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims)
     const auto& outputMatDims = std::vector<std::size_t>(outputDims.end() - 2, outputDims.end());;
 
     if (outputMatDims[0] > maxDims[0]) {
-        const size_t axis = 0;
-        const auto splitIndex = outputMatDims[axis] / 2;
+        const std::int32_t axis = -2;
+        const std::int64_t splitIndex = maxDims[0];
 
         auto identity0 = Identity();
         auto slice00 = Slice();
-        auto slice00_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 0}}), "", true);
+        auto slice00_starts = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{0, 0}}), "", true);
         slice00_starts->addChild(slice00, 0, 1);
-        auto slice00_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, input0Dims[1]}}), "", true);
+        auto slice00_ends = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{splitIndex, static_cast<std::int64_t>(input0Dims.end()[-1])}}), "", true);
         slice00_ends->addChild(slice00, 0, 2);
-        auto slice00_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true);
+        auto slice00_axes = Producer(std::make_shared<Tensor>(Vector<std::int8_t>{{-2, -1}}), "", true);
         slice00_axes->addChild(slice00, 0, 3);
-        auto slice00_steps = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{1, 1}}), "", true);
+        auto slice00_steps = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{1, 1}}), "", true);
         slice00_steps->addChild(slice00, 0, 4);
         auto matMul00 = MatMul();
         auto identity1 = Identity();
         auto slice01 = Slice();
-        auto slice01_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, 0}}), "", true);
+        auto slice01_starts = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{splitIndex, 0}}), "", true);
         slice01_starts->addChild(slice01, 0, 1);
-        auto slice01_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{input0Dims[0], input0Dims[1]}}), "", true);
+        auto slice01_ends = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{static_cast<std::int64_t>(input0Dims.end()[-2]), static_cast<std::int64_t>(input0Dims.end()[-1])}}), "", true);
         slice01_ends->addChild(slice01, 0, 2);
-        auto slice01_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true);
+        auto slice01_axes = Producer(std::make_shared<Tensor>(Vector<std::int8_t>{{-2, -1}}), "", true);
         slice01_axes->addChild(slice01, 0, 3);
-        auto slice01_steps = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{1, 1}}), "", true);
+        auto slice01_steps = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{1, 1}}), "", true);
         slice01_steps->addChild(slice01, 0, 4);
         auto matMul01 = MatMul();
         auto concat0 = Concat(2, axis);
@@ -85,17 +85,17 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims)
         gMatMul->add({matMul});
 
         auto g = std::make_shared<GraphView>();
-        g->add({identity0, identity1});
+        g->add({identity0});
+        g->add({identity1});
         g->add({slice00, slice00_starts, slice00_ends, slice00_axes, slice00_steps, matMul00, matMul01, slice01, slice01_starts, slice01_ends, slice01_axes, slice01_steps, concat0});
-        g->save("micrograph");
 
         auto replaced = GraphView::replace(gMatMul, g);
 
         if (replaced) {
             g->forwardDims({}, true);
+            g->save("micrograph");
 
             // Recursive tiling
-            matMulTiling(matMul00, maxDims);
             matMulTiling(matMul01, maxDims);
         }
         else {
diff --git a/unit_tests/recipes/Test_MatMulTiling.cpp b/unit_tests/recipes/Test_MatMulTiling.cpp
deleted file mode 100644
index 1f98af94b..000000000
--- a/unit_tests/recipes/Test_MatMulTiling.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <catch2/catch_test_macros.hpp>
-
-#include "aidge/recipes/Recipes.hpp"
-#include "aidge/operator/MatMul.hpp"
-#include "aidge/operator/AvgPooling.hpp"
-#include "aidge/operator/MaxPooling.hpp"
-#include "aidge/operator/GenericOperator.hpp"
-#include "aidge/operator/Producer.hpp"
-#include "aidge/graph/OpArgs.hpp"
-#include <cstddef>
-
-using namespace Aidge;
-
-TEST_CASE("[MatMulTiling]") {
-    auto dataProvider = Producer({16, 3, 224, 224}, "dataProvider");
-    auto w1 = Producer({16, 3, 224, 224}, "w1");
-    auto matmul1 = MatMul("matmul1");
-    auto w2 = Producer({16, 3, 224, 224}, "w1");
-    auto matmul2 = MatMul("matmul2");
-    auto w3 = Producer({16, 3, 224, 224}, "w1");
-    auto matmul3 = MatMul("matmul3");
-
-    dataProvider->addChild(matmul1, 0, 0);
-    w1->addChild(matmul1, 0, 1);
-    matmul1->addChild(matmul2, 0, 0);
-    w2->addChild(matmul2, 0, 1);
-    matmul2->addChild(matmul3, 0, 0);
-    w3->addChild(matmul3, 0, 1);
-
-    auto g1 = getConnectedGraphView(matmul1);
-    g1->forwardDims();
-    g1->save("MatMulSplitting_graph");
-
-    matMulTiling(matmul1, {16, 16});
-    
-    g1->save("MatMulSplitting_graph_split");
-}
-- 
GitLab