Skip to content
Snippets Groups Projects
Commit b30d0901 authored by Olivier BICHLER's avatar Olivier BICHLER
Browse files

Working prototype

parent 19402c50
No related branches found
No related tags found
3 merge requests!279v0.4.0,!253v0.4.0,!244Add MatMulTiling recipe
Pipeline #58595 canceled
...@@ -119,34 +119,17 @@ private: ...@@ -119,34 +119,17 @@ private:
template <typename T> template <typename T>
const std::string TensorImpl_cpu<T>::Backend = "cpu"; const std::string TensorImpl_cpu<T>::Backend = "cpu";
namespace { REGISTRAR(Tensor, {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float64( REGISTRAR(Tensor, {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
{"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); REGISTRAR(Tensor, {"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float32( REGISTRAR(Tensor, {"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<int64_t>::create);
{"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); REGISTRAR(Tensor, {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float16( REGISTRAR(Tensor, {"cpu", DataType::Int16}, Aidge::TensorImpl_cpu<int16_t>::create);
{"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create); REGISTRAR(Tensor, {"cpu", DataType::Int8}, Aidge::TensorImpl_cpu<int8_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int64( REGISTRAR(Tensor, {"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create);
{"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<int64_t>::create); REGISTRAR(Tensor, {"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt64( REGISTRAR(Tensor, {"cpu", DataType::UInt16}, Aidge::TensorImpl_cpu<uint16_t>::create);
{"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create); REGISTRAR(Tensor, {"cpu", DataType::UInt8}, Aidge::TensorImpl_cpu<uint8_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
{"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt32(
{"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int16(
{"cpu", DataType::Int16}, Aidge::TensorImpl_cpu<int16_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int8(
{"cpu", DataType::Int8}, Aidge::TensorImpl_cpu<int8_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt64(
{"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt32(
{"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt16(
{"cpu", DataType::UInt16}, Aidge::TensorImpl_cpu<uint16_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt8(
{"cpu", DataType::UInt8}, Aidge::TensorImpl_cpu<uint8_t>::create);
} // namespace
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */ #endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */
...@@ -71,7 +71,7 @@ bool Aidge::MatMul_Op::forwardDims(bool /*allowDataDependency*/) { ...@@ -71,7 +71,7 @@ bool Aidge::MatMul_Op::forwardDims(bool /*allowDataDependency*/) {
std::vector<std::size_t> outDims = std::vector<std::size_t>(dims_size-2, 1); std::vector<std::size_t> outDims = std::vector<std::size_t>(dims_size-2, 1);
for (std::size_t i = 0; i < dims_size-2; ++i) { for (std::size_t i = 0; i < dims_size-2; ++i) {
AIDGE_ASSERT((dims0[i] == dims1[i]) || (dims0[i] == 1) || (dims1[i] == 1), "Bad vector dimension."); AIDGE_ASSERT((dims0[i] == dims1[i]) || (dims0[i] == 1) || (dims1[i] == 1), "Bad dimension {}: {} != {} for input #0 {} and #1 {}.", i, dims0[i], dims1[i], dims0, dims1);
outDims[i] = std::max(dims0[i], dims1[i]); outDims[i] = std::max(dims0[i], dims1[i]);
} }
......
...@@ -45,29 +45,29 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims) ...@@ -45,29 +45,29 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims)
const auto& outputMatDims = std::vector<std::size_t>(outputDims.end() - 2, outputDims.end());; const auto& outputMatDims = std::vector<std::size_t>(outputDims.end() - 2, outputDims.end());;
if (outputMatDims[0] > maxDims[0]) { if (outputMatDims[0] > maxDims[0]) {
const size_t axis = 0; const std::int32_t axis = -2;
const auto splitIndex = outputMatDims[axis] / 2; const std::int64_t splitIndex = maxDims[0];
auto identity0 = Identity(); auto identity0 = Identity();
auto slice00 = Slice(); auto slice00 = Slice();
auto slice00_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 0}}), "", true); auto slice00_starts = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{0, 0}}), "", true);
slice00_starts->addChild(slice00, 0, 1); slice00_starts->addChild(slice00, 0, 1);
auto slice00_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, input0Dims[1]}}), "", true); auto slice00_ends = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{splitIndex, static_cast<std::int64_t>(input0Dims.end()[-1])}}), "", true);
slice00_ends->addChild(slice00, 0, 2); slice00_ends->addChild(slice00, 0, 2);
auto slice00_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true); auto slice00_axes = Producer(std::make_shared<Tensor>(Vector<std::int8_t>{{-2, -1}}), "", true);
slice00_axes->addChild(slice00, 0, 3); slice00_axes->addChild(slice00, 0, 3);
auto slice00_steps = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{1, 1}}), "", true); auto slice00_steps = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{1, 1}}), "", true);
slice00_steps->addChild(slice00, 0, 4); slice00_steps->addChild(slice00, 0, 4);
auto matMul00 = MatMul(); auto matMul00 = MatMul();
auto identity1 = Identity(); auto identity1 = Identity();
auto slice01 = Slice(); auto slice01 = Slice();
auto slice01_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, 0}}), "", true); auto slice01_starts = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{splitIndex, 0}}), "", true);
slice01_starts->addChild(slice01, 0, 1); slice01_starts->addChild(slice01, 0, 1);
auto slice01_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{input0Dims[0], input0Dims[1]}}), "", true); auto slice01_ends = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{static_cast<std::int64_t>(input0Dims.end()[-2]), static_cast<std::int64_t>(input0Dims.end()[-1])}}), "", true);
slice01_ends->addChild(slice01, 0, 2); slice01_ends->addChild(slice01, 0, 2);
auto slice01_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true); auto slice01_axes = Producer(std::make_shared<Tensor>(Vector<std::int8_t>{{-2, -1}}), "", true);
slice01_axes->addChild(slice01, 0, 3); slice01_axes->addChild(slice01, 0, 3);
auto slice01_steps = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{1, 1}}), "", true); auto slice01_steps = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{1, 1}}), "", true);
slice01_steps->addChild(slice01, 0, 4); slice01_steps->addChild(slice01, 0, 4);
auto matMul01 = MatMul(); auto matMul01 = MatMul();
auto concat0 = Concat(2, axis); auto concat0 = Concat(2, axis);
...@@ -85,17 +85,17 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims) ...@@ -85,17 +85,17 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims)
gMatMul->add({matMul}); gMatMul->add({matMul});
auto g = std::make_shared<GraphView>(); auto g = std::make_shared<GraphView>();
g->add({identity0, identity1}); g->add({identity0});
g->add({identity1});
g->add({slice00, slice00_starts, slice00_ends, slice00_axes, slice00_steps, matMul00, matMul01, slice01, slice01_starts, slice01_ends, slice01_axes, slice01_steps, concat0}); g->add({slice00, slice00_starts, slice00_ends, slice00_axes, slice00_steps, matMul00, matMul01, slice01, slice01_starts, slice01_ends, slice01_axes, slice01_steps, concat0});
g->save("micrograph");
auto replaced = GraphView::replace(gMatMul, g); auto replaced = GraphView::replace(gMatMul, g);
if (replaced) { if (replaced) {
g->forwardDims({}, true); g->forwardDims({}, true);
g->save("micrograph");
// Recursive tiling // Recursive tiling
matMulTiling(matMul00, maxDims);
matMulTiling(matMul01, maxDims); matMulTiling(matMul01, maxDims);
} }
else { else {
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include "aidge/recipes/Recipes.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/AvgPooling.hpp"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/operator/GenericOperator.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/graph/OpArgs.hpp"
#include <cstddef>
using namespace Aidge;
TEST_CASE("[MatMulTiling]") {
    // Builds a chain of three MatMul nodes, each fed by its own weight
    // producer, then applies the MatMulTiling recipe to the first one:
    //   dataProvider -> matmul1 -> matmul2 -> matmul3
    //          w1 ----^    w2 ----^    w3 ----^
    auto dataProvider = Producer({16, 3, 224, 224}, "dataProvider");
    auto w1 = Producer({16, 3, 224, 224}, "w1");
    auto matmul1 = MatMul("matmul1");
    // Fixed copy-paste bug: w2 and w3 were both named "w1", which made the
    // saved graph dumps ambiguous. Each producer now has a distinct name.
    auto w2 = Producer({16, 3, 224, 224}, "w2");
    auto matmul2 = MatMul("matmul2");
    auto w3 = Producer({16, 3, 224, 224}, "w3");
    auto matmul3 = MatMul("matmul3");
    dataProvider->addChild(matmul1, 0, 0);
    w1->addChild(matmul1, 0, 1);
    matmul1->addChild(matmul2, 0, 0);
    w2->addChild(matmul2, 0, 1);
    matmul2->addChild(matmul3, 0, 0);
    w3->addChild(matmul3, 0, 1);
    // Resolve the full graph and propagate tensor dimensions before tiling.
    auto g1 = getConnectedGraphView(matmul1);
    g1->forwardDims();
    g1->save("MatMulSplitting_graph");
    // Tile matmul1 so that no resulting MatMul output exceeds 16x16 on its
    // last two dimensions; the recipe recurses on the produced sub-MatMuls.
    matMulTiling(matmul1, {16, 16});
    g1->save("MatMulSplitting_graph_split");
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment