From b30d090173956f60b4dd3fe4a6df12d7bdc457e5 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Thu, 7 Nov 2024 16:01:55 +0100 Subject: [PATCH] Working prototype --- include/aidge/backend/cpu/data/TensorImpl.hpp | 39 +++++---------- src/operator/MatMul.cpp | 2 +- src/recipes/MatMulTiling.cpp | 26 +++++----- unit_tests/recipes/Test_MatMulTiling.cpp | 48 ------------------- 4 files changed, 25 insertions(+), 90 deletions(-) delete mode 100644 unit_tests/recipes/Test_MatMulTiling.cpp diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp index 234bd0ab7..fd2a0b3f4 100644 --- a/include/aidge/backend/cpu/data/TensorImpl.hpp +++ b/include/aidge/backend/cpu/data/TensorImpl.hpp @@ -119,34 +119,17 @@ private: template <typename T> const std::string TensorImpl_cpu<T>::Backend = "cpu"; -namespace { -static Registrar<Tensor> registrarTensorImpl_cpu_Float64( - {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Float32( - {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Float16( - {"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Int64( - {"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<int64_t>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_UInt64( - {"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Int32( - {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int32_t>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_UInt32( - {"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Int16( - {"cpu", DataType::Int16}, Aidge::TensorImpl_cpu<int16_t>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Int8( - {"cpu", DataType::Int8}, Aidge::TensorImpl_cpu<int8_t>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_UInt64( - {"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_UInt32( - {"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_UInt16( - {"cpu", DataType::UInt16}, Aidge::TensorImpl_cpu<uint16_t>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_UInt8( - {"cpu", DataType::UInt8}, Aidge::TensorImpl_cpu<uint8_t>::create); -} // namespace +REGISTRAR(Tensor, {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); +REGISTRAR(Tensor, {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); +REGISTRAR(Tensor, {"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create); +REGISTRAR(Tensor, {"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<int64_t>::create); +REGISTRAR(Tensor, {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int32_t>::create); +REGISTRAR(Tensor, {"cpu", DataType::Int16}, Aidge::TensorImpl_cpu<int16_t>::create); +REGISTRAR(Tensor, {"cpu", DataType::Int8}, Aidge::TensorImpl_cpu<int8_t>::create); +REGISTRAR(Tensor, {"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create); +REGISTRAR(Tensor, {"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create); +REGISTRAR(Tensor, {"cpu", DataType::UInt16}, Aidge::TensorImpl_cpu<uint16_t>::create); +REGISTRAR(Tensor, {"cpu", DataType::UInt8}, Aidge::TensorImpl_cpu<uint8_t>::create); } // namespace Aidge #endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */ diff --git a/src/operator/MatMul.cpp b/src/operator/MatMul.cpp index 668ffd04b..8fd2aa068 100644 --- a/src/operator/MatMul.cpp +++ b/src/operator/MatMul.cpp @@ -71,7 +71,7 @@ bool Aidge::MatMul_Op::forwardDims(bool /*allowDataDependency*/) { std::vector<std::size_t> outDims = std::vector<std::size_t>(dims_size-2, 1); for (std::size_t i = 0; i < dims_size-2; ++i) { - AIDGE_ASSERT((dims0[i] == dims1[i]) || (dims0[i] == 1) || (dims1[i] == 1), "Bad vector dimension."); + AIDGE_ASSERT((dims0[i] == dims1[i]) || (dims0[i] == 1) || (dims1[i] == 1), "Bad dimension {}: {} != {} for input #0 {} and #1 {}.", i, dims0[i], dims1[i], dims0, dims1); outDims[i] = std::max(dims0[i], dims1[i]); } diff --git a/src/recipes/MatMulTiling.cpp b/src/recipes/MatMulTiling.cpp index b3a3c18d4..1e6858dff 100644 --- a/src/recipes/MatMulTiling.cpp +++ b/src/recipes/MatMulTiling.cpp @@ -45,29 +45,29 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims) const auto& outputMatDims = std::vector<std::size_t>(outputDims.end() - 2, outputDims.end());; if (outputMatDims[0] > maxDims[0]) { - const size_t axis = 0; - const auto splitIndex = outputMatDims[axis] / 2; + const std::int32_t axis = -2; + const std::int64_t splitIndex = maxDims[0]; auto identity0 = Identity(); auto slice00 = Slice(); - auto slice00_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 0}}), "", true); + auto slice00_starts = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{0, 0}}), "", true); slice00_starts->addChild(slice00, 0, 1); - auto slice00_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, input0Dims[1]}}), "", true); + auto slice00_ends = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{splitIndex, static_cast<std::int64_t>(input0Dims.end()[-1])}}), "", true); slice00_ends->addChild(slice00, 0, 2); - auto slice00_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true); + auto slice00_axes = Producer(std::make_shared<Tensor>(Vector<std::int8_t>{{-2, -1}}), "", true); slice00_axes->addChild(slice00, 0, 3); - auto slice00_steps = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{1, 1}}), "", true); + auto slice00_steps = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{1, 1}}), "", true); slice00_steps->addChild(slice00, 0, 4); auto matMul00 = MatMul(); auto identity1 = Identity(); auto slice01 = Slice(); - auto slice01_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, 0}}), "", true); + auto slice01_starts = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{splitIndex, 0}}), "", true); slice01_starts->addChild(slice01, 0, 1); - auto slice01_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{input0Dims[0], input0Dims[1]}}), "", true); + auto slice01_ends = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{static_cast<std::int64_t>(input0Dims.end()[-2]), static_cast<std::int64_t>(input0Dims.end()[-1])}}), "", true); slice01_ends->addChild(slice01, 0, 2); - auto slice01_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true); + auto slice01_axes = Producer(std::make_shared<Tensor>(Vector<std::int8_t>{{-2, -1}}), "", true); slice01_axes->addChild(slice01, 0, 3); - auto slice01_steps = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{1, 1}}), "", true); + auto slice01_steps = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{1, 1}}), "", true); slice01_steps->addChild(slice01, 0, 4); auto matMul01 = MatMul(); auto concat0 = Concat(2, axis); @@ -85,17 +85,17 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims) gMatMul->add({matMul}); auto g = std::make_shared<GraphView>(); - g->add({identity0, identity1}); + g->add({identity0}); + g->add({identity1}); g->add({slice00, slice00_starts, slice00_ends, slice00_axes, slice00_steps, matMul00, matMul01, slice01, slice01_starts, slice01_ends, slice01_axes, slice01_steps, concat0}); - g->save("micrograph"); auto replaced = GraphView::replace(gMatMul, g); if (replaced) { g->forwardDims({}, true); + g->save("micrograph"); // Recursive tiling - matMulTiling(matMul00, maxDims); matMulTiling(matMul01, maxDims); } else { diff --git a/unit_tests/recipes/Test_MatMulTiling.cpp b/unit_tests/recipes/Test_MatMulTiling.cpp deleted file mode 100644 index 1f98af94b..000000000 --- a/unit_tests/recipes/Test_MatMulTiling.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#include <catch2/catch_test_macros.hpp> - -#include "aidge/recipes/Recipes.hpp" -#include "aidge/operator/MatMul.hpp" -#include "aidge/operator/AvgPooling.hpp" -#include "aidge/operator/MaxPooling.hpp" -#include "aidge/operator/GenericOperator.hpp" -#include "aidge/operator/Producer.hpp" -#include "aidge/graph/OpArgs.hpp" -#include <cstddef> - -using namespace Aidge; - -TEST_CASE("[MatMulTiling]") { - auto dataProvider = Producer({16, 3, 224, 224}, "dataProvider"); - auto w1 = Producer({16, 3, 224, 224}, "w1"); - auto matmul1 = MatMul("matmul1"); - auto w2 = Producer({16, 3, 224, 224}, "w1"); - auto matmul2 = MatMul("matmul2"); - auto w3 = Producer({16, 3, 224, 224}, "w1"); - auto matmul3 = MatMul("matmul3"); - - dataProvider->addChild(matmul1, 0, 0); - w1->addChild(matmul1, 0, 1); - matmul1->addChild(matmul2, 0, 0); - w2->addChild(matmul2, 0, 1); - matmul2->addChild(matmul3, 0, 0); - w3->addChild(matmul3, 0, 1); - - auto g1 = getConnectedGraphView(matmul1); - g1->forwardDims(); - g1->save("MatMulSplitting_graph"); - - matMulTiling(matmul1, {16, 16}); - - g1->save("MatMulSplitting_graph_split"); -} -- GitLab