diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp
index e9ad19291cc3907b3ef4a92370a3bafcabf37545..6434d23f1266bba5d4e7ae890dca2c2577e403b5 100644
--- a/include/aidge/backend/cpu/data/TensorImpl.hpp
+++ b/include/aidge/backend/cpu/data/TensorImpl.hpp
@@ -126,8 +126,12 @@ static Registrar<Tensor> registrarTensorImpl_cpu_Float16(
     {"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create);
 static Registrar<Tensor> registrarTensorImpl_cpu_Int64(
     {"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<int64_t>::create);
+static Registrar<Tensor> registrarTensorImpl_cpu_UInt64(
+    {"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create);
 static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
     {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int32_t>::create);
+static Registrar<Tensor> registrarTensorImpl_cpu_UInt32(
+    {"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
 static Registrar<Tensor> registrarTensorImpl_cpu_Int16(
     {"cpu", DataType::Int16}, Aidge::TensorImpl_cpu<int16_t>::create);
 static Registrar<Tensor> registrarTensorImpl_cpu_UInt16(
diff --git a/include/aidge/data/Tensor.hpp b/include/aidge/data/Tensor.hpp
index 3dbf54a5fa58be40b08f58d760f3991586203825..3737eb07e38d7651ca496c99be43d387ab5c4fa0 100644
--- a/include/aidge/data/Tensor.hpp
+++ b/include/aidge/data/Tensor.hpp
@@ -103,6 +103,22 @@ class Tensor : public Data,
         resize(dims);
     }
 
+    /**
+     * @brief Construct a new Tensor object from the 1-dimension Vector helper.
+     * @tparam T datatype
+     */
+    template <typename T>
+    constexpr Tensor(Vector<T> &&arr)
+        : Data(Type),
+          mDataType(NativeType<T>::type),
+          mDims({arr.data.size()}),
+          mStrides({1}),
+          mImpl(Registrar<Tensor>::create({"cpu", NativeType<T>::type})(0, {arr.data.size()})),
+          mSize(arr.data.size())
+    {
+        mImpl->copyFromHost(&arr.data[0], arr.data.size());
+    }
+
     /**
      * @brief Construct a new Tensor object from the 1-dimension Array helper.
      * @tparam T datatype
@@ -199,6 +215,12 @@ class Tensor : public Data,
      */
     Tensor &operator=(const Tensor& other);
 
+    template <typename T>
+    constexpr Tensor &operator=(Vector<T> &&arr) {
+        *this = Tensor(std::move(arr));
+        return *this;
+    }
+
     template <typename T, std::size_t SIZE_0>
     constexpr Tensor &operator=(Array1D<T, SIZE_0> &&arr) {
         *this = Tensor(std::move(arr));
diff --git a/include/aidge/recipes/Recipes.hpp b/include/aidge/recipes/Recipes.hpp
index 97c608cd38ca76a4f40b8fb02282751a97ceed4e..a56d914721081fd04fc782a7a8c9689371225b48 100644
--- a/include/aidge/recipes/Recipes.hpp
+++ b/include/aidge/recipes/Recipes.hpp
@@ -123,6 +123,8 @@ void explicitCastMove(std::shared_ptr<GraphView> graphView);
 */
 void expandMetaOps(std::shared_ptr<GraphView> graph, bool recursive = false);
 
+void matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims);
+
 } // namespace Aidge
 
 #endif /* AIDGE_CORE_UTILS_RECIPES_H_ */
diff --git a/include/aidge/utils/ArrayHelpers.hpp b/include/aidge/utils/ArrayHelpers.hpp
index b0db3ca11c10c10a3ce63c3c4809cf7ae09173da..4999ea53a11e0c2784ed4ae40243b18aabcda218 100644
--- a/include/aidge/utils/ArrayHelpers.hpp
+++ b/include/aidge/utils/ArrayHelpers.hpp
@@ -101,6 +101,11 @@ constexpr std::array<T, N + 1> append(T t, std::array<T, N> a) {
 }
 
 // Generic helper for initializing a Tensor
+template <typename T>
+struct Vector {
+    std::vector<T> data;
+};
+
 template <typename T, std::size_t SIZE_0>
 struct Array1D {
     T data[SIZE_0];
diff --git a/src/recipes/MatMulTiling.cpp b/src/recipes/MatMulTiling.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d21c7d406d164721aaef4017db4211dd5d0d3bd8
--- /dev/null
+++ b/src/recipes/MatMulTiling.cpp
@@ -0,0 +1,104 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+#include <cassert>
+#include <memory>
+#include <set>
+#include <string>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/graph/GraphView.hpp"
+#include "aidge/graph/Node.hpp"
+#include "aidge/operator/Producer.hpp"
+#include "aidge/operator/MatMul.hpp"
+#include "aidge/operator/Slice.hpp"
+#include "aidge/operator/Identity.hpp"
+#include "aidge/operator/Concat.hpp"
+#include "aidge/recipes/Recipes.hpp"
+#include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Types.h"
+
+// see https://en.wikipedia.org/wiki/Matrix_multiplication_algorithm
+void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims) {
+    if (matMul->getOperator()->type() != "MatMul") {
+        AIDGE_INTERNAL_ASSERT("Operator should be a MatMul.");  // NOTE(review): string literal is always truthy — confirm this should be a failing assert/throw
+    }
+    AIDGE_ASSERT(matMul->getOperator()->operatorType() == OperatorType::Tensor, "Operator must be of Tensor type.");
+    const auto& op = std::static_pointer_cast<OperatorTensor>(matMul->getOperator());
+    if (!op->dimsForwarded()) {
+        AIDGE_INTERNAL_ASSERT("Dimensions must be forwarded before any tiling");  // NOTE(review): same always-truthy concern as above
+    }
+
+    const auto& in0Tensor = op->getInput(0);
+    const auto& in1Tensor = op->getInput(1);
+    const auto& outTensor = op->getOutput(0);
+    const auto& input0Dims = in0Tensor->dims();
+    const auto& input1Dims = in1Tensor->dims();
+    const auto& outputDims = outTensor->dims();
+    const auto& outputMatDims = std::vector<std::size_t>(outputDims.end() - 2, outputDims.end());
+
+    if (outputMatDims[0] > maxDims[0]) {
+        const size_t axis = 0;
+        const auto splitIndex = outputMatDims[axis] / 2;
+
+        auto identity0 = Identity();
+        auto slice00 = Slice();
+        auto slice00_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 0}}), "", true);
+        slice00_starts->addChild(slice00, 0, 1);
+        auto slice00_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, input0Dims[1]}}), "", true);
+        slice00_ends->addChild(slice00, 0, 2);
+        auto slice00_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true);
+        slice00_axes->addChild(slice00, 0, 3);
+        auto matMul00 = MatMul();
+        auto identity1 = Identity();
+        auto slice01 = Slice();
+        auto slice01_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, 0}}), "", true);
+        slice01_starts->addChild(slice01, 0, 1);
+        auto slice01_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{input0Dims[0], input0Dims[1]}}), "", true);
+        slice01_ends->addChild(slice01, 0, 2);
+        auto slice01_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true);
+        slice01_axes->addChild(slice01, 0, 3);
+        auto matMul01 = MatMul();
+        auto concat0 = Concat(2, axis);
+
+        identity0->addChild(slice00, 0, 0);
+        identity0->addChild(slice01, 0, 0);
+        identity1->addChild(matMul00, 0, 1);
+        identity1->addChild(matMul01, 0, 1);
+        slice00->addChild(matMul00, 0, 0);
+        slice01->addChild(matMul01, 0, 0);
+        matMul00->addChild(concat0, 0, 0);
+        matMul01->addChild(concat0, 0, 1);
+
+        auto gMatMul = std::make_shared<GraphView>();
+        gMatMul->add({matMul});
+
+        auto g = std::make_shared<GraphView>();
+        g->add({identity0, identity1});
+        g->add({slice00, matMul00, matMul01, slice01, concat0});
+        g->save("micrograph");
+
+        auto replaced = GraphView::replace(gMatMul, g);
+
+        if (replaced) {
+            g->forwardDims();
+
+            // Recursive tiling
+            matMulTiling(matMul00, maxDims);
+            matMulTiling(matMul01, maxDims);
+        }
+        else {
+            Log::warn("Unable to split MatMul {}", matMul->name());
+        }
+    }
+    else if (outputMatDims[1] > maxDims[1]) {
+        // TODO(review): tiling along the second output dimension is not implemented yet
+    }
+}
diff --git a/unit_tests/recipes/Test_MatMulTiling.cpp b/unit_tests/recipes/Test_MatMulTiling.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1f98af94beb9a3398db590a7fdf66b29ff923f88
--- /dev/null
+++ b/unit_tests/recipes/Test_MatMulTiling.cpp
@@ -0,0 +1,48 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "aidge/recipes/Recipes.hpp"
+#include "aidge/operator/MatMul.hpp"
+#include "aidge/operator/AvgPooling.hpp"
+#include "aidge/operator/MaxPooling.hpp"
+#include "aidge/operator/GenericOperator.hpp"
+#include "aidge/operator/Producer.hpp"
+#include "aidge/graph/OpArgs.hpp"
+#include <cstddef>
+
+using namespace Aidge;
+
+TEST_CASE("[MatMulTiling]") {
+    auto dataProvider = Producer({16, 3, 224, 224}, "dataProvider");
+    auto w1 = Producer({16, 3, 224, 224}, "w1");
+    auto matmul1 = MatMul("matmul1");
+    auto w2 = Producer({16, 3, 224, 224}, "w2");
+    auto matmul2 = MatMul("matmul2");
+    auto w3 = Producer({16, 3, 224, 224}, "w3");
+    auto matmul3 = MatMul("matmul3");
+
+    dataProvider->addChild(matmul1, 0, 0);
+    w1->addChild(matmul1, 0, 1);
+    matmul1->addChild(matmul2, 0, 0);
+    w2->addChild(matmul2, 0, 1);
+    matmul2->addChild(matmul3, 0, 0);
+    w3->addChild(matmul3, 0, 1);
+
+    auto g1 = getConnectedGraphView(matmul1);
+    g1->forwardDims();
+    g1->save("MatMulSplitting_graph");
+
+    matMulTiling(matmul1, {16, 16});
+
+    g1->save("MatMulSplitting_graph_split");
+}