Skip to content
Snippets Groups Projects
Commit b30d0901 authored by Olivier BICHLER's avatar Olivier BICHLER
Browse files

Working prototype

parent 19402c50
No related branches found
No related tags found
3 merge requests!279v0.4.0,!253v0.4.0,!244Add MatMulTiling recipe
Pipeline #58595 canceled
...@@ -119,34 +119,17 @@ private: ...@@ -119,34 +119,17 @@ private:
template <typename T> template <typename T>
const std::string TensorImpl_cpu<T>::Backend = "cpu"; const std::string TensorImpl_cpu<T>::Backend = "cpu";
namespace { REGISTRAR(Tensor, {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float64( REGISTRAR(Tensor, {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
{"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); REGISTRAR(Tensor, {"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float32( REGISTRAR(Tensor, {"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<int64_t>::create);
{"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); REGISTRAR(Tensor, {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float16( REGISTRAR(Tensor, {"cpu", DataType::Int16}, Aidge::TensorImpl_cpu<int16_t>::create);
{"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create); REGISTRAR(Tensor, {"cpu", DataType::Int8}, Aidge::TensorImpl_cpu<int8_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int64( REGISTRAR(Tensor, {"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create);
{"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<int64_t>::create); REGISTRAR(Tensor, {"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt64( REGISTRAR(Tensor, {"cpu", DataType::UInt16}, Aidge::TensorImpl_cpu<uint16_t>::create);
{"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create); REGISTRAR(Tensor, {"cpu", DataType::UInt8}, Aidge::TensorImpl_cpu<uint8_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
{"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt32(
{"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int16(
{"cpu", DataType::Int16}, Aidge::TensorImpl_cpu<int16_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int8(
{"cpu", DataType::Int8}, Aidge::TensorImpl_cpu<int8_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt64(
{"cpu", DataType::UInt64}, Aidge::TensorImpl_cpu<uint64_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt32(
{"cpu", DataType::UInt32}, Aidge::TensorImpl_cpu<uint32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt16(
{"cpu", DataType::UInt16}, Aidge::TensorImpl_cpu<uint16_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_UInt8(
{"cpu", DataType::UInt8}, Aidge::TensorImpl_cpu<uint8_t>::create);
} // namespace
} // namespace Aidge } // namespace Aidge
#endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */ #endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */
...@@ -71,7 +71,7 @@ bool Aidge::MatMul_Op::forwardDims(bool /*allowDataDependency*/) { ...@@ -71,7 +71,7 @@ bool Aidge::MatMul_Op::forwardDims(bool /*allowDataDependency*/) {
std::vector<std::size_t> outDims = std::vector<std::size_t>(dims_size-2, 1); std::vector<std::size_t> outDims = std::vector<std::size_t>(dims_size-2, 1);
for (std::size_t i = 0; i < dims_size-2; ++i) { for (std::size_t i = 0; i < dims_size-2; ++i) {
AIDGE_ASSERT((dims0[i] == dims1[i]) || (dims0[i] == 1) || (dims1[i] == 1), "Bad vector dimension."); AIDGE_ASSERT((dims0[i] == dims1[i]) || (dims0[i] == 1) || (dims1[i] == 1), "Bad dimension {}: {} != {} for input #0 {} and #1 {}.", i, dims0[i], dims1[i], dims0, dims1);
outDims[i] = std::max(dims0[i], dims1[i]); outDims[i] = std::max(dims0[i], dims1[i]);
} }
......
...@@ -45,29 +45,29 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims) ...@@ -45,29 +45,29 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims)
const auto& outputMatDims = std::vector<std::size_t>(outputDims.end() - 2, outputDims.end());; const auto& outputMatDims = std::vector<std::size_t>(outputDims.end() - 2, outputDims.end());;
if (outputMatDims[0] > maxDims[0]) { if (outputMatDims[0] > maxDims[0]) {
const size_t axis = 0; const std::int32_t axis = -2;
const auto splitIndex = outputMatDims[axis] / 2; const std::int64_t splitIndex = maxDims[0];
auto identity0 = Identity(); auto identity0 = Identity();
auto slice00 = Slice(); auto slice00 = Slice();
auto slice00_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 0}}), "", true); auto slice00_starts = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{0, 0}}), "", true);
slice00_starts->addChild(slice00, 0, 1); slice00_starts->addChild(slice00, 0, 1);
auto slice00_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, input0Dims[1]}}), "", true); auto slice00_ends = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{splitIndex, static_cast<std::int64_t>(input0Dims.end()[-1])}}), "", true);
slice00_ends->addChild(slice00, 0, 2); slice00_ends->addChild(slice00, 0, 2);
auto slice00_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true); auto slice00_axes = Producer(std::make_shared<Tensor>(Vector<std::int8_t>{{-2, -1}}), "", true);
slice00_axes->addChild(slice00, 0, 3); slice00_axes->addChild(slice00, 0, 3);
auto slice00_steps = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{1, 1}}), "", true); auto slice00_steps = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{1, 1}}), "", true);
slice00_steps->addChild(slice00, 0, 4); slice00_steps->addChild(slice00, 0, 4);
auto matMul00 = MatMul(); auto matMul00 = MatMul();
auto identity1 = Identity(); auto identity1 = Identity();
auto slice01 = Slice(); auto slice01 = Slice();
auto slice01_starts = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{splitIndex, 0}}), "", true); auto slice01_starts = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{splitIndex, 0}}), "", true);
slice01_starts->addChild(slice01, 0, 1); slice01_starts->addChild(slice01, 0, 1);
auto slice01_ends = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{input0Dims[0], input0Dims[1]}}), "", true); auto slice01_ends = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{static_cast<std::int64_t>(input0Dims.end()[-2]), static_cast<std::int64_t>(input0Dims.end()[-1])}}), "", true);
slice01_ends->addChild(slice01, 0, 2); slice01_ends->addChild(slice01, 0, 2);
auto slice01_axes = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{0, 1}}), "", true); auto slice01_axes = Producer(std::make_shared<Tensor>(Vector<std::int8_t>{{-2, -1}}), "", true);
slice01_axes->addChild(slice01, 0, 3); slice01_axes->addChild(slice01, 0, 3);
auto slice01_steps = Producer(std::make_shared<Tensor>(Vector<DimSize_t>{{1, 1}}), "", true); auto slice01_steps = Producer(std::make_shared<Tensor>(Vector<std::int64_t>{{1, 1}}), "", true);
slice01_steps->addChild(slice01, 0, 4); slice01_steps->addChild(slice01, 0, 4);
auto matMul01 = MatMul(); auto matMul01 = MatMul();
auto concat0 = Concat(2, axis); auto concat0 = Concat(2, axis);
...@@ -85,17 +85,17 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims) ...@@ -85,17 +85,17 @@ void Aidge::matMulTiling(NodePtr matMul, const std::vector<DimSize_t>& maxDims)
gMatMul->add({matMul}); gMatMul->add({matMul});
auto g = std::make_shared<GraphView>(); auto g = std::make_shared<GraphView>();
g->add({identity0, identity1}); g->add({identity0});
g->add({identity1});
g->add({slice00, slice00_starts, slice00_ends, slice00_axes, slice00_steps, matMul00, matMul01, slice01, slice01_starts, slice01_ends, slice01_axes, slice01_steps, concat0}); g->add({slice00, slice00_starts, slice00_ends, slice00_axes, slice00_steps, matMul00, matMul01, slice01, slice01_starts, slice01_ends, slice01_axes, slice01_steps, concat0});
g->save("micrograph");
auto replaced = GraphView::replace(gMatMul, g); auto replaced = GraphView::replace(gMatMul, g);
if (replaced) { if (replaced) {
g->forwardDims({}, true); g->forwardDims({}, true);
g->save("micrograph");
// Recursive tiling // Recursive tiling
matMulTiling(matMul00, maxDims);
matMulTiling(matMul01, maxDims); matMulTiling(matMul01, maxDims);
} }
else { else {
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include "aidge/recipes/Recipes.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/AvgPooling.hpp"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/operator/GenericOperator.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/graph/OpArgs.hpp"
#include <cstddef>
using namespace Aidge;
TEST_CASE("[MatMulTiling]") {
    // Builds a chain of three MatMul nodes, each fed by its own weight
    // producer, then applies the MatMulTiling recipe to the first one:
    //   dataProvider -> matmul1 -> matmul2 -> matmul3
    //          w1 ----^    w2 ----^    w3 ----^
    auto dataProvider = Producer({16, 3, 224, 224}, "dataProvider");
    auto w1 = Producer({16, 3, 224, 224}, "w1");
    auto matmul1 = MatMul("matmul1");
    // Fixed copy-paste bug: w2 and w3 were both named "w1", which made the
    // saved graph dumps ambiguous. Each producer now has a distinct name.
    auto w2 = Producer({16, 3, 224, 224}, "w2");
    auto matmul2 = MatMul("matmul2");
    auto w3 = Producer({16, 3, 224, 224}, "w3");
    auto matmul3 = MatMul("matmul3");
    dataProvider->addChild(matmul1, 0, 0);
    w1->addChild(matmul1, 0, 1);
    matmul1->addChild(matmul2, 0, 0);
    w2->addChild(matmul2, 0, 1);
    matmul2->addChild(matmul3, 0, 0);
    w3->addChild(matmul3, 0, 1);
    // Resolve the full graph and propagate tensor dimensions before tiling.
    auto g1 = getConnectedGraphView(matmul1);
    g1->forwardDims();
    g1->save("MatMulSplitting_graph");
    // Tile matmul1 so that no resulting MatMul output exceeds 16x16 on its
    // last two dimensions; the recipe recurses on the produced sub-MatMuls.
    matMulTiling(matmul1, {16, 16});
    g1->save("MatMulSplitting_graph_split");
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment