Commit 52a3bf16 authored by Maxence Naud

Merge branch 'matmul_rework' into 'dev'

Matmul rework

See merge request !76
Parents: 4fe5e82e, 9ec6e0fb
Related merge requests: !105 (version 0.2.0) and !76 (Matmul rework)
Pipeline #39323 failed
+# Version 0.1.1 (January 29, 2024)
+[Add] Support of a negative value for the Reshape Operator shape attribute.
+
 # Version 0.1.0 (January 23, 2024)
 Initial release
@@ -39,12 +39,6 @@ class test_attributes(unittest.TestCase):
         self.assertEqual(fc_op.get_attr("OutChannels"), out_channels)
         self.assertEqual(fc_op.get_attr("NoBias"), nb_bias)

-    def test_matmul(self):
-        in_channels = 4
-        out_channels = 8
-        matmul_op = aidge_core.MatMul(in_channels, out_channels).get_operator()
-        self.assertEqual(matmul_op.get_attr("OutChannels"), out_channels)
-
     def test_producer_1D(self):
         dims = [5]
         producer_op = aidge_core.Producer(dims).get_operator()
...
@@ -45,9 +45,9 @@ class test_recipies(unittest.TestCase):
         self.assertTrue(all([i in old_nodes for i in graph_view.get_nodes()]))

     def test_fuse_matmul_add(self):
-        matmul0 = aidge_core.MatMul(1, 1, name="MatMul0")
+        matmul0 = aidge_core.MatMul(name="MatMul0")
         add0 = aidge_core.Add(2, name="Add0")
-        matmul1 = aidge_core.MatMul(1, 1, name="MatMul1")
+        matmul1 = aidge_core.MatMul(name="MatMul1")
         add1 = aidge_core.Add(2, name="Add1")
         graph_view = aidge_core.sequential([matmul0, add0, matmul1, add1])
...
@@ -92,7 +92,7 @@ class Tensor : public Data,
             newTensor.makeContiguous();
         }
         else {
             std::shared_ptr<TensorImpl> newImpl = Registrar<Tensor>::create({mImpl->backend(), mDataType})(mImpl->device().second, mDims);
             newImpl->copy(mImpl->rawPtr(mImplOffset), mSize);
             newTensor.setImpl(newImpl);
         }
@@ -389,7 +389,7 @@ class Tensor : public Data,
      * @param dims New dimensions
      */
     template <std::array<DimSize_t, 1>::size_type DIM> // deducing std::array size_type and declaring DIM accordingly
-    void resize(const std::array<DimSize_t, DIM> &dims) {
+    inline void resize(const std::array<DimSize_t, DIM> &dims) {
         resize(std::vector<DimSize_t>(dims.begin(), dims.end()));
     }
@@ -404,48 +404,7 @@ class Tensor : public Data,
      * @param dims New dimensions
      * @param strides Stride of the tensor (if not specified, "nested" stride is used)
      */
-    void resize(const std::vector<DimSize_t> &dims, std::vector<DimSize_t> strides = std::vector<DimSize_t>()) {
-        bool checkContiguous = true;
-        if (strides.empty()) {
-            strides.resize(dims.size());
-            size_t expectedStride = 1;
-            for (int dim = dims.size() - 1; dim >= 0; --dim) {
-                strides[dim] = expectedStride;
-                expectedStride *= dims[dim];
-            }
-            checkContiguous = false;
-        }
-        else {
-            AIDGE_ASSERT(strides.size() == dims.size(), "Number of strides must match number of dims");
-        }
-
-        if (mImpl.use_count() > 1) {
-            // Here we could also create a new storage for this tensor in this case
-            // But, is it more likely that the user really wants this, or that they made a mistake?
-            AIDGE_ASSERT(dims == mDims && strides == mStrides, "Cannot resize Tensor with shared storage");
-        }
-        else {
-            mDims = dims;
-            mStrides = strides;
-
-            mContiguous = true;
-            if (checkContiguous) {
-                size_t expectedStride = 1;
-                for (int dim = dims.size() - 1; dim >= 0; --dim) {
-                    if (strides[dim] != expectedStride) {
-                        mContiguous = false;
-                        break;
-                    }
-                    expectedStride *= dims[dim];
-                }
-            }
-
-            computeSize();
-            if (mImpl) {
-                mImpl->resize(mDims);
-            }
-        }
-    }
+    void resize(const std::vector<DimSize_t> &dims, std::vector<DimSize_t> strides = std::vector<DimSize_t>());

     /**
      * @brief Return whether the Tensor object has at least one element.
@@ -479,95 +438,7 @@ class Tensor : public Data,
         set<expectedType>(getStorageIdx(coordIdx), value);
     }

-    std::string toString() const {
-        ... // inline body moved essentially verbatim to Tensor.cpp (shown in full below)
-    }
+    std::string toString() const;

     inline void print() const { printf("%s\n", toString().c_str()); }
@@ -635,7 +506,7 @@ class Tensor : public Data,
     }

     /**
-     * Returns a sub-tensor with one or more dimension less.
+     * @brief Returns a sub-tensor with one or more fewer dimensions.
      * For instance, t.extract({1}) on a CHW tensor will return the HW tensor
      * of channel #1.
      * Likewise, t.extract({0, 1}) on a NCHW tensor will return the HW tensor
@@ -652,7 +523,7 @@ class Tensor : public Data,
     Tensor extract(const std::vector<std::size_t>& coordIdx) const;

     /**
-     * Returns a sub-tensor at some coordinate and with some dimension.
+     * @brief Returns a sub-tensor at some coordinate and with some dimension.
      *
      * @param coordIdx First coordinates of the sub-tensor to extract
      * @param dims Dimensions of the sub-tensor to extract
@@ -661,7 +532,7 @@ class Tensor : public Data,
     Tensor extract(const std::vector<std::size_t>& coordIdx, const std::vector<std::size_t>& dims) const;

     /**
-     * Make the tensor's storage contiguous, if it is not already the case.
+     * @brief Make the tensor's storage contiguous, if it is not already the case.
      * If not contiguous, a new memory space is allocated.
      */
     void makeContiguous();
@@ -796,10 +667,10 @@ class Tensor : public Data,
     }

     /**
-     * Return a reference to a Tensor on desired data type and backend/device:
+     * @brief Return a reference to a Tensor on the desired data type and backend/device:
      * - itself, if already with the right characteristics;
      * - the provided Tensor, overwritten with the right characteristics.
-     * NOTE: no data is copy-casted. If it was so in a previous refCastFrom() on
+     * @note No data is copy-casted. If it was so in a previous refCastFrom() on
      * the same fallback, it remains valid, otherwise, data is invalid.
      * @param fallback A shared_ptr to Tensor ready to be overwritten if necessary.
      * The shared_ptr does not need to be initialized. No new memory allocation
@@ -814,11 +685,11 @@ class Tensor : public Data,
     const Tensor& ref(std::shared_ptr<Tensor>& fallback, const Aidge::DataType& dt, const std::string &backend, DeviceIdx_t device = 0) const;

     /**
-     * Return a reference to a Tensor with same characteristics
+     * @brief Return a reference to a Tensor with the same characteristics
      * (data type, backend/device) as the targetReqs Tensor:
      * - itself, if already with the right characteristics;
      * - the provided Tensor, overwritten with the right characteristics.
-     * NOTE: no data is copy-casted. If it was so in a previous refCastFrom() on
+     * @note No data is copy-casted. If it was so in a previous refCastFrom() on
      * the same fallback, it remains valid, otherwise, data is invalid.
      * @param fallback A shared_ptr to Tensor ready to be overwritten if necessary.
      * The shared_ptr does not need to be initialized. No new memory allocation
@@ -833,7 +704,11 @@ class Tensor : public Data,
     }

 private:
-    ///\bug not protected against overflow
+    /**
+     * @brief Compute the number of elements in the Tensor.
+     * @note If the dimensions are not empty, they are multiplied to get the total
+     * number of elements. Otherwise, the Tensor represents a scalar and contains a single element.
+     */
     void computeSize() {
         mSize = std::accumulate(mDims.begin(), mDims.end(), DimSize_t(1), std::multiplies<DimSize_t>());
     }
...
@@ -12,49 +12,32 @@
 #ifndef AIDGE_CORE_OPERATOR_MATMUL_H_
 #define AIDGE_CORE_OPERATOR_MATMUL_H_

-#include <array>
-#include <cmath>
-#include <numeric>
 #include <memory>
+#include <string>
 #include <vector>

 #include "aidge/utils/Types.h"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/Node.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
-#include "aidge/operator/Producer.hpp"
-#include "aidge/utils/StaticAttributes.hpp"
 #include "aidge/utils/Registrar.hpp"

 namespace Aidge {
-enum class MatMulAttr { OutChannels };

 class MatMul_Op : public OperatorTensor,
                   public Registrable<MatMul_Op,
                                      std::string,
-                                     std::unique_ptr<OperatorImpl>(const MatMul_Op &)>,
-                  public StaticAttributes<MatMulAttr, DimSize_t> {
+                                     std::unique_ptr<OperatorImpl>(const MatMul_Op &)> {
 public:
     static const std::string Type;

-    MatMul_Op() = delete;
-
-    using Attributes_ = StaticAttributes<MatMulAttr, DimSize_t>;
-    template <MatMulAttr e> using attr = typename Attributes_::template attr<e>;
-
-    MatMul_Op(DimSize_t out_channels)
-            : OperatorTensor(Type, 1, 1, 1),
-              Attributes_(
-                attr<MatMulAttr::OutChannels>(out_channels))
-    {}
+    MatMul_Op() : OperatorTensor(Type, 2, 0, 1) {}

     /**
      * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    MatMul_Op(const MatMul_Op& op)
-        : OperatorTensor(op),
-          Attributes_(op)
+    MatMul_Op(const MatMul_Op& op) : OperatorTensor(op)
     {
         mImpl = op.mImpl ? Registrar<MatMul_Op>::create(op.mOutputs[0]->getImpl()->backend())(*this) : nullptr;
     }
@@ -63,50 +46,40 @@ public:
     /**
      * @brief Clone the operator using its copy-constructor.
      * @see Operator::MatMul_Op
      */
-    std::shared_ptr<Operator> clone() const override {
+    std::shared_ptr<Operator> clone() const override final {
         return std::make_shared<MatMul_Op>(*this);
     }

-    void computeOutputDims() override final {
-        bool associated = true;
-        for (IOIndex_t i = 0; i < nbInputs(); ++i) {
-            if (!getInput(i)) {
-                AIDGE_THROW_OR_ABORT(std::runtime_error, "Every input should be associated with a Tensor");
-            }
-            associated &= !(getInput(i)->empty());
-        }
-        if (associated) {
-            // <batch, OutChannels>
-            mOutputs[0]->resize({getInput(0)->dims()[0], this->template getAttr<MatMulAttr::OutChannels>()});
-        }
-    }
+    /**
+     * @brief Compute dimensions for the output Tensor following the same rules as
+     * numpy.matmul.
+     * @note - Both inputs are 2-D Tensors: classic matrix multiplication.
+     * @note - Either input is N-D with N > 2: it is treated as a stack of matrices residing
+     * in the last two indexes and broadcast accordingly.
+     * @note - First input is 1-D: it is promoted to a matrix by prepending a 1 to its
+     * dimensions (D) -> (1,D). The prepended 1 is removed after computation.
+     * @note - Second input is 1-D: it is promoted to a matrix by appending a 1 to its
+     * dimensions (D) -> (D,1). The appended 1 is removed after computation.
+     */
+    void computeOutputDims() override final;

-    void setBackend(const std::string& name, DeviceIdx_t device = 0) override {
+    void setBackend(const std::string& name, DeviceIdx_t device = 0) override final {
         mImpl = Registrar<MatMul_Op>::create(name)(*this);
         mOutputs[0]->setBackend(name, device);
     }

-    static const std::vector<std::string> getInputsName(){
-        return {"data_input", "weight"};
+    static const std::vector<std::string> getInputsName() {
+        return {"data_input1", "data_input2"};
     }
-    static const std::vector<std::string> getOutputsName(){
+    static const std::vector<std::string> getOutputsName() {
         return {"data_output"};
     }
 };

-inline std::shared_ptr<Node> MatMul(DimSize_t inChannels, DimSize_t outChannels, const std::string& name = "") {
-    // FIXME: properly handle default w initialization in every cases
-    auto matmul = std::make_shared<Node>(std::make_shared<MatMul_Op>(outChannels), name);
-    addProducer(matmul, 1, {outChannels, inChannels}, "w");
-    return matmul;
+inline std::shared_ptr<Node> MatMul(const std::string& name = "") {
+    return std::make_shared<Node>(std::make_shared<MatMul_Op>(), name);
 }
 } // namespace Aidge

-namespace {
-template <>
-const char *const EnumStrings<Aidge::MatMulAttr>::data[] = {"OutChannels"};
-}
-
 #endif /* AIDGE_CORE_OPERATOR_MATMUL_H_ */
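With this rework, the MatMul factory no longer creates and attaches a weight Producer (the old addProducer(matmul, 1, {outChannels, inChannels}, "w") call is gone), so the weight becomes an ordinary second input that callers wire up themselves. A minimal sketch, assuming the Producer/GraphView API used by the tests below; the header path aidge/graph/GraphView.hpp and the 5x5 weight shape are illustrative:

    #include <memory>
    #include "aidge/graph/GraphView.hpp"   // assumed header path
    #include "aidge/operator/MatMul.hpp"
    #include "aidge/operator/Producer.hpp"

    using namespace Aidge;

    std::shared_ptr<GraphView> makeMatMulGraph() {
        auto w0 = Producer({5, 5}, "w0");  // weight is now a plain input tensor
        auto matmul0 = MatMul("matmul0");  // no more in/out channel arguments
        w0->addChild(matmul0, 0, 1);       // feed the weight into input #1
        auto g = std::make_shared<GraphView>();
        g->add({w0, matmul0});
        return g;
    }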
@@ -19,16 +19,11 @@
 namespace py = pybind11;
 namespace Aidge {

-void declare_MatMul(py::module &m) {
-  py::class_<MatMul_Op, std::shared_ptr<MatMul_Op>, Attributes, OperatorTensor>(m, "MatMulOp", py::multiple_inheritance())
-  .def("get_inputs_name", &MatMul_Op::getInputsName)
-  .def("get_outputs_name", &MatMul_Op::getOutputsName)
-  .def("attributes_name", &MatMul_Op::staticGetAttrsName);
-
-  m.def("MatMul", &MatMul, py::arg("in_channels"), py::arg("out_channels"), py::arg("name") = "");
-}
+void init_MatMul(py::module &m) {
+  py::class_<MatMul_Op, std::shared_ptr<MatMul_Op>, OperatorTensor>(m, "MatMulOp", py::multiple_inheritance())
+  .def("get_inputs_name", &MatMul_Op::getInputsName)
+  .def("get_outputs_name", &MatMul_Op::getOutputsName);

-void init_MatMul(py::module &m) {
-  declare_MatMul(m);
+  m.def("MatMul", &MatMul, py::arg("name") = "");
 }
 } // namespace Aidge
@@ -9,10 +9,145 @@
  *
  ********************************************************************************/

+#include <vector>
+#include <cstddef>
+
 #include "aidge/data/Tensor.hpp"
 #include "aidge/utils/Types.h"
 #include "aidge/utils/ErrorHandling.hpp"

+void Aidge::Tensor::resize(const std::vector<Aidge::DimSize_t> &dims, std::vector<Aidge::DimSize_t> strides) {
+    bool checkContiguous = true;
+    if (strides.empty()) {
+        strides.resize(dims.size());
+        size_t expectedStride = 1;
+        for (int dim = dims.size() - 1; dim >= 0; --dim) {
+            strides[dim] = expectedStride;
+            expectedStride *= dims[dim];
+        }
+        checkContiguous = false;
+    }
+    else {
+        AIDGE_ASSERT(strides.size() == dims.size(), "Number of strides must match number of dims");
+    }
+
+    if (mImpl.use_count() > 1) {
+        // Here we could also create a new storage for this tensor in this case
+        // But, is it more likely that the user really wants this, or that they made a mistake?
+        AIDGE_ASSERT(dims == mDims && strides == mStrides, "Cannot resize Tensor with shared storage");
+    }
+    else {
+        mDims = dims;
+        mStrides = strides;
+
+        mContiguous = true;
+        if (checkContiguous) {
+            std::size_t expectedStride = 1;
+            for (std::size_t i = dims.size()-1; i > 0; --i) {
+                if (strides[i] != expectedStride) {
+                    mContiguous = false;
+                    break;
+                }
+                expectedStride *= dims[i];
+            }
+            mContiguous &= (strides[0] == expectedStride);
+        }
+
+        computeSize();
+        if (mImpl) {
+            mImpl->resize(mDims);
+        }
+    }
+}
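As a quick illustration of the stride handling above (a usage sketch, not part of the commit): omitting strides yields the packed "nested" layout, while custom strides trigger the contiguity check.

    #include <cassert>
    #include "aidge/data/Tensor.hpp"

    void resizeSketch() {
        Aidge::Tensor t;
        t.resize({2, 3});          // default nested strides {3, 1}: contiguous
        assert(t.isContiguous());
        t.resize({2, 3}, {1, 2});  // transposed-style strides: flagged non-contiguous
        assert(!t.isContiguous());
    }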
+std::string Aidge::Tensor::toString() const {
+    AIDGE_ASSERT(mImpl && (dims().empty() || (dims() == std::vector<DimSize_t>({0})) || (mImpl->hostPtr() != nullptr)), "tensor should have a valid host pointer");
+
+    // TODO: move lambda elsewhere?
+    auto ptrToString = [](DataType dt, void* ptr, std::size_t idx) {
+        switch (dt) {
+        case DataType::Float64:
+            return std::to_string(static_cast<double*>(ptr)[idx]);
+        case DataType::Float32:
+            return std::to_string(static_cast<float*>(ptr)[idx]);
+        case DataType::Float16:
+            return std::to_string(static_cast<half_float::half*>(ptr)[idx]);
+        case DataType::Int8:
+            return std::to_string(static_cast<int8_t*>(ptr)[idx]);
+        case DataType::Int16:
+            return std::to_string(static_cast<int16_t*>(ptr)[idx]);
+        case DataType::Int32:
+            return std::to_string(static_cast<int32_t*>(ptr)[idx]);
+        case DataType::Int64:
+            return std::to_string(static_cast<int64_t*>(ptr)[idx]);
+        case DataType::UInt8:
+            return std::to_string(static_cast<uint8_t*>(ptr)[idx]);
+        case DataType::UInt16:
+            return std::to_string(static_cast<uint16_t*>(ptr)[idx]);
+        case DataType::UInt32:
+            return std::to_string(static_cast<uint32_t*>(ptr)[idx]);
+        case DataType::UInt64:
+            return std::to_string(static_cast<uint64_t*>(ptr)[idx]);
+        default:
+            AIDGE_ASSERT(false, "unsupported type to convert to string");
+        }
+        return std::string("?"); // To make Clang happy
+    };
+
+    if (dims().empty()) { return ptrToString(mDataType, mImpl->hostPtr(), 0); }
+
+    std::string res;
+    std::size_t dim = 0;
+    std::size_t counter = 0;
+    if (nbDims() >= 2) {
+        std::vector<std::size_t> dimVals(nbDims(), 0);
+        res += "{\n";
+        while (counter < mSize) {
+            std::string spaceString = std::string((dim+1)<<1, ' ');
+            if (dim < nbDims()-2) {
+                if (dimVals[dim] == 0) {
+                    res += spaceString + "{\n";
+                    ++dim;
+                } else if (dimVals[dim] < static_cast<std::size_t>(dims()[dim])) {
+                    res += spaceString + "},\n" + spaceString + "{\n";
+                    ++dim;
+                } else {
+                    res += spaceString + "}\n";
+                    dimVals[dim--] = 0;
+                    dimVals[dim]++;
+                }
+            } else {
+                for (; dimVals[dim] < static_cast<std::size_t>(dims()[dim]); ++dimVals[dim]) {
+                    res += spaceString + "{";
+                    for (DimSize_t j = 0; j < dims()[dim + 1] - 1; ++j) {
+                        res += " " + ptrToString(mDataType, mImpl->hostPtr(mImplOffset), counter++) + ",";
+                    }
+                    res += " " + ptrToString(mDataType, mImpl->hostPtr(mImplOffset), counter++) + "}";
+                    if (dimVals[dim] < static_cast<std::size_t>(dims()[dim] - 1)) {
+                        res += ",";
+                    }
+                    res += "\n";
+                }
+                if (dim == 0) {
+                    break;
+                }
+                dimVals[dim--] = 0;
+                dimVals[dim]++;
+            }
+        }
+
+        for (int i = static_cast<int>(dim); i > 0; --i) {
+            res += std::string((dim+1)<<1, ' ') + "}\n";
+        }
+    } else {
+        res += "{";
+        for (DimSize_t j = 0; j < dims()[0]; ++j) {
+            res += " " + ptrToString(mDataType, mImpl->hostPtr(mImplOffset), j) + ((j < dims()[0]-1) ? "," : " ");
+        }
+    }
+    res += "}";
+    return res;
+}
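For reference, a sketch of what the nested-brace walk above produces for a small 2-D tensor, assuming an Array2D initializer mirroring the Array1D/Array3D helpers used in the unit tests, and a backend with host storage:

    Aidge::Tensor x = Aidge::Array2D<int, 2, 2>{{{1, 2}, {3, 4}}};
    x.print();
    // prints:
    // {
    //   { 1, 2},
    //   { 3, 4}
    // }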
 Aidge::Tensor Aidge::Tensor::extract(const std::vector<std::size_t>& coordIdx) const {
     AIDGE_ASSERT(isContiguous(), "Tensor must be contiguous");
     AIDGE_ASSERT(coordIdx.size() <= mDims.size(), "Number of coordinates is higher than number of dimensions");
@@ -52,7 +187,7 @@ void Aidge::Tensor::makeContiguous() {
             // Determine the size of the contiguous chunk
             size_t copySize = 1;
             while (idx + copySize < mSize &&
                    getStorageIdx(getCoord(idx + copySize)) == storageIdx + copySize)
             {
                 ++copySize;
...
@@ -9,8 +9,64 @@
  *
  ********************************************************************************/

+#include <algorithm>
 #include <string>
+#include <vector>

 #include "aidge/operator/MatMul.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/utils/ErrorHandling.hpp"

 const std::string Aidge::MatMul_Op::Type = "MatMul";
\ No newline at end of file
+
+void Aidge::MatMul_Op::computeOutputDims() {
+    if (!getInput(0) || !getInput(1)) {
+        AIDGE_THROW_OR_ABORT(std::runtime_error, "Missing input. Cannot compute output dimensions for MatMul Operator.");
+    }
+    if (getInput(0)->empty() && getInput(1)->empty()) {
+        // both inputs are scalars
+        mOutputs[0]->resize({});
+    }
+    else if (!getInput(0)->empty() && !getInput(1)->empty())
+    {
+        std::vector<std::size_t> dims0 = getInput(0)->dims();
+        std::vector<std::size_t> dims1 = getInput(1)->dims();
+
+        // keep second-to-last dimension of dims0
+        const bool keepDim0 = dims0.size() > 1;
+        // keep last dimension of dims1
+        const bool keepDim1 = dims1.size() > 1;
+
+        if (dims0.size() == 1) {
+            dims0.insert(dims0.cbegin(), 1);
+        }
+        if (dims1.size() == 1) {
+            dims1.push_back(1);
+        }
+        const std::size_t dims_size = std::max(dims0.size(), dims1.size());
+
+        if (dims0.size() > dims1.size()) {
+            dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
+        }
+        else if (dims1.size() > dims0.size()) {
+            dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
+        }
+
+        AIDGE_ASSERT(dims0[dims_size-1] == dims1[dims_size-2], "Incompatible matrix sizes.");
+
+        std::vector<std::size_t> outDims = std::vector<std::size_t>(dims_size-2, 1);
+        for (std::size_t i = 0; i < dims_size-2; ++i) {
+            AIDGE_ASSERT((dims0[i] == dims1[i]) || (dims0[i] == 1) || (dims1[i] == 1), "Bad vector dimension.");
+            outDims[i] = std::max(dims0[i], dims1[i]);
+        }
+
+        // use keepDim0 instead of dims0.size() because dims0 has been modified
+        if (keepDim0)
+            outDims.push_back(dims0[dims_size-2]);
+        if (keepDim1)
+            outDims.push_back(dims1[dims_size-1]);
+
+        mOutputs[0]->resize(outDims);
+    }
+}
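A worked example of the broadcasting rules implemented above, written as a sketch against the same API as the new unit test further below: the batch dimensions (2,1) and (3) broadcast to (2,3), and the contracted dimension (5) must match between the two inputs.

    auto myMatMul = Aidge::MatMul();
    auto op = std::static_pointer_cast<Aidge::OperatorTensor>(myMatMul->getOperator());

    auto T0 = std::make_shared<Aidge::Tensor>();
    T0->resize({2, 1, 4, 5});  // a (2,1) stack of 4x5 matrices
    auto T1 = std::make_shared<Aidge::Tensor>();
    T1->resize({3, 5, 6});     // a (3) stack of 5x6 matrices
    op->associateInput(0, T0);
    op->associateInput(1, T1);

    op->computeOutputDims();
    // op->getOutput(0)->dims() == {2, 3, 4, 6}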
@@ -41,7 +41,19 @@ void Aidge::fuseMulAdd(std::shared_ptr<Aidge::Node> matmulNode, std::shared_ptr<
     AIDGE_ASSERT(matmulNode->getParent(1), "No weight detected to produce the fuseMulAdd recipe.");
     std::shared_ptr<Node> weight = matmulNode->getParent(1)->cloneSharedOperators();

-    const DimSize_t outSize = std::dynamic_pointer_cast<MatMul_Op>(matmulNode->getOperator())->getAttr<DimSize_t>("OutChannels");
+    // TODO: find another way to get OutChannels for the FC operator.
+    // This quick fix assumes that one of the Add inputs is a constant whose last
+    // dimension matches the number of output channels of the fused FC.
+    DimSize_t outSize = 0;
+    const auto& op = std::dynamic_pointer_cast<OperatorTensor>(addNode->getOperator());
+    for (size_t i = 0; i < op->nbInputs(); i++)
+    {
+        const auto& inTensor = op->getInput(i);
+        if (inTensor->nbDims() > 0) {
+            outSize = inTensor->dims()[inTensor->nbDims()-1];
+            break;
+        }
+    }
+    AIDGE_ASSERT(outSize, "Couldn't get the output number of channels for the FC operator.");

     // Instantiate FC
     //std::shared_ptr<Node> fc = FC(dim[0], false, "Fc");
...
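For intuition, a sketch (not part of the commit) of what the scan above resolves in the FuseMulAdd test graph further below, where the Add bias comes from Producer({5}, "B0"):

    auto bias = std::make_shared<Aidge::Tensor>();
    bias->resize({5});                                     // bias shape from the test graph
    auto outSize = bias->dims()[bias->nbDims() - 1];       // == 5 output channels for the FC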
@@ -19,7 +19,7 @@
 using namespace Aidge;

-TEST_CASE("Tensor creation") {
+TEST_CASE("[core/data] Tensor creation") {
     SECTION("from const array") {
         Tensor x = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
@@ -59,7 +59,7 @@ TEST_CASE("[core/data] Tensor creation") {
     }
 }

-TEST_CASE("Tensor methods") {
+TEST_CASE("[core/data] Tensor methods", "[Tensor]") {
     Tensor x = Array3D<int, 2, 2, 2>{{
         {{1, 2},
          {3, 4}},
@@ -89,7 +89,7 @@ TEST_CASE("[core/data] Tensor methods", "[Tensor]") {
         REQUIRE(y.getImpl() == x.getImpl());
         REQUIRE(approxEq<int>(y, Array1D<int, 2>{{3, 4}}));
         REQUIRE(y.isContiguous());

         Tensor y2 = x.extract({0, 1, 1}, {2, 1, 1});
         REQUIRE(y2.getImpl() == x.getImpl());
         REQUIRE(!y2.isContiguous());
...
@@ -126,9 +126,9 @@ TEST_CASE("GraphRegexUser") {
     SECTION("Applied Recipes") {
         // generate the original GraphView
-        auto matmul0 = MatMul(5, 5, "matmul0");
+        auto matmul0 = MatMul("matmul0");
         auto add0 = Add(2, "add0");
-        auto matmul1 = MatMul(5, 5, "matmul1");
+        auto matmul1 = MatMul("matmul1");
         auto add1 = Add(2, "add1");

         auto b0 = Producer({5}, "B0");
@@ -154,7 +154,7 @@ TEST_CASE("GraphRegexUser") {
         auto g = std::make_shared<GraphView>();
-        g->add({matmul0, add0, matmul1, add1, b0, b1, fl, fc});
+        g->add({w0, matmul0, b0, add0, w1, matmul1, b1, add1, fl, fc});

         std::shared_ptr<GraphRegex> kitchenBook = std::make_shared<GraphRegex>();
...
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <memory>
#include <random> // std::random_device, std::mt19937, std::uniform_int_distribution
#include <vector>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/OperatorTensor.hpp"
namespace Aidge {
TEST_CASE("[core/operator] MatMul_Op(computeOutputDims)", "[MatMul][computeOutputDims]") {
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<std::size_t> dist(1, 10);
// Create MatMul Operator
std::shared_ptr<Node> myMatMul = MatMul();
auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
/** @todo Special case of scalar Tensor objects.
* Not handled yet.
*/
// SECTION("0-D / 0-D") {
// std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
// T0->resize({});
// op -> associateInput(0,T0);
// // input_1 - right
// std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
// T1->resize({});
// op -> associateInput(1,T1);
// REQUIRE_NOTHROW(op->computeOutputDims());
// REQUIRE((op->getOutput(0)->dims()).empty());
// // input_1 - wrong
// T1->resize({dist(gen)});
// REQUIRE_THROWS(op->computeOutputDims());
// }
SECTION("1-D / N-D") {
// input_0
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
const std::size_t dim0 = dist(gen);
T0->resize({dim0});
op -> associateInput(0,T0);
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
SECTION("1-D / 1-D") {
// input_1 - right
T1->resize({dim0});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE((op->getOutput(0)->dims()).empty());
// input_1 - wrong
T1->resize({dim0+1});
REQUIRE_THROWS(op -> computeOutputDims());
}
SECTION("1-D / 2-D") {
// input_1 - right
const std::size_t dim1 = dist(gen);
T1->resize({dim0,dim1});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE(op->getOutput(0)->dims() == std::vector<std::size_t>({dim1}));
// input_1 - wrong
T1->resize({dim0+1,dim1});
REQUIRE_THROWS(op -> computeOutputDims());
}
SECTION("1-D / +2-D") {
// input_1 - right
const std::size_t dim1 = dist(gen);
const std::size_t dim2 = dist(gen);
const std::size_t dim3 = dist(gen);
T1->resize({dim1,dim2,dim0,dim3});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE(op->getOutput(0)->dims() == std::vector<std::size_t>({dim1,dim2,dim3}));
}
}
SECTION("2-D / N-D") {
// input_0
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
const std::size_t dim0 = dist(gen);
const std::size_t dim1 = dist(gen);
T0->resize({dim0,dim1});
op -> associateInput(0,T0);
// input_1
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
SECTION("2-D / 1-D") {
// input_1 - right
T1->resize({dim1});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE(op->getOutput(0)->dims() == std::vector<std::size_t>({dim0}));
// input_1 - wrong
T1->resize({dim1+1});
REQUIRE_THROWS(op -> computeOutputDims());
}
SECTION("2-D / 2-D") {
// input_1 - right
const std::size_t dim2 = dist(gen);
T1->resize({dim1, dim2});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE(op->getOutput(0)->dims() == std::vector<std::size_t>({dim0,dim2}));
// input_1 - wrong
T1->resize({dim1+1,dim2});
REQUIRE_THROWS(op -> computeOutputDims());
}
SECTION("2-D / +2-D") {
// input_1 - right
const std::size_t dim2 = dist(gen);
const std::size_t dim3 = dist(gen);
const std::size_t dim4 = dist(gen);
T1->resize({dim3,dim4,dim1, dim2});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE(op->getOutput(0)->dims() == std::vector<std::size_t>({dim3,dim4,dim0,dim2}));
// input_1 - wrong
T1->resize({dim3,dim4,dim1+1,dim2});
REQUIRE_THROWS(op -> computeOutputDims());
}
}
SECTION("+2-D / +2-D") {
// input_0
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
const std::size_t dim0 = dist(gen) + 1;
const std::size_t dim1 = 1;
const std::size_t dim2 = dist(gen);
const std::size_t dim3 = dist(gen);
T0->resize({dim0,dim1,dim2,dim3});
op -> associateInput(0,T0);
// input_1
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
// input_1 - right
// 1
const std::size_t dim5 = dist(gen);
T1->resize({dim0,dim1,dim3,dim5});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE(op->getOutput(0)->dims() == std::vector<std::size_t>({dim0,dim1,dim2,dim5}));
// 2 - input_1 broadcast
T1->resize({1,dim1,dim3,dim5});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE(op->getOutput(0)->dims() == std::vector<std::size_t>({dim0,dim1,dim2,dim5}));
// 3 - input_0 broadcast
const std::size_t dim1_bigger = dist(gen) + 1;
T1->resize({dim0,dim1_bigger,dim3,dim5});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE(op->getOutput(0)->dims() == std::vector<std::size_t>({dim0,dim1_bigger,dim2,dim5}));
// 4 - input_0+input_1 broadcast
T1->resize({1,dim1_bigger,dim3,dim5});
REQUIRE_NOTHROW(op -> computeOutputDims());
REQUIRE(op->getOutput(0)->dims() == std::vector<std::size_t>({dim0,dim1_bigger,dim2,dim5}));
// input_1 - wrong
T1->resize({dim0+1,dim1,dim3,dim5});
REQUIRE_THROWS(op -> computeOutputDims());
}
}
} // namespace Aidge
\ No newline at end of file
@@ -25,9 +25,9 @@ namespace Aidge {
 TEST_CASE("[cpu/recipies] FuseMulAdd", "[FuseMulAdd][recipies]") {
     // generate the original GraphView
-    auto matmul0 = MatMul(5, 5, "matmul0");
+    auto matmul0 = MatMul("matmul0");
     auto add0 = Add(2, "add0");
-    auto matmul1 = MatMul(5, 5, "matmul1");
+    auto matmul1 = MatMul("matmul1");
     auto add1 = Add(2, "add1");

     auto b0 = Producer({5}, "B0");
@@ -49,7 +49,7 @@ TEST_CASE("[cpu/recipies] FuseMulAdd", "[FuseMulAdd][recipies]") {
     b1->addChild(add1, 0, 1);
     auto g = std::make_shared<GraphView>();
-    g->add({matmul0, add0, matmul1, add1, b0, b1});
+    g->add({w0, matmul0, b0, add0, w1, matmul1, b1, add1});

     // Check original graph
     REQUIRE(g->getNodes() ==
...
-0.0.1
+0.1.1
\ No newline at end of file