Working concept

39ec58b6 · Olivier BICHLER · 5d7a7e7b · 39ec58b6 · 39ec58b6 · 39ec58b6
Commit 39ec58b6 authored 1 year ago by Olivier BICHLER
--- a/include/aidge/backend/cpu/data/TensorImpl.hpp
+++ b/include/aidge/backend/cpu/data/TensorImpl.hpp
@@ -55,6 +55,10 @@ class TensorImpl_cpu : public TensorImpl {
    }
    void copyCast(const void *src, NbElts_t length, const DataType srcDt) override {
+        if (length == 0) {
+            return;
+        }
        if (srcDt == DataType::Float64) {
            std::copy(static_cast<const double*>(src), static_cast<const double*>(src) + length,
                    static_cast<T *>(rawPtr()));
@@ -151,8 +155,6 @@ class TensorImpl_cpu : public TensorImpl {
 private:
    void lazyInit() {
-        AIDGE_INTERNAL_ASSERT(mTensor.dataType() == NativeType<T>::type);
        if (mData.size() < mTensor.size()) {
            // Need more data, a re-allocation will occur
            AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "trying to enlarge non-owned data");

--- a/src/operator/AddImpl.cpp
+++ b/src/operator/AddImpl.cpp
@@ -33,16 +33,36 @@ void  Aidge::AddImpl_cpu::forward() {
        assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput);
    }
-    auto kernelFunc = Registrar<AddImplForward_cpu>::create({
+    // Find the correct kernel type
+    const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType();
+    const Registrar<AddImplForward_cpu>::registrar_key registrarKey = {
        datatypeFirstInput,
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        outputDataType};
+    Registrar<AddImplForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<AddImplForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<AddImplForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fallback to the kernel with all types matching output type
+        kernelFunc = Registrar<AddImplForward_cpu>::create({
+            outputDataType, outputDataType});
+    }
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
    std::vector<const void*> opInputs;
+    std::vector<std::shared_ptr<Tensor>> inputsFallback(mOp.nbInputs());
    for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) {
-        opInputs.push_back(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->getImpl()->rawPtr());
+        const auto& input = std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->refCast(inputsFallback[i], *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
+        opInputs.push_back(input.getImpl()->rawPtr());
    }
+    // Call kernel
    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
               opInputs,
               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
 }
\ No newline at end of file
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -28,29 +28,37 @@ void Aidge::FCImpl_cpu::forward()
    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(2)) && "missing input #2");
    // Find the correct kernel type
-    auto kernelFunc = Registrar<FCImplForward_cpu>::create(
+    const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType();
-        {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
+    const Registrar<FCImplForward_cpu>::registrar_key registrarKey = {
-         std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-         std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
-         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
+        outputDataType};
+    Registrar<FCImplForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<FCImplForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<FCImplForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fallback to the kernel with all types matching output type
+        kernelFunc = Registrar<FCImplForward_cpu>::create({
+            outputDataType, outputDataType, outputDataType, outputDataType});
+    }
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCast(input0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
+    const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCast(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
+    const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCast(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
    // Call kernel
-    // if (std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->nbDims() == 4) {
+    kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
-    //     kernelFunc(
-    //         mOp.getStaticAttributes(),
-    //         std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-    //         std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->getImpl()->rawPtr(),
-    //         mOp.mInputs[1]->getImpl()->rawPtr(),
-    //         mOp.mInputs[2]->getImpl()->rawPtr(),
-    //         mOp.getOutput(0)->getImpl()->rawPtr());
-    // }
-    // else
-    kernelFunc(
-        dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
+        input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(),
        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
 }
--- a/unit_tests/recipies/Test_ExplicitConvert.cpp
+++ b/unit_tests/recipies/Test_ExplicitConvert.cpp
@@ -42,5 +42,5 @@ TEST_CASE("[ExplicitConvert] conv") {
    explicitConvert(g1);
    g1->save("ExplicitConvert_after");
-    REQUIRE(g1->getNodes().size() == 5);
+    REQUIRE(g1->getNodes().size() == 13);
 }
--- a/unit_tests/scheduler/Test_Convert.cpp
+++ b/unit_tests/scheduler/Test_Convert.cpp
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+#include <catch2/catch_test_macros.hpp>
+#include <memory>
+#include <string>
+#include "aidge/data/Tensor.hpp"
+#include "aidge/utils/TensorUtils.hpp"
+#include "aidge/graph/Node.hpp"
+#include "aidge/graph/GraphView.hpp"
+#include "aidge/graph/OpArgs.hpp"
+#include "aidge/scheduler/Scheduler.hpp"
+#include "aidge/recipies/Recipies.hpp"
+#include "aidge/backend/cpu.hpp"
+using namespace Aidge;
+TEST_CASE("[cpu/convert] Convert(forward)") {
+    std::shared_ptr<Tensor> inputTensor =
+            std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4},
+                                                                 {5, 6, 7, 8, 9},
+                                                                 {10, 11, 12, 13, 14},
+                                                                 {15, 16, 17, 18, 19},
+                                                                 {20, 21, 22, 23, 24}}},
+                                                               {{{25, 26, 27, 28, 29},
+                                                                 {30, 31, 32, 33, 34},
+                                                                 {35, 36, 37, 38, 39},
+                                                                 {40, 41, 42, 43, 44},
+                                                                 {45, 46, 47, 48, 49}}}}});
+    std::shared_ptr<Tensor> weight1 = std::make_shared<Tensor>(
+            Array4D<int, 3, 1, 3, 3>{{{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}},
+                                      {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}},
+                                      {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}});
+    std::shared_ptr<Tensor> bias1 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
+    SECTION("Test implicit") {
+        std::shared_ptr<GraphView> g =
+                Sequential({
+                    Conv(1, 3, {3, 3}, "conv1"),
+                    Conv(3, 4, {1, 1}, "conv2"),
+                    Conv(4, 3, {1, 1}, "conv3"),
+                    FC(27, 5, false, "fc")});
+        g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
+        g->getNode("conv1")->getOperator()->setInput(1, weight1);
+        g->getNode("conv1")->getOperator()->setInput(2, bias1);
+        std::shared_ptr<Tensor> weight2 =
+                std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
+                                                                   {{{4}}, {{5}}, {{6}}},
+                                                                   {{{7}}, {{8}}, {{9}}},
+                                                                   {{{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
+        g->getNode("conv2")->getOperator()->setInput(1, weight2);
+        g->getNode("conv2")->getOperator()->setInput(2, bias2);
+        // *(g->getNode("conv2")->getOperator()->input(1, weight2);
+        std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
+                Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
+                                          {{{5}}, {{6}}, {{7}}, {{8}}},
+                                          {{{9}}, {{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
+        g->getNode("conv3")->getOperator()->setInput(1, weight3);
+        g->getNode("conv3")->getOperator()->setInput(2, bias3);
+        std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
+                Array2D<int, 5, 27>{{{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+                                      15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
+                                     {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                                      12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+                                     {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
+                                      9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
+                                     {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
+                                      6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
+                                     {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
+                                      3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
+        std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
+        g->getNode("fc")->getOperator()->setInput(1, weightfc);
+        g->getNode("fc")->getOperator()->setInput(2, biasfc);
+        // input->addChild(g);
+        g->setDataType(Aidge::DataType::Int32);
+        g->getNode("conv1")->getOperator()->setDataType(DataType::Float32);
+        g->getNode("conv3")->getOperator()->setDataType(DataType::Float64);
+        g->setBackend("cpu");
+        g->forwardDims();
+        SequentialScheduler scheduler(g);
+        REQUIRE_NOTHROW(scheduler.forward());
+        scheduler.saveSchedulingDiagram("schedulingSequential");
+        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
+                {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
+                  {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
+                  {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
+                 {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
+                  {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
+                  {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
+        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
+                {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
+                  {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
+                  {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
+                  {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
+                 {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
+                  {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
+                  {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
+                  {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
+        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
+                {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
+                  {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
+                  {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
+                 {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
+                  {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
+                  {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
+        Tensor expectedOutput4 = Array2D<int, 2, 5>{
+                {{205050376, 198925904, 181355097, 196978090, 238868348},
+                {598467376, 561797804, 560823897, 593043790, 698672948}}};
+        std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
+        REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12));
+        std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
+        REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12));
+        std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
+        REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12));
+        std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
+        REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12));
+    }
+    SECTION("Test explicit") {
+        std::shared_ptr<GraphView> g =
+                Sequential({
+                    Conv(1, 3, {3, 3}, "conv1"),
+                    Conv(3, 4, {1, 1}, "conv2"),
+                    Conv(4, 3, {1, 1}, "conv3"),
+                    FC(27, 5, false, "fc")});
+        g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
+        g->getNode("conv1")->getOperator()->setInput(1, weight1);
+        g->getNode("conv1")->getOperator()->setInput(2, bias1);
+        std::shared_ptr<Tensor> weight2 =
+                std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
+                                                                   {{{4}}, {{5}}, {{6}}},
+                                                                   {{{7}}, {{8}}, {{9}}},
+                                                                   {{{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
+        g->getNode("conv2")->getOperator()->setInput(1, weight2);
+        g->getNode("conv2")->getOperator()->setInput(2, bias2);
+        // *(g->getNode("conv2")->getOperator()->input(1, weight2);
+        std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
+                Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
+                                          {{{5}}, {{6}}, {{7}}, {{8}}},
+                                          {{{9}}, {{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
+        g->getNode("conv3")->getOperator()->setInput(1, weight3);
+        g->getNode("conv3")->getOperator()->setInput(2, bias3);
+        std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
+                Array2D<int, 5, 27>{{{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+                                      15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
+                                     {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                                      12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+                                     {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
+                                      9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
+                                     {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
+                                      6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
+                                     {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
+                                      3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
+        std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
+        g->getNode("fc")->getOperator()->setInput(1, weightfc);
+        g->getNode("fc")->getOperator()->setInput(2, biasfc);
+        // input->addChild(g);
+        g->setDataType(Aidge::DataType::Int32);
+        g->getNode("conv1")->getOperator()->setDataType(DataType::Float32);
+        g->getNode("conv3")->getOperator()->setDataType(DataType::Float64);
+        explicitConvert(g);
+        g->setBackend("cpu");
+        g->forwardDims();
+        SequentialScheduler scheduler(g);
+        REQUIRE_NOTHROW(scheduler.forward());
+        scheduler.saveSchedulingDiagram("schedulingSequential");
+        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
+                {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
+                  {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
+                  {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
+                 {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
+                  {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
+                  {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
+        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
+                {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
+                  {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
+                  {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
+                  {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
+                 {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
+                  {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
+                  {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
+                  {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
+        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
+                {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
+                  {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
+                  {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
+                 {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
+                  {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
+                  {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
+        Tensor expectedOutput4 = Array2D<int, 2, 5>{
+                {{205050376, 198925904, 181355097, 196978090, 238868348},
+                {598467376, 561797804, 560823897, 593043790, 698672948}}};
+        std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
+        REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12));
+        std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
+        REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12));
+        std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
+        REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12));
+        std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
+        REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12));
+    }
+}