diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp index 15e8ca34c05dd61e094b589956c187f89abcf450..0b77f2e44c9ab22f222f9a6b7b073bf12d42e8f0 100644 --- a/include/aidge/backend/cpu/data/TensorImpl.hpp +++ b/include/aidge/backend/cpu/data/TensorImpl.hpp @@ -55,6 +55,10 @@ class TensorImpl_cpu : public TensorImpl { } void copyCast(const void *src, NbElts_t length, const DataType srcDt) override { + if (length == 0) { + return; + } + if (srcDt == DataType::Float64) { std::copy(static_cast<const double*>(src), static_cast<const double*>(src) + length, static_cast<T *>(rawPtr())); @@ -151,8 +155,6 @@ class TensorImpl_cpu : public TensorImpl { private: void lazyInit() { - AIDGE_INTERNAL_ASSERT(mTensor.dataType() == NativeType<T>::type); - if (mData.size() < mTensor.size()) { // Need more data, a re-allocation will occur AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "trying to enlarge non-owned data"); diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index fc54eca65a2638c53498c47ea8a21682f31e9eea..a6446641867d69551a8ba410016bd3e03c35e735 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -33,16 +33,36 @@ void Aidge::AddImpl_cpu::forward() { assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput); } - auto kernelFunc = Registrar<AddImplForward_cpu>::create({ + // Find the correct kernel type + const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType(); + const Registrar<AddImplForward_cpu>::registrar_key registrarKey = { datatypeFirstInput, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + outputDataType}; + Registrar<AddImplForward_cpu>::registrar_type kernelFunc; + if (Registrar<AddImplForward_cpu>::exists(registrarKey)) { + // One exists with the right inputs/output types + kernelFunc = Registrar<AddImplForward_cpu>::create(registrarKey); + } + else { + // Otherwise, fallback to the kernel with all types matching output type + kernelFunc = Registrar<AddImplForward_cpu>::create({ + outputDataType, outputDataType}); + } + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. std::vector<const void*> opInputs; + std::vector<std::shared_ptr<Tensor>> inputsFallback(mOp.nbInputs()); for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) { - opInputs.push_back(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->getImpl()->rawPtr()); + const auto& input = std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->refCast(inputsFallback[i], *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + opInputs.push_back(input.getImpl()->rawPtr()); } + // Call kernel kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), opInputs, std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); -} \ No newline at end of file +} diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp index 1e5450d330ee89bdceb30aca846800d7764ca911..e97fe4ccc2f12b34d9b799b0435547d329182dae 100644 --- a/src/operator/FCImpl.cpp +++ b/src/operator/FCImpl.cpp @@ -28,29 +28,37 @@ void Aidge::FCImpl_cpu::forward() assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(2)) && "missing input #2"); // Find the correct kernel type - auto kernelFunc = Registrar<FCImplForward_cpu>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType(); + const Registrar<FCImplForward_cpu>::registrar_key registrarKey = { + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), + outputDataType}; + + Registrar<FCImplForward_cpu>::registrar_type kernelFunc; + if (Registrar<FCImplForward_cpu>::exists(registrarKey)) { + // One exists with the right inputs/output types + kernelFunc = Registrar<FCImplForward_cpu>::create(registrarKey); + } + else { + // Otherwise, fallback to the kernel with all types matching output type + kernelFunc = Registrar<FCImplForward_cpu>::create({ + outputDataType, outputDataType, outputDataType, outputDataType}); + } + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; + const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCast(input0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCast(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCast(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); // Call kernel - // if (std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->nbDims() == 4) { - // kernelFunc( - // mOp.getStaticAttributes(), - // std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - // std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->getImpl()->rawPtr(), - // mOp.mInputs[1]->getImpl()->rawPtr(), - // mOp.mInputs[2]->getImpl()->rawPtr(), - // mOp.getOutput(0)->getImpl()->rawPtr()); - // } - // else - kernelFunc( - dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), + kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0], std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(), + input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); } diff --git a/unit_tests/recipies/Test_ExplicitConvert.cpp b/unit_tests/recipies/Test_ExplicitConvert.cpp index 80548aac027c9c719049240b18afefd4ca2eb678..7c5a970ef5a96ec327cd504bc4f51c38de0499a9 100644 --- a/unit_tests/recipies/Test_ExplicitConvert.cpp +++ b/unit_tests/recipies/Test_ExplicitConvert.cpp @@ -42,5 +42,5 @@ TEST_CASE("[ExplicitConvert] conv") { explicitConvert(g1); g1->save("ExplicitConvert_after"); - REQUIRE(g1->getNodes().size() == 5); + REQUIRE(g1->getNodes().size() == 13); } diff --git a/unit_tests/scheduler/Test_Convert.cpp b/unit_tests/scheduler/Test_Convert.cpp new file mode 100644 index 0000000000000000000000000000000000000000..df3db1c2d6467dc328878929e172c480a109ac60 --- /dev/null +++ b/unit_tests/scheduler/Test_Convert.cpp @@ -0,0 +1,239 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/TensorUtils.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/graph/OpArgs.hpp" +#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/recipies/Recipies.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/convert] Convert(forward)") { + std::shared_ptr<Tensor> inputTensor = + std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}}, + {{{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}}}}); + + std::shared_ptr<Tensor> weight1 = std::make_shared<Tensor>( + Array4D<int, 3, 1, 3, 3>{{{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}}, + {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}}, + {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}}); + + std::shared_ptr<Tensor> bias1 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + + SECTION("Test implicit") { + std::shared_ptr<GraphView> g = + Sequential({ + Conv(1, 3, {3, 3}, "conv1"), + Conv(3, 4, {1, 1}, "conv2"), + Conv(4, 3, {1, 1}, "conv3"), + FC(27, 5, false, "fc")}); + + g->getNode("conv1")->getOperator()->setInput(0, inputTensor); + g->getNode("conv1")->getOperator()->setInput(1, weight1); + g->getNode("conv1")->getOperator()->setInput(2, bias1); + + std::shared_ptr<Tensor> weight2 = + std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, + {{{4}}, {{5}}, {{6}}}, + {{{7}}, {{8}}, {{9}}}, + {{{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); + g->getNode("conv2")->getOperator()->setInput(1, weight2); + g->getNode("conv2")->getOperator()->setInput(2, bias2); + // *(g->getNode("conv2")->getOperator()->input(1, weight2); + + std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>( + Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, + {{{5}}, {{6}}, {{7}}, {{8}}}, + {{{9}}, {{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + g->getNode("conv3")->getOperator()->setInput(1, weight3); + g->getNode("conv3")->getOperator()->setInput(2, bias3); + + std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>( + Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); + std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + g->getNode("fc")->getOperator()->setInput(1, weightfc); + g->getNode("fc")->getOperator()->setInput(2, biasfc); + + // input->addChild(g); + g->setDataType(Aidge::DataType::Int32); + g->getNode("conv1")->getOperator()->setDataType(DataType::Float32); + g->getNode("conv3")->getOperator()->setDataType(DataType::Float64); + + g->setBackend("cpu"); + g->forwardDims(); + SequentialScheduler scheduler(g); + REQUIRE_NOTHROW(scheduler.forward()); + scheduler.saveSchedulingDiagram("schedulingSequential"); + + std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, + {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, + {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, + {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, + {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, + {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); + + std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{ + {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}}, + {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}}, + {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}}, + {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}}, + {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}}, + {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}}, + {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}}, + {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}); + + std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}}, + {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}}, + {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}}, + {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}}, + {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}}, + {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}); + + Tensor expectedOutput4 = Array2D<int, 2, 5>{ + {{205050376, 198925904, 181355097, 196978090, 238868348}, + {598467376, 561797804, 560823897, 593043790, 698672948}}}; + std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0); + REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0); + REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0); + REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0); + REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12)); + } + + SECTION("Test explicit") { + std::shared_ptr<GraphView> g = + Sequential({ + Conv(1, 3, {3, 3}, "conv1"), + Conv(3, 4, {1, 1}, "conv2"), + Conv(4, 3, {1, 1}, "conv3"), + FC(27, 5, false, "fc")}); + + g->getNode("conv1")->getOperator()->setInput(0, inputTensor); + g->getNode("conv1")->getOperator()->setInput(1, weight1); + g->getNode("conv1")->getOperator()->setInput(2, bias1); + + std::shared_ptr<Tensor> weight2 = + std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, + {{{4}}, {{5}}, {{6}}}, + {{{7}}, {{8}}, {{9}}}, + {{{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); + g->getNode("conv2")->getOperator()->setInput(1, weight2); + g->getNode("conv2")->getOperator()->setInput(2, bias2); + // *(g->getNode("conv2")->getOperator()->input(1, weight2); + + std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>( + Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, + {{{5}}, {{6}}, {{7}}, {{8}}}, + {{{9}}, {{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + g->getNode("conv3")->getOperator()->setInput(1, weight3); + g->getNode("conv3")->getOperator()->setInput(2, bias3); + + std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>( + Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); + std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + g->getNode("fc")->getOperator()->setInput(1, weightfc); + g->getNode("fc")->getOperator()->setInput(2, biasfc); + + // input->addChild(g); + g->setDataType(Aidge::DataType::Int32); + g->getNode("conv1")->getOperator()->setDataType(DataType::Float32); + g->getNode("conv3")->getOperator()->setDataType(DataType::Float64); + + explicitConvert(g); + g->setBackend("cpu"); + g->forwardDims(); + + SequentialScheduler scheduler(g); + REQUIRE_NOTHROW(scheduler.forward()); + scheduler.saveSchedulingDiagram("schedulingSequential"); + + std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, + {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, + {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, + {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, + {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, + {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); + + std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{ + {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}}, + {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}}, + {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}}, + {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}}, + {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}}, + {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}}, + {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}}, + {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}); + + std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}}, + {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}}, + {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}}, + {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}}, + {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}}, + {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}); + + Tensor expectedOutput4 = Array2D<int, 2, 5>{ + {{205050376, 198925904, 181355097, 196978090, 238868348}, + {598467376, 561797804, 560823897, 593043790, 698672948}}}; + std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0); + REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0); + REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0); + REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0); + REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12)); + } +}