Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mszczep/aidge_backend_cpu
  • eclipse/aidge/aidge_backend_cpu
  • hrouis/aidge_backend_cpu
  • oantoni/aidge_backend_cpu
  • raphaelmillet/aidge_backend_cpu
  • cguillon/aidge_backend_cpu
  • jeromeh/aidge_backend_cpu
  • axelfarr/aidge_backend_cpu
  • noamzerah/aidge_backend_cpu
  • silvanosky/aidge_backend_cpu
  • maab05/aidge_backend_cpu
  • lucaslopez/aidge_backend_cpu_ll
  • farnez/aidge_backend_cpu
  • mick94/aidge_backend_cpu
14 results
Show changes
Showing
with 2508 additions and 79 deletions
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <chrono>     // std::micro, std::chrono::time_point,
                      // std::chrono::system_clock, std::chrono::duration
#include <cmath>      // std::nearbyint
#include <cstddef>    // std::size_t
#include <cstdint>    // std::uint16_t
#include <functional> // std::multiplies
#include <memory>
#include <numeric>    // std::accumulate
#include <random>     // std::random_device, std::mt19937
                      // std::uniform_int_distribution, std::uniform_real_distribution
#include <vector>

#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>

#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/RoundImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") {
    // Checks the CPU Round kernel against std::nearbyint on randomly
    // shaped/valued Float32 tensors, over NBTRIALS independent trials.
    constexpr std::uint16_t NBTRIALS = 15;

    // Random generators: element values in [-15, 15), each dimension size
    // in [2, 5], and tensor rank in [1, 3].
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> valueDist(-15, 15);
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));

    // Create Round Operator
    std::shared_ptr<Node> myRound = Round();
    auto op = std::static_pointer_cast<OperatorTensor>(myRound->getOperator());
    op->setDataType(DataType::Float32);
    op->setBackend("cpu");

    // Input tensor, reused (resized + refilled) across trials
    std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
    op->associateInput(0, T0);
    T0->setDataType(DataType::Float32);
    T0->setBackend("cpu");

    // Reference-result tensor, compared against the operator's output
    std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
    Tres->setDataType(DataType::Float32);
    Tres->setBackend("cpu");

    // To measure execution time of 'Round_Op::forward()' member function call
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration{};

    SECTION("Round [Forward]") {
        SECTION("Test Forward Kernel") {
            std::size_t number_of_operation = 0;
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                // generate a random shape for this trial
                const std::size_t nbDims = nbDimsDist(gen);
                std::vector<std::size_t> dims;
                for (std::size_t i = 0; i < nbDims; ++i) {
                    dims.push_back(dimSizeDist(gen));
                }
                const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
                number_of_operation += nb_elements;

                // Fill the input and the reference result. std::nearbyint
                // rounds using the current FP rounding mode (round-to-nearest-
                // even by default), which is the behaviour expected from the
                // Round kernel.
                float* array0 = new float[nb_elements];
                float* result = new float[nb_elements];
                for (std::size_t i = 0; i < nb_elements; ++i) {
                    array0[i] = valueDist(gen);
                    result[i] = std::nearbyint(array0[i]);
                }

                // input0 (tensor takes ownership of array0 via its impl)
                T0->resize(dims);
                T0->getImpl()->setRawPtr(array0, nb_elements);
                // expected results
                Tres->resize(dims);
                Tres->getImpl()->setRawPtr(result, nb_elements);

                op->forwardDims();
                start = std::chrono::system_clock::now();
                myRound->forward();
                end = std::chrono::system_clock::now();
                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

                REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));

                delete[] array0;
                delete[] result;
            }
            Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
            Log::info("total time: {} μs\n", duration.count());
        }
    }
}
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Slice.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
    // Each section checks the CPU Slice kernel on Int32 tensors of increasing
    // rank, validating output values, dimensions and data type. The slice
    // bounds come either from extra inputs (starts/ends/axes tensors) or from
    // operator attributes.
    SECTION("1D Tensor") {
        // Take elements [0, 3) on axis 0.
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> {
            {0, 1, -2,-3, 4,-5,-6, 7, 8, 9}
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,3> {
            {0, 1, -2}
        });
        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,1>{{0}});
        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,1>{{3}});
        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,1>{{0}});

        std::shared_ptr<Node> mySlice = Slice();
        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
        op->associateInput(0,input0);
        op->associateInput(1,starts);
        op->associateInput(2,ends);
        op->associateInput(3,axes);
        op->setDataType(DataType::Int32);
        op->setBackend("cpu");
        mySlice->forward();

        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
    }
    SECTION("2D Tensor") {
        // Rows [0, 2) and columns [5, 8).
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> {
            {
                { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
            }
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,3> {
            {
                {-5,-6, 7},
                {-5,-6, 7}
            }
        });
        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,2>{{0,5}});
        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,2>{{2,8}});
        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,2>{{0,1}});

        std::shared_ptr<Node> mySlice = Slice();
        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
        op->associateInput(0,input0);
        op->associateInput(1,starts);
        op->associateInput(2,ends);
        op->associateInput(3,axes);
        op->setDataType(DataType::Int32);
        op->setBackend("cpu");
        mySlice->forward();
        // op->getOutput(0)->print();

        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
    }
    SECTION("3D Tensor") {
        // Slice [0:1, 1:2, 4:7] out of a (2,2,10) tensor.
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> {
            {
                {
                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                },
                {
                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                }
            }
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,1,1,3> {
            {
                {
                    { 4,-5,-6}
                }
            }
        });
        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,3>{{0,1,4}});
        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,3>{{1,2,7}});
        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,3>{{0,1,2}});

        std::shared_ptr<Node> mySlice = Slice();
        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
        op->associateInput(0,input0);
        op->associateInput(1,starts);
        op->associateInput(2,ends);
        op->associateInput(3,axes);
        op->setDataType(DataType::Int32);
        op->setBackend("cpu");
        mySlice->forward();
        // op->getOutput(0)->print();

        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
    }
    SECTION("4D Tensor") {
        // Full-range slice on every axis: output must equal the input.
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
            {
                {
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    },
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    }
                },
                {
                    {
                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    },
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
                    }
                }
            }
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
            {
                {
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    },
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    }
                },
                {
                    {
                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    },
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
                    }
                }
            }
        });
        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,4>{{0,0,0,0}});
        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,4>{{2,2,2,10}});
        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,4>{{0,1,2,3}});

        std::shared_ptr<Node> mySlice = Slice();
        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
        op->associateInput(0,input0);
        op->associateInput(1,starts);
        op->associateInput(2,ends);
        op->associateInput(3,axes);
        op->setDataType(DataType::Int32);
        op->setBackend("cpu");
        mySlice->forward();
        // op->getOutput(0)->print();

        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
    }
    SECTION("Attributes instead of inputs") {
        // Same semantics as the tensor-input form, but starts/ends/axes/steps
        // are passed as constructor attributes.
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
            {
                {
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    },
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    }
                },
                {
                    {
                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
                    },
                    {
                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
                    }
                }
            }
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,1,1,5> {
            {
                {
                    {
                        { 0, 1, 2,-3, 4}
                    }
                }
            }
        });

        std::shared_ptr<Node> mySlice = Slice({0,0,0,0}, {1,1,1,5}, {0,1,2,3}, {1,1,1,1});
        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
        op->associateInput(0,input0);
        op->setDataType(DataType::Int32);
        op->setBackend("cpu");
        mySlice->forward();
        // op->getOutput(0)->print();

        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
    }
    SECTION("Different Steps") {
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,4,2,8> {
            {
                {
                    { 0, 1, 2,-3, 4,-5,-6,7},
                    {-5, 4, 2,-3, 4,-5,-6,-7}
                },
                {
                    { 10, 11, 12,-13, 14,-15,-16,17},
                    {-15, 14, 12,-13, 14,-15,-16,-17}
                },
                {
                    { 20, 21, 22,-23, 24,-25,-26,27},
                    {-25, 24, 22,-23, 24,-25,-26,-27}
                },
                {
                    { 30, 31, 32,-33, 34,-35,-36,37},
                    {-35, 34, 32,-33, 34,-35,-36,-37}
                }
            }
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,1,3> {
            {
                {
                    { 7, 4, 1}
                },
                {
                    { 27, 24, 21}
                }
            }
        });

        std::shared_ptr<Node> mySlice = Slice({0,0,7}, {4,1,0}, {0,1,2}, {2,1,-3});
        // Steps are 2,1,-3 so the slice will be:
        // on Axis 0: from 0 to 4 by step of 2
        // on Axis 1: from 0 to 1 by step of 1
        // on Axis 2: from 7 to 0 by step of -3 (reverse the order of elements)
        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
        op->associateInput(0,input0);
        op->setDataType(DataType::Int32);
        op->setBackend("cpu");
        mySlice->forward();
        // op->getOutput(0)->print();

        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
    }
}
......@@ -9,18 +9,20 @@
*
********************************************************************************/
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Softmax.hpp"
#include "aidge/backend/cpu.hpp"
#include <memory>
#include "aidge/utils/ArrayHelpers.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] Softmax(forward)") {
TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") {
SECTION("2D Tensor") {
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<float,2,10> {
{
......@@ -30,28 +32,22 @@ TEST_CASE("[cpu/operator] Softmax(forward)") {
0.35077620, -0.78156322, -0.98952234, 0.04166317, 1.34357309}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,10> {
Tensor expectedOutput = Array2D<float,2,10> {
{
{0.04883239, 0.11326669, 0.05974559, 0.09930880, 0.09267281, 0.03006749,
0.15842478, 0.24514021, 0.07825989, 0.07428131},
{0.05429055, 0.27136859, 0.28389078, 0.02240700, 0.06262558, 0.06087753,
0.01961952, 0.01593576, 0.04469007, 0.16429459}
}
});
};
std::shared_ptr<Node> mySoftmax = Softmax();
mySoftmax->getOperator()->setDatatype(DataType::Float32);
mySoftmax->getOperator()->setBackend("cpu");
mySoftmax->getOperator()->associateInput(0,input);
mySoftmax->getOperator()->computeOutputDims();
mySoftmax->forward();
float* resPtr = static_cast<float*>(mySoftmax->getOperator()->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< 20; ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
}
std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1);
op->associateInput(0,input);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->forward();
REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f));
}
SECTION("4D Tensor") {
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
......@@ -80,7 +76,7 @@ TEST_CASE("[cpu/operator] Softmax(forward)") {
}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
Tensor expectedOutput = Array4D<float,2,3,3,3> {
{
{
{{0.45109013, 0.42849392, 0.43775153},
......@@ -105,20 +101,14 @@ TEST_CASE("[cpu/operator] Softmax(forward)") {
{0.34566763, 0.32462072, 0.48979440}}
}
}
});
};
std::shared_ptr<Node> mySoftmax = Softmax();
mySoftmax->getOperator()->setDatatype(DataType::Float32);
mySoftmax->getOperator()->setBackend("cpu");
mySoftmax->getOperator()->associateInput(0,input);
mySoftmax->getOperator()->computeOutputDims();
mySoftmax->forward();
std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1);
op->associateInput(0,input);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->forward();
float* resPtr = static_cast<float*>(mySoftmax->getOperator()->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< 54; ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
}
// REQUIRE(*mySoftmax->getOperator()->getOutput(0) == *expectedOutput);
REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f));
}
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Sqrt.hpp"
#include "aidge/utils/ArrayHelpers.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] Sqrt(forward)", "[Sqrt][CPU]") {
    // Validates the CPU Sqrt kernel on fixed Float32 tensors (2-D and 4-D)
    // against precomputed element-wise square roots, within approxEq
    // tolerances of 1e-5 (relative) and 1e-8 (absolute).
    SECTION("2D Tensor") {
        auto inputTensor = std::make_shared<Tensor>(Array2D<float,2,2> {
            {
                {16.00000000, 0.62226844},
                { 0.00000000, 1.84539008}
            }
        });
        // Element-wise square roots of the input above.
        Tensor expected = Array2D<float,2,2> {
            {
                {4.00000000, 0.78883994},
                {0.00000000, 1.35845140}
            }
        };

        auto sqrtOp = std::make_shared<Sqrt_Op>();
        sqrtOp->associateInput(0, inputTensor);
        sqrtOp->setDataType(DataType::Float32);
        sqrtOp->setBackend("cpu");
        sqrtOp->forward();

        REQUIRE(approxEq<float>(*(sqrtOp->getOutput(0)), expected, 1e-5f, 1e-8f));
    }
    SECTION("4D Tensor") {
        auto inputTensor = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
            {
                {
                    {{0.06218481, 0.46850157, 0.60914326},
                     {0.57470602, 0.09943211, 0.59992820},
                     {0.99623793, 0.54931718, 0.89343822}},
                    {{0.75176072, 0.38237786, 0.84824580},
                     {0.10619396, 0.11959118, 0.93499404},
                     {0.65563291, 0.02913034, 0.17093092}},
                    {{0.36303985, 0.92073035, 0.79146117},
                     {0.88962847, 0.94561219, 0.92033130},
                     {0.52903181, 0.13397896, 0.76086712}}
                },
                {
                    {{0.31242222, 0.80526417, 0.48411584},
                     {0.84375203, 0.65408552, 0.55028963},
                     {0.77546734, 0.06203610, 0.83163154}},
                    {{0.46342927, 0.53631741, 0.39145601},
                     {0.14204198, 0.84214240, 0.94185621},
                     {0.05068624, 0.99889028, 0.38464361}},
                    {{0.37591159, 0.51769549, 0.30288595},
                     {0.96883464, 0.35154045, 0.55648762},
                     {0.13022375, 0.73467660, 0.02705121}}
                }
            }
        });
        // Element-wise square roots of the input above.
        Tensor expected = Array4D<float,2,3,3,3> {
            {
                {
                    {{0.24936883, 0.6844717, 0.7804763},
                     {0.75809366, 0.31532857, 0.7745503},
                     {0.9981172, 0.7411593, 0.9452186}},
                    {{0.86704135, 0.6183671, 0.9210026},
                     {0.32587415, 0.34581956, 0.9669509},
                     {0.80971164, 0.17067613, 0.41343793}},
                    {{0.60252786, 0.9595469, 0.88964105},
                     {0.9432012, 0.97242594, 0.95933896},
                     {0.7273457, 0.36603138, 0.87227696}}
                },
                {
                    {{0.55894744, 0.89736515, 0.69578433},
                     {0.91855973, 0.8087555, 0.7418151},
                     {0.88060623, 0.24907047, 0.91193837}},
                    {{0.6807564, 0.73233694, 0.6256645},
                     {0.37688458, 0.9176832, 0.9704928},
                     {0.22513604, 0.99944496, 0.62019646}},
                    {{0.6131163, 0.7195106, 0.5503507},
                     {0.984294, 0.59290844, 0.745981},
                     {0.3608653, 0.8571328, 0.16447252}}
                }
            }
        };

        auto sqrtOp = std::make_shared<Sqrt_Op>();
        sqrtOp->associateInput(0, inputTensor);
        sqrtOp->setDataType(DataType::Float32);
        sqrtOp->setBackend("cpu");
        sqrtOp->forward();

        REQUIRE(approxEq<float>(*(sqrtOp->getOutput(0)), expected, 1e-5f, 1e-8f));
    }
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <chrono> // std::micro, std::chrono::time_point,
// std::chrono::system_clock
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <functional> // std::multiplies
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937
// std::uniform_int_distribution, std::uniform_real_distribution
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/SubImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Sub.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
    // Validates the CPU Sub kernel (element-wise subtraction) against a
    // reference computed by hand, for same-shape tensors and for two
    // broadcasting configurations, over NBTRIALS random trials each.
    constexpr std::uint16_t NBTRIALS = 10;
    // Create a random number generator
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // random values in [0.1, 1.1)
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
    std::uniform_int_distribution<int> boolDist(0,1); // coin flip used to shrink dims to 1 for broadcasting
    // Create Sub Operator
    std::shared_ptr<Node> mySub = Sub();
    auto op = std::static_pointer_cast<OperatorTensor>(mySub-> getOperator());
    op->setDataType(DataType::Float32);
    op->setBackend("cpu");
    // Create 2 input Tensors (reused/resized across trials)
    std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
    op->associateInput(0,T0);
    T0->setDataType(DataType::Float32);
    T0->setBackend("cpu");
    std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
    op -> associateInput(1,T1);
    T1->setDataType(DataType::Float32);
    T1->setBackend("cpu");
    // Create results Tensor (holds the hand-computed reference)
    std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
    Tres->setDataType(DataType::Float32);
    Tres->setBackend("cpu");
    // To measure execution time of 'Sub_Op::forward()' member function call
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration{};
    SECTION("SubImpl_cpu::forward()") {
        SECTION("Scalar / Scalar") {
            // TODO(review): empty placeholder section — not yet implemented
        }
        SECTION("Scalar / +1-D Tensor") {
            // TODO(review): empty placeholder section — not yet implemented
        }
        SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
            std::size_t number_of_operation = 0;
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                // generate 2 random Tensors
                const std::size_t nbDims = nbDimsDist(gen);
                std::vector<std::size_t> dims;
                for (std::size_t i = 0; i < nbDims; ++i) {
                    dims.push_back(dimSizeDist(gen));
                }
                const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
                number_of_operation += nb_elements;
                // without broadcasting: same shape on both inputs,
                // reference is a plain element-wise subtraction
                float* array0 = new float[nb_elements];
                float* array1 = new float[nb_elements];
                float* result = new float[nb_elements];
                for (std::size_t i = 0; i < nb_elements; ++i) {
                    array0[i] = valueDist(gen);
                    array1[i] = valueDist(gen);
                    result[i] = array0[i] - array1[i];
                }
                // input0
                T0->resize(dims);
                T0 -> getImpl() -> setRawPtr(array0, nb_elements);
                // input1
                T1->resize(dims);
                T1 -> getImpl() -> setRawPtr(array1, nb_elements);
                // results
                Tres->resize(dims);
                Tres -> getImpl() -> setRawPtr(result, nb_elements);
                op->forwardDims();
                start = std::chrono::system_clock::now();
                mySub->forward();
                end = std::chrono::system_clock::now();
                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
                REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
                delete[] array0;
                delete[] array1;
                delete[] result;
                // with broadcasting
            }
            Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
            Log::info("total time: {}μs\n", duration.count());
        }
        SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
            std::size_t number_of_operation = 0;
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                // generate 2 random Tensors
                // handle dimensions, replace some dimensions with '1' to get broadcasting
                constexpr std::size_t nbDims = 4;
                std::vector<std::size_t> dims;
                for (std::size_t i = 0; i < nbDims; ++i) {
                    dims.push_back(dimSizeDist(gen));
                }
                // randomly collapse axes of each input to size 1; the output
                // dimension is whichever input kept the full size
                std::vector<std::size_t> dims0 = dims;
                std::vector<std::size_t> dims1 = dims;
                std::vector<std::size_t> dimsOut = dims;
                for (std::size_t i = 0; i < nbDims; ++i) {
                    if (boolDist(gen)) {
                        dims0[i] = 1;
                    }
                    if (boolDist(gen)) {
                        dims1[i] = 1;
                    }
                    dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
                }
                // create arrays and fill them with random values
                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
                float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
                for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
                    array0[i] = valueDist(gen);
                }
                for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
                    array1[i] = valueDist(gen);
                }
                // compute true result: walk the output shape and, per axis,
                // clamp the source index to 0 when that input's dim is 1
                // (i.e. the broadcast axis reuses its single element)
                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
                const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
                for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                    for (std::size_t b = 0; b < dimsOut[1]; ++b) {
                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
                        const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
                                                    + strides1[1] * ((dims1[1] > 1) ? b : 0);
                        for (std::size_t c = 0; c < dimsOut[2]; ++c) {
                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
                            for (std::size_t d = 0; d < dimsOut[3]; ++d) {
                                std::size_t idx0 = idx0_0
                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
                                                    + ((dims0[3] > 1) ? d : 0);
                                std::size_t idx1 = idx1_0
                                                    + strides1[2] * ((dims1[2] > 1) ? c : 0)
                                                    + ((dims1[3] > 1) ? d : 0);
                                result[idx_out + d] = array0[idx0] - array1[idx1];
                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
                            }
                        }
                    }
                }
                // conversion to Aidge::Tensors
                // input0
                T0->resize(dims0);
                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
                // input1
                T1->resize(dims1);
                T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
                // results
                Tres->resize(dimsOut);
                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
                // compute result
                op->forwardDims();
                start = std::chrono::system_clock::now();
                mySub->forward();
                end = std::chrono::system_clock::now();
                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
                // comparison between truth and computed result
                REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
                delete[] array0;
                delete[] array1;
                delete[] result;
                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
                number_of_operation += nb_elements;
            }
            Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
            Log::info("total time: {}μs\n", duration.count());
        }
        SECTION("+1-D Tensor / 1-D Tensor") {
            std::size_t number_of_operation = 0;
            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                // generate 2 random Tensors
                // handle dimensions
                constexpr std::size_t nbDims = 4;
                std::vector<std::size_t> dims0(4);
                for (std::size_t i = 0; i < nbDims; ++i) {
                    dims0[i] = dimSizeDist(gen);
                }
                std::vector<std::size_t> dimsOut = dims0;
                std::vector<std::size_t> dims1 = dims0;
                for (std::size_t i = 0; i < nbDims; ++i) {
                    if (boolDist(gen)) {
                        dims1[i] = 1;
                    }
                }
                // drop 1-3 leading axes from input1 so it has a lower rank
                dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
                // create arrays and fill them with random values
                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
                std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
                float* array1 = new float[array1_size];
                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
                for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
                    array0[i] = valueDist(gen);
                }
                for (std::size_t i = 0; i < array1_size; ++i) {
                    array1[i] = valueDist(gen);
                }
                // compute true result: left-pad input1's shape back to rank 4
                // with 1s (mirrors numpy-style broadcasting alignment)
                auto dims1_tmp = dims1;
                dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
                const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
                for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                    for (std::size_t b = 0; b < dimsOut[1]; ++b) {
                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
                        const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
                                                    + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
                        for (std::size_t c = 0; c < dimsOut[2]; ++c) {
                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
                            for (std::size_t d = 0; d < dimsOut[3]; ++d) {
                                std::size_t idx0 = idx0_0
                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
                                                    + ((dims0[3] > 1) ? d : 0);
                                std::size_t idx1 = idx1_0
                                                    + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
                                                    + ((dims1_tmp[3] > 1) ? d : 0);
                                result[idx_out + d] = array0[idx0] - array1[idx1];
                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
                            }
                        }
                    }
                }
                // conversion to Aidge::Tensors
                // input0
                T0->resize(dims0);
                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
                // input1
                T1->resize(dims1);
                T1 -> getImpl() -> setRawPtr(array1, array1_size);
                // results
                Tres->resize(dimsOut);
                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
                // compute result
                op->forwardDims();
                start = std::chrono::system_clock::now();
                mySub->forward();
                end = std::chrono::system_clock::now();
                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
                // comparison between truth and computed result
                REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
                delete[] array0;
                delete[] array1;
                delete[] result;
                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
                number_of_operation += nb_elements;
            }
            Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
            Log::info("total time: {}μs\n", duration.count());
        }
    }
}
} // namespace Aidge
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/WeightInterleaving.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu.hpp"
#include <memory>
using namespace Aidge;
TEST_CASE("[cpu/operator] WeightInterleaving", "[WeightInterleaving][CPU]") {
// Validates WeightInterleaving_Op on CPU: the compactDataSize() byte-count
// arithmetic, the forward kernel that packs sub-byte (Int2/Int3/Int4) weights
// into interleaved bytes, and the applyWeightInterleaving recipe on a Conv.
std::shared_ptr<Node> myWeightInterleaving = WeightInterleaving();
auto opWeightInterleaving = std::static_pointer_cast<WeightInterleaving_Op>(myWeightInterleaving -> getOperator());
SECTION("CompactDataSize - Single element cases") {
REQUIRE(opWeightInterleaving->compactDataSize(1, 1) == 1); // 1 bit, needs 1 byte
REQUIRE(opWeightInterleaving->compactDataSize(1, 7) == 1); // 7 bits, needs 1 byte
}
SECTION("CompactDataSize - Boundary cases for different nb_bits values") {
REQUIRE(opWeightInterleaving->compactDataSize(8, 1) == 1); // 8 elements at 1 bit each, fits in 1 byte
REQUIRE(opWeightInterleaving->compactDataSize(8, 2) == 2); // 8 elements at 2 bits each, needs 2 bytes
REQUIRE(opWeightInterleaving->compactDataSize(8, 3) == 4); // 8 elements at 3 bits each, needs 4 bytes (3-bit values occupy a 4-bit slot)
REQUIRE(opWeightInterleaving->compactDataSize(8, 4) == 4); // 8 elements at 4 bits each, needs 4 bytes
}
SECTION("CompactDataSize - Larger dataSize values") {
REQUIRE(opWeightInterleaving->compactDataSize(16, 1) == 2); // 16 elements at 1 bit each, fits in 2 bytes
REQUIRE(opWeightInterleaving->compactDataSize(16, 2) == 4); // 16 elements at 2 bits each, needs 4 bytes
REQUIRE(opWeightInterleaving->compactDataSize(16, 3) == 8); // 16 elements at 3 bits each, needs 8 bytes (2 values per byte)
REQUIRE(opWeightInterleaving->compactDataSize(16, 4) == 8); // 16 elements at 4 bits each, needs 8 bytes
}
SECTION("CompactDataSize - Odd dataSize values with varying nb_bits") {
REQUIRE(opWeightInterleaving->compactDataSize(7, 1) == 1); // 7 elements at 1 bit each, fits in 1 byte
REQUIRE(opWeightInterleaving->compactDataSize(7, 2) == 2); // 7 elements at 2 bits each, needs 2 bytes
REQUIRE(opWeightInterleaving->compactDataSize(7, 3) == 4); // 7 elements at 3 bits each, needs 4 bytes
REQUIRE(opWeightInterleaving->compactDataSize(7, 4) == 4); // 7 elements at 4 bits each, needs 4 bytes
}
SECTION("CompactDataSize - Minimum and maximum values for nb_bits") {
REQUIRE(opWeightInterleaving->compactDataSize(5, 1) == 1); // 5 elements at 1 bit each, fits in 1 byte
}
SECTION("CompactDataSize - Edge Case - dataSize of 0 should result in 0 required size") {
REQUIRE(opWeightInterleaving->compactDataSize(0, 1) == 0); // No data elements
}
SECTION("CompactData - 4-bit compaction") {
// Two 4-bit values are packed per byte: the first value goes to the high
// nibble (0x0F, 0xF5 -> 0xF5; 0xB3, 0x9C -> 0x3C, low nibbles of each input).
std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 4>{
{static_cast<std::int8_t>(0x0F),
static_cast<std::int8_t>(0xF5),
static_cast<std::int8_t>(0xB3),
static_cast<std::int8_t>(0x9C)}
});
weight->setDataFormat(Aidge::DataFormat::NHWC);
weight->setDataType(Aidge::DataType::Int4);
std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{
{static_cast<int8_t>(0xF5),
static_cast<int8_t>(0x3C)}
});
expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC);
expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>);
std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving();
auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator());
op->associateInput(0,weight);
op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>);
op->setDataFormat(DataFormat::NHWC);
op->setBackend("cpu");
myWeightInterleavingNode->forward();
REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving);
}
SECTION("CompactData - 3-bit compaction") {
// 3-bit values still occupy a 4-bit slot, so two values per byte.
std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 4>{
{static_cast<int8_t>(0x0F),
static_cast<int8_t>(0x05),
static_cast<int8_t>(0x04),
static_cast<int8_t>(0xD3)}
});
weight->setDataFormat(Aidge::DataFormat::NHWC);
weight->setDataType(Aidge::DataType::Int3);
std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{
{static_cast<int8_t>(0x75),
static_cast<int8_t>(0x43)}
});
expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC);
expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int3>);
std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving();
auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator());
op->associateInput(0,weight);
op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int3>);
op->setDataFormat(DataFormat::NHWC);
op->setBackend("cpu");
myWeightInterleavingNode->forward();
REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving);
}
SECTION("CompactData - 2-bit compaction") {
// Four 2-bit values per byte: 3,2,1,0 -> 0b11100100 == 0xE4.
std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 4>{
{static_cast<std::int8_t>(0x03),
static_cast<std::int8_t>(0x02),
static_cast<std::int8_t>(0x01),
static_cast<std::int8_t>(0x00)}
});
weight->setDataFormat(Aidge::DataFormat::NHWC);
weight->setDataType(Aidge::DataType::Int2);
std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 1>{
{static_cast<int8_t>(0xE4)}
});
expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC);
expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int2>);
std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving();
auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator());
op->associateInput(0,weight);
op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int2>);
op->setDataFormat(DataFormat::NHWC);
op->setBackend("cpu");
myWeightInterleavingNode->forward();
REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving);
}
SECTION("CompactData - Edge Cases - Single element data") {
// A lone 4-bit value packs into the high nibble: 0x0F -> 0xF0.
std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 1>{
{static_cast<int8_t>(0x0F)}
});
weight->setDataFormat(Aidge::DataFormat::NHWC);
weight->setDataType(Aidge::DataType::Int4);
std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 1>{
{static_cast<int8_t>(0xF0)}
});
expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC);
expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>);
std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving();
auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator());
op->associateInput(0,weight);
op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>);
op->setDataFormat(DataFormat::NHWC);
op->setBackend("cpu");
myWeightInterleavingNode->forward();
REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving);
}
SECTION("CompactData - Edge Cases - Non-divisible dataSize for nbSlot with nbbits=4") {
// Odd element count: the last byte is padded with zeros in the low nibble.
std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 3>{
{static_cast<int8_t>(0x0F),
static_cast<int8_t>(0xA5),
static_cast<int8_t>(0x34)}
});
weight->setDataFormat(Aidge::DataFormat::NHWC);
weight->setDataType(Aidge::DataType::Int4);
std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{
{static_cast<int8_t>(0xF5),
static_cast<int8_t>(0x40)}
});
expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC);
expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>);
std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving();
auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator());
op->associateInput(0,weight);
op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>);
op->setDataFormat(DataFormat::NHWC);
op->setBackend("cpu");
myWeightInterleavingNode->forward();
REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving);
}
SECTION("CompactData - Edge Cases - Non-divisible dataSize for nbSlot with nbbits=3") {
std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 3>{
{static_cast<int8_t>(0x0F),
static_cast<int8_t>(0x05),
static_cast<int8_t>(0x04)}
});
weight->setDataFormat(Aidge::DataFormat::NHWC);
weight->setDataType(Aidge::DataType::Int3);
std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{
{static_cast<int8_t>(0x75),
static_cast<int8_t>(0x40)}
});
expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC);
expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int3>);
std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving();
auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator());
op->associateInput(0,weight);
op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int3>);
op->setDataFormat(DataFormat::NHWC);
op->setBackend("cpu");
myWeightInterleavingNode->forward();
REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving);
}
SECTION("Forward Op - Convolution weight interleaving") {
// Weight [Cout = 2, H = 3, W = 3, Cin = 4]:
std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,4> {
{
{
{
{-6, 0, 5, -8}, // 'A' '0' '5' '8' in hexadecimal format
{ 5, 5, 4, -5}, // '5' '5' '4' 'B' in hexadecimal format
{-7, -1, 4, -7} // '9' 'F' '4' '9' in hexadecimal format
},
{
{ 3, -3, -3, -3}, // '3' 'D' 'D' 'D' in hexadecimal format
{ 1, 3, 1, -1}, // '1' '3' '1' 'F' in hexadecimal format
{ 7, -3, -1, 4} // '7' 'D' 'F' '4' in hexadecimal format
},
{
{-1, 3, 5, 6}, // 'F' '3' '5' '6' in hexadecimal format
{-8, 4, 7, 1}, // '8' '4' '7' '1' in hexadecimal format
{-5, 0, -1, -2} // 'B' '0' 'F' 'E' in hexadecimal format
}
},
{
{
{ 2, -7, 7, -4}, // '2' '9' '7' 'C' in hexadecimal format
{-7, 3, 0, 2}, // '9' '3' '0' '2' in hexadecimal format
{ 1, -1, 2, 3} // '1' 'F' '2' '3' in hexadecimal format
},
{
{-1, -5, -3, -7}, // 'F' 'B' 'D' '9' in hexadecimal format
{-8, 3, 5, -1}, // '8' '3' '5' 'F' in hexadecimal format
{-7, -4, -6, -1} // '9' 'C' 'A' 'F' in hexadecimal format
},
{
{ 1, 7, 5, -1}, // '1' '7' '5' 'F' in hexadecimal format
{ 1, -8, 1, 2}, // '1' '8' '1' '2' in hexadecimal format
{-1, -6, -3, 0} // 'F' 'A' 'D' '0' in hexadecimal format
}
}
}
});
// Expected output: the Cin dimension (4 nibbles) collapses into 2 bytes per pixel.
std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,2> {
{
{
{
{static_cast<int8_t>(0xA0), static_cast<int8_t>(0x58)}, // 'A' '0' '5' '8' in hexadecimal format
{static_cast<int8_t>(0x55), static_cast<int8_t>(0x4B)}, // '5' '5' '4' 'B' in hexadecimal format
{static_cast<int8_t>(0x9F), static_cast<int8_t>(0x49)} // '9' 'F' '4' '9' in hexadecimal format
},
{
{static_cast<int8_t>(0x3D), static_cast<int8_t>(0xDD)}, // '3' 'D' 'D' 'D' in hexadecimal format
{static_cast<int8_t>(0x13), static_cast<int8_t>(0x1F)}, // '1' '3' '1' 'F' in hexadecimal format
{static_cast<int8_t>(0x7D), static_cast<int8_t>(0xF4)} // '7' 'D' 'F' '4' in hexadecimal format
},
{
{static_cast<int8_t>(0xF3), static_cast<int8_t>(0x56)}, // 'F' '3' '5' '6' in hexadecimal format
{static_cast<int8_t>(0x84), static_cast<int8_t>(0x71)}, // '8' '4' '7' '1' in hexadecimal format
{static_cast<int8_t>(0xB0), static_cast<int8_t>(0xFE)} // 'B' '0' 'F' 'E' in hexadecimal format
}
},
{
{
{static_cast<int8_t>(0x29), static_cast<int8_t>(0x7C)}, // '2' '9' '7' 'C' in hexadecimal format
{static_cast<int8_t>(0x93), static_cast<int8_t>(0x02)}, // '9' '3' '0' '2' in hexadecimal format
{static_cast<int8_t>(0x1F), static_cast<int8_t>(0x23)} // '1' 'F' '2' '3' in hexadecimal format
},
{
{static_cast<int8_t>(0xFB), static_cast<int8_t>(0xD9)}, // 'F' 'B' 'D' '9' in hexadecimal format
{static_cast<int8_t>(0x83), static_cast<int8_t>(0x5F)}, // '8' '3' '5' 'F' in hexadecimal format
{static_cast<int8_t>(0x9C), static_cast<int8_t>(0xAF)} // '9' 'C' 'A' 'F' in hexadecimal format
},
{
{static_cast<int8_t>(0x17), static_cast<int8_t>(0x5F)}, // '1' '7' '5' 'F' in hexadecimal format
{static_cast<int8_t>(0x18), static_cast<int8_t>(0x12)}, // '1' '8' '1' '2' in hexadecimal format
{static_cast<int8_t>(0xFA), static_cast<int8_t>(0xD0)} // 'F' 'A' 'D' '0' in hexadecimal format
}
}
}
});
weight->setDataFormat(Aidge::DataFormat::NHWC);
weight->setDataType(Aidge::DataType::Int4);
expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC);
expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>);
std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving();
auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator());
op->associateInput(0,weight);
op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>);
op->setDataFormat(DataFormat::NHWC);
op->setBackend("cpu");
myWeightInterleavingNode->forward();
REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving);
}
SECTION("Recipie ApplyWeightInterleaving") {
// Same data as the previous section, but exercised through the
// applyWeightInterleaving recipe on a Conv node's weight Producer.
// Weight [Cout = 2, H = 3, W = 3, Cin = 4]:
std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,4> {
{
{
{
{-6, 0, 5, -8}, // 'A' '0' '5' '8' in hexadecimal format
{ 5, 5, 4, -5}, // '5' '5' '4' 'B' in hexadecimal format
{-7, -1, 4, -7} // '9' 'F' '4' '9' in hexadecimal format
},
{
{ 3, -3, -3, -3}, // '3' 'D' 'D' 'D' in hexadecimal format
{ 1, 3, 1, -1}, // '1' '3' '1' 'F' in hexadecimal format
{ 7, -3, -1, 4} // '7' 'D' 'F' '4' in hexadecimal format
},
{
{-1, 3, 5, 6}, // 'F' '3' '5' '6' in hexadecimal format
{-8, 4, 7, 1}, // '8' '4' '7' '1' in hexadecimal format
{-5, 0, -1, -2} // 'B' '0' 'F' 'E' in hexadecimal format
}
},
{
{
{ 2, -7, 7, -4}, // '2' '9' '7' 'C' in hexadecimal format
{-7, 3, 0, 2}, // '9' '3' '0' '2' in hexadecimal format
{ 1, -1, 2, 3} // '1' 'F' '2' '3' in hexadecimal format
},
{
{-1, -5, -3, -7}, // 'F' 'B' 'D' '9' in hexadecimal format
{-8, 3, 5, -1}, // '8' '3' '5' 'F' in hexadecimal format
{-7, -4, -6, -1} // '9' 'C' 'A' 'F' in hexadecimal format
},
{
{ 1, 7, 5, -1}, // '1' '7' '5' 'F' in hexadecimal format
{ 1, -8, 1, 2}, // '1' '8' '1' '2' in hexadecimal format
{-1, -6, -3, 0} // 'F' 'A' 'D' '0' in hexadecimal format
}
}
}
});
std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,2> {
{
{
{
{static_cast<int8_t>(0xA0), static_cast<int8_t>(0x58)}, // 'A' '0' '5' '8' in hexadecimal format
{static_cast<int8_t>(0x55), static_cast<int8_t>(0x4B)}, // '5' '5' '4' 'B' in hexadecimal format
{static_cast<int8_t>(0x9F), static_cast<int8_t>(0x49)} // '9' 'F' '4' '9' in hexadecimal format
},
{
{static_cast<int8_t>(0x3D), static_cast<int8_t>(0xDD)}, // '3' 'D' 'D' 'D' in hexadecimal format
{static_cast<int8_t>(0x13), static_cast<int8_t>(0x1F)}, // '1' '3' '1' 'F' in hexadecimal format
{static_cast<int8_t>(0x7D), static_cast<int8_t>(0xF4)} // '7' 'D' 'F' '4' in hexadecimal format
},
{
{static_cast<int8_t>(0xF3), static_cast<int8_t>(0x56)}, // 'F' '3' '5' '6' in hexadecimal format
{static_cast<int8_t>(0x84), static_cast<int8_t>(0x71)}, // '8' '4' '7' '1' in hexadecimal format
{static_cast<int8_t>(0xB0), static_cast<int8_t>(0xFE)} // 'B' '0' 'F' 'E' in hexadecimal format
}
},
{
{
{static_cast<int8_t>(0x29), static_cast<int8_t>(0x7C)}, // '2' '9' '7' 'C' in hexadecimal format
{static_cast<int8_t>(0x93), static_cast<int8_t>(0x02)}, // '9' '3' '0' '2' in hexadecimal format
{static_cast<int8_t>(0x1F), static_cast<int8_t>(0x23)} // '1' 'F' '2' '3' in hexadecimal format
},
{
{static_cast<int8_t>(0xFB), static_cast<int8_t>(0xD9)}, // 'F' 'B' 'D' '9' in hexadecimal format
{static_cast<int8_t>(0x83), static_cast<int8_t>(0x5F)}, // '8' '3' '5' 'F' in hexadecimal format
{static_cast<int8_t>(0x9C), static_cast<int8_t>(0xAF)} // '9' 'C' 'A' 'F' in hexadecimal format
},
{
{static_cast<int8_t>(0x17), static_cast<int8_t>(0x5F)}, // '1' '7' '5' 'F' in hexadecimal format
{static_cast<int8_t>(0x18), static_cast<int8_t>(0x12)}, // '1' '8' '1' '2' in hexadecimal format
{static_cast<int8_t>(0xFA), static_cast<int8_t>(0xD0)} // 'F' 'A' 'D' '0' in hexadecimal format
}
}
}
});
expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC);
expectedWeightInterleaving->setDataType(Aidge::DataType::Dual_Int4);
// Create convolution node
std::shared_ptr<Node> conv = Conv(4, 2, {3, 3}, "conv1");
// Place the weight tensor in the weight producer of the conv
auto weightProducer = conv->getParent(1);
weightProducer->getOperator()->setOutput(0, weight);
// Set dataType, dataformat and backend of convolution
conv->getOperator()->setDataFormat(Aidge::DataFormat::NHWC);
conv->getOperator()->setDataType(Aidge::DataType::Int4);
conv->getOperator()->setBackend("cpu");
// Apply recipie
applyWeightInterleaving(conv);
// Compare the weight producer output tensor with the expected weights with interleaving
auto newProdOp = std::static_pointer_cast<OperatorTensor>(conv->getParent(1)->getOperator());
REQUIRE(*(newProdOp->getOutput(0)) == *expectedWeightInterleaving);
}
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include "aidge/recipes/Recipes.hpp"
#include "aidge/operator/Add.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include <cstddef>
using namespace Aidge;
TEST_CASE("[ConstantFolding] forward", "[ConstantFolding][forward][CPU]") {
// Builds a 2-layer MatMul+Add network whose inputs are all constant Producers,
// checks the forward result, then verifies constantFolding() collapses the
// whole graph into a single Producer holding that same result.
// generate the original GraphView
auto matmul0 = MatMul("matmul0");
auto add0 = Add("add0");
auto matmul1 = MatMul("matmul1");
auto add1 = Add("add1");
// All Producers are created with constant=true so they are foldable.
auto b0 = Producer(std::make_shared<Tensor>(Array1D<float,5>{{1, 2, 3, 4, 5}}), "B0", true);
auto w0 = Producer(std::make_shared<Tensor>(Array2D<float,5,5>{{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}}}), "W0", true);
auto b1 = Producer(std::make_shared<Tensor>(Array1D<float,5>{{1, 2, 3, 4, 5}}), "B1", true);
auto w1 = Producer(std::make_shared<Tensor>(Array2D<float,5,5>{{{6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}}}),"W1", true);
auto input = Producer(std::make_shared<Tensor>(Array2D<float,2,5>{{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}}}), "input", true);
// Wiring: input x W0 -> +B0 -> x W1 -> +B1
input->addChild(matmul0, 0, 0);
w0->addChild(matmul0, 0, 1);
matmul0->addChild(add0, 0, 0);
b0->addChild(add0, 0, 1);
add0->addChild(matmul1, 0, 0);
w1->addChild(matmul1, 0, 1);
matmul1->addChild(add1, 0, 0);
b1->addChild(add1, 0, 1);
auto g = std::make_shared<GraphView>();
g->add({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1});
g->setBackend("cpu");
g->forwardDims();
// Check original graph
REQUIRE(g->getNodes() ==
std::set<std::shared_ptr<Node>>({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1}));
REQUIRE(((matmul0->getParent(0) == input) && (matmul0->getParent(1) == w0)));
REQUIRE(((add0->getParent(0) == matmul0) && (add0->getParent(1) == b0)));
REQUIRE(((matmul1->getParent(0) == add0) && (matmul1->getParent(1) == w1)));
REQUIRE(((add1->getParent(0) == matmul1) && (add1->getParent(1) == b1)));
auto scheduler = SequentialScheduler(g);
scheduler.forward();
// Pre-computed reference output of the full pipeline.
const std::shared_ptr<Tensor> result = std::make_shared<Tensor>(Array2D<float,2,5>{{
{ 1201.000000, 1532.000000, 1863.000000, 2194.000000, 785.000000},
{ 2501.000000, 3207.000000, 3913.000000, 4619.000000, 1735.000000}
}});
auto add1Op = std::static_pointer_cast<Add_Op>(add1->getOperator());
REQUIRE(approxEq<float>(*(add1Op->getOutput(0)), *result));
// Transform GraphView inplace
constantFolding(g);
// Check new GraphView: everything folded into one constant Producer.
std::set<std::shared_ptr<Node>> newNodes = g->getNodes();
REQUIRE(newNodes != std::set<std::shared_ptr<Node>>({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1}));
REQUIRE(newNodes.size() == 1);
REQUIRE((*newNodes.cbegin())->type() == "Producer");
auto prodOp = std::static_pointer_cast<Producer_Op>((*newNodes.cbegin())->getOperator());
REQUIRE(approxEq<float>(*(prodOp->getOutput(0)), *result));
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include "aidge/recipes/Recipes.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/filler/Filler.hpp"
#include "aidge/graph/OpArgs.hpp"
#include <cstddef>
using namespace Aidge;
TEST_CASE("[ConvToMatMul] conv") {
// Builds a 3-conv pipeline, converts a clone to MatMul form with convToMatMul(),
// and checks the transformed graph produces the same output as the original.
auto conv1 = Conv(3, 4, {3, 3}, "conv1");
auto conv2 = Conv(4, 7, {3, 3}, "conv2", {1, 1}, {1, 1}, true); // no-bias variant
auto conv3 = Conv(7, 10, {1, 1}, "conv3", {2, 2});
auto g1 = Sequential({
Producer({2, 3, 13, 24}, "dataProvider"),
conv1,
conv2,
conv3
});
g1->setBackend("cpu");
g1->forwardDims();
// Random initialization of input and weights (conv2 has no bias input to fill)
uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(0), -10.0, 10.0);
uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(1), -10.0, 10.0);
uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(2), -10.0, 10.0);
uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv2->getOperator())->getInput(1), -10.0, 10.0);
uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(1), -10.0, 10.0);
uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(2), -10.0, 10.0);
auto s1 = SequentialScheduler(g1);
s1.forward();
g1->save("convToMatMul_before");
auto g2 = g1->clone();
g2->forwardDims();
REQUIRE(convToMatMul(g2) == 3); // all three convs must be converted
g2->setBackend("cpu");
auto s2 = SequentialScheduler(g2);
s2.forward();
g2->save("convToMatMul_after");
auto g1OutOp = std::static_pointer_cast<OperatorTensor>((*g1->outputNodes().cbegin())->getOperator());
// Bug fix: read the output of the transformed graph g2 (previously this also
// read from g1, so the reference output was compared against itself and the
// transformation was never actually checked).
auto g2OutOp = std::static_pointer_cast<OperatorTensor>((*g2->outputNodes().cbegin())->getOperator());
REQUIRE(*(g1OutOp->getOutput(0)) == *(g2OutOp->getOutput(0)));
// Simplify the graph: freeze parameters to allow reshaping of the Producers
for (auto node : g2->getNodes()) {
if (node->type() == Producer_Op::Type && node->name() != "dataProvider") {
std::static_pointer_cast<Producer_Op>(node->getOperator())->constant() = true;
}
}
constantFolding(g2);
g2->save("convToMatMul_after_folding");
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include "aidge/recipes/Recipes.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/graph/OpArgs.hpp"
#include <cstddef>
using namespace Aidge;
TEST_CASE("[ExplicitCastMove] conv") {
// Build a conv pipeline with heterogeneous data types, then check that
// explicitCastMove() inserts the explicit Cast/Move nodes (10 -> 13 nodes).
auto headConv = Conv(3, 32, {3, 3}, "conv1");
auto midConv = Conv(32, 64, {3, 3}, "conv2");
auto tailConv = Conv(64, 10, {1, 1}, "conv3", {2, 2});
auto graph = Sequential({
Producer({16, 3, 224, 224}, "dataProvider"),
headConv,
midConv,
tailConv
});
graph->setBackend("cpu");
// Force type mismatches at both ends of the pipeline so casts are required.
headConv->getOperator()->setDataType(DataType::Int32);
tailConv->getOperator()->setDataType(DataType::Float64);
graph->save("explicitCastMove_before");
// 1 Producer + 3 convs + their weight/bias Producers.
REQUIRE(graph->getNodes().size() == 10);
graph->forwardDims();
explicitCastMove(graph);
graph->save("explicitCastMove_after");
// Three extra nodes were inserted by the transformation.
REQUIRE(graph->getNodes().size() == 13);
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <memory>
#include <cmath>
#include "aidge/graph/GraphView.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/BatchNorm.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/data/Tensor.hpp"
namespace Aidge {
TEST_CASE("[core/recipes] FuseBatchNorm", "[recipes][FuseBatchNorm]") {
// Runs Producer -> Conv -> BatchNorm, records the output, applies the
// fuseBatchNorm recipe (folding BN into the Conv weights/bias), re-runs,
// and checks the fused Conv reproduces the original output element-wise.
auto myProd = Producer({2, 3, 3, 3}, "dataProvider");
auto myConv = Conv(3, 3, {1, 1}, "conv1");
// NOTE(review): BatchNorm is constructed with 32 features while conv1 outputs
// 3 channels; the size-3 tensors set on myBNOp below override its inputs —
// confirm the 32 is intentional and not a copy-paste slip.
auto myBN = BatchNorm<2>(32, 1.0e-5F, 0.1F, "batchnorm1");
auto myProdOp = std::static_pointer_cast<Producer_Op>(myProd->getOperator());
auto myConvOp = std::static_pointer_cast<Conv_Op<2>>(myConv->getOperator());
auto myBNOp = std::static_pointer_cast<BatchNorm_Op<2>>(myBN->getOperator());
myProdOp->setOutput(0, std::make_shared<Tensor>(Array4D<float,2,3,3,3> { //NCHW
{
{
{{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
{2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
{1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
{{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
{1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
{7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
{{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
{8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
{9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}
},
{
{{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
{9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
{2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
{{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
{1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
{2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
{{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
{9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
{2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}
}
}
}));
// 1x1 conv weights [Cout=3, Cin=3, 1, 1]
myConvOp -> setInput(1, std::make_shared<Tensor>(Array4D<float,3,3,1,1> { //NCHW
{
{
{{8.28257084e-01}},
{{7.99335480e-01}},
{{7.36702740e-01}}
},
{
{{2.36729562e-01}},
{{8.61912668e-01}},
{{9.93067741e-01}}
},
{
{{1.63514376e-01}},
{{8.95773172e-02}},
{{2.96533108e-01}}
}
}
}));
myConvOp -> setInput(2, std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}})); // conv bias
myBNOp -> setInput(1, std::make_shared<Tensor>(Array1D<float,3> {{0.9044, 0.3028, 0.0218}})); // scale (gamma)
myBNOp -> setInput(2, std::make_shared<Tensor>(Array1D<float,3> {{0.1332, 0.7503, 0.0878}})); // shift (beta)
myBNOp -> setInput(3, std::make_shared<Tensor>(Array1D<float,3> {{0.9931, 0.8421, 0.9936}})); // running mean
myBNOp -> setInput(4, std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}})); // running variance
auto g1 = Sequential({
myProd,
myConv,
myBN
});
g1 -> setName("fuseBNGraph");
g1 -> compile("cpu", DataType::Float32);
auto s = SequentialScheduler(g1);
s.forward();
// Reference output taken from the BatchNorm before fusion.
std::shared_ptr<Tensor> res1 = std::make_shared<Tensor>(*(myBNOp -> getOutput(0)));
fuseBatchNorm(g1);
s.resetScheduling();
s.forward();
// After fusion the Conv is the last node; its output must match the reference.
std::shared_ptr<Tensor> res2 = std::make_shared<Tensor>(*(myConvOp -> getOutput(0)));
REQUIRE(g1 -> outputNodes().size() == 1);
REQUIRE(g1 -> inputNodes().size() == 0);
// Element-wise comparison with absolute tolerance 1e-6.
bool eq = true;
for (std::size_t i = 0; i < res1->size(); ++i) {
eq &= std::abs(res1->get<float>(i) - res2->get<float>(i)) < 1.0e-06;
}
REQUIRE(eq);
}
} // namespace Aidge
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <set>
#include "aidge/graph/GraphView.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/ReLU.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/operator/Concat.hpp"
namespace Aidge {
TEST_CASE("[core/recipes] Tiling(transformation)", "[Tiling][Recipes]") {
// Checks horizontal tiling of a Conv node: the tiled sub-graph produced by
// getConvHorizontalTiling() must compute the same output as the original Conv.
SECTION("Transform a pre-generated GraphView") {
SECTION("Simple Node: Conv") {
std::shared_ptr<Node> myReLU = ReLU("myReLU");
std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv");
// Weights [Cout=4, Cin=3, 3, 3] filled with 0..107.
std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
{
{
{{ 0, 1, 2},
{ 3, 4, 5},
{ 6, 7, 8}},
{{ 9, 10, 11},
{ 12, 13, 14},
{ 15, 16, 17}},
{{ 18, 19, 20},
{ 21, 22, 23},
{ 24, 25, 26}}
},
{
{{ 27, 28, 29},
{ 30, 31, 32},
{ 33, 34, 35}},
{{ 36, 37, 38},
{ 39, 40, 41},
{ 42, 43, 44}},
{{ 45, 46, 47},
{ 48, 49, 50},
{ 51, 52, 53}}
},
{
{{ 54, 55, 56},
{ 57, 58, 59},
{ 60, 61, 62}},
{{ 63, 64, 65},
{ 66, 67, 68},
{ 69, 70, 71}},
{{ 72, 73, 74},
{ 75, 76, 77},
{ 78, 79, 80}}
},
{
{{ 81, 82, 83},
{ 84, 85, 86},
{ 87, 88, 89}},
{{ 90, 91, 92},
{ 93, 94, 95},
{ 96, 97, 98}},
{{ 99, 100, 101},
{102, 103, 104},
{105, 106, 107}}
}
}
});
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
{
{
{{ 0, 1, 2, 3, 4},
{ 5, 6, 7, 8, 9},
{ 10, 11, 12, 13, 14},
{ 15, 16, 17, 18, 19},
{ 20, 21, 22, 23, 24}},
{{ 25, 26, 27, 28, 29},
{ 30, 31, 32, 33, 34},
{ 35, 36, 37, 38, 39},
{ 40, 41, 42, 43, 44},
{ 45, 46, 47, 48, 49}},
{{ 50, 51, 52, 53, 54},
{ 55, 56, 57, 58, 59},
{ 60, 61, 62, 63, 64},
{ 65, 66, 67, 68, 69},
{ 70, 71, 72, 73, 74}}
},
{
{{ 75, 76, 77, 78, 79},
{ 80, 81, 82, 83, 84},
{ 85, 86, 87, 88, 89},
{ 90, 91, 92, 93, 94},
{ 95, 96, 97, 98, 99}},
{{100, 101, 102, 103, 104},
{105, 106, 107, 108, 109},
{110, 111, 112, 113, 114},
{115, 116, 117, 118, 119},
{120, 121, 122, 123, 124}},
{{125, 126, 127, 128, 129},
{130, 131, 132, 133, 134},
{135, 136, 137, 138, 139},
{140, 141, 142, 143, 144},
{145, 146, 147, 148, 149}}
}
}
});
// Expected Conv output [2, 4, 3, 3] for the data above.
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
{
{
{{ 15226, 15577, 15928},
{ 16981, 17332, 17683},
{ 18736, 19087, 19438}},
{{ 37818, 38898, 39978},
{ 43218, 44298, 45378},
{ 48618, 49698, 50778}},
{{ 60426, 62235, 64044},
{ 69471, 71280, 73089},
{ 78516, 80325, 82134}},
{{ 83016, 85554, 88092},
{ 95706, 98244, 100782},
{108396, 110934, 113472}}
},
{
{{ 41551, 41902, 42253},
{ 43306, 43657, 44008},
{ 45061, 45412, 45763}},
{{118818, 119898, 120978},
{124218, 125298, 126378},
{129618, 130698, 131778}},
{{196101, 197910, 199719},
{205146, 206955, 208764},
{214191, 216000, 217809}},
{{273366, 275904, 278442},
{286056, 288594, 291132},
{298746, 301284, 303822}}
}
}
});
myReLU->getOperator()->associateInput(0, myInput);
myReLU->addChild(myConv, 0, 0);
myConv->getOperator()->setInput(1, myWeights);
myConv->getOperator()->setInput(2, myBias);
std::shared_ptr<GraphView> g = std::make_shared<GraphView>();
g->add({myReLU, myConv});
g->compile("cpu", DataType::Int32);
// Tile the Conv along axis 2 (height) into 3 slices.
std::set<std::shared_ptr<Node>> tiledConv = getConvHorizontalTiling(myConv, 2, 3);
SequentialScheduler s(g);
s.forward();
// Reference run before tiling.
REQUIRE(*(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->getOutput(0)) == *myOutput);
// Swap the Conv (and its weight/bias Producers) for the tiled sub-graph.
GraphView::replace({myConv, myConv->getParent(1), myConv->getParent(2)}, tiledConv);
// NOTE(review): this compile() call changes myInput's DataType from Int32 to
// Float32 — suspected bug in compile() dims/type handling, to investigate.
g->compile("cpu", DataType::Int32, 0, {{2,3,5,5}});
s.resetScheduling();
s.forward();
// Tiled graph must reproduce the original output.
REQUIRE(*(std::dynamic_pointer_cast<OperatorTensor>((*g->outputNodes().begin())->getOperator())->getOutput(0)) == *myOutput);
}
}
}
} // namespace Aidge
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cstddef>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include <catch2/catch_test_macros.hpp>
#include "aidge/recipes/Recipes.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/AvgPooling.hpp"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/operator/GenericOperator.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/graph/Matching.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
TEST_CASE("[MatMulTiling]") {
// Builds a chain of three MatMul nodes fed with random inputs, records the
// reference forward output, applies 16x16 tiling on the first MatMul and
// checks that the tiled graph still matches (25 tiles expected for 80x80).
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(-1.0f, 1.0f);
auto dataProvider = Producer({2, 3, 80, 80}, "dataProvider");
auto w1 = Producer({2, 3, 80, 80}, "w1");
auto matmul1 = MatMul("matmul1");
// Fixed: w2/w3 were mislabeled "w1", giving three producers the same debug
// name in saved graphs and logs.
auto w2 = Producer({2, 3, 80, 80}, "w2");
auto matmul2 = MatMul("matmul2");
auto w3 = Producer({2, 3, 80, 80}, "w3");
auto matmul3 = MatMul("matmul3");
dataProvider->addChild(matmul1, 0, 0);
w1->addChild(matmul1, 0, 1);
matmul1->addChild(matmul2, 0, 0);
w2->addChild(matmul2, 0, 1);
matmul2->addChild(matmul3, 0, 0);
w3->addChild(matmul3, 0, 1);
auto g1 = getConnectedGraphView(matmul1);
g1->setBackend("cpu");
g1->forwardDims();
g1->save("MatMulSplitting_graph");
// Fill random values into each producer's output tensor.
fmt::println("Fill random values");
auto fillRandom = [&gen, &valueDist](const std::shared_ptr<Node>& node) {
    auto t = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getOutput(0);
    for (std::size_t i = 0; i < t->size(); ++i) {
        t->set<float>(i, valueDist(gen));
    }
};
fillRandom(dataProvider);
fillRandom(w1);
fillRandom(w2);
fillRandom(w3);
// Reference forward pass before tiling.
fmt::println("Schedule forward graph");
auto s1 = SequentialScheduler(g1);
s1.forward();
const auto tOut = std::static_pointer_cast<OperatorTensor>(g1->getOrderedOutputs()[0].first->getOperator())->getOutput(0)->clone();
// Tiling: split matmul1 into 16x16 output tiles, then drop the Identity
// nodes the recipe inserts.
fmt::println("Tiling");
matMulTiling(matmul1, {16, 16});
removeIdentity(g1);
g1->setBackend("cpu");
g1->save("MatMulSplitting_graph_split");
// Match every MatMul whose output is exactly 16x16 in its last two dims.
auto gm = SinglePassGraphMatching(g1);
gm.addNodeLambda("16x16", [](const NodePtr& node) {
    const auto op =
        std::static_pointer_cast<OperatorTensor>(node->getOperator());
    const auto dims = op->getOutput(0)->dims();
    return (dims.end()[-2] == 16 && dims.end()[-1] == 16);
});
const auto results = gm.match("MatMul[16x16]");
REQUIRE(results.size() == 25);
// Check the tiled graph reproduces the reference output.
fmt::println("Schedule forward tiled graph");
s1 = SequentialScheduler(g1);
s1.resetScheduling();
s1.forward();
const auto tOutTiled = std::static_pointer_cast<OperatorTensor>(g1->getOrderedOutputs()[0].first->getOperator())->getOutput(0)->clone();
REQUIRE(approxEq<float>(tOut, tOutTiled));
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <memory>
#include <string>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/graph/GraphView.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/FC.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/utils/ArrayHelpers.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
// Verifies that cast/move conversions are handled correctly when a graph mixes
// data types (Int32 graph with conv1 forced to Float32 and conv3 to Float64),
// both when conversions are resolved implicitly at scheduling time and when
// they are made explicit with explicitCastMove().
TEST_CASE("[cpu/castmove] CastMove(forward)") {
// Shared fixture: Int32 input, first-conv weights and bias reused by all sections.
std::shared_ptr<Tensor> inputTensor =
std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4},
{5, 6, 7, 8, 9},
{10, 11, 12, 13, 14},
{15, 16, 17, 18, 19},
{20, 21, 22, 23, 24}}},
{{{25, 26, 27, 28, 29},
{30, 31, 32, 33, 34},
{35, 36, 37, 38, 39},
{40, 41, 42, 43, 44},
{45, 46, 47, 48, 49}}}}});
std::shared_ptr<Tensor> weight1 = std::make_shared<Tensor>(
Array4D<int, 3, 1, 3, 3>{{{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}},
{{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}},
{{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}});
std::shared_ptr<Tensor> bias1 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
// No explicitCastMove() here: conversions between the mixed-dtype operators
// are expected to be handled implicitly (section name) by the scheduler.
SECTION("Test implicit") {
std::shared_ptr<GraphView> g =
Sequential({
Conv(1, 3, {3, 3}, "conv1"),
Conv(3, 4, {1, 1}, "conv2"),
Conv(4, 3, {1, 1}, "conv3"),
FC(27, 5, false, "fc")});
g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
g->getNode("conv1")->getOperator()->setInput(1, weight1);
g->getNode("conv1")->getOperator()->setInput(2, bias1);
std::shared_ptr<Tensor> weight2 =
std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
{{{4}}, {{5}}, {{6}}},
{{{7}}, {{8}}, {{9}}},
{{{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
g->getNode("conv2")->getOperator()->setInput(1, weight2);
g->getNode("conv2")->getOperator()->setInput(2, bias2);
// *(g->getNode("conv2")->getOperator()->input(1, weight2);
std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
{{{5}}, {{6}}, {{7}}, {{8}}},
{{{9}}, {{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
g->getNode("conv3")->getOperator()->setInput(1, weight3);
g->getNode("conv3")->getOperator()->setInput(2, bias3);
std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
{13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
{10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
{7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3},
{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
g->getNode("fc")->getOperator()->setInput(1, weightfc);
g->getNode("fc")->getOperator()->setInput(2, biasfc);
// input->addChild(g);
// Whole graph defaults to Int32, then two nodes are overridden to floating
// types, creating dtype boundaries between consecutive operators.
g->setDataType(Aidge::DataType::Int32);
g->getNode("conv1")->getOperator()->setDataType(DataType::Float32);
g->getNode("conv3")->getOperator()->setDataType(DataType::Float64);
g->setBackend("cpu");
g->forwardDims();
SequentialScheduler scheduler(g);
REQUIRE_NOTHROW(scheduler.forward());
scheduler.saveSchedulingDiagram("schedulingSequential");
std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
{{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
{{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
{{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
{{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
{{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
{{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
{{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
{{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
{{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
{{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
{{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
{{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
{{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
{{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
{{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
{{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
{{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
{{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
{{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
{{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
Tensor expectedOutput4 = Array2D<int, 2, 5>{
{{205050376, 198925904, 181355097, 196978090, 238868348},
{598467376, 561797804, 560823897, 593043790, 698672948}}};
// Each comparison template matches the node's actual output dtype against
// the Int32 reference (conv1 is Float32, conv3 is Float64).
std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12));
std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12));
std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12));
std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12));
}
// Round-trip a Float32 tensor through Float16 and check the loss stays
// within a relative tolerance of 1e-3.
SECTION("Half") {
Tensor refTensor = Array2D<float, 3, 2>{{{0.0, 1.0},{2.1, 3.4},{5000.0, 1.0e5}}};
Tensor tensor(DataType::Float16);
tensor.copyCastFrom(refTensor);
REQUIRE(approxEq<float, half_float::half>(refTensor, tensor, 1.0e-3, 0.0));
}
// Same graph and expected values as "Test implicit", but explicitCastMove()
// inserts the Cast/Move nodes into the graph before scheduling.
SECTION("Test explicit") {
std::shared_ptr<GraphView> g =
Sequential({
Conv(1, 3, {3, 3}, "conv1"),
Conv(3, 4, {1, 1}, "conv2"),
Conv(4, 3, {1, 1}, "conv3"),
FC(27, 5, false, "fc")});
g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
g->getNode("conv1")->getOperator()->setInput(1, weight1);
g->getNode("conv1")->getOperator()->setInput(2, bias1);
std::shared_ptr<Tensor> weight2 =
std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
{{{4}}, {{5}}, {{6}}},
{{{7}}, {{8}}, {{9}}},
{{{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
g->getNode("conv2")->getOperator()->setInput(1, weight2);
g->getNode("conv2")->getOperator()->setInput(2, bias2);
// *(g->getNode("conv2")->getOperator()->input(1, weight2);
std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
{{{5}}, {{6}}, {{7}}, {{8}}},
{{{9}}, {{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
g->getNode("conv3")->getOperator()->setInput(1, weight3);
g->getNode("conv3")->getOperator()->setInput(2, bias3);
std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
{13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
{10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
{7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3},
{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
g->getNode("fc")->getOperator()->setInput(1, weightfc);
g->getNode("fc")->getOperator()->setInput(2, biasfc);
// input->addChild(g);
g->setDataType(Aidge::DataType::Int32);
g->getNode("conv1")->getOperator()->setDataType(DataType::Float32);
g->getNode("conv3")->getOperator()->setDataType(DataType::Float64);
// Materialize the Cast/Move nodes in the graph itself before compiling.
explicitCastMove(g);
g->setBackend("cpu");
g->forwardDims();
SequentialScheduler scheduler(g);
REQUIRE_NOTHROW(scheduler.forward());
scheduler.saveSchedulingDiagram("schedulingSequential");
Tensor expectedOutput1 = Array4D<int, 2, 3, 3, 3>{
{{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
{{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
{{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
{{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
{{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
{{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}};
Tensor expectedOutput2 = Array4D<int, 2, 4, 3, 3>{
{{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
{{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
{{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
{{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
{{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
{{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
{{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
{{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}};
Tensor expectedOutput3 = Array4D<int, 2, 3, 3, 3>{
{{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
{{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
{{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
{{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
{{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
{{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}};
Tensor expectedOutput4 = Array2D<int, 2, 5>{
{{205050376, 198925904, 181355097, 196978090, 238868348},
{598467376, 561797804, 560823897, 593043790, 698672948}}};
std::shared_ptr<Tensor> other1 = std::static_pointer_cast<Conv_Op<2>>(g->getNode("conv1")->getOperator())->getOutput(0);
REQUIRE(approxEq<float, int>(*other1, expectedOutput1, 0.0, 1.0e-12));
std::shared_ptr<Tensor> other2 = std::static_pointer_cast<Conv_Op<2>>(g->getNode("conv2")->getOperator())->getOutput(0);
REQUIRE(*other2 == expectedOutput2);
std::shared_ptr<Tensor> other3 = std::static_pointer_cast<Conv_Op<2>>(g->getNode("conv3")->getOperator())->getOutput(0);
REQUIRE(approxEq<double, int>(*other3, expectedOutput3, 0.0, 1.0e-12));
std::shared_ptr<Tensor> other4 = std::static_pointer_cast<FC_Op>(g->getNode("fc")->getOperator())->getOutput(0);
REQUIRE(*other4 == expectedOutput4);
}
}
......@@ -17,13 +17,25 @@
#include "aidge/graph/Node.hpp"
#include "aidge/graph/GraphView.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/scheduler/Scheduler.hpp"
#include "aidge/operator/Memorize.hpp"
#include "aidge/operator/Pop.hpp"
#include "aidge/operator/Stack.hpp"
#include "aidge/operator/Identity.hpp"
#include "aidge/operator/MetaOperator.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/scheduler/ParallelScheduler.hpp"
#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
using namespace Aidge;
#include "aidge/recipes/GraphViewHelper.hpp"
namespace Aidge {
TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
std::shared_ptr<Tensor> inputTensor =
std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4},
......@@ -50,13 +62,11 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
Conv(1, 3, {3, 3}, "conv1"),
Conv(3, 4, {1, 1}, "conv2"),
Conv(4, 3, {1, 1}, "conv3"),
FC(5, false, "fc")});
g->setDatatype(Aidge::DataType::Int32);
g->setBackend("cpu");
FC(27, 5, false, "fc")});
g->getNode("conv1")->getOperator()->input(0) = *inputTensor;
g->getNode("conv1")->getOperator()->input(1) = *weight1;
g->getNode("conv1")->getOperator()->input(2) = *bias1;
g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
g->getNode("conv1")->getOperator()->setInput(1, weight1);
g->getNode("conv1")->getOperator()->setInput(2, bias1);
std::shared_ptr<Tensor> weight2 =
std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
......@@ -64,8 +74,8 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
{{{7}}, {{8}}, {{9}}},
{{{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
g->getNode("conv2")->getOperator()->input(1) = *weight2;
g->getNode("conv2")->getOperator()->input(2) = *bias2;
g->getNode("conv2")->getOperator()->setInput(1, weight2);
g->getNode("conv2")->getOperator()->setInput(2, bias2);
// *(g->getNode("conv2")->getOperator()->input(1, weight2);
std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
......@@ -73,8 +83,8 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
{{{5}}, {{6}}, {{7}}, {{8}}},
{{{9}}, {{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
g->getNode("conv3")->getOperator()->input(1) = *weight3;
g->getNode("conv3")->getOperator()->input(2) = *bias3;
g->getNode("conv3")->getOperator()->setInput(1, weight3);
g->getNode("conv3")->getOperator()->setInput(2, bias3);
std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
......@@ -88,10 +98,12 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
g->getNode("fc")->getOperator()->input(1) = *weightfc;
g->getNode("fc")->getOperator()->input(2) = *biasfc;
g->getNode("fc")->getOperator()->setInput(1, weightfc);
g->getNode("fc")->getOperator()->setInput(2, biasfc);
// input->addChild(g);
g->setDataType(Aidge::DataType::Int32);
g->setBackend("cpu");
g->forwardDims();
SequentialScheduler scheduler(g);
REQUIRE_NOTHROW(scheduler.forward());
......@@ -126,17 +138,17 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
Tensor expectedOutput4 = Array2D<int, 2, 5>{
{{205050376, 198925904, 181355097, 196978090, 238868348},
{598467376, 561797804, 560823897, 593043790, 698672948}}};
Tensor other1 = g->getNode("conv1")->getOperator()->output(0);
bool equal1 = (other1 == *expectedOutput1);
std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
bool equal1 = (*other1 == *expectedOutput1);
REQUIRE(equal1);
Tensor other2 = g->getNode("conv2")->getOperator()->output(0);
bool equal2 = (other2 == *expectedOutput2);
std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
bool equal2 = (*other2 == *expectedOutput2);
REQUIRE(equal2);
Tensor other3 = g->getNode("conv3")->getOperator()->output(0);
bool equal3 = (other3 == *expectedOutput3);
std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
bool equal3 = (*other3 == *expectedOutput3);
REQUIRE(equal3);
Tensor other4 = g->getNode("fc")->getOperator()->output(0);
bool equal4 = (other4 == expectedOutput4);
std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
bool equal4 = (*other4 == expectedOutput4);
REQUIRE(equal4);
}
......@@ -144,39 +156,40 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
std::shared_ptr<GraphView> g =
Sequential({Conv(1, 3, {3, 3}, "inputConv"),
Parallel({
Conv(3, 3, {1, 1}, "conv1.1"),
Conv(3, 3, {1, 1}, "conv1.2"),
Sequential({
Parallel({
Conv(3, 3, {1, 1}, "conv1.1"),
Conv(3, 3, {1, 1}, "conv1.2")}),
Add("add1")}),
Conv(3, 3, {1, 1}, "conv1.3")}),
Add<3>("add1"),
Add("add2"),
Conv(3, 2, {1, 1}, "conv2"),
FC(5, false, "out")});
g->setBackend("cpu");
g->setDatatype(Aidge::DataType::Int32);
FC(18, 5, false, "out")});
g->getNode("inputConv")->getOperator()->input(0) = *inputTensor;
g->getNode("inputConv")->getOperator()->input(1) = *weight1;
g->getNode("inputConv")->getOperator()->input(2) = *bias1;
g->getNode("inputConv")->getOperator()->setInput(0, inputTensor);
g->getNode("inputConv")->getOperator()->setInput(1, weight1);
g->getNode("inputConv")->getOperator()->setInput(2, bias1);
std::shared_ptr<Tensor> conv11Weight = std::make_shared<Tensor>(Array4D<int, 3, 3, 1, 1>{
{{{{1}}, {{2}}, {{3}}}, {{{4}}, {{5}}, {{6}}}, {{{7}}, {{8}}, {{9}}}}});
g->getNode("conv1.1")->getOperator()->input(1) = *conv11Weight;
g->getNode("conv1.1")->getOperator()->input(2) = *bias1;
g->getNode("conv1.1")->getOperator()->setInput(1, conv11Weight);
g->getNode("conv1.1")->getOperator()->setInput(2, bias1);
std::shared_ptr<Tensor> conv12Weight = std::make_shared<Tensor>(Array4D<int, 3, 3, 1, 1>{
{{{{11}}, {{12}}, {{13}}}, {{{14}}, {{15}}, {{16}}}, {{{17}}, {{18}}, {{19}}}}});
g->getNode("conv1.2")->getOperator()->input(1) = *conv12Weight;
g->getNode("conv1.2")->getOperator()->input(2) = *bias1;
g->getNode("conv1.2")->getOperator()->setInput(1, conv12Weight);
g->getNode("conv1.2")->getOperator()->setInput(2, bias1);
std::shared_ptr<Tensor> conv13Weight = std::make_shared<Tensor>(Array4D<int, 3, 3, 1, 1>{
{{{{21}}, {{22}}, {{23}}}, {{{24}}, {{25}}, {{26}}}, {{{27}}, {{28}}, {{29}}}}});
g->getNode("conv1.3")->getOperator()->input(1) = *conv13Weight;
g->getNode("conv1.3")->getOperator()->input(2) = *bias1;
g->getNode("conv1.3")->getOperator()->setInput(1, conv13Weight);
g->getNode("conv1.3")->getOperator()->setInput(2, bias1);
std::shared_ptr<Tensor> conv2Weight = std::make_shared<Tensor>(
Array4D<int, 2, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, {{{4}}, {{5}}, {{6}}}}});
std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 2>{{1, 2}});
g->getNode("conv2")->getOperator()->input(1) = *conv2Weight;
g->getNode("conv2")->getOperator()->input(2) = *bias2;
g->getNode("conv2")->getOperator()->setInput(1, conv2Weight);
g->getNode("conv2")->getOperator()->setInput(2, bias2);
std::shared_ptr<Tensor> fcWeight = std::make_shared<Tensor>(
Array2D<int, 5, 18>{{{1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3},
......@@ -185,19 +198,21 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
{5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2},
{3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}}});
std::shared_ptr<Tensor> fcBias = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
g->getNode("out")->getOperator()->input(1) = *fcWeight;
g->getNode("out")->getOperator()->input(2) = *fcBias;
g->getNode("out")->getOperator()->setInput(1, fcWeight);
g->getNode("out")->getOperator()->setInput(2, fcBias);
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
Array2D<int, 2, 5>{{{124324368, 130692907, 133325056, 125044620, 142843879},
{369195468, 394615207, 382643056, 379441320, 416291779}}});
g->setBackend("cpu");
g->setDataType(Aidge::DataType::Int32);
g->forwardDims();
SequentialScheduler scheduler(g);
REQUIRE_NOTHROW(scheduler.forward());
scheduler.saveSchedulingDiagram("schedulingSequential");
std::shared_ptr<Tensor> result =
std::static_pointer_cast<Tensor>(g->getNode("out")->getOperator()->getOutput(0));
std::static_pointer_cast<Tensor>(g->getNode("out")->getOperator()->getRawOutput(0));
bool equal = (*result == *expectedOutput);
REQUIRE(equal);
}
......@@ -205,5 +220,296 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
SECTION("Test Residual graph") {
}
SECTION("Test Recurrent graph") {}
}
\ No newline at end of file
SECTION("Test Recurrent graph (sequential)") {
std::shared_ptr<Tensor> in = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}});
std::shared_ptr<Tensor> initTensor = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}});
std::shared_ptr<Tensor> biasTensor = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}});
auto add1 = Add("add1");
auto mem = Memorize(3, "mem1");
auto add2 = Add("add2");
auto bias = Producer(biasTensor, "bias");
auto init = Producer(initTensor, "init");
auto input = Producer(in, "input");
std::shared_ptr<GraphView> g = Sequential({add1, mem, add2});
init->addChild(mem, 0, 1);
mem->addChild(add1, 1, 1);
bias->addChild(add2, 0, 1);
input->addChild(add1, 0, 0);
// Update GraphView inputs/outputs following previous connections:
g->add({mem, add1, add2, init, bias, input});
g->setBackend("cpu");
g->setDataType(Aidge::DataType::Int32);
g->save("graphRecurrent");
g->forwardDims();
SequentialScheduler scheduler(g);
REQUIRE_NOTHROW(scheduler.forward(true));
scheduler.saveStaticSchedulingDiagram("static_schedule");
scheduler.saveSchedulingDiagram("schedulingRecurrent_seq");
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}});
std::shared_ptr<Tensor> result =
std::static_pointer_cast<Tensor>(g->getNode("add2")->getOperator()->getRawOutput(0));
result->print();
expectedOutput->print();
bool equal = (*result == *expectedOutput);
REQUIRE(equal);
}
SECTION("Test Recurrent graph (parallel)") {
std::shared_ptr<Tensor> in = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}});
std::shared_ptr<Tensor> initTensor = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}});
std::shared_ptr<Tensor> biasTensor = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}});
auto add1 = Add("add1");
auto mem = Memorize(3, "mem1");
auto add2 = Add("add2");
auto bias = Producer(biasTensor, "bias");
auto init = Producer(initTensor, "init");
auto input = Producer(in, "input");
std::shared_ptr<GraphView> g = Sequential({add1, mem, add2});
init->addChild(mem, 0, 1);
mem->addChild(add1, 1, 1);
bias->addChild(add2, 0, 1);
input->addChild(add1, 0, 0);
// Update GraphView inputs/outputs following previous connections:
g->add({mem, add1, add2, init, bias, input});
g->setBackend("cpu");
g->setDataType(Aidge::DataType::Int32);
g->save("graphRecurrent");
g->forwardDims();
ParallelScheduler scheduler(g);
REQUIRE_NOTHROW(scheduler.forward(true));
scheduler.saveSchedulingDiagram("schedulingRecurrent_par");
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}});
std::shared_ptr<Tensor> result =
std::static_pointer_cast<Tensor>(g->getNode("add2")->getOperator()->getRawOutput(0));
result->print();
expectedOutput->print();
bool equal = (*result == *expectedOutput);
REQUIRE(equal);
}
SECTION("Test ConnectInput graph") {
std::shared_ptr<GraphView> g =
Sequential({
Conv(1, 3, {3, 3}, "conv1"),
Conv(3, 4, {1, 1}, "conv2"),
Conv(4, 3, {1, 1}, "conv3"),
FC(27, 5, false, "fc")});
// g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
g->getNode("conv1")->getOperator()->setInput(1, weight1);
g->getNode("conv1")->getOperator()->setInput(2, bias1);
std::shared_ptr<Tensor> weight2 =
std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
{{{4}}, {{5}}, {{6}}},
{{{7}}, {{8}}, {{9}}},
{{{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
g->getNode("conv2")->getOperator()->setInput(1, weight2);
g->getNode("conv2")->getOperator()->setInput(2, bias2);
// *(g->getNode("conv2")->getOperator()->input(1, weight2);
std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
{{{5}}, {{6}}, {{7}}, {{8}}},
{{{9}}, {{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
g->getNode("conv3")->getOperator()->setInput(1, weight3);
g->getNode("conv3")->getOperator()->setInput(2, bias3);
std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
{13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
{10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
{7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3},
{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
g->getNode("fc")->getOperator()->setInput(1, weightfc);
g->getNode("fc")->getOperator()->setInput(2, biasfc);
// input->addChild(g);
g->setDataType(Aidge::DataType::Int32);
g->setBackend("cpu");
std::vector<std::vector<Aidge::DimSize_t>> dims = {inputTensor->dims()};
g->forwardDims(dims);
SequentialScheduler scheduler(g);
std::vector<std::shared_ptr<Aidge::Tensor>> dataIn = {inputTensor};
REQUIRE_NOTHROW(scheduler.forward(true, dataIn));
scheduler.saveSchedulingDiagram("schedulingSequential");
std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
{{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
{{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
{{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
{{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
{{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
{{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
{{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
{{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
{{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
{{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
{{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
{{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
{{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
{{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
{{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
{{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
{{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
{{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
{{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
{{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
Tensor expectedOutput4 = Array2D<int, 2, 5>{
{{205050376, 198925904, 181355097, 196978090, 238868348},
{598467376, 561797804, 560823897, 593043790, 698672948}}};
std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
bool equal1 = (*other1 == *expectedOutput1);
REQUIRE(equal1);
std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
bool equal2 = (*other2 == *expectedOutput2);
REQUIRE(equal2);
std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
bool equal3 = (*other3 == *expectedOutput3);
REQUIRE(equal3);
std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
bool equal4 = (*other4 == expectedOutput4);
REQUIRE(equal4);
}
}
// Checks that a backward pass over a simple ReLU -> Sqrt -> ReLU chain runs
// without throwing once a gradient has been set on the graph output.
TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward]") {
// create GraphView
// Fixed node-name typo: "srqt0" -> "sqrt0" (the name is only a label here,
// it is never looked up elsewhere in this test).
std::shared_ptr<GraphView> gv = Sequential({ReLU("relu0"), Sqrt("sqrt0"), ReLU("relu1")});
std::shared_ptr<Tensor> inputTensor =
std::make_shared<Tensor>(Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 2.0f, 3.0f, 4.0f},
{5.0f, 6.0f, 7.0f, 8.0f, 9.0f},
{10.0f, 11.0f, 12.0f, 13.0f, 14.0f},
{15.0f, 16.0f, 17.0f, 18.0f, 19.0f},
{20.0f, 21.0f, 22.0f, 23.0f, 24.0f}}},
{{{25.0f, 26.0f, 27.0f, 28.0f, 29.0f},
{30.0f, 31.0f, 32.0f, 33.0f, 34.0f},
{35.0f, 36.0f, 37.0f, 38.0f, 39.0f},
{40.0f, 41.0f, 42.0f, 43.0f, 44.0f},
{45.0f, 46.0f, 47.0f, 48.0f, 49.0f}}}}});
// implem already set to default
auto myProd = Producer(inputTensor, "prod");
myProd -> addChild(gv);
gv -> compile("cpu", DataType::Float32);
// Run the forward pass to populate the output before requesting gradients.
SequentialScheduler scheduler(gv);
scheduler.forward();
auto outNode = gv->getOrderedOutputs()[0].first;
std::shared_ptr<Tensor> predictedOutput = std::dynamic_pointer_cast<OperatorTensor>(outNode->getOperator())->getOutput(0);
// Gradient seed for the output node; values are arbitrary but well-formed.
std::shared_ptr<Tensor> targetOutput =
std::make_shared<Tensor>(Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 1.0f, 2.0f, 2.0f},
{2.0f, 2.0f, 3.0f, 3.0f, 3.0f},
{3.0f, 3.0f, 3.0f, 4.0f, 4.0f},
{4.0f, 4.0f, 4.0f, 4.0f, 4.0f},
{4.0f, 5.0f, 5.0f, 5.0f, 5.0f}}},
{{{5.0f, 5.0f, 5.0f, 5.0f, 5.0f},
{5.0f, 6.0f, 6.0f, 6.0f, 6.0f},
{6.0f, 6.0f, 6.0f, 6.0f, 6.0f},
{6.0f, 6.0f, 6.0f, 7.0f, 7.0f},
{7.0f, 7.0f, 7.0f, 7.0f, 7.0f}}}}});
predictedOutput->setGrad(targetOutput);
REQUIRE_NOTHROW(scheduler.backward());
}
std::shared_ptr<Node> Accumulate(int seqLength, const std::string& name) {
auto input = Identity((!name.empty()) ? name + "_input" : "");
auto hiddenState = Memorize(seqLength, (!name.empty()) ? name + "_hidden_state" : "");
auto add = Add((!name.empty()) ? name + "_add" : "");
input->addChild(add, 0, 0);
add->addChild(hiddenState, 0,0);
hiddenState->addChild(/*otherNode=*/add, /*outId=*/1, /*otherInId=*/1);
std::shared_ptr<GraphView> microGraph = std::make_shared<GraphView>();
microGraph->add(input);
microGraph->add({hiddenState, add});
microGraph->setOrderedInputs({{input, 0}, {hiddenState, 1}});
microGraph->setOrderedOutputs({{hiddenState, 0}});
auto metaOp = MetaOperator("Accumulate", microGraph, {}, name);
return metaOp;
}
TEST_CASE("[cpu/scheduler] Accumulate", "[scheduler]") {
    // Two time steps of a 3x2 sequence to be summed element-wise.
    std::shared_ptr<Tensor> sequence = std::make_shared<Tensor>(
        Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
                                 {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
    // Zero-initialised accumulator state.
    std::shared_ptr<Tensor> initialState =
        std::make_shared<Tensor>(Array2D<float, 3, 2>{
            {{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}});

    auto meta = Accumulate(2, "accumulate");
    auto metaOp = std::static_pointer_cast<MetaOperator_Op>(meta->getOperator());

    auto popInput = Pop("pop_input");
    // NOTE: any pass-through node (Identity/Stack/...) would do here.
    auto passOutput = Identity("pop_output");

    popInput->getOperator()->associateInput(0, sequence);

    // Wire directly into/out of the meta-operator's micro-graph rather than
    // through the `meta` node itself (the alternative being exercised here).
    popInput->addChild(metaOp->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
    metaOp->getMicroGraph()->getOrderedOutputs()[0].first->addChild(passOutput, 0, 0);
    // Seed the Memorize cell's state input with the zero tensor.
    metaOp->getMicroGraph()->getNode("accumulate_hidden_state")->getOperator()->associateInput(1, initialState);

    // Assemble the enclosing graph (micro-graph included explicitly).
    auto graph = std::make_shared<GraphView>();
    graph->add(popInput);
    graph->add(metaOp->getMicroGraph());
    graph->add(passOutput);
    graph->compile("cpu", DataType::Float32);
    graph->save("accumulate_graph", true);

    // Schedule and execute.
    auto scheduler = SequentialScheduler(graph);
    scheduler.generateScheduling();
    scheduler.saveStaticSchedulingDiagram("accumulate_scheduling");
    REQUIRE_NOTHROW(scheduler.forward(true));

    // Expected running sum over the two steps: step0 + step1.
    std::shared_ptr<Tensor> expected = std::make_shared<Tensor>(
        Array2D<float, 3, 2>{{{3.0, 5.0}, {7.0, 9.0}, {11.0, 13.0}}});
    std::shared_ptr<Tensor> result =
        std::static_pointer_cast<OperatorTensor>(passOutput->getOperator())->getOutput(0);
    REQUIRE(*result == *expected);
}
} // namespace Aidge
0.0.1
\ No newline at end of file
0.5.0