Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mszczep/aidge_backend_cpu
  • eclipse/aidge/aidge_backend_cpu
  • hrouis/aidge_backend_cpu
  • oantoni/aidge_backend_cpu
  • raphaelmillet/aidge_backend_cpu
  • cguillon/aidge_backend_cpu
  • jeromeh/aidge_backend_cpu
  • axelfarr/aidge_backend_cpu
  • noamzerah/aidge_backend_cpu
  • silvanosky/aidge_backend_cpu
  • maab05/aidge_backend_cpu
  • lucaslopez/aidge_backend_cpu_ll
  • farnez/aidge_backend_cpu
  • mick94/aidge_backend_cpu
14 results
Show changes
Showing
with 5816 additions and 86 deletions
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <chrono> // std::micro, std::chrono::time_point,
// std::chrono::system_clock
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <functional> // std::multiplies
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937
// std::uniform_int_distribution, std::uniform_real_distribution
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/DivImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Div.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
// Unit tests for the CPU implementation of the element-wise Div operator.
// Three scenarios are exercised with randomly generated tensors:
//   1. both operands share the same shape,
//   2. 4-D / 4-D broadcasting (random dims replaced by 1 on either side),
//   3. 4-D / lower-rank right operand (1 to 3 leading dims removed).
// Reference results are computed by hand with explicit stride arithmetic
// and compared against the operator output with approxEq.
TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // values kept away from 0 so divisions stay well-conditioned
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
std::uniform_int_distribution<int> boolDist(0,1);
// Create Div Operator (an earlier revision of this comment said "MatMul")
std::shared_ptr<Node> myDiv = Div();
auto op = std::static_pointer_cast<OperatorTensor>(myDiv-> getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// Create 2 input Tensors
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
// Create results Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
Tres->setDataType(DataType::Float32);
Tres->setBackend("cpu");
// To measure execution time of 'Div_Op::forward()' member function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
SECTION("DivImpl_cpu::forward()") {
// Scalar cases are placeholders: not implemented yet.
SECTION("Scalar / Scalar") {
}
SECTION("Scalar / +1-D Tensor") {
}
SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
const std::size_t nbDims = nbDimsDist(gen);
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
// without broadcasting: inputs and expected output all share `dims`
float* array0 = new float[nb_elements];
float* array1 = new float[nb_elements];
float* result = new float[nb_elements];
for (std::size_t i = 0; i < nb_elements; ++i) {
array0[i] = valueDist(gen);
array1[i] = valueDist(gen);
result[i] = array0[i] / array1[i];
}
// input0
// NOTE(review): the raw buffers are handed to the tensor impls and freed
// right after the comparison below -- assumes setRawPtr copies the data or
// that the tensors are not read afterwards; TODO confirm ownership semantics.
T0->resize(dims);
T0 -> getImpl() -> setRawPtr(array0, nb_elements);
// input1
T1->resize(dims);
T1 -> getImpl() -> setRawPtr(array1, nb_elements);
// results
Tres->resize(dims);
Tres -> getImpl() -> setRawPtr(result, nb_elements);
op->forwardDims();
start = std::chrono::system_clock::now();
myDiv->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
// with broadcasting
}
Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
Log::info("total time: {} μs\n", duration.count());
}
SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions, replace some dimensions with '1' to get broadcasting
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
std::vector<std::size_t> dims0 = dims;
std::vector<std::size_t> dims1 = dims;
std::vector<std::size_t> dimsOut = dims;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims0[i] = 1;
}
if (boolDist(gen)) {
dims1[i] = 1;
}
// the output keeps the non-1 extent (both inputs agree when neither is 1)
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
}
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
array1[i] = valueDist(gen);
}
// compute true result
// row-major strides; a broadcast (size-1) axis contributes index 0 below
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
// partial flat offsets contributed by the two outer axes
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+ strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1[2] > 1) ? c : 0)
+ ((dims1[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] / array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " / " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->forwardDims();
start = std::chrono::system_clock::now();
myDiv->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
Log::info("total time: {} μs\n", duration.count());
}
SECTION("+1-D Tensor / 1-D Tensor") {
std::size_t number_of_operation = 0;
std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims0(4);
for (std::size_t i = 0; i < nbDims; ++i) {
dims0[i] = dimSizeDist(gen);
}
std::vector<std::size_t> dimsOut = dims0;
std::vector<std::size_t> dims1 = dims0;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims1[i] = 1;
}
}
// drop 1 to 3 leading dims from the right operand so it has a lower rank
dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
float* array1 = new float[array1_size];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < array1_size; ++i) {
array1[i] = valueDist(gen);
}
// compute true result
// left-pad dims1 with 1s back to rank 4 (numpy-style broadcast alignment)
auto dims1_tmp = dims1;
dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
+ strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
+ ((dims1_tmp[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] / array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " / " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, array1_size);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->forwardDims();
start = std::chrono::system_clock::now();
myDiv->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
Log::info("total time: {} μs\n", duration.count());
}
}
}
} // namespace Aidge
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/operator/ErfImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Erf.hpp"
#include "aidge/utils/ArrayHelpers.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
// Forward-pass checks for the CPU Erf operator: the output is compared
// element-wise against precomputed erf() reference values with approxEq.
TEST_CASE("[cpu/operator] Erf(forward)") {
    SECTION("1D Tensor") {
        auto inTensor = std::make_shared<Tensor>(Array1D<float, 10>{
            {0.41384590, 0.43120754, 0.93762982, 0.31049860, 0.77547199, 0.09514862,
             0.16145366, 0.42776686, 0.43487436, 0.41170865}});
        // erf() of each input value, precomputed offline.
        Tensor reference = Array1D<float, 10>{
            {0.44163144, 0.45801866, 0.81516320, 0.33941913, 0.72722000, 0.10704061,
             0.18061027, 0.45479023, 0.46144873, 0.43959764}};

        auto erfOp = std::make_shared<Erf_Op>();
        erfOp->associateInput(0, inTensor);
        erfOp->setDataType(DataType::Float32);
        erfOp->setBackend("cpu");
        erfOp->forward();

        REQUIRE(approxEq<float>(*(erfOp->getOutput(0)), reference, 1e-5f, 1e-8f));
    }
    SECTION("3D Tensor") {
        auto inTensor = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>{
            {
                {
                    {0.97037154, 0.86208081, 0.77767169},
                    {0.38160080, 0.11422747, 0.77284443},
                },
                {
                    {0.51592529, 0.72543722, 0.54641193},
                    {0.93866944, 0.97767913, 0.34172094}
                }
            }});
        // erf() of each input value, precomputed offline.
        Tensor reference = Array3D<float, 2, 2, 3>{
            {
                {
                    {0.83003384, 0.77721894, 0.72857803},
                    {0.41057193, 0.12833349, 0.72559172},
                },
                {
                    {0.53438270, 0.69507217, 0.56032562},
                    {0.81564975, 0.83322692, 0.37109339}
                }
            }};

        auto erfOp = std::make_shared<Erf_Op>();
        erfOp->associateInput(0, inTensor);
        erfOp->setDataType(DataType::Float32);
        erfOp->setBackend("cpu");
        erfOp->forward();

        REQUIRE(approxEq<float>(*(erfOp->getOutput(0)), reference, 1e-5f, 1e-8f));
    }
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Expand.hpp"
#include "aidge/utils/ArrayHelpers.hpp"
using std::shared_ptr;
using namespace Aidge;
// Wire the two inputs of an Expand operator for a test: the data tensor goes
// to slot 0 and the target-shape tensor to slot 1. Both tensors are moved to
// the CPU backend and the operator output adopts the data tensor's dtype.
void setupTestExpand(shared_ptr<Tensor> inputData,
                     shared_ptr<Tensor> inputShape,
                     shared_ptr<Expand_Op> &op) {
    op->getOutput(0)->setDataType(inputData->dataType());

    inputData->setBackend("cpu");
    inputShape->setBackend("cpu");

    op->associateInput(0, inputData);
    op->associateInput(1, inputShape);
}
// Forward-pass checks for the CPU Expand operator: the output is inputData
// (slot 0) broadcast to the shape held in the int64 shape tensor (slot 1).
// forwardDims() itself is covered by core tests; here we only verify that
// forward() fills the output with the expected broadcast values.
// Fix: removed a dead, default-constructed `inputData` local that trailed
// the last SECTION and was never used.
TEST_CASE("[cpu/operator] Expand(forward)", "[Expand][CPU]") {
    std::shared_ptr<Expand_Op> op = std::make_shared<Expand_Op>();
    op->setBackend("cpu");

    SECTION("Expand shape is bigger than inputData") {
        auto inputData = std::make_shared<Tensor>(Array1D<int, 2>({1, 3}));
        auto inputShape =
            std::make_shared<Tensor>(Array1D<std::int64_t, 4>({1, 3, 4, 2}));
        // the 2-element row {1, 3} is replicated across the broadcast dims
        Tensor expectedOutput =
            Array4D<cpptype_t<DataType::Int32>, 1, 3, 4, 2>({{{{{1, 3}, {1, 3}, {1, 3}, {1, 3}},
                                                               {{1, 3}, {1, 3}, {1, 3}, {1, 3}},
                                                               {{1, 3}, {1, 3}, {1, 3}, {1, 3}}}}});
        setupTestExpand(inputData, inputShape, op);
        // forwardDims has already been tested in core
        CHECK(op->forwardDims(true));
        REQUIRE_NOTHROW(op->forward());
        REQUIRE(expectedOutput == *op->getOutput(0));
    }
    SECTION("Expand shape has less dimensions than inputData") {
        auto inputData = std::make_shared<Tensor>(
            Array3D<int, 2, 1, 3>({{{2, 1, 3}, {2, 1, 3}}}));
        auto inputShape =
            std::make_shared<Tensor>(Array1D<std::int64_t, 2>({2, 3}));
        Tensor expectedOutput = Array3D<cpptype_t<DataType::Int32>, 2, 2, 3>(
            {{{{2, 1, 3}, {2, 1, 3}}, {{2, 1, 3}, {2, 1, 3}}}});
        setupTestExpand(inputData, inputShape, op);
        // forwardDims has already been tested in core
        CHECK(op->forwardDims(true));
        REQUIRE_NOTHROW(op->forward());
        REQUIRE(expectedOutput == *op->getOutput(0));
    }
    SECTION("Expand shape = {1} leads to input equal to output.") {
        auto inputData = std::make_shared<Tensor>(
            Array4D<int, 2, 1, 3, 1>({{{2, 1, 3}, {2, 1, 3}}}));
        auto inputShape =
            std::make_shared<Tensor>(Array1D<std::int64_t, 1>({1}));
        Tensor expectedOutput =
            Array4D<cpptype_t<DataType::Int32>, 2, 1, 3, 1>({{{2, 1, 3}, {2, 1, 3}}});
        setupTestExpand(inputData, inputShape, op);
        // forwardDims has already been tested in core
        CHECK(op->forwardDims(true));
        REQUIRE_NOTHROW(op->forward());
        REQUIRE(expectedOutput == *op->getOutput(0));
    }
    SECTION("The only common dimension is the last one & its equal to 1") {
        auto inputData = std::make_shared<Tensor>(
            Array4D<int, 1, 1, 3, 1>({{{{2, 1, 3}}}}));
        auto inputShape =
            std::make_shared<Tensor>(Array1D<std::int64_t, 3>({2, 1, 1}));
        Tensor expectedOutput =
            Array4D<cpptype_t<DataType::Int32>, 1, 2, 3, 1>({{{{2, 1, 3}, {2, 1, 3}}}});
        setupTestExpand(inputData, inputShape, op);
        // forwardDims has already been tested in core
        CHECK(op->forwardDims(true));
        REQUIRE_NOTHROW(op->forward());
        REQUIRE(expectedOutput == *op->getOutput(0));
    }
    // Placeholder: not implemented yet.
    SECTION("N-Dim to N-Dim") {}
}
......@@ -9,17 +9,20 @@
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/FC.hpp"
#include "aidge/backend/cpu.hpp"
#include "aidge/utils/ArrayHelpers.hpp"
using namespace Aidge;
TEST_CASE("[cpu/oeprator] FC(forward)") {
TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") {
std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{
{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
......@@ -42,14 +45,15 @@ TEST_CASE("[cpu/oeprator] FC(forward)") {
9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{
{{23601, 23602, 23603, 23604, 23605}, {68601, 68602, 68603, 68604, 68605}}});
Tensor myOutput = Array2D<int, 2, 5>{
{{23601, 23602, 23603, 23604, 23605}, {68601, 68602, 68603, 68604, 68605}}};
std::shared_ptr<Node> myFC = FC(5, false, "myfc");
myFC->getOperator()->setDatatype(DataType::Int32);
myFC->getOperator()->setBackend("cpu");
myFC->getOperator()->associateInput(1, myWeights);
myFC->getOperator()->associateInput(2, myBias);
std::shared_ptr<Node> myFC = FC(75, 5, false, "myfc");
auto op = std::static_pointer_cast<FC_Op>(myFC -> getOperator());
op -> setDataType(DataType::Int32);
op -> setBackend("cpu");
op -> associateInput(1, myWeights);
op -> associateInput(2, myBias);
SECTION("2D input") {
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{
......@@ -62,10 +66,9 @@ TEST_CASE("[cpu/oeprator] FC(forward)") {
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}});
myFC->getOperator()->associateInput(0, myInput);
myFC->getOperator()->computeOutputDims();
op->associateInput(0, myInput);
myFC->forward();
REQUIRE(*std::static_pointer_cast<Tensor>(myFC->getOperator()->getOutput(0)) == *myOutput);
REQUIRE(*(op->getOutput(0)) == myOutput);
}
SECTION("4D input") {
std::shared_ptr<Tensor> myInput =
......@@ -99,10 +102,9 @@ TEST_CASE("[cpu/oeprator] FC(forward)") {
{135, 136, 137, 138, 139},
{140, 141, 142, 143, 144},
{145, 146, 147, 148, 149}}}}});
myFC->getOperator()->associateInput(0, myInput);
myFC->getOperator()->computeOutputDims();
op->associateInput(0, myInput);
myFC->forward();
REQUIRE(*std::static_pointer_cast<Tensor>(myFC->getOperator()->getOutput(0)) == *myOutput);
REQUIRE(*(op->getOutput(0)) == myOutput);
}
// std::cout << static_cast<Tensor>((*myFC->getOperator())["weight"])[0][0][0][0] << std::endl;
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstdlib>
#include <memory>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/GraphView.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/operator/Fold.hpp"
#include "aidge/operator/Unfold.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/Reshape.hpp"
using namespace Aidge;
// End-to-end forward test for the CPU Fold operator inside the graph
// Unfold -> (Reshape'd weights) MatMul -> Fold, run by the sequential
// scheduler. NOTE(review): this im2col/matmul/col2im pipeline looks like it
// reproduces a 3x3 convolution (weights 4x3x3x3 on a 2x3x5x5 input giving
// 2x4x3x3) -- the expected values below are consistent with that, but
// confirm against the Conv reference before relying on it.
TEST_CASE("[cpu/operator] Fold(forward)", "[Fold][CPU]") {
// Graph wiring: Unfold output feeds MatMul input 1 (patches), the reshaped
// weights feed MatMul input 0, and the MatMul result is folded back.
std::shared_ptr<Node> myUnfold = Unfold({3,3}, "myunfold");
std::shared_ptr<Node> myReshape = Reshape({4, 27}, "myreshape"); // 4x3x3x3 weights flattened to 4x27
std::shared_ptr<Node> myMatMul = MatMul("mymatmul");
std::shared_ptr<Node> myFold = Fold({3,3}, {1,1}, "myfold");
myUnfold->addChild(myMatMul, 0, 1);
myReshape->addChild(myMatMul, 0, 0);
myMatMul->addChild(myFold, 0, 0);
// Weights: 4 output channels x 3 input channels x 3x3 kernel, values 0..107.
std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
{
{
{{ 0, 1, 2},
{ 3, 4, 5},
{ 6, 7, 8}},
{{ 9, 10, 11},
{ 12, 13, 14},
{ 15, 16, 17}},
{{ 18, 19, 20},
{ 21, 22, 23},
{ 24, 25, 26}}
},
{
{{ 27, 28, 29},
{ 30, 31, 32},
{ 33, 34, 35}},
{{ 36, 37, 38},
{ 39, 40, 41},
{ 42, 43, 44}},
{{ 45, 46, 47},
{ 48, 49, 50},
{ 51, 52, 53}}
},
{
{{ 54, 55, 56},
{ 57, 58, 59},
{ 60, 61, 62}},
{{ 63, 64, 65},
{ 66, 67, 68},
{ 69, 70, 71}},
{{ 72, 73, 74},
{ 75, 76, 77},
{ 78, 79, 80}}
},
{
{{ 81, 82, 83},
{ 84, 85, 86},
{ 87, 88, 89}},
{{ 90, 91, 92},
{ 93, 94, 95},
{ 96, 97, 98}},
{{ 99, 100, 101},
{102, 103, 104},
{105, 106, 107}}
}
}
});
// Input: batch of 2, 3 channels, 5x5 spatial, values 0..149.
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
{
{
{{ 0, 1, 2, 3, 4},
{ 5, 6, 7, 8, 9},
{ 10, 11, 12, 13, 14},
{ 15, 16, 17, 18, 19},
{ 20, 21, 22, 23, 24}},
{{ 25, 26, 27, 28, 29},
{ 30, 31, 32, 33, 34},
{ 35, 36, 37, 38, 39},
{ 40, 41, 42, 43, 44},
{ 45, 46, 47, 48, 49}},
{{ 50, 51, 52, 53, 54},
{ 55, 56, 57, 58, 59},
{ 60, 61, 62, 63, 64},
{ 65, 66, 67, 68, 69},
{ 70, 71, 72, 73, 74}}
},
{
{{ 75, 76, 77, 78, 79},
{ 80, 81, 82, 83, 84},
{ 85, 86, 87, 88, 89},
{ 90, 91, 92, 93, 94},
{ 95, 96, 97, 98, 99}},
{{100, 101, 102, 103, 104},
{105, 106, 107, 108, 109},
{110, 111, 112, 113, 114},
{115, 116, 117, 118, 119},
{120, 121, 122, 123, 124}},
{{125, 126, 127, 128, 129},
{130, 131, 132, 133, 134},
{135, 136, 137, 138, 139},
{140, 141, 142, 143, 144},
{145, 146, 147, 148, 149}}
}
}
});
// Expected folded output: 2 batches x 4 channels x 3x3 spatial.
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
{
{
{{ 15219, 15570, 15921},
{ 16974, 17325, 17676},
{ 18729, 19080, 19431}},
{{ 37818, 38898, 39978},
{ 43218, 44298, 45378},
{ 48618, 49698, 50778}},
{{ 60417, 62226, 64035},
{ 69462, 71271, 73080},
{ 78507, 80316, 82125}},
{{ 83016, 85554, 88092},
{ 95706, 98244, 100782},
{ 108396, 110934, 113472}}
},
{
{{ 41544, 41895, 42246},
{ 43299, 43650, 44001},
{ 45054, 45405, 45756}},
{{ 118818, 119898, 120978},
{ 124218, 125298, 126378},
{ 129618, 130698, 131778}},
{{ 196092, 197901, 199710},
{ 205137, 206946, 208755},
{ 214182, 215991, 217800}},
{{ 273366, 275904, 278442},
{ 286056, 288594, 291132},
{ 298746, 301284, 303822}}
}
}
});
auto opUnfold = std::static_pointer_cast<OperatorTensor>(myUnfold -> getOperator());
auto opReshape = std::static_pointer_cast<OperatorTensor>(myReshape -> getOperator());
auto opMatMul = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
auto opFold = std::static_pointer_cast<OperatorTensor>(myFold -> getOperator());
opUnfold->associateInput(0,myInput);
opReshape->associateInput(0,myWeights);
// Build the graph from the MatMul node, configure it, and run the scheduler.
auto g = getConnectedGraphView(myMatMul);
g->setDataType(DataType::Int32);
g->setBackend("cpu");
g->forwardDims();
g->save("unfold_matmul_fold"); // side effect: writes the graph dump to disk
SequentialScheduler scheduler(g);
scheduler.forward();
//opFold->getOutput(0)->print();
REQUIRE(*(opFold->getOutput(0)) == *myOutput);
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <chrono>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <functional> // std::multiplies
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937
// std::uniform_int_distribution, std::uniform_real_distribution
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/GlobalAveragePooling.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
TEST_CASE("[cpu/operator] GlobalAveragePooling",
"[GlobalAveragePooling][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(
0.1f, 1.1f); // Random float distribution between 0 and 1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
std::size_t(10));
std::uniform_int_distribution<std::size_t> nbLowDimsDist(std::size_t(1),
std::size_t(2));
std::uniform_int_distribution<std::size_t> nbHighDimsDist(std::size_t(3),
std::size_t(7));
// Create MatGlobalAveragePooling Operator
std::shared_ptr<GlobalAveragePooling_Op> op = std::make_shared<GlobalAveragePooling_Op>();
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// Create the input Tensor
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0, T0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
// Create results Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
Tres->setDataType(DataType::Float32);
Tres->setBackend("cpu");
// To measure execution time of 'MatGlobalAveragePooling_Op::forward()' member
// function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
int number_of_operation{0};
SECTION("GlobalAveragePoolingImpl_cpu::forward()") {
SECTION(
"1-2Dim > not enough dimensions leads to function throwing an error") {
// generate a random tensors
const std::size_t nbDims = nbLowDimsDist(gen);
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
const std::size_t nb_elements =
std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1),
std::multiplies<std::size_t>());
float *array0 = new float[nb_elements];
for (std::size_t i = 0; i < nb_elements; ++i) {
array0[i] = valueDist(gen);
}
// input0
T0->resize(dims);
T0->getImpl()->setRawPtr(array0, nb_elements);
REQUIRE_THROWS(op->forward());
delete[] array0;
}
SECTION("3+Dim") {
SECTION("Fill a tensor with all values set as N will result with every "
"output being N") {
// generate the tensor
const std::size_t nbDims = nbHighDimsDist(gen);
std::vector<std::size_t> dims_in;
for (std::size_t i = 0; i < nbDims; ++i) {
dims_in.push_back(dimSizeDist(gen));
}
// create in nb_elems
const std::size_t in_nb_elems =
std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1),
std::multiplies<std::size_t>());
const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0];
const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1];
number_of_operation +=
in_nb_elems +
dims_in[1]; // averaging per channel : 1 addition per element in
// the channel + 1 division this for every batch
// create out nb_elems
std::vector<std::size_t> dims_out(dims_in.size(), 1);
dims_out[0] = dims_in[0];
dims_out[1] = dims_in[1];
const std::size_t out_nb_elems =
std::accumulate(dims_out.cbegin(), dims_out.cend(), std::size_t(1),
std::multiplies<std::size_t>());
const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0];
// iterate over each batch/channel
float *array0 = new float[in_nb_elems];
float *result = new float[out_nb_elems];
float val = valueDist(gen);
for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
for (std::size_t channel = 0; channel < dims_in[1]; ++channel) {
for (std::size_t i = 0; i < in_channel_nb_elems; ++i)
{
array0[batch * in_batch_nb_elems + channel * in_channel_nb_elems +
i] = val;
}
result[batch * out_batch_nb_elems + channel] = val;
}
}
// input0
T0->resize(dims_in);
T0->getImpl()->setRawPtr(array0, in_nb_elems);
// results
Tres->resize(dims_out);
Tres->getImpl()->setRawPtr(result, out_nb_elems);
op->forwardDims();
start = std::chrono::system_clock::now();
REQUIRE_NOTHROW(op->forward());
end = std::chrono::system_clock::now();
duration +=
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
}
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] result;
}
SECTION("random testing") {
for (int trial = 0; trial < NBTRIALS; ++trial) {
// generate the tensor
const std::size_t nbDims = nbHighDimsDist(gen);
std::vector<std::size_t> dims_in;
for (std::size_t i = 0; i < nbDims; ++i) {
dims_in.push_back(dimSizeDist(gen));
}
// create in nb_elems
const std::size_t in_nb_elems =
std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1),
std::multiplies<std::size_t>());
const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0];
const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1];
number_of_operation +=
in_nb_elems +
dims_in[1]; // averaging per channel : 1 addition per element in
// the channel + 1 division this for every batch
// create out nb_elems
std::vector<std::size_t> dims_out(dims_in.size(), 1);
dims_out[0] = dims_in[0];
dims_out[1] = dims_in[1];
const std::size_t out_nb_elems =
std::accumulate(dims_out.cbegin(), dims_out.cend(),
std::size_t(1), std::multiplies<std::size_t>());
const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0];
// iterate over each batch/channel
float *array0 = new float[in_nb_elems];
float *result = new float[out_nb_elems];
for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
for (std::size_t channel = 0; channel < dims_in[1]; ++channel) {
float channel_sum = 0;
for (std::size_t i = 0; i < in_channel_nb_elems; ++i)
{
float val = valueDist(gen);
array0[batch * in_batch_nb_elems +
channel * in_channel_nb_elems + i] = val;
channel_sum += val;
}
result[batch * out_batch_nb_elems + channel] =
channel_sum / in_channel_nb_elems;
}
}
// input0
T0->resize(dims_in);
T0->getImpl()->setRawPtr(array0, in_nb_elems);
// results
Tres->resize(dims_out);
Tres->getImpl()->setRawPtr(result, out_nb_elems);
op->forwardDims();
start = std::chrono::system_clock::now();
REQUIRE_NOTHROW(op->forward());
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(
end - start);
REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
}
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres, 1e-4f));
delete[] array0;
delete[] result;
}
}
SECTION("Using result from a pytorch function as groundtruth") {
DimSize_t batch_size = 2;
DimSize_t channels = 3;
DimSize_t height = 4;
DimSize_t width = 3;
DimSize_t depth = 2;
SECTION("2D_img") {
const std::vector<DimSize_t> in_dims{batch_size, channels, height,
width};
std::vector<std::size_t> out_dims(in_dims.size(), 1);
out_dims[0] = in_dims[0];
out_dims[1] = in_dims[1];
DimSize_t in_nb_elems = batch_size * channels * height * width;
DimSize_t out_nb_elems = batch_size * channels;
number_of_operation +=
in_nb_elems +
channels; // averaging per channel : 1 addition per element in
// the channel + 1 division this for every batch
auto input = new float[in_nb_elems];
auto result = new float[out_nb_elems];
input[0] = 0.1807716;
input[1] = -0.0699881;
input[2] = -0.3596235;
input[3] = -0.9152045;
input[4] = 0.6257653;
input[5] = 0.0255099;
input[6] = 0.9545137;
input[7] = 0.0643485;
input[8] = 0.3611506;
input[9] = 1.1678782;
input[10] = -1.3498932;
input[11] = -0.5101767;
input[12] = 0.2359577;
input[13] = -0.2397784;
input[14] = -0.9211147;
input[15] = 1.5432971;
input[16] = 1.3488258;
input[17] = -0.1396417;
input[18] = 0.2857972;
input[19] = 0.9651205;
input[20] = -2.0371499;
input[21] = 0.4931363;
input[22] = 1.4869986;
input[23] = 0.5910330;
input[24] = 0.1260297;
input[25] = -1.5626874;
input[26] = -1.1601028;
input[27] = -0.3348408;
input[28] = 0.4477722;
input[29] = -0.8016447;
input[30] = 1.5236114;
input[31] = 2.5085869;
input[32] = -0.6630959;
input[33] = -0.2512752;
input[34] = 1.0101448;
input[35] = 0.1215468;
input[36] = 0.1583993;
input[37] = 1.1340188;
input[38] = -1.1538976;
input[39] = -0.2983968;
input[40] = -0.5075365;
input[41] = -0.9239212;
input[42] = 0.5467061;
input[43] = -1.4947776;
input[44] = -1.2057148;
input[45] = 0.5718198;
input[46] = -0.5973545;
input[47] = -0.6936757;
input[48] = 1.6455388;
input[49] = -0.8029931;
input[50] = 1.3514109;
input[51] = -0.2759193;
input[52] = -1.5108346;
input[53] = 2.1047730;
input[54] = 2.7629590;
input[55] = -1.7465292;
input[56] = 0.8353187;
input[57] = -1.9560477;
input[58] = -0.8002653;
input[59] = -0.5044988;
input[60] = -0.0711742;
input[61] = -0.5130699;
input[62] = -1.0307810;
input[63] = 0.9154347;
input[64] = -0.2282317;
input[65] = -0.6884708;
input[66] = 0.1832259;
input[67] = 0.6003584;
input[68] = -1.5429375;
input[69] = -0.3465560;
input[70] = -0.1476223;
input[71] = 0.6469797;
result[0] = 0.0145876;
result[1] = 0.3010401;
result[2] = 0.0803371;
result[3] = -0.3720275;
result[4] = 0.0919094;
result[5] = -0.1852371;
// input0
T0->resize(in_dims);
T0->getImpl()->setRawPtr(input, in_nb_elems);
// results
Tres->resize(out_dims);
Tres->getImpl()->setRawPtr(result, out_nb_elems);
op->forwardDims();
start = std::chrono::system_clock::now();
REQUIRE_NOTHROW(op->forward());
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(
end - start);
REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
}
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] input;
delete[] result;
}
SECTION("3D_img") {
const std::vector<DimSize_t> in_dims{batch_size, channels, height,
width, depth};
std::vector<std::size_t> out_dims(in_dims.size(), 1);
out_dims[0] = in_dims[0];
out_dims[1] = in_dims[1];
DimSize_t in_nb_elems =
batch_size * channels * height * width * depth;
number_of_operation +=
in_nb_elems +
channels; // averaging per channel : 1 addition per element in
// the channel + 1 division this for every batch
DimSize_t out_nb_elems = batch_size * channels;
auto input = new float[in_nb_elems];
auto result = new float[out_nb_elems];
input[0] = 0.0061403;
input[1] = -0.9665052;
input[2] = 0.3582928;
input[3] = 0.1072854;
input[4] = 1.2463317;
input[5] = 1.2460036;
input[6] = 0.3534451;
input[7] = 0.9425349;
input[8] = -0.2103887;
input[9] = -0.7959853;
input[10] = 0.1297970;
input[11] = -1.9445597;
input[12] = 0.0609514;
input[13] = -0.2379328;
input[14] = 1.9020044;
input[15] = -1.1762751;
input[16] = 0.3404147;
input[17] = 1.1685153;
input[18] = -0.6526139;
input[19] = 0.3767620;
input[20] = 0.1887376;
input[21] = 0.5154487;
input[22] = 0.6371427;
input[23] = -0.3948864;
input[24] = -1.1571540;
input[25] = 0.2896117;
input[26] = 0.6163548;
input[27] = -0.4370409;
input[28] = 0.6589766;
input[29] = 0.6587803;
input[30] = -1.3702172;
input[31] = -1.6210355;
input[32] = 0.5872851;
input[33] = 0.2860694;
input[34] = 0.0082870;
input[35] = -0.2523253;
input[36] = -1.3247224;
input[37] = 0.1891782;
input[38] = 0.0211001;
input[39] = 0.9404197;
input[40] = -0.5576900;
input[41] = -0.6939272;
input[42] = -0.3252473;
input[43] = 1.2439330;
input[44] = -1.1671864;
input[45] = -0.4091243;
input[46] = 1.2600617;
input[47] = -1.5630058;
input[48] = 1.1346143;
input[49] = -0.0823837;
input[50] = 0.2893163;
input[51] = 0.8357732;
input[52] = -0.2449911;
input[53] = 0.2712233;
input[54] = 0.0936364;
input[55] = -0.8834321;
input[56] = -0.3274170;
input[57] = 0.0783938;
input[58] = -0.3807656;
input[59] = 0.3775077;
input[60] = 0.1119123;
input[61] = 2.3142793;
input[62] = -0.7989057;
input[63] = -0.5643027;
input[64] = -1.1346605;
input[65] = 0.1705271;
input[66] = 0.9946650;
input[67] = 1.2625724;
input[68] = 1.6218156;
input[69] = 1.0774711;
input[70] = 0.5947813;
input[71] = -1.5290873;
input[72] = 2.0437069;
input[73] = -0.1656267;
input[74] = 0.0870704;
input[75] = -0.5276564;
input[76] = -0.1002882;
input[77] = 1.0539219;
input[78] = -0.6230739;
input[79] = -1.5905718;
input[80] = -0.9741858;
input[81] = -0.1869211;
input[82] = 0.5816050;
input[83] = -2.6339815;
input[84] = -1.0764544;
input[85] = 2.5903966;
input[86] = 0.4940658;
input[87] = 0.4671729;
input[88] = 0.6588292;
input[89] = -0.7257792;
input[90] = 1.4280071;
input[91] = -1.2187740;
input[92] = 0.7380729;
input[93] = -1.1599953;
input[94] = -1.4355115;
input[95] = -1.5304037;
input[96] = 0.8474578;
input[97] = 0.0774260;
input[98] = 0.5433396;
input[99] = -0.8438400;
input[100] = -0.1089903;
input[101] = -0.6354192;
input[102] = 0.8772392;
input[103] = 0.2844733;
input[104] = 0.0975270;
input[105] = -0.9785872;
input[106] = -0.4320499;
input[107] = -1.4937501;
input[108] = -2.0644901;
input[109] = 0.0851217;
input[110] = 0.6644159;
input[111] = 0.4168026;
input[112] = 0.0958830;
input[113] = -1.5699565;
input[114] = 0.3739572;
input[115] = -0.1420672;
input[116] = -0.7864021;
input[117] = 0.2443752;
input[118] = -0.9811850;
input[119] = -0.0698569;
input[120] = 0.1463890;
input[121] = 0.2536245;
input[122] = 0.2136150;
input[123] = 0.3113698;
input[124] = 1.8353856;
input[125] = 1.4473228;
input[126] = -0.7373698;
input[127] = 0.2485314;
input[128] = -0.4789796;
input[129] = -0.3396149;
input[130] = 0.6438198;
input[131] = 0.7287521;
input[132] = -1.5119252;
input[133] = -0.1006494;
input[134] = 1.8955028;
input[135] = 1.0871323;
input[136] = 0.3620502;
input[137] = -0.8826663;
input[138] = 1.2220223;
input[139] = -1.2817260;
input[140] = 1.4153577;
input[141] = 0.4148015;
input[142] = 1.3458617;
input[143] = 1.9718349;
result[0] = 0.1333608;
result[1] = -0.1716091;
result[2] = 0.2201060;
result[3] = -0.1585989;
result[4] = -0.2291074;
result[5] = 0.4254351;
// input0
T0->resize(in_dims);
T0->getImpl()->setRawPtr(input, in_nb_elems);
// results
Tres->resize(out_dims);
Tres->getImpl()->setRawPtr(result, out_nb_elems);
op->forwardDims();
start = std::chrono::system_clock::now();
REQUIRE_NOTHROW(op->forward());
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(
end - start);
REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
}
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] input;
delete[] result;
}
}
Log::info("GlobalAveragePooling total execution time: {}µs\n", duration.count());
Log::info("Number of operations : {}\n", number_of_operation);
Log::info("Operation / µs = {}\n", number_of_operation / duration.count());
}
}
}
} // namespace Aidge
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp"

#include <cstddef>     // std::size_t
#include <cstdlib>
#include <functional>  // std::multiplies
#include <memory>
#include <numeric>     // std::accumulate
#include <random>
#include <vector>

#include <catch2/catch_test_macros.hpp>

#include "aidge/backend/cpu/operator/HeavisideImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge
{
TEST_CASE("[cpu/operator] Heaviside(forward)", "[Heaviside][CPU]") {
    // Random generators used by the "+1-D Tensor" section: values in [-1, 1],
    // rank in [1, 5], each dimension in [2, 10].
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> valueDist(-1.0f, 1.0f);
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));

    SECTION("1D Tensor") {
        // Fixed input covering negative, zero and positive entries.
        // Heaviside(0.5): x < 0 -> 0, x > 0 -> 1, x == 0 -> 0.5.
        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> {
            {0, 1, 2,-3, 4,-5,-6, 7, 8, 9}
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<float,10> {
            {0.5, 1, 1, 0, 1, 0, 0, 1, 1, 1}
        });

        std::shared_ptr<Node> heaviside = Heaviside(0.5);
        auto op = std::static_pointer_cast<OperatorTensor>(heaviside->getOperator());
        op->associateInput(0, input0);
        op->setBackend("cpu");
        op->setDataType(DataType::Float32);
        op->forward();

        REQUIRE(approxEq<float>(*op->getOutput(0), *expectedOutput));
    }

    SECTION("+1-D Tensor")
    {
        // Random shape: rank and per-dimension sizes drawn from the
        // distributions declared above.
        auto dims = std::vector<std::size_t>();
        auto nbDims = nbDimsDist(gen);
        for (std::size_t i = 0; i < nbDims; ++i) {
            dims.push_back(dimSizeDist(gen));
        }

        const auto numberOfElements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
        float* inputArray = new float[numberOfElements];
        float* resultArray = new float[numberOfElements];

        for (std::size_t i = 0; i < numberOfElements; ++i)
        {
            inputArray[i] = valueDist(gen);
            // Reference Heaviside with value = 0.5 computed on the host.
            resultArray[i] = inputArray[i] > 0 ? 1.0f : (inputArray[i] == 0 ? 0.5f : 0.0f);
        }

        auto T0 = std::make_shared<Tensor>();
        T0->setDataType(DataType::Float32);
        T0->setBackend("cpu");

        auto T1 = std::make_shared<Tensor>();
        T1->setDataType(DataType::Float32);
        T1->setBackend("cpu");

        T0->resize(dims);
        T0->getImpl()->setRawPtr(inputArray, numberOfElements);
        T1->resize(dims);
        T1->getImpl()->setRawPtr(resultArray, numberOfElements);

        std::shared_ptr<Node> heaviside = Heaviside(0.5);
        auto op = std::static_pointer_cast<OperatorTensor>(heaviside->getOperator());
        op->associateInput(0, T0);
        op->setBackend("cpu");
        op->setDataType(DataType::Float32);
        op->forward();

        REQUIRE(approxEq<float>(*(op->getOutput(0)), *T1));

        // FIX: release the buffers handed to setRawPtr() once the check is
        // done. The other CPU operator tests in this repository follow the
        // same new[]/setRawPtr/delete[] convention; these two arrays were
        // previously leaked.
        delete[] inputArray;
        delete[] resultArray;
    }
}
}
......@@ -9,16 +9,19 @@
*
********************************************************************************/
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/LeakyReLU.hpp"
#include "aidge/backend/cpu.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
SECTION("1D Tensor") {
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> {
{0, 1, 2,-3, 4,-5,-6, 7, 8, 9}
......@@ -28,12 +31,12 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
});
std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
myLeakyReLU->getOperator()->setDatatype(DataType::Int32);
myLeakyReLU->getOperator()->setBackend("cpu");
myLeakyReLU->getOperator()->associateInput(0,input0);
myLeakyReLU->getOperator()->computeOutputDims();
auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
op->associateInput(0,input0);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
myLeakyReLU->forward();
REQUIRE(*std::static_pointer_cast<Tensor>(myLeakyReLU->getOperator()->getOutput(0)) == *expectedOutput);
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
}
SECTION("2D Tensor") {
......@@ -51,12 +54,12 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
});
std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
myLeakyReLU->getOperator()->setDatatype(DataType::Int32);
myLeakyReLU->getOperator()->setBackend("cpu");
myLeakyReLU->getOperator()->associateInput(0,input0);
myLeakyReLU->getOperator()->computeOutputDims();
auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
op->associateInput(0,input0);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
myLeakyReLU->forward();
REQUIRE(*myLeakyReLU->getOperator()->getOutput(0) == *expectedOutput);
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
}
SECTION("3D Tensor") {
......@@ -86,12 +89,12 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
});
std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
myLeakyReLU->getOperator()->setDatatype(DataType::Int32);
myLeakyReLU->getOperator()->setBackend("cpu");
myLeakyReLU->getOperator()->associateInput(0,input0);
myLeakyReLU->getOperator()->computeOutputDims();
auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
op->associateInput(0,input0);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
myLeakyReLU->forward();
REQUIRE(*myLeakyReLU->getOperator()->getOutput(0) == *expectedOutput);
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
}
SECTION("4D Tensor") {
......@@ -145,12 +148,12 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
});
std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
myLeakyReLU->getOperator()->setDatatype(DataType::Int32);
myLeakyReLU->getOperator()->setBackend("cpu");
myLeakyReLU->getOperator()->associateInput(0,input0);
myLeakyReLU->getOperator()->computeOutputDims();
auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
op->associateInput(0,input0);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
myLeakyReLU->forward();
REQUIRE(*myLeakyReLU->getOperator()->getOutput(0) == *expectedOutput);
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
}
SECTION("Test construction attribute: negative_slop") {
......@@ -162,11 +165,11 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
});
std::shared_ptr<Node> myLeakyReLU = LeakyReLU(0.5f);
myLeakyReLU->getOperator()->setDatatype(DataType::Float32);
myLeakyReLU->getOperator()->setBackend("cpu");
myLeakyReLU->getOperator()->associateInput(0,input0);
myLeakyReLU->getOperator()->computeOutputDims();
auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
op->associateInput(0,input0);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
myLeakyReLU->forward();
REQUIRE(*myLeakyReLU->getOperator()->getOutput(0) == *expectedOutput);
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
}
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <chrono> // std::micro, std::chrono::time_point,
// std::chrono::system_clock, std::chrono::duration
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <memory>
#include <random> // std::random_device, std::mt19937
// std::uniform_int_distribution, std::uniform_real_distribution
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
    const std::uint16_t NBTRIALS = 10;
    // Create a random number generator: values in [0, 1), matrix dimensions
    // in [10, 100], batch dimensions in [1, 5].
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(0.0, 1.0); // Random float distribution between 0 and 1
    std::uniform_int_distribution<std::size_t> distDims(10, 100);
    std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);

    // Create MatMul Operator
    std::shared_ptr<Node> myMatMul = MatMul();
    auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());

    // To measure execution time of 'MatMul_Op::forward()' member function call
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    // FIX: zero-initialize the accumulator. A default-constructed
    // std::chrono::duration over an arithmetic rep holds an indeterminate
    // value, so the first 'duration +=' in the 2-D section read garbage
    // (the 3-D and 4-D sections reset it, the 2-D one did not).
    std::chrono::duration<double, std::micro> duration = std::chrono::duration<double, std::micro>::zero();

    SECTION("2-D Tensors") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0*dim1*dim2;

            // Create and populate the array with random float values
            // (FIX: loop indices are std::size_t, not int, to match the
            // unsigned bounds).
            float* bigArray1 = new float[dim0*dim1];
            for (std::size_t i = 0; i < dim0*dim1; ++i) {
                bigArray1[i] = dis(gen); // Generate random float value
            }
            float* bigArray2 = new float[dim1*dim2];
            for (std::size_t i = 0; i < dim1*dim2; ++i) {
                bigArray2[i] = dis(gen); // Generate random float value
            }
            // Reference result computed with a naive O(dim0*dim1*dim2) product.
            float* res = new float[dim0*dim2];
            for (std::size_t i = 0; i < dim0; ++i) {
                for (std::size_t j = 0; j < dim2; ++j) {
                    float sum = 0.0;
                    for (std::size_t k = 0; k < dim1; ++k) {
                        sum += bigArray1[i*dim1+k] * bigArray2[k*dim2+j];
                    }
                    res[i*dim2+j] = sum;
                }
            }

            // Convert bigArray1 to Tensor
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1 -> resize({dim0,dim1});
            T1 -> setBackend("cpu");
            T1 -> getImpl() -> setRawPtr(bigArray1, dim0*dim1);
            // Convert bigArray2 to Tensor
            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2 -> resize({dim1,dim2});
            T2 -> setBackend("cpu");
            T2 -> getImpl() -> setRawPtr(bigArray2, dim1*dim2);
            // convert res to Tensor
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres -> resize({dim0,dim2});
            Tres -> setBackend("cpu");
            Tres -> getImpl() -> setRawPtr(res, dim0*dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->forwardDims();
            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));

            delete[] bigArray1;
            delete[] bigArray2;
            delete[] res;
        }
        Log::info("number of multiplications over time spent: {}\n", (totalComputation / duration.count()));
        Log::info("total time: {} μs\n", duration.count());
    }

    SECTION("3-D Tensors") {
        std::size_t totalComputation = 0;
        duration = std::chrono::duration<double, std::micro>::zero();
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dimNb = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0*dim1*dim2*dimNb;

            // Create and populate the array with random float values
            float* bigArray1 = new float[dimNb*dim0*dim1];
            for (std::size_t i = 0; i < dimNb*dim0*dim1; ++i) {
                bigArray1[i] = dis(gen); // Generate random float value
            }
            float* bigArray2 = new float[dimNb*dim1*dim2];
            for (std::size_t i = 0; i < dimNb*dim1*dim2; ++i) {
                bigArray2[i] = dis(gen); // Generate random float value
            }
            // Reference result: one independent matrix product per batch n.
            float* res = new float[dimNb*dim0*dim2];
            for (std::size_t n = 0; n < dimNb; ++n) {
                for (std::size_t i = 0; i < dim0; ++i) {
                    for (std::size_t j = 0; j < dim2; ++j) {
                        float sum = 0.0;
                        for (std::size_t k = 0; k < dim1; ++k) {
                            sum += bigArray1[n*dim0*dim1 + i*dim1 + k] * bigArray2[n*dim2*dim1+k*dim2+j];
                        }
                        res[n*dim0*dim2+i*dim2+j] = sum;
                    }
                }
            }

            // Convert bigArray1 to Tensor
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1 -> resize({dimNb,dim0,dim1});
            T1 -> setBackend("cpu");
            T1 -> getImpl() -> setRawPtr(bigArray1, dimNb*dim0*dim1);
            // Convert bigArray2 to Tensor
            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2 -> resize({dimNb,dim1,dim2});
            T2 -> setBackend("cpu");
            T2 -> getImpl() -> setRawPtr(bigArray2, dimNb*dim1*dim2);
            // convert res to Tensor
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres -> resize({dimNb,dim0,dim2});
            Tres -> setBackend("cpu");
            Tres -> getImpl() -> setRawPtr(res, dimNb*dim0*dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->forwardDims();
            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));

            delete[] bigArray1;
            delete[] bigArray2;
            delete[] res;
        }
        Log::info("number of multiplications over time spent: {}\n", (totalComputation / duration.count()));
        Log::info("total time: {} μs\n", duration.count());
    }

    SECTION("4-D Tensors") {
        std::size_t totalComputation = 0;
        duration = std::chrono::duration<double, std::micro>::zero();
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dimNb1 = distNbMatrix(gen);
            const std::size_t dimNb2 = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0*dim1*dim2*dimNb1*dimNb2;

            // Create and populate the array with random float values
            float* bigArray1 = new float[dimNb1*dimNb2*dim0*dim1];
            for (std::size_t i = 0; i < dimNb1*dimNb2*dim0*dim1; ++i) {
                bigArray1[i] = dis(gen); // Generate random float value
            }
            float* bigArray2 = new float[dimNb1*dimNb2*dim1*dim2];
            for (std::size_t i = 0; i < dimNb1*dimNb2*dim1*dim2; ++i) {
                bigArray2[i] = dis(gen); // Generate random float value
            }
            // Reference result: independent matrix product per (n1, n2) batch.
            float* res = new float[dimNb1*dimNb2*dim0*dim2];
            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
                    for (std::size_t i = 0; i < dim0; ++i) {
                        for (std::size_t j = 0; j < dim2; ++j) {
                            float sum = 0.0;
                            for (std::size_t k = 0; k < dim1; ++k) {
                                sum += bigArray1[n1*dimNb2*dim0*dim1+n2*dim0*dim1+i*dim1+k] * bigArray2[n1*dimNb2*dim1*dim2+n2*dim1*dim2+k*dim2+j];
                            }
                            res[n1*dimNb2*dim0*dim2+n2*dim0*dim2+i*dim2+j] = sum;
                        }
                    }
                }
            }

            // Convert bigArray1 to Tensor
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1 -> resize({dimNb1,dimNb2,dim0,dim1});
            T1 -> setBackend("cpu");
            T1 -> getImpl() -> setRawPtr(bigArray1, dimNb1*dimNb2*dim0*dim1);
            // Convert bigArray2 to Tensor
            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2 -> resize({dimNb1,dimNb2,dim1,dim2});
            T2 -> setBackend("cpu");
            T2 -> getImpl() -> setRawPtr(bigArray2, dimNb1*dimNb2*dim1*dim2);
            // convert res to Tensor
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres -> resize({dimNb1,dimNb2,dim0,dim2});
            Tres -> setBackend("cpu");
            Tres -> getImpl() -> setRawPtr(res, dimNb1*dimNb2*dim0*dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->forwardDims();
            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));

            delete[] bigArray1;
            delete[] bigArray2;
            delete[] res;
        }
        Log::info("number of multiplications over time spent: {}\n", (totalComputation / duration.count()));
        Log::info("total time: {} μs\n", duration.count());
    }

    SECTION("+2-D / 1-D") {
        // allows to test both computation with a 1-D Tensor and broadcasting
        // NOTE: this section only checks that forwardDims()/forward() run;
        // no numerical reference is compared.
        // input_0
        std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
        op->associateInput(0,T0);
        const std::size_t dim0 = distNbMatrix(gen);
        const std::size_t dim1 = distNbMatrix(gen) + 1;
        const std::size_t dim2 = distNbMatrix(gen);
        const std::size_t dim3 = distNbMatrix(gen);
        T0->resize({dim0,dim1,dim2,dim3});
        T0->setDataType(DataType::Float32);
        T0->setBackend("cpu");

        // input_1
        std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
        op -> associateInput(1,T1);
        T1->resize({dim3});
        T1->setDataType(DataType::Float32);
        T1->setBackend("cpu");

        op->setDataType(DataType::Float32);
        op->setBackend("cpu");
        op->forwardDims();
        myMatMul->forward();
    }
}
} // namespace Aidge
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <array>
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/MaxPooling.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
    // Shared NCHW (2,2,5,5) input used by the sections below.
    std::shared_ptr<Tensor> inputTensor = std::make_shared<Tensor>(Array4D<float,2,2,5,5> {
        {
            {
                {{-0.3848, 0.2166, -0.4373, 0.6142, 0.5277},
                 {0.7995, 0.3638, -1.4589, -1.0843, 1.0918},
                 {0.7147, 0.0936, -1.2902, 1.2037, 0.4874},
                 {-0.5981, 2.1184, -0.9175, 1.3859, 0.3305},
                 {-1.7700, 0.0563, -0.3914, 0.0538, -0.3955}},

                {{-3.1409, -0.4554, 0.0524, 2.2291, 0.4859},
                 {-0.7465, -0.6567, -2.3703, -0.6386, -1.4152},
                 { 2.2329, -0.5850, 0.0700, 1.2838, -1.7363},
                 { 0.2139, 0.0624, -1.0689, -0.8221, -0.8038},
                 { 0.1886, -0.7840, -0.2313, 0.2651, -1.6244}}
            },
            {
                {{ 0.4371, 1.6417, 0.9129, 0.6325, 0.5438},
                 {-2.3552, -0.8850, -0.0232, -0.5462, -1.2011},
                 {1.7653, -1.6668, -1.0814, 0.6182, 1.2071},
                 {0.9541, -0.5133, 0.8664, -0.8892, 1.4585},
                 {1.0220, -0.5107, 0.1829, -0.2301, -0.4268}},

                {{ 1.0429, 0.6279, -0.2875, 0.7187, -0.1500},
                 {1.6041, 2.9635, 1.4172, -0.7517, 0.5441},
                 {-0.2276, 0.0857, 0.6776, -0.1389, -0.0614},
                 {-0.1547, -0.3435, 0.0650, -0.5095, -1.8073},
                 {1.7217, 0.3999, -0.5953, 1.0604, -0.4126}}
            }
        }
    });

    SECTION("Stride") {
        // 2x2 kernel swept with a 2x2 stride: each output element is the
        // maximum over one non-overlapping 2x2 window of the input (the last
        // row/column of each 5x5 map is dropped).
        auto poolingOp = std::make_shared<MaxPooling_Op<2>>(
            std::array<std::size_t, 2>({2,2}),
            std::array<std::size_t, 2>({2,2}));

        Tensor expectedOutput = Array4D<float,2,2,2,2> {
            {
                {
                    {{ 0.7995, 0.6142},
                     { 2.1184, 1.3859}},
                    {{ -0.4554, 2.2291},
                     { 2.2329, 1.2838}}
                },
                {
                    {{1.6417, 0.9129},
                     {1.7653, 0.8664}},
                    {{2.9635, 1.4172},
                     {0.0857, 0.6776}}
                }
            }
        };

        poolingOp->associateInput(0, inputTensor);
        poolingOp->setDataType(DataType::Float32);
        poolingOp->setBackend("cpu");
        poolingOp->forward();
        poolingOp->getOutput(0)->print();
        REQUIRE(*(poolingOp->getOutput(0)) == expectedOutput);
    }
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <string>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/graph/GraphView.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/operator/Add.hpp"
#include "aidge/operator/Memorize.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/recipes/GraphViewHelper.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
namespace Aidge {
TEST_CASE("[cpu/operator] Memorize(forward)", "[Memorize][CPU]") {
    SECTION("Test simple") {
        // Scalar tensor {1} reused as both the graph input and the initial
        // state of the Memorize node.
        std::shared_ptr<Tensor> inputTensor =
            std::make_shared<Tensor>(Array1D<int, 1>{{1}});

        // Build a small feedback graph: Add's result is stored by Memorize
        // (scheduled for 3 steps) and fed back into Add's second input.
        auto input = Producer({1}, "input");
        auto init = Producer({1}, "init");
        auto add = Add("add");
        auto mem = Memorize(3, "mem");

        // Wire the loop.
        input->addChild(add, 0, 0);
        init->addChild(mem, 0, 1);
        add->addChild(mem, 0, 0);
        mem->addChild(/*otherNode=*/add, /*outId=*/1, /*otherInId=*/1);
        input->getOperator()->setOutput(0, inputTensor);
        init->getOperator()->setOutput(0, inputTensor);

        // Configure and run the whole connected graph sequentially.
        auto graph = getConnectedGraphView(input);
        graph->setDataType(Aidge::DataType::Int32);
        graph->setBackend("cpu");
        graph->forwardDims();
        graph->save("simple_graph");

        SequentialScheduler scheduler(graph);
        REQUIRE_NOTHROW(scheduler.forward());
        scheduler.saveSchedulingDiagram("simple");

        // Reference value for the memorized output after the run.
        const Tensor expectedOutput = Array1D<int, 1>{{4}};
        std::shared_ptr<Tensor> memOutput =
            std::static_pointer_cast<OperatorTensor>(mem->getOperator())->getOutput(0);
        memOutput->print();
        REQUIRE((*memOutput == expectedOutput));
    }
}
} // namespace Aidge
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cmath>
#include <cstdlib>
#include <memory>
#include <random>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/filler/Filler.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/FC.hpp"
#include "aidge/operator/Identity.hpp"
#include "aidge/operator/MetaOperator.hpp"
#include "aidge/operator/MetaOperatorDefs.hpp"
#include "aidge/operator/Pad.hpp"
#include "aidge/operator/Pop.hpp"
#include "aidge/operator/Stack.hpp"
#include "aidge/scheduler/ParallelScheduler.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
    SECTION("PaddedConv(forward)") {
        // Verifies that an explicit Pad -> Conv pipeline reproduces a
        // precomputed reference output (double precision, NCHW layout).
        // The fused PaddedConv meta-operator is only constructed at the
        // end as a smoke test; its forward pass is not exercised here.
        std::shared_ptr<Tensor> myWeights =
            std::make_shared<Tensor>(Array4D<double, 4, 3, 3, 3>{
                {{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02},
                   {1.16492919e-01, 8.21634093e-02, 1.17413265e-01},
                   {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}},
                  {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01},
                   {6.12586558e-01, 8.09918671e-02, 8.40649383e-01},
                   {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}},
                  {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01},
                   {1.36960611e-01, 4.64806728e-01, 4.85150000e-01},
                   {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}},
                 {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01},
                   {1.56806559e-01, 6.22280998e-01, 3.15827594e-01},
                   {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}},
                  {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01},
                   {1.67925807e-01, 2.68356150e-01, 6.28875602e-01},
                   {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}},
                  {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01},
                   {7.63047502e-01, 5.12539506e-02, 9.77400493e-01},
                   {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}},
                 {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01},
                   {7.10897067e-02, 5.02579011e-01, 3.35236224e-01},
                   {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}},
                  {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01},
                   {1.59875539e-01, 9.13163381e-01, 3.59806060e-01},
                   {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}},
                  {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01},
                   {5.46298282e-01, 2.89698587e-01, 2.62612651e-01},
                   {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}},
                 {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01},
                   {8.67878485e-01, 2.93263422e-01, 8.03912714e-01},
                   {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}},
                  {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01},
                   {5.80538620e-01, 6.63031275e-01, 2.07247191e-01},
                   {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}},
                  {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01},
                   {7.34639028e-01, 2.84957200e-02, 9.70225217e-01},
                   {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}});
        std::shared_ptr<Tensor> myBias =
            std::make_shared<Tensor>(Array1D<double, 4>{
                {0.16884905, 0.27994487, 0.57227465, 0.06435205}});
        std::shared_ptr<Tensor> myInput = std::make_shared<
            Tensor>(Array4D<double, 2, 3, 5, 5>{
            // NCHW
            {{{{0.43224481, 0.9047832, 0.18402257, 0.06162838, 0.52490127},
               {0.27773404, 0.55402353, 0.9485062, 0.31197083, 0.80328607},
               {0.85065842, 0.88226201, 0.54971951, 0.23360494, 0.53907884},
               {0.33423098, 0.79564312, 0.80419414, 0.76839638, 0.87248221},
               {0.77328729, 0.65749407, 0.47277589, 0.32889198, 0.93970518}},
              {{0.66669145, 0.64193351, 0.45315988, 0.32794057, 0.38461822},
               {0.72295814, 0.18395073, 0.85909664, 0.30010301, 0.56065865},
               {0.34777938, 0.77869746, 0.33159421, 0.19540932, 0.77767906},
               {0.5778391, 0.08218411, 0.27758371, 0.99017749, 0.61827997},
               {0.10440745, 0.3197831, 0.89157608, 0.12216887, 0.950232}},
              {{0.68073443, 0.2681118, 0.51848834, 0.62864493, 0.36717478},
               {0.64106244, 0.43779425, 0.02771029, 0.78275231, 0.45693104},
               {0.6487417, 0.01603838, 0.73869997, 0.96494221, 0.39588782},
               {0.5975827, 0.90913292, 0.55036969, 0.4747373, 0.62460509},
               {0.79675124, 0.02807549, 0.53227602, 0.88805927, 0.96646591}}},
             {{{0.81851935, 0.21267665, 0.01580692, 0.54907998, 0.89010049},
               {0.80165784, 0.55195592, 0.20740314, 0.22782844, 0.89205031},
               {0.94217108, 0.58434542, 0.20738313, 0.79065873, 0.9371597},
               {0.02254708, 0.95539178, 0.95165758, 0.53736666, 0.49100362},
               {0.08018625, 0.69108027, 0.00329741, 0.74565761, 0.30899213}},
              {{0.34868638, 0.12792604, 0.37382248, 0.0374756, 0.50653087},
               {0.59614405, 0.64820746, 0.31470307, 0.62460364, 0.29253268},
               {0.92864889, 0.51014224, 0.08921206, 0.11094072, 0.64691121},
               {0.50586371, 0.6686477, 0.72511169, 0.41681783, 0.6325049},
               {0.71594137, 0.73382767, 0.36589439, 0.03255165, 0.75006865}},
              {{0.6294127, 0.85548534, 0.0902963, 0.28915773, 0.36564289},
               {0.95873236, 0.6742374, 0.55679676, 0.6323497, 0.34072958},
               {0.49694061, 0.79173045, 0.19738225, 0.14755281, 0.80818177},
               {0.02332061, 0.74270703, 0.59415632, 0.08195934, 0.46295434},
               {0.71426058,
                0.85032931,
                0.90750818,
                0.28768431,
                0.4401146}}}}});
        // Reference result of Pad(1,1,1,1, zeros) followed by the 3x3 conv.
        std::shared_ptr<Tensor> myOutput = std::make_shared<
            Tensor>(Array4D<double, 2, 4, 5, 5>{
            {{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273},
               {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567},
               {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523},
               {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136},
               {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}},
              {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890},
               {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475},
               {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442},
               {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438},
               {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}},
              {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092},
               {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575},
               {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146},
               {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581},
               {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}},
              {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740},
               {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107},
               {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523},
               {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123},
               {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}},
             {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229},
               {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444},
               {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241},
               {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706},
               {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}},
              {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648},
               {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705},
               {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404},
               {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069},
               {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}},
              {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888},
               {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179},
               {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316},
               {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387},
               {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}},
              {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038},
               {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408},
               {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357},
               {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303},
               {3.16612267,
                4.38248920,
                5.23248482,
                4.21292210,
                2.86031270}}}}});
        // Build the Pad -> Conv chain by hand and run both nodes.
        std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv");
        auto convOp =
            std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
        std::shared_ptr<Node> myPad =
            Pad<2>({1, 1, 1, 1}, "myPad", PadBorderType::Constant, 0.0);
        auto padOp =
            std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
        convOp->setInput(1, myWeights);
        convOp->setInput(2, myBias);
        myPad->addChild(myConv, 0, 0);
        padOp->setInput(0, myInput);
        padOp->setDataType(DataType::Float64);
        padOp->setBackend("cpu");
        convOp->setDataType(DataType::Float64);
        convOp->setBackend("cpu");
        myPad->forward();
        myConv->forward();
        convOp->getOutput(0)->print();
        // Element-wise comparison against the reference, with a 1e-5
        // absolute tolerance.
        double *computedOutput =
            static_cast<double *>(convOp->getOutput(0)->getImpl()->rawPtr());
        double *expectedOutput =
            static_cast<double *>(myOutput->getImpl()->rawPtr());
        for (std::size_t i = 0; i < myOutput->size(); ++i) {
            REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5);
        }
        // Construction-only smoke test of the fused meta-operator; the
        // node is intentionally unused beyond this point.
        std::shared_ptr<Node> myPaddedConv =
            PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1});
    }
    SECTION("LSTM(forward)") {
        // Structural test of the LSTM meta-operator: input categories,
        // micro-graph scheduling, and consumed/produced data accounting.
        auto pop = Pop();
        auto myLSTM = LSTM(32, 64, 0, true, "ltsm");
        auto op =
            std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
        auto microGraph = op->getMicroGraph();
        microGraph->save("lstm", false, true);
        // LSTM exposes 3 data inputs, 8 weight params, and 8 optional params.
        REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
        for (size_t i = 1; i < 9; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
        }
        for (size_t i = 9; i < 17; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
        }
        REQUIRE(myLSTM->nbOutputs() == 2);
        // Zero-initialized tensors; only shapes matter for this test.
        std::shared_ptr<Tensor> myInput =
            std::make_shared<Tensor>(Array2D<float, 16, 32>{});
        std::shared_ptr<Tensor> myInit =
            std::make_shared<Tensor>(Array2D<float, 32, 64>{});
        std::shared_ptr<Tensor> myInitW =
            std::make_shared<Tensor>(Array2D<float, 64, 32>{});
        std::shared_ptr<Tensor> myInitR =
            std::make_shared<Tensor>(Array2D<float, 64, 64>{});
        pop->addChild(myLSTM, 0, 0);
        pop->getOperator()->associateInput(0, myInput);
        // Inputs 17/18: initial hidden and cell states.
        op->associateInput(17, myInit);
        op->associateInput(18, myInit);
        // Weights X
        myLSTM->input(1).first->getOperator()->setOutput(0, myInitW);
        myLSTM->input(2).first->getOperator()->setOutput(0, myInitW);
        myLSTM->input(3).first->getOperator()->setOutput(0, myInitW);
        myLSTM->input(4).first->getOperator()->setOutput(0, myInitW);
        // Weights H
        myLSTM->input(5).first->getOperator()->setOutput(0, myInitR);
        myLSTM->input(6).first->getOperator()->setOutput(0, myInitR);
        myLSTM->input(7).first->getOperator()->setOutput(0, myInitR);
        myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
        auto g = getConnectedGraphView(myLSTM);
        g->setDataType(DataType::Float32);
        g->setBackend("cpu");
        auto scheduler = SequentialScheduler(g);
        scheduler.forward(true);
        g->save("lstm_outside_dims", true, true);
        microGraph->save("lstm_dims", true, true);
        REQUIRE(op->dimsForwarded());
        auto microGraphScheduler =
            std::dynamic_pointer_cast<MetaOperator_Op>(op)
                ->getMicroGraphScheduler();
        microGraphScheduler->saveSchedulingDiagram("lstm_scheduling");
        // Exact data-accounting checks (sizes in number of elements).
        REQUIRE(op->getNbConsumedData(0).data == 512);
        REQUIRE(op->getNbConsumedData(1).data == 32768);
        REQUIRE(op->getNbProducedData(0).data == 34816);
        REQUIRE(op->getNbProducedData(1).data == 34816);
        // Static scheduling: first step runs the full micro-graph (26 nodes),
        // subsequent steps skip the producers (24 nodes).
        REQUIRE(microGraphScheduler->getStaticScheduling(0).size() == 26);
        REQUIRE(microGraphScheduler->getStaticScheduling(1).size() == 24);
        REQUIRE(microGraphScheduler->getStaticScheduling(15).size() == 24);
    }
    SECTION("LSTM(forward_values)") {
        // Numerical test: single LSTM step on small constant weights,
        // checked against a precomputed hidden state.
        auto myLSTM = LSTM(2, 3, 0, true, "ltsm");
        auto op =
            std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
        auto microGraph =
            std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
        microGraph->save("lstm", false, false);
        REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
        for (size_t i = 1; i < 9; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
        }
        for (size_t i = 9; i < 17; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
        }
        REQUIRE(myLSTM->nbOutputs() == 2);
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
            Array2D<float, 3, 2>{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}});
        // Zero initial hidden/cell state; all weights set to 0.1.
        std::shared_ptr<Tensor> myInit =
            std::make_shared<Tensor>(Array2D<float, 3, 3>{
                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
        std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
            Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
        std::shared_ptr<Tensor> myInitR =
            std::make_shared<Tensor>(Array2D<float, 3, 3>{
                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
        op->associateInput(0, myInput);
        op->associateInput(17, myInit);
        op->associateInput(18, myInit);
        // Weights X
        myLSTM->input(1).first->getOperator()->setOutput(0, myInitW);
        myLSTM->input(2).first->getOperator()->setOutput(0, myInitW);
        myLSTM->input(3).first->getOperator()->setOutput(0, myInitW);
        myLSTM->input(4).first->getOperator()->setOutput(0, myInitW);
        // Weights H
        myLSTM->input(5).first->getOperator()->setOutput(0, myInitR);
        myLSTM->input(6).first->getOperator()->setOutput(0, myInitR);
        myLSTM->input(7).first->getOperator()->setOutput(0, myInitR);
        myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
        auto g = getConnectedGraphView(myLSTM);
        g->setDataType(DataType::Float32);
        g->setBackend("cpu");
        auto scheduler = SequentialScheduler(g);
        scheduler.forward();
        microGraph->save("lstm_values_dims", false, true);
        // Reference hidden state (computed offline with PyTorch).
        std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
            Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412},
                                  {0.25606447, 0.25606447, 0.25606447},
                                  {0.40323776, 0.40323776, 0.40323776}}});
        auto microGraphScheduler =
            std::dynamic_pointer_cast<MetaOperator_Op>(op)
                ->getMicroGraphScheduler();
        microGraphScheduler->saveSchedulingDiagram("lstm_values_scheduling");
        op->getOutput(0)->print();
        myHiddenState->print();
        REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
    }
    SECTION("LSTM(forward_values_seq)") {
        // Two-timestep sequence: a Pop node feeds the LSTM one timestep at a
        // time; the final hidden state is checked against a reference value.
        auto pop = Pop();
        auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
        auto myGraph = Sequential({pop, myLSTM});
        auto op =
            std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
        REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
        for (size_t i = 1; i < 9; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
        }
        for (size_t i = 9; i < 17; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
        }
        REQUIRE(myLSTM->nbOutputs() == 2);
        // Input shape: (timesteps=2, batch=3, features=2).
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
                                     {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
        std::shared_ptr<Tensor> myInit =
            std::make_shared<Tensor>(Array2D<float, 3, 3>{
                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
        std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
            Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
        std::shared_ptr<Tensor> myInitR =
            std::make_shared<Tensor>(Array2D<float, 3, 3>{
                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
        pop->getOperator()->associateInput(0, myInput);
        op->associateInput(17, myInit);
        op->associateInput(18, myInit);
        // Weights X
        myLSTM->input(1).first->getOperator()->setOutput(0, myInitW);
        myLSTM->input(2).first->getOperator()->setOutput(0, myInitW);
        myLSTM->input(3).first->getOperator()->setOutput(0, myInitW);
        myLSTM->input(4).first->getOperator()->setOutput(0, myInitW);
        // Weights H
        myLSTM->input(5).first->getOperator()->setOutput(0, myInitR);
        myLSTM->input(6).first->getOperator()->setOutput(0, myInitR);
        myLSTM->input(7).first->getOperator()->setOutput(0, myInitR);
        myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
        auto g = getConnectedGraphView(myLSTM);
        g->compile("cpu", DataType::Float32);
        g->save("lstm_seq", true, true);
        auto scheduler = SequentialScheduler(g);
        scheduler.forward();
        scheduler.saveSchedulingDiagram("lstm_seq_schedule");
        // Reference hidden state after the two timesteps.
        std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
            Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
                                  {0.49801484, 0.49801484, 0.49801484},
                                  {0.67162132, 0.67162132, 0.67162132}}});
        myGraph->save("lstm_seq_mygraph", true, true);
        op->getOutput(0)->print();
        myHiddenState->print();
        REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
    }
    SECTION("LSTM(forward_values_seq_flatten)(sequential)") {
        auto pop = Pop();
        auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
        auto op =
            std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
        // Here we test the LSTM as if it had been flattened in the graph:
        // we borrow its micro-graph into our larger myGraph graph and wire
        // producers directly onto the micro-graph's ordered inputs.
        auto myGraph = std::make_shared<GraphView>();
        pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
        myGraph->add(op->getMicroGraph());
        myGraph->add(pop);
        REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
        for (size_t i = 1; i < 9; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
        }
        for (size_t i = 9; i < 17; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
        }
        REQUIRE(myLSTM->nbOutputs() == 2);
        // Same data and expected result as the non-flattened seq test.
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
                                     {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
        std::shared_ptr<Tensor> myInit =
            std::make_shared<Tensor>(Array2D<float, 3, 3>{
                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
        std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
            Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
        std::shared_ptr<Tensor> myInitR =
            std::make_shared<Tensor>(Array2D<float, 3, 3>{
                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
        pop->getOperator()->associateInput(0, myInput);
        op->associateInput(17, myInit);
        op->associateInput(18, myInit);
        // Weights X: a single producer feeds micro-graph inputs 1..4.
        auto prodX = Producer(myInitW);
        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first,
                        0,
                        1);
        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first,
                        0,
                        1);
        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first,
                        0,
                        1);
        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first,
                        0,
                        1);
        // Weights H: a single producer feeds micro-graph inputs 5..8.
        auto prodH = Producer(myInitR);
        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first,
                        0,
                        1);
        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first,
                        0,
                        1);
        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first,
                        0,
                        1);
        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first,
                        0,
                        1);
        myGraph->add({prodX, prodH});
        myGraph->setDataType(DataType::Float32);
        myGraph->setBackend("cpu");
        myGraph->save("lstm_seq_flatten", true, true);
        std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
            Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
                                  {0.49801484, 0.49801484, 0.49801484},
                                  {0.67162132, 0.67162132, 0.67162132}}});
        auto scheduler = SequentialScheduler(myGraph);
        scheduler.generateScheduling();
        scheduler.saveStaticSchedulingDiagram("lstm_static_schedule");
        scheduler.forward(true);
        scheduler.saveSchedulingDiagram("lstm_seq_flatten_schedule_seq");
        op->getOutput(0)->print();
        myHiddenState->print();
        REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
    }
    SECTION("LSTM(forward_values_seq_flatten)(parallel)") {
        auto pop = Pop();
        auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
        auto op =
            std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
        // Same flattened-LSTM setup as the sequential variant above, but
        // executed with the ParallelScheduler to check result parity.
        auto myGraph = std::make_shared<GraphView>();
        pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
        myGraph->add(op->getMicroGraph());
        myGraph->add(pop);
        REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
        for (size_t i = 1; i < 9; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
        }
        for (size_t i = 9; i < 17; ++i) {
            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
        }
        REQUIRE(myLSTM->nbOutputs() == 2);
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
                                     {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
        std::shared_ptr<Tensor> myInit =
            std::make_shared<Tensor>(Array2D<float, 3, 3>{
                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
        std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
            Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
        std::shared_ptr<Tensor> myInitR =
            std::make_shared<Tensor>(Array2D<float, 3, 3>{
                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
        pop->getOperator()->associateInput(0, myInput);
        op->associateInput(17, myInit);
        op->associateInput(18, myInit);
        // Weights X: one producer feeds micro-graph inputs 1..4.
        auto prodX = Producer(myInitW);
        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first,
                        0,
                        1);
        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first,
                        0,
                        1);
        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first,
                        0,
                        1);
        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first,
                        0,
                        1);
        // Weights H: one producer feeds micro-graph inputs 5..8.
        auto prodH = Producer(myInitR)
;
        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first,
                        0,
                        1);
        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first,
                        0,
                        1);
        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first,
                        0,
                        1);
        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first,
                        0,
                        1);
        myGraph->add({prodX, prodH});
        myGraph->setDataType(DataType::Float32);
        myGraph->setBackend("cpu");
        myGraph->save("lstm_seq_flatten", true, true);
        std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
            Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
                                  {0.49801484, 0.49801484, 0.49801484},
                                  {0.67162132, 0.67162132, 0.67162132}}});
        auto scheduler = ParallelScheduler(myGraph);
        scheduler.generateScheduling();
        scheduler.forward(true);
        scheduler.saveSchedulingDiagram("lstm_seq_flatten_schedule_par");
        op->getOutput(0)->print();
        myHiddenState->print();
        REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
    }
    SECTION("Leaky(forward)(fixed)") {
        // Fixed-value test of a small SNN pipeline: FC -> Leaky(LIF) -> FC,
        // run for two timesteps and compared against values computed with
        // py/snnTorch (see comments near the expected tensors).
        constexpr auto inChannels = 10;
        constexpr auto outChannels = 5;
        constexpr auto beta = 0.95;
        constexpr auto threshold = 1.0;
        constexpr auto nbTimeSteps = 2;
        auto myWeights =
            std::make_shared<Tensor>(Array2D<float, outChannels, inChannels>{{
                {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0},
                {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1},
                {0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4},
                {0.4, 0.3, 0.2, 0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5},
                {0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0},
            }});
        auto myWeights2 =
            std::make_shared<Tensor>(Array2D<float, inChannels, outChannels>{{
                {0.1, 0.2, 0.3, 0.4, 0.5},
                {0.6, 0.7, 0.8, 0.9, 1.0},
                {1.0, 0.9, 0.8, 0.7, 0.6},
                {0.5, 0.4, 0.3, 0.2, 0.1},
                {0.5, 0.6, 0.7, 0.8, 0.9},
                {1.0, 0.1, 0.2, 0.3, 0.4},
                {0.4, 0.3, 0.2, 0.1, 0.0},
                {0.1, 0.2, 0.3, 0.4, 0.5},
                {0.9, 0.8, 0.7, 0.6, 0.5},
                {0.4, 0.3, 0.2, 0.1, 0.0},
            }});
        auto myInput = std::make_shared<Tensor>(Array2D<float, 2, 10>{{
            {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0},
            {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1},
        }});
        // py/snn Torch computed result, output of fc1 at time step 1
        auto expectedOutputlif1ts1 =
            std::make_shared<Tensor>(Array2D<float, 2, 5>{{
                {3.850, 2.2000, 2.6500, 1.5000, 1.6500},
                {2.200, 3.8500, 3.4000, 1.2500, 3.3000},
            }});
        auto expectedOutputfc2ts1 =
            std::make_shared<Tensor>(Array2D<float, 2, 10>{{
                {1.5000,
                 4.0000,
                 4.0000,
                 1.5000,
                 3.5000,
                 2.0000,
                 1.0000,
                 1.5000,
                 3.5000,
                 1.0000},
                {1.5000,
                 4.0000,
                 4.0000,
                 1.5000,
                 3.5000,
                 2.0000,
                 1.0000,
                 1.5000,
                 3.5000,
                 1.0000},
            }});
        auto expectedOutputlif1ts2 =
            std::make_shared<Tensor>(Array2D<float, 2, 5>{{
                {6.5075, 3.2900, 4.1675, 1.9250, 2.2175},
                {3.2900, 6.5075, 5.6300, 1.4375, 5.4350},
            }});
        // NOTE: Same output as before, because for all channels, we have a
        // potential higher than threshold. Thus the lif neuron fires at every
        // timestep for every channel.
        auto expectedOutputfc2ts2 =
            std::make_shared<Tensor>(Array2D<float, 2, 10>{{
                {1.5000,
                 4.0000,
                 4.0000,
                 1.5000,
                 3.5000,
                 2.0000,
                 1.0000,
                 1.5000,
                 3.5000,
                 1.0000},
                {1.5000,
                 4.0000,
                 4.0000,
                 1.5000,
                 3.5000,
                 2.0000,
                 1.0000,
                 1.5000,
                 3.5000,
                 1.0000},
            }});
        // Zero-filled initial membrane/spike state.
        auto init = std::make_shared<Tensor>(Array2D<float, 2, 5>{});
        uniformFiller<float>(init, 0.0, 0.0);
        auto fc1 = FC(inChannels, outChannels, true, "myfc");
        auto fc2 = FC(outChannels, inChannels, true, "fc2");
        // NOTE: Account for init step by adding 1 to the max timestep
        // parameter.
        auto lif1 = Leaky(nbTimeSteps + 1, beta, threshold, "leaky");
        // associateInput() does not work
        fc1->input(1).first->getOperator()->setOutput(0, myWeights);
        fc2->input(1).first->getOperator()->setOutput(0, myWeights2);
        auto fc1Op =
            std::static_pointer_cast<OperatorTensor>(fc1->getOperator());
        auto lif1Op =
            std::static_pointer_cast<MetaOperator_Op>(lif1->getOperator());
        auto fc2Op =
            std::static_pointer_cast<OperatorTensor>(fc2->getOperator());
        fc1Op->associateInput(0, myInput);
        lif1Op->associateInput(1, init);
        lif1Op->associateInput(2, init);
        fc1->addChild(lif1, 0, 0);
        // Leaky output 1 (spikes) feeds fc2.
        lif1->addChild(fc2, 1, 0);
        auto g = std::make_shared<GraphView>();
        g->add({fc1, lif1, fc2});
        g->compile("cpu", DataType::Float32);
        auto scheduler = SequentialScheduler(g);
        // Forward 1 (simulate timestep 0)
        scheduler.forward(true);
        REQUIRE(approxEq<float>(*(lif1Op->getOutput(0)),
                                *(expectedOutputlif1ts1)));
        REQUIRE(
            approxEq<float>(*(fc2Op->getOutput(0)), *(expectedOutputfc2ts1)));
        // Forward 1 (simulate timestep 1)
        scheduler.forward(true);
        REQUIRE(approxEq<float>(*(lif1Op->getOutput(0)),
                                *(expectedOutputlif1ts2)));
        REQUIRE(
            approxEq<float>(*(fc2Op->getOutput(0)), *(expectedOutputfc2ts2)));
    }
SECTION("Leaky(forward)") {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(
0.1f,
1.1f); // Random float distribution between 0 and 1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
std::size_t(4));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(3),
std::size_t(3));
std::uniform_int_distribution<int> boolDist(0, 1);
std::uniform_real_distribution<float> betaDist(0,1);
const std::size_t nbDims = nbDimsDist(gen);
Log::info("Nbdims : {}", nbDims);
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
Log::info("timesteps : {}", dims[0]);
Log::info("dimensions : ");
for (auto dim : dims) {
Log::info("{}", dim);
}
const auto nbTimeSteps = dims[0];
const auto beta = betaDist(gen);
auto myLeaky = Leaky(nbTimeSteps, beta, 1.0, "leaky");
auto op =
std::static_pointer_cast<MetaOperator_Op>(myLeaky->getOperator());
// auto stack = Stack(2);
auto mem_rec = Stack(nbTimeSteps, "mem_rec");
auto spk_rec = Stack(nbTimeSteps, "spk_rec");
auto pop = Pop("popinput");
// Here we test LSTM as it is was flatten in the graph.
// We just borrow its micro-graph into our larger myGraph graph.
auto myGraph = std::make_shared<GraphView>();
pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
// 0 for mem 1 for stack
op->getMicroGraph()->getOrderedOutputs()[1].first->addChild(mem_rec,
0,
0);
op->getMicroGraph()->getOrderedOutputs()[0].first->addChild(spk_rec,
0,
0);
for (auto node : op->getMicroGraph()->getOrderedOutputs()) {
Log::info("name of output {}", node.first->name());
}
myGraph->add(pop);
myGraph->add(op->getMicroGraph());
myGraph->add(mem_rec);
myGraph->add(spk_rec);
myGraph->save("mg", true, true);
// 3 outputs
REQUIRE(myLeaky->nbInputs() == 3);
REQUIRE(myLeaky->inputCategory(0) == InputCategory::Data);
// Two spikes connected to nothing, + the Add node real output
REQUIRE(myLeaky->nbOutputs() == 4);
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
{{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
// std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
// Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
// {{2.0, 3.0}, {4.0, 5.0},
// {6.0, 7.0}}}});
// Generate input
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>();
expectedOutput->setDataType(DataType::Float32);
expectedOutput->setBackend("cpu");
const auto nb_elements =
std::accumulate(dims.cbegin(),
dims.cend(),
std::size_t(1),
std::multiplies<std::size_t>());
float *input = new float[nb_elements];
float *result = new float[nb_elements];
for (std::size_t i = 0; i < nb_elements; ++i) {
input[i] = valueDist(gen);
}
T0->resize(dims);
T0->getImpl()->setRawPtr(input, nb_elements);
T0->print();
// Elements popped at each time step
auto nbElementsPerTimeStep = nb_elements / dims[0];
// Init
for (int i = 0; i < nbElementsPerTimeStep; ++i) {
result[i] = input[i];
}
// Reccurence
for (int i = 1; i < dims[0]; ++i) {
auto offset = nbElementsPerTimeStep * i;
auto prev = nbElementsPerTimeStep * (i - 1);
for (int j = 0; j < nbElementsPerTimeStep; ++j) {
auto reset = (result[prev + j] > 1.0 ? 1 : 0);
result[offset + j] =
result[prev + j] * beta + input[offset + j] - reset;
}
}
expectedOutput->resize(dims);
expectedOutput->getImpl()->setRawPtr(result, nb_elements);
Log::info("Expected ouptut : ");
expectedOutput->print();
std::shared_ptr<Tensor> myInit =
std::make_shared<Tensor>(Array2D<float, 3, 3>{
{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
auto initMemdims =
std::vector<std::size_t>(dims.begin() + 1, dims.end());
Log::info("dimensions : ");
for (auto dim : initMemdims) {
Log::info("{}", dim);
}
std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
Array2D<float, 3, 2>{{{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}});
std::shared_ptr<Tensor> myInitR =
std::make_shared<Tensor>(initMemdims);
myInitR->setDataType(DataType::Float32);
myInitR->setBackend("cpu");
uniformFiller<float>(myInitR, 0, 0);
pop->getOperator()->associateInput(0, T0);
op->associateInput(1, myInitR);
op->associateInput(2, myInitR);
myGraph->compile("cpu", DataType::Float32);
auto scheduler = SequentialScheduler(myGraph);
REQUIRE_NOTHROW(scheduler.generateScheduling());
REQUIRE_NOTHROW(scheduler.forward(true));
auto memOp =
std::static_pointer_cast<OperatorTensor>(spk_rec->getOperator());
REQUIRE(approxEq<float>(*(memOp->getOutput(0)), *(expectedOutput)));
}
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <chrono>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution,
// std::uniform_int_distribution
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/MulImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Mul.hpp"
#include "aidge/utils/ArrayHelpers.hpp"
#include "aidge/utils/Log.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
TEST_CASE("[CPU/Operator] Mul(Backward)", "[Mul][CPU][Backward]") {
std::shared_ptr<Mul_Op> op = std::make_shared<Mul_Op>();
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// NOTE: The first four tests use fixed values, the last one uses random values but static dimensions.
    SECTION("Case 1: 1D and 2D Tensors") {
        // With an all-ones output gradient:
        //   grad(T0) = T1 broadcast over rows; grad(T1) = column sums of T0.
        const auto T0 = std::make_shared<Tensor>(
            Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{1, 2, 3}, {4, 5, 6}}}));
        const auto T1 =
            std::make_shared<Tensor>(Array1D<cpptype_t<DataType::Float32>, 3>({0.1, 0.2, 0.3}));
        op->associateInput(0, T0);
        op->associateInput(1, T1);
        op->getOutput(0)->setGrad(std::make_shared<Tensor>(
            Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}})));
        op->forwardDims();
        op->backward();
        const Tensor expectedGrad0 =
            Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{0.1, 0.2, 0.3}, {0.1, 0.2, 0.3}}});
        const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({5, 7, 9});
        REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
        REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
    }
    SECTION("Case 2: 3D and 1D tensors") {
        // Broadcasting over the last axis: grad(T0) repeats T1, and grad(T1)
        // reduces T0 over the two leading axes.
        const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
            {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}},
              {{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}}));
        const auto T1 =
            std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1}));
        const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
            {{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}}));
        const Tensor expectedGrad0 =
            Array3D<float, 2, 2, 3>({{{{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}},
                                      {{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}}}});
        const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({22.0, 26.0, 30.0});
        op->associateInput(0, T0);
        op->associateInput(1, T1);
        op->getOutput(0)->setGrad(newGrad);
        op->forwardDims();
        op->backward();
        REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
        REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
    }
    SECTION("Case 3: 4D and 2D tensors") {
        // Broadcasting over the two trailing axes: grad(T0) repeats T1 per
        // (N, C) slice, and grad(T1) sums T0 across all four slices.
        const auto T0 = std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>(
            {{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}},
               {{10.0, 11.0, 12.0}, {13.0, 14.0, 15.0}, {16.0, 17.0, 18.0}}},
              {{{19.0, 20.0, 21.0}, {22.0, 23.0, 24.0}, {25.0, 26.0, 27.0}},
               {{28.0, 29.0, 30.0},
                {31.0, 32.0, 33.0},
                {34.0, 35.0, 36.0}}}}}));
        const auto T1 = std::make_shared<Tensor>(Array2D<cpptype_t<DataType::Float32>, 3, 3>(
            {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}));
        const auto newGrad =
            std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>(
                {{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
                   {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}},
                  {{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
                   {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}}));
        const Tensor expectedGrad0 =
            Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>(
                {{{{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}},
                   {{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}},
                  {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}},
                   {{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}}});
        const Tensor expectedGrad1 =
            Array2D<cpptype_t<DataType::Float32>, 3, 3>({{{58.0, 62.0, 66.0},
                                                          {70.0, 74.0, 78.0},
                                                          {82.0, 86.0, 90.0}}});
        op->associateInput(0, T0);
        op->associateInput(1, T1);
        op->getOutput(0)->setGrad(newGrad);
        op->forwardDims();
        op->backward();
        REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
        REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
    }
    SECTION("Case 4: 3D and 2D tensors") {
        // A 2x3x4 input multiplied by a 3x4 tensor broadcast over the leading
        // axis. With an all-ones upstream gradient:
        //   grad(input0) = input1 broadcast over axis 0
        //   grad(input1) = input0 summed over axis 0 (e.g. 1+13 = 14).
        const auto T0 = std::make_shared<Tensor>(
            Array3D<float, 2, 3, 4>({{{
                                          {1.0, 2.0, 3.0, 4.0},
                                          {5.0, 6.0, 7.0, 8.0},
                                          {9.0, 10.0, 11.0, 12.0},
                                      },
                                      {
                                          {13.0, 14.0, 15.0, 16.0},
                                          {17.0, 18.0, 19.0, 20.0},
                                          {21.0, 22.0, 23.0, 24.0},
                                      }}}));
        const auto T1 = std::make_shared<Tensor>(
            Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{0.1, 0.2, 0.3, 0.4},
                                                          {0.5, 0.6, 0.7, 0.8},
                                                          {0.9, 1.0, 1.1, 1.2}}}));
        const auto newGrad = std::make_shared<Tensor>(
            Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{
                                                                 {1.0, 1.0, 1.0, 1.0},
                                                                 {1.0, 1.0, 1.0, 1.0},
                                                                 {1.0, 1.0, 1.0, 1.0},
                                                             },
                                                             {
                                                                 {1.0, 1.0, 1.0, 1.0},
                                                                 {1.0, 1.0, 1.0, 1.0},
                                                                 {1.0, 1.0, 1.0, 1.0},
                                                             }}}));
        const Tensor expectedGrad0 =
            Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{{0.1, 0.2, 0.3, 0.4},
                                                              {0.5, 0.6, 0.7, 0.8},
                                                              {0.9, 1.0, 1.1, 1.2}},
                                                             {{0.1, 0.2, 0.3, 0.4},
                                                              {0.5, 0.6, 0.7, 0.8},
                                                              {0.9, 1.0, 1.1, 1.2}}}});
        const Tensor expectedGrad1 =
            Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{14.0, 16.0, 18.0, 20.0},
                                                          {22.0, 24.0, 26.0, 28.0},
                                                          {30.0, 32.0, 34.0, 36.0}}});
        op->associateInput(0, T0);
        op->associateInput(1, T1);
        op->getOutput(0)->setGrad(newGrad);
        op->forwardDims();
        op->backward();
        REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0));
        REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1));
    }
    SECTION("Case 5: Tensors with random values") {
        // Use random values
        // Shapes exercise double broadcasting: input0 broadcasts along axis 2
        // (size 1 -> 6) and input1 broadcasts along the missing leading axis.
        const std::vector<std::size_t> dims0 = {5, 2, 1, 7}; // First tensor
        const std::vector<std::size_t> dims1 = {2, 6, 7};    // Second tensor
        const std::vector<std::size_t> outputDims = {5, 2, 6, 7};
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_real_distribution<float> dist(0.1f, 1.0f);
        auto T0 = std::make_shared<Tensor>(dims0);
        T0->setDataType(DataType::Float32);
        T0->setBackend("cpu");
        float* input0Data = static_cast<float*>(T0->getImpl()->rawPtr());
        // Fill with random values
        for (std::size_t i = 0; i < T0->size(); ++i) {
            input0Data[i] = dist(gen);
        }
        auto T1 = std::make_shared<Tensor>(dims1);
        T1->setDataType(DataType::Float32);
        T1->setBackend("cpu");
        float* input1Data = static_cast<float*>(T1->getImpl()->rawPtr());
        // Fill with random values
        for (std::size_t i = 0; i < T1->size(); ++i) {
            input1Data[i] = dist(gen);
        }
        op->associateInput(0, T0);
        op->associateInput(1, T1);
        op->forwardDims();
        op->forward();
        // Reference forward result, computed with explicit index arithmetic
        // mirroring the broadcast rules above.
        Tensor expectedOutput{outputDims};
        expectedOutput.setBackend("cpu");
        float* expectedOutputData = static_cast<float*>(expectedOutput.getImpl()->rawPtr());
        for (std::size_t n = 0; n < 5; ++n) {
            for (std::size_t c = 0; c < 2; ++c) {
                for (std::size_t h = 0; h < 6; ++h) {
                    for (std::size_t w = 0; w < 7; ++w) {
                        std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
                        std::size_t in0Idx =
                            w + 7 * (0 + 1 * (c + 2 * n)); // middle dim is 1
                        std::size_t in1Idx =
                            w + 7 * (h + 6 * c); // no n dimension
                        expectedOutputData[outIdx] = input0Data[in0Idx] * input1Data[in1Idx];
                    }
                }
            }
        }
        auto outputTensor = op->getOutput(0);
        REQUIRE(approxEq<float>(*outputTensor, expectedOutput));
        // Backward pass
        std::vector<float> gradOutputData(expectedOutput.size());
        for (auto &val : gradOutputData) {
            val = dist(gen);
        }
        // NOTE(review): the grad tensor is default-constructed and only
        // resized; this assumes getImpl() is usable without an explicit
        // setBackend()/setDataType() on it — TODO confirm.
        op->getOutput(0)->setGrad(std::make_shared<Tensor>());
        op->getOutput(0)->grad()->resize(outputDims);
        op->getOutput(0)->grad()->getImpl()->setRawPtr(gradOutputData.data(),
                                                       expectedOutput.size());
        // Compute reference gradients
        std::vector<float> expectedGrad0(T0->size(), 0.0f);
        std::vector<float> expectedGrad1(T1->size(), 0.0f);
        for (std::size_t n = 0; n < 5; ++n) {
            for (std::size_t c = 0; c < 2; ++c) {
                for (std::size_t h = 0; h < 6; ++h) {
                    for (std::size_t w = 0; w < 7; ++w) {
                        std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
                        std::size_t in0Idx = w + 7 * (0 + 1 * (c + 2 * n));
                        std::size_t in1Idx = w + 7 * (h + 6 * c);
                        // Gradient for input0: grad_output * input1
                        expectedGrad0[in0Idx] +=
                            gradOutputData[outIdx] * input1Data[in1Idx];
                        // Gradient for input1: grad_output * input0
                        expectedGrad1[in1Idx] +=
                            gradOutputData[outIdx] * input0Data[in0Idx];
                    }
                }
            }
        }
        // Perform backward pass
        op->backward();
        auto expectedGrad0Tensor = std::make_shared<Tensor>();
        expectedGrad0Tensor->resize(T0->dims());
        expectedGrad0Tensor->setBackend("cpu");
        expectedGrad0Tensor->setDataType(DataType::Float32);
        expectedGrad0Tensor->getImpl()->setRawPtr(expectedGrad0.data(),
                                                  expectedGrad0.size());
        auto expectedGrad1Tensor = std::make_shared<Tensor>(T1->dims());
        expectedGrad1Tensor->setBackend("cpu");
        expectedGrad1Tensor->setDataType(DataType::Float32);
        expectedGrad1Tensor->getImpl()->setRawPtr(expectedGrad1.data(),
                                                  expectedGrad1.size());
        // Verify backward pass
        REQUIRE(approxEq<float>(*T0->grad(), *expectedGrad0Tensor));
        REQUIRE(approxEq<float>(*T1->grad(), *expectedGrad1Tensor));
        // Optional: Print some values for verification
        // std::cout << "Input shapes: (" << dims0[0] << "," << dims0[1] <<
        // "," << dims0[2] << "," << dims0[3]
        //           << ") * (" << dims1[0] << "," << dims1[1] << "," <<
        //           dims1[2]
        //           << ") -> (" << outputDims[0] << "," << outputDims[1]
        //           << "," << outputDims[2] << "," << outputDims[3] <<
        //           ")\n";
        // std::cout << "Input sizes: " << input0_size << " * " <<
        // input1_size << " -> " << output_size << "\n";
    }
}
TEST_CASE("[cpu/operator] Mul(forward)", "[Mul][CPU]") {
    constexpr std::uint16_t NBTRIALS = 10;
    // Create a random number generator
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> valueDist(
        0.1f,
        1.1f); // Random float distribution between 0.1 and 1.1
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
                                                           std::size_t(10));
    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
                                                          std::size_t(3));
    std::uniform_int_distribution<int> boolDist(0, 1);
    // Operator under test and its tensors, reused across all trials below.
    std::shared_ptr<Mul_Op> op = std::make_shared<Mul_Op>();
    op->setDataType(DataType::Float32);
    op->setBackend("cpu");
    std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
    op->associateInput(0, T0);
    T0->setDataType(DataType::Float32);
    T0->setBackend("cpu");
    std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
    op->associateInput(1, T1);
    T1->setDataType(DataType::Float32);
    T1->setBackend("cpu");
    // Tres holds the independently computed reference result.
    std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
    Tres->setDataType(DataType::Float32);
    Tres->setBackend("cpu");
    // To measure execution time of 'Mul_Op::forward()' member function call
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration{};
SECTION("MulImpl_cpu::forward()") {
        SECTION("Scalar / Scalar") {}        // placeholder: scalar support not covered yet
        SECTION("Scalar / +1-D Tensor") {}   // placeholder: scalar support not covered yet
SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
const auto nbDims = nbDimsDist(gen);
auto dims = std::vector<std::size_t>{};
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
const auto nb_elements =
std::accumulate(dims.cbegin(),
dims.cend(),
std::size_t(1),
std::multiplies<std::size_t>());
number_of_operation += nb_elements;
// without broadcasting
float *array0 = new float[nb_elements];
float *array1 = new float[nb_elements];
float *result = new float[nb_elements];
for (std::size_t i = 0; i < nb_elements; ++i) {
array0[i] = valueDist(gen);
array1[i] = valueDist(gen);
result[i] = array0[i] * array1[i];
}
// input0
T0->resize(dims);
T0->getImpl()->setRawPtr(array0, nb_elements);
// input1
T1->resize(dims);
T1->getImpl()->setRawPtr(array1, nb_elements);
// results
Tres->resize(dims);
Tres->getImpl()->setRawPtr(result, nb_elements);
op->forwardDims();
start = std::chrono::system_clock::now();
op->forward();
end = std::chrono::system_clock::now();
duration +=
std::chrono::duration_cast<std::chrono::microseconds>(
end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
}
Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
Log::info("total time: {}μs\n", duration.count());
}
SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions, replace some dimensions with '1' to get
// broadcasting
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dimensions;
for (std::size_t i = 0; i < nbDims; ++i) {
dimensions.push_back(dimSizeDist(gen));
}
auto dims0 = dimensions;
auto dims1 = dimensions;
auto dimsOut = dimensions;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims0[i] = 1;
}
if (boolDist(gen)) {
dims1[i] = 1;
}
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
}
for (auto dim : dims0) {
Log::info("Dimension of input 0 : {}", dim);
}
for (auto dim : dims1) {
Log::info("Dimension of input 1 : {}", dim);
}
// create arrays and fill them with random values
float *array0 =
new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
float *array1 =
new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]];
float *result = new float[dimsOut[0] * dimsOut[1] *
dimsOut[2] * dimsOut[3]];
for (std::size_t i = 0;
i < dims0[0] * dims0[1] * dims0[2] * dims0[3];
++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0;
i < dims1[0] * dims1[1] * dims1[2] * dims1[3];
++i) {
array1[i] = valueDist(gen);
}
// compute true result
const std::size_t strides0[nbDims] = {
dims0[1] * dims0[2] * dims0[3],
dims0[2] * dims0[3],
dims0[3],
1};
const std::size_t strides1[nbDims] = {
dims1[1] * dims1[2] * dims1[3],
dims1[2] * dims1[3],
dims1[3],
1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 =
strides0[0] * ((dims0[0] > 1) ? a : 0) +
strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 =
strides1[0] * ((dims1[0] > 1) ? a : 0) +
strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out =
dimsOut[3] *
(c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 =
idx0_0 +
strides0[2] * ((dims0[2] > 1) ? c : 0) +
((dims0[3] > 1) ? d : 0);
std::size_t idx1 =
idx1_0 +
strides1[2] * ((dims1[2] > 1) ? c : 0) +
((dims1[3] > 1) ? d : 0);
result[idx_out + d] =
array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 <<
// ") -> " << array0[idx0] << " * " <<
// array1[idx1] << " -> " << idx_out + d <<
// std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0->getImpl()->setRawPtr(
array0,
dims0[0] * dims0[1] * dims0[2] * dims0[3]);
// input1
T1->resize(dims1);
T1->getImpl()->setRawPtr(
array1,
dims1[0] * dims1[1] * dims1[2] * dims1[3]);
// results
Tres->resize(dimsOut);
Tres->getImpl()->setRawPtr(
result,
dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
// compute result
op->forwardDims();
start = std::chrono::system_clock::now();
op->forward();
end = std::chrono::system_clock::now();
duration +=
std::chrono::duration_cast<std::chrono::microseconds>(
end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements =
std::accumulate(dimsOut.cbegin(),
dimsOut.cend(),
std::size_t(1),
std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
Log::info("total time: {}μs\n", duration.count());
}
SECTION("+1-D Tensor / 1-D Tensor") {
std::size_t number_of_operation = 0;
std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(
std::size_t(1),
std::size_t(3));
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims0(4);
for (std::size_t i = 0; i < nbDims; ++i) {
dims0[i] = dimSizeDist(gen);
}
std::vector<std::size_t> dimsOut = dims0;
std::vector<std::size_t> dims1 = dims0;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims1[i] = 1;
}
}
dims1.erase(dims1.cbegin(),
dims1.cbegin() + nbRemovedDimsDist(gen));
// create arrays and fill them with random values
float *array0 =
new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
std::size_t array1_size =
std::accumulate(dims1.cbegin(),
dims1.cend(),
std::size_t(1),
std::multiplies<std::size_t>());
float *array1 = new float[array1_size];
float *result = new float[dimsOut[0] * dimsOut[1] *
dimsOut[2] * dimsOut[3]];
for (std::size_t i = 0;
i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]);
++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < array1_size; ++i) {
array1[i] = valueDist(gen);
}
// compute true result
auto dims1_tmp = dims1;
dims1_tmp.insert(dims1_tmp.cbegin(),
4 - dims1_tmp.size(),
std::size_t(1));
const std::size_t strides0[nbDims] = {
dims0[1] * dims0[2] * dims0[3],
dims0[2] * dims0[3],
dims0[3],
1};
const std::size_t strides1[nbDims] = {
dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3],
dims1_tmp[2] * dims1_tmp[3],
dims1_tmp[3],
1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 =
strides0[0] * ((dims0[0] > 1) ? a : 0) +
strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 =
strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) +
strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out =
dimsOut[3] *
(c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 =
idx0_0 +
strides0[2] * ((dims0[2] > 1) ? c : 0) +
((dims0[3] > 1) ? d : 0);
std::size_t idx1 =
idx1_0 +
strides1[2] *
((dims1_tmp[2] > 1) ? c : 0) +
((dims1_tmp[3] > 1) ? d : 0);
result[idx_out + d] =
array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 <<
// ") -> " << array0[idx0] << " * " <<
// array1[idx1] << " -> " << idx_out + d <<
// std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0->getImpl()->setRawPtr(
array0,
dims0[0] * dims0[1] * dims0[2] * dims0[3]);
// input1
T1->resize(dims1);
T1->getImpl()->setRawPtr(array1, array1_size);
// results
Tres->resize(dimsOut);
Tres->getImpl()->setRawPtr(
result,
dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
// compute result
op->forwardDims();
start = std::chrono::system_clock::now();
op->forward();
end = std::chrono::system_clock::now();
duration +=
std::chrono::duration_cast<std::chrono::microseconds>(
end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements =
std::accumulate(dimsOut.cbegin(),
dimsOut.cend(),
std::size_t(1),
std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
Log::info("total time: {}μs\n", duration.count());
}
}
}
} // namespace Aidge
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/operator/Pad.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
    SECTION("Symmetric Pad") {
        // Constant padding of 1 on all four borders: each 5x5 feature map
        // becomes 7x7 and every new border cell holds the pad value `pv`.
        const int pv = 0; // pad value
        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv));
        auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
            {
                {
                    {{  0,   1,   2,   3,   4},
                     {  5,   6,   7,   8,   9},
                     { 10,  11,  12,  13,  14},
                     { 15,  16,  17,  18,  19},
                     { 20,  21,  22,  23,  24}},
                    {{ 25,  26,  27,  28,  29},
                     { 30,  31,  32,  33,  34},
                     { 35,  36,  37,  38,  39},
                     { 40,  41,  42,  43,  44},
                     { 45,  46,  47,  48,  49}},
                    {{ 50,  51,  52,  53,  54},
                     { 55,  56,  57,  58,  59},
                     { 60,  61,  62,  63,  64},
                     { 65,  66,  67,  68,  69},
                     { 70,  71,  72,  73,  74}}
                },
                {
                    {{ 75,  76,  77,  78,  79},
                     { 80,  81,  82,  83,  84},
                     { 85,  86,  87,  88,  89},
                     { 90,  91,  92,  93,  94},
                     { 95,  96,  97,  98,  99}},
                    {{100, 101, 102, 103, 104},
                     {105, 106, 107, 108, 109},
                     {110, 111, 112, 113, 114},
                     {115, 116, 117, 118, 119},
                     {120, 121, 122, 123, 124}},
                    {{125, 126, 127, 128, 129},
                     {130, 131, 132, 133, 134},
                     {135, 136, 137, 138, 139},
                     {140, 141, 142, 143, 144},
                     {145, 146, 147, 148, 149}}
                }
            }
        });
        // Expected: original map surrounded by a one-cell ring of `pv`.
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW
            {
                {
                    {{ pv, pv, pv, pv, pv, pv, pv},
                     { pv,  0,  1,  2,  3,  4, pv},
                     { pv,  5,  6,  7,  8,  9, pv},
                     { pv, 10, 11, 12, 13, 14, pv},
                     { pv, 15, 16, 17, 18, 19, pv},
                     { pv, 20, 21, 22, 23, 24, pv},
                     { pv, pv, pv, pv, pv, pv, pv}},
                    {{ pv, pv, pv, pv, pv, pv, pv},
                     { pv, 25, 26, 27, 28, 29, pv},
                     { pv, 30, 31, 32, 33, 34, pv},
                     { pv, 35, 36, 37, 38, 39, pv},
                     { pv, 40, 41, 42, 43, 44, pv},
                     { pv, 45, 46, 47, 48, 49, pv},
                     { pv, pv, pv, pv, pv, pv, pv}},
                    {{ pv, pv, pv, pv, pv, pv, pv},
                     { pv, 50, 51, 52, 53, 54, pv},
                     { pv, 55, 56, 57, 58, 59, pv},
                     { pv, 60, 61, 62, 63, 64, pv},
                     { pv, 65, 66, 67, 68, 69, pv},
                     { pv, 70, 71, 72, 73, 74, pv},
                     { pv, pv, pv, pv, pv, pv, pv}}
                },
                {
                    {{ pv, pv, pv, pv, pv, pv, pv},
                     { pv, 75, 76, 77, 78, 79, pv},
                     { pv, 80, 81, 82, 83, 84, pv},
                     { pv, 85, 86, 87, 88, 89, pv},
                     { pv, 90, 91, 92, 93, 94, pv},
                     { pv, 95, 96, 97, 98, 99, pv},
                     { pv, pv, pv, pv, pv, pv, pv}},
                    {{ pv, pv, pv, pv, pv, pv, pv},
                     {pv, 100, 101, 102, 103, 104, pv},
                     {pv, 105, 106, 107, 108, 109, pv},
                     {pv, 110, 111, 112, 113, 114, pv},
                     {pv, 115, 116, 117, 118, 119, pv},
                     {pv, 120, 121, 122, 123, 124, pv},
                     { pv, pv, pv, pv, pv, pv, pv}},
                    {{ pv, pv, pv, pv, pv, pv, pv},
                     {pv, 125, 126, 127, 128, 129, pv},
                     {pv, 130, 131, 132, 133, 134, pv},
                     {pv, 135, 136, 137, 138, 139, pv},
                     {pv, 140, 141, 142, 143, 144, pv},
                     {pv, 145, 146, 147, 148, 149, pv},
                     { pv, pv, pv, pv, pv, pv, pv}}
                }
            }
        });
        myPad->getOperator()->associateInput(0,myInput);
        myPad->getOperator()->setDataType(DataType::Int32);
        myPad->getOperator()->setBackend("cpu");
        myPad->forward();
        // myPad->getOperator()->getOutput(0)->print();
        REQUIRE(*(op->getOutput(0)) == *myOutput);
    }
    SECTION("Asymmetric Pad") {
        // Constant padding on only two borders ({1, 0, 0, 1}): the expected
        // output shows one `pv` row added on top and one `pv` column on the
        // right, so each 5x5 map becomes 6x6.
        const int pv = 0; // pad value
        std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv));
        auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
            {
                {
                    {{  0,   1,   2,   3,   4},
                     {  5,   6,   7,   8,   9},
                     { 10,  11,  12,  13,  14},
                     { 15,  16,  17,  18,  19},
                     { 20,  21,  22,  23,  24}},
                    {{ 25,  26,  27,  28,  29},
                     { 30,  31,  32,  33,  34},
                     { 35,  36,  37,  38,  39},
                     { 40,  41,  42,  43,  44},
                     { 45,  46,  47,  48,  49}},
                    {{ 50,  51,  52,  53,  54},
                     { 55,  56,  57,  58,  59},
                     { 60,  61,  62,  63,  64},
                     { 65,  66,  67,  68,  69},
                     { 70,  71,  72,  73,  74}}
                },
                {
                    {{ 75,  76,  77,  78,  79},
                     { 80,  81,  82,  83,  84},
                     { 85,  86,  87,  88,  89},
                     { 90,  91,  92,  93,  94},
                     { 95,  96,  97,  98,  99}},
                    {{100, 101, 102, 103, 104},
                     {105, 106, 107, 108, 109},
                     {110, 111, 112, 113, 114},
                     {115, 116, 117, 118, 119},
                     {120, 121, 122, 123, 124}},
                    {{125, 126, 127, 128, 129},
                     {130, 131, 132, 133, 134},
                     {135, 136, 137, 138, 139},
                     {140, 141, 142, 143, 144},
                     {145, 146, 147, 148, 149}}
                }
            }
        });
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,6,6> { //NCHW
            {
                {
                    {{ pv, pv, pv, pv, pv, pv},
                     {  0,  1,  2,  3,  4, pv},
                     {  5,  6,  7,  8,  9, pv},
                     { 10, 11, 12, 13, 14, pv},
                     { 15, 16, 17, 18, 19, pv},
                     { 20, 21, 22, 23, 24, pv}},
                    {{ pv, pv, pv, pv, pv, pv},
                     { 25, 26, 27, 28, 29, pv},
                     { 30, 31, 32, 33, 34, pv},
                     { 35, 36, 37, 38, 39, pv},
                     { 40, 41, 42, 43, 44, pv},
                     { 45, 46, 47, 48, 49, pv}},
                    {{ pv, pv, pv, pv, pv, pv},
                     { 50, 51, 52, 53, 54, pv},
                     { 55, 56, 57, 58, 59, pv},
                     { 60, 61, 62, 63, 64, pv},
                     { 65, 66, 67, 68, 69, pv},
                     { 70, 71, 72, 73, 74, pv}}
                },
                {
                    {{ pv, pv, pv, pv, pv, pv},
                     { 75, 76, 77, 78, 79, pv},
                     { 80, 81, 82, 83, 84, pv},
                     { 85, 86, 87, 88, 89, pv},
                     { 90, 91, 92, 93, 94, pv},
                     { 95, 96, 97, 98, 99, pv}},
                    {{ pv, pv, pv, pv, pv, pv},
                     { 100, 101, 102, 103, 104, pv},
                     { 105, 106, 107, 108, 109, pv},
                     { 110, 111, 112, 113, 114, pv},
                     { 115, 116, 117, 118, 119, pv},
                     { 120, 121, 122, 123, 124, pv}},
                    {{ pv, pv, pv, pv, pv, pv},
                     { 125, 126, 127, 128, 129, pv},
                     { 130, 131, 132, 133, 134, pv},
                     { 135, 136, 137, 138, 139, pv},
                     { 140, 141, 142, 143, 144, pv},
                     { 145, 146, 147, 148, 149, pv}}
                }
            }
        });
        myPad->getOperator()->associateInput(0,myInput);
        myPad->getOperator()->setDataType(DataType::Int32);
        myPad->getOperator()->setBackend("cpu");
        myPad->forward();
        // myPad->getOperator()->getOutput(0)->print();
        REQUIRE(*(op->getOutput(0)) == *myOutput);
    }
    SECTION("Pad Edge") {
        // Edge (replicate) padding of 1 on all borders: the nearest input
        // row/column is repeated into the padded ring (5x5 -> 7x7).
        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Edge);
        auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
            {
                {
                    {{  0,   1,   2,   3,   4},
                     {  5,   6,   7,   8,   9},
                     { 10,  11,  12,  13,  14},
                     { 15,  16,  17,  18,  19},
                     { 20,  21,  22,  23,  24}},
                    {{ 25,  26,  27,  28,  29},
                     { 30,  31,  32,  33,  34},
                     { 35,  36,  37,  38,  39},
                     { 40,  41,  42,  43,  44},
                     { 45,  46,  47,  48,  49}},
                    {{ 50,  51,  52,  53,  54},
                     { 55,  56,  57,  58,  59},
                     { 60,  61,  62,  63,  64},
                     { 65,  66,  67,  68,  69},
                     { 70,  71,  72,  73,  74}}
                },
                {
                    {{ 75,  76,  77,  78,  79},
                     { 80,  81,  82,  83,  84},
                     { 85,  86,  87,  88,  89},
                     { 90,  91,  92,  93,  94},
                     { 95,  96,  97,  98,  99}},
                    {{100, 101, 102, 103, 104},
                     {105, 106, 107, 108, 109},
                     {110, 111, 112, 113, 114},
                     {115, 116, 117, 118, 119},
                     {120, 121, 122, 123, 124}},
                    {{125, 126, 127, 128, 129},
                     {130, 131, 132, 133, 134},
                     {135, 136, 137, 138, 139},
                     {140, 141, 142, 143, 144},
                     {145, 146, 147, 148, 149}}
                }
            }
        });
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW
            {
                {
                    {{ 0,  0,  1,  2,  3,  4,  4},
                     { 0,  0,  1,  2,  3,  4,  4},
                     { 5,  5,  6,  7,  8,  9,  9},
                     { 10, 10, 11, 12, 13, 14, 14},
                     { 15, 15, 16, 17, 18, 19, 19},
                     { 20, 20, 21, 22, 23, 24, 24},
                     { 20, 20, 21, 22, 23, 24, 24}},
                    {{ 25, 25, 26, 27, 28, 29, 29},
                     { 25, 25, 26, 27, 28, 29, 29},
                     { 30, 30, 31, 32, 33, 34, 34},
                     { 35, 35, 36, 37, 38, 39, 39},
                     { 40, 40, 41, 42, 43, 44, 44},
                     { 45, 45, 46, 47, 48, 49, 49},
                     { 45, 45, 46, 47, 48, 49, 49}},
                    {{ 50, 50, 51, 52, 53, 54, 54},
                     { 50, 50, 51, 52, 53, 54, 54},
                     { 55, 55, 56, 57, 58, 59, 59},
                     { 60, 60, 61, 62, 63, 64, 64},
                     { 65, 65, 66, 67, 68, 69, 69},
                     { 70, 70, 71, 72, 73, 74, 74},
                     { 70, 70, 71, 72, 73, 74, 74}}
                },
                {
                    {{ 75, 75, 76, 77, 78, 79, 79},
                     { 75, 75, 76, 77, 78, 79, 79},
                     { 80, 80, 81, 82, 83, 84, 84},
                     { 85, 85, 86, 87, 88, 89, 89},
                     { 90, 90, 91, 92, 93, 94, 94},
                     { 95, 95, 96, 97, 98, 99, 99},
                     { 95, 95, 96, 97, 98, 99, 99}},
                    {{100, 100, 101, 102, 103, 104, 104},
                     {100, 100, 101, 102, 103, 104, 104},
                     {105, 105, 106, 107, 108, 109, 109},
                     {110, 110, 111, 112, 113, 114, 114},
                     {115, 115, 116, 117, 118, 119, 119},
                     {120, 120, 121, 122, 123, 124, 124},
                     {120, 120, 121, 122, 123, 124, 124}},
                    {{125, 125, 126, 127, 128, 129, 129},
                     {125, 125, 126, 127, 128, 129, 129},
                     {130, 130, 131, 132, 133, 134, 134},
                     {135, 135, 136, 137, 138, 139, 139},
                     {140, 140, 141, 142, 143, 144, 144},
                     {145, 145, 146, 147, 148, 149, 149},
                     {145, 145, 146, 147, 148, 149, 149}}
                }
            }
        });
        myPad->getOperator()->associateInput(0,myInput);
        myPad->getOperator()->setDataType(DataType::Int32);
        myPad->getOperator()->setBackend("cpu");
        myPad->forward();
        // myPad->getOperator()->getOutput(0)->print();
        REQUIRE(*(op->getOutput(0)) == *myOutput);
    }
SECTION("Pad Reflect") {
std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Reflect);
auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
{
{
{{ 0, 1, 2, 3, 4},
{ 5, 6, 7, 8, 9},
{ 10, 11, 12, 13, 14},
{ 15, 16, 17, 18, 19},
{ 20, 21, 22, 23, 24}},
{{ 25, 26, 27, 28, 29},
{ 30, 31, 32, 33, 34},
{ 35, 36, 37, 38, 39},
{ 40, 41, 42, 43, 44},
{ 45, 46, 47, 48, 49}},
{{ 50, 51, 52, 53, 54},
{ 55, 56, 57, 58, 59},
{ 60, 61, 62, 63, 64},
{ 65, 66, 67, 68, 69},
{ 70, 71, 72, 73, 74}}
},
{
{{ 75, 76, 77, 78, 79},
{ 80, 81, 82, 83, 84},
{ 85, 86, 87, 88, 89},
{ 90, 91, 92, 93, 94},
{ 95, 96, 97, 98, 99}},
{{100, 101, 102, 103, 104},
{105, 106, 107, 108, 109},
{110, 111, 112, 113, 114},
{115, 116, 117, 118, 119},
{120, 121, 122, 123, 124}},
{{125, 126, 127, 128, 129},
{130, 131, 132, 133, 134},
{135, 136, 137, 138, 139},
{140, 141, 142, 143, 144},
{145, 146, 147, 148, 149}}
}
}
});
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW
{
{
{
{ 6, 5, 6, 7, 8, 9, 5},
{ 1, 0, 1, 2, 3, 4, 0},
{ 6, 5, 6, 7, 8, 9, 5},
{ 11, 10, 11, 12, 13, 14, 10},
{ 16, 15, 16, 17, 18, 19, 15},
{ 21, 20, 21, 22, 23, 24, 20},
{ 1, 0, 1, 2, 3, 4, 0}
},
{
{ 31, 30, 31, 32, 33, 34, 30},
{ 26, 25, 26, 27, 28, 29, 25},
{ 31, 30, 31, 32, 33, 34, 30},
{ 36, 35, 36, 37, 38, 39, 35},
{ 41, 40, 41, 42, 43, 44, 40},
{ 46, 45, 46, 47, 48, 49, 45},
{ 26, 25, 26, 27, 28, 29, 25}
},
{
{ 56, 55, 56, 57, 58, 59, 55},
{ 51, 50, 51, 52, 53, 54, 50},
{ 56, 55, 56, 57, 58, 59, 55},
{ 61, 60, 61, 62, 63, 64, 60},
{ 66, 65, 66, 67, 68, 69, 65},
{ 71, 70, 71, 72, 73, 74, 70},
{ 51, 50, 51, 52, 53, 54, 50}
}
},
{
{
{ 81, 80, 81, 82, 83, 84, 80},
{ 76, 75, 76, 77, 78, 79, 75},
{ 81, 80, 81, 82, 83, 84, 80},
{ 86, 85, 86, 87, 88, 89, 85},
{ 91, 90, 91, 92, 93, 94, 90},
{ 96, 95, 96, 97, 98, 99, 95},
{ 76, 75, 76, 77, 78, 79, 75}
},
{
{ 106, 105, 106, 107, 108, 109, 105},
{ 101, 100, 101, 102, 103, 104, 100},
{ 106, 105, 106, 107, 108, 109, 105},
{ 111, 110, 111, 112, 113, 114, 110},
{ 116, 115, 116, 117, 118, 119, 115},
{ 121, 120, 121, 122, 123, 124, 120},
{ 101, 100, 101, 102, 103, 104, 100}
},
{
{ 131, 130, 131, 132, 133, 134, 130},
{ 126, 125, 126, 127, 128, 129, 125},
{ 131, 130, 131, 132, 133, 134, 130},
{ 136, 135, 136, 137, 138, 139, 135},
{ 141, 140, 141, 142, 143, 144, 140},
{ 146, 145, 146, 147, 148, 149, 145},
{ 126, 125, 126, 127, 128, 129, 125}
}
}
}
});
myPad->getOperator()->associateInput(0,myInput);
myPad->getOperator()->setDataType(DataType::Int32);
myPad->getOperator()->setBackend("cpu");
myPad->forward();
op->getOutput(0)->print();
REQUIRE(*(op->getOutput(0)) == *myOutput);
}
    SECTION("Pad Wrap") {
        // Wrap (circular) padding of 1 on all borders: padded rows/columns
        // are taken from the opposite edge of the input (5x5 -> 7x7).
        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Wrap);
        auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
            {
                {
                    {{  0,   1,   2,   3,   4},
                     {  5,   6,   7,   8,   9},
                     { 10,  11,  12,  13,  14},
                     { 15,  16,  17,  18,  19},
                     { 20,  21,  22,  23,  24}},
                    {{ 25,  26,  27,  28,  29},
                     { 30,  31,  32,  33,  34},
                     { 35,  36,  37,  38,  39},
                     { 40,  41,  42,  43,  44},
                     { 45,  46,  47,  48,  49}},
                    {{ 50,  51,  52,  53,  54},
                     { 55,  56,  57,  58,  59},
                     { 60,  61,  62,  63,  64},
                     { 65,  66,  67,  68,  69},
                     { 70,  71,  72,  73,  74}}
                },
                {
                    {{ 75,  76,  77,  78,  79},
                     { 80,  81,  82,  83,  84},
                     { 85,  86,  87,  88,  89},
                     { 90,  91,  92,  93,  94},
                     { 95,  96,  97,  98,  99}},
                    {{100, 101, 102, 103, 104},
                     {105, 106, 107, 108, 109},
                     {110, 111, 112, 113, 114},
                     {115, 116, 117, 118, 119},
                     {120, 121, 122, 123, 124}},
                    {{125, 126, 127, 128, 129},
                     {130, 131, 132, 133, 134},
                     {135, 136, 137, 138, 139},
                     {140, 141, 142, 143, 144},
                     {145, 146, 147, 148, 149}}
                }
            }
        });
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW
            {
                {
                    {{ 24, 20, 21, 22, 23, 24, 20},
                     {  4,  0,  1,  2,  3,  4,  0},
                     {  9,  5,  6,  7,  8,  9,  5},
                     { 14, 10, 11, 12, 13, 14, 10},
                     { 19, 15, 16, 17, 18, 19, 15},
                     { 24, 20, 21, 22, 23, 24, 20},
                     {  4,  0,  1,  2,  3,  4,  0}},
                    {{ 49, 45, 46, 47, 48, 49, 45},
                     { 29, 25, 26, 27, 28, 29, 25},
                     { 34, 30, 31, 32, 33, 34, 30},
                     { 39, 35, 36, 37, 38, 39, 35},
                     { 44, 40, 41, 42, 43, 44, 40},
                     { 49, 45, 46, 47, 48, 49, 45},
                     { 29, 25, 26, 27, 28, 29, 25}},
                    {{ 74, 70, 71, 72, 73, 74, 70},
                     { 54, 50, 51, 52, 53, 54, 50},
                     { 59, 55, 56, 57, 58, 59, 55},
                     { 64, 60, 61, 62, 63, 64, 60},
                     { 69, 65, 66, 67, 68, 69, 65},
                     { 74, 70, 71, 72, 73, 74, 70},
                     { 54, 50, 51, 52, 53, 54, 50}}
                },
                {
                    {{ 99, 95, 96, 97, 98, 99, 95},
                     { 79, 75, 76, 77, 78, 79, 75},
                     { 84, 80, 81, 82, 83, 84, 80},
                     { 89, 85, 86, 87, 88, 89, 85},
                     { 94, 90, 91, 92, 93, 94, 90},
                     { 99, 95, 96, 97, 98, 99, 95},
                     { 79, 75, 76, 77, 78, 79, 75}},
                    {{124, 120, 121, 122, 123, 124, 120},
                     {104, 100, 101, 102, 103, 104, 100},
                     {109, 105, 106, 107, 108, 109, 105},
                     {114, 110, 111, 112, 113, 114, 110},
                     {119, 115, 116, 117, 118, 119, 115},
                     {124, 120, 121, 122, 123, 124, 120},
                     {104, 100, 101, 102, 103, 104, 100}},
                    {{149, 145, 146, 147, 148, 149, 145},
                     {129, 125, 126, 127, 128, 129, 125},
                     {134, 130, 131, 132, 133, 134, 130},
                     {139, 135, 136, 137, 138, 139, 135},
                     {144, 140, 141, 142, 143, 144, 140},
                     {149, 145, 146, 147, 148, 149, 145},
                     {129, 125, 126, 127, 128, 129, 125}}
                }
            }
        });
        myPad->getOperator()->associateInput(0,myInput);
        myPad->getOperator()->setDataType(DataType::Int32);
        myPad->getOperator()->setBackend("cpu");
        myPad->forward();
        // myPad->getOperator()->getOutput(0)->print();
        REQUIRE(*(op->getOutput(0)) == *myOutput);
    }
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/PaddedConvImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/operator/MetaOperatorDefs.hpp"
using namespace Aidge;
// Functional tests for the PaddedConv meta-operator (Pad fused with Conv) on
// the CPU backend. Both sections use Int32 tensors and hand-computed expected
// outputs; the meta-operator is executed through a SequentialScheduler on its
// connected graph (weights and bias come from Producer nodes).
TEST_CASE("[cpu/operator] PaddedConv(forward)", "[PaddedConv][CPU]") {
    // Default padding (none): a 3x3 kernel over a 5x5 input yields a 3x3 output.
    SECTION("Classic Conv") {
        std::shared_ptr<Node> myConv = PaddedConv(3,4,{3,3}, "myconv");
        auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator());
        // Weights: 4 output channels x 3 input channels x 3x3 kernel,
        // filled with the sequence 0..107.
        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
            {
                {
                    {{  0,   1,   2},
                     {  3,   4,   5},
                     {  6,   7,   8}},
                    {{  9,  10,  11},
                     { 12,  13,  14},
                     { 15,  16,  17}},
                    {{ 18,  19,  20},
                     { 21,  22,  23},
                     { 24,  25,  26}}
                },
                {
                    {{ 27,  28,  29},
                     { 30,  31,  32},
                     { 33,  34,  35}},
                    {{ 36,  37,  38},
                     { 39,  40,  41},
                     { 42,  43,  44}},
                    {{ 45,  46,  47},
                     { 48,  49,  50},
                     { 51,  52,  53}}
                },
                {
                    {{ 54,  55,  56},
                     { 57,  58,  59},
                     { 60,  61,  62}},
                    {{ 63,  64,  65},
                     { 66,  67,  68},
                     { 69,  70,  71}},
                    {{ 72,  73,  74},
                     { 75,  76,  77},
                     { 78,  79,  80}}
                },
                {
                    {{ 81,  82,  83},
                     { 84,  85,  86},
                     { 87,  88,  89}},
                    {{ 90,  91,  92},
                     { 93,  94,  95},
                     { 96,  97,  98}},
                    {{ 99, 100, 101},
                     {102, 103, 104},
                     {105, 106, 107}}
                }
            }
        });
        // One bias value per output channel.
        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
        // Input: batch of 2, 3 channels, 5x5 spatial, filled 0..149.
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
            {
                {
                    {{  0,   1,   2,   3,   4},
                     {  5,   6,   7,   8,   9},
                     { 10,  11,  12,  13,  14},
                     { 15,  16,  17,  18,  19},
                     { 20,  21,  22,  23,  24}},
                    {{ 25,  26,  27,  28,  29},
                     { 30,  31,  32,  33,  34},
                     { 35,  36,  37,  38,  39},
                     { 40,  41,  42,  43,  44},
                     { 45,  46,  47,  48,  49}},
                    {{ 50,  51,  52,  53,  54},
                     { 55,  56,  57,  58,  59},
                     { 60,  61,  62,  63,  64},
                     { 65,  66,  67,  68,  69},
                     { 70,  71,  72,  73,  74}}
                },
                {
                    {{ 75,  76,  77,  78,  79},
                     { 80,  81,  82,  83,  84},
                     { 85,  86,  87,  88,  89},
                     { 90,  91,  92,  93,  94},
                     { 95,  96,  97,  98,  99}},
                    {{100, 101, 102, 103, 104},
                     {105, 106, 107, 108, 109},
                     {110, 111, 112, 113, 114},
                     {115, 116, 117, 118, 119},
                     {120, 121, 122, 123, 124}},
                    {{125, 126, 127, 128, 129},
                     {130, 131, 132, 133, 134},
                     {135, 136, 137, 138, 139},
                     {140, 141, 142, 143, 144},
                     {145, 146, 147, 148, 149}}
                }
            }
        });
        // Expected convolution result (bias included), pre-computed by hand.
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
            {
                {
                    {{ 15226,  15577,  15928},
                     { 16981,  17332,  17683},
                     { 18736,  19087,  19438}},
                    {{ 37818,  38898,  39978},
                     { 43218,  44298,  45378},
                     { 48618,  49698,  50778}},
                    {{ 60426,  62235,  64044},
                     { 69471,  71280,  73089},
                     { 78516,  80325,  82134}},
                    {{ 83016,  85554,  88092},
                     { 95706,  98244, 100782},
                     {108396, 110934, 113472}}
                },
                {
                    {{ 41551,  41902,  42253},
                     { 43306,  43657,  44008},
                     { 45061,  45412,  45763}},
                    {{118818, 119898, 120978},
                     {124218, 125298, 126378},
                     {129618, 130698, 131778}},
                    {{196101, 197910, 199719},
                     {205146, 206955, 208764},
                     {214191, 216000, 217809}},
                    {{273366, 275904, 278442},
                     {286056, 288594, 291132},
                     {298746, 301284, 303822}}
                }
            }
        });
        myConv->getOperator()->associateInput(0,myInput);
        // Weights and bias are fed through the producer nodes attached to
        // inputs 1 and 2 of the meta-operator.
        myConv->input(1).first->getOperator()->setOutput(0, myWeights);
        myConv->input(2).first->getOperator()->setOutput(0, myBias);
        // Meta-operators are executed by scheduling their inner graph.
        auto g = getConnectedGraphView(myConv);
        g->setDataType(DataType::Int32);
        g->setBackend("cpu");
        auto scheduler = SequentialScheduler(g);
        scheduler.forward();
        REQUIRE(*(op->getOutput(0)) == *myOutput);
    }
    // Stride {1,1} with padding {1,1,1,1}: the 5x5 spatial size is preserved
    // in the output. Same weights/bias/input as the previous section.
    SECTION("test Padding") {
        std::shared_ptr<Node> myConv = PaddedConv(3,4,{3,3}, "myconv", {1,1}, {1,1,1,1});
        auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator());
        // Weights: 4 output channels x 3 input channels x 3x3 kernel, filled 0..107.
        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
            {
                {
                    {{  0,   1,   2},
                     {  3,   4,   5},
                     {  6,   7,   8}},
                    {{  9,  10,  11},
                     { 12,  13,  14},
                     { 15,  16,  17}},
                    {{ 18,  19,  20},
                     { 21,  22,  23},
                     { 24,  25,  26}}
                },
                {
                    {{ 27,  28,  29},
                     { 30,  31,  32},
                     { 33,  34,  35}},
                    {{ 36,  37,  38},
                     { 39,  40,  41},
                     { 42,  43,  44}},
                    {{ 45,  46,  47},
                     { 48,  49,  50},
                     { 51,  52,  53}}
                },
                {
                    {{ 54,  55,  56},
                     { 57,  58,  59},
                     { 60,  61,  62}},
                    {{ 63,  64,  65},
                     { 66,  67,  68},
                     { 69,  70,  71}},
                    {{ 72,  73,  74},
                     { 75,  76,  77},
                     { 78,  79,  80}}
                },
                {
                    {{ 81,  82,  83},
                     { 84,  85,  86},
                     { 87,  88,  89}},
                    {{ 90,  91,  92},
                     { 93,  94,  95},
                     { 96,  97,  98}},
                    {{ 99, 100, 101},
                     {102, 103, 104},
                     {105, 106, 107}}
                }
            }
        });
        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
            {
                {
                    {{  0,   1,   2,   3,   4},
                     {  5,   6,   7,   8,   9},
                     { 10,  11,  12,  13,  14},
                     { 15,  16,  17,  18,  19},
                     { 20,  21,  22,  23,  24}},
                    {{ 25,  26,  27,  28,  29},
                     { 30,  31,  32,  33,  34},
                     { 35,  36,  37,  38,  39},
                     { 40,  41,  42,  43,  44},
                     { 45,  46,  47,  48,  49}},
                    {{ 50,  51,  52,  53,  54},
                     { 55,  56,  57,  58,  59},
                     { 60,  61,  62,  63,  64},
                     { 65,  66,  67,  68,  69},
                     { 70,  71,  72,  73,  74}}
                },
                {
                    {{ 75,  76,  77,  78,  79},
                     { 80,  81,  82,  83,  84},
                     { 85,  86,  87,  88,  89},
                     { 90,  91,  92,  93,  94},
                     { 95,  96,  97,  98,  99}},
                    {{100, 101, 102, 103, 104},
                     {105, 106, 107, 108, 109},
                     {110, 111, 112, 113, 114},
                     {115, 116, 117, 118, 119},
                     {120, 121, 122, 123, 124}},
                    {{125, 126, 127, 128, 129},
                     {130, 131, 132, 133, 134},
                     {135, 136, 137, 138, 139},
                     {140, 141, 142, 143, 144},
                     {145, 146, 147, 148, 149}}
                }
            }
        });
        // Expected result with zero-padding: interior values match the
        // "Classic Conv" section; border values are new.
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> {
            {
                {
                    {{  6895,  10225,  10486,  10747,   7063},
                     { 10303,  15226,  15577,  15928,  10429},
                     { 11518,  16981,  17332,  17683,  11554},
                     { 12733,  18736,  19087,  19438,  12679},
                     {  8047,  11791,  11998,  12205,   7927}},
                    {{ 15960,  24069,  24816,  25563,  17100},
                     { 25119,  37818,  38898,  39978,  26703},
                     { 28764,  43218,  44298,  45378,  30258},
                     { 32409,  48618,  49698,  50778,  33813},
                     { 21972,  32925,  33618,  34311,  22824}},
                    {{ 25041,  37929,  39162,  40395,  27153},
                     { 39951,  60426,  62235,  64044,  42993},
                     { 46026,  69471,  71280,  73089,  48978},
                     { 52101,  78516,  80325,  82134,  54963},
                     { 35913,  54075,  55254,  56433,  37737}},
                    {{ 34104,  51771,  53490,  55209,  37188},
                     { 54765,  83016,  85554,  88092,  59265},
                     { 63270,  95706,  98244, 100782,  67680},
                     { 71775, 108396, 110934, 113472,  76095},
                     { 49836,  75207,  76872,  78537,  52632}}
                },
                {
                    {{ 20395,  29800,  30061,  30322,  19663},
                     { 28528,  41551,  41902,  42253,  27304},
                     { 29743,  43306,  43657,  44008,  28429},
                     { 30958,  45061,  45412,  45763,  29554},
                     { 18847,  27316,  27523,  27730,  17827}},
                    {{ 53760,  80094,  80841,  81588,  54000},
                     { 79794, 118818, 119898, 120978,  80028},
                     { 83439, 124218, 125298, 126378,  83583},
                     { 87084, 129618, 130698, 131778,  87138},
                     { 57072,  84900,  85593,  86286,  57024}},
                    {{ 87141, 130404, 131637, 132870,  88353},
                     {131076, 196101, 197910, 199719, 132768},
                     {137151, 205146, 206955, 208764, 138753},
                     {143226, 214191, 216000, 217809, 144738},
                     { 95313, 142500, 143679, 144858,  96237}},
                    {{120504, 180696, 182415, 184134, 122688},
                     {182340, 273366, 275904, 278442, 185490},
                     {190845, 286056, 288594, 291132, 193905},
                     {199350, 298746, 301284, 303822, 202320},
                     {133536, 200082, 201747, 203412, 135432}}
                }
            }
        });
        myConv->getOperator()->associateInput(0,myInput);
        // Feed weights/bias through the meta-operator's producer inputs.
        myConv->input(1).first->getOperator()->setOutput(0, myWeights);
        myConv->input(2).first->getOperator()->setOutput(0, myBias);
        auto g = getConnectedGraphView(myConv);
        g->setDataType(DataType::Int32);
        g->setBackend("cpu");
        auto scheduler = SequentialScheduler(g);
        scheduler.forward();
        REQUIRE(*(op->getOutput(0)) == *myOutput);
    }
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <chrono> // std::micro, std::chrono::time_point,
// std::chrono::system_clock, std::chrono::duration
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <functional> // std::multiplies
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937
// std::uniform_int_distribution, std::uniform_real_distribution
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/PowImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Pow.hpp"
#include "aidge/utils/ArrayHelpers.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
// Randomized tests for the Pow operator on the CPU backend.
// forward(): elementwise x^y on same-shape tensors, 4-D broadcasting, and
// broadcasting against a lower-rank second operand; results are checked
// against a reference computed with std::pow on raw arrays.
// backward(): gradients are checked against hand-computed values.
TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
    constexpr std::uint16_t NBTRIALS = 10;
    // Create a random number generator
    std::random_device rd;
    std::mt19937 gen(rd());
    // Base/exponent values kept in [0.1, 1.1] so pow stays well-defined.
    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
    std::uniform_int_distribution<int> boolDist(0,1);

    // Create MatPow Operator
    std::shared_ptr<Node> myPow = Pow();
    auto op = std::static_pointer_cast<OperatorTensor>(myPow-> getOperator());
    op->setDataType(DataType::Float32);
    op->setBackend("cpu");

    // Create 2 input Tensors
    std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
    op->associateInput(0,T0);
    T0->setDataType(DataType::Float32);
    T0->setBackend("cpu");
    std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
    op -> associateInput(1,T1);
    T1->setDataType(DataType::Float32);
    T1->setBackend("cpu");

    // Create results Tensor
    std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
    Tres->setDataType(DataType::Float32);
    Tres->setBackend("cpu");

    // To measure execution time of 'MatPow_Op::forward()' member function call
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration{};

    SECTION("PowImpl_cpu::forward()") {
        // Placeholder sections, intentionally left empty (not yet implemented).
        SECTION("Scalar / Scalar") {
        }
        SECTION("Scalar / +1-D Tensor") {
        }
        SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
            std::size_t number_of_operation = 0;
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                // generate 2 random Tensors of identical random shape (1 to 5 dims)
                const std::size_t nbDims = nbDimsDist(gen);
                std::vector<std::size_t> dims;
                for (std::size_t i = 0; i < nbDims; ++i) {
                    dims.push_back(dimSizeDist(gen));
                }
                const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
                number_of_operation += nb_elements;

                // without broadcasting: reference result computed elementwise
                float* array0 = new float[nb_elements];
                float* array1 = new float[nb_elements];
                float* result = new float[nb_elements];
                for (std::size_t i = 0; i < nb_elements; ++i) {
                    array0[i] = valueDist(gen);
                    array1[i] = valueDist(gen);
                    result[i] = std::pow(array0[i], array1[i]);
                }

                // input0
                T0->resize(dims);
                T0 -> getImpl() -> setRawPtr(array0, nb_elements);
                // input1
                T1->resize(dims);
                T1 -> getImpl() -> setRawPtr(array1, nb_elements);
                // results
                Tres->resize(dims);
                Tres -> getImpl() -> setRawPtr(result, nb_elements);

                op->forwardDims();
                start = std::chrono::system_clock::now();
                myPow->forward();
                end = std::chrono::system_clock::now();
                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

                REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));

                delete[] array0;
                delete[] array1;
                delete[] result;

                // with broadcasting
            }
            Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
            Log::info("total time: {} μs\n", duration.count());
        }

        SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
            std::size_t number_of_operation = 0;
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                // generate 2 random Tensors
                // handle dimensions, replace some dimensions with '1' to get broadcasting
                constexpr std::size_t nbDims = 4;
                std::vector<std::size_t> dims;
                for (std::size_t i = 0; i < nbDims; ++i) {
                    dims.push_back(dimSizeDist(gen));
                }
                std::vector<std::size_t> dims0 = dims;
                std::vector<std::size_t> dims1 = dims;
                std::vector<std::size_t> dimsOut = dims;
                // Randomly collapse axes of either operand to 1; the output
                // keeps the non-collapsed extent on each axis.
                for (std::size_t i = 0; i < nbDims; ++i) {
                    if (boolDist(gen)) {
                        dims0[i] = 1;
                    }
                    if (boolDist(gen)) {
                        dims1[i] = 1;
                    }
                    dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
                }

                // create arrays and fill them with random values
                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
                float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];

                for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
                    array0[i] = valueDist(gen);
                }
                for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
                    array1[i] = valueDist(gen);
                }

                // compute true result: broadcast axes (extent 1) keep index 0.
                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
                const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
                for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                    for (std::size_t b = 0; b < dimsOut[1]; ++b) {
                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
                        const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
                                                    + strides1[1] * ((dims1[1] > 1) ? b : 0);
                        for (std::size_t c = 0; c < dimsOut[2]; ++c) {
                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
                            for (std::size_t d = 0; d < dimsOut[3]; ++d) {
                                std::size_t idx0 = idx0_0
                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
                                                    + ((dims0[3] > 1) ? d : 0);
                                std::size_t idx1 = idx1_0
                                                    + strides1[2] * ((dims1[2] > 1) ? c : 0)
                                                    + ((dims1[3] > 1) ? d : 0);
                                result[idx_out + d] = std::pow(array0[idx0], array1[idx1]);
                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " ** " << array1[idx1] << " -> " << idx_out + d << std::endl;
                            }
                        }
                    }
                }

                // conversion to Aidge::Tensors
                // input0
                T0->resize(dims0);
                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
                // input1
                T1->resize(dims1);
                T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
                // results
                Tres->resize(dimsOut);
                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);

                // compute result
                op->forwardDims();
                start = std::chrono::system_clock::now();
                myPow->forward();
                end = std::chrono::system_clock::now();
                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

                // comparison between truth and computed result
                REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));

                delete[] array0;
                delete[] array1;
                delete[] result;

                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
                number_of_operation += nb_elements;
            }
            Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
            Log::info("total time: {} μs\n", duration.count());
        }

        SECTION("+1-D Tensor / 1-D Tensor") {
            std::size_t number_of_operation = 0;
            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));

            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                // generate 2 random Tensors
                // handle dimensions
                constexpr std::size_t nbDims = 4;
                std::vector<std::size_t> dims0(4);
                for (std::size_t i = 0; i < nbDims; ++i) {
                    dims0[i] = dimSizeDist(gen);
                }
                std::vector<std::size_t> dimsOut = dims0;
                // Second operand: randomly collapse axes to 1, then drop the
                // first 1-3 axes to obtain a lower-rank tensor.
                std::vector<std::size_t> dims1 = dims0;
                for (std::size_t i = 0; i < nbDims; ++i) {
                    if (boolDist(gen)) {
                        dims1[i] = 1;
                    }
                }
                dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));

                // create arrays and fill them with random values
                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
                std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
                float* array1 = new float[array1_size];
                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];

                for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
                    array0[i] = valueDist(gen);
                }
                for (std::size_t i = 0; i < array1_size; ++i) {
                    array1[i] = valueDist(gen);
                }

                // compute true result: left-pad dims1 with 1s back to rank 4,
                // then apply the same broadcast indexing as above.
                auto dims1_tmp = dims1;
                dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));

                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
                const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
                for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                    for (std::size_t b = 0; b < dimsOut[1]; ++b) {
                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
                        const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
                                                    + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
                        for (std::size_t c = 0; c < dimsOut[2]; ++c) {
                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
                            for (std::size_t d = 0; d < dimsOut[3]; ++d) {
                                std::size_t idx0 = idx0_0
                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
                                                    + ((dims0[3] > 1) ? d : 0);
                                std::size_t idx1 = idx1_0
                                                    + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
                                                    + ((dims1_tmp[3] > 1) ? d : 0);
                                result[idx_out + d] = std::pow(array0[idx0], array1[idx1]);
                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " ** " << array1[idx1] << " -> " << idx_out + d << std::endl;
                            }
                        }
                    }
                }

                // conversion to Aidge::Tensors
                // input0
                T0->resize(dims0);
                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
                // input1
                T1->resize(dims1);
                T1 -> getImpl() -> setRawPtr(array1, array1_size);
                // results
                Tres->resize(dimsOut);
                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);

                // compute result
                op->forwardDims();
                start = std::chrono::system_clock::now();
                myPow->forward();
                end = std::chrono::system_clock::now();
                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

                // comparison between truth and computed result
                REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));

                delete[] array0;
                delete[] array1;
                delete[] result;

                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
                number_of_operation += nb_elements;
            }
            Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
            Log::info("total time: {} μs\n", duration.count());
        }
    }

    SECTION("PowImpl_cpu::backward()") {
        // Expected gradients follow the pow derivatives:
        //   d(a^b)/da = b * a^(b-1)   and   d(a^b)/db = a^b * ln(a),
        // each scaled by the incoming output gradient.
        SECTION("3D Tensors") {
            const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
                {
                    {
                        {
                            {2.0, 3.0},
                            {4.0, 5.0}
                        },
                        {
                            {6.0, 7.0},
                            {8.0, 9.0}
                        }
                    }
                }
            ));
            const auto input1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
                {
                    {
                        {
                            {1.0, 2.0},
                            {3.0, 2.0}
                        },
                        {
                            {2.0, 3.0},
                            {1.0, 0.5}
                        }
                    }
                }
            ));
            const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
                {
                    {
                        {
                            {0.5, 1.0},
                            {1.5, 2.0}
                        },
                        {
                            {2.5, 3.0},
                            {3.5, 4.0}
                        }
                    }
                }
            ));
            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
                {
                    {
                        {
                            {0.50000000, 6.00000000},
                            {72.00000000, 20.00000000}
                        },
                        {
                            {30.00000000, 441.00000000},
                            {3.50000000, 0.66666669}
                        }
                    }
                }
            ));
            const auto expectedGrad1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
                {
                    {
                        {
                            { 0.693147182, 9.88751030},
                            {1.33084259e+02, 8.04718933e+01}
                        },
                        {
                            {1.61258362e+02, 2.00234143e+03},
                            {5.82243652e+01, 2.63666954e+01}
                        }
                    }
                }
            ));
            for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1})
            {
                T->setBackend("cpu") ;
                T->setDataType(DataType::Float32);
            }
            std::shared_ptr<Node> powOp = Pow();
            auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator());
            opr->setDataType(DataType::Float32);
            opr->setBackend("cpu");
            opr->associateInput(0, input0);
            opr->associateInput(1, input1);
            opr->getOutput(0)->setGrad(gradOut);
            opr->forward();

            powOp->backward();
            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
        }
        // Gradient of the 1-D exponent is accumulated (reduced) over the
        // broadcast axes of the 3-D base.
        SECTION("Broadcasting") {
            const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
                {
                    {
                        {
                            {1.0, 2.0, 3.0},
                            {4.0, 5.0, 6.0}
                        },
                        {
                            {1.5, 2.5, 3.5},
                            {4.5, 5.5, 6.5}
                        }
                    }
                }
            ));
            const auto input1 = std::make_shared<Tensor>(Array1D<float, 3>(
                {
                    {0.1, 0.2, 0.3}
                }
            ));

            const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
                {
                    {
                        {
                            {1.0, 2.0, 3.0},
                            {4.0, 5.0, 6.0}
                        },
                        {
                            {6.0, 5.0, 4.0},
                            {3.0, 2.0, 1.0}
                        }
                    }
                }
            ));

            const Tensor expectedGrad0 = Array3D<float, 2, 2, 3>(
                {
                    {
                        {
                            {0.10000000, 0.22973967, 0.41711676},
                            {0.11486985, 0.27594593, 0.51353097}
                        },
                        {
                            {0.41655189, 0.48044977, 0.49926791},
                            {0.07748720, 0.10227509, 0.08092485}
                        }
                    }
                }
            );

            const Tensor expectedGrad1 = Array1D<float, 3>(
                {
                    {14.14779854, 22.99299049, 33.56402588}
                }
            );

            std::shared_ptr<Node> powOp = Pow();
            auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator());
            opr->setDataType(DataType::Float32);
            opr->setBackend("cpu");
            opr->associateInput(0, input0);
            opr->associateInput(1, input1);
            opr->getOutput(0)->setGrad(gradOut);
            powOp->forward();

            powOp->backward();
            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), expectedGrad0));
            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), expectedGrad1));
        }
    }
}
} // namespace Aidge
......@@ -9,34 +9,34 @@
*
********************************************************************************/
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ReLU.hpp"
#include "aidge/backend/cpu.hpp"
#include <memory>
using namespace Aidge;
TEST_CASE("[cpu/operator] ReLU(forward)") {
TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") {
SECTION("1D Tensor") {
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> {
{0, 1, 2,-3, 4,-5,-6, 7, 8, 9}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,10> {
Tensor expectedOutput = Array1D<int,10> {
{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}
});
};
std::shared_ptr<Node> myReLU = ReLU();
myReLU->getOperator()->setDatatype(DataType::Int32);
myReLU->getOperator()->setBackend("cpu");
myReLU->getOperator()->associateInput(0,input0);
myReLU->getOperator()->computeOutputDims();
myReLU->forward();
REQUIRE(*(myReLU->getOperator()->getOutput(0)) == *expectedOutput);
std::shared_ptr<ReLU_Op> op = std::make_shared<ReLU_Op>();
op->associateInput(0,input0);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
op->forward();
REQUIRE(*(op->getOutput(0)) == expectedOutput);
}
SECTION("2D Tensor") {
......@@ -46,20 +46,19 @@ TEST_CASE("[cpu/operator] ReLU(forward)") {
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,10> {
Tensor expectedOutput = Array2D<int,2,10> {
{
{ 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
{ 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
}
});
};
std::shared_ptr<Node> myReLU = ReLU();
myReLU->getOperator()->setDatatype(DataType::Int32);
myReLU->getOperator()->setBackend("cpu");
myReLU->getOperator()->associateInput(0,input0);
myReLU->getOperator()->computeOutputDims();
myReLU->forward();
REQUIRE(*myReLU->getOperator()->getOutput(0) == *expectedOutput);
std::shared_ptr<ReLU_Op> op = std::make_shared<ReLU_Op>();
op->associateInput(0,input0);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
op->forward();
REQUIRE(*op->getOutput(0) == expectedOutput);
}
SECTION("3D Tensor") {
......@@ -75,7 +74,7 @@ TEST_CASE("[cpu/operator] ReLU(forward)") {
}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,2,10> {
Tensor expectedOutput = Array3D<int,2,2,10> {
{
{
{ 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
......@@ -86,15 +85,14 @@ TEST_CASE("[cpu/operator] ReLU(forward)") {
{ 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
}
}
});
};
std::shared_ptr<Node> myReLU = ReLU();
myReLU->getOperator()->setDatatype(DataType::Int32);
myReLU->getOperator()->setBackend("cpu");
myReLU->getOperator()->associateInput(0,input0);
myReLU->getOperator()->computeOutputDims();
myReLU->forward();
REQUIRE(*(myReLU->getOperator()->getOutput(0)) == *expectedOutput);
std::shared_ptr<ReLU_Op> op = std::make_shared<ReLU_Op>();
op->associateInput(0,input0);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
op->forward();
REQUIRE(*(op->getOutput(0)) == expectedOutput);
}
SECTION("4D Tensor") {
......@@ -122,7 +120,7 @@ TEST_CASE("[cpu/operator] ReLU(forward)") {
}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
Tensor expectedOutput = Array4D<int,2,2,2,10> {
{
{
{
......@@ -145,14 +143,13 @@ TEST_CASE("[cpu/operator] ReLU(forward)") {
}
}
}
});
};
std::shared_ptr<Node> myReLU = ReLU();
myReLU->getOperator()->setDatatype(DataType::Int32);
myReLU->getOperator()->setBackend("cpu");
myReLU->getOperator()->associateInput(0,input0);
myReLU->getOperator()->computeOutputDims();
myReLU->forward();
REQUIRE(*myReLU->getOperator()->getOutput(0) == *expectedOutput);
std::shared_ptr<ReLU_Op> op = std::make_shared<ReLU_Op>();
op->associateInput(0,input0);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
op->forward();
REQUIRE(*op->getOutput(0) == expectedOutput);
}
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <algorithm> // std::fill
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t, std::uint16_t
#include <memory>
#include <random> // std::random_device, std::mt19937
// std::uniform_int_distribution, std::uniform_real_distribution
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
// Tests for the ReduceMean operator on the CPU backend:
//  - "ForwardDims": randomized shape-inference checks for every combination of
//    keepDims / noopWithEmptyAxes;
//  - remaining sections: value checks on small float tensors.
TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
    SECTION("ForwardDims")
    {
        constexpr std::uint16_t NBTRIALS = 10;
        // Create a random number generator
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
        std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
        std::uniform_int_distribution<int> boolDist(0,1);

        // keepDims = true: reduced axes stay in the output with extent 1.
        SECTION("KeepDims") {
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                DimSize_t nbDims = nbDimsDist(gen);
                std::vector<DimSize_t> dims(nbDims);
                std::vector<DimSize_t> expectedOutDims(nbDims);
                std::vector<std::int32_t> axes;
                // Randomly mark each axis as reduced.
                for (std::size_t i = 0; i < nbDims; i++) {
                    dims[i] = dimSizeDist(gen);
                    expectedOutDims[i] = dims[i];
                    if(boolDist(gen)) {
                        axes.push_back(i);
                        expectedOutDims[i] = 1;
                    }
                }
                if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions
                    std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1);
                }

                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
                myInput->setBackend("cpu");
                myInput->setDataType(DataType::Float32);
                myInput->zeros();
                std::shared_ptr<Node> myReduceMean = ReduceMean(axes, true);
                auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
                op->associateInput(0,myInput);
                op->setDataType(DataType::Float32);
                op->setBackend("cpu");
                op->forwardDims();

                const auto outputDims = op->getOutput(0)->dims();
                REQUIRE(outputDims == expectedOutDims);
            }
        }
        // keepDims = false: reduced axes are removed from the output shape.
        SECTION("Not KeepDims") {
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                DimSize_t nbDims = nbDimsDist(gen);
                std::vector<DimSize_t> dims(nbDims);
                std::vector<DimSize_t> expectedOutDims;
                std::vector<std::int32_t> axes;
                for (std::size_t i = 0; i < nbDims; i++) {
                    dims[i] = dimSizeDist(gen);
                    if(boolDist(gen)) {
                        axes.push_back(i);
                    }
                    else {
                        expectedOutDims.push_back(dims[i]);
                    }
                }
                if (axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions
                    expectedOutDims = std::vector<DimSize_t>{1};
                }

                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
                myInput->setBackend("cpu");
                myInput->setDataType(DataType::Float32);
                std::shared_ptr<Node> myReduceMean = ReduceMean(axes, false);
                auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
                op->associateInput(0,myInput);
                op->setDataType(DataType::Float32);
                op->setBackend("cpu");
                op->forwardDims();

                const auto outputDims = op->getOutput(0)->dims();
                REQUIRE(outputDims == expectedOutDims);
            }
        }
        // noopWithEmptyAxes = true and no axes: output shape equals input shape.
        SECTION("NoopWithEmptyAxes") {
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                DimSize_t nbDims = nbDimsDist(gen);
                std::vector<DimSize_t> dims(nbDims);
                for (std::size_t i = 0; i < nbDims; i++) {
                    dims[i] = dimSizeDist(gen);
                }
                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
                myInput->setBackend("cpu");
                myInput->setDataType(DataType::Float32);
                std::shared_ptr<Node> myReduceMean = ReduceMean(std::vector<int32_t>{}, false, true);
                auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
                op->associateInput(0,myInput);
                op->setDataType(DataType::Float32);
                op->setBackend("cpu");
                op->forwardDims();

                const auto outputDims = op->getOutput(0)->dims();
                REQUIRE(outputDims == dims);
            }
        }
        // noopWithEmptyAxes = false and no axes: all dimensions are reduced,
        // yielding a rank-1 output of size 1.
        SECTION("Not NoopWithEmptyAxes") {
            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                DimSize_t nbDims = nbDimsDist(gen);
                std::vector<DimSize_t> dims(nbDims);
                for (std::size_t i = 0; i < nbDims; i++) {
                    dims[i] = dimSizeDist(gen);
                }
                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
                myInput->setBackend("cpu");
                myInput->setDataType(DataType::Float32);
                std::shared_ptr<Node> myReduceMean = ReduceMean({}, false, false);
                auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
                op->associateInput(0,myInput);
                op->setDataType(DataType::Float32);
                op->setBackend("cpu");
                op->forwardDims();

                REQUIRE(op->getOutput(0)->nbDims() == 1);
                REQUIRE(op->getOutput(0)->size() == 1);
            }
        }
    }
    SECTION("KeepDims") {
        // Mean over axis 1 of a 3x2x2 tensor; output keeps the axis as size 1.
        SECTION("test 1") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
                {
                    {
                        { 5.0, 1.0 },
                        { 20.0, 2.0 }
                    },
                    {
                        { 30.0, 1.0 },
                        { 40.0, 2.0 }
                    },
                    {
                        { 55.0, 1.0 },
                        { 60.0, 2.0 }
                    }
                }
            });
            Tensor myOutput = Tensor(Array3D<float,3,1,2> {
                {
                    {{ 12.5, 1.5 }},
                    {{ 35.0, 1.5 }},
                    {{ 57.5, 1.5 }}
                }
            });

            std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 1);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceMean->forward();
            op->getOutput(0)->print();

            REQUIRE(*(op->getOutput(0)) == myOutput);
        }
        // Mean over axes {1,2} of a 3x3x2 tensor.
        SECTION("test 2") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> {
                {
                    {
                        { 0.0, 0.0 },
                        { 1.0, 1.0 },
                        { 2.0, 2.0 }
                    },
                    {
                        { 3.0, 3.0 },
                        { 4.0, 4.0 },
                        { 5.0, 5.0 }
                    },
                    {
                        { 6.0, 6.0 },
                        { 7.0, 7.0 },
                        { 8.0, 8.0 }
                    }
                }
            });
            Tensor myOutput = Tensor(Array3D<float,3,1,1> {
                {
                    {{ 1.0 }},
                    {{ 4.0 }},
                    {{ 7.0 }}
                }
            });

            std::shared_ptr<Node> myReduceMean = ReduceMean({1, 2}, 1);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceMean->forward();
            myOutput.print();
            op->getOutput(0)->print();
            REQUIRE(*(op->getOutput(0)) == myOutput);

        }
    }
    // keepDims = 0: the reduced axis is dropped (3x2x2 -> 3x2).
    SECTION("not_KeepDims") {
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
            {
                {
                    { 5.0, 1.0 },
                    { 20.0, 2.0 }
                },
                {
                    { 30.0, 1.0 },
                    { 40.0, 2.0 }
                },
                {
                    { 55.0, 1.0 },
                    { 60.0, 2.0 }
                }
            }
        });
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> {
            {
                { 12.5, 1.5 },
                { 35.0, 1.5 },
                { 57.5, 1.5 }
            }
        });

        std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 0);
        auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
        op->associateInput(0,myInput);
        op->setDataType(DataType::Float32);
        op->setBackend("cpu");
        myReduceMean->forward();
        op->getOutput(0)->print();

        REQUIRE(*(op->getOutput(0)) == *myOutput);

    }
    // Empty axes list with noopWithEmptyAxes left false: reduce over ALL axes.
    SECTION("all_axes") {
        SECTION("1") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
                {
                    {
                        { 5.0, 1.0 },
                        { 20.0, 2.0 }
                    },
                    {
                        { 30.0, 1.0 },
                        { 40.0, 2.0 }
                    },
                    {
                        { 55.0, 1.0 },
                        { 60.0, 2.0 }
                    }
                }
            });
            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
                {18.25}
            });

            std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceMean->forward();
            op->getOutput(0)->print();

            REQUIRE(*(op->getOutput(0)) == *myOutput);
        }
        SECTION("2") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<float,5,4> {
                {{ 0.004232f, 0.105120f, 0.045124f, 0.009205f},
                 { 0.000766f, 0.272162f, 0.503560f, 0.044163f},
                 { 0.049755f, 0.000305f, 0.143634f, 0.013253f},
                 { 0.096258f, 0.311231f, 0.358143f, 0.000452f},
                 { 0.468617f, 0.015693f, 0.145316f, 0.000105f}}
            });
            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
                {0.1293547f}
            });

            std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceMean->forward();

            // Float mean: compare with tolerance rather than exact equality.
            REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput));
        }
        // Empty axes with noopWithEmptyAxes = 1: operator passes input through.
        SECTION("noop_with_empty_axes") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
                {
                    {
                        { 5.0, 1.0 },
                        { 20.0, 2.0 }
                    },
                    {
                        { 30.0, 1.0 },
                        { 40.0, 2.0 }
                    },
                    {
                        { 55.0, 1.0 },
                        { 60.0, 2.0 }
                    }
                }
            });

            std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0, 1);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceMean->forward();
            op->getOutput(0)->print();

            REQUIRE(*(op->getOutput(0)) == *myInput);
        }
    }
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t, std::int32_t
#include <memory>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/data/Data.hpp" // DataType
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/operator/ReduceSum.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/utils/Types.h"
using namespace Aidge;
TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
SECTION("ForwardDims")
{
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
std::uniform_int_distribution<int> boolDist(0,1);
SECTION("KeepDims") {
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
DimSize_t nbDims = nbDimsDist(gen);
std::vector<DimSize_t> dims(nbDims);
std::vector<DimSize_t> expectedOutDims(nbDims);
std::vector<std::int32_t> axes;
for (std::size_t i = 0; i < nbDims; i++) {
dims[i] = dimSizeDist(gen);
expectedOutDims[i] = dims[i];
if(boolDist(gen)) {
axes.push_back(i);
expectedOutDims[i] = 1;
}
}
if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions
std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1);
}
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
myInput->setBackend("cpu");
myInput->setDataType(DataType::Float32);
myInput->zeros();
std::shared_ptr<Node> myReduceSum = ReduceSum(axes, true);
auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
op->associateInput(0,myInput);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->forwardDims();
const auto outputDims = op->getOutput(0)->dims();
REQUIRE(outputDims == expectedOutDims);
}
}
SECTION("Not KeepDims") {
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
DimSize_t nbDims = nbDimsDist(gen);
std::vector<DimSize_t> dims(nbDims);
std::vector<DimSize_t> expectedOutDims;
std::vector<std::int32_t> axes;
for (std::size_t i = 0; i < nbDims; i++) {
dims[i] = dimSizeDist(gen);
if(boolDist(gen)) {
axes.push_back(i);
}
else {
expectedOutDims.push_back(dims[i]);
}
}
if (axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions
expectedOutDims = std::vector<DimSize_t>{1};
}
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
myInput->setBackend("cpu");
myInput->setDataType(DataType::Float32);
std::shared_ptr<Node> myReduceSum = ReduceSum(axes, false);
auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
op->associateInput(0,myInput);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->forwardDims();
const auto outputDims = op->getOutput(0)->dims();
REQUIRE(outputDims == expectedOutDims);
}
}
SECTION("NoopWithEmptyAxes") {
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
DimSize_t nbDims = nbDimsDist(gen);
std::vector<DimSize_t> dims(nbDims);
for (std::size_t i = 0; i < nbDims; i++) {
dims[i] = dimSizeDist(gen);
}
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
myInput->setBackend("cpu");
myInput->setDataType(DataType::Float32);
std::shared_ptr<Node> myReduceSum = ReduceSum(std::vector<std::int32_t>{}, false, true);
auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
op->associateInput(0,myInput);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->forwardDims();
const auto outputDims = op->getOutput(0)->dims();
REQUIRE(outputDims == dims);
}
}
SECTION("Not NoopWithEmptyAxes") {
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
DimSize_t nbDims = nbDimsDist(gen);
std::vector<DimSize_t> dims(nbDims);
for (std::size_t i = 0; i < nbDims; i++) {
dims[i] = dimSizeDist(gen);
}
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
myInput->setBackend("cpu");
myInput->setDataType(DataType::Float32);
std::shared_ptr<Node> myReduceSum = ReduceSum({}, false, false);
auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
op->associateInput(0,myInput);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->forwardDims();
REQUIRE(op->getOutput(0)->nbDims() == 1);
REQUIRE(op->getOutput(0)->size() == 1);
}
}
}
    SECTION("KeepDims") {
        SECTION("test 1") {
            // Sum over axis 1 with keepDims: (3,2,2) -> (3,1,2).
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
                {
                    {
                        { 5.0, 1.0 },
                        { 20.0, 2.0 }
                    },
                    {
                        { 30.0, 1.0 },
                        { 40.0, 2.0 }
                    },
                    {
                        { 55.0, 1.0 },
                        { 60.0, 2.0 }
                    }
                }
            });
            // Column sums of each 2x2 slice, e.g. 5+20 = 25 and 1+2 = 3.
            Tensor myOutput = Tensor(Array3D<float,3,1,2> {
                {
                    {{ 25.0, 3.0 }},
                    {{ 70.0, 3.0 }},
                    {{ 115.0, 3.0 }}
                }
            });
            std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 1);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceSum->forward();
            op->getOutput(0)->print();
            REQUIRE(*(op->getOutput(0)) == myOutput);
        }
        SECTION("test 2") {
            // Sum over axes {1, 2} with keepDims: (3,3,2) -> (3,1,1).
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> {
                {
                    {
                        { 0.0, 0.0 },
                        { 1.0, 1.0 },
                        { 2.0, 2.0 }
                    },
                    {
                        { 3.0, 3.0 },
                        { 4.0, 4.0 },
                        { 5.0, 5.0 }
                    },
                    {
                        { 6.0, 6.0 },
                        { 7.0, 7.0 },
                        { 8.0, 8.0 }
                    }
                }
            });
            // Per-slice totals: 0+0+1+1+2+2 = 6, etc.
            Tensor myOutput = Tensor(Array3D<float,3,1,1> {
                {
                    {{ 6.0 }},
                    {{ 24.0 }},
                    {{ 42.0 }}
                }
            });
            std::shared_ptr<Node> myReduceSum = ReduceSum({1, 2}, 1);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceSum->forward();
            myOutput.print();
            op->getOutput(0)->print();
            REQUIRE(*(op->getOutput(0)) == myOutput);
        }
    }
SECTION("not_KeepDims") {
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
{
{
{ 5.0, 1.0 },
{ 20.0, 2.0 }
},
{
{ 30.0, 1.0 },
{ 40.0, 2.0 }
},
{
{ 55.0, 1.0 },
{ 60.0, 2.0 }
}
}
});
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> {
{
{ 25.0, 3.0 },
{ 70.0, 3.0 },
{ 115.0, 3.0 }
}
});
std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 0);
auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
op->associateInput(0,myInput);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
myReduceSum->forward();
op->getOutput(0)->print();
REQUIRE(*(op->getOutput(0)) == *myOutput);
}
    SECTION("all_axes") {
        // Reducing over every axis (explicitly or via an empty axes list with
        // noop_with_empty_axes == 0) yields a single summed value.
        SECTION("1") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
                {
                    {
                        { 5.0, 1.0 },
                        { 20.0, 2.0 }
                    },
                    {
                        { 30.0, 1.0 },
                        { 40.0, 2.0 }
                    },
                    {
                        { 55.0, 1.0 },
                        { 60.0, 2.0 }
                    }
                }
            });
            // Total of all 12 values.
            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
                {219.0}
            });
            std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceSum->forward();
            op->getOutput(0)->print();
            REQUIRE(*(op->getOutput(0)) == *myOutput);
        }
        SECTION("2") {
            // Same reduction, but with every axis listed explicitly ({0, 1}).
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<float,5,4> {
                {{ 0.004232f, 0.105120f, 0.045124f, 0.009205f},
                { 0.000766f, 0.272162f, 0.503560f, 0.044163f},
                { 0.049755f, 0.000305f, 0.143634f, 0.013253f},
                { 0.096258f, 0.311231f, 0.358143f, 0.000452f},
                { 0.468617f, 0.015693f, 0.145316f, 0.000105f}}
            });
            // Compared with approxEq because of float rounding.
            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
                {2.587094f}
            });
            std::shared_ptr<Node> myReduceSum = ReduceSum({0, 1}, 0);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceSum->forward();
            op->getOutput(0)->print();
            REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput));
        }
        SECTION("noop_with_empty_axes") {
            // With noop_with_empty_axes == 1 and an empty axes list, the
            // operator must forward its input unchanged.
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
                {
                    {
                        { 5.0, 1.0 },
                        { 20.0, 2.0 }
                    },
                    {
                        { 30.0, 1.0 },
                        { 40.0, 2.0 }
                    },
                    {
                        { 55.0, 1.0 },
                        { 60.0, 2.0 }
                    }
                }
            });
            std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0, 1);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceSum->forward();
            op->getOutput(0)->print();
            REQUIRE(*(op->getOutput(0)) == *myInput);
        }
    }
}
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cstdint>
#include <memory>
#include <aidge/data/Data.hpp>
#include <aidge/data/Interpolation.hpp>
#include <aidge/data/half.hpp>
#include <aidge/operator/Pad.hpp>
#include <aidge/utils/ArrayHelpers.hpp>
#include <catch2/catch_test_macros.hpp>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/operator/Resize.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
TEST_CASE("[cpu/operator] Resize(forward)", "[Resize][CPU]") {
Log::setConsoleLevel(Log::Level::Debug);
SECTION("Nearest") {
SECTION("Ceil") {
std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>(Array4D<std::int32_t, 1, 1, 2, 2>{{
{
{
{ 1, 2},
{ 3, 4}
}
}
}});
Tensor expected_out_tensor = Tensor(Array4D<std::int32_t, 1, 1, 4, 4>{{
{
{
{ 1, 1, 1, 2},
{ 1, 1, 1, 2},
{ 1, 1, 1, 2},
{ 3, 3, 3, 4}
}
}
}});
std::vector<float> scales = {1.0f, 1.0f, 2.0f, 2.0f};
auto resize_node = Resize(scales, {}, Interpolation::CoordinateTransformation::HalfPixel, Interpolation::Mode::Floor);
auto op = std::static_pointer_cast<Resize_Op>(resize_node->getOperator());
op->associateInput(0, input_tensor);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
op->forwardDims(true);
op->forward();
op->getOutput(0)->print();
expected_out_tensor.print();
CHECK(*(op->getOutput(0)) == expected_out_tensor);
}
}
SECTION("1-sized input tensor (upscaling)") {
std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>(Array4D<float, 1, 1, 1, 1>{{{{{0.417022}}}}});
std::vector<std::size_t> sizes = {1, 1, 2, 2};
auto resize_node = Resize({}, sizes, Interpolation::CoordinateTransformation::HalfPixel, Interpolation::Mode::Linear);
auto op = std::static_pointer_cast<Resize_Op>(resize_node->getOperator());
op->associateInput(0, input_tensor);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->forwardDims(true);
op->forward();
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float, 1, 1, 2, 2>{
{{{{0.417022, 0.417022}, {0.417022, 0.417022}}}}});
op->getOutput(0)->print();
CHECK(approxEq<float>(*op->getOutput(0), *expectedOutput) == true);
}
    SECTION("Upscaling from 5x5 to 10x10 (linear)") {
        // Bilinear upscale of a 5x5 map to 10x10 using the asymmetric
        // coordinate transformation (src = dst / scale, no half-pixel shift).
        // Expected values are compared with absolute/relative tolerance 1e-5.
        std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>(
            Array4D<float, 1, 1, 5, 5>{{{{{7.20324516e-01,
                                           1.14374816e-04,
                                           3.02332580e-01,
                                           1.46755889e-01,
                                           9.23385918e-02},
                                          {1.86260208e-01,
                                           3.45560730e-01,
                                           3.96767467e-01,
                                           5.38816750e-01,
                                           4.19194520e-01},
                                          {6.85219526e-01,
                                           2.04452246e-01,
                                           8.78117442e-01,
                                           2.73875929e-02,
                                           6.70467496e-01},
                                          {4.17304814e-01,
                                           5.58689833e-01,
                                           1.40386939e-01,
                                           1.98101491e-01,
                                           8.00744593e-01},
                                          {9.68261600e-01,
                                           3.13424170e-01,
                                           6.92322612e-01,
                                           8.76389146e-01,
                                           8.94606650e-01}}}}}
        );
        // Target output shape (N, C, H, W).
        std::vector<std::size_t> sizes = {1, 1, 10, 10};
        auto resize_node = Resize({}, sizes, Interpolation::CoordinateTransformation::Asymmetric, Interpolation::Mode::Linear);
        auto op = std::static_pointer_cast<Resize_Op>(resize_node->getOperator());
        op->associateInput(0, input_tensor);
        op->setDataType(DataType::Float32);
        op->setBackend("cpu");
        op->forwardDims(true);
        op->forward();
        // Reference 10x10 result; the last row/column repeat because the
        // asymmetric transform clamps source coordinates at the input border.
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
            Array4D<float, 1, 1, 10, 10>{{{{{7.20324516e-01,
                                             3.60219449e-01,
                                             1.14374816e-04,
                                             1.51223481e-01,
                                             3.02332580e-01,
                                             2.24544227e-01,
                                             1.46755889e-01,
                                             1.19547240e-01,
                                             9.23385918e-02,
                                             9.23385918e-02},
                                            {4.53292370e-01,
                                             3.13064963e-01,
                                             1.72837555e-01,
                                             2.61193782e-01,
                                             3.49550009e-01,
                                             3.46168160e-01,
                                             3.42786312e-01,
                                             2.99276441e-01,
                                             2.55766571e-01,
                                             2.55766571e-01},
                                            {1.86260208e-01,
                                             2.65910476e-01,
                                             3.45560730e-01,
                                             3.71164083e-01,
                                             3.96767467e-01,
                                             4.67792094e-01,
                                             5.38816750e-01,
                                             4.79005635e-01,
                                             4.19194520e-01,
                                             4.19194520e-01},
                                            {4.35739875e-01,
                                             3.55373204e-01,
                                             2.75006473e-01,
                                             4.56224471e-01,
                                             6.37442470e-01,
                                             4.60272312e-01,
                                             2.83102185e-01,
                                             4.13966596e-01,
                                             5.44831038e-01,
                                             5.44831038e-01},
                                            {6.85219526e-01,
                                             4.44835901e-01,
                                             2.04452246e-01,
                                             5.41284859e-01,
                                             8.78117442e-01,
                                             4.52752531e-01,
                                             2.73875929e-02,
                                             3.48927557e-01,
                                             6.70467496e-01,
                                             6.70467496e-01},
                                            {5.51262140e-01,
                                             4.66416597e-01,
                                             3.81571054e-01,
                                             4.45411623e-01,
                                             5.09252191e-01,
                                             3.10998380e-01,
                                             1.12744540e-01,
                                             4.24175322e-01,
                                             7.35606015e-01,
                                             7.35606015e-01},
                                            {4.17304814e-01,
                                             4.87997323e-01,
                                             5.58689833e-01,
                                             3.49538386e-01,
                                             1.40386939e-01,
                                             1.69244215e-01,
                                             1.98101491e-01,
                                             4.99423027e-01,
                                             8.00744593e-01,
                                             8.00744593e-01},
                                            {6.92783237e-01,
                                             5.64420104e-01,
                                             4.36057001e-01,
                                             4.26205903e-01,
                                             4.16354775e-01,
                                             4.76800054e-01,
                                             5.37245333e-01,
                                             6.92460477e-01,
                                             8.47675622e-01,
                                             8.47675622e-01},
                                            {9.68261600e-01,
                                             6.40842915e-01,
                                             3.13424170e-01,
                                             5.02873421e-01,
                                             6.92322612e-01,
                                             7.84355879e-01,
                                             8.76389146e-01,
                                             8.85497928e-01,
                                             8.94606650e-01,
                                             8.94606650e-01},
                                            {9.68261600e-01,
                                             6.40842915e-01,
                                             3.13424170e-01,
                                             5.02873421e-01,
                                             6.92322612e-01,
                                             7.84355879e-01,
                                             8.76389146e-01,
                                             8.85497928e-01,
                                             8.94606650e-01,
                                             8.94606650e-01}}}}});
        Log::notice("Expected result : dims = {}", expectedOutput->dims());
        expectedOutput->print();
        Log::notice("\nActual result: dims = {}", op->getOutput(0)->dims());
        op->getOutput(0)->print();
        CHECK(approxEq<float>(*op->getOutput(0),
                              *expectedOutput,
                              1e-5f,
                              1e-5f) == true);
    }
}
} // namespace Aidge