Compare revisions
Showing 1910 additions and 759 deletions
@@ -17,6 +17,7 @@
#include "aidge/operator/Sub.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/SubImpl.hpp"
@@ -35,9 +36,15 @@ void Aidge::SubImpl_cpu::forward() {
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});

const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
                                                               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
                                                               std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());

// Call kernel
kernelFunc(inputDims0,
           inputDims1,
           std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
           getCPUPtr(mOp.getRawInput(0)),
           getCPUPtr(mOp.getRawInput(1)),
           getCPUPtr(mOp.getRawOutput(0)));
...
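For context: getBroadcastedDims (declared in the new Broadcasting.hpp header) normalizes an input's shape against the output's shape so the kernel can walk both operands with one index scheme. A minimal sketch of the behavior this implies, assuming the usual left-padding broadcast convention (an illustration, not the actual aidge implementation):

#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch: return inputDims left-padded with 1s to the rank of outputDims,
// e.g. outputDims = {3, 3, 3, 2} and inputDims = {2} give {1, 1, 1, 2}.
std::vector<std::size_t> broadcastedDims(const std::vector<std::size_t>& outputDims,
                                         const std::vector<std::size_t>& inputDims) {
    std::vector<std::size_t> dims(outputDims.size(), 1);
    std::copy(inputDims.cbegin(), inputDims.cend(), dims.end() - inputDims.size());
    return dims;
}

The kernel can then treat every size-1 axis as "repeat this input along that axis", which is exactly what the new inputDims0/inputDims1 arguments communicate.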
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/operator/Tanh.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/TanhImpl.hpp"
#include "aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp"
Aidge::NbElts_t Aidge::TanhImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// this implementation can be in-place
return 0;
}
void Aidge::TanhImpl_cpu::forward() {
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
// Find the correct kernel type
auto kernelFunc = Registrar<TanhImplForward_cpu>::create({
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawOutput(0)));
}
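The kernel looked up through Registrar<TanhImplForward_cpu> lives in TanhImpl_forward_kernels.hpp, which this compare does not expand. As a sketch of what such an element-wise forward kernel typically looks like in this backend, assuming it follows the element-count/raw-pointer convention visible in the call above:

#include <cmath>
#include <cstddef>

// Illustrative sketch of an element-wise tanh kernel: templated on the
// input/output types and fed type-erased buffers, as forward() implies.
template <class I, class O>
void TanhImpl_cpu_forward_kernel(std::size_t inputLength,
                                 const void* input_, void* output_) {
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);
    for (std::size_t i = 0; i < inputLength; ++i) {
        output[i] = std::tanh(input[i]);
    }
}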
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <array>
#include <catch2/catch_test_macros.hpp>
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu/data/TensorImpl.hpp"
using namespace Aidge;
TEST_CASE("Tensor creation") {
SECTION("from const array") {
Tensor x = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
Tensor xCopy = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
Tensor xFloat =
Array3D<float, 2, 2, 2>{{{{1., 2.}, {3., 4.}}, {{5., 6.}, {7., 8.}}}};
SECTION("Tensor features") {
REQUIRE(x.nbDims() == 3);
REQUIRE(x.dims()[0] == 2);
REQUIRE(x.dims()[1] == 2);
REQUIRE(x.dims()[2] == 2);
REQUIRE(x.size() == 8);
}
SECTION("Access to array") {
REQUIRE(static_cast<int *>(x.getImpl()->rawPtr())[0] == 1);
REQUIRE(static_cast<int *>(x.getImpl()->rawPtr())[7] == 8);
}
SECTION("get function") {
REQUIRE(x.get<int>({0, 0, 0}) == 1);
REQUIRE(x.get<int>({0, 0, 1}) == 2);
REQUIRE(x.get<int>({0, 1, 1}) == 4);
REQUIRE(x.get<int>({1, 1, 0}) == 7);
x.set<int>({1, 1, 1}, 36);
REQUIRE(x.get<int>({1, 1, 1}) == 36);
}
SECTION("Pretty printing for debug") { REQUIRE_NOTHROW(x.print()); }
SECTION("Tensor (in)equality") {
REQUIRE(x == xCopy);
REQUIRE_FALSE(x == xFloat);
}
}
}
TEST_CASE("Tensor methods") {
Tensor x = Array3D<int, 2, 2, 2>{{
{{1, 2},
{3, 4}},
{{5, 6},
{7, 8}}
}};
Tensor xCopy = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
Tensor xFloat =
Array3D<float, 2, 2, 2>{{{{1., 2.}, {3., 4.}}, {{5., 6.}, {7., 8.}}}};
SECTION("Tensor sharing") {
Tensor xCopyCtor(x);
REQUIRE(xCopyCtor.getImpl() == x.getImpl());
Tensor xEqOp = x;
REQUIRE(xEqOp.getImpl() == x.getImpl());
Tensor xCloned = x.clone();
REQUIRE(xCloned.getImpl() != x.getImpl());
REQUIRE(xCloned == x);
}
SECTION("Tensor extract") {
Tensor y = x.extract({0, 1});
REQUIRE(y.getImpl() == x.getImpl());
REQUIRE(approxEq<int>(y, Array1D<int, 2>{{3, 4}}));
REQUIRE(y.isContiguous());
Tensor y2 = x.extract({0, 1, 1}, {2, 1, 1});
REQUIRE(y2.getImpl() == x.getImpl());
REQUIRE(!y2.isContiguous());
Tensor y3 = y2.clone();
REQUIRE(y3.isContiguous());
REQUIRE(approxEq<int>(y3, Array3D<int, 2, 1, 1>{{{{4}}, {{8}}}}));
}
}
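A quick sanity check on the contiguity assertions above: x.extract({0, 1, 1}, {2, 1, 1}) selects the elements at coordinates (0,1,1) and (1,1,1), i.e. flat offsets 3 and 7 in the row-major 2×2×2 buffer (values 4 and 8). The view therefore strides by 4 between its two elements, so y2 shares x's implementation but is not contiguous, and clone() repacks it into a fresh contiguous buffer, which is what the y3 checks verify.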
@@ -117,4 +117,63 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
REQUIRE(*op->getOutput(0) == *expectedOutput);
}
SECTION("Broadcasting") {
std::shared_ptr<Tensor> input_0 = std::make_shared<Tensor>(Array4D<int,3,1,3,2> {
{ //
{ //
{{0, 1},{2, 3},{4, 5}} //
}, //
{ //
{{6, 7},{8, 9},{10, 11}} //
}, //
{ //
{{12, 13},{14, 15},{16, 17}} //
} //
} //
}); //
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
{ //
{ //
{{20, 21},{22, 23},{24, 25}}, //
{{26, 27},{28, 29},{30, 31}}, //
{{32, 33},{34, 35},{36, 37}} //
} //
} //
}); //
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{100,200}});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
{ //
{ //
{{ 120, 222},{ 124, 226},{ 128, 230}}, //
{{ 126, 228},{ 130, 232},{ 134, 236}}, //
{{ 132, 234},{ 136, 238},{ 140, 242}} //
}, //
{ //
{{ 126, 228},{ 130, 232},{ 134, 236}}, //
{{ 132, 234},{ 136, 238},{ 140, 242}}, //
{{ 138, 240},{ 142, 244},{ 146, 248}} //
}, //
{ //
{{ 132, 234},{ 136, 238},{140, 242}}, //
{{ 138, 240},{ 142, 244},{146, 248}}, //
{{ 144, 246},{ 148, 250},{152, 254}} //
} //
} //
}); //
std::shared_ptr<Node> myAdd = Add(3);
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input_0);
op->associateInput(1, input_1);
op->associateInput(2, input_2);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
op->computeOutputDims();
myAdd->forward();
op->getOutput(0)->print();
expectedOutput->print();
REQUIRE(*op->getOutput(0) == *expectedOutput);
}
}
\ No newline at end of file
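For hand-checking the expected values above: the inputs broadcast as (3,1,3,2) + (1,3,3,2) + (2) -> (3,3,3,2), so each output element is output[i][j][k][l] = input_0[i][0][k][l] + input_1[0][j][k][l] + input_2[l]. For example, output[0][0][0][0] = 0 + 20 + 100 = 120 and output[2][2][2][1] = 17 + 37 + 200 = 254, matching the first and last entries of expectedOutput.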
This diff is collapsed.
@@ -10,102 +10,281 @@
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef>  // std::size_t
#include <cstdint>  // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <random>  // std::random_device, std::mt19937, std::uniform_real_distribution

#include "aidge/data/Tensor.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"

namespace Aidge {

TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
const std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dis(0.0, 1.0); // Random float distribution between 0 and 1
std::uniform_int_distribution<std::size_t> distDims(10, 100);
std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);

// Create MatMul Operator
std::shared_ptr<Node> myMatMul = MatMul();
auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());

// To measure execution time of 'MatMul_Op::forward()' member function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration = std::chrono::duration<double, std::micro>::zero();

SECTION("2-D Tensors") {
std::size_t totalComputation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate Tensors dimensions
const std::size_t dim0 = distDims(gen);
const std::size_t dim1 = distDims(gen);
const std::size_t dim2 = distDims(gen);
totalComputation += dim0*dim1*dim2;

// Create and populate the array with random float values
float bigArray1[dim0][dim1];
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim1; ++j) {
bigArray1[i][j] = dis(gen); // Generate random float value
}
}
float bigArray2[dim1][dim2];
for (int i = 0; i < dim1; ++i) {
for (int j = 0; j < dim2; ++j) {
bigArray2[i][j] = dis(gen); // Generate random float value
}
}
float res[dim0][dim2];
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim2; ++j) {
float sum = 0.0;
for (int k = 0; k < dim1; ++k) {
sum += bigArray1[i][k] * bigArray2[k][j];
}
res[i][j] = sum;
}
}
// Convert bigArray1 to Tensor
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
T1 -> resize({dim0,dim1});
T1 -> setBackend("cpu");
T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dim0*dim1);
// Convert bigArray2 to Tensor
std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
T2 -> resize({dim1,dim2});
T2 -> setBackend("cpu");
T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dim1*dim2);
// convert res to Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
Tres -> resize({dim0,dim2});
Tres -> setBackend("cpu");
Tres -> getImpl() -> setRawPtr(&res[0][0], dim0*dim2);
op->associateInput(0, T1);
op->associateInput(1, T2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMatMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
}
std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
std::cout << "total time: " << duration.count() << std::endl;
}

SECTION("3-D Tensors") {
std::size_t totalComputation = 0;
duration = std::chrono::duration<double, std::micro>::zero();
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate Tensors dimensions
const std::size_t dimNb = distNbMatrix(gen);
const std::size_t dim0 = distDims(gen);
const std::size_t dim1 = distDims(gen);
const std::size_t dim2 = distDims(gen);
totalComputation += dim0*dim1*dim2*dimNb;

// Create and populate the array with random float values
float bigArray1[dimNb][dim0][dim1];
for (std::size_t n = 0; n < dimNb; ++n) {
for (std::size_t i = 0; i < dim0; ++i) {
for (std::size_t j = 0; j < dim1; ++j) {
bigArray1[n][i][j] = dis(gen); // Generate random float value
}
}
}
float bigArray2[dimNb][dim1][dim2];
for (std::size_t n = 0; n < dimNb; ++n) {
for (int i = 0; i < dim1; ++i) {
for (int j = 0; j < dim2; ++j) {
bigArray2[n][i][j] = dis(gen); // Generate random float value
}
}
}
float res[dimNb][dim0][dim2];
for (std::size_t n = 0; n < dimNb; ++n) {
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim2; ++j) {
float sum = 0.0;
for (int k = 0; k < dim1; ++k) {
sum += bigArray1[n][i][k] * bigArray2[n][k][j];
}
res[n][i][j] = sum;
}
}
}
// Convert bigArray1 to Tensor
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
T1 -> resize({dimNb,dim0,dim1});
T1 -> setBackend("cpu");
T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb*dim0*dim1);
// Convert bigArray2 to Tensor
std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
T2 -> resize({dimNb,dim1,dim2});
T2 -> setBackend("cpu");
T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb*dim1*dim2);
// convert res to Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
Tres -> resize({dimNb,dim0,dim2});
Tres -> setBackend("cpu");
Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb*dim0*dim2);
op->associateInput(0, T1);
op->associateInput(1, T2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMatMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
}
std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
std::cout << "total time: " << duration.count() << std::endl;
}
SECTION("4-D Tensors") {
std::size_t totalComputation = 0;
duration = std::chrono::duration<double, std::micro>::zero();
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate Tensors dimensions
const std::size_t dimNb1 = distNbMatrix(gen);
const std::size_t dimNb2 = distNbMatrix(gen);
const std::size_t dim0 = distDims(gen);
const std::size_t dim1 = distDims(gen);
const std::size_t dim2 = distDims(gen);
totalComputation += dim0*dim1*dim2*dimNb1*dimNb2;
// Create and populate the array with random float values
float bigArray1[dimNb1][dimNb2][dim0][dim1];
for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
for (std::size_t i = 0; i < dim0; ++i) {
for (std::size_t j = 0; j < dim1; ++j) {
bigArray1[n1][n2][i][j] = dis(gen); // Generate random float value
}
}
}
}
float bigArray2[dimNb1][dimNb2][dim1][dim2];
for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
for (std::size_t i = 0; i < dim1; ++i) {
for (std::size_t j = 0; j < dim2; ++j) {
bigArray2[n1][n2][i][j] = dis(gen); // Generate random float value
}
}
}
}
float res[dimNb1][dimNb2][dim0][dim2];
for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim2; ++j) {
float sum = 0.0;
for (int k = 0; k < dim1; ++k) {
sum += bigArray1[n1][n2][i][k] * bigArray2[n1][n2][k][j];
}
res[n1][n2][i][j] = sum;
}
}
}
}
// Convert bigArray1 to Tensor
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
T1 -> resize({dimNb1,dimNb2,dim0,dim1});
T1 -> setBackend("cpu");
T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb1*dimNb2*dim0*dim1);
// Convert bigArray2 to Tensor
std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
T2 -> resize({dimNb1,dimNb2,dim1,dim2});
T2 -> setBackend("cpu");
T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb1*dimNb2*dim1*dim2);
// convert res to Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
Tres -> resize({dimNb1,dimNb2,dim0,dim2});
Tres -> setBackend("cpu");
Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb1*dimNb2*dim0*dim2);
op->associateInput(0, T1);
op->associateInput(1, T2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMatMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
}
std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
std::cout << "total time: " << duration.count() << std::endl;
}
SECTION("+2-D / 1-D") {
// allows to test both computation with a 1-D Tensor and broadcasting
// input_0
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
const std::size_t dim0 = distNbMatrix(gen);
const std::size_t dim1 = distNbMatrix(gen) + 1;
const std::size_t dim2 = distNbMatrix(gen);
const std::size_t dim3 = distNbMatrix(gen);
T0->resize({dim0,dim1,dim2,dim3});
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
// input_1
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
T1->resize({dim3});
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
myMatMul->forward();
}
}
} // namespace Aidge
\ No newline at end of file
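One caveat in the tests above: float bigArray1[dim0][dim1] with runtime bounds is a variable-length array, a GCC/Clang extension rather than standard C++, and the 4-D variants can put on the order of a megabyte on the stack. A heap-backed sketch of the same 2-D reference computation, using only the standard library (names are illustrative):

#include <cstddef>
#include <vector>

// Sketch: flat, heap-allocated buffers with explicit row-major indexing
// in place of the stack VLAs used by the test.
std::vector<float> matmulRef(const std::vector<float>& a,  // dim0 x dim1
                             const std::vector<float>& b,  // dim1 x dim2
                             std::size_t dim0, std::size_t dim1, std::size_t dim2) {
    std::vector<float> res(dim0 * dim2, 0.0f);
    for (std::size_t i = 0; i < dim0; ++i) {
        for (std::size_t k = 0; k < dim1; ++k) {
            const float aik = a[i * dim1 + k];
            for (std::size_t j = 0; j < dim2; ++j) {
                res[i * dim2 + j] += aik * b[k * dim2 + j];
            }
        }
    }
    return res;
}

The Tensor plumbing would be unchanged: resize the tensor and pass res.data() to setRawPtr, keeping the vector alive until the REQUIRE has run.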
@@ -14,6 +14,7 @@
#include <cstdlib>
#include <memory>

#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/data/Tensor.hpp"
@@ -21,10 +22,12 @@
#include "aidge/operator/MetaOperator.hpp"
#include "aidge/operator/MetaOperatorDefs.hpp"
#include "aidge/operator/Pad.hpp"
#include "aidge/operator/Pop.hpp"

using namespace Aidge;

TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
SECTION("PaddedConv(forward)") {
std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(
Array4D<double, 4, 3, 3, 3>{{{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02},
{1.16492919e-01, 8.21634093e-02, 1.17413265e-01},
@@ -187,4 +190,240 @@ TEST_CASE("[cpu/operator] MetaOperator/PaddedConv(forward)", "[MetaOperator][PaddedConv][CPU]") {
std::shared_ptr<Node> myPaddedConv =
PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1});
}
SECTION("LSTM(forward)") {
auto pop = Pop();
auto myLSTM = LSTM(32, 64, 0, true, "ltsm");
auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
microGraph->save("lstm", false, false);
REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
REQUIRE(myLSTM->nbData() == 1);
REQUIRE(myLSTM->nbOutputs() == 2);
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
Array2D<float, 16, 32>{});
std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
Array2D<float, 1, 64>{});
std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
Array2D<float, 64, 32>{});
std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
Array2D<float, 64, 64>{});
pop->addChild(myLSTM, 0, 0);
pop->getOperator()->associateInput(0, myInput);
op->associateInput(17, myInit);
op->associateInput(18, myInit);
// Weights X
myLSTM->input(1).first->getOperator()->setOutput(0, myInitW);
myLSTM->input(2).first->getOperator()->setOutput(0, myInitW);
myLSTM->input(3).first->getOperator()->setOutput(0, myInitW);
myLSTM->input(4).first->getOperator()->setOutput(0, myInitW);
// Weights H
myLSTM->input(5).first->getOperator()->setOutput(0, myInitR);
myLSTM->input(6).first->getOperator()->setOutput(0, myInitR);
myLSTM->input(7).first->getOperator()->setOutput(0, myInitR);
myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
auto g = getConnectedGraphView(myLSTM);
g->setDataType(DataType::Float32);
g->setBackend("cpu");
auto scheduler = SequentialScheduler(g);
scheduler.forward(true, true);
g->save("lstm_outside_dims", true, true);
microGraph->save("lstm_dims", true, true);
REQUIRE(op->outputDimsForwarded());
auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler();
microGraphScheduler->saveSchedulingDiagram("lstm_scheduling");
REQUIRE(op->getNbConsumedData(0) == 512);
REQUIRE(op->getNbConsumedData(1) == 32768);
REQUIRE(op->getNbProducedData(0) == 1088);
REQUIRE(op->getNbProducedData(1) == 1088);
REQUIRE(microGraphScheduler->getStaticScheduling(0).size() == 26);
REQUIRE(microGraphScheduler->getStaticScheduling(1).size() == 24);
REQUIRE(microGraphScheduler->getStaticScheduling(15).size() == 24);
}
SECTION("LSTM(forward_values)") {
auto myLSTM = LSTM(2, 3, 0, true, "ltsm");
auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
microGraph->save("lstm", false, false);
REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
REQUIRE(myLSTM->nbData() == 1);
REQUIRE(myLSTM->nbOutputs() == 2);
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
Array2D<float, 3, 2>{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}});
std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
op->associateInput(0, myInput);
op->associateInput(17, myInit);
op->associateInput(18, myInit);
// Weights X
myLSTM->input(1).first->getOperator()->setOutput(0, myInitW);
myLSTM->input(2).first->getOperator()->setOutput(0, myInitW);
myLSTM->input(3).first->getOperator()->setOutput(0, myInitW);
myLSTM->input(4).first->getOperator()->setOutput(0, myInitW);
// Weights H
myLSTM->input(5).first->getOperator()->setOutput(0, myInitR);
myLSTM->input(6).first->getOperator()->setOutput(0, myInitR);
myLSTM->input(7).first->getOperator()->setOutput(0, myInitR);
myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
auto g = getConnectedGraphView(myLSTM);
g->setDataType(DataType::Float32);
g->setBackend("cpu");
auto scheduler = SequentialScheduler(g);
scheduler.forward();
microGraph->save("lstm_values_dims", false, true);
std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412},
{0.25606447, 0.25606447, 0.25606447},
{0.40323776, 0.40323776, 0.40323776}}});
auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler();
microGraphScheduler->saveSchedulingDiagram("lstm_values_scheduling");
op->getOutput(0)->print();
myHiddenState->print();
REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
}
SECTION("LSTM(forward_values_seq)") {
auto pop = Pop();
auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
auto myGraph = Sequential({pop, myLSTM});
auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
REQUIRE(myLSTM->nbData() == 1);
REQUIRE(myLSTM->nbOutputs() == 2);
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
pop->getOperator()->associateInput(0, myInput);
op->associateInput(17, myInit);
op->associateInput(18, myInit);
// Weights X
myLSTM->input(1).first->getOperator()->setOutput(0, myInitW);
myLSTM->input(2).first->getOperator()->setOutput(0, myInitW);
myLSTM->input(3).first->getOperator()->setOutput(0, myInitW);
myLSTM->input(4).first->getOperator()->setOutput(0, myInitW);
// Weights H
myLSTM->input(5).first->getOperator()->setOutput(0, myInitR);
myLSTM->input(6).first->getOperator()->setOutput(0, myInitR);
myLSTM->input(7).first->getOperator()->setOutput(0, myInitR);
myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
auto g = getConnectedGraphView(myLSTM);
g->setDataType(DataType::Float32);
g->setBackend("cpu");
g->save("lstm_seq", true, true);
auto scheduler = SequentialScheduler(g);
scheduler.forward(true, true);
scheduler.saveSchedulingDiagram("lstm_seq_schedule");
std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
{0.49801484, 0.49801484, 0.49801484},
{0.67162132, 0.67162132, 0.67162132}}});
myGraph->save("lstm_seq_mygraph", true, true);
op->getOutput(0)->print();
myHiddenState->print();
REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
}
SECTION("LSTM(forward_values_seq_flatten)") {
auto pop = Pop();
auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
// Here we test the LSTM as if it had been flattened in the graph:
// we just borrow its micro-graph into our larger myGraph graph.
auto myGraph = std::make_shared<GraphView>();
pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
myGraph->add(op->getMicroGraph());
myGraph->add(pop);
REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
REQUIRE(myLSTM->nbData() == 1);
REQUIRE(myLSTM->nbOutputs() == 2);
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
pop->getOperator()->associateInput(0, myInput);
op->associateInput(17, myInit);
op->associateInput(18, myInit);
// Weights X
auto prodX = Producer(myInitW);
prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1);
prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1);
prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1);
prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1);
// Weights H
auto prodH = Producer(myInitR);
prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1);
prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1);
prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1);
prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1);
myGraph->add({prodX, prodH});
myGraph->setDataType(DataType::Float32);
myGraph->setBackend("cpu");
myGraph->save("lstm_seq_flatten", true, true);
std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
{0.49801484, 0.49801484, 0.49801484},
{0.67162132, 0.67162132, 0.67162132}}});
auto scheduler = SequentialScheduler(myGraph);
scheduler.forward(true, true);
scheduler.saveSchedulingDiagram("lstm_seq_flatten_schedule");
op->getOutput(0)->print();
myHiddenState->print();
REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
}
}
\ No newline at end of file
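For readers tracing the index constants used throughout the LSTM sections, the wiring implies the following input layout (inferred from these tests, not from the operator's documentation):

// 0      : input sequence
// 1 - 4  : X-weight matrices, one per gate (set through their Producers)
// 5 - 8  : H-weight (recurrent) matrices, one per gate
// 9 - 16 : presumably the gate biases; left untouched here
// 17, 18 : initial hidden state and initial cell state
// Consistent with REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8): three "data"
// inputs (sequence plus the two initial states) and sixteen parameters.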
@@ -10,123 +10,307 @@
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp" #include "aidge/data/Tensor.hpp"
#include "aidge/operator/Mul.hpp" #include "aidge/operator/Mul.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu.hpp" namespace Aidge {
#include <memory> TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
std::uniform_int_distribution<int> boolDist(0,1);
// Create Mul Operator
std::shared_ptr<Node> myMul = Mul();
auto op = std::static_pointer_cast<OperatorTensor>(myMul-> getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// Create 2 input Tensors
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
// Create results Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
Tres->setDataType(DataType::Float32);
Tres->setBackend("cpu");
// To measure execution time of 'Mul_Op::forward()' member function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
SECTION("MulImpl_cpu::forward()") {
SECTION("Scalar / Scalar") {
}
SECTION("Scalar / +1-D Tensor") {
}
SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
const std::size_t nbDims = nbDimsDist(gen);
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;

// without broadcasting
float* array0 = new float[nb_elements];
float* array1 = new float[nb_elements];
float* result = new float[nb_elements];

for (std::size_t i = 0; i < nb_elements; ++i) {
array0[i] = valueDist(gen);
array1[i] = valueDist(gen);
result[i] = array0[i] * array1[i];
}
// input0
T0->resize(dims);
T0 -> getImpl() -> setRawPtr(array0, nb_elements);
// input1
T1->resize(dims);
T1 -> getImpl() -> setRawPtr(array1, nb_elements);
// results
Tres->resize(dims);
Tres -> getImpl() -> setRawPtr(result, nb_elements);
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
// with broadcasting
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions, replace some dimensions with '1' to get broadcasting
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
std::vector<std::size_t> dims0 = dims;
std::vector<std::size_t> dims1 = dims;
std::vector<std::size_t> dimsOut = dims;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims0[i] = 1;
}
if (boolDist(gen)) {
dims1[i] = 1;
}
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
}
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];

for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
array1[i] = valueDist(gen);
}
// compute true result
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+ strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1[2] > 1) ? c : 0)
+ ((dims1[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
SECTION("+1-D Tensor / 1-D Tensor") {
SECTION("+1-D Tensor / 1-D Tensor") {
std::size_t number_of_operation = 0;
std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims0(4);
for (std::size_t i = 0; i < nbDims; ++i) {
dims0[i] = dimSizeDist(gen);
}
std::vector<std::size_t> dimsOut = dims0;
std::vector<std::size_t> dims1 = dims0;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims1[i] = 1;
}
}
dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
float* array1 = new float[array1_size];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < array1_size; ++i) {
array1[i] = valueDist(gen);
}
// compute true result
auto dims1_tmp = dims1;
dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
+ strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
+ ((dims1_tmp[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, array1_size);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
}
}
} // namespace Aidge
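The hand-rolled stride arithmetic in the reference loops above is the part most likely to rot if the rank ever changes. As a cross-check, here is a rank-generic sketch of the same "true result" computation that derives both input indices from an output coordinate (illustrative only, not part of the test suite; it assumes both shapes are already padded to the output's rank, with 1s on broadcast axes):

#include <cstddef>
#include <vector>

// Sketch: element-wise broadcast reference for Mul over two padded shapes.
std::vector<float> broadcastMulRef(const std::vector<float>& in0, const std::vector<std::size_t>& dims0,
                                   const std::vector<float>& in1, const std::vector<std::size_t>& dims1,
                                   const std::vector<std::size_t>& dimsOut) {
    const std::size_t rank = dimsOut.size();
    std::size_t outSize = 1;
    for (std::size_t d : dimsOut) { outSize *= d; }

    std::vector<float> out(outSize);
    std::vector<std::size_t> coord(rank, 0);  // multi-dimensional output index
    for (std::size_t flat = 0; flat < outSize; ++flat) {
        // Row-major flat index into each input, clamping size-1 (broadcast) axes to 0.
        std::size_t idx0 = 0, idx1 = 0;
        for (std::size_t d = 0; d < rank; ++d) {
            idx0 = idx0 * dims0[d] + ((dims0[d] > 1) ? coord[d] : 0);
            idx1 = idx1 * dims1[d] + ((dims1[d] > 1) ? coord[d] : 0);
        }
        out[flat] = in0[idx0] * in1[idx1];
        // Advance the coordinate, last axis fastest.
        for (std::size_t d = rank; d-- > 0;) {
            if (++coord[d] < dimsOut[d]) { break; }
            coord[d] = 0;
        }
    }
    return out;
}

The same skeleton covers the Sub test below by swapping the * for a -.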
@@ -150,12 +150,15 @@ TEST_CASE("[cpu/operator] PaddedConv(forward)", "[PaddedConv][CPU]") {
});

myConv->getOperator()->associateInput(0,myInput);
myConv->input(1).first->getOperator()->setOutput(0, myWeights);
myConv->input(2).first->getOperator()->setOutput(0, myBias);

auto g = getConnectedGraphView(myConv);
g->setDataType(DataType::Int32);
g->setBackend("cpu");

auto scheduler = SequentialScheduler(g);
scheduler.forward();

REQUIRE(*(op->getOutput(0)) == *myOutput);
}
@@ -309,12 +312,15 @@ TEST_CASE("[cpu/operator] PaddedConv(forward)", "[PaddedConv][CPU]") {
});

myConv->getOperator()->associateInput(0,myInput);
myConv->input(1).first->getOperator()->setOutput(0, myWeights);
myConv->input(2).first->getOperator()->setOutput(0, myBias);

auto g = getConnectedGraphView(myConv);
g->setDataType(DataType::Int32);
g->setBackend("cpu");

auto scheduler = SequentialScheduler(g);
scheduler.forward();

REQUIRE(*(op->getOutput(0)) == *myOutput);
}
...
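A note on the pattern change in these two hunks: PaddedConv now appears to receive its weights and bias from internal Producer nodes, so the test overwrites each Producer's output via myConv->input(i).first->getOperator()->setOutput(0, ...) instead of calling associateInput, and it runs a SequentialScheduler over getConnectedGraphView(myConv), which pulls those Producers into the scheduled graph, rather than invoking forward() on the node directly.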
This diff is collapsed.
@@ -10,123 +10,307 @@
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp" #include "aidge/data/Tensor.hpp"
#include "aidge/operator/Sub.hpp" #include "aidge/operator/Sub.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu.hpp" namespace Aidge {
#include <memory> TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
std::uniform_int_distribution<int> boolDist(0,1);
// Create Sub Operator
std::shared_ptr<Node> mySub = Sub();
auto op = std::static_pointer_cast<OperatorTensor>(mySub-> getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// Create 2 input Tensors
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
// Create results Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
Tres->setDataType(DataType::Float32);
Tres->setBackend("cpu");
// To measure execution time of 'Sub_Op::forward()' member function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
SECTION("SubImpl_cpu::forward()") {
SECTION("Scalar / Scalar") {
}
SECTION("Scalar / +1-D Tensor") {
}
SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
const std::size_t nbDims = nbDimsDist(gen);
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;

// without broadcasting
float* array0 = new float[nb_elements];
float* array1 = new float[nb_elements];
float* result = new float[nb_elements];

for (std::size_t i = 0; i < nb_elements; ++i) {
array0[i] = valueDist(gen);
array1[i] = valueDist(gen);
result[i] = array0[i] - array1[i];
}
// input0
T0->resize(dims);
T0 -> getImpl() -> setRawPtr(array0, nb_elements);
// input1
T1->resize(dims);
T1 -> getImpl() -> setRawPtr(array1, nb_elements);
// results
Tres->resize(dims);
Tres -> getImpl() -> setRawPtr(result, nb_elements);
op->computeOutputDims();
start = std::chrono::system_clock::now();
mySub->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
// with broadcasting
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions, replace some dimensions with '1' to get broadcasting
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
std::vector<std::size_t> dims0 = dims;
std::vector<std::size_t> dims1 = dims;
std::vector<std::size_t> dimsOut = dims;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims0[i] = 1;
}
if (boolDist(gen)) {
dims1[i] = 1;
}
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
}
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];

for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
array1[i] = valueDist(gen);
}
// compute true result
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+ strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1[2] > 1) ? c : 0)
+ ((dims1[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] - array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
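// NB: the stride trick above encodes broadcasting by zeroing the index
// contribution of any size-1 axis; e.g. with dims0 = {2, 1, 3, 4} every
// output index b reads the same slice of array0 along axis 1.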
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->computeOutputDims();
start = std::chrono::system_clock::now();
mySub->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
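// nb_elements counts the output elements of this trial; the running total
// feeds the elements-per-microsecond figure printed once the trials are done.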
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count()) << std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
SECTION("+1-D Tensor / 1-D Tensor") {
std::size_t number_of_operation = 0;
std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
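// This section drops 1 to 3 leading axes from the second operand, so the
// kernel must right-align the remaining axes against the 4-D first operand
// (the reference below re-pads dims1 with leading 1s to mimic that).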
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims0(nbDims);
for (std::size_t i = 0; i < nbDims; ++i) {
dims0[i] = dimSizeDist(gen);
}
std::vector<std::size_t> dimsOut = dims0;
std::vector<std::size_t> dims1 = dims0;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims1[i] = 1;
}
}
dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
float* array1 = new float[array1_size];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < array1_size; ++i) {
array1[i] = valueDist(gen);
}
// compute true result
auto dims1_tmp = dims1;
dims1_tmp.insert(dims1_tmp.cbegin(), nbDims - dims1_tmp.size(), std::size_t(1));
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
+ strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
+ ((dims1_tmp[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] - array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
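// Same stride trick as in the previous section, with strides1 built from the
// re-padded dims1_tmp so the lower-rank operand is indexed correctly.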
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, array1_size);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->computeOutputDims();
start = std::chrono::system_clock::now();
mySub->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
}
}
} // namespace Aidge
...
@@ -11,7 +11,7 @@
#include <catch2/catch_test_macros.hpp>

#include "aidge/recipes/Recipes.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/graph/OpArgs.hpp"
...
...
@@ -18,14 +18,14 @@
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/BatchNorm.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/scheduler/Scheduler.hpp"
#include "aidge/data/Tensor.hpp"

namespace Aidge {

TEST_CASE("[core/recipes] FuseBatchNorm", "[recipes][FuseBatchNorm]") {
auto myProd = Producer({2, 3, 3, 3}, "dataProvider");
auto myConv = Conv(3, 3, {1, 1}, "conv1");
auto myBN = BatchNorm<2>(32, 1.0e-5F, 0.1F, "batchnorm1");
...
@@ -86,14 +86,11 @@ TEST_CASE("[core/recipes] FuseBatchNorm", "[recipes][FuseBatchNorm]") {
myBNOp -> setInput(4, std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}}));

auto g1 = Sequential({
myConv,
myBN
});
g1 -> setName("fuseBNGraph");
myProd -> addChild(myConv); // set graph input
myProdOp -> setDataType(DataType::Float32);
myProdOp -> setBackend("cpu");
g1 -> compile("cpu", DataType::Float32);

auto s = SequentialScheduler(g1);
...
@@ -107,7 +104,7 @@ TEST_CASE("[core/recipes] FuseBatchNorm", "[recipes][FuseBatchNorm]") {
std::shared_ptr<Tensor> res2 = std::make_shared<Tensor>(*(myConvOp -> getOutput(0)));

REQUIRE(g1 -> outputNodes().size() == 1);
REQUIRE(g1 -> inputNodes().size() == 0);

bool eq = true;
for (std::size_t i = 0; i < res1->size(); ++i) {
eq &= std::abs(res1->get<float>(i) - res2->get<float>(i)) < 1.0e-06;
...
...
@@ -16,14 +16,14 @@
#include "aidge/graph/OpArgs.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/ReLU.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/scheduler/Scheduler.hpp"
#include "aidge/operator/Concat.hpp"

namespace Aidge {

TEST_CASE("[core/recipes] Tiling(transformation)", "[Tiling][Recipes]") {
SECTION("Transform a pre-generated GraphView") {
...
...
@@ -19,7 +19,7 @@
#include "aidge/graph/GraphView.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/scheduler/Scheduler.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/backend/cpu.hpp"
...

@@ -205,5 +205,144 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
SECTION("Test Residual graph") { SECTION("Test Residual graph") {
} }
SECTION("Test Recurrent graph") {} SECTION("Test Recurrent graph") {
std::shared_ptr<Tensor> in = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}});
std::shared_ptr<Tensor> initTensor = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}});
std::shared_ptr<Tensor> biasTensor = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}});
auto add1 = Add(2, "add1");
auto mem = Memorize(3, "mem1");
auto add2 = Add(2, "add2");
auto bias = Producer(biasTensor, "bias");
auto init = Producer(initTensor, "init");
auto input = Producer(in, "input");
std::shared_ptr<GraphView> g = Sequential({add1, mem, add2});
init->addChild(mem, 0, 1);
mem->addChild(add1, 1, 1);
bias->addChild(add2, 0, 1);
input->addChild(add1, 0, 0);
// Update GraphView inputs/outputs following previous connections:
g->add({mem, add1, add2, init, bias, input});
g->setBackend("cpu");
g->setDataType(Aidge::DataType::Int32);
g->save("graphRecurrent");
g->forwardDims();
SequentialScheduler scheduler(g);
REQUIRE_NOTHROW(scheduler.forward(true, true));
scheduler.saveSchedulingDiagram("schedulingRecurrent");
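// Memorize(3) lets the add1/mem loop run three times, so mem finally holds
// init + 3*in; add2 then adds the bias: expected = init + 3*in + bias.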
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}});
std::shared_ptr<Tensor> result =
std::static_pointer_cast<Tensor>(g->getNode("add2")->getOperator()->getRawOutput(0));
result->print();
expectedOutput->print();
bool equal = (*result == *expectedOutput);
REQUIRE(equal);
}
SECTION("Test ConnectInput graph") {
std::shared_ptr<GraphView> g =
Sequential({
Conv(1, 3, {3, 3}, "conv1"),
Conv(3, 4, {1, 1}, "conv2"),
Conv(4, 3, {1, 1}, "conv3"),
FC(27, 5, false, "fc")});
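// NB: fc takes 27 = 3 channels * 3 * 3 spatial values, i.e. conv3's output
// flattened for each of the two batch samples.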
// g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
g->getNode("conv1")->getOperator()->setInput(1, weight1);
g->getNode("conv1")->getOperator()->setInput(2, bias1);
std::shared_ptr<Tensor> weight2 =
std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
{{{4}}, {{5}}, {{6}}},
{{{7}}, {{8}}, {{9}}},
{{{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
g->getNode("conv2")->getOperator()->setInput(1, weight2);
g->getNode("conv2")->getOperator()->setInput(2, bias2);
// *(g->getNode("conv2")->getOperator()->input(1, weight2);
std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
{{{5}}, {{6}}, {{7}}, {{8}}},
{{{9}}, {{10}}, {{11}}, {{12}}}}});
std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
g->getNode("conv3")->getOperator()->setInput(1, weight3);
g->getNode("conv3")->getOperator()->setInput(2, bias3);
std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
{13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
{10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
{7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3},
{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
g->getNode("fc")->getOperator()->setInput(1, weightfc);
g->getNode("fc")->getOperator()->setInput(2, biasfc);
// input->addChild(g);
g->setDataType(Aidge::DataType::Int32);
g->setBackend("cpu");
std::vector<std::vector<Aidge::DimSize_t>> dims = {inputTensor->dims()};
g->forwardDims(dims);
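// conv1's data input is left unconnected on purpose: only its dimensions are
// given to forwardDims(); the tensor itself goes straight to the scheduler.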
SequentialScheduler scheduler(g);
std::vector<std::shared_ptr<Aidge::Tensor>> dataIn = {inputTensor};
REQUIRE_NOTHROW(scheduler.forward(true, false, dataIn));
scheduler.saveSchedulingDiagram("schedulingSequential");
std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
{{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
{{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
{{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
{{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
{{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
{{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
{{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
{{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
{{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
{{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
{{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
{{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
{{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
{{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
{{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
{{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
{{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
{{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
{{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
{{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
Tensor expectedOutput4 = Array2D<int, 2, 5>{
{{205050376, 198925904, 181355097, 196978090, 238868348},
{598467376, 561797804, 560823897, 593043790, 698672948}}};
std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
bool equal1 = (*other1 == *expectedOutput1);
REQUIRE(equal1);
std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
bool equal2 = (*other2 == *expectedOutput2);
REQUIRE(equal2);
std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
bool equal3 = (*other3 == *expectedOutput3);
REQUIRE(equal3);
std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
bool equal4 = (*other4 == expectedOutput4);
REQUIRE(equal4);
}
}