Commit 30d2af82 authored by Lucas Lopez
parents 35e720e9 9d427c11
Pipeline #65939 failed
@@ -69,6 +69,8 @@ file(GLOB_RECURSE inc_files "include/*.hpp")
add_library(${module_name} ${src_files} ${inc_files})
target_link_libraries(${module_name}
PRIVATE
fmt::fmt
PUBLIC
_aidge_core # _ is added because we link the exported target and not the project
)
......
@@ -34,6 +34,7 @@ using MulImpl_cpu = OperatorImpl_cpu<Mul_Op,
const std::size_t,
const std::vector<std::size_t>,
const std::vector<std::size_t>,
const std::vector<std::size_t>,
const void*,
const void*,
const void*,
......
@@ -149,61 +149,53 @@ void MulImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
template <class I1, class I2, class O>
void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
const std::size_t input1Length,
const std::size_t grad0Length,
const std::vector<std::size_t> input0Dims,
const std::vector<std::size_t> input1Dims,
const void* input0_,
const void* input1_,
const void* grad_output_,
void* gradientInput0,
void* gradientInput1)
const std::size_t input1Length,
const std::size_t gradOutputLength,
const std::vector<std::size_t>& dims0,
const std::vector<std::size_t>& dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
const void* grad_output_,
void* gradientInput0_,
void* gradientInput1_)
{
const auto* input0 = static_cast<const I1*>(input0_);
const auto* input1 = static_cast<const I1*>(input1_);
const auto* grad_output = static_cast<const O*>(grad_output_);
auto* grad_input_0 = static_cast<I1*>(gradientInput0);
auto* grad_input_1 = static_cast<I2*>(gradientInput1);
if(input0Dims.size() >= input1Dims.size())
{
AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
for(auto i = 0U; i < input0Length; ++i)
{
const auto indices = getMultiDimIndices(input1Dims, i);
const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
grad_input_0[i] = input1[flattenedIndex] * grad_output[i];
const I1* input0 = static_cast<const I1*>(input0_);
const I2* input1 = static_cast<const I2*>(input1_);
const O* grad_output = static_cast<const O*>(grad_output_);
auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));
// Broadcast dims0 and dims1 to match the shape of outputDims
auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
for (std::size_t i = 0; i < gradOutputLength; ++i) {
auto idxOutputGrad = getMultiDimIndices(outputDims, i);
std::vector<std::size_t> idxInput0(broadcastedDims0.size());
std::vector<std::size_t> idxInput1(broadcastedDims1.size());
// Map output indices to input0 indices, considering broadcasting
for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
// If input0 is broadcasted along this dimension (== 1) or both dimensions are 1, index is 0.
// idxInput0 represents the multi-dim index of input0 contributing
// to the output at index i.
idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
for(std::size_t i = 0 ; i < grad0Length; ++i)
{
const auto indices = getMultiDimIndices(input1Dims, i);
const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
grad_input_1[flattenedIndex] += input0[i] * grad_output[i];
for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
} else {
AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
// We have to access tensors with a flat index, hence the conversion
auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);
for(auto i = 0U; i < input1Length; ++i)
{
const auto indices = getMultiDimIndices(input0Dims, i);
const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
grad_input_1[i] = input0[flattenedIndex] * grad_output[i];
}
for(std::size_t i = 0 ; i < grad0Length; ++i)
{
const auto indices = getMultiDimIndices(input0Dims, i);
const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
grad_input_0[flattenedIndex] += input1[i] * grad_output[i];
}
grad_input_0[idx0] += static_cast<I1>(grad_output[i] * input1[idx1]);
grad_input_1[idx1] += static_cast<I2>(grad_output[i] * input0[idx0]);
}
}
......
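For reference, the mathematics this backward kernel implements (a hedged summary in my own notation, not Aidge's): for Y = X0 * X1 with NumPy-style broadcasting, every output-gradient element is routed back to the input positions that produced it,

$$
\frac{\partial L}{\partial X_0}[i_0] = \sum_{j\,:\,\beta_0(j) = i_0} \frac{\partial L}{\partial Y}[j]\; X_1[\beta_1(j)],
\qquad
\frac{\partial L}{\partial X_1}[i_1] = \sum_{j\,:\,\beta_1(j) = i_1} \frac{\partial L}{\partial Y}[j]\; X_0[\beta_0(j)],
$$

where \beta_0 and \beta_1 map an output multi-index j to the corresponding input multi-index by clamping every broadcast dimension (size 1) to index 0. This is exactly the idxInput0/idxInput1 computation in the loop above, and it is why both gradient buffers are zero-filled before the accumulation.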
@@ -58,6 +58,7 @@ void Aidge::MulImpl_cpu::backward() {
/* grad0Length */ out0grad->size(),
/* input0Dims */ in0->dims(),
/* input1Dims */ in1->dims(),
out0grad->dims(),
getCPUPtr(in0),
getCPUPtr(in1),
getCPUPtr(out0grad),
......
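The kernel drives all of this index arithmetic through three helpers: getBroadcastedDims, getMultiDimIndices and getFlattenedIndex. Below is a minimal sketch of what they are assumed to do, consistent with how the kernel calls them; the actual Aidge utilities may differ in signature and edge-case handling.

// Hedged sketch of the index helpers used by MulImpl_cpu_backward_kernel.
// Illustrative re-implementations only; the real Aidge functions may differ.
#include <algorithm> // std::copy
#include <cstddef>   // std::size_t
#include <vector>

// Left-align `dims` against `outputDims` by prepending 1s until the ranks
// match (NumPy/ONNX-style broadcasting alignment).
std::vector<std::size_t> getBroadcastedDims(const std::vector<std::size_t>& outputDims,
                                            const std::vector<std::size_t>& dims) {
    std::vector<std::size_t> broadcasted(outputDims.size(), 1);
    std::copy(dims.cbegin(), dims.cend(),
              broadcasted.begin() + (outputDims.size() - dims.size()));
    return broadcasted;
}

// Flat row-major index -> multi-dimensional index for a tensor of shape `dims`.
std::vector<std::size_t> getMultiDimIndices(const std::vector<std::size_t>& dims,
                                            std::size_t flatIdx) {
    std::vector<std::size_t> indices(dims.size(), 0);
    for (std::size_t i = dims.size(); i-- > 0;) {
        indices[i] = flatIdx % dims[i];
        flatIdx /= dims[i];
    }
    return indices;
}

// Multi-dimensional index -> flat row-major offset for a tensor of shape `dims`.
std::size_t getFlattenedIndex(const std::vector<std::size_t>& dims,
                              const std::vector<std::size_t>& indices) {
    std::size_t flatIdx = 0;
    for (std::size_t i = 0; i < dims.size(); ++i) {
        flatIdx = flatIdx * dims[i] + indices[i];
    }
    return flatIdx;
}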
@@ -100,7 +100,7 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
}); //
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{100,200}});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
Tensor expectedOutput = Array4D<int,3,3,3,2> {
{ //
{ //
{{ 120, 222},{ 124, 226},{ 128, 230}}, //
@@ -118,7 +118,7 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
{{ 144, 246},{ 148, 250},{152, 254}} //
} //
} //
}); //
}; //
std::shared_ptr<Node> myAdd_0 = Add();
std::shared_ptr<Node> myAdd_1 = Add();
@@ -135,8 +135,8 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
op_1->setBackend("cpu");
myAdd_0->forward();
myAdd_1->forward();
op_1->getOutput(0)->print();
expectedOutput->print();
REQUIRE(*op_1->getOutput(0) == *expectedOutput);
Log::info("Add_1 Tensor:\n{}", *(op_1->getOutput(0)));
Log::info("Expected Add_1 Tensor:\n{}", expectedOutput);
REQUIRE(*op_1->getOutput(0) == expectedOutput);
}
}
\ No newline at end of file
@@ -9,6 +9,7 @@
*
********************************************************************************/
<<<<<<< HEAD
#include <chrono> // std::micro, std::chrono::time_point,
// std::chrono::system_clock,
#include <cstddef> // std::size_t
@@ -22,6 +23,16 @@
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
=======
#include <catch2/catch_test_macros.hpp>
#include <chrono>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <iostream>
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
>>>>>>> 9d427c111c3c37ad141cd12a30c851f7fdf6a8fe
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/MulImpl.hpp"
@@ -34,6 +45,7 @@
namespace Aidge {
<<<<<<< HEAD
TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]")
{
using aif32 = cpptype_t<DataType::Float32>;
@@ -339,35 +351,368 @@ TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]")
REQUIRE(approxEq<aif32>(*(op->getInput(0)->grad()), expectedGrad0));
REQUIRE(approxEq<aif32>(*(op->getInput(1)->grad()), expectedGrad1));
=======
TEST_CASE("[CPU/Operator] Mul(Backward)", "[Mul][CPU][Backward]") {
std::shared_ptr<Node> myMul = Mul();
auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// NOTE: The first four tests use fixed values; the last one uses random values with fixed dimensions.
SECTION("Case 1: 1D and 2D Tensors") {
const auto T0 = std::make_shared<Tensor>(
Array2D<float, 2, 3>({{{1, 2, 3}, {4, 5, 6}}}));
const auto T1 =
std::make_shared<Tensor>(Array1D<float, 3>({0.1, 0.2, 0.3}));
float *input0 = static_cast<float *>(T0->getImpl()->rawPtr());
float *input1 = static_cast<float *>(T1->getImpl()->rawPtr());
// TODO Use
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(std::make_shared<Tensor>(
Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}})));
op->forwardDims();
myMul->backward();
const auto expectedGrad0 = std::make_shared<Tensor>(
Array2D<float, 2, 3>({{{0.1, 0.2, 0.3}, {0.1, 0.2, 0.3}}}));
const auto expectedGrad1 =
std::make_shared<Tensor>(Array1D<float, 3>({5, 7, 9}));
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
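A quick check of the expected values in Case 1 above, with g the all-ones output gradient (Cases 2 to 4 follow the same reduction pattern):

$$
\frac{\partial L}{\partial T_0}[i,j] = g[i,j]\, T_1[j] = T_1[j],
\qquad
\frac{\partial L}{\partial T_1}[j] = \sum_{i=0}^{1} g[i,j]\, T_0[i,j] = T_0[0,j] + T_0[1,j],
$$

so grad0 repeats (0.1, 0.2, 0.3) on both rows and grad1 = (1+4, 2+5, 3+6) = (5, 7, 9), matching expectedGrad0 and expectedGrad1.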
SECTION("Case 2: 3D and 1D tensors") {
const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}},
{{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}}));
const auto T1 =
std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1}));
const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}}));
const auto expectedGrad0 = std::make_shared<Tensor>(
Array3D<float, 2, 2, 3>({{{{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}},
{{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}}}}));
const auto expectedGrad1 =
std::make_shared<Tensor>(Array1D<float, 3>({22.0, 26.0, 30.0}));
for (auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) {
T->setBackend("cpu");
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
myMul->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Case 3: 4D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(Array4D<float, 2, 2, 3, 3>(
{{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}},
{{10.0, 11.0, 12.0}, {13.0, 14.0, 15.0}, {16.0, 17.0, 18.0}}},
{{{19.0, 20.0, 21.0}, {22.0, 23.0, 24.0}, {25.0, 26.0, 27.0}},
{{28.0, 29.0, 30.0},
{31.0, 32.0, 33.0},
{34.0, 35.0, 36.0}}}}}));
const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 3>(
{{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}));
const auto newGrad =
std::make_shared<Tensor>(Array4D<float, 2, 2, 3, 3>(
{{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}},
{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}}));
const auto expectedGrad0 =
std::make_shared<Tensor>(Array4D<float, 2, 2, 3, 3>(
{{{{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}},
{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}},
{{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}},
{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}}}));
const auto expectedGrad1 = std::make_shared<Tensor>(
Array2D<float, 3, 3>({{{58.0, 62.0, 66.0},
{70.0, 74.0, 78.0},
{82.0, 86.0, 90.0}}}));
for (const auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) {
T->setBackend("cpu");
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
myMul->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Case 4: 3D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(
Array3D<float, 2, 3, 4>({{{
{1.0, 2.0, 3.0, 4.0},
{5.0, 6.0, 7.0, 8.0},
{9.0, 10.0, 11.0, 12.0},
},
{
{13.0, 14.0, 15.0, 16.0},
{17.0, 18.0, 19.0, 20.0},
{21.0, 22.0, 23.0, 24.0},
}}}));
const auto T1 = std::make_shared<Tensor>(
Array2D<float, 3, 4>({{{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}}}));
const auto newGrad = std::make_shared<Tensor>(
Array3D<float, 2, 3, 4>({{{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
},
{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
}}}));
const auto expectedGrad0 = std::make_shared<Tensor>(
Array3D<float, 2, 3, 4>({{{{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}},
{{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}}}}));
const auto expectedGrad1 = std::make_shared<Tensor>(
Array2D<float, 3, 4>({{{14.0, 16.0, 18.0, 20.0},
{22.0, 24.0, 26.0, 28.0},
{30.0, 32.0, 34.0, 36.0}}}));
for (const auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) {
T->setBackend("cpu");
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
myMul->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Case 5: Tensors with random values") {
// Use random values
std::vector<std::size_t> dims0 = {5, 2, 1, 7}; // First tensor
std::vector<std::size_t> dims1 = {2, 6, 7}; // Second tensor
std::vector<std::size_t> outputDims = {5, 2, 6, 7};
const auto input0Size = 5 * 2 * 1 * 7;
const auto input1Size = 2 * 6 * 7;
const auto outputSize = 5 * 2 * 6 * 7;
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dist(0.1f, 1.0f);
std::vector<float> input0Data(input0Size);
std::vector<float> input1Data(input1Size);
// Fill with random values
for (auto &val : input0Data) {
val = dist(gen);
}
for (auto &val : input1Data) {
val = dist(gen);
}
auto T0 = std::make_shared<Tensor>();
auto T1 = std::make_shared<Tensor>();
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
T0->resize(dims0);
T0->getImpl()->setRawPtr(input0Data.data(), input0Size);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
T1->resize(dims1);
T1->getImpl()->setRawPtr(input1Data.data(), input1Size);
op->associateInput(0, T0);
op->associateInput(1, T1);
op->forwardDims();
myMul->forward();
std::vector<float> expectedOutput(outputSize);
for (std::size_t n = 0; n < 5; ++n) {
for (std::size_t c = 0; c < 2; ++c) {
for (std::size_t h = 0; h < 6; ++h) {
for (std::size_t w = 0; w < 7; ++w) {
std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
std::size_t in0Idx =
w + 7 * (0 + 1 * (c + 2 * n)); // middle dim is 1
std::size_t in1Idx =
w + 7 * (h + 6 * c); // no n dimension
expectedOutput[outIdx] =
input0Data[in0Idx] * input1Data[in1Idx];
}
}
}
}
auto outputTensor = op->getOutput(0);
// Verify forward pass
auto expectedOutputTensor = std::make_shared<Tensor>();
expectedOutputTensor->resize(outputDims);
expectedOutputTensor->setBackend("cpu");
expectedOutputTensor->setDataType(DataType::Float32);
expectedOutputTensor->getImpl()->setRawPtr(expectedOutput.data(),
expectedOutput.size());
REQUIRE(approxEq<float>(*outputTensor, *expectedOutputTensor));
// Backward pass
std::vector<float> gradOutputData(outputSize);
for (auto &val : gradOutputData) {
val = dist(gen);
}
op->getOutput(0)->setGrad(std::make_shared<Tensor>());
op->getOutput(0)->grad()->resize(outputDims);
op->getOutput(0)->grad()->getImpl()->setRawPtr(gradOutputData.data(),
outputSize);
// Compute reference gradients
std::vector<float> expectedGrad0(input0Size, 0.0f);
std::vector<float> expectedGrad1(input1Size, 0.0f);
for (std::size_t n = 0; n < 5; ++n) {
for (std::size_t c = 0; c < 2; ++c) {
for (std::size_t h = 0; h < 6; ++h) {
for (std::size_t w = 0; w < 7; ++w) {
std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n));
std::size_t in0Idx = w + 7 * (0 + 1 * (c + 2 * n));
std::size_t in1Idx = w + 7 * (h + 6 * c);
// Gradient for input0: grad_output * input1
expectedGrad0[in0Idx] +=
gradOutputData[outIdx] * input1Data[in1Idx];
// Gradient for input1: grad_output * input0
expectedGrad1[in1Idx] +=
gradOutputData[outIdx] * input0Data[in0Idx];
}
}
}
}
// Perform backward pass
myMul->backward();
auto expectedGrad0Tensor = std::make_shared<Tensor>();
expectedGrad0Tensor->resize(T0->dims());
expectedGrad0Tensor->setBackend("cpu");
expectedGrad0Tensor->setDataType(DataType::Float32);
expectedGrad0Tensor->getImpl()->setRawPtr(expectedGrad0.data(),
expectedGrad0.size());
auto expectedGrad1Tensor = std::make_shared<Tensor>();
expectedGrad1Tensor->resize(T1->dims());
expectedGrad1Tensor->setBackend("cpu");
expectedGrad1Tensor->setDataType(DataType::Float32);
expectedGrad1Tensor->getImpl()->setRawPtr(expectedGrad1.data(),
expectedGrad1.size());
// Verify backward pass
REQUIRE(approxEq<float>(*T0->grad(), *expectedGrad0Tensor));
REQUIRE(approxEq<float>(*T1->grad(), *expectedGrad1Tensor));
// Optional: Print some values for verification
// std::cout << "Input shapes: (" << dims0[0] << "," << dims0[1] <<
// "," << dims0[2] << "," << dims0[3]
// << ") * (" << dims1[0] << "," << dims1[1] << "," <<
// dims1[2]
// << ") -> (" << outputDims[0] << "," << outputDims[1]
// << "," << outputDims[2] << "," << outputDims[3] <<
// ")\n";
// std::cout << "Input sizes: " << input0_size << " * " <<
// input1_size << " -> " << output_size << "\n";
>>>>>>> 9d427c111c3c37ad141cd12a30c851f7fdf6a8fe
}
}
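The hand-computed references in Case 5 above, and in the forward tests below, all rely on the standard row-major flattening of a 4-D index. As a reminder, for a tensor of shape (N, C, H, W):

$$
\text{idx}(n, c, h, w) = w + W \bigl( h + H \, ( c + C \, n ) \bigr),
$$

with any broadcast dimension of size 1 contributing index 0; this is how outIdx = w + 7 * (h + 6 * (c + 2 * n)) and the in0Idx/in1Idx expressions are obtained for the shapes (5, 2, 6, 7), (5, 2, 1, 7) and (2, 6, 7).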
TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
TEST_CASE("[cpu/operator] Mul(forward)", "[Mul][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0.1 and 1.1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
std::uniform_int_distribution<int> boolDist(0,1);
std::uniform_real_distribution<float> valueDist(
0.1f,
1.1f); // Random float distribution between 0.1 and 1.1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
std::size_t(3));
std::uniform_int_distribution<int> boolDist(0, 1);
<<<<<<< HEAD
// Create Mul Operator
std::shared_ptr<Mul_Op> op = std::make_shared<Mul_Op>();
=======
std::shared_ptr<Node> myMul = Mul();
auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator());
>>>>>>> 9d427c111c3c37ad141cd12a30c851f7fdf6a8fe
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// Create 2 input Tensors
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
op->associateInput(0, T0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
op->associateInput(1, T1);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
// Create results Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
Tres->setDataType(DataType::Float32);
Tres->setBackend("cpu");
@@ -377,14 +722,9 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
SECTION("MulImpl_cpu::forward()") {
SECTION("Scalar / Scalar") {
}
SECTION("Scalar / +1-D Tensor") {
}
SECTION("Scalar / Scalar") {}
SECTION("Scalar / +1-D Tensor") {}
SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
std::size_t number_of_operation = 0;
@@ -399,13 +739,17 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
dims.push_back(dimSizeDist(gen));
}
const auto nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
const auto nb_elements =
std::accumulate(dims.cbegin(),
dims.cend(),
std::size_t(1),
std::multiplies<std::size_t>());
number_of_operation += nb_elements;
// without broadcasting
float* array0 = new float[nb_elements];
float* array1 = new float[nb_elements];
float* result = new float[nb_elements];
float *array0 = new float[nb_elements];
float *array1 = new float[nb_elements];
float *result = new float[nb_elements];
for (std::size_t i = 0; i < nb_elements; ++i) {
array0[i] = valueDist(gen);
@@ -415,21 +759,23 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
// input0
T0->resize(dims);
T0 -> getImpl() -> setRawPtr(array0, nb_elements);
T0->getImpl()->setRawPtr(array0, nb_elements);
// input1
T1->resize(dims);
T1 -> getImpl() -> setRawPtr(array1, nb_elements);
T1->getImpl()->setRawPtr(array1, nb_elements);
// results
Tres->resize(dims);
Tres -> getImpl() -> setRawPtr(result, nb_elements);
Tres->getImpl()->setRawPtr(result, nb_elements);
op->forwardDims();
start = std::chrono::system_clock::now();
op->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
duration +=
std::chrono::duration_cast<std::chrono::microseconds>(
end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -437,24 +783,30 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
delete[] array1;
delete[] result;
}
<<<<<<< HEAD
Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
Log::info("total time: {} μs\n", duration.count());
=======
std::cout << "number of elements over time spent: "
<< (number_of_operation / duration.count()) << std::endl;
std::cout << "total time: " << duration.count() << "μs"
<< std::endl;
>>>>>>> 9d427c111c3c37ad141cd12a30c851f7fdf6a8fe
}
SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions, replace some dimensions with '1' to get broadcasting
// handle dimensions, replace some dimensions with '1' to get
// broadcasting
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dimensions;
for (std::size_t i = 0; i < nbDims; ++i)
{
for (std::size_t i = 0; i < nbDims; ++i) {
dimensions.push_back(dimSizeDist(gen));
}
@@ -462,77 +814,90 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
auto dims1 = dimensions;
auto dimsOut = dimensions;
for (std::size_t i = 0; i < nbDims; ++i)
{
if (boolDist(gen))
{
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims0[i] = 1;
}
if (boolDist(gen))
{
if (boolDist(gen)) {
dims1[i] = 1;
}
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
}
for(auto dim : dims0)
{
for (auto dim : dims0) {
Log::info("Dimension of input 0 : {}", dim);
}
for(auto dim : dims1)
{
for (auto dim : dims1) {
Log::info("Dimension of input 1 : {}", dim);
}
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i)
{
float *array0 =
new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
float *array1 =
new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]];
float *result = new float[dimsOut[0] * dimsOut[1] *
dimsOut[2] * dimsOut[3]];
for (std::size_t i = 0;
i < dims0[0] * dims0[1] * dims0[2] * dims0[3];
++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i)
{
for (std::size_t i = 0;
i < dims1[0] * dims1[1] * dims1[2] * dims1[3];
++i) {
array1[i] = valueDist(gen);
}
// compute true result
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a)
{
for (std::size_t b = 0; b < dimsOut[1]; ++b)
{
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t strides0[nbDims] = {
dims0[1] * dims0[2] * dims0[3],
dims0[2] * dims0[3],
dims0[3],
1};
const std::size_t strides1[nbDims] = {
dims1[1] * dims1[2] * dims1[3],
dims1[2] * dims1[3],
dims1[3],
1};
const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+ strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c)
{
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 =
strides0[0] * ((dims0[0] > 1) ? a : 0) +
strides0[1] * ((dims0[1] > 1) ? b : 0);
for (std::size_t d = 0; d < dimsOut[3]; ++d)
{
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
const std::size_t idx1_0 =
strides1[0] * ((dims1[0] > 1) ? a : 0) +
strides1[1] * ((dims1[1] > 1) ? b : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1[2] > 1) ? c : 0)
+ ((dims1[3] > 1) ? d : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out =
dimsOut[3] *
(c + dimsOut[2] * (b + dimsOut[1] * a));
result[idx_out + d] = array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 =
idx0_0 +
strides0[2] * ((dims0[2] > 1) ? c : 0) +
((dims0[3] > 1) ? d : 0);
std::size_t idx1 =
idx1_0 +
strides1[2] * ((dims1[2] > 1) ? c : 0) +
((dims1[3] > 1) ? d : 0);
result[idx_out + d] =
array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 <<
// ") -> " << array0[idx0] << " * " <<
// array1[idx1] << " -> " << idx_out + d <<
// std::endl;
}
}
}
@@ -541,22 +906,30 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
T0->getImpl()->setRawPtr(
array0,
dims0[0] * dims0[1] * dims0[2] * dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
T1->getImpl()->setRawPtr(
array1,
dims1[0] * dims1[1] * dims1[2] * dims1[3]);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
Tres->getImpl()->setRawPtr(
result,
dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
// compute result
op->forwardDims();
start = std::chrono::system_clock::now();
op->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
duration +=
std::chrono::duration_cast<std::chrono::microseconds>(
end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -565,15 +938,28 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t nb_elements =
std::accumulate(dimsOut.cbegin(),
dimsOut.cend(),
std::size_t(1),
std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
<<<<<<< HEAD
Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
Log::info("total time: {} μs\n", duration.count());
=======
std::cout << "number of elements over time spent: "
<< (number_of_operation / duration.count()) << std::endl;
std::cout << "total time: " << duration.count() << "μs"
<< std::endl;
>>>>>>> 9d427c111c3c37ad141cd12a30c851f7fdf6a8fe
}
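The broadcasting section above computes the output shape per dimension with dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i]. Since the test only ever replaces a dimension by 1, this is equivalent to the usual broadcasting rule

$$
\text{dimsOut}[i] = \max(\text{dims0}[i], \text{dims1}[i]),
$$

which also matches how the strides and index clamping in the reference computation treat size-1 dimensions.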
SECTION("+1-D Tensor / 1-D Tensor") {
std::size_t number_of_operation = 0;
std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(
std::size_t(1),
std::size_t(3));
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
@@ -590,15 +976,24 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
dims1[i] = 1;
}
}
dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
dims1.erase(dims1.cbegin(),
dims1.cbegin() + nbRemovedDimsDist(gen));
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
float* array1 = new float[array1_size];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
float *array0 =
new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
std::size_t array1_size =
std::accumulate(dims1.cbegin(),
dims1.cend(),
std::size_t(1),
std::multiplies<std::size_t>());
float *array1 = new float[array1_size];
float *result = new float[dimsOut[0] * dimsOut[1] *
dimsOut[2] * dimsOut[3]];
for (std::size_t i = 0;
i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]);
++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < array1_size; ++i) {
@@ -607,27 +1002,48 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
// compute true result
auto dims1_tmp = dims1;
dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
dims1_tmp.insert(dims1_tmp.cbegin(),
4 - dims1_tmp.size(),
std::size_t(1));
const std::size_t strides0[nbDims] = {
dims0[1] * dims0[2] * dims0[3],
dims0[2] * dims0[3],
dims0[3],
1};
const std::size_t strides1[nbDims] = {
dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3],
dims1_tmp[2] * dims1_tmp[3],
dims1_tmp[3],
1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
+ strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
const std::size_t idx0_0 =
strides0[0] * ((dims0[0] > 1) ? a : 0) +
strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 =
strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) +
strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
const std::size_t idx_out =
dimsOut[3] *
(c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
+ ((dims1_tmp[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
std::size_t idx0 =
idx0_0 +
strides0[2] * ((dims0[2] > 1) ? c : 0) +
((dims0[3] > 1) ? d : 0);
std::size_t idx1 =
idx1_0 +
strides1[2] *
((dims1_tmp[2] > 1) ? c : 0) +
((dims1_tmp[3] > 1) ? d : 0);
result[idx_out + d] =
array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 <<
// ") -> " << array0[idx0] << " * " <<
// array1[idx1] << " -> " << idx_out + d <<
// std::endl;
}
}
}
@@ -636,22 +1052,28 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
T0->getImpl()->setRawPtr(
array0,
dims0[0] * dims0[1] * dims0[2] * dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, array1_size);
T1->getImpl()->setRawPtr(array1, array1_size);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
Tres->getImpl()->setRawPtr(
result,
dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
// compute result
op->forwardDims();
start = std::chrono::system_clock::now();
op->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
duration +=
std::chrono::duration_cast<std::chrono::microseconds>(
end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -660,13 +1082,25 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t nb_elements =
std::accumulate(dimsOut.cbegin(),
dimsOut.cend(),
std::size_t(1),
std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
<<<<<<< HEAD
Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count()));
Log::info("total time: {} μs\n", duration.count());
=======
std::cout << "number of elements over time spent: "
<< (number_of_operation / duration.count()) << std::endl;
std::cout << "total time: " << duration.count() << "μs"
<< std::endl;
>>>>>>> 9d427c111c3c37ad141cd12a30c851f7fdf6a8fe
}
}
}
} // namespace Aidge