diff --git a/.gitignore b/.gitignore index 0e14676b900cb1418593019be70cc4d20aba2883..9877699f938bdbf94d0383b5b9db6f5f4cf1023e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ # C++ Build build*/ install*/ +include/aidge/backend/cpu_version.h # VSCode .vscode diff --git a/CHANGELOG b/CHANGELOG index a461371a17b586e8ebc65172282153a6ae8e09e2..9153a9a2d4506bcd147c7cb1a114c6a3db18b268 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,5 @@ +# Version 0.5.0 (January 31, 2025) + # Verson 0.4.0 (December 6, 2024) # Version 0.2.2 (May 14, 2024) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9e191c36d5ad57a9a9dbed378154db6676ec796..66ef8ff28503a70de816d546b72e21d8528f0e33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,17 +1,23 @@ cmake_minimum_required(VERSION 3.18) -set(CXX_STANDARD 14) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version) +# Parse version.txt to retrieve Major, Minor and Patch +string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ ${version}) +set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1}) +set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2}) +set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3}) + project(aidge_backend_cpu VERSION ${version} DESCRIPTION "CPU implementations of the operators of aidge framework." 
LANGUAGES CXX) -message(STATUS "Project name: ${CMAKE_PROJECT_NAME}") -message(STATUS "Project version: ${version}") -add_definitions(-DPROJECT_VERSION="${version}") - +# Retrieve latest git commit execute_process( COMMAND git rev-parse --short HEAD WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} @@ -19,8 +25,10 @@ execute_process( OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET ) + +message(STATUS "Project name: ${CMAKE_PROJECT_NAME}") +message(STATUS "Project version: ${version}") message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}") -add_definitions(-DGIT_COMMIT_HASH="${GIT_COMMIT_HASH}") # helper for LSP users set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -64,6 +72,8 @@ file(GLOB_RECURSE inc_files "include/*.hpp") add_library(${module_name} ${src_files} ${inc_files}) target_link_libraries(${module_name} + PRIVATE + fmt::fmt PUBLIC _aidge_core # _ is added because we link the exported target and not the project ) @@ -115,6 +125,13 @@ if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) append_coverage_compiler_flags() endif() +message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/cpu_version.h") +# Generate version.h file from config file version.h.in +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/version.h.in" + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/cpu_version.h" +) + ############################################## # Installation instructions include(GNUInstallDirs) diff --git a/aidge_backend_cpu/__init__.py b/aidge_backend_cpu/__init__.py index a7fe1ea3abdea25b18af6e7e0a1958f01f928433..bb320b2fe436a3be81dde8d643728bd5a30942e7 100644 --- a/aidge_backend_cpu/__init__.py +++ b/aidge_backend_cpu/__init__.py @@ -1,3 +1,2 @@ import aidge_core from aidge_backend_cpu.aidge_backend_cpu import * # import so generated by PyBind -from ._version import * diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index caa75328e58f6c9581f81368a3981bb79a069d49..5db19a2b7a2f88dae13d8baf24cf95f961e730a0 100644 --- 
a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -12,6 +12,8 @@ #ifndef AIDGE_CPU_IMPORTS_H_ #define AIDGE_CPU_IMPORTS_H_ +#include "aidge/backend/cpu_version.h" + #include "aidge/backend/cpu/operator/AbsImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" #include "aidge/backend/cpu/operator/AndImpl.hpp" @@ -28,9 +30,11 @@ #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" #include "aidge/backend/cpu/operator/DivImpl.hpp" #include "aidge/backend/cpu/operator/ErfImpl.hpp" +#include "aidge/backend/cpu/operator/ExpandImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/backend/cpu/operator/FoldImpl.hpp" #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp" +#include "aidge/backend/cpu/operator/HeavisideImpl.hpp" #include "aidge/backend/cpu/operator/LRNImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" #include "aidge/backend/cpu/operator/LnImpl.hpp" @@ -51,8 +55,8 @@ #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" #include "aidge/backend/cpu/operator/SubImpl.hpp" #include "aidge/backend/cpu/operator/TanhImpl.hpp" +#include "aidge/backend/cpu/operator/WeightInterleavedImpl.hpp" #include "aidge/backend/cpu/data/TensorImpl.hpp" #endif /* AIDGE_CPU_IMPORTS_H_ */ - diff --git a/include/aidge/backend/cpu/operator/ExpandImpl.hpp b/include/aidge/backend/cpu/operator/ExpandImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..adfc6ab1ef2e6550c6307fb93d0079ea3b5fc5a2 --- /dev/null +++ b/include/aidge/backend/cpu/operator/ExpandImpl.hpp @@ -0,0 +1,35 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_H_ +#define AIDGE_CPU_OPERATOR_EXPANDIMPL_H_ + +#include <memory> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/Expand.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using ExpandImpl_cpu = OperatorImpl_cpu<Expand_Op, + void(const std::shared_ptr<Tensor> &, + const std::shared_ptr<Tensor> &, + void *, + const std::vector<DimSize_t> &)>; + +// Implementation entry point registration to Operator +REGISTRAR(Expand_Op, "cpu", Aidge::ExpandImpl_cpu::create); +} // namespace Aidge + +#endif /* _AIDGE_CPU_OPERATOR_EXPANDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ExpandImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ExpandImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3f4341c333ffd4032a90b8ee5d50a8475e81253e --- /dev/null +++ b/include/aidge/backend/cpu/operator/ExpandImpl_kernels.hpp @@ -0,0 +1,215 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_ + +#include "aidge/backend/cpu/operator/ExpandImpl.hpp" +#include "aidge/utils/Registrar.hpp" + +#include <aidge/data/Data.hpp> +#include <aidge/data/Tensor.hpp> +#include <aidge/data/half.hpp> +#include <aidge/scheduler/ProdConso.hpp> +#include <aidge/utils/Types.h> +#include <cmath> +#include <cstdint> // std::int32_t, std::int64_t +#include <memory> +#include <numeric> + +namespace { +// suppose values are contiguous in memory +template <class IO> +void expandContiguousArray(const std::size_t inputStackSize, + const std::size_t outputStackSize, + const IO *input, + IO *output) { + for (std::size_t i = 0; i < outputStackSize; ++i) { + output[i] = (inputStackSize == 1) ? input[0] : input[i]; + } + return; +} +} // namespace + +namespace Aidge { + +template <class IO> +void ExpandImpl_cpu_forward_kernel( + const std::shared_ptr<Tensor> &inData, + const std::shared_ptr<Tensor> &_inExpandShape, + void *_output, + const std::vector<DimSize_t> &outputDims) { + + // retrieving data of inputShape & dimensions of inputDims + // as the process will require to modify the values + IO *output = static_cast<IO *>(_output); + std::vector<DimSize_t> inExpandShape(_inExpandShape->size()); + for (DimSize_t i = 0; i < _inExpandShape->size(); ++i) { + inExpandShape[i] = _inExpandShape->get<std::int64_t>(i); + } + std::vector<DimSize_t> inDataDims = inData->dims(); + + // Example with 2 tensors + // [5,2,1,7] & [2,6,7] + // 1. Same number of dimensions but adding 1s to le left of "smallest" + // tensor -> [5,2,1,7] & [1,2,6,7] + // 2. Find the highest equal dimension -> 3 + // Exception: if the first diverging dimension is the last one, then -> + // 4 (dims.size()) + // 3. Compute the highest number of contiguous data -> 7 + // 4. 
Compute stride and offset step for the broadcast mechanism + // 5. Call a simple kernel + + // ## Compute compatible input dimensions + // special case for equal dimensions, the kernel is called with the entire + // arrays at once + + if (inDataDims == inExpandShape) { + const std::size_t input0ContiguousSize = + std::accumulate(inDataDims.cbegin(), + inDataDims.cend(), + static_cast<std::size_t>(1), + std::multiplies<std::size_t>()); + for (std::size_t i = 0; i < input0ContiguousSize; ++i) { + output[i] = inData->get<IO>(i); + } + return; + } + + // set dimensions to be of equal size by filling the smallest one with + // ones. + if (inDataDims.size() > inExpandShape.size()) { + inExpandShape.insert(inExpandShape.cbegin(), + inDataDims.size() - inExpandShape.size(), + static_cast<DimSize_t>(1)); + } else if (_inExpandShape->size() > inDataDims.size()) { + inDataDims.insert(inDataDims.cbegin(), + inExpandShape.size() - inDataDims.size(), + static_cast<DimSize_t>(1)); + } + + const std::size_t nbDims = inDataDims.size(); + + // Find the highest equal dimension + // std::size_t contiguousIdx = nbDims - 1; + std::size_t contiguousIdx = nbDims; + while (contiguousIdx-- > 0) { + // for (; contiguousIdx+1 > 0; --contiguousIdx) { + if (inDataDims[contiguousIdx] != inExpandShape[contiguousIdx]) { + break; + } + } + if (contiguousIdx == (nbDims - 1)) { + // last dimensions of one of the input Tensor are of size 1 + const std::vector<std::size_t> &dims = + (inDataDims[contiguousIdx] == 1) ? 
inDataDims : inExpandShape; + while ((contiguousIdx + 1 > 0) && (dims[contiguousIdx] == 1)) { + --contiguousIdx; + } + } + ++contiguousIdx; + + // Compute the highest number of contiguous data for each Tensor + const std::size_t inputDataContiguousSize = + std::accumulate(inDataDims.cbegin() + contiguousIdx, + inDataDims.cend(), + static_cast<std::size_t>(1), + std::multiplies<std::size_t>()); + const std::size_t outputContiguousSize = + std::accumulate(outputDims.cbegin() + contiguousIdx, + outputDims.cend(), + static_cast<std::size_t>(1), + std::multiplies<std::size_t>()); + + // initialize strides to iterate through data because of broadcasting + std::unique_ptr<std::int32_t[]> stridePostIn = + std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> strideStepIn = + std::make_unique<std::int32_t[]>(contiguousIdx); + if (contiguousIdx > 0) { + stridePostIn[contiguousIdx - 1] = 1; + for (std::size_t i = contiguousIdx - 2; + i != static_cast<std::size_t>(-1); + --i) { + stridePostIn[i] = stridePostIn[i + 1] * + static_cast<std::int32_t>(inDataDims[i + 1]); + } + for (std::size_t i = 0; i != contiguousIdx; ++i) { + strideStepIn[i] = (inDataDims[i] == 1) ? 
1 - stridePostIn[i] : 1; + } + } + + // variables for arrays offsets + std::size_t offsetInData = 0; + std::size_t offsetOut = 0; + + std::size_t dim = contiguousIdx - 1; + const std::size_t nbStacks = + std::accumulate(outputDims.cbegin(), + outputDims.cbegin() + contiguousIdx, + static_cast<std::size_t>(1), + std::multiplies<std::size_t>()); + + for (std::size_t stack = 0; stack < nbStacks;) { + expandContiguousArray<IO>( + inputDataContiguousSize, + outputContiguousSize, + &static_cast<const IO *>( + inData->getImpl() + ->rawPtr())[offsetInData * inputDataContiguousSize], + &output[offsetOut * outputContiguousSize]); + if (++stack < nbStacks) { + std::size_t tmpStack = stack; + while (tmpStack % outputDims[dim] == 0) { + tmpStack /= outputDims[dim]; + dim--; + } + offsetInData += strideStepIn[dim]; + ++offsetOut; + dim = contiguousIdx - 1; + } + } +} + +REGISTRAR(ExpandImpl_cpu, + {{DataType::Int16, DataType::Int64}, {DataType::Int16}}, + {ProdConso::inPlaceModel, + Aidge::ExpandImpl_cpu_forward_kernel<std::int16_t>, + nullptr}); +REGISTRAR(ExpandImpl_cpu, + {{DataType::Int32, DataType::Int64}, {DataType::Int32}}, + {ProdConso::inPlaceModel, + Aidge::ExpandImpl_cpu_forward_kernel<std::int32_t>, + nullptr}); +REGISTRAR(ExpandImpl_cpu, + {{DataType::Int64, DataType::Int64}, {DataType::Int64}}, + {ProdConso::inPlaceModel, + Aidge::ExpandImpl_cpu_forward_kernel<std::int64_t>, + nullptr}); + +REGISTRAR(ExpandImpl_cpu, + {{DataType::Float16, DataType::Int64}, {DataType::Float16}}, + {ProdConso::inPlaceModel, + Aidge::ExpandImpl_cpu_forward_kernel<half_float::half>, + nullptr}); +REGISTRAR(ExpandImpl_cpu, + {{DataType::Float32, DataType::Int64}, {DataType::Float32}}, + {ProdConso::inPlaceModel, + Aidge::ExpandImpl_cpu_forward_kernel<float>, + nullptr}); +REGISTRAR(ExpandImpl_cpu, + {{DataType::Float64, DataType::Int64}, {DataType::Float64}}, + {ProdConso::inPlaceModel, + Aidge::ExpandImpl_cpu_forward_kernel<double>, + nullptr}); +} // namespace Aidge + +#endif /* 
AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/HeavisideImpl.hpp b/include/aidge/backend/cpu/operator/HeavisideImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7a3ba9add1e98580c51a8416adc0d1feb5e1317a --- /dev/null +++ b/include/aidge/backend/cpu/operator/HeavisideImpl.hpp @@ -0,0 +1,32 @@ +/******************************************************************************** + * Copyright (c) 2025 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_ +#define AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_ + +#include <cstddef> // std::size_t + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/Heaviside.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/future_std/span.hpp" + +namespace Aidge { +using HeavisideImplCpu = + OperatorImpl_cpu<Heaviside_Op, + void(std::size_t, const void *, void *, const float), + void(const float, std::size_t, const void *, void *)>; + +// Implementation entry point registration for operator Heaviside +REGISTRAR(Heaviside_Op, "cpu", HeavisideImplCpu::create); +} // namespace Aidge + +#endif // AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_ diff --git a/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp b/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3fd6ca7de348ff18e75b2a88281d4db980b58774 --- /dev/null +++ b/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp @@ -0,0 +1,46 @@ +/******************************************************************************** + * Copyright (c) 2025 CEA-List + * + * This program and the 
accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include <cstddef> // std::size_t + +#include "aidge/backend/cpu/operator/HeavisideImpl.hpp" +#include "aidge/utils/ErrorHandling.hpp" + + +namespace Aidge { + +template <class I, class O> +void HeavisideImplCpuForwardKernel(std::size_t inputLenght, + const void *input_, + void *output_, + const float value) { + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = (input[i] > 0) ? 1 : (input[i] == 0 ? value : 0); + } +} + +// Kernels registration to implementation entry point +REGISTRAR(HeavisideImplCpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::HeavisideImplCpuForwardKernel<float, float>, + nullptr}); +} // namespace Aidge + +#endif // AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H__H_ diff --git a/include/aidge/backend/cpu/operator/MulImpl.hpp b/include/aidge/backend/cpu/operator/MulImpl.hpp index c927af9ebd4d658c764cc059df9778c273ba178e..eec5583bb548a3d3343b966c54cfccd1600b8f76 100644 --- a/include/aidge/backend/cpu/operator/MulImpl.hpp +++ b/include/aidge/backend/cpu/operator/MulImpl.hpp @@ -34,6 +34,7 @@ using MulImpl_cpu = OperatorImpl_cpu<Mul_Op, const std::size_t, const std::vector<std::size_t>, const std::vector<std::size_t>, + const std::vector<std::size_t>, const void*, const void*, const void*, diff --git a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp index 556dd56cd32f28de14a43d20b97deb0083341fee..36acb9199c51e900287ca9b262322aa86287d838 
100644 --- a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp @@ -149,61 +149,53 @@ void MulImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, template <class I1, class I2, class O> void MulImpl_cpu_backward_kernel(const std::size_t input0Length, - const std::size_t input1Length, - const std::size_t grad0Length, - const std::vector<std::size_t> input0Dims, - const std::vector<std::size_t> input1Dims, - const void* input0_, - const void* input1_, - const void* grad_output_, - void* gradientInput0, - void* gradientInput1) + const std::size_t input1Length, + const std::size_t gradOutputLength, + const std::vector<std::size_t>& dims0, + const std::vector<std::size_t>& dims1, + const std::vector<std::size_t>& outputDims, + const void* input0_, + const void* input1_, + const void* grad_output_, + void* gradientInput0_, + void* gradientInput1_) { - const auto* input0 = static_cast<const I1*>(input0_); - const auto* input1 = static_cast<const I1*>(input1_); - const auto* grad_output = static_cast<const O*>(grad_output_); - auto* grad_input_0 = static_cast<I1*>(gradientInput0); - auto* grad_input_1 = static_cast<I2*>(gradientInput1); - - - if(input0Dims.size() >= input1Dims.size()) - { - AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors"); - - for(auto i = 0U; i < input0Length; ++i) - { - const auto indices = getMultiDimIndices(input1Dims, i); - const auto flattenedIndex = getFlattenedIndex(input1Dims, indices); - - grad_input_0[i] = input1[flattenedIndex] * grad_output[i]; - } - - for(std::size_t i = 0 ; i < grad0Length; ++i) - { - const auto indices = getMultiDimIndices(input1Dims, i); - const auto flattenedIndex = getFlattenedIndex(input1Dims, indices); - - grad_input_1[flattenedIndex] += input0[i] * grad_output[i]; + const I1* input0 = static_cast<const I1*>(input0_); + const I2* input1 = static_cast<const I2*>(input1_); + const O* grad_output = 
static_cast<const O*>(grad_output_); + auto* grad_input_0 = static_cast<I1*>(gradientInput0_); + auto* grad_input_1 = static_cast<I2*>(gradientInput1_); + + std::fill_n(grad_input_0, input0Length, static_cast<I1>(0)); + std::fill_n(grad_input_1, input1Length, static_cast<I2>(0)); + + // Broadcast dims0 and dims1 to match the shape of outputDims + auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0); + auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1); + + for (std::size_t i = 0; i < gradOutputLength; ++i) { + auto idxOutputGrad = getMultiDimIndices(outputDims, i); + std::vector<std::size_t> idxInput0(broadcastedDims0.size()); + std::vector<std::size_t> idxInput1(broadcastedDims1.size()); + + // Map output indices to input0 indices, considering broadcasting + for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) { + // If input0 is broadcasted along this dimension (== 1) or both dimensions are 1, index is 0. + // idxInput0 represent the multi dim index of input0 contributing + // to the output at index i. + idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension]; } - } else { - AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors"); - - for(auto i = 0U; i < input1Length; ++i) - { - const auto indices = getMultiDimIndices(input0Dims, i); - const auto flattenedIndex = getFlattenedIndex(input0Dims, indices); - - grad_input_1[i] = input0[flattenedIndex] * grad_output[i]; + for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) { + idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 
0 : idxOutputGrad[dimension]; } - for(std::size_t i = 0 ; i < grad0Length; ++i) - { - const auto indices = getMultiDimIndices(input0Dims, i); - const auto flattenedIndex = getFlattenedIndex(input0Dims, indices); + // We have to access tensors with a flat index, hence the conversion + auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0); + auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1); - grad_input_0[flattenedIndex] += input1[i] * grad_output[i]; - } + grad_input_0[idx0] += static_cast<I1>(grad_output[i] * input1[idx1]); + grad_input_1[idx1] += static_cast<I2>(grad_output[i] * input0[idx0]); } } @@ -211,6 +203,9 @@ void MulImpl_cpu_backward_kernel(const std::size_t input0Length, REGISTRAR(MulImpl_cpu, {DataType::Float32}, {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, float, float>, Aidge::MulImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(MulImpl_cpu, + {{{DataType::Float32}, {DataType::Float64}}, {DataType::Float32}}, + {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, double, float>, Aidge::MulImpl_cpu_backward_kernel<float, double, float>}); REGISTRAR(MulImpl_cpu, {DataType::Float64}, {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<double, double, double>, Aidge::MulImpl_cpu_backward_kernel<double, double, double>}); diff --git a/include/aidge/backend/cpu/operator/ResizeImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ResizeImpl_kernels.hpp index 6a22ff4ec9d7beaf05be3b479b43dd3ad69bc74b..6449417baf855620669aba11ebca16d9384c4e7c 100644 --- a/include/aidge/backend/cpu/operator/ResizeImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ResizeImpl_kernels.hpp @@ -99,30 +99,31 @@ void ResizeImpl_cpu_forward_kernel( } return; } + // Kernels registration to implementation entry point REGISTRAR(ResizeImpl_cpu, {{{DataType::Int16}, - {DataType::Float32}, - {DataType::Float32}, - {DataType::UInt64}}, + {DataType::Any}, + {DataType::Any}, + {DataType::Any}}, {DataType::Int16}}, 
{ProdConso::inPlaceModel, ResizeImpl_cpu_forward_kernel<int16_t>, nullptr}); REGISTRAR(ResizeImpl_cpu, {{{DataType::Int32}, - {DataType::Float32}, - {DataType::Float32}, - {DataType::UInt64}}, + {DataType::Any}, + {DataType::Any}, + {DataType::Any}}, {DataType::Int32}}, {ProdConso::inPlaceModel, ResizeImpl_cpu_forward_kernel<int32_t>, nullptr}); REGISTRAR(ResizeImpl_cpu, {{{DataType::Int64}, - {DataType::Float32}, - {DataType::Float32}, - {DataType::Int64}}, + {DataType::Any}, + {DataType::Any}, + {DataType::Any}}, {DataType::UInt64}}, {ProdConso::inPlaceModel, ResizeImpl_cpu_forward_kernel<int64_t>, @@ -130,27 +131,27 @@ REGISTRAR(ResizeImpl_cpu, REGISTRAR(ResizeImpl_cpu, {{{DataType::Float16}, - {DataType::Float32}, - {DataType::Float32}, - {DataType::UInt64}}, + {DataType::Any}, + {DataType::Any}, + {DataType::Any}}, {DataType::Float16}}, {ProdConso::inPlaceModel, ResizeImpl_cpu_forward_kernel<half_float::half>, nullptr}); REGISTRAR(ResizeImpl_cpu, {{{DataType::Float32}, - {DataType::Float32}, - {DataType::Float32}, - {DataType::UInt64}}, + {DataType::Any}, + {DataType::Any}, + {DataType::Any}}, {DataType::Float32}}, {ProdConso::inPlaceModel, ResizeImpl_cpu_forward_kernel<float>, nullptr}); REGISTRAR(ResizeImpl_cpu, {{{DataType::Float64}, - {DataType::Float32}, - {DataType::Float32}, - {DataType::UInt64}}, + {DataType::Any}, + {DataType::Any}, + {DataType::Any}}, {DataType::Float64}}, {ProdConso::inPlaceModel, ResizeImpl_cpu_forward_kernel<double>, diff --git a/include/aidge/backend/cpu/operator/WeightInterleavedImpl.hpp b/include/aidge/backend/cpu/operator/WeightInterleavedImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ff5c4778f530912e8bdf97ffadb2f546789e2c48 --- /dev/null +++ b/include/aidge/backend/cpu/operator/WeightInterleavedImpl.hpp @@ -0,0 +1,37 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made 
available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_ +#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_ + +#include <array> +#include <memory> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/WeightInterleaving.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using WeightInterleavedImpl_cpu = OperatorImpl_cpu<WeightInterleaving_Op, + void(const DimSize_t, + const DimSize_t, + const DimSize_t, + const void *, + void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(WeightInterleaving_Op, "cpu", Aidge::WeightInterleavedImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_WeightInterleavingIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/WeightInterleavedImpl_kernels.hpp b/include/aidge/backend/cpu/operator/WeightInterleavedImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..18557f8fb5fcdd31476904d273d4d2d7f37a66b5 --- /dev/null +++ b/include/aidge/backend/cpu/operator/WeightInterleavedImpl_kernels.hpp @@ -0,0 +1,143 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVEDIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVEDIMPL_KERNELS_H_ + +#include <cstddef> // std::size_t +#include <cstdint> // std::int8_t, std::uint8_t + +#include "aidge/backend/cpu/operator/WeightInterleavedImpl.hpp" +#include "aidge/data/DataType.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/ErrorHandling.hpp" + + +namespace Aidge { + + /** + * @brief Compacts 8-bit data into a smaller bit-width representation. + * + * This function takes an array of 8-bit data and compacts it into smaller chunks + * based on the specified bit-width `nb_bits`. Each element in `compactData` will + * store multiple packed `nb_bits` segments extracted from `data`. + * + * @param data The input array of 8-bit values to be compacted. + * @param dataSize The size of the input `data` array. + * @param compactData The output array storing the compacted data. + * @param nb_bits The number of bits to extract from each `data` element (must be less than 8). 
+ */ + template <typename T> + void compact_data(const T* data, std::size_t dataSize, T* compactData, std::uint8_t nb_bits) { + AIDGE_ASSERT(nb_bits > 0 && nb_bits < 5, "Cannot compact with the given nb_bits"); // Ensure valid bit width + + // Mask to extract `nb_bits` from each data element + const unsigned int mask = (1U << nb_bits) - 1; + + // Calculate the number of `nb_bits` segments that fit into an 8-bit compacted value + const unsigned int nbSlot = 8 / nb_bits; + + // Case nb_bits=3 or 4, then shift is 4 + // Case nb_bits=2, then shift is 2 + // Case nb_bits=1, then shift is 1 + std::uint8_t shift = 8 / nbSlot; + + const unsigned int nbFullCompactbytes = dataSize / nbSlot; + + // Main loop to process data in groups of `nbSlot` + for (std::size_t i = 0; i < nbFullCompactbytes; ++i) { + T compact = 0; + + for (unsigned int j = 0; j < nbSlot; ++j) { + compact |= (data[i * nbSlot + j] & mask); // Apply mask to keep `nb_bits` only + + // Shift only if not on the last slot to make room for the next `nb_bits` + if (j < nbSlot - 1) { + compact <<= shift; + } + } + // Store the compacted value in the output array + compactData[i] = compact; + } + + + // Handle any remaining data elements (if dataSize is not a multiple of nbSlot). 
+ std::size_t remaining = dataSize % nbSlot; + if (remaining != 0) { + std::int8_t compact = 0; + for (std::size_t j = 0; j < remaining; ++j) { + compact |= (data[nbFullCompactbytes*nbSlot + j] & mask); + + if (j < remaining - 1) { + compact <<= shift; + } + } + compact <<= (shift*(nbSlot - remaining)); + // Store the last compacted value + compactData[dataSize / nbSlot] = compact; + } + } + +template <class I, class O, int nb_bits> +void WeightInterleavedImpl_cpu_forward_kernel(const DimSize_t input_interleaving, + const DimSize_t nb_interleaving, + const DimSize_t output_interleaving, + const void* input_, + void* output_) { + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + // Aidge::compact_data(const std::int8_t* data, std::size_t dataSize, std::int8_t* compactData, std::uint8_t nb_bits) { + for (std::size_t i=0; i<nb_interleaving; ++i){ + compact_data(input+(i*input_interleaving), input_interleaving, output+(i*output_interleaving), static_cast<std::uint8_t>(nb_bits)); + } + +} + + +REGISTRAR(WeightInterleavedImpl_cpu, + {ImplSpec::IOSpec{DataType::Int4, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Int4>, DataFormat::NHWC}}, + {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 4>, nullptr}); +REGISTRAR(WeightInterleavedImpl_cpu, + {ImplSpec::IOSpec{DataType::Int3, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Int3>, DataFormat::NHWC}}, + {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 3>, nullptr}); +REGISTRAR(WeightInterleavedImpl_cpu, + {ImplSpec::IOSpec{DataType::Int2, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Int2>, DataFormat::NHWC}}, + {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 2>, nullptr}); +REGISTRAR(WeightInterleavedImpl_cpu, + {ImplSpec::IOSpec{DataType::Binary, DataFormat::NHWC}, 
ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Binary>, DataFormat::NHWC}}, + {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 1>, nullptr}); + +REGISTRAR(WeightInterleavedImpl_cpu, + {ImplSpec::IOSpec{DataType::UInt4, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::UInt4>, DataFormat::NHWC}}, + {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<uint8_t, uint8_t, 4>, nullptr}); +REGISTRAR(WeightInterleavedImpl_cpu, + {ImplSpec::IOSpec{DataType::UInt3, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::UInt3>, DataFormat::NHWC}}, + {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<uint8_t, uint8_t, 3>, nullptr}); +REGISTRAR(WeightInterleavedImpl_cpu, + {ImplSpec::IOSpec{DataType::UInt2, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::UInt2>, DataFormat::NHWC}}, + {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<uint8_t, uint8_t, 2>, nullptr}); + + +// REGISTRAR(WeightInterleavedImpl_cpu, +// {ImplSpec::IOSpec{DataType::Int4, DataFormat::NHWC}}, +// {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 4>, nullptr}); +// REGISTRAR(WeightInterleavedImpl_cpu, +// {ImplSpec::IOSpec{DataType::Int3, DataFormat::NHWC}}, +// {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 3>, nullptr}); +// REGISTRAR(WeightInterleavedImpl_cpu, +// {ImplSpec::IOSpec{DataType::Int2, DataFormat::NHWC}}, +// {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 2>, nullptr}); + + +} + +#endif /* AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVEDIMPL_KERNELS_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/version.h.in b/include/aidge/backend/version.h.in new file mode 100644 index 0000000000000000000000000000000000000000..4b876f63002972c1f8f1340b70cdecdace911012 --- /dev/null +++ 
b/include/aidge/backend/version.h.in
@@ -0,0 +1,11 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+namespace Aidge {
+static constexpr const int PROJECT_VERSION_MAJOR = @PROJECT_VERSION_MAJOR@;
+static constexpr const int PROJECT_VERSION_MINOR = @PROJECT_VERSION_MINOR@;
+static constexpr const int PROJECT_VERSION_PATCH = @PROJECT_VERSION_PATCH@;
+static constexpr const char * PROJECT_VERSION = "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@";
+static constexpr const char * PROJECT_GIT_HASH = "@GIT_COMMIT_HASH@";
+}
+#endif // VERSION_H
diff --git a/include/aidge/utils/sys_info/CpuVersionInfo.hpp b/include/aidge/utils/sys_info/CpuVersionInfo.hpp
index 887ce839e079349d9d64505f7184831ffc4cf1c2..3df70d139bd4ad26e0c88d2ee5192e508bc3f71a 100644
--- a/include/aidge/utils/sys_info/CpuVersionInfo.hpp
+++ b/include/aidge/utils/sys_info/CpuVersionInfo.hpp
@@ -2,17 +2,20 @@
 #define AIDGE_UTILS_SYS_INFO_CPU_VERSION_INFO_H
 
 #include "aidge/utils/Log.hpp"
+#include "aidge/backend/cpu_version.h"
 
 namespace Aidge {
 
-#ifndef PROJECT_VERSION // Normally defined in CMakeLists.txt
-#define PROJECT_VERSION "Unknown version"
-#endif
-#ifndef GIT_COMMIT_HASH
-#define GIT_COMMIT_HASH ""
-#endif
-void showCpuVersion() {
-    Log::info("Aidge backend CPU: {} ({}), {} {}", PROJECT_VERSION, GIT_COMMIT_HASH, __DATE__, __TIME__);
+constexpr inline const char * getBackendCPUProjectVersion(){
+    return PROJECT_VERSION;
+}
+
+constexpr inline const char * getBackendCPUGitHash(){
+    return PROJECT_GIT_HASH;
+}
+
+void showBackendCpuVersion() {
+    Log::info("Aidge backend CPU: {} ({}), {} {}", getBackendCPUProjectVersion(), getBackendCPUGitHash(), __DATE__, __TIME__);
     // Compiler version
 #if defined(__clang__)
 /* Clang/LLVM.
---------------------------------------------- */ diff --git a/project_name.txt b/project_name.txt new file mode 100644 index 0000000000000000000000000000000000000000..25caafdd8ab794dbcb8c8cdef5097e2143accc6a --- /dev/null +++ b/project_name.txt @@ -0,0 +1 @@ +aidge_backend_cpu diff --git a/pyproject.toml b/pyproject.toml index 3c08302d0fcd4e77943d165fab802a22f4dc39cc..39bed4d209581b272a8491fbce6c3f28029fdd57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,20 +4,28 @@ description="CPU implementation of operators of the AIDGE framework" dependencies = [ "numpy", ] -requires-python = ">= 3.7" +requires-python = ">= 3.8" readme = "README.md" license = { file = "LICENSE" } -classifiers = [ +classifiers = [ "Development Status :: 2 - Pre-Alpha", "Programming Language :: Python :: 3" ] -dynamic = ["version"] # defined in tool.setuptools_scm +dynamic = ["version"] # defined by pbr + + +[project.urls] +Homepage = "https://www.deepgreen.ai/en/platform" +Documentation = "https://eclipse-aidge.readthedocs.io/en/latest/" +Repository = "https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu" +Issues = "https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu/-/issues" +Changelog = "https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu/-/releases" [build-system] requires = [ "setuptools>=64", - "setuptools_scm[toml]==7.1.0", - "cmake>=3.18.4.post1" + "cmake>=3.18.4.post1", + "pbr" ] build-backend = "setuptools.build_meta" @@ -29,9 +37,6 @@ where = ["."] # list of folders that contain the packages (["."] by default) include = ["aidge_backend_cpu*"] # package names should match these glob patterns (["*"] by default) exclude = ["aidge_backend_cpu.unit_tests*"] # exclude packages matching these glob patterns (empty by default) namespaces = false # to disable scanning PEP 420 namespaces (true by default) -# SETUPTOOLS_SCM -[tool.setuptools_scm] -write_to = "aidge_backend_cpu/_version.py" ##################################################### # CIBUILDWHEEL diff --git 
a/python_binding/pybind_cpu.cpp b/python_binding/pybind_cpu.cpp index d5022e1d469ae4171e796baed6c1aa061dd95765..e576de0849ec6e0739b7cad32e8e8b003c092b7b 100644 --- a/python_binding/pybind_cpu.cpp +++ b/python_binding/pybind_cpu.cpp @@ -6,10 +6,10 @@ namespace py = pybind11; namespace Aidge { -void init_cpu_sys_info(py::module& m); +void init_CpuVersionInfo(py::module& m); void init_Aidge(py::module& m){ - init_cpu_sys_info(m); + init_CpuVersionInfo(m); } diff --git a/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp b/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp index 573bee3659c65f90935e03c06eff5a2998bb9f5b..7461dd955e8b3e64b086a51664b3d73b6bfaed93 100644 --- a/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp +++ b/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp @@ -3,7 +3,9 @@ namespace py = pybind11; namespace Aidge { -void init_cpu_sys_info(py::module& m){ - m.def("show_cpu_version", &showCpuVersion); +void init_CpuVersionInfo(py::module& m){ + m.def("show_version", &showBackendCpuVersion); + m.def("get_project_version", &getBackendCPUProjectVersion); + m.def("get_git_hash", &getBackendCPUGitHash); } } diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..aa0f227f6688468a5ab93384f7b1670086000035 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +# pbr file +[metadata] +version = file: version.txt diff --git a/setup.py b/setup.py index 22cbd9732c8b9e1099c3e322032e8377f6d4506b..366a4825da5d8ad369834b2231152a8c0424c9e8 100644 --- a/setup.py +++ b/setup.py @@ -11,8 +11,10 @@ from math import ceil from setuptools import setup, Extension from setuptools.command.build_ext import build_ext +def get_project_name() -> str: + return open(pathlib.Path().absolute() / "project_name.txt", "r").read().strip() -PROJECT_NAME = "aidge_backend_cpu" +PROJECT_NAME = get_project_name() SETUP_DIR = pathlib.Path(__file__).parent @@ -37,6 +39,7 @@ class AidgePkgBuild(build_ext): # This lists the number of 
processors available on the machine # The compilation will use half of them max_jobs = str(ceil(multiprocessing.cpu_count() / 2)) + max_jobs = os.environ.get("AIDGE_NB_PROC", max_jobs) cwd = pathlib.Path().absolute() @@ -51,14 +54,19 @@ class AidgePkgBuild(build_ext): package_prefix = build_lib if not self.editable_mode else SETUP_DIR pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute() - os.chdir(str(build_temp)) - - compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release") install_path = ( os.path.join(sys.prefix, "lib", "libAidge") if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] ) + + # Read environment variables for CMake options + c_compiler = os.environ.get("AIDGE_C_COMPILER", "gcc") + cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++") + build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release") + asan = os.environ.get("AIDGE_ASAN", "OFF") + cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "") + build_gen = os.environ.get("AIDGE_BUILD_GEN", "") build_gen_opts = ( ["-G", build_gen] @@ -66,27 +74,36 @@ class AidgePkgBuild(build_ext): else [] ) test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF") - - self.spawn( - [ - "cmake", - *build_gen_opts, - str(cwd), - f"-DTEST={test_onoff}", - f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", - f"-DCMAKE_BUILD_TYPE={compile_type}", - "-DPYBIND=ON", - f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}", - "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", - "-DCOVERAGE=OFF", - ] - ) + + os.chdir(str(build_temp)) + + cmake_cmd = [ + "cmake", + *build_gen_opts, + str(cwd), + f"-DTEST={test_onoff}", + f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", + f"-DCMAKE_BUILD_TYPE={build_type}", + f"-DCMAKE_C_COMPILER={c_compiler}", + f"-DCMAKE_CXX_COMPILER={cxx_compiler}", + f"-DENABLE_ASAN={asan}", + "-DPYBIND=ON", + f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}", + "-DCMAKE_EXPORT_COMPILE_COMMANDS=1", + "-DCOVERAGE=OFF", + ] + + # Append architecture-specific arguments if provided 
+ if cmake_arch: + cmake_cmd.append(cmake_arch) + + self.spawn(cmake_cmd) if not self.dry_run: self.spawn( - ["cmake", "--build", ".", "--config", compile_type, "-j", max_jobs] + ["cmake", "--build", ".", "--config", build_type, "-j", max_jobs] ) - self.spawn(["cmake", "--install", ".", "--config", compile_type]) + self.spawn(["cmake", "--install", ".", "--config", build_type]) os.chdir(str(cwd)) diff --git a/src/operator/ExpandImpl.cpp b/src/operator/ExpandImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dfd4d2d82edc4dfb5bbaec6f5b33bf1c00bf3c75 --- /dev/null +++ b/src/operator/ExpandImpl.cpp @@ -0,0 +1,56 @@ + +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/ExpandImpl.hpp" + +#include <vector> + +#include "aidge/backend/cpu/operator/ExpandImpl_kernels.hpp" +#include "aidge/data/Data.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Expand.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +template <> void ExpandImpl_cpu::forward() { + const Expand_Op &op_ = static_cast<const Expand_Op &>(mOp); + // Check if input are provided + AIDGE_ASSERT(op_.getInput(0), + "{}: missing input 0: {}", + Expand_Op::Type, + Expand_Op::getInputsName()[0]); + AIDGE_ASSERT(op_.getInput(1), + "{}: missing input 1: {}", + Expand_Op::Type, + Expand_Op::getInputsName()[1]); + + // Find the correct kernel type + const auto impl = + Registrar<ExpandImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + 
impl.forward(op_.getInput(0), + op_.getInput(1), + op_.getOutput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->dims()); +} + +template <> void ExpandImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Expand_Op on backend cpu"); +} + +} // namespace Aidge diff --git a/src/operator/HeavisideImpl.cpp b/src/operator/HeavisideImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..56ceb9b0b474d416f25d77b533373d4b193532b8 --- /dev/null +++ b/src/operator/HeavisideImpl.cpp @@ -0,0 +1,37 @@ +/******************************************************************************** + * Copyright (c) 2025 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/HeavisideImpl.hpp" + +#include <stdexcept> + +#include "aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/ErrorHandling.hpp" + +template <> void Aidge::HeavisideImplCpu::forward() { + const Heaviside_Op &op_ = dynamic_cast<const Heaviside_Op &>(mOp); + std::shared_ptr<Tensor> input0 = op_.getInput(0); + std::shared_ptr<Tensor> output0 = op_.getOutput(0); + AIDGE_ASSERT(input0, "missing input #0"); + + const auto impl = + Registrar<HeavisideImplCpu>::create(getBestMatch(getRequiredSpec())); + + impl.forward(input0->size(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0)), + op_.value()); +} + +template <> void Aidge::HeavisideImplCpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Heaviside backward not implemented yet"); +} diff --git a/src/operator/MulImpl.cpp b/src/operator/MulImpl.cpp index 
422bdd005f058fc9200cf5f7962bfc8d5877e6e1..a90d521a759f1ce6f4883bdd0bc05d84daa0f668 100644 --- a/src/operator/MulImpl.cpp +++ b/src/operator/MulImpl.cpp @@ -58,6 +58,7 @@ void Aidge::MulImpl_cpu::backward() { /* grad0Length */ out0grad->size(), /* input0Dims */ in0->dims(), /* input1Dims */ in1->dims(), + out0grad->dims(), getCPUPtr(in0), getCPUPtr(in1), getCPUPtr(out0grad), diff --git a/src/operator/WeightInterleavedImpl.cpp b/src/operator/WeightInterleavedImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2c9f3a6e8df35616a4f7ffae86cbeacd841f44bf --- /dev/null +++ b/src/operator/WeightInterleavedImpl.cpp @@ -0,0 +1,75 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/WeightInterleavedImpl.hpp" + +#include <cstddef> // std::size_t +#include <functional> +#include <memory> +#include <tuple> + +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/WeightInterleavedImpl_kernels.hpp" +#include "aidge/operator/WeightInterleaving.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Types.h" + + +template <> +void Aidge::WeightInterleavedImpl_cpu::forward() +{ + const WeightInterleaving_Op& op_ = dynamic_cast<const WeightInterleaving_Op&>(mOp); + AIDGE_ASSERT(op_.getInput(0), "missing input #0"); + + const auto impl = Registrar<WeightInterleavedImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). 
We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback; + const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0))); + + // inputInterleaving is the number of consecutive input elements that will be compacted + // Here the interleaving is the last dimension (cf STM32 low bit kernels) + std::size_t inputInterleaving = input0.dims().back(); + + // The resulting compacted dimension was computed in forwardDims and the output tensor was resized + std::size_t outputInterleaving = op_.getOutput(0)->dims().back(); + + // nb_interleaving is the number of compacted segments + std::size_t nbInterleaving; + + // Determine the number of segment to compact + if (input0.dims().size() > 1){ + nbInterleaving = std::accumulate( + input0.dims().cbegin(), + std::prev(input0.dims().cend()), // Exclude the last element + std::size_t(1), + std::multiplies<std::size_t>()); + } else { + // Case when the weight tensor is only one dimension + nbInterleaving = 1; + } + + impl.forward(inputInterleaving, + nbInterleaving, + outputInterleaving, + input0.getImpl()->rawPtr(), + getCPUPtr(mOp.getRawOutput(0))); + + +} + +template <> +void Aidge::WeightInterleavedImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for WeightInterleaving_Op on backend cpu"); +} \ No newline at end of file diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 5984524fdc8c596641e505897d16e12de78024cc..6c7af9c376a0a58a361880edad9340e3d845febc 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -1,12 +1,21 @@ -Include(FetchContent) +set(CATCH2_MIN_VERSION 3.3.0) -FetchContent_Declare( - Catch2 - GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.7.1 # or a later release -) +find_package(Catch2 ${CATCH2_MIN_VERSION} QUIET) -FetchContent_MakeAvailable(Catch2) +if(NOT Catch2_FOUND) + message(STATUS "Catch2 not found in system, 
retrieving from git") + Include(FetchContent) + + FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG devel # or a later release + ) + + FetchContent_MakeAvailable(Catch2) +else() + message(STATUS "Found system Catch2 version ${Catch2_VERSION}") +endif() file(GLOB_RECURSE src_files "*.cpp") diff --git a/unit_tests/data/Test_TensorImpl.cpp b/unit_tests/data/Test_TensorImpl.cpp index fd938f10a947d1520600a1d00022eeb970cd76e6..2bc1e7d4c6f8a7cfbae8807e3021f9c5dd89fff6 100644 --- a/unit_tests/data/Test_TensorImpl.cpp +++ b/unit_tests/data/Test_TensorImpl.cpp @@ -9,19 +9,23 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> -#include <cstddef> // std::size_t -#include <cstdint> // std::uint16_t -#include <chrono> -#include <iostream> +#include <chrono> // std::micro, std::chrono::time_point, + // std::chrono::system_clock +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t, std::uint16_t #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <random> // std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> -#include "aidge/data/Tensor.hpp" #include "aidge/backend/cpu/data/TensorImpl.hpp" -#include "aidge/operator/Add.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/data/Data.hpp" +#include "aidge/operator/Add.hpp" +#include "aidge/utils/ArrayHelpers.hpp" namespace Aidge { @@ -35,8 +39,7 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add][Data]") { std::uniform_int_distribution<int> boolDist(0,1); // Create MatMul Operator - std::shared_ptr<Node> mySub = Add(); - auto op = std::static_pointer_cast<OperatorTensor>(mySub-> getOperator()); + std::shared_ptr<Add_Op> op = 
std::make_shared<Add_Op>(); op->setDataType(DataType::Float32); op->setBackend("cpu"); diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp index bca4025705cb1c851dcf3e9accbf016c4535120a..bff9629be152163b2aa92bdc9d0c3029d7987b9b 100644 --- a/unit_tests/operator/Test_AddImpl.cpp +++ b/unit_tests/operator/Test_AddImpl.cpp @@ -9,12 +9,16 @@ * ********************************************************************************/ +#include <memory> + #include <catch2/catch_test_macros.hpp> +#include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/Add.hpp" - -#include "aidge/backend/cpu.hpp" +#include "aidge/utils/ArrayHelpers.hpp" using namespace Aidge; @@ -96,7 +100,7 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") { }); // std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{100,200}}); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + Tensor expectedOutput = Array4D<int,3,3,3,2> { { // { // {{ 120, 222},{ 124, 226},{ 128, 230}}, // @@ -114,7 +118,7 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") { {{ 144, 246},{ 148, 250},{152, 254}} // } // } // - }); // + }; // std::shared_ptr<Node> myAdd_0 = Add(); std::shared_ptr<Node> myAdd_1 = Add(); @@ -131,8 +135,8 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") { op_1->setBackend("cpu"); myAdd_0->forward(); myAdd_1->forward(); - op_1->getOutput(0)->print(); - expectedOutput->print(); - REQUIRE(*op_1->getOutput(0) == *expectedOutput); + Log::info("Add_1 Tensor:\n{}", *(op_1->getOutput(0))); + Log::info("Expected Add_1 Tensor:\n{}", expectedOutput); + REQUIRE(*op_1->getOutput(0) == expectedOutput); } } \ No newline at end of file diff --git a/unit_tests/operator/Test_AndImpl.cpp b/unit_tests/operator/Test_AndImpl.cpp index 
053bb3ea4ed913bd388f3ae049c4d6402ad58d59..c2309dce5f32862ad9aeceaf98430b75ab7be6ef 100644 --- a/unit_tests/operator/Test_AndImpl.cpp +++ b/unit_tests/operator/Test_AndImpl.cpp @@ -9,13 +9,19 @@ * ********************************************************************************/ +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <memory> +#include <random> // std::random_device, std::mt19937, std::uniform_int_distribution, std::uniform_real_distribution + #include <catch2/catch_test_macros.hpp> -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include "aidge/backend/cpu/operator/AndImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/And.hpp" - -#include "aidge/backend/cpu.hpp" +#include "aidge/utils/ArrayHelpers.hpp" using namespace Aidge; @@ -180,7 +186,7 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { } // }); // - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { { // { // diff --git a/unit_tests/operator/Test_ArgMaxImpl.cpp b/unit_tests/operator/Test_ArgMaxImpl.cpp index 9915d90423e976db1bdd2a694a2cfd7beb380cee..894697f65a6f73af27a568b994c1dd2dc6b118f3 100644 --- a/unit_tests/operator/Test_ArgMaxImpl.cpp +++ b/unit_tests/operator/Test_ArgMaxImpl.cpp @@ -9,17 +9,20 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <random> // std::random_device, std::mt19937, std::uniform_int_distribution, 
std::uniform_real_distribution + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> +#include "aidge/backend/cpu/operator/ArgMaxImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/ArgMax.hpp" -#include "aidge/operator/Conv.hpp" - -#include "aidge/backend/cpu.hpp" -#include "aidge/utils/TensorUtils.hpp" +#include "aidge/utils/ArrayHelpers.hpp" using namespace Aidge; @@ -118,8 +121,8 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { SECTION("Axis 2") { Tensor myOutput = Tensor(Array3D<float,2,3, 1> { - { - { + { + { {3.0}, {2.0}, {1.0} @@ -144,7 +147,7 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { SECTION("Axis 2 with keep_dims false") { Tensor myOutput = Tensor(Array2D<float,2,3> { - { + { { 3.0, 2.0, 1.0 }, { 2.0, 1.0, 0.0 } } @@ -196,10 +199,11 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { op->associateInput(0,myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); - std::cout << " ............... 
"<< std::endl; + fmt::print("{:.^20}\n", "forward"); myArgMax->forward(); + fmt::print("{:.^20}\n", "result"); op->getOutput(0)->print(); - std::cout <<"------"<<std::endl; + fmt::print("{:.^20}\n", "truth"); myOutput.print(); REQUIRE(*(op->getOutput(0)) == myOutput); diff --git a/unit_tests/operator/Test_Atan.cpp b/unit_tests/operator/Test_Atan.cpp index 9548e35d81b0423125424a4198d82558c4e57df4..b9438db0b38642e8c49e46451544a68714ac4de6 100644 --- a/unit_tests/operator/Test_Atan.cpp +++ b/unit_tests/operator/Test_Atan.cpp @@ -9,14 +9,18 @@ * ********************************************************************************/ +#include <cmath> // std::abs +#include <cstddef> // std::size_t +#include <memory> + #include <catch2/catch_test_macros.hpp> +#include "aidge/backend/cpu/operator/AtanImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/Atan.hpp" - -#include "aidge/backend/cpu.hpp" - -#include <memory> +#include "aidge/utils/ArrayHelpers.hpp" using namespace Aidge; @@ -32,7 +36,7 @@ TEST_CASE("[cpu/operator] Atan(forward)") { 0.09486303, 0.16007232, 0.40421187, 0.4102045, 0.39055911}}); std::shared_ptr<Node> myAtan = Atan(); - auto op = std::static_pointer_cast<OperatorTensor>(myAtan->getOperator()); + auto op = std::static_pointer_cast<Atan_Op>(myAtan->getOperator()); op->associateInput(0, input0); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -61,7 +65,7 @@ TEST_CASE("[cpu/operator] Atan(forward)") { {0.75377332, 0.77411225, 0.32928031}}}}); std::shared_ptr<Node> myAtan = Atan(); - auto op = std::static_pointer_cast<OperatorTensor>(myAtan->getOperator()); + auto op = std::static_pointer_cast<Atan_Op>(myAtan->getOperator()); op->associateInput(0, input0); op->setDataType(DataType::Float32); op->setBackend("cpu"); diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp index 
aaa2757830c245275d02792a7a5a2eb1db32d7b8..372febc61d04c2ba983dd33f009fe5bf1d2908a0 100644 --- a/unit_tests/operator/Test_AvgPoolingImpl.cpp +++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp @@ -9,14 +9,18 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> +#include <cmath> // std::abs +#include <cstddef> // std::size_t #include <memory> -#include <cstdlib> +#include <catch2/catch_test_macros.hpp> + +#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/AvgPooling.hpp" - -#include "aidge/backend/cpu.hpp" +#include "aidge/utils/ArrayHelpers.hpp" using namespace Aidge; @@ -53,7 +57,7 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") { }); SECTION("Stride") { std::shared_ptr<Node> myAvgPool = AvgPooling({2,2}, "mycdw", {2,2}); - auto op = std::static_pointer_cast<OperatorTensor>(myAvgPool -> getOperator()); + auto op = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool -> getOperator()); std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> { { @@ -90,7 +94,7 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") { } }); std::shared_ptr<Node> myAvgPool = AvgPooling({3,3}, "mycdw", {3,3}); - auto op = std::static_pointer_cast<OperatorTensor>(myAvgPool -> getOperator()); + auto op = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool -> getOperator()); Tensor myOutput = Array4D<float,1,1,1,1> { {{{{(0.3745 + 0.9507 + 0.7320 + 0.5987 + 0.1560 + 0.1560 + 0.0581 + 0.8662 + 0.6011)/9.0}}}} diff --git a/unit_tests/operator/Test_BatchNormImpl.cpp b/unit_tests/operator/Test_BatchNormImpl.cpp index 1b42c90dd09d63cd319f19bd29751da816db06c0..26e964f9386e19a6070d75a4106b6b46a29e455d 100644 --- a/unit_tests/operator/Test_BatchNormImpl.cpp +++ b/unit_tests/operator/Test_BatchNormImpl.cpp @@ -9,20 +9,24 @@ * 
********************************************************************************/ -#include <catch2/catch_test_macros.hpp> +#include <cmath> // std::abs +#include <cstddef> // std::size_t #include <memory> +#include <catch2/catch_test_macros.hpp> + +#include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/BatchNorm.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" - -#include "aidge/backend/cpu.hpp" +#include "aidge/utils/ArrayHelpers.hpp" using namespace Aidge; TEST_CASE("[cpu/operator] BatchNorm(forward)", "[BatchNorm][CPU]") { std::shared_ptr<Node> myBatchNorm = BatchNorm<2>(3, 0.00001F, 0.1F, "mybatchnorm"); - auto op = std::static_pointer_cast<OperatorTensor>(myBatchNorm -> getOperator()); + auto op = std::static_pointer_cast<BatchNorm_Op<2>>(myBatchNorm -> getOperator()); std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array1D<float,3> {{0.9044, 0.3028, 0.0218}}); std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float,3> {{0.1332, 0.7503, 0.0878}}); std::shared_ptr<Tensor> myMean = std::make_shared<Tensor>(Array1D<float,3> {{0.9931, 0.8421, 0.9936}}); diff --git a/unit_tests/operator/Test_BitShift.cpp b/unit_tests/operator/Test_BitShift.cpp index a52990bc7991a325ce151cf6634b0d5a831992c8..33ab932e296be717604be42716d7abe2b61f65ee 100644 --- a/unit_tests/operator/Test_BitShift.cpp +++ b/unit_tests/operator/Test_BitShift.cpp @@ -9,15 +9,20 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> +#include <chrono> // std::micro, std::chrono::time_point, + // std::chrono::system_clock #include <cstddef> // std::size_t #include <cstdint> // std::uint16_t #include <chrono> -#include <iostream> #include <memory> -#include <numeric> +#include <numeric> #include <random> // std::random_device, std::mt19937, std::uniform_real_distribution 
-#include <iomanip> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/BitShiftImpl.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/BitShift.hpp" #include "aidge/utils/TensorUtils.hpp" @@ -29,7 +34,7 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") { // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_int_distribution<int> valueDist(-15, 15); + std::uniform_int_distribution<int> valueDist(-15, 15); std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5)); std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3)); std::uniform_int_distribution<int> boolDist(0,1); @@ -131,8 +136,8 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") { } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {}μs\n", duration.count()); } SECTION("Test BitShift kernels with Broadcasting") { std::size_t number_of_operation = 0; @@ -194,7 +199,7 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") { } else { - result[idx_out + d] = array0[idx0] >> array1[idx1]; + result[idx_out + d] = array0[idx0] >> array1[idx1]; } } } @@ -222,12 +227,7 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") { duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); // comparison between truth and computed result - bool equiv = (approxEq<int>(*(op->getOutput(0)), *Tres)); - if(equiv == false) - { - std::cout << "Problem\n"; - } - REQUIRE(equiv); + REQUIRE(approxEq<int>(*(op->getOutput(0)), *Tres)); delete[] array0; delete[] array1; @@ -236,8 +236,8 @@ 
TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") { const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {}μs\n", duration.count()); } } diff --git a/unit_tests/operator/Test_ClipImpl.cpp b/unit_tests/operator/Test_ClipImpl.cpp index 45c8da5bf7ecc84fad6b3e694fe204540f579af3..99147ac93bd659dd91897f6b7f1f3f33e5552ef6 100644 --- a/unit_tests/operator/Test_ClipImpl.cpp +++ b/unit_tests/operator/Test_ClipImpl.cpp @@ -9,36 +9,37 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> +#include <algorithm> // std::max, std::min +#include <chrono> #include <cstddef> // std::size_t #include <cstdint> // std::uint16_t -#include <chrono> -#include <iostream> -#include <vector> -#include <algorithm> -#include <iomanip> #include <memory> -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <random> // std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> +#include "aidge/backend/cpu/operator/ClipImpl.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Clip.hpp" #include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/TensorUtils.hpp" -#include "aidge/backend/cpu.hpp" void ComputeClipBackward(const std::vector<float>& vec1, std::vector<float>& vec2, float min, float max) { if (vec1.size() != vec2.size()) { - std::cerr << "Vectors should have the same sizes." 
<< std::endl; + fmt::print(stderr, "Vectors should have the same sizes.\n"); return; } - for (size_t i = 0; i < vec1.size(); ++i) { + for (std::size_t i = 0; i < vec1.size(); ++i) { if (vec1[i] < min || vec1[i] > max) { vec2[i] = 0.0f; } } } -namespace Aidge +namespace Aidge { TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") { @@ -47,8 +48,8 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") std::random_device rd; std::mt19937 gen(rd()); std::uniform_real_distribution<float> dis(0.0, 10.0); - std::uniform_real_distribution<float> dismin(0.0, 4.5); - std::uniform_real_distribution<float> dismax(5.5, 10.0); + std::uniform_real_distribution<float> dismin(0.0, 4.5); + std::uniform_real_distribution<float> dismax(5.5, 10.0); std::uniform_int_distribution<std::size_t> distDims(5,15); std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5); @@ -71,7 +72,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") // Create and populate the array with random float values float* Array = new float[dim0*dim1]; - for (int i = 0; i < dim0*dim1; ++i) { + for (std::size_t i = 0; i < dim0*dim1; ++i) { Array[i] = dis(gen); // Generate random float value } @@ -80,7 +81,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") TInput -> resize({dim0,dim1}); TInput -> setBackend("cpu"); TInput -> getImpl() -> setRawPtr(Array, dim0*dim1); - + float min = dismin(gen); std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32); Tmin -> resize({}); @@ -109,7 +110,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(true); - + start = std::chrono::system_clock::now(); myClip->forward(); end = std::chrono::system_clock::now(); @@ -118,9 +119,9 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; - std::cout << "total time: " << duration.count() << 
std::endl; - } + Log::info("multiplications over time spent: {}\n", totalComputation/duration.count()); + Log::info("total time: {}\n", duration.count()); + } SECTION("Clip test with min >= max [Forward]") { std::size_t totalComputation = 0; for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { @@ -131,7 +132,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") // Create and populate the array with random float values float* Array = new float[dim0*dim1]; - for (int i = 0; i < dim0*dim1; ++i) { + for (std::size_t i = 0; i < dim0*dim1; ++i) { Array[i] = dis(gen); // Generate random float value } @@ -140,7 +141,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") TInput -> resize({dim0,dim1}); TInput -> setBackend("cpu"); TInput -> getImpl() -> setRawPtr(Array, dim0*dim1); - + float min = dismax(gen); std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32); Tmin -> resize({}); @@ -169,7 +170,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(true); - + start = std::chrono::system_clock::now(); myClip->forward(); end = std::chrono::system_clock::now(); @@ -178,13 +179,13 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; - std::cout << "total time: " << duration.count() << std::endl; - } + Log::info("multiplications over time spent: {}\n", totalComputation/duration.count()); + Log::info("total time: {}\n", duration.count()); + } SECTION("Clip with Clip Attr [Forward]") { std::size_t totalComputation = 0; - for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { float min = dismin(gen); @@ -200,7 +201,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") // Create and populate the array with random float values float* Array = new float[dim0*dim1]; - for 
(int i = 0; i < dim0*dim1; ++i) { + for (std::size_t i = 0; i < dim0*dim1; ++i) { Array[i] = dis(gen); // Generate random float value } // Convert Input to Tensor @@ -231,8 +232,8 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; - std::cout << "total time: " << duration.count() << std::endl; + Log::info("multiplications over time spent: {}\n", totalComputation/duration.count()); + Log::info("total time: {}\n", duration.count()); } SECTION("Simple clip test [Backward]") { std::size_t totalComputation = 0; @@ -243,13 +244,13 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") // generate Tensors dimensions const std::size_t dim0 = distDims(gen); const std::size_t dim1 = distDims(gen); - + totalComputation += dim0*dim1; // Create and populate the array with random float values float* Array = new float[dim0*dim1]; float* gradArray = new float[dim0*dim1]; - for (int i = 0; i < dim0*dim1; ++i) { + for (std::size_t i = 0; i < dim0*dim1; ++i) { Array[i] = dis(gen); // Generate random float value gradArray[i] = dis(gen); } @@ -264,7 +265,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") TInput -> resize({dim0,dim1}); TInput -> setBackend("cpu"); TInput -> getImpl() -> setRawPtr(Array, dim0*dim1); - + float min = dismin(gen); std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32); Tmin -> resize({}); @@ -296,7 +297,7 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") myClip->forward(); op->getOutput(0)->setGrad(TGrad); - + start = std::chrono::system_clock::now(); REQUIRE_NOTHROW(myClip->backward()); end = std::chrono::system_clock::now(); @@ -310,9 +311,9 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); REQUIRE(GT1 == BackwardTensorVec); } - std::cout << "multiplications over time spent: " << 
totalComputation/duration.count() << std::endl; - std::cout << "total time: " << duration.count() << std::endl; + Log::info("multiplications over time spent: {}\n", totalComputation/duration.count()); + Log::info("total time: {}\n", duration.count()); } } -} // namespace Aidge +} // namespace Aidge } \ No newline at end of file diff --git a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp index 42505d385fde7e72e09531f1607287ffc6978f75..8ec1669b92a5116999413cf55a8c5113363ef330 100644 --- a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp +++ b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp @@ -9,32 +9,27 @@ * ********************************************************************************/ -#include <algorithm> -#include <chrono> -#include <cmath> -#include <cstddef> // std::size_t -#include <cstdint> // std::uint16_t -#include <iostream> +#include <chrono> // std::micro, std::chrono::time_point, + // std::chrono::system_clock +#include <cstddef> // std::size_t +#include <cstdint> // std::int64_t, std::uint16_t #include <memory> -#include <numeric> // std::accumulate -#include <ostream> -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <random> // std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> -#include "catch2/internal/catch_compiler_capabilities.hpp" -#include "catch2/internal/catch_enforce.hpp" #include <catch2/catch_test_macros.hpp> #include <catch2/generators/catch_generators_random.hpp> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/filler/Filler.hpp" #include "aidge/operator/ConstantOfShape.hpp" +#include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/TensorUtils.hpp" -#include <aidge/data/Data.hpp> -#include <aidge/data/half.hpp> 
-#include <aidge/filler/Filler.hpp> -#include <aidge/operator/OperatorTensor.hpp> -#include <aidge/operator/Reshape.hpp> -#include <aidge/utils/TensorUtils.hpp> -#include <aidge/utils/Types.h> +#include "aidge/utils/Types.h" namespace Aidge { TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") { @@ -62,7 +57,7 @@ TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") { result->setDataType(DataType::Int64); result->setBackend("cpu"); for (DimSize_t i = 0; i < result->size(); ++i) { - result->set<int64_t>(i, input_tensor_values_dist(gen)); + result->set<std::int64_t>(i, input_tensor_values_dist(gen)); } return result; }; diff --git a/unit_tests/operator/Test_DivImpl.cpp b/unit_tests/operator/Test_DivImpl.cpp index 5d7dfdf12032d4c444e38cda6d2a4298fc552b14..4037b2ad4e117573279f07d0c1819d3435ee7ada 100644 --- a/unit_tests/operator/Test_DivImpl.cpp +++ b/unit_tests/operator/Test_DivImpl.cpp @@ -9,17 +9,26 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> -#include <cstddef> // std::size_t -#include <cstdint> // std::uint16_t -#include <chrono> -#include <iostream> +#include <chrono> // std::micro, std::chrono::time_point, + // std::chrono::system_clock +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <functional> // std::multiplies #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/DivImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Div.hpp" +#include 
"aidge/operator/OperatorTensor.hpp" #include "aidge/utils/TensorUtils.hpp" namespace Aidge { @@ -117,8 +126,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { // with broadcasting } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } SECTION("+1-D Tensor / +1-D Tensor - broadcasting") { @@ -212,8 +221,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } SECTION("+1-D Tensor / 1-D Tensor") { std::size_t number_of_operation = 0; @@ -308,8 +317,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } } } diff --git a/unit_tests/operator/Test_ErfImpl.cpp b/unit_tests/operator/Test_ErfImpl.cpp index 2826b5b57d431cf8296a9869f88f7d642c59c963..c2fdd1c8606804e4a9f63051fa66667ae374fb9d 100644 --- a/unit_tests/operator/Test_ErfImpl.cpp +++ b/unit_tests/operator/Test_ErfImpl.cpp @@ -9,14 +9,16 @@ * 
********************************************************************************/ +#include <memory> + #include <catch2/catch_test_macros.hpp> +#include "aidge/backend/cpu/operator/ErfImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Erf.hpp" - -#include "aidge/backend/cpu.hpp" - -#include <memory> +#include "aidge/utils/ArrayHelpers.hpp" +#include "aidge/utils/TensorUtils.hpp" using namespace Aidge; @@ -27,23 +29,18 @@ TEST_CASE("[cpu/operator] Erf(forward)") { {0.41384590, 0.43120754, 0.93762982, 0.31049860, 0.77547199, 0.09514862, 0.16145366, 0.42776686, 0.43487436, 0.41170865} }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<float,10> { + Tensor expectedOutput = Array1D<float,10> { {0.44163144, 0.45801866, 0.81516320, 0.33941913, 0.72722000, 0.10704061, 0.18061027, 0.45479023, 0.46144873, 0.43959764} - }); + }; - std::shared_ptr<Node> myErf = Erf(); - auto op = std::static_pointer_cast<OperatorTensor>(myErf -> getOperator()); + auto op = std::make_shared<Erf_Op>(); op->associateInput(0,input0); op->setDataType(DataType::Float32); op->setBackend("cpu"); - myErf->forward(); + op->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); - } + REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f)); } SECTION("3D Tensor") { @@ -59,7 +56,7 @@ TEST_CASE("[cpu/operator] Erf(forward)") { } } }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> { + Tensor expectedOutput = Array3D<float,2,2,3> { { { {0.83003384, 0.77721894, 0.72857803}, @@ -70,19 +67,14 @@ TEST_CASE("[cpu/operator] Erf(forward)") { {0.81564975, 0.83322692, 0.37109339} } } - }); + }; - std::shared_ptr<Node> myErf = Erf(); - auto op = 
std::static_pointer_cast<OperatorTensor>(myErf -> getOperator()); + auto op = std::make_shared<Erf_Op>(); op->associateInput(0,input0); op->setDataType(DataType::Float32); op->setBackend("cpu"); - myErf->forward(); + op->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); - } + REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f)); } } \ No newline at end of file diff --git a/unit_tests/operator/Test_ExpandImpl.cpp b/unit_tests/operator/Test_ExpandImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..878c608110eabb824d8a6c0d1ceb0853b3c1449d --- /dev/null +++ b/unit_tests/operator/Test_ExpandImpl.cpp @@ -0,0 +1,103 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/ExpandImpl.hpp" +#include "aidge/data/DataType.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Expand.hpp" +#include "aidge/utils/ArrayHelpers.hpp" + +using std::shared_ptr; + +using namespace Aidge; + +void setupTestExpand(shared_ptr<Tensor> inputData, + shared_ptr<Tensor> inputShape, + shared_ptr<Expand_Op> &op) { + + op->getOutput(0)->setDataType(inputData->dataType()); + + inputData->setBackend("cpu"); + op->associateInput(0, inputData); + + inputShape->setBackend("cpu"); + op->associateInput(1, inputShape); +} + +TEST_CASE("[cpu/operator] Expand(forward)", "[Expand][CPU]") { + std::shared_ptr<Expand_Op> op = std::make_shared<Expand_Op>(); + op->setBackend("cpu"); + + SECTION("Expand shape is bigger than inputData") { + auto inputData = std::make_shared<Tensor>(Array1D<int, 2>({1, 3})); + auto inputShape = + std::make_shared<Tensor>(Array1D<std::int64_t, 4>({1, 3, 4, 2})); + Tensor expectedOutput = + Array4D<cpptype_t<DataType::Int32>, 1, 3, 4, 2>({{{{{1, 3}, {1, 3}, {1, 3}, {1, 3}}, + {{1, 3}, {1, 3}, {1, 3}, {1, 3}}, + {{1, 3}, {1, 3}, {1, 3}, {1, 3}}}}}); + setupTestExpand(inputData, inputShape, op); + + // forwardDims has already been tested in core + CHECK(op->forwardDims(true)); + REQUIRE_NOTHROW(op->forward()); + REQUIRE(expectedOutput == *op->getOutput(0)); + } + SECTION("Expand shape has less dimensions than inputData") { + auto inputData = std::make_shared<Tensor>( + Array3D<int, 2, 1, 3>({{{2, 1, 3}, {2, 1, 3}}})); + auto inputShape = + std::make_shared<Tensor>(Array1D<std::int64_t, 2>({2, 3})); + Tensor expectedOutput = Array3D<cpptype_t<DataType::Int32>, 2, 2, 3>( + {{{{2, 1, 3}, {2, 1, 3}}, {{2, 1, 3}, {2, 1, 3}}}}); + setupTestExpand(inputData, 
inputShape, op); + + // forwardDims has already been tested in core + CHECK(op->forwardDims(true)); + REQUIRE_NOTHROW(op->forward()); + REQUIRE(expectedOutput == *op->getOutput(0)); + } + SECTION("Expand shape = {1} leads to input equal to output.") { + auto inputData = std::make_shared<Tensor>( + Array4D<int, 2, 1, 3, 1>({{{2, 1, 3}, {2, 1, 3}}})); + auto inputShape = + std::make_shared<Tensor>(Array1D<std::int64_t, 1>({1})); + Tensor expectedOutput = + Array4D<cpptype_t<DataType::Int32>, 2, 1, 3, 1>({{{2, 1, 3}, {2, 1, 3}}}); + setupTestExpand(inputData, inputShape, op); + + // forwardDims has already been tested in core + CHECK(op->forwardDims(true)); + REQUIRE_NOTHROW(op->forward()); + REQUIRE(expectedOutput == *op->getOutput(0)); + } + SECTION("The only common dimension is the last one & its equal to 1") { + auto inputData = std::make_shared<Tensor>( + Array4D<int, 1, 1, 3, 1>({{{{2, 1, 3}}}})); + auto inputShape = + std::make_shared<Tensor>(Array1D<std::int64_t, 3>({2, 1, 1})); + Tensor expectedOutput = + Array4D<cpptype_t<DataType::Int32>, 1, 2, 3, 1>({{{{2, 1, 3}, {2, 1, 3}}}}); + setupTestExpand(inputData, inputShape, op); + + // forwardDims has already been tested in core + CHECK(op->forwardDims(true)); + REQUIRE_NOTHROW(op->forward()); + REQUIRE(expectedOutput == *op->getOutput(0)); + } + SECTION("N-Dim to N-Dim") {} + auto inputData = std::shared_ptr<Tensor>(); +} diff --git a/unit_tests/operator/Test_FCImpl.cpp b/unit_tests/operator/Test_FCImpl.cpp index b2566f26d984fb1d89052745ec35870c6b935d48..8ac0afc33152f4ae110b1c3ef0b4e88f37b00e99 100644 --- a/unit_tests/operator/Test_FCImpl.cpp +++ b/unit_tests/operator/Test_FCImpl.cpp @@ -9,13 +9,16 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> #include <memory> +#include <catch2/catch_test_macros.hpp> + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/FCImpl.hpp" +#include 
"aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/FC.hpp" - -#include "aidge/backend/cpu.hpp" +#include "aidge/utils/ArrayHelpers.hpp" using namespace Aidge; @@ -42,11 +45,13 @@ TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") { 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{ - {{23601, 23602, 23603, 23604, 23605}, {68601, 68602, 68603, 68604, 68605}}}); + Tensor myOutput = Array2D<int, 2, 5>{ + {{23601, 23602, 23603, 23604, 23605}, {68601, 68602, 68603, 68604, 68605}}}; std::shared_ptr<Node> myFC = FC(75, 5, false, "myfc"); - auto op = std::static_pointer_cast<OperatorTensor>(myFC -> getOperator()); + auto op = std::static_pointer_cast<FC_Op>(myFC -> getOperator()); + op -> setDataType(DataType::Int32); + op -> setBackend("cpu"); op -> associateInput(1, myWeights); op -> associateInput(2, myBias); @@ -62,10 +67,8 @@ TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") { 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}}); op->associateInput(0, myInput); - op -> setDataType(DataType::Int32); - op -> setBackend("cpu"); myFC->forward(); - REQUIRE(*(op->getOutput(0)) == *myOutput); + REQUIRE(*(op->getOutput(0)) == myOutput); } SECTION("4D input") { std::shared_ptr<Tensor> myInput = @@ -100,10 +103,8 @@ TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") { {140, 141, 142, 143, 144}, {145, 146, 147, 148, 149}}}}}); op->associateInput(0, myInput); - op -> setDataType(DataType::Int32); - op -> setBackend("cpu"); myFC->forward(); - REQUIRE(*(op->getOutput(0)) == *myOutput); + REQUIRE(*(op->getOutput(0)) == myOutput); } // std::cout << 
static_cast<Tensor>((*myFC->getOperator())["weight"])[0][0][0][0] << std::endl; diff --git a/unit_tests/operator/Test_FoldImpl.cpp b/unit_tests/operator/Test_FoldImpl.cpp index 6832f5a42d796d9261495794e0758ce1b6df0346..184b9e9acfe2cd5e86f74d304e37ba2aeacc7cf5 100644 --- a/unit_tests/operator/Test_FoldImpl.cpp +++ b/unit_tests/operator/Test_FoldImpl.cpp @@ -13,6 +13,7 @@ #include <cstdlib> #include <memory> +#include "aidge/backend/cpu/data/TensorImpl.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" @@ -21,8 +22,6 @@ #include "aidge/operator/MatMul.hpp" #include "aidge/operator/Reshape.hpp" -#include "aidge/backend/cpu.hpp" - using namespace Aidge; TEST_CASE("[cpu/operator] Fold(forward)", "[Fold][CPU]") { diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp index 43af544871ad6c2ac319de09f3c6fce5065e60d5..8e8536accadcb874f74d4d962aae435bc1351d6e 100644 --- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp +++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp @@ -9,34 +9,29 @@ * ********************************************************************************/ -#include <aidge/utils/Types.h> -#include <catch2/catch_test_macros.hpp> #include <chrono> -#include <cmath> #include <cstddef> // std::size_t #include <cstdint> // std::uint16_t -#include <iostream> +#include <functional> // std::multiplies #include <memory> #include <numeric> // std::accumulate -#include <ostream> -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <random> // std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp" +#include "aidge/data/Data.hpp" 
#include "aidge/data/Tensor.hpp" #include "aidge/operator/GlobalAveragePooling.hpp" #include "aidge/utils/TensorUtils.hpp" - -// debug print function -void print_tensor(Aidge::Tensor &T) { - // Print tensors - std::cout << "Tensor : size =  ["; - for (auto &dim : T.dims()) { - std::cout << dim << " , "; - } - std::cout << "]" << std::endl; - T.print(); -} +#include "aidge/utils/Types.h" namespace Aidge { + TEST_CASE("[cpu/operator] GlobalAveragePooling", "[GlobalAveragePooling][CPU]") { constexpr std::uint16_t NBTRIALS = 10; @@ -54,9 +49,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", std::size_t(7)); // Create MatGlobalAveragePooling Operator - std::shared_ptr<Node> globAvgPool = GlobalAveragePooling(); - auto op = - std::static_pointer_cast<OperatorTensor>(globAvgPool->getOperator()); + std::shared_ptr<GlobalAveragePooling_Op> op = std::make_shared<GlobalAveragePooling_Op>(); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -99,7 +92,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", T0->resize(dims); T0->getImpl()->setRawPtr(array0, nb_elements); - REQUIRE_THROWS(globAvgPool->forward()); + REQUIRE_THROWS(op->forward()); delete[] array0; } @@ -158,7 +151,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", op->forwardDims(); start = std::chrono::system_clock::now(); - REQUIRE_NOTHROW(globAvgPool->forward()); + REQUIRE_NOTHROW(op->forward()); end = std::chrono::system_clock::now(); duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); @@ -231,7 +224,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", op->forwardDims(); start = std::chrono::system_clock::now(); - REQUIRE_NOTHROW(globAvgPool->forward()); + REQUIRE_NOTHROW(op->forward()); end = std::chrono::system_clock::now(); duration += std::chrono::duration_cast<std::chrono::microseconds>( end - start); @@ -358,7 +351,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", Tres->getImpl()->setRawPtr(result, out_nb_elems); op->forwardDims(); start = 
std::chrono::system_clock::now(); - REQUIRE_NOTHROW(globAvgPool->forward()); + REQUIRE_NOTHROW(op->forward()); end = std::chrono::system_clock::now(); duration += std::chrono::duration_cast<std::chrono::microseconds>( end - start); @@ -547,7 +540,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", Tres->getImpl()->setRawPtr(result, out_nb_elems); op->forwardDims(); start = std::chrono::system_clock::now(); - REQUIRE_NOTHROW(globAvgPool->forward()); + REQUIRE_NOTHROW(op->forward()); end = std::chrono::system_clock::now(); duration += std::chrono::duration_cast<std::chrono::microseconds>( end - start); @@ -561,12 +554,9 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", delete[] result; } } - std::cout << "GlobalAveragePooling total execution time : " - << duration.count() << "µs" << std::endl; - std::cout << "Number of operations : " << number_of_operation - << std::endl; - std::cout << "Operation / µs = " << number_of_operation / duration.count() - << std::endl; + Log::info("GlobalAveragePooling total execution time: {}µs\n", duration.count()); + Log::info("Number of operations : {}\n", number_of_operation); + Log::info("Operation / µs = {}\n", number_of_operation / duration.count()); } } } diff --git a/unit_tests/operator/Test_HeavisideImpl.cpp b/unit_tests/operator/Test_HeavisideImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4cbdf1a0e29f8670e45897236374726dac62bb43 --- /dev/null +++ b/unit_tests/operator/Test_HeavisideImpl.cpp @@ -0,0 +1,98 @@ +/******************************************************************************** + * Copyright (c) 2025 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp" + +#include <memory> +#include <cstdlib> +#include <random> + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/backend/cpu/operator/HeavisideImpl.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/utils/TensorUtils.hpp" + +namespace Aidge +{ + +TEST_CASE("[cpu/operator] Heaviside(forward)", "[Heaviside][CPU]") { + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(-1.0f, 1.0f); + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + + SECTION("1D Tensor") { + + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> { + {0, 1, 2,-3, 4,-5,-6, 7, 8, 9} + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<float,10> { + {0.5, 1, 1, 0, 1, 0, 0, 1, 1, 1} + }); + + std::shared_ptr<Node> heaviside = Heaviside(0.5); + auto op = std::static_pointer_cast<OperatorTensor>(heaviside->getOperator()); + op->associateInput(0, input0); + op->setBackend("cpu"); + op->setDataType(DataType::Float32); + + op->forward(); + REQUIRE(approxEq<float>(*op->getOutput(0),*expectedOutput)); + } + + SECTION("+1-D Tensor") + { + auto dims = std::vector<std::size_t>(); + auto nbDims = nbDimsDist(gen); + + for (auto i = 0u; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); + } + + auto numberOfElements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + float* inputArray = new float[numberOfElements]; + float* resultArray = new float[numberOfElements]; + + for(auto i = 0u; i < numberOfElements; ++i) + { + inputArray[i] = valueDist(gen); + resultArray[i] = inputArray[i] > 0 ? 
1 : (inputArray[i] == 0 ? 0.5 : 0); + } + + auto T0 = std::make_shared<Tensor>(); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + + auto T1 = std::make_shared<Tensor>(); + T1->setDataType(DataType::Float32); + T1->setBackend("cpu"); + + T0->resize(dims); + T0->getImpl()->setRawPtr(inputArray, numberOfElements); + T1->resize(dims); + T1->getImpl()->setRawPtr(resultArray, numberOfElements); + + std::shared_ptr<Node> heaviside = Heaviside(0.5); + auto op = std::static_pointer_cast<OperatorTensor>(heaviside->getOperator()); + op->associateInput(0, T0); + op->setBackend("cpu"); + op->setDataType(DataType::Float32); + + op->forward(); + + REQUIRE(approxEq<float>(*(op->getOutput(0)), *T1)); + } +} +} diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp index 85dd9f99ee425216f8495e7813b35ce69be9c806..b60b8bb3e50a33eb339f66905c81d6824e28a835 100644 --- a/unit_tests/operator/Test_LeakyReLUImpl.cpp +++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp @@ -9,13 +9,16 @@ * ********************************************************************************/ +#include <memory> + #include <catch2/catch_test_macros.hpp> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/LeakyReLU.hpp" -#include "aidge/backend/cpu.hpp" - using namespace Aidge; TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") { diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp index d6e934b4dc8d84e8a595eb74d1af9d2c68c892d1..f062f06cddfbd04217d63e1edcb6505914bc77e9 100644 --- a/unit_tests/operator/Test_MatMulImpl.cpp +++ b/unit_tests/operator/Test_MatMulImpl.cpp @@ -9,21 +9,26 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> -#include <cstddef> // std::size_t -#include 
<cstdint> // std::uint16_t -#include <chrono> -#include <iostream> +#include <chrono> // std::micro, std::chrono::time_point, + // std::chrono::system_clock, std::chrono::duration +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t #include <memory> -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <random> // std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/MatMul.hpp" #include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/TensorUtils.hpp" -#include "aidge/backend/cpu/operator/MatMulImpl.hpp" - namespace Aidge { TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { @@ -106,8 +111,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { delete[] bigArray2; delete[] res; } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; - std::cout << "total time: " << duration.count() << std::endl; + Log::info("number of multiplications over time spent: {}\n", (totalComputation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } SECTION("3-D Tensors") { @@ -174,8 +179,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { delete[] bigArray2; delete[] res; } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; - std::cout << "total time: " << duration.count() << std::endl; + Log::info("number of multiplications over time spent: {}\n", (totalComputation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } SECTION("4-D Tensors") { @@ -244,8 +249,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", 
"[MatMul][CPU]") { delete[] bigArray2; delete[] res; } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; - std::cout << "total time: " << duration.count() << std::endl; + Log::info("number of multiplications over time spent: {}\n", (totalComputation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } SECTION("+2-D / 1-D") { diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp index af04ede4e33c32ce785804e2484b6ba9ac5edc36..de02df2b73bc461bbd76b089cd555d7c82bd173e 100644 --- a/unit_tests/operator/Test_MaxPoolingImpl.cpp +++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp @@ -9,15 +9,17 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> +#include <array> #include <memory> -#include <cstdlib> +#include <catch2/catch_test_macros.hpp> + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/MaxPooling.hpp" -#include "aidge/backend/cpu.hpp" - using namespace Aidge; @@ -53,10 +55,9 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") { } }); SECTION("Stride") { - std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2}); - auto op = std::static_pointer_cast<OperatorTensor>(myMaxPool -> getOperator()); + std::shared_ptr<MaxPooling_Op<2>> op = std::make_shared<MaxPooling_Op<2>>(std::array<std::size_t, 2>({2,2}), std::array<std::size_t, 2>({2,2})); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> { + Tensor myOutput = Array4D<float,2,2,2,2> { { { {{ 0.7995, 0.6142}, @@ -71,12 +72,12 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") { {0.0857, 0.6776}} } } - }); - myMaxPool->getOperator()->associateInput(0,myInput); - 
myMaxPool->getOperator()->setDataType(DataType::Float32); - myMaxPool->getOperator()->setBackend("cpu"); - myMaxPool->forward(); + }; + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forward(); op->getOutput(0)->print(); - REQUIRE(*(op->getOutput(0)) == *myOutput); + REQUIRE(*(op->getOutput(0)) == myOutput); } } \ No newline at end of file diff --git a/unit_tests/operator/Test_Memorize.cpp b/unit_tests/operator/Test_Memorize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6c1a617e268dfa376ab154f4085dc63ef3760ea7 --- /dev/null +++ b/unit_tests/operator/Test_Memorize.cpp @@ -0,0 +1,66 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <string> + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/graph/OpArgs.hpp" +#include "aidge/operator/Add.hpp" +#include "aidge/operator/Memorize.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/recipes/GraphViewHelper.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" + +namespace Aidge { + +TEST_CASE("[cpu/operator] Memorize(forward)", "[Memorize][CPU]") { + SECTION("Test simple") { + std::shared_ptr<Tensor> inputTensor = + std::make_shared<Tensor>(Array1D<int, 1>{{1}}); + + auto input = Producer({1}, "input"); + auto init = Producer({1}, "init"); + auto add = Add("add"); + auto mem = Memorize(3, "mem"); + + 
input->addChild(add, 0, 0); + init->addChild(mem, 0, 1); + add->addChild(mem, 0,0); + mem->addChild(/*otherNode=*/add, /*outId=*/1, /*otherInId=*/1); + + input->getOperator()->setOutput(0, inputTensor); + init->getOperator()->setOutput(0, inputTensor); + + auto g = getConnectedGraphView(input); + + g->setDataType(Aidge::DataType::Int32); + g->setBackend("cpu"); + g->forwardDims(); + g->save("simple_graph"); + + SequentialScheduler scheduler(g); + REQUIRE_NOTHROW(scheduler.forward()); + scheduler.saveSchedulingDiagram("simple"); + + const Tensor expectedOutput = Array1D<int, 1>{{4}}; + std::shared_ptr<Tensor> other = std::static_pointer_cast<OperatorTensor>(mem->getOperator())->getOutput(0); + other->print(); + REQUIRE((*other == expectedOutput)); + } +} +} // namespace Aidge diff --git a/unit_tests/operator/Test_MulImpl.cpp b/unit_tests/operator/Test_MulImpl.cpp index 3378861d0d3d7e74e7867c2765a0b09069fa8caf..2937e94938c671140eeeee87d47d5c48f685203e 100644 --- a/unit_tests/operator/Test_MulImpl.cpp +++ b/unit_tests/operator/Test_MulImpl.cpp @@ -9,379 +9,336 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> -#include <cstddef> // std::size_t -#include <cstdint> // std::uint16_t #include <chrono> -#include <iostream> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution, + // std::uniform_int_distribution + +#include <catch2/catch_test_macros.hpp> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/MulImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Mul.hpp" +#include "aidge/utils/ArrayHelpers.hpp" +#include 
"aidge/utils/Log.hpp" #include "aidge/utils/TensorUtils.hpp" namespace Aidge { - TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]") - { - std::shared_ptr<Node> myMul = Mul(); - auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator()); - op->setDataType(DataType::Float32); - op->setBackend("cpu"); - - SECTION("Case 1: 2D and 1D tensors") { - const auto T0 = std::make_shared<Tensor>(Array2D<float,2,3>( - { - { - {1,2,3},{4,5,6} - } - } - )); +TEST_CASE("[CPU/Operator] Mul(Backward)", "[Mul][CPU][Backward]") { + std::shared_ptr<Mul_Op> op = std::make_shared<Mul_Op>(); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); - const auto T1 = std::make_shared<Tensor>(Array1D<float,3>( - {0.1,0.2,0.3} - )); + // NOTE: The first four tests use fixed values, the last one uses random values but static dimensions. - T0->setDataType(DataType::Float32); - T0->setBackend("cpu"); - T1->setDataType(DataType::Float32); - T1->setBackend("cpu"); + SECTION("Case 1: 1D and 2D Tensors") { + const auto T0 = std::make_shared<Tensor>( + Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{1, 2, 3}, {4, 5, 6}}})); - op->getOutput(0)->setGrad(std::make_shared<Tensor>(Array2D<float,2,3>({{{1.0,1.0,1.0},{1.0,1.0,1.0}}}))); + const auto T1 = + std::make_shared<Tensor>(Array1D<cpptype_t<DataType::Float32>, 3>({0.1, 0.2, 0.3})); - op->associateInput(0,T0); - op->associateInput(1,T1); - op->forwardDims(); + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(std::make_shared<Tensor>( + Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}))); + op->forwardDims(); - myMul->forward(); - myMul->backward(); + op->backward(); - auto T0Grad = std::make_shared<Tensor>(Array2D<float, 2,3>({{{0.1,0.2,0.3},{0.1, 0.2, 0.3}}})); - auto T1Grad = std::make_shared<Tensor>(Array1D<float, 3>({5,7,9})); + const Tensor expectedGrad0 = + Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{0.1, 0.2, 0.3}, {0.1, 0.2, 0.3}}}); - 
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *T0Grad)); - REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *T1Grad)); - } + const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({5, 7, 9}); - SECTION("Case 2: 3D and 1D tensors") { - const auto T0 = std::make_shared<Tensor>(Array3D<float,2,2,3>( - { - { - { - {1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0} - }, - { - {7.0, 8.0, 9.0}, - {10.0, 11.0, 12.0} - } - } - } - )); - - const auto T1 = std::make_shared<Tensor>(Array1D<float, 3>({0.3,0.2,0.1})); - - const auto newGrad = std::make_shared<Tensor>(Array3D<float,2,2,3>( - { - { - { - {1, 1, 1}, - {1, 1, 1} - }, - { - {1, 1, 1}, - {1, 1, 1} - } - } - } - )); - - const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,2,3>( - { - { - { - {0.3, 0.2, 0.1}, - {0.3, 0.2, 0.1} - }, - { - {0.3, 0.2, 0.1}, - {0.3, 0.2, 0.1} - } - } - } - )); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } - const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float,3>( - {22.0, 26.0, 30.0} - )); + SECTION("Case 2: 3D and 1D tensors") { + const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}, + {{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}})); - for(auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1}) - { - T->setBackend("cpu") ; - T->setDataType(DataType::Float32); - } + const auto T1 = + std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1})); - op->associateInput(0, T0); - op->associateInput(1, T1); - op->getOutput(0)->setGrad(newGrad); - op->forwardDims(); + const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}})); - myMul->backward(); + const Tensor expectedGrad0 = + Array3D<float, 2, 2, 3>({{{{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}}, + {{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}}}}); - 
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); - REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); - } + const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({22.0, 26.0, 30.0}); - SECTION("Case 3: 4D and 2D tensors") { - const auto T0 = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>( - { - { - { - { - {1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0} - }, - { - {10.0, 11.0, 12.0}, - {13.0, 14.0, 15.0}, - {16.0, 17.0, 18.0} - } - }, - { - { - {19.0, 20.0, 21.0}, - {22.0, 23.0, 24.0}, - {25.0, 26.0, 27.0} - }, - { - {28.0, 29.0, 30.0}, - {31.0, 32.0, 33.0}, - {34.0, 35.0, 36.0} - } - } - } - } - )); - - const auto T1 = std::make_shared<Tensor>(Array2D<float, 3,3>( - { - { - {0.5,0.3,0.1}, - {0.4,0.2,0.6}, - {0.7,0.8,0.9} - } - } - )); - - const auto newGrad = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>( - { - { - { - { - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0} - }, - { - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0} - } - }, - { - { - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0} - }, - { - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0} - } - } - } - } - )); - - const auto expectedGrad0 = std::make_shared<Tensor>(Array4D<float,2,2,3,3>( - { - { - { - { - {0.5, 0.3, 0.1}, - {0.4, 0.2, 0.6}, - {0.7, 0.8, 0.9} - }, - { - {0.5, 0.3, 0.1}, - {0.4, 0.2, 0.6}, - {0.7, 0.8, 0.9} - } - }, - { - { - {0.5, 0.3, 0.1}, - {0.4, 0.2, 0.6}, - {0.7, 0.8, 0.9} - }, - { - {0.5, 0.3, 0.1}, - {0.4, 0.2, 0.6}, - {0.7, 0.8, 0.9} - } - } - } - } - )); - - const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 3>( - { - { - {58.0, 62.0, 66.0}, - {70.0, 74.0, 78.0}, - {82.0, 86.0, 90.0} - } - } - )); + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); - for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1}) - { - T->setBackend("cpu") ; - T->setDataType(DataType::Float32); - } + 
op->backward(); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } - op->associateInput(0, T0); - op->associateInput(1, T1); - op->getOutput(0)->setGrad(newGrad); - op->forwardDims(); + SECTION("Case 3: 4D and 2D tensors") { + const auto T0 = std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>( + {{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}, + {{10.0, 11.0, 12.0}, {13.0, 14.0, 15.0}, {16.0, 17.0, 18.0}}}, + {{{19.0, 20.0, 21.0}, {22.0, 23.0, 24.0}, {25.0, 26.0, 27.0}}, + {{28.0, 29.0, 30.0}, + {31.0, 32.0, 33.0}, + {34.0, 35.0, 36.0}}}}})); + + const auto T1 = std::make_shared<Tensor>(Array2D<cpptype_t<DataType::Float32>, 3, 3>( + {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}})); + + const auto newGrad = + std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>( + {{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}, + {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}, + {{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}, + {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}})); + + const Tensor expectedGrad0 = + Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>( + {{{{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}, + {{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}, + {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}, + {{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}}}); + + const Tensor expectedGrad1 = + Array2D<cpptype_t<DataType::Float32>, 3, 3>({{{58.0, 62.0, 66.0}, + {70.0, 74.0, 78.0}, + {82.0, 86.0, 90.0}}}); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + op->backward(); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + 
} + + SECTION("Case 4: 3D and 2D tensors") { + const auto T0 = std::make_shared<Tensor>( + Array3D<float, 2, 3, 4>({{{ + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }, + { + {13.0, 14.0, 15.0, 16.0}, + {17.0, 18.0, 19.0, 20.0}, + {21.0, 22.0, 23.0, 24.0}, + }}})); + + const auto T1 = std::make_shared<Tensor>( + Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2}}})); + + const auto newGrad = std::make_shared<Tensor>( + Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{ + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + }, + { + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + }}})); + + const Tensor expectedGrad0 = + Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{{0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2}}, + {{0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2}}}}); + + const Tensor expectedGrad1 = + Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{14.0, 16.0, 18.0, 20.0}, + {22.0, 24.0, 26.0, 28.0}, + {30.0, 32.0, 34.0, 36.0}}}); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + op->backward(); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } - myMul->backward(); + SECTION("Case 5: Tensors with random values") { - REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); - REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); + // Use random values + const std::vector<std::size_t> dims0 = {5, 2, 1, 7}; // First tensor + const std::vector<std::size_t> dims1 = {2, 6, 7}; // Second tensor + const std::vector<std::size_t> outputDims = {5, 2, 6, 7}; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> 
dist(0.1f, 1.0f); + + auto T0 = std::make_shared<Tensor>(dims0); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + float* input0Data = static_cast<float*>(T0->getImpl()->rawPtr()); + // Fill with random values + for (std::size_t i = 0; i < T0->size(); ++i) { + input0Data[i] = dist(gen); } - SECTION("Case 4: 3D and 2D tensors") { - const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 3, 4>( - { - { - { - {1.0, 2.0, 3.0, 4.0}, - {5.0, 6.0, 7.0, 8.0}, - {9.0, 10.0, 11.0, 12.0}, - }, - { - {13.0, 14.0, 15.0, 16.0}, - {17.0, 18.0, 19.0, 20.0}, - {21.0, 22.0, 23.0, 24.0}, - } - } - } - )); - - const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 4>( - { - { - {0.1, 0.2, 0.3, 0.4}, - {0.5, 0.6, 0.7, 0.8}, - {0.9, 1.0, 1.1, 1.2} - } - } - )); - - const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2,3,4>( - { - { - { - {1.0, 1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0, 1.0}, - }, - { - {1.0, 1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0, 1.0}, - } - } - } - )); - - const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,3,4>( - { - { - { - {0.1, 0.2, 0.3, 0.4}, - {0.5, 0.6, 0.7, 0.8}, - {0.9, 1.0, 1.1, 1.2} - }, - { - {0.1, 0.2, 0.3, 0.4}, - {0.5, 0.6, 0.7, 0.8}, - {0.9, 1.0, 1.1, 1.2} - } - } - } - )); - - const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 4>( - { - { - {14.0, 16.0, 18.0, 20.0}, - {22.0, 24.0, 26.0, 28.0}, - {30.0, 32.0, 34.0, 36.0} + auto T1 = std::make_shared<Tensor>(dims1); + T1->setDataType(DataType::Float32); + T1->setBackend("cpu"); + float* input1Data = static_cast<float*>(T1->getImpl()->rawPtr()); + // Fill with random values + for (std::size_t i = 0; i < T1->size(); ++i) { + input1Data[i] = dist(gen); + } + + op->associateInput(0, T0); + op->associateInput(1, T1); + + op->forwardDims(); + op->forward(); + + Tensor expectedOutput{outputDims}; + expectedOutput.setBackend("cpu"); + float* expectedOutputData = 
static_cast<float*>(expectedOutput.getImpl()->rawPtr()); + + for (std::size_t n = 0; n < 5; ++n) { + for (std::size_t c = 0; c < 2; ++c) { + for (std::size_t h = 0; h < 6; ++h) { + for (std::size_t w = 0; w < 7; ++w) { + std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n)); + std::size_t in0Idx = + w + 7 * (0 + 1 * (c + 2 * n)); // middle dim is 1 + std::size_t in1Idx = + w + 7 * (h + 6 * c); // no n dimension + + expectedOutputData[outIdx] = input0Data[in0Idx] * input1Data[in1Idx]; } } - )); - - for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1}) - { - T->setBackend("cpu") ; - T->setDataType(DataType::Float32); } + } - op->associateInput(0, T0); - op->associateInput(1, T1); - op->getOutput(0)->setGrad(newGrad); - op->forwardDims(); + auto outputTensor = op->getOutput(0); - myMul->backward(); + REQUIRE(approxEq<float>(*outputTensor, expectedOutput)); - REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); - REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); + // Backward pass + std::vector<float> gradOutputData(expectedOutput.size()); + for (auto &val : gradOutputData) { + val = dist(gen); } + + op->getOutput(0)->setGrad(std::make_shared<Tensor>()); + op->getOutput(0)->grad()->resize(outputDims); + op->getOutput(0)->grad()->getImpl()->setRawPtr(gradOutputData.data(), + expectedOutput.size()); + + // Compute reference gradients + std::vector<float> expectedGrad0(T0->size(), 0.0f); + std::vector<float> expectedGrad1(T1->size(), 0.0f); + + for (std::size_t n = 0; n < 5; ++n) { + for (std::size_t c = 0; c < 2; ++c) { + for (std::size_t h = 0; h < 6; ++h) { + for (std::size_t w = 0; w < 7; ++w) { + std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n)); + std::size_t in0Idx = w + 7 * (0 + 1 * (c + 2 * n)); + std::size_t in1Idx = w + 7 * (h + 6 * c); + + // Gradient for input0: grad_output * input1 + expectedGrad0[in0Idx] += + gradOutputData[outIdx] * input1Data[in1Idx]; + + // Gradient for input1: grad_output * input0 + 
expectedGrad1[in1Idx] += + gradOutputData[outIdx] * input0Data[in0Idx]; + } + } + } + } + + // Perform backward pass + op->backward(); + + auto expectedGrad0Tensor = std::make_shared<Tensor>(); + expectedGrad0Tensor->resize(T0->dims()); + expectedGrad0Tensor->setBackend("cpu"); + expectedGrad0Tensor->setDataType(DataType::Float32); + expectedGrad0Tensor->getImpl()->setRawPtr(expectedGrad0.data(), + expectedGrad0.size()); + + auto expectedGrad1Tensor = std::make_shared<Tensor>(T1->dims()); + expectedGrad1Tensor->setBackend("cpu"); + expectedGrad1Tensor->setDataType(DataType::Float32); + expectedGrad1Tensor->getImpl()->setRawPtr(expectedGrad1.data(), + expectedGrad1.size()); + + // Verify backward pass + REQUIRE(approxEq<float>(*T0->grad(), *expectedGrad0Tensor)); + REQUIRE(approxEq<float>(*T1->grad(), *expectedGrad1Tensor)); + + // Optional: Print some values for verification + // std::cout << "Input shapes: (" << dims0[0] << "," << dims0[1] << + // "," << dims0[2] << "," << dims0[3] + // << ") * (" << dims1[0] << "," << dims1[1] << "," << + // dims1[2] + // << ") -> (" << outputDims[0] << "," << outputDims[1] + // << "," << outputDims[2] << "," << outputDims[3] << + // ")\n"; + // std::cout << "Input sizes: " << input0_size << " * " << + // input1_size << " -> " << output_size << "\n"; } +} -TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { +TEST_CASE("[cpu/operator] Mul(forward)", "[Mul][CPU]") { constexpr std::uint16_t NBTRIALS = 10; // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3)); - std::uniform_int_distribution<int> boolDist(0,1); - - // Create MatMul Operator - std::shared_ptr<Node> myMul = Mul(); - auto op = 
std::static_pointer_cast<OperatorTensor>(myMul-> getOperator()); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), + std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), + std::size_t(3)); + std::uniform_int_distribution<int> boolDist(0, 1); + + std::shared_ptr<Mul_Op> op = std::make_shared<Mul_Op>(); op->setDataType(DataType::Float32); op->setBackend("cpu"); - // Create 2 input Tensors std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); - op->associateInput(0,T0); + op->associateInput(0, T0); T0->setDataType(DataType::Float32); T0->setBackend("cpu"); std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(); - op -> associateInput(1,T1); + op->associateInput(1, T1); T1->setDataType(DataType::Float32); T1->setBackend("cpu"); - // Create results Tensor std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(); Tres->setDataType(DataType::Float32); Tres->setBackend("cpu"); @@ -391,14 +348,9 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { std::chrono::time_point<std::chrono::system_clock> end; std::chrono::duration<double, std::micro> duration{}; - SECTION("MulImpl_cpu::forward()") { - SECTION("Scalar / Scalar") { - - } - SECTION("Scalar / +1-D Tensor") { - - } + SECTION("Scalar / Scalar") {} + SECTION("Scalar / +1-D Tensor") {} SECTION("+1-D Tensor / +1-D Tensor - same dimensions") { std::size_t number_of_operation = 0; @@ -413,13 +365,17 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { dims.push_back(dimSizeDist(gen)); } - const auto nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const auto nb_elements = + std::accumulate(dims.cbegin(), + dims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; // without broadcasting - float* array0 = new float[nb_elements]; - float* array1 = new 
float[nb_elements]; - float* result = new float[nb_elements]; + float *array0 = new float[nb_elements]; + float *array1 = new float[nb_elements]; + float *result = new float[nb_elements]; for (std::size_t i = 0; i < nb_elements; ++i) { array0[i] = valueDist(gen); @@ -429,21 +385,23 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { // input0 T0->resize(dims); - T0 -> getImpl() -> setRawPtr(array0, nb_elements); + T0->getImpl()->setRawPtr(array0, nb_elements); // input1 T1->resize(dims); - T1 -> getImpl() -> setRawPtr(array1, nb_elements); + T1->getImpl()->setRawPtr(array1, nb_elements); // results Tres->resize(dims); - Tres -> getImpl() -> setRawPtr(result, nb_elements); + Tres->getImpl()->setRawPtr(result, nb_elements); op->forwardDims(); start = std::chrono::system_clock::now(); - myMul->forward(); + op->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -451,24 +409,23 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { delete[] array1; delete[] result; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {}μs\n", duration.count()); } - SECTION("+1-D Tensor / +1-D Tensor - broadcasting") { std::size_t number_of_operation = 0; for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors - // handle dimensions, replace some dimensions with '1' to get broadcasting + // handle dimensions, replace some dimensions with '1' to get + // broadcasting constexpr std::size_t nbDims = 4; std::vector<std::size_t> dimensions; - for (std::size_t i = 0; i < nbDims; ++i) - { + for 
(std::size_t i = 0; i < nbDims; ++i) { dimensions.push_back(dimSizeDist(gen)); } @@ -476,77 +433,90 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { auto dims1 = dimensions; auto dimsOut = dimensions; - for (std::size_t i = 0; i < nbDims; ++i) - { - if (boolDist(gen)) - { + for (std::size_t i = 0; i < nbDims; ++i) { + if (boolDist(gen)) { dims0[i] = 1; } - if (boolDist(gen)) - { + if (boolDist(gen)) { dims1[i] = 1; } dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i]; } - for(auto dim : dims0) - { + for (auto dim : dims0) { Log::info("Dimension of input 0 : {}", dim); } - for(auto dim : dims1) - { + for (auto dim : dims1) { Log::info("Dimension of input 1 : {}", dim); } // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - - for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) - { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + float *array1 = + new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < dims0[0] * dims0[1] * dims0[2] * dims0[3]; + ++i) { array0[i] = valueDist(gen); } - for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) - { + for (std::size_t i = 0; + i < dims1[0] * dims1[1] * dims1[2] * dims1[3]; + ++i) { array1[i] = valueDist(gen); } // compute true result - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; - - for (std::size_t a = 0; a < dimsOut[0]; ++a) - { - for (std::size_t b = 0; b < dimsOut[1]; ++b) - { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? 
b : 0); - - const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) - + strides1[1] * ((dims1[1] > 1) ? b : 0); - - for (std::size_t c = 0; c < dimsOut[2]; ++c) - { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); - - for (std::size_t d = 0; d < dimsOut[3]; ++d) - { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1[2] > 1) ? c : 0) - + ((dims1[3] > 1) ? d : 0); - - result[idx_out + d] = array0[idx0] * array1[idx1]; - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl; + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1[1] * dims1[2] * dims1[3], + dims1[2] * dims1[3], + dims1[3], + 1}; + + for (std::size_t a = 0; a < dimsOut[0]; ++a) { + for (std::size_t b = 0; b < dimsOut[1]; ++b) { + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + + const std::size_t idx1_0 = + strides1[0] * ((dims1[0] > 1) ? a : 0) + + strides1[1] * ((dims1[1] > 1) ? b : 0); + + for (std::size_t c = 0; c < dimsOut[2]; ++c) { + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); + + for (std::size_t d = 0; d < dimsOut[3]; ++d) { + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + + std::size_t idx1 = + idx1_0 + + strides1[2] * ((dims1[2] > 1) ? c : 0) + + ((dims1[3] > 1) ? 
d : 0); + + result[idx_out + d] = + array0[idx0] * array1[idx1]; + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " * " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -555,22 +525,30 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]); + T1->getImpl()->setRawPtr( + array1, + dims1[0] * dims1[1] * dims1[2] * dims1[3]); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); - myMul->forward(); + op->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -579,15 +557,21 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: 
{}\n", (number_of_operation / duration.count())); + Log::info("total time: {}μs\n", duration.count()); } SECTION("+1-D Tensor / 1-D Tensor") { std::size_t number_of_operation = 0; - std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3)); + std::uniform_int_distribution<std::size_t> nbRemovedDimsDist( + std::size_t(1), + std::size_t(3)); for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors @@ -604,15 +588,24 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { dims1[i] = 1; } } - dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen)); + dims1.erase(dims1.cbegin(), + dims1.cbegin() + nbRemovedDimsDist(gen)); // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); - float* array1 = new float[array1_size]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + std::size_t array1_size = + std::accumulate(dims1.cbegin(), + dims1.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + float *array1 = new float[array1_size]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]); + ++i) { array0[i] = valueDist(gen); } for (std::size_t i = 0; i < array1_size; ++i) { @@ -621,27 +614,48 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { // compute true result auto dims1_tmp = dims1; - dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1)); - - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = 
{dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1}; + dims1_tmp.insert(dims1_tmp.cbegin(), + 4 - dims1_tmp.size(), + std::size_t(1)); + + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[3], + 1}; for (std::size_t a = 0; a < dimsOut[0]; ++a) { for (std::size_t b = 0; b < dimsOut[1]; ++b) { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) - + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = + strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) + + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); for (std::size_t c = 0; c < dimsOut[2]; ++c) { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); for (std::size_t d = 0; d < dimsOut[3]; ++d) { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0) - + ((dims1_tmp[3] > 1) ? d : 0); - result[idx_out + d] = array0[idx0] * array1[idx1]; - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl; + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = + idx1_0 + + strides1[2] * + ((dims1_tmp[2] > 1) ? c : 0) + + ((dims1_tmp[3] > 1) ? 
d : 0); + result[idx_out + d] = + array0[idx0] * array1[idx1]; + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " * " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -650,22 +664,28 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, array1_size); + T1->getImpl()->setRawPtr(array1, array1_size); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); - myMul->forward(); + op->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -674,13 +694,18 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + 
Log::info("total time: {}μs\n", duration.count()); } } } } // namespace Aidge + diff --git a/unit_tests/operator/Test_PadImpl.cpp b/unit_tests/operator/Test_PadImpl.cpp index cdd3a5f979085f3782776ce69ddd92c0d53150c4..f7823d022c8d3b228740a3df3f1d01224cd346c6 100644 --- a/unit_tests/operator/Test_PadImpl.cpp +++ b/unit_tests/operator/Test_PadImpl.cpp @@ -9,15 +9,17 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> -#include <cstdlib> #include <memory> +#include <catch2/catch_test_macros.hpp> + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/PadImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/Pad.hpp" -#include "aidge/backend/cpu.hpp" - using namespace Aidge; TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { diff --git a/unit_tests/operator/Test_PaddedConv.cpp b/unit_tests/operator/Test_PaddedConv.cpp index b7584ad069336a270ed07c32d4c07552888b6587..4b76fe0638033e41adf95d3abfd10691deefe940 100644 --- a/unit_tests/operator/Test_PaddedConv.cpp +++ b/unit_tests/operator/Test_PaddedConv.cpp @@ -9,16 +9,16 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> -#include <cstdlib> #include <memory> +#include <catch2/catch_test_macros.hpp> + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/PaddedConvImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" -#include "aidge/operator/MetaOperator.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/MetaOperatorDefs.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" - -#include "aidge/backend/cpu.hpp" using namespace Aidge; diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp index 
cb5d8872c9c7242bb4aa4efca388d53b578417f9..55a416c3f404506359e06f9937dd958503236901 100644 --- a/unit_tests/operator/Test_PowImpl.cpp +++ b/unit_tests/operator/Test_PowImpl.cpp @@ -9,18 +9,26 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> -#include <cmath> -#include <cstddef> // std::size_t -#include <cstdint> // std::uint16_t -#include <chrono> -#include <iostream> +#include <chrono> // std::micro, std::chrono::time_point, + // std::chrono::system_clock, std::chrono::duration +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <functional> // std::multiplies #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/PowImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Pow.hpp" +#include "aidge/utils/ArrayHelpers.hpp" #include "aidge/utils/TensorUtils.hpp" namespace Aidge { @@ -118,8 +126,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { // with broadcasting } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } SECTION("+1-D Tensor / +1-D Tensor - broadcasting") { @@ -213,8 +221,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), 
std::size_t(1), std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } SECTION("+1-D Tensor / 1-D Tensor") { std::size_t number_of_operation = 0; @@ -309,8 +317,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } } @@ -440,7 +448,7 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { } } )); - const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + const Tensor expectedGrad0 = Array3D<float, 2, 2, 3>( { { { @@ -453,18 +461,13 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { } } } - )); - const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>( + ); + const Tensor expectedGrad1 = Array1D<float, 3>( { {14.14779854, 22.99299049, 33.56402588} } - )); + ); - for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1}) - { - T->setBackend("cpu") ; - T->setDataType(DataType::Float32); - } std::shared_ptr<Node> powOp = Pow(); auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator()); opr->setDataType(DataType::Float32); @@ -475,8 +478,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { powOp->forward(); powOp->backward(); - REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0)); - REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1)); + 
REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), expectedGrad1)); } } } diff --git a/unit_tests/operator/Test_ReLUImpl.cpp b/unit_tests/operator/Test_ReLUImpl.cpp index 106d29ecfbf8ba785b4f9e5dba75daa272a86b26..eebdf7ac6c716db987c2600f098dcf9331d6a6c9 100644 --- a/unit_tests/operator/Test_ReLUImpl.cpp +++ b/unit_tests/operator/Test_ReLUImpl.cpp @@ -9,15 +9,16 @@ * ********************************************************************************/ +#include <memory> + #include <catch2/catch_test_macros.hpp> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/ReLUImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/ReLU.hpp" -#include "aidge/backend/cpu.hpp" - -#include <memory> - using namespace Aidge; @@ -26,17 +27,16 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> { {0, 1, 2,-3, 4,-5,-6, 7, 8, 9} }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,10> { + Tensor expectedOutput = Array1D<int,10> { {0, 1, 2, 0, 4, 0, 0, 7, 8, 9} - }); + }; - std::shared_ptr<Node> myReLU = ReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator()); + std::shared_ptr<ReLU_Op> op = std::make_shared<ReLU_Op>(); op->associateInput(0,input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); - myReLU->forward(); - REQUIRE(*(op->getOutput(0)) == *expectedOutput); + op->forward(); + REQUIRE(*(op->getOutput(0)) == expectedOutput); } SECTION("2D Tensor") { @@ -46,20 +46,19 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} } }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,10> { + Tensor expectedOutput = Array2D<int,2,10> { { { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} } - }); + }; - 
std::shared_ptr<Node> myReLU = ReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator()); + std::shared_ptr<ReLU_Op> op = std::make_shared<ReLU_Op>(); op->associateInput(0,input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); - myReLU->forward(); - REQUIRE(*op->getOutput(0) == *expectedOutput); + op->forward(); + REQUIRE(*op->getOutput(0) == expectedOutput); } SECTION("3D Tensor") { @@ -75,7 +74,7 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { } } }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,2,10> { + Tensor expectedOutput = Array3D<int,2,2,10> { { { { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, @@ -86,15 +85,14 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} } } - }); + }; - std::shared_ptr<Node> myReLU = ReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator()); + std::shared_ptr<ReLU_Op> op = std::make_shared<ReLU_Op>(); op->associateInput(0,input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); - myReLU->forward(); - REQUIRE(*(op->getOutput(0)) == *expectedOutput); + op->forward(); + REQUIRE(*(op->getOutput(0)) == expectedOutput); } SECTION("4D Tensor") { @@ -122,7 +120,7 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { } } }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { + Tensor expectedOutput = Array4D<int,2,2,2,10> { { { { @@ -145,14 +143,13 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { } } } - }); + }; - std::shared_ptr<Node> myReLU = ReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator()); + std::shared_ptr<ReLU_Op> op = std::make_shared<ReLU_Op>(); op->associateInput(0,input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); - myReLU->forward(); - REQUIRE(*op->getOutput(0) == *expectedOutput); + op->forward(); + REQUIRE(*op->getOutput(0) == expectedOutput); } } \ No newline at 
end of file diff --git a/unit_tests/operator/Test_ReduceMeanImpl.cpp b/unit_tests/operator/Test_ReduceMeanImpl.cpp index dd647c7ba3f90fe7f3554aae7133e97ffa9c99ba..30ffeb0dd0b584f50349c206863c7ab9ac776721 100644 --- a/unit_tests/operator/Test_ReduceMeanImpl.cpp +++ b/unit_tests/operator/Test_ReduceMeanImpl.cpp @@ -9,16 +9,23 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> +#include <algorithm> // std::fill +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t, std::uint16_t #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <random> // std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/ReduceMean.hpp" -#include "aidge/operator/Conv.hpp" - -#include "aidge/backend/cpu.hpp" +#include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/TensorUtils.hpp" using namespace Aidge; diff --git a/unit_tests/operator/Test_ReduceSumImpl.cpp b/unit_tests/operator/Test_ReduceSumImpl.cpp index 49569d1f65ff6c51f9681632b16375605ab326e7..0aa543da4f9d55dfa672790a5d99467cd794001a 100644 --- a/unit_tests/operator/Test_ReduceSumImpl.cpp +++ b/unit_tests/operator/Test_ReduceSumImpl.cpp @@ -9,17 +9,22 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t, std::int32_t #include <memory> -#include <numeric> // std::accumulate #include <random> // std::random_device, std::mt19937, std::uniform_real_distribution 
+#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/data/Data.hpp" // DataType #include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/OperatorTensor.hpp" #include "aidge/operator/ReduceSum.hpp" -#include "aidge/operator/Conv.hpp" - -#include "aidge/backend/cpu.hpp" #include "aidge/utils/TensorUtils.hpp" +#include "aidge/utils/Types.h" using namespace Aidge; @@ -112,7 +117,7 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); - std::shared_ptr<Node> myReduceSum = ReduceSum(std::vector<int32_t>{}, false, true); + std::shared_ptr<Node> myReduceSum = ReduceSum(std::vector<std::int32_t>{}, false, true); auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); op->associateInput(0,myInput); op->setDataType(DataType::Float32); diff --git a/unit_tests/operator/Test_RoundImpl.cpp b/unit_tests/operator/Test_RoundImpl.cpp index b4cf9ffbedc18b35b42ebbc05971f86e0fa584e3..e658b0616683633ce19b2284abb9d4fae7942a23 100644 --- a/unit_tests/operator/Test_RoundImpl.cpp +++ b/unit_tests/operator/Test_RoundImpl.cpp @@ -9,15 +9,23 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> -#include <cstddef> // std::size_t -#include <cstdint> // std::uint16_t -#include <chrono> -#include <iostream> +#include <chrono> // std::micro, std::chrono::time_point, + // std::chrono::system_clock, std::chrono::duration +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <functional> // std::multiplies #include <memory> -#include <numeric> -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution -#include <iomanip> +#include <numeric> // std::accumulate +#include <random> // 
std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/RoundImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Round.hpp" #include "aidge/utils/TensorUtils.hpp" @@ -29,7 +37,7 @@ TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") { // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(-15, 15); + std::uniform_real_distribution<float> valueDist(-15, 15); std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5)); std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3)); @@ -59,7 +67,7 @@ TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") { std::size_t number_of_operation = 0; for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { - + // generate 2 random Tensors const std::size_t nbDims = nbDimsDist(gen); std::vector<std::size_t> dims; @@ -72,7 +80,7 @@ TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") { // without broadcasting float* array0 = new float[nb_elements]; float* result = new float[nb_elements]; - + for (std::size_t i = 0; i < nb_elements; ++i) { array0[i] = valueDist(gen); result[i] = std::nearbyint(array0[i]); @@ -86,29 +94,22 @@ TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") { // results Tres->resize(dims); Tres -> getImpl() -> setRawPtr(result, nb_elements); - + op->forwardDims(); start = std::chrono::system_clock::now(); myRound->forward(); end = std::chrono::system_clock::now(); duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); - bool is_eq = approxEq<float>(*(op->getOutput(0)), *Tres); - - auto Output = *(op->getOutput(0)); - - auto prt = Output.getImpl()->rawPtr(); - - REQUIRE(is_eq); - + 
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); delete[] array0; delete[] result; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {} μs\n", duration.count()); } } } // namespace Aidge diff --git a/unit_tests/operator/Test_SoftmaxImpl.cpp b/unit_tests/operator/Test_SoftmaxImpl.cpp index da6c6f0d35a1db9ad9099a40b7e83459e14a20f5..bc452a409fef0236b3021de5b41eb47453f42f75 100644 --- a/unit_tests/operator/Test_SoftmaxImpl.cpp +++ b/unit_tests/operator/Test_SoftmaxImpl.cpp @@ -9,14 +9,16 @@ * ********************************************************************************/ +#include <memory> + #include <catch2/catch_test_macros.hpp> +#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Softmax.hpp" - -#include "aidge/backend/cpu.hpp" - -#include <memory> +#include "aidge/utils/ArrayHelpers.hpp" +#include "aidge/utils/TensorUtils.hpp" using namespace Aidge; @@ -30,28 +32,22 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") { 0.35077620, -0.78156322, -0.98952234, 0.04166317, 1.34357309} } }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,10> { + Tensor expectedOutput = Array2D<float,2,10> { { {0.04883239, 0.11326669, 0.05974559, 0.09930880, 0.09267281, 0.03006749, 0.15842478, 0.24514021, 0.07825989, 0.07428131}, {0.05429055, 0.27136859, 0.28389078, 0.02240700, 0.06262558, 0.06087753, 0.01961952, 0.01593576, 0.04469007, 0.16429459} } - }); + }; - std::shared_ptr<Node> mySoftmax = Softmax(1); - auto op = std::static_pointer_cast<OperatorTensor>(mySoftmax -> getOperator()); + std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1); op->associateInput(0,input); 
op->setDataType(DataType::Float32); op->setBackend("cpu"); - mySoftmax->forward(); - - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); - } + op->forward(); + REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f)); } SECTION("4D Tensor") { std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { @@ -80,7 +76,7 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") { } } }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { + Tensor expectedOutput = Array4D<float,2,3,3,3> { { { {{0.45109013, 0.42849392, 0.43775153}, @@ -105,19 +101,14 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") { {0.34566763, 0.32462072, 0.48979440}} } } - }); + }; - std::shared_ptr<Node> mySoftmax = Softmax(1); - auto op = std::static_pointer_cast<OperatorTensor>(mySoftmax -> getOperator()); + std::shared_ptr<Softmax_Op> op = std::make_shared<Softmax_Op>(1); op->associateInput(0,input); op->setDataType(DataType::Float32); op->setBackend("cpu"); - mySoftmax->forward(); + op->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); - } + REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f)); } } \ No newline at end of file diff --git a/unit_tests/operator/Test_SqrtImpl.cpp b/unit_tests/operator/Test_SqrtImpl.cpp index d630c66c8b8085e6d382841da6b7cac2c88b1dd0..aac5046003611d96d5c8111192e9fd4c5254b89d 100644 --- a/unit_tests/operator/Test_SqrtImpl.cpp +++ b/unit_tests/operator/Test_SqrtImpl.cpp @@ -9,14 +9,16 @@ * 
********************************************************************************/ +#include <memory> + #include <catch2/catch_test_macros.hpp> +#include "aidge/backend/cpu/operator/SqrtImpl.hpp" +#include "aidge/data/DataType.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Sqrt.hpp" - -#include "aidge/backend/cpu.hpp" - -#include <memory> +#include "aidge/utils/ArrayHelpers.hpp" +#include "aidge/utils/TensorUtils.hpp" using namespace Aidge; @@ -28,26 +30,20 @@ TEST_CASE("[cpu/operator] Sqrt(forward)", "[Sqrt][CPU]") { { 0.00000000, 1.84539008} } }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + Tensor expectedOutput = Array2D<float,2,2> { { {4.00000000, 0.78883994}, {0.00000000, 1.35845140} } - }); + }; - std::shared_ptr<Node> mySqrt = Sqrt(); - auto op = std::static_pointer_cast<OperatorTensor>(mySqrt -> getOperator()); - mySqrt->getOperator()->associateInput(0,input); - mySqrt->getOperator()->setDataType(DataType::Float32); - mySqrt->getOperator()->setBackend("cpu"); - mySqrt->forward(); - - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< 4; ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); - } + std::shared_ptr<Sqrt_Op> op = std::make_shared<Sqrt_Op>(); + op->associateInput(0,input); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forward(); + REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f)); } SECTION("4D Tensor") { @@ -78,7 +74,7 @@ TEST_CASE("[cpu/operator] Sqrt(forward)", "[Sqrt][CPU]") { } }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { + Tensor expectedOutput = Array4D<float,2,3,3,3> { { { {{0.24936883, 0.6844717, 0.7804763}, @@ -103,19 +99,14 @@ TEST_CASE("[cpu/operator] Sqrt(forward)", "[Sqrt][CPU]") { {0.3608653, 0.8571328, 0.16447252}} } } - }); + 
}; - std::shared_ptr<Node> mySqrt = Sqrt(); - auto op = std::static_pointer_cast<OperatorTensor>(mySqrt -> getOperator()); - mySqrt->getOperator()->associateInput(0,input); - mySqrt->getOperator()->setDataType(DataType::Float32); - mySqrt->getOperator()->setBackend("cpu"); - mySqrt->forward(); + std::shared_ptr<Sqrt_Op> op = std::make_shared<Sqrt_Op>(); + op->associateInput(0,input); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< 54; ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); - } + REQUIRE(approxEq<float>(*(op->getOutput(0)), expectedOutput, 1e-5f, 1e-8f)); } } \ No newline at end of file diff --git a/unit_tests/operator/Test_SubImpl.cpp b/unit_tests/operator/Test_SubImpl.cpp index 44666ae631152c8898e24f7003b0c2ede8c67b84..1317e88a371e9a6e7a3deae5b7f662a9cd879a60 100644 --- a/unit_tests/operator/Test_SubImpl.cpp +++ b/unit_tests/operator/Test_SubImpl.cpp @@ -9,17 +9,26 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> -#include <cstddef> // std::size_t -#include <cstdint> // std::uint16_t -#include <chrono> -#include <iostream> +#include <chrono> // std::micro, std::chrono::time_point, + // std::chrono::system_clock +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <functional> // std::multiplies #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937 + // std::uniform_int_distribution, std::uniform_real_distribution +#include <vector> + +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> +#include 
"aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/SubImpl.hpp" +#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Sub.hpp" +#include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/TensorUtils.hpp" namespace Aidge { @@ -117,8 +126,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { // with broadcasting } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {}μs\n", duration.count()); } SECTION("+1-D Tensor / +1-D Tensor - broadcasting") { @@ -212,8 +221,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {}μs\n", duration.count()); } SECTION("+1-D Tensor / 1-D Tensor") { std::size_t number_of_operation = 0; @@ -308,8 +317,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + Log::info("number of elements over time spent: {}\n", (number_of_operation / duration.count())); + Log::info("total time: {}μs\n", duration.count()); } } } diff --git a/unit_tests/operator/Test_WeightInterleavingImpl.cpp b/unit_tests/operator/Test_WeightInterleavingImpl.cpp new file 
mode 100644 index 0000000000000000000000000000000000000000..c95c8fca19eb79eb78fc19e93ded3383054383e7 --- /dev/null +++ b/unit_tests/operator/Test_WeightInterleavingImpl.cpp @@ -0,0 +1,436 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/WeightInterleaving.hpp" +#include "aidge/recipes/Recipes.hpp" +#include "aidge/utils/TensorUtils.hpp" + +#include "aidge/backend/cpu.hpp" + +#include <memory> + +using namespace Aidge; + +TEST_CASE("[cpu/operator] WeightInterleaving", "[WeightInterleaving][CPU]") { + + std::shared_ptr<Node> myWeightInterleaving = WeightInterleaving(); + auto opWeightInterleaving = std::static_pointer_cast<WeightInterleaving_Op>(myWeightInterleaving -> getOperator()); + + SECTION("CompactDataSize - Single element cases") { + REQUIRE(opWeightInterleaving->compactDataSize(1, 1) == 1); // 1 bit, needs 1 byte + REQUIRE(opWeightInterleaving->compactDataSize(1, 7) == 1); // 7 bits, needs 1 byte + } + + SECTION("CompactDataSize - Boundary cases for different nb_bits values") { + REQUIRE(opWeightInterleaving->compactDataSize(8, 1) == 1); // 8 elements at 1 bit each, fits in 1 byte + REQUIRE(opWeightInterleaving->compactDataSize(8, 2) == 2); // 8 elements at 2 bits each, needs 2 bytes + REQUIRE(opWeightInterleaving->compactDataSize(8, 3) == 4); // 8 elements at 3 bits each, needs 4 bytes + REQUIRE(opWeightInterleaving->compactDataSize(8, 4) == 4); // 8 elements at 4 bits each, needs 4 bytes + } + + SECTION("CompactDataSize - Larger dataSize values") { + 
REQUIRE(opWeightInterleaving->compactDataSize(16, 1) == 2); // 16 elements at 1 bit each, fits in 2 bytes + REQUIRE(opWeightInterleaving->compactDataSize(16, 2) == 4); // 16 elements at 2 bits each, needs 4 bytes + REQUIRE(opWeightInterleaving->compactDataSize(16, 3) == 8); // 16 elements at 3 bits each, needs 8 bytes + REQUIRE(opWeightInterleaving->compactDataSize(16, 4) == 8); // 16 elements at 4 bits each, needs 8 bytes + } + + SECTION("CompactDataSize - Odd dataSize values with varying nb_bits") { + REQUIRE(opWeightInterleaving->compactDataSize(7, 1) == 1); // 7 elements at 1 bit each, fits in 1 byte + REQUIRE(opWeightInterleaving->compactDataSize(7, 2) == 2); // 7 elements at 2 bits each, needs 2 bytes + REQUIRE(opWeightInterleaving->compactDataSize(7, 3) == 4); // 7 elements at 3 bits each, needs 4 bytes + REQUIRE(opWeightInterleaving->compactDataSize(7, 4) == 4); // 7 elements at 4 bits each, needs 4 bytes + } + + SECTION("CompactDataSize - Minimum and maximum values for nb_bits") { + REQUIRE(opWeightInterleaving->compactDataSize(5, 1) == 1); // 5 elements at 1 bit each, fits in 1 byte + } + + SECTION("CompactDataSize - Edge Case - dataSize of 0 should result in 0 required size") { + REQUIRE(opWeightInterleaving->compactDataSize(0, 1) == 0); // No data elements + } + + + SECTION("CompactData - 4-bit compaction") { + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 4>{ + {static_cast<std::int8_t>(0x0F), + static_cast<std::int8_t>(0xF5), + static_cast<std::int8_t>(0xB3), + static_cast<std::int8_t>(0x9C)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int4); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{ + {static_cast<int8_t>(0xF5), + static_cast<int8_t>(0x3C)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + 
expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + + SECTION("CompactData - 3-bit compaction") { + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 4>{ + {static_cast<int8_t>(0x0F), + static_cast<int8_t>(0x05), + static_cast<int8_t>(0x04), + static_cast<int8_t>(0xD3)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int3); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{ + {static_cast<int8_t>(0x75), + static_cast<int8_t>(0x43)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int3>); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int3>); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + + SECTION("CompactData - 2-bit compaction") { + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 4>{ + {static_cast<std::int8_t>(0x03), + static_cast<std::int8_t>(0x02), + static_cast<std::int8_t>(0x01), + static_cast<std::int8_t>(0x00)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + 
weight->setDataType(Aidge::DataType::Int2); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 1>{ + {static_cast<int8_t>(0xE4)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int2>); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int2>); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + + SECTION("CompactData - Edge Cases - Single element data") { + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 1>{ + {static_cast<int8_t>(0x0F)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int4); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 1>{ + {static_cast<int8_t>(0xF0)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + + SECTION("CompactData - Edge Cases - Non-divisible dataSize for nbSlot with nbbits=4") { + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 3>{ + 
{static_cast<int8_t>(0x0F), + static_cast<int8_t>(0xA5), + static_cast<int8_t>(0x34)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int4); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{ + {static_cast<int8_t>(0xF5), + static_cast<int8_t>(0x40)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + + } + + SECTION("CompactData - Edge Cases - Non-divisible dataSize for nbSlot with nbbits=3") { + + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 3>{ + {static_cast<int8_t>(0x0F), + static_cast<int8_t>(0x05), + static_cast<int8_t>(0x04)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int3); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{ + {static_cast<int8_t>(0x75), + static_cast<int8_t>(0x40)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int3>); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int3>); + op->setDataFormat(DataFormat::NHWC); + 
op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + + } + + SECTION("Forward Op - Convolution weight interleaving") { + + // Weight [Cout = 2, H = 3, W = 3, Cin = 4]: + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,4> { + { + { + { + {-6, 0, 5, -8}, // 'A' '0' '5' '8' in hexadecimal format + { 5, 5, 4, -5}, // '5' '5' '4' 'B' in hexadecimal format + {-7, -1, 4, -7} // '9' 'F' '4' '9' in hexadecimal format + }, + { + { 3, -3, -3, -3}, // '3' 'D' 'D' 'D' in hexadecimal format + { 1, 3, 1, -1}, // '1' '3' '1' 'F' in hexadecimal format + { 7, -3, -1, 4} // '7' 'D' 'F' '4' in hexadecimal format + }, + { + {-1, 3, 5, 6}, // 'F' '3' '5' '6' in hexadecimal format + {-8, 4, 7, 1}, // '8' '4' '7' '1' in hexadecimal format + {-5, 0, -1, -2} // 'B' '0' 'F' 'E' in hexadecimal format + } + }, + { + { + { 2, -7, 7, -4}, // '2' '9' '7' 'C' in hexadecimal format + {-7, 3, 0, 2}, // '9' '3' '0' '2' in hexadecimal format + { 1, -1, 2, 3} // '1' 'F' '2' '3' in hexadecimal format + }, + { + {-1, -5, -3, -7}, // 'F' 'B' 'D' '9' in hexadecimal format + {-8, 3, 5, -1}, // '8' '3' '5' 'F' in hexadecimal format + {-7, -4, -6, -1} // '9' 'C' 'A' 'F' in hexadecimal format + }, + { + { 1, 7, 5, -1}, // '1' '7' '5' 'F' in hexadecimal format + { 1, -8, 1, 2}, // '1' '8' '1' '2' in hexadecimal format + {-1, -6, -3, 0} // 'F' 'A' 'D' '0' in hexadecimal format + } + } + } + }); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,2> { + { + { + { + {static_cast<int8_t>(0xA0), static_cast<int8_t>(0x58)}, // 'A' '0' '5' '8' in hexadecimal format + {static_cast<int8_t>(0x55), static_cast<int8_t>(0x4B)}, // '5' '5' '4' 'B' in hexadecimal format + {static_cast<int8_t>(0x9F), static_cast<int8_t>(0x49)} // '9' 'F' '4' '9' in hexadecimal format + }, + { + {static_cast<int8_t>(0x3D), static_cast<int8_t>(0xDD)}, // '3' 'D' 'D' 'D' in 
hexadecimal format + {static_cast<int8_t>(0x13), static_cast<int8_t>(0x1F)}, // '1' '3' '1' 'F' in hexadecimal format + {static_cast<int8_t>(0x7D), static_cast<int8_t>(0xF4)} // '7' 'D' 'F' '4' in hexadecimal format + }, + { + {static_cast<int8_t>(0xF3), static_cast<int8_t>(0x56)}, // 'F' '3' '5' '6' in hexadecimal format + {static_cast<int8_t>(0x84), static_cast<int8_t>(0x71)}, // '8' '4' '7' '1' in hexadecimal format + {static_cast<int8_t>(0xB0), static_cast<int8_t>(0xFE)} // 'B' '0' 'F' 'E' in hexadecimal format + } + }, + { + { + {static_cast<int8_t>(0x29), static_cast<int8_t>(0x7C)}, // '2' '9' '7' 'C' in hexadecimal format + {static_cast<int8_t>(0x93), static_cast<int8_t>(0x02)}, // '9' '3' '0' '2' in hexadecimal format + {static_cast<int8_t>(0x1F), static_cast<int8_t>(0x23)} // '1' 'F' '2' '3' in hexadecimal format + }, + { + {static_cast<int8_t>(0xFB), static_cast<int8_t>(0xD9)}, // 'F' 'B' 'D' '9' in hexadecimal format + {static_cast<int8_t>(0x83), static_cast<int8_t>(0x5F)}, // '8' '3' '5' 'F' in hexadecimal format + {static_cast<int8_t>(0x9C), static_cast<int8_t>(0xAF)} // '9' 'C' 'A' 'F' in hexadecimal format + }, + { + {static_cast<int8_t>(0x17), static_cast<int8_t>(0x5F)}, // '1' '7' '5' 'F' in hexadecimal format + {static_cast<int8_t>(0x18), static_cast<int8_t>(0x12)}, // '1' '8' '1' '2' in hexadecimal format + {static_cast<int8_t>(0xFA), static_cast<int8_t>(0xD0)} // 'F' 'A' 'D' '0' in hexadecimal format + } + } + } + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int4); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(WeightInterleavedType_v<Aidge::DataType::Int4>); + 
op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + + SECTION("Recipie ApplyWeightInterleaving") { + + // Weight [Cout = 2, H = 3, W = 3, Cin = 4]: + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,4> { + { + { + { + {-6, 0, 5, -8}, // 'A' '0' '5' '8' in hexadecimal format + { 5, 5, 4, -5}, // '5' '5' '4' 'B' in hexadecimal format + {-7, -1, 4, -7} // '9' 'F' '4' '9' in hexadecimal format + }, + { + { 3, -3, -3, -3}, // '3' 'D' 'D' 'D' in hexadecimal format + { 1, 3, 1, -1}, // '1' '3' '1' 'F' in hexadecimal format + { 7, -3, -1, 4} // '7' 'D' 'F' '4' in hexadecimal format + }, + { + {-1, 3, 5, 6}, // 'F' '3' '5' '6' in hexadecimal format + {-8, 4, 7, 1}, // '8' '4' '7' '1' in hexadecimal format + {-5, 0, -1, -2} // 'B' '0' 'F' 'E' in hexadecimal format + } + }, + { + { + { 2, -7, 7, -4}, // '2' '9' '7' 'C' in hexadecimal format + {-7, 3, 0, 2}, // '9' '3' '0' '2' in hexadecimal format + { 1, -1, 2, 3} // '1' 'F' '2' '3' in hexadecimal format + }, + { + {-1, -5, -3, -7}, // 'F' 'B' 'D' '9' in hexadecimal format + {-8, 3, 5, -1}, // '8' '3' '5' 'F' in hexadecimal format + {-7, -4, -6, -1} // '9' 'C' 'A' 'F' in hexadecimal format + }, + { + { 1, 7, 5, -1}, // '1' '7' '5' 'F' in hexadecimal format + { 1, -8, 1, 2}, // '1' '8' '1' '2' in hexadecimal format + {-1, -6, -3, 0} // 'F' 'A' 'D' '0' in hexadecimal format + } + } + } + }); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,2> { + { + { + { + {static_cast<int8_t>(0xA0), static_cast<int8_t>(0x58)}, // 'A' '0' '5' '8' in hexadecimal format + {static_cast<int8_t>(0x55), static_cast<int8_t>(0x4B)}, // '5' '5' '4' 'B' in hexadecimal format + {static_cast<int8_t>(0x9F), static_cast<int8_t>(0x49)} // '9' 'F' '4' '9' in hexadecimal format + }, + { + {static_cast<int8_t>(0x3D), static_cast<int8_t>(0xDD)}, // '3' 
'D' 'D' 'D' in hexadecimal format + {static_cast<int8_t>(0x13), static_cast<int8_t>(0x1F)}, // '1' '3' '1' 'F' in hexadecimal format + {static_cast<int8_t>(0x7D), static_cast<int8_t>(0xF4)} // '7' 'D' 'F' '4' in hexadecimal format + }, + { + {static_cast<int8_t>(0xF3), static_cast<int8_t>(0x56)}, // 'F' '3' '5' '6' in hexadecimal format + {static_cast<int8_t>(0x84), static_cast<int8_t>(0x71)}, // '8' '4' '7' '1' in hexadecimal format + {static_cast<int8_t>(0xB0), static_cast<int8_t>(0xFE)} // 'B' '0' 'F' 'E' in hexadecimal format + } + }, + { + { + {static_cast<int8_t>(0x29), static_cast<int8_t>(0x7C)}, // '2' '9' '7' 'C' in hexadecimal format + {static_cast<int8_t>(0x93), static_cast<int8_t>(0x02)}, // '9' '3' '0' '2' in hexadecimal format + {static_cast<int8_t>(0x1F), static_cast<int8_t>(0x23)} // '1' 'F' '2' '3' in hexadecimal format + }, + { + {static_cast<int8_t>(0xFB), static_cast<int8_t>(0xD9)}, // 'F' 'B' 'D' '9' in hexadecimal format + {static_cast<int8_t>(0x83), static_cast<int8_t>(0x5F)}, // '8' '3' '5' 'F' in hexadecimal format + {static_cast<int8_t>(0x9C), static_cast<int8_t>(0xAF)} // '9' 'C' 'A' 'F' in hexadecimal format + }, + { + {static_cast<int8_t>(0x17), static_cast<int8_t>(0x5F)}, // '1' '7' '5' 'F' in hexadecimal format + {static_cast<int8_t>(0x18), static_cast<int8_t>(0x12)}, // '1' '8' '1' '2' in hexadecimal format + {static_cast<int8_t>(0xFA), static_cast<int8_t>(0xD0)} // 'F' 'A' 'D' '0' in hexadecimal format + } + } + } + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(Aidge::DataType::Dual_Int4); + + // Create convolution node + std::shared_ptr<Node> conv = Conv(4, 2, {3, 3}, "conv1"); + + // Place the weight tensor in the weight producer of the conv + auto weightProducer = conv->getParent(1); + weightProducer->getOperator()->setOutput(0, weight); + + // Set dataType, dataformat and backend of convolution + conv->getOperator()->setDataFormat(Aidge::DataFormat::NHWC); + 
conv->getOperator()->setDataType(Aidge::DataType::Int4); + conv->getOperator()->setBackend("cpu"); + + // Apply recipie + applyWeightInterleaving(conv); + + // Compare the weight producer output tensor with the expected weights with interleaving + auto newProdOp = std::static_pointer_cast<OperatorTensor>(conv->getParent(1)->getOperator()); + REQUIRE(*(newProdOp->getOutput(0)) == *expectedWeightInterleaving); + } + +} diff --git a/unit_tests/scheduler/Test_CastMove.cpp b/unit_tests/scheduler/Test_CastMove.cpp index 5ca2cd9de4dcc9dab2c78f7ae1e1bf3090db8f2b..b78e864fecab1fd103a2cb30924d10a25f5b8f10 100644 --- a/unit_tests/scheduler/Test_CastMove.cpp +++ b/unit_tests/scheduler/Test_CastMove.cpp @@ -13,15 +13,20 @@ #include <memory> #include <string> + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/data/Tensor.hpp" -#include "aidge/utils/TensorUtils.hpp" #include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/OpArgs.hpp" +#include "aidge/operator/Conv.hpp" +#include "aidge/operator/FC.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/recipes/Recipes.hpp" - -#include "aidge/backend/cpu.hpp" +#include "aidge/utils/ArrayHelpers.hpp" +#include "aidge/utils/TensorUtils.hpp" using namespace Aidge; @@ -205,15 +210,15 @@ TEST_CASE("[cpu/castmove] CastMove(forward)") { REQUIRE_NOTHROW(scheduler.forward()); scheduler.saveSchedulingDiagram("schedulingSequential"); - std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + Tensor expectedOutput1 = Array4D<int, 2, 3, 3, 3>{ {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, 
- {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); + {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}; - std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{ + Tensor expectedOutput2 = Array4D<int, 2, 4, 3, 3>{ {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}}, {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}}, {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}}, @@ -221,26 +226,26 @@ TEST_CASE("[cpu/castmove] CastMove(forward)") { {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}}, {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}}, {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}}, - {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}); + {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}; - std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + Tensor expectedOutput3 = Array4D<int, 2, 3, 3, 3>{ {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}}, {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}}, {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}}, {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}}, {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}}, - {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}); + {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}; Tensor expectedOutput4 = Array2D<int, 2, 5>{ {{205050376, 198925904, 181355097, 196978090, 238868348}, {598467376, 561797804, 560823897, 593043790, 698672948}}}; - std::shared_ptr<Tensor> other1 = 
std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0); - REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12)); - std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0); - REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12)); - std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0); - REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12)); - std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0); - REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other1 = std::static_pointer_cast<Conv_Op<2>>(g->getNode("conv1")->getOperator())->getOutput(0); + REQUIRE(approxEq<float, int>(*other1, expectedOutput1, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other2 = std::static_pointer_cast<Conv_Op<2>>(g->getNode("conv2")->getOperator())->getOutput(0); + REQUIRE(*other2 == expectedOutput2); + std::shared_ptr<Tensor> other3 = std::static_pointer_cast<Conv_Op<2>>(g->getNode("conv3")->getOperator())->getOutput(0); + REQUIRE(approxEq<double, int>(*other3, expectedOutput3, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other4 = std::static_pointer_cast<FC_Op>(g->getNode("fc")->getOperator())->getOutput(0); + REQUIRE(*other4 == expectedOutput4); } } diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index 78a10c308a60f026b83ea64cfbd25a848099eb90..956169c387c4a34f500f66b214dcf95a145feafd 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -18,10 +18,19 @@ #include "aidge/graph/GraphView.hpp" #include "aidge/graph/OpArgs.hpp" #include "aidge/operator/Memorize.hpp" +#include "aidge/operator/Pop.hpp" +#include "aidge/operator/Stack.hpp" +#include "aidge/operator/Identity.hpp" +#include 
"aidge/operator/MetaOperator.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/scheduler/ParallelScheduler.hpp" -#include "aidge/backend/cpu.hpp" +#include "aidge/backend/cpu/operator/FCImpl.hpp" +#include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/ReLUImpl.hpp" +#include "aidge/backend/cpu/operator/SqrtImpl.hpp" +#include "aidge/backend/cpu/operator/AddImpl.hpp" + #include "aidge/recipes/GraphViewHelper.hpp" @@ -438,4 +447,69 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward predictedOutput->setGrad(targetOutput); REQUIRE_NOTHROW(scheduler.backward()); } + +std::shared_ptr<Node> Accumulate(int seqLength, const std::string& name) { + auto input = Identity((!name.empty()) ? name + "_input" : ""); + auto hiddenState = Memorize(seqLength, (!name.empty()) ? name + "_hidden_state" : ""); + auto add = Add((!name.empty()) ? name + "_add" : ""); + + input->addChild(add, 0, 0); + add->addChild(hiddenState, 0,0); + hiddenState->addChild(/*otherNode=*/add, /*outId=*/1, /*otherInId=*/1); + + std::shared_ptr<GraphView> microGraph = std::make_shared<GraphView>(); + microGraph->add(input); + microGraph->add({hiddenState, add}); + microGraph->setOrderedInputs({{input, 0}, {hiddenState, 1}}); + microGraph->setOrderedOutputs({{hiddenState, 0}}); + + auto metaOp = MetaOperator("Accumulate", microGraph, {}, name); + return metaOp; +} + +TEST_CASE("[cpu/scheduler] Accumulate", "[scheduler]") { + std::shared_ptr<Tensor> Input = std::make_shared<Tensor>( + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + + std::shared_ptr<Tensor> MemInit = + std::make_shared<Tensor>(Array2D<float, 3, 2>{ + {{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}}); + + auto meta = Accumulate(2, "accumulate"); + auto op = std::static_pointer_cast<MetaOperator_Op>(meta->getOperator()); + auto pop_i = Pop("pop_input"); + auto pop_o = Identity("pop_output"); // NOTE: Could be 
Identity/Stack/Whatever node you want, this is is not the problem here + + pop_i->getOperator()->associateInput(0, Input); + pop_i->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0); + op->getMicroGraph()->getOrderedOutputs()[0].first->addChild(pop_o, 0, 0); + + //pop_i->addChild(meta, 0, 0); + //meta->addChild(pop_o, 0, 0); + + //op->associateInput(1, MemInit); + op->getMicroGraph()->getNode("accumulate_hidden_state")->getOperator()->associateInput(1, MemInit); + + // Build the graph. + auto myGraph = std::make_shared<GraphView>(); + myGraph->add(pop_i); + myGraph->add(op->getMicroGraph()); + //myGraph->add(meta); + myGraph->add(pop_o); + myGraph->compile("cpu", DataType::Float32); + + myGraph->save("accumulate_graph", true); + + // Schedule and run + auto scheduler = SequentialScheduler(myGraph); + scheduler.generateScheduling(); + scheduler.saveStaticSchedulingDiagram("accumulate_scheduling"); + REQUIRE_NOTHROW(scheduler.forward(true)); + + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array2D<float, 3, 2>{{{3.0, 5.0}, {7.0, 9.0}, {11.0, 13.0}}}); + std::shared_ptr<Tensor> output = std::static_pointer_cast<OperatorTensor>(pop_o->getOperator())->getOutput(0); + REQUIRE(*output == *expectedOutput); +} } // namespace Aidge diff --git a/version.txt b/version.txt index 267577d47e497a0630bc454b3f74c4fd9a10ced4..8f0916f768f0487bcf8d33827ce2c8dcecb645c1 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.4.1 +0.5.0