diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 694275067b8b9708bab868da83688716f34e4fae..02f64e3c1da175543f61bf7845ff7a45c6ccea1b 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -19,13 +19,12 @@ #include "aidge/backend/cpu/operator/ArgMaxImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" -#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" #include "aidge/backend/cpu/operator/BitShiftImpl.hpp" #include "aidge/backend/cpu/operator/ClipImpl.hpp" +#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" -#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" #include "aidge/backend/cpu/operator/DivImpl.hpp" #include "aidge/backend/cpu/operator/ErfImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" @@ -34,21 +33,21 @@ #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" #include "aidge/backend/cpu/operator/LnImpl.hpp" #include "aidge/backend/cpu/operator/MatMulImpl.hpp" +#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MulImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/backend/cpu/operator/PowImpl.hpp" +#include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp" #include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" -#include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/ScalingImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" -#include "aidge/backend/cpu/operator/SqrtImpl.hpp" #include "aidge/backend/cpu/operator/SliceImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" +#include "aidge/backend/cpu/operator/SqrtImpl.hpp" #include "aidge/backend/cpu/operator/SubImpl.hpp" #include "aidge/backend/cpu/operator/TanhImpl.hpp" #include "aidge/backend/cpu/data/TensorImpl.hpp" #endif /* AIDGE_CPU_IMPORTS_H_ */ - diff --git a/include/aidge/backend/cpu/data/Broadcasting.hpp b/include/aidge/backend/cpu/data/Broadcasting.hpp index cb969cb54806a204072763a1672ee5266fb6347e..bd648b0c2c05bd888588808896221f49ae079a51 100644 --- a/include/aidge/backend/cpu/data/Broadcasting.hpp +++ b/include/aidge/backend/cpu/data/Broadcasting.hpp @@ -16,33 +16,43 @@ namespace Aidge { -// Function to broadCast an input dims vector into the same size as an outputDims vector - - /** - * @brief Broadcast an input dims vector into the same size as an outputDims vector - * @details The missing dimensions would be completed by 1 - * @param outputDims The vector of dimensions to follow - * @param dimsToBroadcast The vecotr of dimensions to braodcast - * @return std::vector<std::size_t> a broadcasted vector by addding 1 on the missing dimensions. - */ - std::vector<std::size_t> getBroadcastedDims(const std::vector<std::size_t>& outputDims, const std::vector<std::size_t>& dimsToBroadcast); - - /** - * @brief Get a vector of indexes along the dimensions vector from a flattened index - * @param dimensions The vector of dimensions we want the indexes on - * @param idx The flattened index - * @return std::vector<std::size_t> vector of indexes along dimensions. 
- */
-  std::vector<std::size_t> getMultiDimIndices(const std::vector<std::size_t>& dimensions, std::size_t idx);
-
-  // Function to get a flattened index from multi-dimensional indices
-  /**
-   * @brief Get a flattened index the dimensions vector from a given vector of indices on a broadcasted vector
-   * @param dimensions The vector of dimensions we want the flattened index on
-   * @param indices The vector of indices we want to flatten
-   * @return std::size_t The flattened index on the dimensions vector
-   */
-  std::size_t getFlattenedIndex(const std::vector<std::size_t>& dimensions, const std::vector<std::size_t>& indices);
+// Function to broadcast an input dims vector into the same size as an
+// outputDims vector
+
+/**
+ * @brief Broadcast an input dims vector into the same size as an outputDims
+ * vector
+ * @details The missing dimensions will be filled with 1
+ * @param outputDims The vector of dimensions to follow
+ * @param dimsToBroadcast The vector of dimensions to broadcast
+ * @return std::vector<std::size_t> a broadcasted vector obtained by adding 1
+ * on the missing dimensions.
+ */
+std::vector<std::size_t>
+getBroadcastedDims(const std::vector<std::size_t> &outputDims,
+                   const std::vector<std::size_t> &dimsToBroadcast);
+
+/**
+ * @brief Get a vector of indices along the dimensions vector from a
+ * flattened index
+ * @param dimensions The vector of dimensions we want the indices on
+ * @param idx The flattened index
+ * @return std::vector<std::size_t> vector of indices along dimensions.
+ */
+std::vector<std::size_t>
+getMultiDimIndices(const std::vector<std::size_t> &dimensions,
+                   std::size_t idx);
+
+// Function to get a flattened index from multi-dimensional indices
+/**
+ * @brief Get a flattened index on the dimensions vector from a given vector
+ * of indices on a broadcasted vector
+ * @param dimensions The vector of dimensions we want the flattened index on
+ * @param indices The vector of indices we want to flatten
+ * @return std::size_t The flattened index on the dimensions vector
+ */
+std::size_t getFlattenedIndex(const std::vector<std::size_t> &dimensions,
+                              const std::vector<std::size_t> &indices);
 
 } // namespace Aidge
diff --git a/include/aidge/backend/cpu/operator/AbsImpl.hpp b/include/aidge/backend/cpu/operator/AbsImpl.hpp
index 8233d47c4d1e2dc7bf724600ec083bcaa0d667e9..046fc709d019d0fe8c97ea95c1cc3267c2a8b35e 100644
--- a/include/aidge/backend/cpu/operator/AbsImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AbsImpl.hpp
@@ -21,11 +21,11 @@
 namespace Aidge {
 // Operator implementation entry point for the backend
-using AbsImpl_cpu = OperatorImpl_cpu<Abs_Op,
-    void(const std::size_t, const void*, void*)>;
+using AbsImpl_cpu =
+    OperatorImpl_cpu<Abs_Op, void(const std::size_t, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Abs_Op, "cpu", Aidge::AbsImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ABSIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp
index 16e5f9dee26a6f8b760e14a1ad66a40d8f0f7e93..2af3a73dd7f1b23bee494756b677a1b26d5a8164 100644
--- a/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp
@@ -21,11 +21,11 @@
 namespace Aidge {
 template <class I, class O>
 void AbsImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                const void* input_,
-                                void* output_) {
+                                const void *input_,
+                                void *output_) {
 
-    const I* input = 
static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = std::abs(input[i]); @@ -34,14 +34,20 @@ void AbsImpl_cpu_forward_kernel(std::size_t inputLenght, // Kernels registration to implementation entry point REGISTRAR(AbsImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::AbsImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(AbsImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::AbsImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(AbsImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::AbsImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ABSIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp index 5e795922a67be178dde588e8e5e346ec268efe86..756bd9bed412e1f24ea1238ff9bfa2e3e5d6dc0e 100644 --- a/include/aidge/backend/cpu/operator/AddImpl.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl.hpp @@ -12,8 +12,8 @@ #ifndef AIDGE_CPU_OPERATOR_ADDIMPL_H_ #define AIDGE_CPU_OPERATOR_ADDIMPL_H_ -#include <cstddef> // std::size_t -#include <memory> // std::unique_ptr, std::make_unique +#include <cstddef> // std::size_t +#include <memory> // std::unique_ptr, std::make_unique #include <string> #include <vector> @@ -24,11 +24,16 @@ namespace Aidge { // Operator implementation entry point for the backend -using AddImpl_cpu = OperatorImpl_cpu<Add_Op, - void(const std::vector<const void*>, const std::vector<std::vector<std::size_t>>&, const std::size_t, const std::vector<std::size_t>&, void*)>; +using AddImpl_cpu = + OperatorImpl_cpu<Add_Op, + void(const std::vector<const void *>, + const std::vector<std::vector<std::size_t>> &, + const std::size_t, + const std::vector<std::size_t> &, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Add_Op, "cpu", Aidge::AddImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ADDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp index 4a4ba2a8999c4dc33fc743b5a3a7dad023f9e0dd..a1d15f024acbf48d23bb5659b04c36f918f2c601 100644 --- a/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp @@ -14,7 +14,7 @@ #include "aidge/utils/Registrar.hpp" -#include <cstdint> // std::int32_t, std::int64_t +#include <cstdint> // std::int32_t, std::int64_t #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" @@ -22,38 +22,52 @@ namespace Aidge { template <class I, class O> -void AddImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const std::vector<std::vector<std::size_t>>& inputDims, const std::size_t outputLength, const std::vector<std::size_t>& outDims, void* output_) { +void AddImpl_cpu_forward_kernel( + const std::vector<const void *> inputs_, + const 
std::vector<std::vector<std::size_t>> &inputDims, + const std::size_t outputLength, + const std::vector<std::size_t> &outDims, + void *output_) { // FIXME: missing Add attributes as arguments - std::vector<const I*> inputs; - for (const auto& input_ : inputs_) { - inputs.push_back(static_cast<const I*>(input_)); + std::vector<const I *> inputs; + for (const auto &input_ : inputs_) { + inputs.push_back(static_cast<const I *>(input_)); } - O* output = static_cast<O*>(output_); + O *output = static_cast<O *>(output_); - for (std::size_t oIndex = 0; oIndex < outputLength; ++oIndex) - { + for (std::size_t oIndex = 0; oIndex < outputLength; ++oIndex) { output[oIndex] = 0; - std::vector<size_t> indexes = getMultiDimIndices(outDims, oIndex); - for(std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) { - std::size_t idx = getFlattenedIndex(inputDims[iIndex], indexes); + std::vector<size_t> indexes = getMultiDimIndices(outDims, oIndex); + for (std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) { + std::size_t idx = getFlattenedIndex(inputDims[iIndex], indexes); output[oIndex] += inputs[iIndex][idx]; - } - } + } + } } // Kernels registration to implementation entry point REGISTRAR(AddImpl_cpu, - {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, + ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::inPlaceModel, + Aidge::AddImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(AddImpl_cpu, - {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, + ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::inPlaceModel, + Aidge::AddImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(AddImpl_cpu, - {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::inPlaceModel, + Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, + nullptr}); REGISTRAR(AddImpl_cpu, - {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); -} // namespace Aidge + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}}, + {ProdConso::inPlaceModel, + Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/AndImpl.hpp b/include/aidge/backend/cpu/operator/AndImpl.hpp index 316a2fb922596642088d133a7fec49c988739bb7..7ce44a45a5763a80c0499642dec93d4313358b10 100644 --- a/include/aidge/backend/cpu/operator/AndImpl.hpp +++ b/include/aidge/backend/cpu/operator/AndImpl.hpp @@ -12,21 +12,26 @@ #ifndef AIDGE_CPU_OPERATOR_ANDIMPL_H_ #define AIDGE_CPU_OPERATOR_ANDIMPL_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/And.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend using 
AndImpl_cpu = OperatorImpl_cpu<And_Op, - void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>; + void(const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const void *, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(And_Op, "cpu", Aidge::AndImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ANDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp index 197e829f3527ce2f36c3ef5ee812a26477633703..5c3a0152023fb6bde45f88b377a003b565d511c8 100644 --- a/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp @@ -18,28 +18,27 @@ namespace Aidge { template <class I1, class I2, class O> -void AndImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, - const std::vector<std::size_t>& input2Dims, - const std::vector<std::size_t>& outputDims, - const void* input1_, - const void* input2_, - void* output_) { +void AndImpl_cpu_forward_kernel(const std::vector<std::size_t> &input1Dims, + const std::vector<std::size_t> &input2Dims, + const std::vector<std::size_t> &outputDims, + const void *input1_, + const void *input2_, + void *output_) { - const I1* input_1 = static_cast<const I1*>(input1_); - const I2* input_2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); + const I1 *input_1 = static_cast<const I1 *>(input1_); + const I2 *input_2 = static_cast<const I2 *>(input2_); + O *output = static_cast<O *>(output_); size_t totalElements = 1; for (size_t dimSize : outputDims) { totalElements *= dimSize; } - for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) - { - std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); + for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) { + std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); - std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); - std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); output[oIndex] = static_cast<O>(input_1[idx1] == input_2[idx2]); } @@ -47,17 +46,29 @@ void AndImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, // Kernels registration to implementation entry point REGISTRAR(AndImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<float, float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::AndImpl_cpu_forward_kernel<float, float, float>, + nullptr}); REGISTRAR(AndImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<double, double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::AndImpl_cpu_forward_kernel<double, double, double>, + nullptr}); REGISTRAR(AndImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::AndImpl_cpu_forward_kernel<std::int32_t, + std::int32_t, + std::int32_t>, + nullptr}); REGISTRAR(AndImpl_cpu, - {DataType::Int64}, - {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr}); -} 
// namespace Aidge + {DataType::Int64}, + {ProdConso::inPlaceModel, + Aidge::AndImpl_cpu_forward_kernel<std::int64_t, + std::int64_t, + std::int64_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ANDIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp b/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp index b1a2d5168013e4f9595f4275b98143cfc3509629..57b9adde7745d2582929121dfbdb56587a3d2503 100644 --- a/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp +++ b/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp @@ -25,14 +25,14 @@ namespace Aidge { // Operator implementation entry point for the backend using ArgMaxImpl_cpu = OperatorImpl_cpu<ArgMax_Op, - void(std::int32_t, - DimSize_t, - const std::vector<DimSize_t>&, - const void *, - void *)>; + void(std::int32_t, + DimSize_t, + const std::vector<DimSize_t> &, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(ArgMax_Op, "cpu", Aidge::ArgMaxImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp index 1bedec701766fc59fac233a1c400df1042369c5a..9cd65b1c74990dac6635391eead0add743f8efd9 100644 --- a/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp @@ -12,13 +12,13 @@ #ifndef AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_ -#include <algorithm> // std::for_each -#include <cstddef> // std::size_t -#include <cstdint> // std::int32_t -#include <functional> //std::multiplies -#include <numeric> //std::accumulate -#include <vector> +#include <algorithm> // std::for_each +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t +#include <functional> //std::multiplies #include <limits> +#include <numeric> //std::accumulate +#include <vector> #include "aidge/backend/cpu/operator/ArgMaxImpl.hpp" #include "aidge/data/Data.hpp" @@ -28,13 +28,13 @@ namespace Aidge { template <class I, class O> void ArgMaxImpl_cpu_forward_kernel(std::int32_t axis_, - DimSize_t select_last_index, - const std::vector<DimSize_t>& inputDims, - const void* input_, - void* output_) { + DimSize_t select_last_index, + const std::vector<DimSize_t> &inputDims, + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); const std::size_t axis = static_cast<std::size_t>(axis_); @@ -53,14 +53,13 @@ void ArgMaxImpl_cpu_forward_kernel(std::int32_t axis_, const std::size_t idx_o = pre * stride_post + post; I max = std::numeric_limits<I>::min(); for (std::size_t i = 0; i < dim_i; ++i) { - I curr_value = input[idx_i + i*stride_post]; + I curr_value = input[idx_i + i * stride_post]; if (select_last_index) { - if (curr_value>=max) { + if (curr_value >= max) { output[idx_o] = i; max = curr_value; } - } - else { + } else { if (curr_value > max) { output[idx_o] = i; max = curr_value; @@ -69,19 +68,24 @@ void ArgMaxImpl_cpu_forward_kernel(std::int32_t axis_, } } } - } // Kernels registration to implementation entry point REGISTRAR(ArgMaxImpl_cpu, - {DataType::Float32}, - {ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::defaultModel, + 
Aidge::ArgMaxImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(ArgMaxImpl_cpu, - {DataType::Float64}, - {ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::defaultModel, + Aidge::ArgMaxImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(ArgMaxImpl_cpu, - {DataType::Int32}, - {ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::defaultModel, + Aidge::ArgMaxImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/AtanImpl.hpp b/include/aidge/backend/cpu/operator/AtanImpl.hpp index 2f1b4bf0ad666ff9856c24fa675b70d6f830b07c..83527b38509f30e305078adb28aafdaf998a7ce9 100644 --- a/include/aidge/backend/cpu/operator/AtanImpl.hpp +++ b/include/aidge/backend/cpu/operator/AtanImpl.hpp @@ -12,22 +12,23 @@ #ifndef AIDGE_CPU_OPERATOR_ATAN_H_ #define AIDGE_CPU_OPERATOR_ATAN_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Atan.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend -using AtanImpl_cpu = OperatorImpl_cpu<Atan_Op, - void(const std::size_t, const void*, void*), - void(const std::size_t, const void*, const void*, void*)>; +using AtanImpl_cpu = OperatorImpl_cpu< + Atan_Op, + void(const std::size_t, const void *, void *), + void(const std::size_t, const void *, const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(Atan_Op, "cpu", Aidge::AtanImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ATAN_H_ */ diff --git a/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp index 2a786339503354514416705b61cfedfcc0b7c321..916c974f4e4860ca805f3de4037e46a86b110ed6 100644 --- a/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp @@ -15,46 +15,49 @@ #include "aidge/utils/Registrar.hpp" #include "aidge/backend/cpu/operator/AtanImpl.hpp" -#include <cmath> // For atan() - +#include <cmath> // For atan() namespace Aidge { template <class I, class O> void AtanImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const void *input_, + void *output_) { + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); for (size_t i = 0; i < inputLenght; ++i) { output[i] = static_cast<O>(atan(input[i])); } - } template <class O, class GI, class GO> void AtanImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* output_, const void* grad_output_, - void* grad_input_) { - const O* output = static_cast<const O*>(output_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); + const void *output_, + const void *grad_output_, + void *grad_input_) { + const O *output = static_cast<const O *>(output_); + const GO *grad_output = static_cast<const GO *>(grad_output_); + GI *grad_input = static_cast<GI *>(grad_input_); 
// Apply the derivative of atan for each element in the input array for (size_t i = 0; i < inputLenght; ++i) { // dx = dy * (1 / (1 + x^2)) - grad_input[i] = grad_output[i] * static_cast<O>(1.0 / (1.0 + output[i] * output[i])); + grad_input[i] = grad_output[i] * + static_cast<O>(1.0 / (1.0 + output[i] * output[i])); } } - // Kernels registration to implementation entry point REGISTRAR(AtanImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::AtanImpl_cpu_forward_kernel<float, float>, Aidge::AtanImpl_cpu_backward_kernel<float, float, float>}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::AtanImpl_cpu_forward_kernel<float, float>, + Aidge::AtanImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(AtanImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::AtanImpl_cpu_forward_kernel<double, double>, Aidge::AtanImpl_cpu_backward_kernel<double, double, double>}); -} // namespace Aidge + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::AtanImpl_cpu_forward_kernel<double, double>, + Aidge::AtanImpl_cpu_backward_kernel<double, double, double>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ATANIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp index adea96ca43a1ad9d2a49777426913ca4676e4f32..cc1aaad220f738798e228658c65cf39d7b372891 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp @@ -17,24 +17,25 @@ #include <tuple> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/AvgPooling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend using AvgPooling2D_Op = AvgPooling_Op<2>; -using AvgPoolingImpl2D_cpu = OperatorImpl_cpu<AvgPooling_Op<2>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 4>&, - const void *, - void *)>; +using AvgPoolingImpl2D_cpu = + OperatorImpl_cpu<AvgPooling_Op<2>, + void(const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 4> &, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(AvgPooling2D_Op, "cpu", Aidge::AvgPoolingImpl2D_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp index f6da9dcb026101b93de862499d42ae8734532d52..7010f87fccaff0b0738b8006d1f3a0dc0e9a9de1 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp @@ -13,8 +13,8 @@ #define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_ #include <array> -#include <tuple> #include <cmath> +#include <tuple> #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" @@ -33,24 +33,24 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class O> -void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& kernelDims, - const std::array<DimSize_t, 4> &dims, - const void *input_, - void *output_) { +void AvgPoolingImpl2D_cpu_forward_kernel( + const std::array<DimSize_t, 2> &strideDims, + const std::array<DimSize_t, 2> &kernelDims, + const std::array<DimSize_t, 4> &dims, + const void *input_, + void *output_) { // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / - static_cast<float>(strideDims[0]))); + const std::size_t oxSize = static_cast<std::size_t>(std::floor( + static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / - static_cast<float>(strideDims[1]))); + const std::size_t oySize = static_cast<std::size_t>(std::floor( + static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / + static_cast<float>(strideDims[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -60,40 +60,61 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { - const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0); + const std::size_t oIndex = + (ch + batch * dims[1]) * oxSize * oySize; + const std::size_t iIndex = + (ch + batch * dims[1]) * dims[2] * dims[3]; + std::fill(output + oIndex, output + (oIndex + oxSize * oySize), 0); for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx); + const signedsize difx = + static_cast<signedsize>(-ox * strideDims[0]); + const std::size_t sxMin = + static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = + (static_cast<signedsize>(dims[2]) + difx) < 0 + ? 0 + : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] + : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const signedsize dify = + static_cast<signedsize>(-oy * strideDims[1]); + const std::size_t syMin = static_cast<std::size_t>( + std::max(dify, signedsize(0))); + const std::size_t syMax = + (static_cast<signedsize>(dims[3]) + dify) < 0 + ? 0 + : ((dims[3] + dify) > kernelDims[1] + ? 
kernelDims[1] + : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox * oySize + oy; const std::size_t ix = ox * strideDims[0]; const std::size_t iy = oy * strideDims[1]; if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += static_cast<O>( - input[iIndex + (ix+0)*dims[3] + (iy+0)] + - input[iIndex + (ix+0)*dims[3] + (iy+1)] + - input[iIndex + (ix+0)*dims[3] + (iy+2)] + - input[iIndex + (ix+1)*dims[3] + (iy+0)] + - input[iIndex + (ix+1)*dims[3] + (iy+1)] + - input[iIndex + (ix+1)*dims[3] + (iy+2)] + - input[iIndex + (ix+2)*dims[3] + (iy+0)] + - input[iIndex + (ix+2)*dims[3] + (iy+1)] + - input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9); + output[oIndexFull] += + static_cast<O>( + input[iIndex + (ix + 0) * dims[3] + (iy + 0)] + + input[iIndex + (ix + 0) * dims[3] + (iy + 1)] + + input[iIndex + (ix + 0) * dims[3] + (iy + 2)] + + input[iIndex + (ix + 1) * dims[3] + (iy + 0)] + + input[iIndex + (ix + 1) * dims[3] + (iy + 1)] + + input[iIndex + (ix + 1) * dims[3] + (iy + 2)] + + input[iIndex + (ix + 2) * dims[3] + (iy + 0)] + + input[iIndex + (ix + 2) * dims[3] + (iy + 1)] + + input[iIndex + (ix + 2) * dims[3] + + (iy + 2)]) / + O(9); } else { for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + output[oIndexFull] += + input[iIndex + (ix + sx) * dims[3] + + (iy + sy)]; } } // padding not used - output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin); + output[oIndexFull] /= + (sxMax - sxMin) * (syMax - syMin); } } } @@ -103,14 +124,23 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD // Kernels registration to implementation entry point REGISTRAR(AvgPoolingImpl2D_cpu, - {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>, nullptr}); -REGISTRAR(AvgPoolingImpl2D_cpu, + {{DataType::Float32, DataFormat::NCHW}, + {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>, + nullptr}); +REGISTRAR( + AvgPoolingImpl2D_cpu, {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); + {ProdConso::inPlaceModel, + Aidge::AvgPoolingImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>, + nullptr}); REGISTRAR(AvgPoolingImpl2D_cpu, - {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>, nullptr}); -} // namespace Aidge + {{DataType::Float64, DataFormat::NCHW}, + {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp index 36a100b21edc6cd63a0176c89f2f1e57c10001c7..d96aa0904792820acf54c9a3cf01357762296c71 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp @@ -17,29 +17,30 @@ #include <tuple> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/BatchNorm.hpp" #include 
"aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend using BatchNorm2D_Op = BatchNorm_Op<2>; -using BatchNormImpl2D_cpu = OperatorImpl_cpu<BatchNorm_Op<2>, - void(float, - float, - const std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, - void *, - void *, - void *, - const bool)>; +using BatchNormImpl2D_cpu = + OperatorImpl_cpu<BatchNorm_Op<2>, + void(float, + float, + const std::array<DimSize_t, 4> &, + const void *, + const void *, + const void *, + void *, + void *, + void *, + const bool)>; // Implementation entry point registration to Operator REGISTRAR(BatchNorm2D_Op, "cpu", Aidge::BatchNormImpl2D_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp index ec71e3b8e37e344c551fd643dc7b3957bdddcb67..543a9667737eb5270d929a7a4dd40562fd813fe9 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp @@ -14,12 +14,12 @@ #include "aidge/utils/Registrar.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <algorithm> #include <array> #include <cmath> -#include <algorithm> namespace Aidge { /** @@ -38,8 +38,16 @@ namespace Aidge { * @param output_ Output Tensor. */ template <class I, class P, class O> -void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std::array<DimSize_t, 4> &dims, - const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { +void BatchNormImpl2D_cpu_forward_kernel(float epsilon, + float momentum, + const std::array<DimSize_t, 4> &dims, + const void *input_, + const void *scale_, + const void *shift_, + void *batchMean_, + void *batchVar_, + void *output_, + const bool freeze) { // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const P *scale = static_cast<const P *>(scale_); @@ -50,18 +58,24 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std const DimSize_t nbBatch = dims[0]; const DimSize_t nbChannels = dims[1]; - const DimSize_t featureMapSize = dims[2]*dims[3]; - + const DimSize_t featureMapSize = dims[2] * dims[3]; if ((freeze == true) || (momentum == 0.0f)) { for (std::size_t batch = 0; batch < nbBatch; ++batch) { for (std::size_t ch = 0; ch < nbChannels; ++ch) { - const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; - std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); - const P var = std::sqrt(batchVar[ch] + static_cast<P>(epsilon)); + const std::size_t ioIndex = + (ch + batch * nbChannels) * featureMapSize; + std::fill(output + ioIndex, + output + ioIndex + featureMapSize, + shift[ch]); + const P var = + std::sqrt(batchVar[ch] + static_cast<P>(epsilon)); - for (std::size_t feature = 0; feature<featureMapSize; ++feature) { - output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; + for (std::size_t feature = 0; feature < featureMapSize; + ++feature) { + output[ioIndex + feature] += + scale[ch] * + (input[ioIndex + feature] - batchMean[ch]) / var; } } } @@ 
-71,25 +85,40 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std I sum = I(0); I sumSquare = I(0); for (std::size_t batch = 0; batch < nbBatch; ++batch) { - const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; - std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); + const std::size_t ioIndex = + (ch + batch * nbChannels) * featureMapSize; + std::fill(output + ioIndex, + output + ioIndex + featureMapSize, + shift[ch]); - for (std::size_t feature = 0; feature<featureMapSize; ++feature) { + for (std::size_t feature = 0; feature < featureMapSize; + ++feature) { sum += input[ioIndex + feature]; - sumSquare += input[ioIndex + feature] * input[ioIndex + feature]; + sumSquare += + input[ioIndex + feature] * input[ioIndex + feature]; } } const I inputMean = sum / static_cast<I>(nbDataPerChannel); - const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; + const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - + inputMean * inputMean; - batchMean[ch] = batchMean[ch]*(1-momentum) + inputMean*momentum; - batchVar[ch] = batchVar[ch]*(1-momentum) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*momentum; + batchMean[ch] = + batchMean[ch] * (1 - momentum) + inputMean * momentum; + batchVar[ch] = batchVar[ch] * (1 - momentum) + + inputVar * + (static_cast<I>(nbDataPerChannel) / + static_cast<I>(nbDataPerChannel - 1)) * + momentum; const P var = std::sqrt(inputVar + static_cast<P>(epsilon)); for (std::size_t batch = 0; batch < nbBatch; ++batch) { - const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; - for (std::size_t feature = 0; feature<featureMapSize; ++feature) { - output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-inputMean) / var; + const std::size_t ioIndex = + (ch + batch * nbChannels) * featureMapSize; + for (std::size_t feature = 0; feature < featureMapSize; + ++feature) { + output[ioIndex + feature] += + scale[ch] * (input[ioIndex + feature] - inputMean) / + var; } } } @@ -98,8 +127,11 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std // Kernels registration to implementation entry point REGISTRAR(BatchNormImpl2D_cpu, - {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>, nullptr}); -} // namespace Aidge + {{DataType::Float32, DataFormat::NCHW}, + {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp index 6da67bb7dd4469b6ca609c5aea1ae70dfca3f939..6603566456dab4d3c8fa833f0d4e17a0ce50c101 100644 --- a/include/aidge/backend/cpu/operator/BitShiftImpl.hpp +++ b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp @@ -12,27 +12,28 @@ #ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ #define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/BitShift.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend -using 
BitShiftImpl_cpu = OperatorImpl_cpu<BitShift_Op, - void(const BitShift_Op::BitShiftDirection, - const std::vector<std::size_t>&, - const std::vector<std::size_t>&, - const std::vector<std::size_t>&, - const void*, - const void*, - void*)>; - - // Implementation entry point registration to Operator - REGISTRAR(BitShift_Op,"cpu",Aidge::BitShiftImpl_cpu::create); -} // namespace Aidge +using BitShiftImpl_cpu = + OperatorImpl_cpu<BitShift_Op, + void(const BitShift_Op::BitShiftDirection, + const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const void *, + const void *, + void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(BitShift_Op, "cpu", Aidge::BitShiftImpl_cpu::create); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp index f815e946ea2e4abaff48a6e5155368d564e88e8c..0d37fc267d13bd6c0edd855d08c22cc0bad559f0 100644 --- a/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp @@ -14,57 +14,62 @@ #include "aidge/utils/Registrar.hpp" -#include <cstdint> // std::int32_t, std::int64_t #include "aidge/operator/BitShift.hpp" +#include <cstdint> // std::int32_t, std::int64_t #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/operator/BitShiftImpl.hpp" - - namespace Aidge { template <class I1, class I2, class O> void BitShiftImpl_cpu_forward_kernel( - const BitShift_Op::BitShiftDirection direction, - const std::vector<std::size_t>& input1Dims, - const std::vector<std::size_t>& input2Dims, - const std::vector<std::size_t>& outputDims, - const void* input1_, - const void* input2_, - void* output_ - ) { + const BitShift_Op::BitShiftDirection direction, + const std::vector<std::size_t> &input1Dims, + const std::vector<std::size_t> &input2Dims, + const std::vector<std::size_t> &outputDims, + const void *input1_, + const void *input2_, + void *output_) { + + const I1 *input_1 = static_cast<const I1 *>(input1_); + const I2 *input_2 = static_cast<const I2 *>(input2_); + O *output = static_cast<O *>(output_); - const I1* input_1 = static_cast<const I1*>(input1_); - const I2* input_2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); + const size_t totalElements = + std::accumulate(outputDims.begin(), + outputDims.end(), + std::size_t(1), + std::multiplies<std::size_t>()); - const size_t totalElements = std::accumulate(outputDims.begin(), outputDims.end(), std::size_t(1), std::multiplies<std::size_t>()); - - for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) - { + for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) { std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); - if(direction == BitShift_Op::BitShiftDirection::right) + if (direction == BitShift_Op::BitShiftDirection::right) { - output[oIndex]= input_1[idx1] >> input_2[idx2]; - } - else - { - output[oIndex] = input_1[idx1] << input_2[idx2]; + output[oIndex] = input_1[idx1] >> input_2[idx2]; + } else { + output[oIndex] = input_1[idx1] << input_2[idx2]; } } } REGISTRAR(BitShiftImpl_cpu, -{DataType::Int32}, -{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>,nullptr}); + {DataType::Int32}, + 
{ProdConso::inPlaceModel, + Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t, + std::int32_t, + std::int32_t>, + nullptr}); REGISTRAR(BitShiftImpl_cpu, -{DataType::Int64}, -{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>,nullptr}); - + {DataType::Int64}, + {ProdConso::inPlaceModel, + Aidge::BitShiftImpl_cpu_forward_kernel<std::int64_t, + std::int64_t, + std::int64_t>, + nullptr}); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_BitShiftIMPL_KERNELS_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/ClipImpl.hpp b/include/aidge/backend/cpu/operator/ClipImpl.hpp index c83836d5aa1d6aae27e3fdce1bbb9561b70ec31e..aec09416601314e744048fb48de83a2f9d7dca88 100644 --- a/include/aidge/backend/cpu/operator/ClipImpl.hpp +++ b/include/aidge/backend/cpu/operator/ClipImpl.hpp @@ -12,35 +12,34 @@ #ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_H_ #define AIDGE_CPU_OPERATOR_CLIPIMPL_H_ -#include <cstddef> // std::size_t +#include <algorithm> +#include <cstddef> // std::size_t #include <memory> -#include <tuple> // std::tuple +#include <tuple> // std::tuple #include <vector> -#include <algorithm> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Clip.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" - namespace Aidge { // Operator implementation entry point for the backend - using ClipImpl_cpu = OperatorImpl_cpu<Clip_Op, - void(float, //Forward Types - float, - const void*, - const std::size_t, - void*), - void(float,//Backward Types - float, - const std::size_t, - const void*, - const void*, - void*)>; +using ClipImpl_cpu = OperatorImpl_cpu<Clip_Op, + void(float, // Forward Types + float, + const void *, + const std::size_t, + void *), + void(float, // Backward Types + float, + const std::size_t, + const void *, + const void *, + void *)>; - REGISTRAR(Clip_Op,"cpu",Aidge::ClipImpl_cpu::create); -} // namespace Aidge +REGISTRAR(Clip_Op, "cpu", Aidge::ClipImpl_cpu::create); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp index 1afac4698be2a63790ebac671ecc1e59166c5f94..6438fbed090ea52781da6154d3fe1666a2d93c52 100644 --- a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp @@ -13,65 +13,67 @@ #ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_ -#include "aidge/utils/Registrar.hpp" #include "aidge/backend/cpu/operator/ClipImpl.hpp" +#include "aidge/utils/Registrar.hpp" namespace Aidge { template <class I, class O> -void ClipImpl_cpu_forward_kernel( - float min_, - float max_, - const void* input_, - const std::size_t length, - void* output_) -{ - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); +void ClipImpl_cpu_forward_kernel(float min_, + float max_, + const void *input_, + const std::size_t length, + void *output_) { + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); for (std::size_t i = 0; i < length; ++i) { - output[i] = std::min(std::max(static_cast<float>(input[i]), min_), max_); + output[i] = + std::min(std::max(static_cast<float>(input[i]), min_), max_); } } template <class I, class GI, class GO> -void 
ClipImpl_cpu_backward_kernel( - float min_, - float max_, - const std::size_t length, - const void* input_, - const void* grad_output_, - void* grad_input_) -{ - const I* input = static_cast<const I*>(input_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); +void ClipImpl_cpu_backward_kernel(float min_, + float max_, + const std::size_t length, + const void *input_, + const void *grad_output_, + void *grad_input_) { + const I *input = static_cast<const I *>(input_); + const GO *grad_output = static_cast<const GO *>(grad_output_); + GI *grad_input = static_cast<GI *>(grad_input_); for (std::size_t i = 0; i < length; ++i) { - grad_input[i] = ((input[i] > min_) && (input[i] < max_)) ? grad_output[i] : 0; + grad_input[i] = + ((input[i] > min_) && (input[i] < max_)) ? grad_output[i] : 0; } } REGISTRAR(ClipImpl_cpu, -{DataType::Float32}, -{ProdConso::inPlaceModel, -Aidge::ClipImpl_cpu_forward_kernel<float,float>, -Aidge::ClipImpl_cpu_backward_kernel<float,float,float>}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::ClipImpl_cpu_forward_kernel<float, float>, + Aidge::ClipImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(ClipImpl_cpu, -{DataType::Float64}, -{ProdConso::inPlaceModel, -Aidge::ClipImpl_cpu_forward_kernel<double,double>, -Aidge::ClipImpl_cpu_backward_kernel<double,double,double>}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::ClipImpl_cpu_forward_kernel<double, double>, + Aidge::ClipImpl_cpu_backward_kernel<double, double, double>}); REGISTRAR(ClipImpl_cpu, -{DataType::Int32}, -{ProdConso::inPlaceModel, -Aidge::ClipImpl_cpu_forward_kernel<std::int32_t,std::int32_t>, -Aidge::ClipImpl_cpu_backward_kernel<std::int32_t,std::int32_t,std::int32_t>}); + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::ClipImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, + Aidge::ClipImpl_cpu_backward_kernel<std::int32_t, + std::int32_t, + std::int32_t>}); REGISTRAR(ClipImpl_cpu, -{DataType::Int64}, -{ProdConso::inPlaceModel, -Aidge::ClipImpl_cpu_forward_kernel<std::int64_t,std::int64_t>, -Aidge::ClipImpl_cpu_backward_kernel<std::int64_t,std::int64_t,std::int64_t>}); + {DataType::Int64}, + {ProdConso::inPlaceModel, + Aidge::ClipImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, + Aidge::ClipImpl_cpu_backward_kernel<std::int64_t, + std::int64_t, + std::int64_t>}); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp index 83e7e030f526e0db3cff4741eabe39e287130562..8e77611e859c20827edec377e8458795d7f90daa 100644 --- a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp @@ -23,12 +23,12 @@ namespace Aidge { // Operator implementation entry point for the backend -using ConstantOfShapeImpl_cpu = OperatorImpl_cpu<ConstantOfShape_Op, - void(const std::vector<DimSize_t>, const Tensor&, void *)>; +using ConstantOfShapeImpl_cpu = OperatorImpl_cpu< + ConstantOfShape_Op, + void(const std::vector<DimSize_t>, const Tensor &, void *)>; // Implementation entry point registration to Operator REGISTRAR(ConstantOfShape_Op, "cpu", Aidge::ConstantOfShapeImpl_cpu::create); } // namespace Aidge #endif /* _AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ */ - diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp 
b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp index 18ab9c0a77c4545c955fc4fe1f1fc1cbcb763bf7..0c3b631d79120145a3cfb16300a950629c905c96 100644 --- a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp @@ -30,42 +30,62 @@ namespace Aidge { template <class O> void ConstantOfShapeimpl_cpu_forward_kernel( - const std::vector<DimSize_t> output_dims, const Tensor &value, + const std::vector<DimSize_t> output_dims, + const Tensor &value, void *output_) { - O *output = static_cast<O *>(output_); - O val; - std::copy(static_cast<O *>(value.getImpl()->hostPtr()), - static_cast<O *>(value.getImpl()->hostPtr()) + - static_cast<NbElts_t>(1), - &val); - const size_t output_size = std::accumulate( - output_dims.begin(), output_dims.end(), 1, std::multiplies<DimSize_t>()); - for (size_t i = 0; i < output_size; ++i) { - output[i] = val; - } + O *output = static_cast<O *>(output_); + O val; + std::copy(static_cast<O *>(value.getImpl()->hostPtr()), + static_cast<O *>(value.getImpl()->hostPtr()) + + static_cast<NbElts_t>(1), + &val); + const size_t output_size = std::accumulate(output_dims.begin(), + output_dims.end(), + 1, + std::multiplies<DimSize_t>()); + for (size_t i = 0; i < output_size; ++i) { + output[i] = val; + } } // Kernels registration to implementation entry point REGISTRAR(ConstantOfShapeImpl_cpu, - {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float16}}, - {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>, nullptr}); + {ImplSpec::IOSpec{DataType::Int64}, + ImplSpec::IOSpec{DataType::Float16}}, + {ProdConso::defaultModel, + Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>, + nullptr}); REGISTRAR(ConstantOfShapeImpl_cpu, - {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float32}}, - {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>, nullptr}); + {ImplSpec::IOSpec{DataType::Int64}, + ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::defaultModel, + Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>, + nullptr}); REGISTRAR(ConstantOfShapeImpl_cpu, - {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float64}}, - {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>, nullptr}); + {ImplSpec::IOSpec{DataType::Int64}, + ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::defaultModel, + Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>, + nullptr}); REGISTRAR(ConstantOfShapeImpl_cpu, - {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int16}}, - {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>, nullptr}); + {ImplSpec::IOSpec{DataType::Int64}, + ImplSpec::IOSpec{DataType::Int16}}, + {ProdConso::defaultModel, + Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>, + nullptr}); REGISTRAR(ConstantOfShapeImpl_cpu, - {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int32}}, - {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>, nullptr}); + {ImplSpec::IOSpec{DataType::Int64}, + ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::defaultModel, + Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>, + nullptr}); REGISTRAR(ConstantOfShapeImpl_cpu, - {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int64}}, - {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>, nullptr}); + 
{ImplSpec::IOSpec{DataType::Int64}, + ImplSpec::IOSpec{DataType::Int64}}, + {ProdConso::defaultModel, + Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>, + nullptr}); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_ */ - diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp index 5b985accfb7b9778993b557524de7b60060ad437..82d86874649ea521493eee40ec61cef1caaaf304 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp @@ -17,39 +17,41 @@ #include <tuple> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/ConvDepthWise.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend using ConvDepthWise1D_Op = ConvDepthWise_Op<1>; -using ConvDepthWiseImpl1D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<1>, - void(const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 3>&, - const void *, - const void *, - const void *, - void *)>; +using ConvDepthWiseImpl1D_cpu = + OperatorImpl_cpu<ConvDepthWise_Op<1>, + void(const std::array<DimSize_t, 1> &, + const std::array<DimSize_t, 1> &, + const std::array<DimSize_t, 1> &, + const std::array<DimSize_t, 3> &, + const void *, + const void *, + const void *, + void *)>; using ConvDepthWise2D_Op = ConvDepthWise_Op<2>; -using ConvDepthWiseImpl2D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<2>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, - void *)>; +using ConvDepthWiseImpl2D_cpu = + OperatorImpl_cpu<ConvDepthWise_Op<2>, + void(const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 4> &, + const void *, + const void *, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(ConvDepthWise1D_Op, "cpu", Aidge::ConvDepthWiseImpl1D_cpu::create); REGISTRAR(ConvDepthWise2D_Op, "cpu", Aidge::ConvDepthWiseImpl2D_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp index 28ed8969aa415ab4151d038869594376480eba43..146e3ed18a8ca9f497f2a606ce22a615d0ea269c 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp @@ -37,27 +37,27 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class W, class B, class O> -void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, - const std::array<DimSize_t, 1>& dilationDims, - const std::array<DimSize_t, 1>& kernelDims, - const std::array<DimSize_t, 3>& inputDims, - const void *input_, - const void *weights_, - const void *biases_, - void *output_) { +void ConvDepthWiseImpl1D_cpu_forward_kernel( + const std::array<DimSize_t, 1> &strideDims, + const std::array<DimSize_t, 1> &dilationDims, + const std::array<DimSize_t, 1> &kernelDims, + const std::array<DimSize_t, 3> &inputDims, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) { // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); const B *biases = static_cast<const B *>(biases_); O *output = static_cast<O *>(output_); - // output H size - const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / - static_cast<float>(strideDims[0]))); - + const DimSize_t dilated_kernel_x = + dilationDims[0] * (kernelDims[0] - 1) + 1; + const std::size_t oxSize = static_cast<std::size_t>(std::floor( + static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / + static_cast<float>(strideDims[0]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -67,23 +67,33 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { - const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize; + const std::size_t oIndex = (ch + batch * inputDims[1]) * oxSize; B biasVal = (biases != nullptr) ? biases[ch] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize), biasVal); - const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2]; + std::fill(output + oIndex, output + (oIndex + oxSize), biasVal); + const std::size_t iIndex = + (ch + batch * inputDims[1]) * inputDims[2]; const std::size_t wIndex = ch * kernelDims[0]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * + // strideDims[0]); const std::size_t sxMin = + // static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = + // (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : + // ((inputDims[2] + difx) > kernelDims[0] ? 
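The `oxSize` expression above is the standard "valid" (unpadded) output-length formula; given `inputDims[2] >= dilated_kernel_x`, the float `floor` round-trip is equivalent to integer arithmetic. A small helper plus a worked value (hypothetical helper, not part of the patch):

    #include <cstddef>

    // Output length of a "valid" (unpadded) 1D convolution.
    std::size_t conv_out_len(std::size_t Lin, std::size_t k,
                             std::size_t stride, std::size_t dilation) {
        const std::size_t dilated_k = dilation * (k - 1) + 1;
        // Same value as floor((Lin - dilated_k + stride) / stride)
        // in the kernel, for Lin >= dilated_k.
        return (Lin - dilated_k) / stride + 1;
    }

    // e.g. Lin = 10, k = 3, stride = 2, dilation = 2:
    // dilated_k = 5, output length = (10 - 5) / 2 + 1 = 3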
kernelDims[0] : + // inputDims[2] + difx); const std::size_t sxMin = 0; const std::size_t sxMax = dilated_kernel_x; const std::size_t oIndexFull = oIndex + ox; - const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); + const signedsize ix = + static_cast<signedsize>(ox * strideDims[0]); - for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { - output[oIndexFull] += weights[wIndex + sx] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))]; + for (std::size_t sx = sxMin; sx * dilationDims[0] < sxMax; + ++sx) { + output[oIndexFull] += + weights[wIndex + sx] * + input[iIndex + static_cast<std::size_t>( + ix + static_cast<signedsize>( + sx * dilationDims[0]))]; } } } @@ -91,16 +101,30 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri } // Kernels registration to implementation entry point -REGISTRAR(ConvDepthWiseImpl1D_cpu, +REGISTRAR( + ConvDepthWiseImpl1D_cpu, {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr}); + {ProdConso::inPlaceModel, + Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>, + nullptr}); REGISTRAR(ConvDepthWiseImpl1D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr}); + {{DataType::Any, DataFormat::NCHW}, + {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, + std::int32_t, + std::int32_t, + std::int32_t>, + nullptr}); REGISTRAR(ConvDepthWiseImpl1D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr}); - + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, + double, + double, + double>, + nullptr}); /** * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend. @@ -116,33 +140,34 @@ REGISTRAR(ConvDepthWiseImpl1D_cpu, * @param output_ Output Tensor. 
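A note on why the clamped bounds commented out above collapse to the constants `sxMin = 0` and `sxMax = dilated_kernel_x`: with no padding, `oxSize` is defined so that the last output index satisfies `ix = (oxSize - 1) * strideDims[0] <= inputDims[2] - dilated_kernel_x`. The furthest tap the inner loop reads is `ix + (kernelDims[0] - 1) * dilationDims[0] = ix + dilated_kernel_x - 1 <= inputDims[2] - 1`, so every window stays fully in bounds and per-`ox` clamping only becomes necessary once padding is introduced.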
*/ template <class I, class W, class B, class O> -void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& dilationDims, - const std::array<DimSize_t, 2>& kernelDims, - const std::array<DimSize_t, 4>& inputDims, - const void *input_, - const void *weights_, - const void *biases_, - void *output_) -{ +void ConvDepthWiseImpl2D_cpu_forward_kernel( + const std::array<DimSize_t, 2> &strideDims, + const std::array<DimSize_t, 2> &dilationDims, + const std::array<DimSize_t, 2> &kernelDims, + const std::array<DimSize_t, 4> &inputDims, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) { // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); const B *biases = static_cast<const B *>(biases_); O *output = static_cast<O *>(output_); - // output H size - const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / - static_cast<float>(strideDims[0]))); + const DimSize_t dilated_kernel_x = + dilationDims[0] * (kernelDims[0] - 1) + 1; + const std::size_t oxSize = static_cast<std::size_t>(std::floor( + static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size - const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1; - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) / - static_cast<float>(strideDims[1]))); + const DimSize_t dilated_kernel_y = + dilationDims[1] * (kernelDims[1] - 1) + 1; + const std::size_t oySize = static_cast<std::size_t>(std::floor( + static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) / + static_cast<float>(strideDims[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -150,43 +175,72 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri // weight (outCh, ch, kernelX, kernelY) // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; - const std::size_t outChannels_s = oxSize * oySize; + const std::size_t outChannels_s = oxSize * oySize; - if (dilated_kernel_x ==3 && dilated_kernel_y == 3) { + if (dilated_kernel_x == 3 && dilated_kernel_y == 3) { for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { B biasVal = (biases != nullptr) ? 
biases[ch] : B(0); - std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; + std::size_t iIndex = + (ch + batch * inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t wIndex = ch * 9; - if (strideDims[0] == 1 && strideDims[1]==1) { - for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) { + if (strideDims[0] == 1 && strideDims[1] == 1) { + for (std::size_t ox = 0, oIndex = 0; ox < oxSize; + ++ox, oIndex += oySize, iIndex -= inputDims[3]) { for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2]; + output[oIndex + oy] = + biasVal + + weights[wIndex + 0] * input[iIndex + oy] + + weights[wIndex + 1] * input[iIndex + oy + 1] + + weights[wIndex + 2] * input[iIndex + oy + 2]; } - iIndex+=inputDims[3]; + iIndex += inputDims[3]; for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2]; + output[oIndex + oy] += + weights[wIndex + 3] * input[iIndex + oy] + + weights[wIndex + 4] * input[iIndex + oy + 1] + + weights[wIndex + 5] * input[iIndex + oy + 2]; } - iIndex+=inputDims[3]; + iIndex += inputDims[3]; for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2]; + output[oIndex + oy] += + weights[wIndex + 6] * input[iIndex + oy] + + weights[wIndex + 7] * input[iIndex + oy + 1] + + weights[wIndex + 8] * input[iIndex + oy + 2]; } } } else { - for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=strideDims[0]*inputDims[3]) { + for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, + oIndex += oySize, + iIndex -= strideDims[0] * inputDims[3]) { for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+strideDims[0]]+weights[wIndex+2]*input[iIndex+oy+strideDims[0]*2]; + output[oIndex + oy] += + weights[wIndex + 0] * input[iIndex + oy] + + weights[wIndex + 1] * + input[iIndex + oy + strideDims[0]] + + weights[wIndex + 2] * + input[iIndex + oy + strideDims[0] * 2]; } - iIndex+=strideDims[0]*inputDims[3]; + iIndex += strideDims[0] * inputDims[3]; for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+strideDims[0]]+weights[wIndex+5]*input[iIndex+oy+strideDims[0]*2]; + output[oIndex + oy] += + weights[wIndex + 3] * input[iIndex + oy] + + weights[wIndex + 4] * + input[iIndex + oy + strideDims[0]] + + weights[wIndex + 5] * + input[iIndex + oy + strideDims[0] * 2]; } - iIndex+=strideDims[0]*inputDims[3]; + iIndex += strideDims[0] * inputDims[3]; for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+strideDims[0]]+weights[wIndex+8]*input[iIndex+oy+strideDims[0]*2]; + output[oIndex + oy] += + weights[wIndex + 6] * input[iIndex + oy] + + weights[wIndex + 7] * + input[iIndex + oy + strideDims[0]] + + weights[wIndex + 8] * + input[iIndex + oy + strideDims[0] * 2]; } } } @@ -200,19 +254,25 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri B biasVal = (biases != nullptr) ? 
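The 3x3, stride-1 branch above unrolls the kernel into three row passes that reuse a running input index (stepping `iIndex` forward twice per output row and rewinding it once per `ox`). The same access pattern for a single channel, written with a local row pointer instead (a simplified sketch: no bias, "valid" borders, hypothetical free function):

    #include <cstddef>

    // One-channel 3x3 valid convolution, stride 1; out is (H-2) x (W-2).
    void conv3x3_rows(const float *in, const float *w, float *out,
                      std::size_t H, std::size_t W) {
        const std::size_t oH = H - 2, oW = W - 2;
        for (std::size_t ox = 0; ox < oH; ++ox) {
            const float *row = in + ox * W; // top row of the window
            float *orow = out + ox * oW;
            for (std::size_t oy = 0; oy < oW; ++oy)
                orow[oy] = w[0] * row[oy] + w[1] * row[oy + 1] +
                           w[2] * row[oy + 2];
            row += W; // middle row
            for (std::size_t oy = 0; oy < oW; ++oy)
                orow[oy] += w[3] * row[oy] + w[4] * row[oy + 1] +
                            w[5] * row[oy + 2];
            row += W; // bottom row
            for (std::size_t oy = 0; oy < oW; ++oy)
                orow[oy] += w[6] * row[oy] + w[7] * row[oy + 1] +
                            w[8] * row[oy + 2];
        }
    }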
biases[ch] : B(0); - const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; + const std::size_t iIndex = + (ch + batch * inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t wIndex = ch; if (strideDims[0] == 1 && strideDims[1] == 1) { - for (; index < iIndex + oxSize*oySize; ++index) { - output[index] = biasVal + weights[wIndex] * input[index]; + for (; index < iIndex + oxSize * oySize; ++index) { + output[index] = + biasVal + weights[wIndex] * input[index]; } - } else { - std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize; - for (std::size_t ox = 0; ox < oxSize; ++ox, oIndex+=oySize) { - index = iIndex + strideDims[0]*inputDims[3]; - for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) { - output[oIndex + oy] += weights[wIndex]*input[index+iy]; + } else { + std::size_t oIndex = + (ch + batch * inputDims[1]) * oxSize * oySize; + for (std::size_t ox = 0; ox < oxSize; + ++ox, oIndex += oySize) { + index = iIndex + strideDims[0] * inputDims[3]; + for (std::size_t oy = 0, iy = 0; oy < oySize; + ++oy, iy += strideDims[1]) { + output[oIndex + oy] += + weights[wIndex] * input[index + iy]; } } } @@ -223,22 +283,37 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { B biasVal = (biases != nullptr) ? biases[ch] : B(0); - std::fill(output, output+outChannels_s, biasVal); + std::fill(output, output + outChannels_s, biasVal); - const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; + const std::size_t iIndex = + (ch + batch * inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t oy = 0; oy < oySize; ++oy) { - const std::size_t oIndexFull = ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); - const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); + const std::size_t oIndexFull = ox * oySize + oy; + const signedsize ix = + static_cast<signedsize>(ox * strideDims[0]); + const signedsize iy = + static_cast<signedsize>(oy * strideDims[1]); - for (std::size_t sx = 0; sx*dilationDims[0] < dilated_kernel_x; ++sx) { - for (std::size_t sy = 0; sy*dilationDims[1] < dilated_kernel_y; ++sy) { - output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))]; + for (std::size_t sx = 0; + sx * dilationDims[0] < dilated_kernel_x; + ++sx) { + for (std::size_t sy = 0; + sy * dilationDims[1] < dilated_kernel_y; + ++sy) { + output[oIndexFull] += + weights[wIndex + sx * kernelDims[1] + sy] * + input[iIndex + + static_cast<std::size_t>( + ix + static_cast<signedsize>( + sx * dilationDims[0])) * + inputDims[3] + + static_cast<std::size_t>( + iy + static_cast<signedsize>( + sy * dilationDims[1]))]; } } } @@ -249,17 +324,31 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri } } - // Kernels registration to implementation entry point -REGISTRAR(ConvDepthWiseImpl2D_cpu, +REGISTRAR( + ConvDepthWiseImpl2D_cpu, {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr}); + {ProdConso::inPlaceModel, + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, 
float, float, float>, + nullptr}); REGISTRAR(ConvDepthWiseImpl2D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr}); + {{DataType::Any, DataFormat::NCHW}, + {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, + std::int32_t, + std::int32_t, + std::int32_t>, + nullptr}); REGISTRAR(ConvDepthWiseImpl2D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr}); -} // namespace Aidge + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, + double, + double, + double>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp index c06d0912f419909013f930867ce3c3238c1a5555..5980554238240ce91ab75e739c14cd32e12d888d 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -17,41 +17,41 @@ #include <tuple> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Conv.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend using Conv1D_Op = Conv_Op<1>; using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>, - void(const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 3> &, - DimSize_t, - const void *, - const void *, - const void *, - void *)>; + void(const std::array<DimSize_t, 1> &, + const std::array<DimSize_t, 1> &, + const std::array<DimSize_t, 1> &, + const std::array<DimSize_t, 3> &, + DimSize_t, + const void *, + const void *, + const void *, + void *)>; using Conv2D_Op = Conv_Op<2>; using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 4> &, - DimSize_t, - const void *, - const void *, - const void *, - void *)>; + void(const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 4> &, + DimSize_t, + const void *, + const void *, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create); REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp index b4abac19143d8222cf632757f1c9d4a532cb3661..745cd474dd4b6319910355fa3849169520e18e91 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp @@ -17,12 +17,12 @@ #include <tuple> #include <vector> -#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include 
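The only signature difference from the depthwise aliases is the extra `DimSize_t` (the output channel count): standard convolution mixes every input channel into each of `outChannels` outputs, while depthwise maps channel `ch` straight to channel `ch`. The weight layouts implied by the two kernels' indexing (a reading of the code, not a documented contract):

    // ConvDepthWise2D: wIndex = ch * kH * kW
    //   -> weights[ch][kH][kW], one filter per channel
    // Conv2D:          wIndex = (inCh + outCh * inChannels) * kH * kW
    //   -> weights[outCh][inCh][kH][kW], hence the outChannels argument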
"aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Conv.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { /** @@ -39,16 +39,16 @@ namespace Aidge { * @param output_ Output Tensor. */ template <class I, class W, class B, class O> -void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, - const std::array<DimSize_t, 1>& dilationDims, - const std::array<DimSize_t, 1>& kernelDims, - const std::array<DimSize_t, 3>& inputDims, - DimSize_t outChannels, - const void *input_, - const void *weights_, - const void *biases_, - void *output_) -{ +void ConvImpl1D_cpu_forward_kernel( + const std::array<DimSize_t, 1> &strideDims, + const std::array<DimSize_t, 1> &dilationDims, + const std::array<DimSize_t, 1> &kernelDims, + const std::array<DimSize_t, 3> &inputDims, + DimSize_t outChannels, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) { // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); @@ -56,10 +56,13 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, O *output = static_cast<O *>(output_); // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) / - static_cast<float>(strideDims[0]))); - const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; + const std::size_t oxSize = static_cast<std::size_t>( + std::floor(static_cast<float>(inputDims[2] - + dilationDims[0] * (kernelDims[0] - 1) - + 1 + strideDims[0]) / + static_cast<float>(strideDims[0]))); + const DimSize_t dilated_kernel_x = + dilationDims[0] * (kernelDims[0] - 1) + 1; // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -69,25 +72,37 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { - const std::size_t oIndex = (outCh + batch*outChannels) * oxSize; + const std::size_t oIndex = (outCh + batch * outChannels) * oxSize; // If bias = nullptr, set B(0) B biasVal = (biases != nullptr) ? biases[outCh] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize), biasVal); + std::fill(output + oIndex, output + (oIndex + oxSize), biasVal); for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { - const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2]; - const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0]; + const std::size_t iIndex = + (inCh + batch * inputDims[1]) * inputDims[2]; + const std::size_t wIndex = + (inCh + outCh * inputDims[1]) * kernelDims[0]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? 
kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * + // strideDims[0]); const std::size_t sxMin = + // static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = + // (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : + // ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : + // inputDims[2] + difx); const std::size_t sxMin = 0; const std::size_t sxMax = dilated_kernel_x; const std::size_t oIndexFull = oIndex + ox; - const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); + const signedsize ix = + static_cast<signedsize>(ox * strideDims[0]); - for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { - output[oIndexFull] += weights[wIndex + sx] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))]; + for (std::size_t sx = sxMin; sx * dilationDims[0] < sxMax; + ++sx) { + output[oIndexFull] += + weights[wIndex + sx] * + input[iIndex + + static_cast<std::size_t>( + ix + static_cast<signedsize>( + sx * dilationDims[0]))]; } } } @@ -97,18 +112,32 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, // Kernels registration to implementation entry point REGISTRAR(ConvImpl1D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr}); -REGISTRAR(ConvImpl1D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr}); + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, + nullptr}); REGISTRAR(ConvImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float16, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, + half_float::half, + half_float::half, + half_float::half>, + nullptr}); +REGISTRAR( + ConvImpl1D_cpu, {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr}); -REGISTRAR(ConvImpl1D_cpu, + {ProdConso::inPlaceModel, + Aidge::ConvImpl1D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, + nullptr}); +REGISTRAR( + ConvImpl1D_cpu, {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr}); - + {ProdConso::inPlaceModel, + Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, + nullptr}); /** * @brief Forward kernel for 2D Convolution on CPU backend. @@ -124,16 +153,16 @@ REGISTRAR(ConvImpl1D_cpu, * @param output_ Output Tensor. 
*/ template <class I, class W, class B, class O> -void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& dilationDims, - const std::array<DimSize_t, 2>& kernelDims, - const std::array<DimSize_t, 4> &inputDims, - DimSize_t outChannels, - const void *input_, - const void *weights_, - const void *biases_, - void *output_) -{ +void ConvImpl2D_cpu_forward_kernel( + const std::array<DimSize_t, 2> &strideDims, + const std::array<DimSize_t, 2> &dilationDims, + const std::array<DimSize_t, 2> &kernelDims, + const std::array<DimSize_t, 4> &inputDims, + DimSize_t outChannels, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) { // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); @@ -141,23 +170,24 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, O *output = static_cast<O *>(output_); // output H size - const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / - static_cast<float>(strideDims[0]))); + const DimSize_t dilated_kernel_x = + dilationDims[0] * (kernelDims[0] - 1) + 1; + const std::size_t oxSize = static_cast<std::size_t>(std::floor( + static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size - const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1; - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) / - static_cast<float>(strideDims[1]))); - + const DimSize_t dilated_kernel_y = + dilationDims[1] * (kernelDims[1] - 1) + 1; + const std::size_t oySize = static_cast<std::size_t>(std::floor( + static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) / + static_cast<float>(strideDims[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, inCh, Xin, Yin) // weight (outCh, inCh, kernelX, kernelY) // does not take Dilation attribute into account - const std::size_t outChannels_s = oxSize * oySize; + const std::size_t outChannels_s = oxSize * oySize; using signedsize = std::make_signed<std::size_t>::type; if (dilated_kernel_x == 3 && dilated_kernel_y == 3) { @@ -165,36 +195,73 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { // If bias = nullptr, set B(0) B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); - std::fill(output, output+outChannels_s, biasVal); + std::fill(output, output + outChannels_s, biasVal); for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { - std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; - const std::size_t wIndex = (inCh + outCh*inputDims[1]) * 9; - if (strideDims[0] == 1 && strideDims[1]==1) { - for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) { + std::size_t iIndex = (inCh + batch * inputDims[1]) * + inputDims[2] * inputDims[3]; + const std::size_t wIndex = + (inCh + outCh * inputDims[1]) * 9; + if (strideDims[0] == 1 && strideDims[1] == 1) { + for (std::size_t ox = 0, oIndex = 0; ox < oxSize; + ++ox, oIndex += oySize, iIndex -= inputDims[3]) { for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2]; + output[oIndex + oy] += + weights[wIndex + 0] * input[iIndex + oy] + + weights[wIndex + 1] * + input[iIndex + oy + 1] + + weights[wIndex + 2] * + input[iIndex + oy + 2]; } - iIndex+=inputDims[3]; + iIndex += inputDims[3]; for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2]; + output[oIndex + oy] += + weights[wIndex + 3] * input[iIndex + oy] + + weights[wIndex + 4] * + input[iIndex + oy + 1] + + weights[wIndex + 5] * + input[iIndex + oy + 2]; } - iIndex+=inputDims[3]; + iIndex += inputDims[3]; for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2]; + output[oIndex + oy] += + weights[wIndex + 6] * input[iIndex + oy] + + weights[wIndex + 7] * + input[iIndex + oy + 1] + + weights[wIndex + 8] * + input[iIndex + oy + 2]; } } } else { - for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=strideDims[0]*inputDims[3]) { + for (std::size_t ox = 0, oIndex = 0; ox < oxSize; + ++ox, + oIndex += oySize, + iIndex -= + strideDims[0] * inputDims[3]) { for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+strideDims[0]]+weights[wIndex+2]*input[iIndex+oy+strideDims[0]*2]; + output[oIndex + oy] += + weights[wIndex + 0] * input[iIndex + oy] + + weights[wIndex + 1] * + input[iIndex + oy + strideDims[0]] + + weights[wIndex + 2] * + input[iIndex + oy + strideDims[0] * 2]; } - iIndex+=strideDims[0]*inputDims[3]; + iIndex += strideDims[0] * inputDims[3]; for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+strideDims[0]]+weights[wIndex+5]*input[iIndex+oy+strideDims[0]*2]; + output[oIndex + oy] += + weights[wIndex + 3] * input[iIndex + oy] + + weights[wIndex + 4] * + input[iIndex + oy + strideDims[0]] + + weights[wIndex + 5] * + input[iIndex + oy + strideDims[0] * 2]; } - iIndex+=strideDims[0]*inputDims[3]; + iIndex += strideDims[0] * inputDims[3]; for (std::size_t oy = 0; oy < oySize; ++oy) { - output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+strideDims[0]]+weights[wIndex+8]*input[iIndex+oy+strideDims[0]*2]; + output[oIndex + oy] += + weights[wIndex + 6] * input[iIndex + oy] + + weights[wIndex + 7] * + input[iIndex + oy + strideDims[0]] + + weights[wIndex + 8] * + input[iIndex + oy + 
strideDims[0] * 2]; } } } @@ -207,18 +274,26 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { // If bias = nullptr, set B(0) B biasVal = (biases != nullptr) ? biases[outCh] : B(0); - std::fill(output, output+outChannels_s, biasVal); + std::fill(output, output + outChannels_s, biasVal); for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { - std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; - const std::size_t wIndex = (inCh + outCh*inputDims[1]); + std::size_t iIndex = (inCh + batch * inputDims[1]) * + inputDims[2] * inputDims[3]; + const std::size_t wIndex = (inCh + outCh * inputDims[1]); if (strideDims[0] == 1 && strideDims[1] == 1) { - for (std::size_t oIndex = 0; oIndex < oxSize*oySize; ++oIndex, ++iIndex) { + for (std::size_t oIndex = 0; oIndex < oxSize * oySize; + ++oIndex, ++iIndex) { output[oIndex] += weights[wIndex] * input[iIndex]; } - } else { - for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=inputDims[3]*strideDims[0]) { - for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) { - output[oIndex + oy] += weights[wIndex+0]*input[iIndex+iy]; + } else { + for (std::size_t ox = 0, oIndex = 0; ox < oxSize; + ++ox, + oIndex += oySize, + iIndex += + inputDims[3] * strideDims[0]) { + for (std::size_t oy = 0, iy = 0; oy < oySize; + ++oy, iy += strideDims[1]) { + output[oIndex + oy] += + weights[wIndex + 0] * input[iIndex + iy]; } } } @@ -231,21 +306,36 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { // If bias = nullptr, set B(0) B biasVal = (biases != nullptr) ? biases[outCh] : B(0); - std::fill(output, output+outChannels_s, biasVal); + std::fill(output, output + outChannels_s, biasVal); for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { - std::size_t iIndex_channel = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; - const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1]; + std::size_t iIndex_channel = + (inCh + batch * inputDims[1]) * inputDims[2] * + inputDims[3]; + const std::size_t wIndex = (inCh + outCh * inputDims[1]) * + kernelDims[0] * kernelDims[1]; // loop over each ouput line - for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex_channel+=inputDims[3]*strideDims[0]) { + for (std::size_t ox = 0, oIndex = 0; ox < oxSize; + ++ox, + oIndex += oySize, + iIndex_channel += + inputDims[3] * strideDims[0]) { // loop over associated input line - for (std::size_t ky = 0, ix = 0; ky < kernelDims[0]; ++ky, ix += inputDims[3]*dilationDims[0]) { + for (std::size_t ky = 0, ix = 0; ky < kernelDims[0]; + ++ky, ix += inputDims[3] * dilationDims[0]) { // loop over the entire line - for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) { - const std::size_t iIndex = iIndex_channel + ix + iy; - // loop over elements assosicated with one output - for (std::size_t kx = 0; kx < kernelDims[0]; ++kx) { - output[oIndex + oy] += weights[wIndex+kernelDims[0]*ky+kx]*input[iIndex+kx*dilationDims[1]]; + for (std::size_t oy = 0, iy = 0; oy < oySize; + ++oy, iy += strideDims[1]) { + const std::size_t iIndex = + iIndex_channel + ix + iy; + // loop over elements assosicated with one + // output + for (std::size_t kx = 0; kx < kernelDims[0]; + ++kx) { + output[oIndex + oy] += + weights[wIndex + kernelDims[0] * ky + + kx] * + input[iIndex + kx * 
dilationDims[1]]; } } } @@ -257,21 +347,34 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, } } - - // Kernels registration to implementation entry point REGISTRAR(ConvImpl2D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr}); -REGISTRAR(ConvImpl2D_cpu, - {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr}); + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, + nullptr}); REGISTRAR(ConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float16, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, + half_float::half, + half_float::half, + half_float::half>, + nullptr}); +REGISTRAR( + ConvImpl2D_cpu, {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr}); -REGISTRAR(ConvImpl2D_cpu, + {ProdConso::inPlaceModel, + Aidge::ConvImpl2D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, + nullptr}); +REGISTRAR( + ConvImpl2D_cpu, {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, - {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr}); -} // namespace Aidge + {ProdConso::inPlaceModel, + Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/DivImpl.hpp b/include/aidge/backend/cpu/operator/DivImpl.hpp index 40c1b678a78713d6c3b27629ae898c715797b9b2..eb6a4715a37ed48662a6827b03ddc0ebacb60fe4 100644 --- a/include/aidge/backend/cpu/operator/DivImpl.hpp +++ b/include/aidge/backend/cpu/operator/DivImpl.hpp @@ -24,10 +24,15 @@ namespace Aidge { // Operator implementation entry point for the backend using DivImpl_cpu = OperatorImpl_cpu<Div_Op, - void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)>; + void(const std::size_t, + const std::size_t, + const std::size_t, + const void *, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Div_Op, "cpu", Aidge::DivImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_DIVIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp b/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp index ed6e55a79acbe23a689a67c22477f64f785a3aef..16e36f1945edfb2fea4586429fd563a367a93e82 100644 --- a/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp @@ -12,10 +12,10 @@ #ifndef AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ -#include <numeric> // std::accumulate -#include <cstddef> // std::size_t -#include <cstdint> // std::int32_t, std::int64_t -#include <functional> // std::multiplies +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t, std::int64_t +#include <functional> // std::multiplies +#include <numeric> // std::accumulate #include 
"aidge/utils/Registrar.hpp" @@ -35,11 +35,13 @@ namespace Aidge { // const I2* input_2 = static_cast<const I2*>(input2_); // O* output = static_cast<O*>(output_); -// const std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); +// const std::size_t totalElements = std::accumulate(outputDims.cbegin(), +// outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); // for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) // { -// std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex); +// std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, +// oIndex); // std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); // std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); @@ -51,17 +53,17 @@ namespace Aidge { template <class I1, class I2, class O> constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_, - const std::size_t input2size_, - const std::size_t output1size_, - const void* input1_, - const void* input2_, - void* output_) { + const std::size_t input2size_, + const std::size_t output1size_, + const void *input1_, + const void *input2_, + void *output_) { - const I1* input_1 = static_cast<const I1*>(input1_); - const I2* input_2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); + const I1 *input_1 = static_cast<const I1 *>(input1_); + const I2 *input_2 = static_cast<const I2 *>(input2_); + O *output = static_cast<O *>(output_); -// suppose values are contiguous in memory + // suppose values are contiguous in memory for (std::size_t i = 0; i < output1size_; ++i) { const std::size_t in1_id = (input1size_ != 1) ? i : 0; const std::size_t in2_id = (input2size_ != 1) ? i : 0; @@ -71,14 +73,22 @@ constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_, // Kernels registration to implementation entry point REGISTRAR(DivImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::DivImpl_cpu_forward_kernel<float, float, float>, + nullptr}); REGISTRAR(DivImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::DivImpl_cpu_forward_kernel<double, double, double>, + nullptr}); REGISTRAR(DivImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::DivImpl_cpu_forward_kernel<std::int32_t, + std::int32_t, + std::int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ErfImpl.hpp b/include/aidge/backend/cpu/operator/ErfImpl.hpp index 3d2835600367e81499cbe6af81a8475a0cd1b61e..bec2031c0947759f27b77101c5e0ea64dc9fb6da 100644 --- a/include/aidge/backend/cpu/operator/ErfImpl.hpp +++ b/include/aidge/backend/cpu/operator/ErfImpl.hpp @@ -21,11 +21,11 @@ namespace Aidge { // Operator implementation entry point for the backend -using ErfImpl_cpu = OperatorImpl_cpu<Erf_Op, - void(const std::size_t, const void*, void*)>; +using ErfImpl_cpu = + OperatorImpl_cpu<Erf_Op, void(const std::size_t, const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(Erf_Op, "cpu", 
Aidge::ErfImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ERFIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp index 02041f55ce9a1b2476db575b40340b1bb6517ce1..386837b58a61c2f5748cdca4faad9904fce33aa4 100644 --- a/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp @@ -21,11 +21,11 @@ namespace Aidge { template <class I, class O> void ErfImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = std::erf(input[i]); @@ -34,14 +34,20 @@ void ErfImpl_cpu_forward_kernel(std::size_t inputLenght, // Kernels registration to implementation entry point REGISTRAR(ErfImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::ErfImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(ErfImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::ErfImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(ErfImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::ErfImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp index e82352d9cba60440efef87faf97dfd4ed66565b6..6b42aff87edcd0ade8e866f3595d93d250fb0544 100644 --- a/include/aidge/backend/cpu/operator/FCImpl.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl.hpp @@ -24,25 +24,25 @@ namespace Aidge { // Operator implementation entry point for the backend using FCImpl_cpu = OperatorImpl_cpu<FC_Op, - void(const DimSize_t, - const DimSize_t, - const DimSize_t, - const void *, - const void *, - const void *, - void *), - void(const DimSize_t, - const DimSize_t, - const DimSize_t, - const void *, - const void *, - const void *, - void *, - void *, - void *)>; + void(const DimSize_t, + const DimSize_t, + const DimSize_t, + const void *, + const void *, + const void *, + void *), + void(const DimSize_t, + const DimSize_t, + const DimSize_t, + const void *, + const void *, + const void *, + void *, + void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(FC_Op, "cpu", Aidge::FCImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_FCIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp index c57f86e6ac6e74acebb48f471991e7181920f7c3..28f09794a7b44fed9cd950c040bda50f9dbf6aee 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp @@ -19,8 +19,10 @@ namespace Aidge { // template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const 
FC_Op::Attrs& attrs, const std::array<DimSize_t, 4>& dims, -// const void* input_, const void* weights_, const void* biases_, void* output_) { +// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const +// std::array<DimSize_t, 4>& dims, +// const void* input_, const void* weights_, +// const void* biases_, void* output_) { // // FIXME: missing FC attributes as arguments // const I* input = static_cast<const I*>(input_); // const W* weights = static_cast<const W*>(weights_); @@ -38,13 +40,18 @@ namespace Aidge { // for (std::size_t ix = 0; ix < dims[0]; ++ix) { // for (std::size_t iy = 0; iy < dims[1]; ++iy) { // for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { -// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); -// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) { +// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + +// dims[1] * ix)); for (std::size_t outCh = 0; outCh < +// outputFeatureSize; ++outCh) { // const std::size_t oIndex = dims[3] * outCh; -// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize + -// outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3]; +// const std::size_t wIndex = (inCh + dims[2] * (iy + +// dims[1] * ix)) * outputFeatureSize + +// outCh; // +// (iIndex*outputFeatureSize + +// oIndex)/dims[3]; // for (std::size_t batch = 0; batch < dims[3]; ++batch) { -// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; +// output[oIndex + batch] += weights[wIndex] * +// input[iIndex + batch]; // } // } // } @@ -53,8 +60,10 @@ namespace Aidge { // } // template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 2>& dims, -// const void* input_, const void* weights_, const void* biases_, void* output_) { +// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const +// std::array<DimSize_t, 2>& dims, +// const void* input_, const void* weights_, +// const void* biases_, void* output_) { // // FIXME: missing FC attributes as arguments // const I* input = static_cast<const I*>(input_); // const W* weights = static_cast<const W*>(weights_); @@ -74,9 +83,11 @@ namespace Aidge { // for (std::size_t batch = 0; batch < dims[0]; ++batch) { // const std::size_t oIndex = dims[1] * batch; // for (std::size_t i = 0; i < dims[1]; ++i) { -// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) { -// std::size_t wIndex = i * outputFeatureSize + outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3]; -// output[oIndex + outCh] += weights[wIndex] * input[i + batch]; +// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) +// { +// std::size_t wIndex = i * outputFeatureSize + outCh; // +// (iIndex*outputFeatureSize + oIndex)/dims[3]; output[oIndex + +// outCh] += weights[wIndex] * input[i + batch]; // } // } // } @@ -84,33 +95,35 @@ namespace Aidge { template <class I, class W, class B, class O> void FCImpl_cpu_forward_kernel(const DimSize_t batchSize, - const DimSize_t inputFeatureSize, - const DimSize_t outputFeatureSize, - const void* input_, - const void* weights_, - const void* biases_, - void* output_) { + const DimSize_t inputFeatureSize, + const DimSize_t outputFeatureSize, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) { // FIXME: missing FC attributes as arguments - const I* input = static_cast<const I*>(input_); - const W* weights = static_cast<const W*>(weights_); - const B* biases = static_cast<const B*>(biases_); - O* 
output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + const W *weights = static_cast<const W *>(weights_); + const B *biases = static_cast<const B *>(biases_); + O *output = static_cast<O *>(output_); if (biases == nullptr) { - std::fill(output, output+(batchSize*outputFeatureSize), B(0)); - } - else { + std::fill(output, output + (batchSize * outputFeatureSize), B(0)); + } else { for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize)); + std::copy(biases, + biases + outputFeatureSize, + output + (batch * outputFeatureSize)); } } for (std::size_t batch = 0; batch < batchSize; ++batch) { for (std::size_t out = 0; out < outputFeatureSize; ++out) { - output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize, - input + (batch + 1)*inputFeatureSize, - weights + out*inputFeatureSize, - output[out + batch*outputFeatureSize]); + output[out + batch * outputFeatureSize] = + std::inner_product(input + batch * inputFeatureSize, + input + (batch + 1) * inputFeatureSize, + weights + out * inputFeatureSize, + output[out + batch * outputFeatureSize]); } } } @@ -119,30 +132,28 @@ template <class I, class O, class W, class B> void FCImpl_cpu_backward_kernel(const DimSize_t batchSize, const DimSize_t inputFeatureSize, const DimSize_t outputFeatureSize, - const void* input_, - const void* originalInput_, - const void* weight_, - void* output_, - void* weightGrad_, - void* biasesGrad_) -{ + const void *input_, + const void *originalInput_, + const void *weight_, + void *output_, + void *weightGrad_, + void *biasesGrad_) { // FIXME: missing FC attributes as arguments - const I* input = static_cast<const I*>(input_); - const I* originalInput = static_cast<const I*>(originalInput_); - const W* weight = static_cast<const W*>(weight_); - O* output = static_cast<O*>(output_); - W* weightGrad = static_cast<W*>(weightGrad_); - B* biasesGrad = static_cast<B*>(biasesGrad_); - + const I *input = static_cast<const I *>(input_); + const I *originalInput = static_cast<const I *>(originalInput_); + const W *weight = static_cast<const W *>(weight_); + O *output = static_cast<O *>(output_); + W *weightGrad = static_cast<W *>(weightGrad_); + B *biasesGrad = static_cast<B *>(biasesGrad_); // bias grad - if (biasesGrad == nullptr) { // no bias + if (biasesGrad == nullptr) { // no bias std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0)); } else { for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs B sum{0}; for (std::size_t b = 0; b < batchSize; ++b) { - sum += input[b*outputFeatureSize + o]; + sum += input[b * outputFeatureSize + o]; } biasesGrad[o] = sum; } @@ -153,9 +164,10 @@ void FCImpl_cpu_backward_kernel(const DimSize_t batchSize, for (std::size_t c = 0; c < inputFeatureSize; ++c) { W sum{0}; for (std::size_t b = 0; b < batchSize; ++b) { - sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o]; + sum += originalInput[b * inputFeatureSize + c] * + input[b * outputFeatureSize + o]; } - weightGrad[o*inputFeatureSize + c] = sum; + weightGrad[o * inputFeatureSize + c] = sum; } } @@ -164,23 +176,33 @@ void FCImpl_cpu_backward_kernel(const DimSize_t batchSize, for (std::size_t c = 0; c < inputFeatureSize; ++c) { O sum{0}; for (std::size_t o = 0; o < outputFeatureSize; ++o) { - sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o]; + sum += weight[o * inputFeatureSize + c] * + input[b * outputFeatureSize + o]; } - 
output[b*inputFeatureSize + c] = sum; + output[b * inputFeatureSize + c] = sum; } } } // Kernels registration to implementation entry point REGISTRAR(FCImpl_cpu, - {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, - {ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>, Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>}); -REGISTRAR(FCImpl_cpu, - {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, - {ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>, Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>}); + {ImplSpec::IOSpec{DataType::Any}, + ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::defaultModel, + Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>, + Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>}); REGISTRAR(FCImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, + ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::defaultModel, + Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>, + Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>}); +REGISTRAR( + FCImpl_cpu, {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, - {ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, Aidge::FCImpl_cpu_backward_kernel<int32_t, int32_t, int32_t, int32_t>}); -} // namespace Aidge + {ProdConso::defaultModel, + Aidge::FCImpl_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, + Aidge::FCImpl_cpu_backward_kernel<int32_t, int32_t, int32_t, int32_t>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/FoldImpl.hpp b/include/aidge/backend/cpu/operator/FoldImpl.hpp index 94ddbdcba8e33e12108968d536037ab1ccab2c8d..ea8f53405aeb5039c3366ee7d605fd5f5fd4fb7b 100644 --- a/include/aidge/backend/cpu/operator/FoldImpl.hpp +++ b/include/aidge/backend/cpu/operator/FoldImpl.hpp @@ -17,26 +17,26 @@ #include <tuple> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Fold.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend using Fold2D_Op = Fold_Op<2>; using FoldImpl2D_cpu = OperatorImpl_cpu<Fold_Op<2>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::vector<DimSize_t> &, - const void *, - void *)>; + void(const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 2> &, + const std::vector<DimSize_t> &, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Fold2D_Op, "cpu", Aidge::FoldImpl2D_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp index 8cced8958f49f1cc4215c7cf463cc3391fb29246..7427c253f10d94ba3a54881e09eb4de83b0449f2 100644 --- a/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp @@ -14,38 +14,41 @@ #include "aidge/utils/Registrar.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include 
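Stepping back to the FC forward kernel above: it is a bias-seeded dot product per (batch, output) pair. A standalone equivalent with plain pointers, weights stored row-major as [outputFeatureSize][inputFeatureSize] to match `weights + out * inputFeatureSize`:

    #include <cstddef>
    #include <numeric>

    void fc_forward(std::size_t batchSize, std::size_t inF, std::size_t outF,
                    const float *input, const float *weights,
                    const float *biases, float *output) {
        for (std::size_t batch = 0; batch < batchSize; ++batch) {
            for (std::size_t out = 0; out < outF; ++out) {
                // Seed with the bias (or zero), then accumulate the dot
                // product of one input row with one weight row.
                output[batch * outF + out] =
                    std::inner_product(input + batch * inF,
                                       input + (batch + 1) * inF,
                                       weights + out * inF,
                                       biases ? biases[out] : 0.0f);
            }
        }
    }

As an aside, the backward kernel's `if (biasesGrad == nullptr)` branch fills through the very pointer it has just found to be null; the fill presumably belongs to the non-null case.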
"aidge/backend/cpu/operator/FoldImpl.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" -#include <cmath> -#include <array> #include <algorithm> +#include <array> +#include <cmath> namespace Aidge { template <class I, class O> -void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims, - const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& dilationDims, - const std::array<DimSize_t, 2>& kernelDims, - const std::vector<DimSize_t> &dims, - const void *input_, void *output_) -{ +void FoldImpl2D_cpu_forward_kernel( + const std::array<DimSize_t, 2> &outputDims, + const std::array<DimSize_t, 2> &strideDims, + const std::array<DimSize_t, 2> &dilationDims, + const std::array<DimSize_t, 2> &kernelDims, + const std::vector<DimSize_t> &dims, + const void *input_, + void *output_) { const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); const DimSize_t inHeight = outputDims[0]; const DimSize_t inWidth = outputDims[1]; - const DimSize_t kernelExtentHeight = dilationDims[0] * - (kernelDims[0] - 1) + 1; - const DimSize_t outHeight = 1 + static_cast<DimSize_t>( - floor(static_cast<float>(inHeight - kernelExtentHeight) / - static_cast<float>(strideDims[0]))); - const DimSize_t kernelExtentWidth = dilationDims[1] * - (kernelDims[1] - 1) + 1; - const DimSize_t outWidth = 1 + static_cast<DimSize_t>( - floor(static_cast<float>(inWidth - kernelExtentWidth) / - static_cast<float>(strideDims[1]))); + const DimSize_t kernelExtentHeight = + dilationDims[0] * (kernelDims[0] - 1) + 1; + const DimSize_t outHeight = + 1 + static_cast<DimSize_t>( + floor(static_cast<float>(inHeight - kernelExtentHeight) / + static_cast<float>(strideDims[0]))); + const DimSize_t kernelExtentWidth = + dilationDims[1] * (kernelDims[1] - 1) + 1; + const DimSize_t outWidth = + 1 + static_cast<DimSize_t>( + floor(static_cast<float>(inWidth - kernelExtentWidth) / + static_cast<float>(strideDims[1]))); const DimSize_t outChannels = dims[dims.size() - 2]; const DimSize_t inChannels = outChannels / kernelDims[0] / kernelDims[1]; @@ -58,13 +61,19 @@ void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims, const auto inC = outC / kernelDims[0] / kernelDims[1]; for (DimSize_t outH = 0; outH < outHeight; ++outH) { - const auto inH = outH * strideDims[0] + inOffsetH * dilationDims[0]; + const auto inH = + outH * strideDims[0] + inOffsetH * dilationDims[0]; for (DimSize_t outW = 0; outW < outWidth; ++outW) { - const auto inW = outW * strideDims[1] + inOffsetW * dilationDims[1]; + const auto inW = + outW * strideDims[1] + inOffsetW * dilationDims[1]; - output[((n * inChannels + inC) * inHeight + inH) * inWidth + inW] += - input[((n * outChannels + outC) * outHeight + outH) * outWidth + outW]; + output[((n * inChannels + inC) * inHeight + inH) * + inWidth + + inW] += + input[((n * outChannels + outC) * outHeight + outH) * + outWidth + + outW]; } } } @@ -73,14 +82,20 @@ void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims, // Kernels registration to implementation entry point REGISTRAR(FoldImpl2D_cpu, - {DataType::Float32}, - {ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::defaultModel, + Aidge::FoldImpl2D_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(FoldImpl2D_cpu, - {DataType::Float64}, - {ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + 
{ProdConso::defaultModel, + Aidge::FoldImpl2D_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(FoldImpl2D_cpu, - {DataType::Int32}, - {ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::defaultModel, + Aidge::FoldImpl2D_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp index 4e04b1a595a8660b1528e49921e7e3e7a567829a..2c39b8af09f292a3389bfdfe102984d9e5375de7 100644 --- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp @@ -22,11 +22,14 @@ namespace Aidge { // Operator implementation entry point for the backend -using GlobalAveragePoolingImpl_cpu = OperatorImpl_cpu<GlobalAveragePooling_Op, +using GlobalAveragePoolingImpl_cpu = OperatorImpl_cpu< + GlobalAveragePooling_Op, void(const std::vector<DimSize_t> &, const void *, void *)>; // Implementation entry point registration to Operator -REGISTRAR(GlobalAveragePooling_Op, "cpu", Aidge::GlobalAveragePoolingImpl_cpu::create); +REGISTRAR(GlobalAveragePooling_Op, + "cpu", + Aidge::GlobalAveragePoolingImpl_cpu::create); } // namespace Aidge #endif /* _AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp index ed838a94cc0c0238a870427c3b774b29f7818b09..f4aee4a2cba7bbf0d9051f12e366b1cc86ecb520 100644 --- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp @@ -13,8 +13,8 @@ #define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_ #include <cstddef> -#include <functional> // std::multiplies -#include <numeric> // std::accumulate +#include <functional> // std::multiplies +#include <numeric> // std::accumulate #include <vector> #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp" @@ -23,52 +23,64 @@ #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" - namespace Aidge { template <class I, class O> void GlobalAveragePoolingImpl_cpu_forward_kernel( - const std::vector<DimSize_t> &dims, const void *input_, void *output_) { - // error checking - AIDGE_ASSERT(dims.size() >= 3,"GlobalAveragePool needs at least a 3 dimensions " + const std::vector<DimSize_t> &dims, + const void *input_, + void *output_) { + // error checking + AIDGE_ASSERT(dims.size() >= 3, + "GlobalAveragePool needs at least a 3 dimensions " "input, number of input dim : {}", dims.size()); - // computation - const I *input = static_cast<const I *>(input_); - O *output = static_cast<O *>(output_); + // computation + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); - DimSize_t nb_elems = std::accumulate(dims.begin(), dims.end(), std::size_t(1), - std::multiplies<std::size_t>()); + DimSize_t nb_elems = std::accumulate(dims.begin(), + dims.end(), + std::size_t(1), + std::multiplies<std::size_t>()); - const DimSize_t in_batch_nb_elems{nb_elems / dims[0]}; - const DimSize_t in_channel_nb_elems{in_batch_nb_elems / dims[1]}; - const DimSize_t out_batch_nb_elems{dims[1]}; - // parse channel by channel and fill each output with the average of the - // values in the channel - 
for (DimSize_t batch = 0; batch < dims[0]; ++batch) { - for (DimSize_t channel = 0; channel < dims[1]; ++channel) { - const I *filter_start = std::next( - input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems)); - I mean = 0; - for (size_t i = 0; i < in_channel_nb_elems; ++i) { - // Single pass numerically stable mean, using the fmaf - mean = fmaf(filter_start[i] - mean, 1.0f/(i+1), mean); - } - output[batch * out_batch_nb_elems + channel] = mean; + const DimSize_t in_batch_nb_elems{nb_elems / dims[0]}; + const DimSize_t in_channel_nb_elems{in_batch_nb_elems / dims[1]}; + const DimSize_t out_batch_nb_elems{dims[1]}; + // parse channel by channel and fill each output with the average of the + // values in the channel + for (DimSize_t batch = 0; batch < dims[0]; ++batch) { + for (DimSize_t channel = 0; channel < dims[1]; ++channel) { + const I *filter_start = std::next( + input, + (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems)); + I mean = 0; + for (size_t i = 0; i < in_channel_nb_elems; ++i) { + // Single pass numerically stable mean, using the fmaf + mean = fmaf(filter_start[i] - mean, 1.0f / (i + 1), mean); + } + output[batch * out_batch_nb_elems + channel] = mean; + } } - } } // Kernels registration to implementation entry point REGISTRAR(GlobalAveragePoolingImpl_cpu, - {DataType::Float32}, - {ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>, nullptr}); -REGISTRAR(GlobalAveragePoolingImpl_cpu, - {DataType::Float64}, - {ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float32}, + {ProdConso::defaultModel, + Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(GlobalAveragePoolingImpl_cpu, + {DataType::Float64}, + {ProdConso::defaultModel, + Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>, + nullptr}); +REGISTRAR( + GlobalAveragePoolingImpl_cpu, {DataType::Int32}, - {ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); + {ProdConso::defaultModel, + Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/GridSampleImpl.hpp b/include/aidge/backend/cpu/operator/GridSampleImpl.hpp index 697bb35a983bc108c2a5d65db3c08ef462ffcdbd..380757f8181e73002a9a151802cc3be2fc59c883 100644 --- a/include/aidge/backend/cpu/operator/GridSampleImpl.hpp +++ b/include/aidge/backend/cpu/operator/GridSampleImpl.hpp @@ -17,22 +17,23 @@ #include <tuple> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/GridSample.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend -using GridSampleImpl_cpu = OperatorImpl_cpu<GridSample_Op, - void(const GridSample_Op&, - const std::shared_ptr<Tensor>&, - const std::shared_ptr<Tensor>&, - const std::shared_ptr<Tensor>&)>; +using GridSampleImpl_cpu = + OperatorImpl_cpu<GridSample_Op, + void(const GridSample_Op &, + const std::shared_ptr<Tensor> &, + const std::shared_ptr<Tensor> &, + const std::shared_ptr<Tensor> &)>; // Implementation entry point registration to Operator REGISTRAR(GridSample_Op, "cpu", Aidge::GridSampleImpl_cpu::create); -} // namespace 
Aidge
+} // namespace Aidge

 #endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp b/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp
index fa390e4e9585225ab15b39651198cb3aaae77edb..3362c3875d6efdfdcd5d901cb8d848ebdd448cd3 100644
--- a/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp
@@ -12,10 +12,10 @@
 #ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
 
-#include <algorithm>  // std::max, std::min
-#include <cmath>      // std::fabs, std::trunf, std::nearbyint
-#include <cstddef>    // std::size_t
-#include <cstdint>    // std::int64_t
+#include <algorithm> // std::max, std::min
+#include <cmath>     // std::fabs, std::truncf, std::nearbyint
+#include <cstddef>   // std::size_t
+#include <cstdint>   // std::int64_t
 
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/GridSampleImpl.hpp"
@@ -23,56 +23,79 @@
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 
-static bool in_bound(float coord, float lower_bound, float upper_bound) noexcept {
+static bool
+in_bound(float coord, float lower_bound, float upper_bound) noexcept {
     return (coord > lower_bound) && (coord < upper_bound);
 }
 
-static float unnormalized_coord(float coord, float new_lower_bound, float new_upper_bound) noexcept {
-    return (coord + 1) / 2 * (new_upper_bound - new_lower_bound) + new_lower_bound;
+static float unnormalized_coord(float coord,
+                                float new_lower_bound,
+                                float new_upper_bound) noexcept {
+    return (coord + 1) / 2 * (new_upper_bound - new_lower_bound) +
+           new_lower_bound;
 }
 
 // unused
-// static float normalized_coord(float coord, float prev_lower_bound, float prev_upper_bound) noexcept {
-//     return (coord + prev_lower_bound) / (prev_upper_bound-prev_lower_bound) * 2 - 1;
+// static float normalized_coord(float coord, float prev_lower_bound, float
+// prev_upper_bound) noexcept {
+//     return (coord + prev_lower_bound) / (prev_upper_bound-prev_lower_bound)
+//     * 2 - 1;
 // }
 
-static float unnormalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
-    return align_corners ? unnormalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
-                         : unnormalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
+static float unnormalize_grid_sample_coord(float coord,
+                                           std::size_t size,
+                                           bool align_corners) noexcept {
+    return align_corners ? unnormalized_coord(coord,
+                                              0.0f,
+                                              static_cast<float>(size) - 1.0f)
+                         : unnormalized_coord(coord,
+                                              -0.5f,
+                                              static_cast<float>(size) - 0.5f);
 }
 
 // unused
-// static float normalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
-//     return align_corners ? normalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
-//                          : normalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
+// static float normalize_grid_sample_coord(float coord, std::size_t size, bool
+// align_corners) noexcept {
+//     return align_corners ?
normalized_coord(coord, 0.0f, +// static_cast<float>(size) - 1.0f) +// : normalized_coord(coord, -0.5f, +// static_cast<float>(size) - 0.5f); // } -static float update_normalized_coord_with_padding(float coord, Aidge::GridSample_Op::PaddingMode padding_mode) { +static float update_normalized_coord_with_padding( + float coord, + Aidge::GridSample_Op::PaddingMode padding_mode) { if (!in_bound(coord, -1.0f, 1.0f)) { if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) { coord = std::min(std::max(-1.0f, coord), 1.0f); - } - else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) { + } else if (padding_mode == + Aidge::GridSample_Op::PaddingMode::Reflection) { float abs_coord = std::fabs(coord); float int_coord = std::truncf(abs_coord); - std::int32_t nb_refl = static_cast<std::int32_t>((int_coord - 1) / 2); - float res = ((nb_refl + 1)*2) - abs_coord; - coord = (coord > 0) ? (nb_refl % 2 == 0 ? res : -res) \ - : (nb_refl % 2 == 0 ? -res : res); + std::int32_t nb_refl = + static_cast<std::int32_t>((int_coord - 1) / 2); + float res = ((nb_refl + 1) * 2) - abs_coord; + coord = (coord > 0) ? (nb_refl % 2 == 0 ? res : -res) + : (nb_refl % 2 == 0 ? -res : res); } - } return coord; } -static inline std::int64_t update_unnormalized_coord_with_padding(std::int64_t coord, std::int64_t size, Aidge::GridSample_Op::PaddingMode padding_mode) { +static inline std::int64_t update_unnormalized_coord_with_padding( + std::int64_t coord, + std::int64_t size, + Aidge::GridSample_Op::PaddingMode padding_mode) { if (!in_bound(coord, 0, size)) { // out of bound. switch padding mode if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) { - coord = std::min(std::max(std::int64_t(0), coord), size-std::int64_t(1)); - } else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) { - const std::int64_t quotient = coord / (size-1); - const std::int64_t remainer = std::abs(coord - quotient*(size-1)); + coord = std::min(std::max(std::int64_t(0), coord), + size - std::int64_t(1)); + } else if (padding_mode == + Aidge::GridSample_Op::PaddingMode::Reflection) { + const std::int64_t quotient = coord / (size - 1); + const std::int64_t remainer = + std::abs(coord - quotient * (size - 1)); coord = (quotient % 2 == 0) ? remainer : size - 1 - remainer; } } @@ -91,17 +114,16 @@ namespace Aidge { * @param output_ Output Tensor. 
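 * @note Worked mapping, for illustration only (the numbers follow from
 * unnormalize_grid_sample_coord above): with in_H = 4, a normalized x = -1
 * unnormalizes to 0 when align_corners is true and to -0.5 when it is
 * false; x = 1 maps to 3 and 3.5 respectively, and x = 0 maps to the
 * center, 1.5, under both conventions.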
 */
 template <class I, class O>
-void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
-                            const std::shared_ptr<Tensor>& in0,
-                            const std::shared_ptr<Tensor>& in1,
-                            const std::shared_ptr<Tensor>& out)
-{
-    const I* const input = static_cast<const I *>(in0->getImpl()->rawPtr());
-    const I* input_ptr = input;
-    float* const grid = static_cast<float*>(in1->getImpl()->rawPtr());
-    float* grid_ptr = grid;
-    O* const output = static_cast<O*>(out->getImpl()->rawPtr());
-    O* output_ptr = output;
+void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op &op,
+                                         const std::shared_ptr<Tensor> &in0,
+                                         const std::shared_ptr<Tensor> &in1,
+                                         const std::shared_ptr<Tensor> &out) {
+    const I *const input = static_cast<const I *>(in0->getImpl()->rawPtr());
+    const I *input_ptr = input;
+    float *const grid = static_cast<float *>(in1->getImpl()->rawPtr());
+    float *grid_ptr = grid;
+    O *const output = static_cast<O *>(out->getImpl()->rawPtr());
+    O *output_ptr = output;
 
     const std::size_t N = in0->dim(0);
     const std::size_t C = in0->dim(1);
@@ -117,23 +139,20 @@ void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
     const std::size_t out_C_s = out->stride(1);
     const std::size_t out_H_s = out->stride(2);
 
-    float* grid_ptr_N = grid;
-    const I* input_ptr_N = input;
-    O* output_ptr_N = output;
+    float *grid_ptr_N = grid;
+    const I *input_ptr_N = input;
+    O *output_ptr_N = output;
     for (std::size_t n = 0; n < N; ++n) {
         grid_ptr = grid_ptr_N;
         for (std::size_t grid_x = 0; grid_x < grid_H; ++grid_x) {
-            output_ptr = output_ptr_N + grid_x*out_H_s;
+            output_ptr = output_ptr_N + grid_x * out_H_s;
             /*
-            * change grid_x coord to match padding_mode
-            * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
-            * Handle computation of interpolation
-            * any value outside bounds is considered 0
-            * if nearest:
-            * else if linear:
-            * else if cubic:
-            * else : nothing
-            */
+             * change grid_x coord to match padding_mode
+             * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5]
+             * according to align_corners
+             * Handle computation of interpolation;
+             * any value outside bounds is considered 0
+             * (if nearest / else if linear / else if cubic / else: nothing)
+             */
             float x = *grid_ptr;
             x = update_normalized_coord_with_padding(x, op.paddingMode());
             x = unnormalize_grid_sample_coord(x, in_H, op.alignCorners());
@@ -141,7 +160,7 @@ void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
                 const std::int64_t x_rounded = std::nearbyintf(x);
 
                 if (in_bound(x_rounded, 0, in_H)) {
-                    input_ptr = input_ptr_N + x_rounded*in_H_s;
+                    input_ptr = input_ptr_N + x_rounded * in_H_s;
                     for (std::size_t c = 0; c < C; ++c) {
                         *output_ptr = *input_ptr;
                         input_ptr += in_C_s;
@@ -154,46 +173,81 @@ void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
                     }
                 }
             } else if (op.mode() == GridSample_Op::Mode::Linear) {
-                const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
-                const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
-
-                const I* input_ptr_NC = input_ptr_N;
+                const std::int64_t x_inf =
+                    update_unnormalized_coord_with_padding(
+                        static_cast<std::int64_t>(std::floor(x)),
+                        in_H,
+                        op.paddingMode());
+                const std::int64_t x_sup =
+                    update_unnormalized_coord_with_padding(x_inf + 1,
+                                                           in_H,
+                                                           op.paddingMode());
+
+                const I *input_ptr_NC = input_ptr_N;
                 for (std::size_t c = 0; c < C; ++c) {
-                    const I f_inf = in_bound(x_inf, 0, in_H) ?
-                        input_ptr_NC[static_cast<std::size_t>(x_inf)*in_H_s] : I(0);
-                    const I f_sup = in_bound(x_sup, 0, in_H) ?
-                        input_ptr_NC[static_cast<std::size_t>(x_sup)*in_H_s] : I(0);
-
-                    *output_ptr = static_cast<O>(static_cast<I>(x - x_inf)*f_inf \
-                        + static_cast<I>(x_sup - x)*f_sup);
+                    const I f_inf =
+                        in_bound(x_inf, 0, in_H)
+                            ? input_ptr_NC[static_cast<std::size_t>(x_inf) *
+                                           in_H_s]
+                            : I(0);
+                    const I f_sup =
+                        in_bound(x_sup, 0, in_H)
+                            ? input_ptr_NC[static_cast<std::size_t>(x_sup) *
+                                           in_H_s]
+                            : I(0);
+
+                    *output_ptr =
+                        static_cast<O>(static_cast<I>(x_sup - x) * f_inf +
+                                       static_cast<I>(x - x_inf) * f_sup);
 
                     input_ptr_NC += in_C_s;
                     output_ptr += out_C_s;
                 }
             } else if (op.mode() == GridSample_Op::Mode::Cubic) {
-                const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
-                const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
-                const std::int64_t x_inf_inf = update_unnormalized_coord_with_padding(x_inf - 1, in_H, op.paddingMode());
-                const std::int64_t x_sup_sup = update_unnormalized_coord_with_padding(x_sup + 1, in_H, op.paddingMode());
+                const std::int64_t x_inf =
+                    update_unnormalized_coord_with_padding(
+                        static_cast<std::int64_t>(std::floor(x)),
+                        in_H,
+                        op.paddingMode());
+                const std::int64_t x_sup =
+                    update_unnormalized_coord_with_padding(x_inf + 1,
+                                                           in_H,
+                                                           op.paddingMode());
+                const std::int64_t x_inf_inf =
+                    update_unnormalized_coord_with_padding(x_inf - 1,
+                                                           in_H,
+                                                           op.paddingMode());
+                const std::int64_t x_sup_sup =
+                    update_unnormalized_coord_with_padding(x_sup + 1,
+                                                           in_H,
+                                                           op.paddingMode());
 
                 const I x1 = static_cast<I>(x - static_cast<float>(x_inf));
                 const I x2 = x1 * x1;
                 const I x3 = x1 * x2;
 
-                const I* input_ptr_NC = input_ptr_N;
+                const I *input_ptr_NC = input_ptr_N;
                 for (std::size_t c = 0; c < C; ++c) {
-                    const I f_inf_inf = in_bound(x_inf_inf, 0, in_H) ? input_ptr_NC[x_inf_inf*in_H_s] : I(0);
-                    const I f_inf = in_bound(x_inf, 0, in_H) ? input_ptr_NC[x_inf*in_H_s] : I(0);
-                    const I f_sup = in_bound(x_sup, 0, in_H) ? input_ptr_NC[x_sup*in_H_s] : I(0);
-                    const I f_sup_sup = in_bound(x_sup_sup, 0, in_H) ? input_ptr_NC[x_sup_sup*in_H_s] : I(0);
+                    const I f_inf_inf = in_bound(x_inf_inf, 0, in_H)
+                                            ? input_ptr_NC[x_inf_inf * in_H_s]
+                                            : I(0);
+                    const I f_inf = in_bound(x_inf, 0, in_H)
+                                        ? input_ptr_NC[x_inf * in_H_s]
+                                        : I(0);
+                    const I f_sup = in_bound(x_sup, 0, in_H)
+                                        ? input_ptr_NC[x_sup * in_H_s]
+                                        : I(0);
+                    const I f_sup_sup = in_bound(x_sup_sup, 0, in_H)
+                                            ? input_ptr_NC[x_sup_sup * in_H_s]
+                                            : I(0);
 
                     const I m_inf = (f_sup - f_inf_inf) / I(2);
                     const I m_sup = (f_sup_sup - f_inf) / I(2);
 
-                    *output_ptr = f_inf \
-                        + x1 * m_inf \
-                        + x2 * (3 * (f_sup - f_inf) - 2 * m_inf - m_sup) \
-                        + x3 * (2*(f_inf - f_sup) + m_inf + m_sup);
+                    *output_ptr =
+                        f_inf + x1 * m_inf +
+                        x2 * (3 * (f_sup - f_inf) - 2 * m_inf - m_sup) +
+                        x3 * (2 * (f_inf - f_sup) + m_inf + m_sup);
 
                     input_ptr_NC += in_C_s;
                     output_ptr += out_C_s;
@@ -212,18 +266,30 @@ void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
 
 // Kernels registration to implementation entry point
 // only accept 1st input with only 1 spatial feat. (nb dims = 1)
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr});
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}},
+           {{DataType::Float16}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl1D_cpu_forward_kernel<half_float::half,
+                                                      half_float::half>,
+           nullptr});
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<float, float>, nullptr});
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}},
+           {{DataType::Float32}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl1D_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<double, double>, nullptr});
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}},
+           {{DataType::Float64}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl1D_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Int32}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}},
+           {{DataType::Int32}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl1D_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
 
 /**
  * @brief Forward kernel for 2D GridSample on CPU backend.
@@ -236,16 +302,15 @@ REGISTRAR(GridSampleImpl_cpu,
  * @param output_ Output Tensor.
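 * @note Shape sketch, stated as an assumption from the usual GridSample
 * convention rather than taken from this header: in0 is (N, C, H_in, W_in),
 * the grid in1 carries one normalized (x, y) pair in [-1, 1] per output
 * location, and out is (N, C, H_out, W_out); each pair is unnormalized to
 * input coordinates before the Nearest / Linear / Cubic paths below.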
 */
 template <class I, class O>
-void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
-                            const std::shared_ptr<Tensor>& in0,
-                            const std::shared_ptr<Tensor>& in1,
-                            const std::shared_ptr<Tensor>& out)
-{
-    const I* input = static_cast<const I *>(in0->getImpl()->rawPtr());
-    const I* input_ptr = input;
-    float* const grid = static_cast<float*>(in0->getImpl()->rawPtr());
-    float* grid_ptr = grid;
-    O* const output = static_cast<O*>(out->getImpl()->rawPtr());
+void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op &op,
+                                         const std::shared_ptr<Tensor> &in0,
+                                         const std::shared_ptr<Tensor> &in1,
+                                         const std::shared_ptr<Tensor> &out) {
+    const I *input = static_cast<const I *>(in0->getImpl()->rawPtr());
+    const I *input_ptr = input;
+    float *const grid = static_cast<float *>(in1->getImpl()->rawPtr());
+    float *grid_ptr = grid;
+    O *const output = static_cast<O *>(out->getImpl()->rawPtr());
 
     const std::size_t N = in0->dim(0);
     const std::size_t C = in0->dim(1);
@@ -267,25 +332,22 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
     const std::size_t out_H_s = out->stride(2);
     const std::size_t out_W_s = out->stride(3);
 
-
-    float* grid_ptr_N = grid;
-    const I* input_ptr_N = input;
-    O* output_ptr_N = output;
+    float *grid_ptr_N = grid;
+    const I *input_ptr_N = input;
+    O *output_ptr_N = output;
     for (std::size_t n = 0; n < N; ++n) {
         for (std::size_t grid_y = 0; grid_y < grid_H; ++grid_y) {
             for (std::size_t grid_x = 0; grid_x < grid_W; ++grid_x) {
-                O* output_ptr = output_ptr_N + grid_y*out_H_s + grid_y*out_W_s;
-                grid_ptr = grid_ptr_N + grid_y*grid_H_s + grid_x*grid_W_s;
+                O *output_ptr =
+                    output_ptr_N + grid_y * out_H_s + grid_x * out_W_s;
+                grid_ptr = grid_ptr_N + grid_y * grid_H_s + grid_x * grid_W_s;
                 /*
-                * change grid_x coord to match padding_mode
-                * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
-                * Handle computation of interpolation
-                * any value outside bounds is considered 0
-                * if nearest:
-                * else if linear:
-                * else if cubic:
-                * else : nothing
-                */
+                 * change grid_x coord to match padding_mode
+                 * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5]
+                 * according to align_corners
+                 * Handle computation of interpolation;
+                 * any value outside bounds is considered 0
+                 * (if nearest / else if linear / else if cubic / else:
+                 * nothing)
+                 */
                 float x = *grid_ptr;
                 float y = grid_ptr[grid_Coord_s];
                 x = update_normalized_coord_with_padding(x, op.paddingMode());
@@ -296,8 +358,10 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
                     const std::int64_t x_rounded = std::nearbyintf(x);
                     const std::int64_t y_rounded = std::nearbyintf(y);
 
-                    if (in_bound(x_rounded, 0, in_W) && in_bound(y_rounded, 0, in_H)) {
-                        input_ptr = input_ptr_N + y_rounded*in_H_s + x_rounded*in_W_s;
+                    if (in_bound(x_rounded, 0, in_W) &&
+                        in_bound(y_rounded, 0, in_H)) {
+                        input_ptr = input_ptr_N + y_rounded * in_H_s +
+                                    x_rounded * in_W_s;
                         for (std::size_t c = 0; c < C; ++c) {
                             *output_ptr = *input_ptr;
                             input_ptr += in_C_s;
@@ -310,97 +374,199 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
                         }
                     }
                 } else if (op.mode() == GridSample_Op::Mode::Linear) {
-                    const std::int64_t x_r = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode()); // right
-                    const std::int64_t x_l = update_unnormalized_coord_with_padding(x_r + 1, in_W, op.paddingMode()); // left
-
-                    const std::int64_t y_t = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode()); // top
-                    const std::int64_t y_b = update_unnormalized_coord_with_padding(y_t + 1, in_H, op.paddingMode()); // bottom
-
-                    const I* input_ptr_NC = input_ptr_N;
+                    const std::int64_t x_r =
+                        update_unnormalized_coord_with_padding(
+                            static_cast<std::int64_t>(std::floor(x)),
+                            in_W,
+                            op.paddingMode()); // right
+                    const std::int64_t x_l =
+                        update_unnormalized_coord_with_padding(
+                            x_r + 1,
+                            in_W,
+                            op.paddingMode()); // left
+
+                    const std::int64_t y_t =
+                        update_unnormalized_coord_with_padding(
+                            static_cast<std::int64_t>(std::floor(y)),
+                            in_H,
+                            op.paddingMode()); // top
+                    const std::int64_t y_b =
+                        update_unnormalized_coord_with_padding(
+                            y_t + 1,
+                            in_H,
+                            op.paddingMode()); // bottom
+
+                    const I *input_ptr_NC = input_ptr_N;
                     for (std::size_t c = 0; c < C; ++c) {
-                        const I f_tr = (in_bound(x_r, 0, in_W) && in_bound(y_t, 0, in_H)) ?
-                                        input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
-                                                    + static_cast<std::size_t>(x_r)*in_W_s]
+                        const I f_tr =
+                            (in_bound(x_r, 0, in_W) && in_bound(y_t, 0, in_H))
+                                ? input_ptr_NC[static_cast<std::size_t>(y_t) *
+                                                   in_H_s +
+                                               static_cast<std::size_t>(x_r) *
+                                                   in_W_s]
                                 : I(0);
-                        const I f_tl = (in_bound(x_l, 0, in_W) && in_bound(y_t, 0, in_H)) ?
-                                        input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
-                                                    + static_cast<std::size_t>(x_l)*in_W_s]
+                        const I f_tl =
+                            (in_bound(x_l, 0, in_W) && in_bound(y_t, 0, in_H))
+                                ? input_ptr_NC[static_cast<std::size_t>(y_t) *
+                                                   in_H_s +
+                                               static_cast<std::size_t>(x_l) *
+                                                   in_W_s]
                                 : I(0);
-                        const I f_br = (in_bound(x_r, 0, in_W) && in_bound(y_b, 0, in_H)) ?
-                                        input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
-                                                    + static_cast<std::size_t>(x_r)*in_W_s]
+                        const I f_br =
+                            (in_bound(x_r, 0, in_W) && in_bound(y_b, 0, in_H))
+                                ? input_ptr_NC[static_cast<std::size_t>(y_b) *
+                                                   in_H_s +
+                                               static_cast<std::size_t>(x_r) *
+                                                   in_W_s]
                                 : I(0);
-                        const I f_bl = (in_bound(x_l, 0, in_W) && in_bound(y_b, 0, in_H)) ?
-                                        input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
-                                                    + static_cast<std::size_t>(x_l)*in_W_s]
+                        const I f_bl =
+                            (in_bound(x_l, 0, in_W) && in_bound(y_b, 0, in_H))
+                                ? input_ptr_NC[static_cast<std::size_t>(y_b) *
+                                                   in_H_s +
+                                               static_cast<std::size_t>(x_l) *
+                                                   in_W_s]
                                 : I(0);
 
                         // compute weighted sum of the 4 corners
-                        const I w_tr = static_cast<I>((y - static_cast<float>(y_t))*(static_cast<float>(x_r) - x));
-                        const I w_tl = static_cast<I>((y - static_cast<float>(y_t))*(x - static_cast<float>(x_l)));
-                        const I w_br = static_cast<I>((static_cast<float>(y_b) - y)*(static_cast<float>(x_r) - x));
-                        const I w_bl = static_cast<I>((static_cast<float>(y_b) - y)*(x - static_cast<float>(x_l)));
-
-                        *output_ptr = static_cast<O>(w_tr*f_tr + w_tl*f_tl + w_br*f_br + w_bl*f_bl);
+                        const I w_tr =
+                            static_cast<I>((static_cast<float>(y_b) - y) *
+                                           (static_cast<float>(x_l) - x));
+                        const I w_tl =
+                            static_cast<I>((static_cast<float>(y_b) - y) *
+                                           (x - static_cast<float>(x_r)));
+                        const I w_br =
+                            static_cast<I>((y - static_cast<float>(y_t)) *
+                                           (static_cast<float>(x_l) - x));
+                        const I w_bl =
+                            static_cast<I>((y - static_cast<float>(y_t)) *
+                                           (x - static_cast<float>(x_r)));
+
+                        *output_ptr =
+                            static_cast<O>(w_tr * f_tr + w_tl * f_tl +
+                                           w_br * f_br + w_bl * f_bl);
 
                         input_ptr_NC += in_C_s;
                         output_ptr += out_C_s;
                     }
                 } else if (op.mode() == GridSample_Op::Mode::Cubic) {
                     /*
-                    * .. .. .. .. .. ..
-                    * .. 00 01 02 03 ..
-                    * .. 10 11 12 13 ..
-                    * .. 20 21 22 23 ..
-                    * .. 30 31 32 33 ..
-                    * .. .. .. .. .. ..
- */ - const std::int64_t x_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode()); - const std::int64_t x_0 = update_unnormalized_coord_with_padding(x_1 - 1, in_W, op.paddingMode()); - const std::int64_t x_2 = update_unnormalized_coord_with_padding(x_1 + 1, in_W, op.paddingMode()); - const std::int64_t x_3 = update_unnormalized_coord_with_padding(x_1 + 2, in_W, op.paddingMode()); - - const std::int64_t y_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode()); - const std::int64_t y_0 = update_unnormalized_coord_with_padding(y_1 - 1, in_H, op.paddingMode()); - const std::int64_t y_2 = update_unnormalized_coord_with_padding(y_1 + 1, in_H, op.paddingMode()); - const std::int64_t y_3 = update_unnormalized_coord_with_padding(y_1 + 2, in_H, op.paddingMode()); - - const I* input_ptr_NC = input_ptr_N; + * .. .. .. .. .. .. + * .. 00 01 02 03 .. + * .. 10 11 12 13 .. + * .. 20 21 22 23 .. + * .. 30 31 32 33 .. + * .. .. .. .. .. .. + */ + const std::int64_t x_1 = + update_unnormalized_coord_with_padding( + static_cast<std::int64_t>(std::floor(x)), + in_W, + op.paddingMode()); + const std::int64_t x_0 = + update_unnormalized_coord_with_padding( + x_1 - 1, + in_W, + op.paddingMode()); + const std::int64_t x_2 = + update_unnormalized_coord_with_padding( + x_1 + 1, + in_W, + op.paddingMode()); + const std::int64_t x_3 = + update_unnormalized_coord_with_padding( + x_1 + 2, + in_W, + op.paddingMode()); + + const std::int64_t y_1 = + update_unnormalized_coord_with_padding( + static_cast<std::int64_t>(std::floor(y)), + in_H, + op.paddingMode()); + const std::int64_t y_0 = + update_unnormalized_coord_with_padding( + y_1 - 1, + in_H, + op.paddingMode()); + const std::int64_t y_2 = + update_unnormalized_coord_with_padding( + y_1 + 1, + in_H, + op.paddingMode()); + const std::int64_t y_3 = + update_unnormalized_coord_with_padding( + y_1 + 2, + in_H, + op.paddingMode()); + + const I *input_ptr_NC = input_ptr_N; for (std::size_t c = 0; c < C; ++c) { - const I f_00 = in_bound(x_0, 0, in_W) && in_bound(y_0, 0, in_H) ? - input_ptr_NC[x_0*in_W_s + y_0*in_H_s] : I(0); - const I f_01 = in_bound(x_0, 0, in_W) && in_bound(y_1, 0, in_H) ? - input_ptr_NC[x_0*in_W_s + y_1*in_H_s] : I(0); - const I f_02 = in_bound(x_0, 0, in_W) && in_bound(y_2, 0, in_H) ? - input_ptr_NC[x_0*in_W_s + y_2*in_H_s] : I(0); - const I f_03 = in_bound(x_0, 0, in_W) && in_bound(y_3, 0, in_H) ? - input_ptr_NC[x_0*in_W_s + y_3*in_H_s] : I(0); - const I f_10 = in_bound(x_1, 0, in_W) && in_bound(y_0, 0, in_H) ? - input_ptr_NC[x_1*in_W_s + y_0*in_H_s] : I(0); - const I f_20 = in_bound(x_2, 0, in_W) && in_bound(y_0, 0, in_H) ? - input_ptr_NC[x_2*in_W_s + y_0*in_H_s] : I(0); - const I f_30 = in_bound(x_3, 0, in_W) && in_bound(y_0, 0, in_H) ? - input_ptr_NC[x_3*in_W_s + y_0*in_H_s] : I(0); - const I f_11 = in_bound(x_1, 0, in_W) && in_bound(y_1, 0, in_H) ? - input_ptr_NC[x_1*in_W_s + y_1*in_H_s] : I(0); - const I f_12 = in_bound(x_1, 0, in_W) && in_bound(y_2, 0, in_H) ? - input_ptr_NC[x_1*in_W_s + y_2*in_H_s] : I(0); - const I f_13 = in_bound(x_1, 0, in_W) && in_bound(y_3, 0, in_H) ? - input_ptr_NC[x_1*in_W_s + y_3*in_H_s] : I(0); - const I f_21 = in_bound(x_2, 0, in_W) && in_bound(y_1, 0, in_H) ? - input_ptr_NC[x_2*in_W_s + y_1*in_H_s] : I(0); - const I f_22 = in_bound(x_2, 0, in_W) && in_bound(y_2, 0, in_H) ? - input_ptr_NC[x_2*in_W_s + y_2*in_H_s] : I(0); - const I f_23 = in_bound(x_2, 0, in_W) && in_bound(y_3, 0, in_H) ? 
- input_ptr_NC[x_2*in_W_s + y_3*in_H_s] : I(0); - const I f_31 = in_bound(x_3, 0, in_W) && in_bound(y_1, 0, in_H) ? - input_ptr_NC[x_3*in_W_s + y_1*in_H_s] : I(0); - const I f_32 = in_bound(x_3, 0, in_W) && in_bound(y_2, 0, in_H) ? - input_ptr_NC[x_3*in_W_s + y_2*in_H_s] : I(0); - const I f_33 = in_bound(x_3, 0, in_W) && in_bound(y_3, 0, in_H) ? - input_ptr_NC[x_3*in_W_s + y_3*in_H_s] : I(0); + const I f_00 = + in_bound(x_0, 0, in_W) && in_bound(y_0, 0, in_H) + ? input_ptr_NC[x_0 * in_W_s + y_0 * in_H_s] + : I(0); + const I f_01 = + in_bound(x_0, 0, in_W) && in_bound(y_1, 0, in_H) + ? input_ptr_NC[x_0 * in_W_s + y_1 * in_H_s] + : I(0); + const I f_02 = + in_bound(x_0, 0, in_W) && in_bound(y_2, 0, in_H) + ? input_ptr_NC[x_0 * in_W_s + y_2 * in_H_s] + : I(0); + const I f_03 = + in_bound(x_0, 0, in_W) && in_bound(y_3, 0, in_H) + ? input_ptr_NC[x_0 * in_W_s + y_3 * in_H_s] + : I(0); + const I f_10 = + in_bound(x_1, 0, in_W) && in_bound(y_0, 0, in_H) + ? input_ptr_NC[x_1 * in_W_s + y_0 * in_H_s] + : I(0); + const I f_20 = + in_bound(x_2, 0, in_W) && in_bound(y_0, 0, in_H) + ? input_ptr_NC[x_2 * in_W_s + y_0 * in_H_s] + : I(0); + const I f_30 = + in_bound(x_3, 0, in_W) && in_bound(y_0, 0, in_H) + ? input_ptr_NC[x_3 * in_W_s + y_0 * in_H_s] + : I(0); + const I f_11 = + in_bound(x_1, 0, in_W) && in_bound(y_1, 0, in_H) + ? input_ptr_NC[x_1 * in_W_s + y_1 * in_H_s] + : I(0); + const I f_12 = + in_bound(x_1, 0, in_W) && in_bound(y_2, 0, in_H) + ? input_ptr_NC[x_1 * in_W_s + y_2 * in_H_s] + : I(0); + const I f_13 = + in_bound(x_1, 0, in_W) && in_bound(y_3, 0, in_H) + ? input_ptr_NC[x_1 * in_W_s + y_3 * in_H_s] + : I(0); + const I f_21 = + in_bound(x_2, 0, in_W) && in_bound(y_1, 0, in_H) + ? input_ptr_NC[x_2 * in_W_s + y_1 * in_H_s] + : I(0); + const I f_22 = + in_bound(x_2, 0, in_W) && in_bound(y_2, 0, in_H) + ? input_ptr_NC[x_2 * in_W_s + y_2 * in_H_s] + : I(0); + const I f_23 = + in_bound(x_2, 0, in_W) && in_bound(y_3, 0, in_H) + ? input_ptr_NC[x_2 * in_W_s + y_3 * in_H_s] + : I(0); + const I f_31 = + in_bound(x_3, 0, in_W) && in_bound(y_1, 0, in_H) + ? input_ptr_NC[x_3 * in_W_s + y_1 * in_H_s] + : I(0); + const I f_32 = + in_bound(x_3, 0, in_W) && in_bound(y_2, 0, in_H) + ? input_ptr_NC[x_3 * in_W_s + y_2 * in_H_s] + : I(0); + const I f_33 = + in_bound(x_3, 0, in_W) && in_bound(y_3, 0, in_H) + ? 
input_ptr_NC[x_3 * in_W_s + y_3 * in_H_s]
+                                           : I(0);
 
                     const I mx_11 = (f_21 - f_01) / I(2);
                     const I mx_12 = (f_22 - f_02) / I(2);
@@ -412,38 +578,63 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
                     const I my_21 = (f_22 - f_20) / I(2);
                     const I my_22 = (f_23 - f_21) / I(2);
 
-                    const I mxy_11 = (f_22 - f_20 - f_02
-                                    + f_00) / I(4);
-                    const I mxy_12 = (f_23 - f_21 - f_03
-                                    + f_01) / I(4);
-                    const I mxy_21 = (f_32 - f_30 - f_12
-                                    + f_10) / I(4);
-                    const I mxy_22 = (f_33 - f_31 - f_13
-                                    + f_11) / I(4);
+                    const I mxy_11 = (f_22 - f_20 - f_02 + f_00) / I(4);
+                    const I mxy_12 = (f_23 - f_21 - f_03 + f_01) / I(4);
+                    const I mxy_21 = (f_32 - f_30 - f_12 + f_10) / I(4);
+                    const I mxy_22 = (f_33 - f_31 - f_13 + f_11) / I(4);
 
                     const I a_00 = f_11;
                     const I a_10 = mx_11;
-                    const I a_20 = I(3)*(f_21 - f_11) - I(2)*mx_11 - mx_21;
-                    const I a_30 = I(2)*(f_11 - f_21) + mx_11 + mx_21;
+                    const I a_20 =
+                        I(3) * (f_21 - f_11) - I(2) * mx_11 - mx_21;
+                    const I a_30 = I(2) * (f_11 - f_21) + mx_11 + mx_21;
                     const I a_01 = my_11;
                     const I a_11 = mxy_11;
-                    const I a_21 = I(3)*(my_21 - my_11) - I(2)*mxy_11 - mxy_21;
-                    const I a_31 = I(2)*(my_11 - my_21) + mxy_11 + mxy_21;
-                    const I a_02 = I(3)*(f_12 - f_11) - I(2)*my_11 - my_12;
-                    const I a_12 = I(3)*(mx_12 - mx_11) - I(2)*mxy_11 - mxy_12;
-                    const I a_22 = I(9)*(f_11 + f_22 - f_21 - f_12) + I(3)*(I(2)*(mx_11 - mx_12 + my_11 - my_21) + mx_21 - mx_22 + my_12 - my_22) + mxy_22 + I(2)*(mxy_12 + mxy_21 + I(2)*mxy_11);
-                    const I a_32 = - mxy_12 - mxy_22 + I(2)*(my_22 - my_12 - mxy_11 - mxy_21 + I(2)*(my_21 - my_11) + I(3)*(f_21 + f_12 - f_11 - f_22)) + I(3)*(mx_12 + mx_22 - mx_11 - mx_21);
-                    const I a_03 = I(2)*(f_11 - f_12) + my_11 + my_12;
-                    const I a_13 = I(2)*(mx_11 - mx_12) + mxy_11 + mxy_12;
-                    const I a_23 = - mxy_21 - mxy_22 + I(2)*(-mx_21 + mx_22 - mxy_11 - mxy_12 + I(2)*(mx_12 - mx_11) + I(3)*(f_12 + f_21 - f_11 - f_22)) + I(3)*(my_21 + my_22 - my_11 - my_12);
-                    const I a_33 = mxy_11 + mxy_21 + mxy_12 + mxy_22 + I(2)*(mx_11 + mx_21 - mx_12 - mx_22 + my_11 - my_21 + my_12 - my_22 + I(2)*(f_11 - f_21 - f_12 + f_22));
-
-                    const I x2 = static_cast<I>(x*x);
-                    const I x3 = static_cast<I>(x*x*x);
-                    const I y2 = static_cast<I>(y*y);
-                    const I y3 = static_cast<I>(y*y*y);
-
-                    *output_ptr = static_cast<O>( \
-                        a_00 + a_10*x + a_20*x2 + a_30*x3 \
-                        + a_01*y + a_11*x*y + a_21*x2*y + a_31*x3*y \
-                        + a_02*y2 + a_12*x*y2 + a_22*x2*y2 + a_32*x3*y2 \
-                        + a_03*y3 + a_13*x*y3 + a_23*x2*y3 + a_33*x3*y3);
+                    const I a_21 =
+                        I(3) * (my_21 - my_11) - I(2) * mxy_11 - mxy_21;
+                    const I a_31 =
+                        I(2) * (my_11 - my_21) + mxy_11 + mxy_21;
+                    const I a_02 =
+                        I(3) * (f_12 - f_11) - I(2) * my_11 - my_12;
+                    const I a_12 =
+                        I(3) * (mx_12 - mx_11) - I(2) * mxy_11 - mxy_12;
+                    const I a_22 =
+                        I(9) * (f_11 + f_22 - f_21 - f_12) +
+                        I(3) * (I(2) * (mx_11 - mx_12 + my_11 - my_21) +
+                                mx_21 - mx_22 + my_12 - my_22) +
+                        mxy_22 + I(2) * (mxy_12 + mxy_21 + I(2) * mxy_11);
+                    const I a_32 =
+                        -mxy_12 - mxy_22 +
+                        I(2) * (my_22 - my_12 - mxy_11 - mxy_21 +
+                                I(2) * (my_21 - my_11) +
+                                I(3) * (f_21 + f_12 - f_11 - f_22)) +
+                        I(3) * (mx_12 + mx_22 - mx_11 - mx_21);
+                    const I a_03 = I(2) * (f_11 - f_12) + my_11 + my_12;
+                    const I a_13 =
+                        I(2) * (mx_11 - mx_12) + mxy_11 + mxy_12;
+                    const I a_23 =
+                        -mxy_21 - mxy_22 +
+                        I(2) * (-mx_21 + mx_22 - mxy_11 - mxy_12 +
+                                I(2) * (mx_12 - mx_11) +
+                                I(3) * (f_12 + f_21 - f_11 - f_22)) +
+                        I(3) * (my_21 + my_22 - my_11 - my_12);
+                    const I a_33 =
+                        mxy_11 + mxy_21 + mxy_12 + mxy_22 +
+                        I(2) * (mx_11 + mx_21 - mx_12 - mx_22 + my_11 -
+                                my_21 + my_12 - my_22
+ + I(2) * (f_11 - f_21 - f_12 + f_22)); + + const I x2 = static_cast<I>(x * x); + const I x3 = static_cast<I>(x * x * x); + const I y2 = static_cast<I>(y * y); + const I y3 = static_cast<I>(y * y * y); + + *output_ptr = static_cast<O>( + a_00 + a_10 * x + a_20 * x2 + a_30 * x3 + + a_01 * y + a_11 * x * y + a_21 * x2 * y + + a_31 * x3 * y + a_02 * y2 + a_12 * x * y2 + + a_22 * x2 * y2 + a_32 * x3 * y2 + a_03 * y3 + + a_13 * x * y3 + a_23 * x2 * y3 + a_33 * x3 * y3); input_ptr_NC += in_C_s; output_ptr += out_C_s; @@ -461,17 +652,34 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op, // Kernels registration to implementation entry point // only accept 1st input with only 2 spatial feat. (nb dims = 2) REGISTRAR(GridSampleImpl_cpu, - {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}}, - {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr}); + {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, + {DataType::Any}}, + {{DataType::Float16}}}, + {ProdConso::defaultModel, + Aidge::GridSampleImpl2D_cpu_forward_kernel<half_float::half, + half_float::half>, + nullptr}); REGISTRAR(GridSampleImpl_cpu, - {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}}, - {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<float, float>, nullptr}); + {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, + {DataType::Any}}, + {{DataType::Float32}}}, + {ProdConso::defaultModel, + Aidge::GridSampleImpl2D_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(GridSampleImpl_cpu, - {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}}, - {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<double, double>, nullptr}); + {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, + {DataType::Any}}, + {{DataType::Float64}}}, + {ProdConso::defaultModel, + Aidge::GridSampleImpl2D_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(GridSampleImpl_cpu, - {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Int32}}}, - {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr}); -} // namespace Aidge + {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, + {DataType::Any}}, + {{DataType::Int32}}}, + {ProdConso::defaultModel, + Aidge::GridSampleImpl2D_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp index 1e8c1a14435f53ad7a63b327944e0bb8c70c8661..31fab26f763f5c943ebb212d704c7888796039a9 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp @@ -16,26 +16,21 @@ #include <tuple> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/LeakyReLU.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend -using LeakyReLUImpl_cpu = OperatorImpl_cpu<LeakyReLU_Op, - void(const float, - std::size_t, - const void*, - void*), - void(const float, - std::size_t, - const void*, - void*)>; +using LeakyReLUImpl_cpu = + 
OperatorImpl_cpu<LeakyReLU_Op, + void(const float, std::size_t, const void *, void *), + void(const float, std::size_t, const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(LeakyReLU_Op, "cpu", Aidge::LeakyReLUImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp index bc856f703aee8ba422887d43cb96db2132fc4603..546a1f47b4f140e548aac95f6dd1382915b48496 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp @@ -19,12 +19,12 @@ namespace Aidge { template <class I, class O> void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_, - std::size_t inputLenght, - const void* input_, - void* output_) { + std::size_t inputLenght, + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); const I negativeSlope = static_cast<const I>(negativeSlope_); for (std::size_t i = 0; i < inputLenght; ++i) { @@ -34,29 +34,35 @@ void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_, template <class I, class O> void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_, - std::size_t inputLenght, - const void* input_, - void* output_) { + std::size_t inputLenght, + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); const I negativeSlope = static_cast<const I>(negativeSlope_); for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = (input[i] > 0) ? input[i] : negativeSlope*input[i]; + output[i] = (input[i] > 0) ? 
input[i] : negativeSlope * input[i]; } } // Kernels registration to implementation entry point REGISTRAR(LeakyReLUImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>, Aidge::LeakyReLUImpl_cpu_backward_kernel<float, float>}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>, + Aidge::LeakyReLUImpl_cpu_backward_kernel<float, float>}); REGISTRAR(LeakyReLUImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>, Aidge::LeakyReLUImpl_cpu_backward_kernel<double, double>}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>, + Aidge::LeakyReLUImpl_cpu_backward_kernel<double, double>}); REGISTRAR(LeakyReLUImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::LeakyReLUImpl_cpu_backward_kernel<int32_t, int32_t>}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::LeakyReLUImpl_cpu_forward_kernel<int32_t, int32_t>, + Aidge::LeakyReLUImpl_cpu_backward_kernel<int32_t, int32_t>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/LnImpl.hpp b/include/aidge/backend/cpu/operator/LnImpl.hpp old mode 100755 new mode 100644 index d48a7ae437d9ed1c7769d3628691993c1e9dcb90..5e9487af367cf0c7edebf2ae599e9eaf40eaa9c8 --- a/include/aidge/backend/cpu/operator/LnImpl.hpp +++ b/include/aidge/backend/cpu/operator/LnImpl.hpp @@ -12,22 +12,23 @@ #ifndef AIDGE_CPU_OPERATOR_LNIMPL_H_ #define AIDGE_CPU_OPERATOR_LNIMPL_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Ln.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend -using LnImpl_cpu = OperatorImpl_cpu<Ln_Op, - void(const std::size_t, const void*, void*), - void(const std::size_t, const void*, const void*, void*)>; +using LnImpl_cpu = OperatorImpl_cpu< + Ln_Op, + void(const std::size_t, const void *, void *), + void(const std::size_t, const void *, const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(Ln_Op, "cpu", Aidge::LnImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_LNIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp old mode 100755 new mode 100644 index b30b05bb806de08d4e70c67e66979fb3138980df..3c89e91ecebdc1711b036ee39028533c1732c9af --- a/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp @@ -19,49 +19,54 @@ namespace Aidge { template <class I, class O> void LnImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - const float eps = 1.0e-20f; + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + const float eps = 1.0e-20f; -//#pragma omp parallel for if (inputLenght > 1024) + // #pragma omp parallel for if (inputLenght > 1024) for (std::size_t i = 0; i < inputLenght; ++i) { - 
if (input[i] > I(eps)) { - output[i] = std::log(input[i]); - } else { - output[i] = std::log(I(eps)); - } + if (input[i] > I(eps)) { + output[i] = std::log(input[i]); + } else { + output[i] = std::log(I(eps)); + } } } template <class I, class GI, class GO> void LnImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* input_, const void* grad_output_, - void* grad_input_) { - - const I* input = static_cast<const I*>(input_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); - const float eps = 1.0e-20f; - + const void *input_, + const void *grad_output_, + void *grad_input_) { + + const I *input = static_cast<const I *>(input_); + const GO *grad_output = static_cast<const GO *>(grad_output_); + GI *grad_input = static_cast<GI *>(grad_input_); + const float eps = 1.0e-20f; + for (std::size_t i = 0; i < inputLenght; ++i) { - if (input[i] > I(eps)) { - grad_input[i] = grad_output[i] / input[i]; - } else { - grad_input[i] = GI(0); - } + if (input[i] > I(eps)) { + grad_input[i] = grad_output[i] / input[i]; + } else { + grad_input[i] = GI(0); + } } } // Kernels registration to implementation entry point REGISTRAR(LnImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<float, float>, Aidge::LnImpl_cpu_backward_kernel<float, float, float>}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::LnImpl_cpu_forward_kernel<float, float>, + Aidge::LnImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(LnImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<double, double>, Aidge::LnImpl_cpu_backward_kernel<double, double, double>}); -} // namespace Aidge + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::LnImpl_cpu_forward_kernel<double, double>, + Aidge::LnImpl_cpu_backward_kernel<double, double, double>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/MatMulImpl.hpp b/include/aidge/backend/cpu/operator/MatMulImpl.hpp index c07aa5f8ffa62f5fffe3ca02638cc3c66cdaeedb..70fc8d450a30868d2a7c29969e441f1c389d0b4b 100644 --- a/include/aidge/backend/cpu/operator/MatMulImpl.hpp +++ b/include/aidge/backend/cpu/operator/MatMulImpl.hpp @@ -16,20 +16,24 @@ #include <memory> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/MatMul.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend using MatMulImpl_cpu = OperatorImpl_cpu<MatMul_Op, - void(const std::size_t, const std::size_t, const std::size_t, - const void *, const void *, void *)>; + void(const std::size_t, + const std::size_t, + const std::size_t, + const void *, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(MatMul_Op, "cpu", Aidge::MatMulImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp index 5fc13baf49b1d0606eb4af5a54eec83fa5dce22a..5b5ed930a71d2d0f0184e1a95390f239704844ed 100644 --- a/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp @@ -17,19 +17,24 @@ namespace Aidge { 
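 // Illustrative usage sketch for the forward kernel below (hypothetical
 // buffers, for exposition only): C = A * B, with A of size n x k and B of
 // size k x m, both row-major.
 //
 //     const float A[6] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};    // 2 x 3
 //     const float B[6] = {7.f, 8.f, 9.f, 10.f, 11.f, 12.f}; // 3 x 2
 //     float C[4];                                           // 2 x 2
 //     MatMulImpl_cpu_forward_kernel<float, float>(2, 3, 2, A, B, C);
 //     // C now holds {58.f, 64.f, 139.f, 154.f}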
template <class I, class O> -void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m, - const void* input1_, const void* input2_, void* __restrict output_) { +void MatMulImpl_cpu_forward_kernel(const std::size_t n, + const std::size_t k, + const std::size_t m, + const void *input1_, + const void *input2_, + void *__restrict output_) { // FIXME: missing MatMul parameters as arguments - const I* input1 = static_cast<const I*>(input1_); - const I* input2 = static_cast<const I*>(input2_); - O* __restrict output = static_cast<O* __restrict>(output_); + const I *input1 = static_cast<const I *>(input1_); + const I *input2 = static_cast<const I *>(input2_); + O *__restrict output = static_cast<O *__restrict>(output_); std::memset(output, O(0), n * m * sizeof(O)); for (std::size_t i = 0; i < n; ++i) { for (std::size_t l = 0; l < k; ++l) { for (std::size_t j = 0; j < m; ++j) { - output[i*m + j] += static_cast<O>(input1[i*k + l] * input2[l*m + j]); + output[i * m + j] += + static_cast<O>(input1[i * k + l] * input2[l * m + j]); } } } @@ -37,14 +42,20 @@ void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, con // Kernels registration to implementation entry point REGISTRAR(MatMulImpl_cpu, - {DataType::Float32}, - {ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::defaultModel, + Aidge::MatMulImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(MatMulImpl_cpu, - {DataType::Float64}, - {ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::defaultModel, + Aidge::MatMulImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(MatMulImpl_cpu, - {DataType::Int32}, - {ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::defaultModel, + Aidge::MatMulImpl_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp index 68cc3621514de97d9837e10bcf90218abe559aaa..ceafebf4e6e1ff64ce144f8bbf0ceef88d150f88 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp @@ -17,25 +17,26 @@ #include <tuple> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/MaxPooling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend using MaxPooling2D_Op = MaxPooling_Op<2>; -using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const bool, - const std::array<DimSize_t, 4> &, - const void *, - void *)>; +using MaxPoolingImpl2D_cpu = + OperatorImpl_cpu<MaxPooling_Op<2>, + void(const std::array<DimSize_t, 2> &, + const std::array<DimSize_t, 2> &, + const bool, + const std::array<DimSize_t, 4> &, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(MaxPooling2D_Op, "cpu", Aidge::MaxPoolingImpl2D_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ */ diff --git 
a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp index 7b6f04f141eb701849a8d436561bcf9e37471cfa..0d853a010e141c7f77efd29dd42c610f6cfdcbf6 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp @@ -16,8 +16,8 @@ #include <cmath> #include <tuple> -#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/data/Data.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -33,24 +33,25 @@ namespace Aidge { * @param output_ Output Tensor. */ template <class I, class O> -void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& kernelDims, - const bool /*ceilMode*/, - const std::array<DimSize_t, 4> &dims, - const void *input_, - void *output_) { +void MaxPoolingImpl2D_cpu_forward_kernel( + const std::array<DimSize_t, 2> &strideDims, + const std::array<DimSize_t, 2> &kernelDims, + const bool /*ceilMode*/, + const std::array<DimSize_t, 4> &dims, + const void *input_, + void *output_) { // FIXME: missing convolution parameters as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / - static_cast<float>(strideDims[0]))); + const std::size_t oxSize = static_cast<std::size_t>(std::floor( + static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / - static_cast<float>(strideDims[1]))); + const std::size_t oySize = static_cast<std::size_t>(std::floor( + static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / + static_cast<float>(strideDims[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -60,17 +61,32 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { - const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; + const std::size_t oIndex = + (ch + batch * dims[1]) * oxSize * oySize; + const std::size_t iIndex = + (ch + batch * dims[1]) * dims[2] * dims[3]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx); + const signedsize difx = + static_cast<signedsize>(-ox * strideDims[0]); + const std::size_t sxMin = + static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = + (static_cast<signedsize>(dims[2]) + difx) < 0 + ? 0 + : ((dims[2] + difx) > kernelDims[0] ? 
kernelDims[0] + : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const signedsize dify = + static_cast<signedsize>(-oy * strideDims[1]); + const std::size_t syMin = static_cast<std::size_t>( + std::max(dify, signedsize(0))); + const std::size_t syMax = + (static_cast<signedsize>(dims[3]) + dify) < 0 + ? 0 + : ((dims[3] + dify) > kernelDims[1] + ? kernelDims[1] + : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox * oySize + oy; const std::size_t ix = ox * strideDims[0]; const std::size_t iy = oy * strideDims[1]; @@ -78,11 +94,12 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD bool valid = false; for (unsigned int channel = 0; channel < dims[1]; - ++channel){ + ++channel) { for (unsigned int sy = syMin; sy < syMax; ++sy) { - for (unsigned int sx = sxMin; sx < sxMax; ++sx) - { - const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + for (unsigned int sx = sxMin; sx < sxMax; ++sx) { + const I value = + input[iIndex + (ix + sx) * dims[3] + + (iy + sy)]; if (!valid || value > poolValue) { poolValue = value; @@ -98,7 +115,7 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD } } -//N2D2 version +// N2D2 version /* template <class T> void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha, @@ -127,16 +144,13 @@ void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha, const unsigned int syMin = (unsigned int)std::max( desc.padding[1] - (int)(oy * desc.stride[1]), 0); const unsigned int sxMax = Utils::clamp - <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0], - 0, - desc.pool[0]); - const unsigned int syMax = Utils::clamp - <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1], - 0, - desc.pool[1]); + <int>(inputs.dimX() + desc.padding[0] - ox * +desc.stride[0], 0, desc.pool[0]); const unsigned int syMax = Utils::clamp + <int>(inputs.dimY() + desc.padding[1] - oy * +desc.stride[1], 0, desc.pool[1]); - const int ix = (int)(ox * desc.stride[0]) - desc.padding[0]; - const int iy = (int)(oy * desc.stride[1]) - desc.padding[1]; + const int ix = (int)(ox * desc.stride[0]) - +desc.padding[0]; const int iy = (int)(oy * desc.stride[1]) - desc.padding[1]; T poolValue(0.0); @@ -201,14 +215,20 @@ void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha, // Kernels registration to implementation entry point REGISTRAR(MaxPoolingImpl2D_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(MaxPoolingImpl2D_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(MaxPoolingImpl2D_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + 
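// MaxPoolingImpl2D_cpu_forward_kernel above sizes its output as
// floor((in - kernel + stride) / stride) per spatial axis (valid pooling,
// no padding) and clamps the window via sxMin/sxMax and syMin/syMax at the
// borders; note that its innermost `channel` loop never uses its index, so
// it appears to rescan the same window dims[1] times. A single-channel
// sketch of the intended computation, assuming a row-major float image:

#include <algorithm>
#include <cstddef>
#include <vector>

std::vector<float> maxpool2d(const std::vector<float> &in,
                             std::size_t h, std::size_t w,   // input H, W
                             std::size_t kh, std::size_t kw, // kernel dims
                             std::size_t sh, std::size_t sw) // strides
{
    const std::size_t oh = (h - kh + sh) / sh; // floor((h - kh + sh) / sh)
    const std::size_t ow = (w - kw + sw) / sw;
    std::vector<float> out(oh * ow);
    for (std::size_t oy = 0; oy < oh; ++oy) {
        for (std::size_t ox = 0; ox < ow; ++ox) {
            const std::size_t iy = oy * sh, ix = ox * sw;
            const std::size_t yEnd = std::min(iy + kh, h); // border clamp
            const std::size_t xEnd = std::min(ix + kw, w);
            float best = in[iy * w + ix];
            for (std::size_t y = iy; y < yEnd; ++y)
                for (std::size_t x = ix; x < xEnd; ++x)
                    best = std::max(best, in[y * w + x]);
            out[oy * ow + ox] = best;
        }
    }
    return out;
}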
Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/MulImpl.hpp b/include/aidge/backend/cpu/operator/MulImpl.hpp index 05fceba17471229d83d9f8738614b2e747121b49..806d73d9b3e2ec1681ccb04b6c7ef14a8f28bc1e 100644 --- a/include/aidge/backend/cpu/operator/MulImpl.hpp +++ b/include/aidge/backend/cpu/operator/MulImpl.hpp @@ -12,36 +12,36 @@ #ifndef AIDGE_CPU_OPERATOR_MULIMPL_H_ #define AIDGE_CPU_OPERATOR_MULIMPL_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Mul.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend using MulImpl_cpu = OperatorImpl_cpu<Mul_Op, - void(const std::vector<std::size_t>&, - const std::vector<std::size_t>&, - const std::vector<std::size_t>&, - const void*, - const void*, - void*), - void(const std::size_t, - const std::size_t, - const std::size_t, - const std::vector<std::size_t>, - const std::vector<std::size_t>, - const void*, - const void*, - const void*, - void*, - void*)>; + void(const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const void *, + const void *, + void *), + void(const std::size_t, + const std::size_t, + const std::size_t, + const std::vector<std::size_t>, + const std::vector<std::size_t>, + const void *, + const void *, + const void *, + void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Mul_Op, "cpu", Aidge::MulImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MULIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp index c015b8f0182608fecd3da94220e9411decfd186c..d1e7caab3359198c87814d31efd906301a99c3bc 100644 --- a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp @@ -14,73 +14,69 @@ #include "aidge/utils/Registrar.hpp" -#include <cstdint> // std::int32_t, std::int64_t +#include <cstdint> // std::int32_t, std::int64_t #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/operator/MulImpl.hpp" namespace Aidge { template <class I1, class I2, class O> -void MulImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, - const std::vector<std::size_t>& input2Dims, - const std::vector<std::size_t>& outputDims, - const void* input1_, - const void* input2_, - void* output_) { +void MulImpl_cpu_forward_kernel(const std::vector<std::size_t> &input1Dims, + const std::vector<std::size_t> &input2Dims, + const std::vector<std::size_t> &outputDims, + const void *input1_, + const void *input2_, + void *output_) { - const I1* input_1 = static_cast<const I1*>(input1_); - const I2* input_2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); + const I1 *input_1 = static_cast<const I1 *>(input1_); + const I2 *input_2 = static_cast<const I2 *>(input2_); + O *output = static_cast<O *>(output_); size_t totalElements = 1; for (size_t dimSize : outputDims) { totalElements *= dimSize; } - for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) - { - std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); + for (std::size_t 
oIndex = 0; oIndex < totalElements; ++oIndex) { + std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); - std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); - std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); output[oIndex] = input_1[idx1] * input_2[idx2]; } } template <class I1, class I2, class O> -void MulImpl_cpu_backward_kernel(const std::size_t input0Length, +void MulImpl_cpu_backward_kernel(const std::size_t input0Length, const std::size_t input1Length, const std::size_t grad0Length, const std::vector<std::size_t> input0Dims, const std::vector<std::size_t> input1Dims, - const void* input0_, - const void* input1_, - const void* grad_output_, - void* gradientInput0, - void* gradientInput1) -{ - const auto* input0 = static_cast<const I1*>(input0_); - const auto* input1 = static_cast<const I1*>(input1_); - const auto* grad_output = static_cast<const O*>(grad_output_); - auto* grad_input_0 = static_cast<I1*>(gradientInput0); - auto* grad_input_1 = static_cast<I2*>(gradientInput1); - - - if(input0Dims.size() >= input1Dims.size()) - { - AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors"); - - for(auto i = 0U; i < input0Length; ++i) - { + const void *input0_, + const void *input1_, + const void *grad_output_, + void *gradientInput0, + void *gradientInput1) { + const auto *input0 = static_cast<const I1 *>(input0_); + const auto *input1 = static_cast<const I2 *>(input1_); + const auto *grad_output = static_cast<const O *>(grad_output_); + auto *grad_input_0 = static_cast<I1 *>(gradientInput0); + auto *grad_input_1 = static_cast<I2 *>(gradientInput1); + + if (input0Dims.size() >= input1Dims.size()) { + AIDGE_ASSERT( + input0Length == grad0Length, + "Incorrect dimensions between Mul input and output tensors"); + + for (auto i = 0U; i < input0Length; ++i) { const auto indices = getMultiDimIndices(input1Dims, i); const auto flattenedIndex = getFlattenedIndex(input1Dims, indices); grad_input_0[i] = input1[flattenedIndex] * grad_output[i]; } - for(std::size_t i = 0 ; i < grad0Length; ++i) - { + for (std::size_t i = 0; i < grad0Length; ++i) { const auto indices = getMultiDimIndices(input1Dims, i); const auto flattenedIndex = getFlattenedIndex(input1Dims, indices); @@ -88,18 +84,18 @@ void MulImpl_cpu_backward_kernel(const std::size_t input0Length, } } else { - AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors"); + AIDGE_ASSERT( + input1Length == grad0Length, + "Incorrect dimensions between Mul input and output tensors"); - for(auto i = 0U; i < input1Length; ++i) - { + for (auto i = 0U; i < input1Length; ++i) { const auto indices = getMultiDimIndices(input0Dims, i); const auto flattenedIndex = getFlattenedIndex(input0Dims, indices); grad_input_1[i] = input0[flattenedIndex] * grad_output[i]; } - for(std::size_t i = 0 ; i < grad0Length; ++i) - { + for (std::size_t i = 0; i < grad0Length; ++i) { const auto indices = getMultiDimIndices(input0Dims, i); const auto flattenedIndex = getFlattenedIndex(input0Dims, indices); @@ -110,17 +106,33 @@ void MulImpl_cpu_backward_kernel(const std::size_t input0Length, // Kernels registration to implementation entry point REGISTRAR(MulImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, float, float>, Aidge::MulImpl_cpu_backward_kernel<float, float, float>}); + 
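// Both Mul kernels lean on getMultiDimIndices / getFlattenedIndex to get
// NumPy-style broadcasting: each flat output index is expanded into
// per-axis coordinates, which are then re-flattened against each input's
// shape, with any size-1 axis pinned to coordinate 0. A minimal sketch of
// that index round-trip, assuming input shapes already padded to the
// output rank (as getBroadcastedDims arranges):

#include <cstddef>
#include <vector>

std::vector<std::size_t> multiDimIndices(const std::vector<std::size_t> &dims,
                                         std::size_t flat) {
    std::vector<std::size_t> idx(dims.size());
    for (std::size_t i = dims.size(); i-- > 0;) {
        idx[i] = flat % dims[i];
        flat /= dims[i];
    }
    return idx;
}

std::size_t flattenedIndex(const std::vector<std::size_t> &dims,
                           const std::vector<std::size_t> &idx) {
    std::size_t flat = 0;
    for (std::size_t i = 0; i < dims.size(); ++i)
        flat = flat * dims[i] + (dims[i] == 1 ? 0 : idx[i]); // broadcast axis -> 0
    return flat;
}

// e.g. output dims {2, 3}, input dims {1, 3}: flat output index 4 expands
// to {1, 1} and re-flattens to input index 1, reusing the single row.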
{DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::MulImpl_cpu_forward_kernel<float, float, float>, + Aidge::MulImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(MulImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<double, double, double>, Aidge::MulImpl_cpu_backward_kernel<double, double, double>}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::MulImpl_cpu_forward_kernel<double, double, double>, + Aidge::MulImpl_cpu_backward_kernel<double, double, double>}); REGISTRAR(MulImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, Aidge::MulImpl_cpu_backward_kernel<std::int32_t, std::int32_t, std::int32_t>}); + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::MulImpl_cpu_forward_kernel<std::int32_t, + std::int32_t, + std::int32_t>, + Aidge::MulImpl_cpu_backward_kernel<std::int32_t, + std::int32_t, + std::int32_t>}); REGISTRAR(MulImpl_cpu, - {DataType::Int64}, - {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, Aidge::MulImpl_cpu_backward_kernel<std::int64_t, std::int64_t, std::int64_t>}); -} // namespace Aidge + {DataType::Int64}, + {ProdConso::inPlaceModel, + Aidge::MulImpl_cpu_forward_kernel<std::int64_t, + std::int64_t, + std::int64_t>, + Aidge::MulImpl_cpu_backward_kernel<std::int64_t, + std::int64_t, + std::int64_t>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MULIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/OperatorImpl.hpp b/include/aidge/backend/cpu/operator/OperatorImpl.hpp index 45f099e8140395181d8be1600c61024efaa9c6a7..f3cd825dc1b48c21f09f3e063922ce0b80424bce 100644 --- a/include/aidge/backend/cpu/operator/OperatorImpl.hpp +++ b/include/aidge/backend/cpu/operator/OperatorImpl.hpp @@ -12,9 +12,9 @@ #ifndef AIDGE_CPU_OPERATOR_IMPL_H_ #define AIDGE_CPU_OPERATOR_IMPL_H_ -#include <cstddef> // std::size_t +#include <cstddef> // std::size_t #include <memory> -#include <tuple> // std::tuple +#include <tuple> // std::tuple #include <vector> #include "aidge/backend/OperatorImpl.hpp" @@ -23,30 +23,36 @@ namespace Aidge { template <class Op, class FwdFunc, class BwdFunc = void()> -class OperatorImpl_cpu : public OperatorImpl, - public Registrable<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>, ImplSpec, Impl<FwdFunc, BwdFunc>> -{ -public: - OperatorImpl_cpu(const Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>> create(const Op& op) { +class OperatorImpl_cpu + : public OperatorImpl, + public Registrable<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>, + ImplSpec, + Impl<FwdFunc, BwdFunc>> { + public: + OperatorImpl_cpu(const Op &op) : OperatorImpl(op, "cpu") {} + + static std::unique_ptr<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>> + create(const Op &op) { return std::make_unique<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>>(op); } virtual std::shared_ptr<ProdConso> getProdConso() const override { - const auto impl = Registrar<OperatorImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = Registrar<OperatorImpl_cpu>::create( + getBestMatch(getRequiredSpec())); return impl.prodConso(mOp); } virtual std::vector<ImplSpec> getAvailableImplSpecs() const override { - // return Registrar<OperatorImpl_cpu>::getKeys(); // Note: cannot return set due to python binding - std::set<ImplSpec> implSpecsSet = Registrar<OperatorImpl_cpu>::getKeys(); + // return Registrar<OperatorImpl_cpu>::getKeys(); // Note: cannot + // return 
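// OperatorImpl_cpu wires two registries together: REGISTRAR(Op, "cpu", ...)
// publishes the backend entry point on the operator, while
// REGISTRAR(XImpl_cpu, {spec}, {prodConso, fwd, bwd}) registers concrete
// kernels keyed by ImplSpec, which getBestMatch(getRequiredSpec()) resolves
// at run time. A toy sketch of that two-level idea, assuming a plain map
// keyed by data type in place of real ImplSpec matching:

#include <functional>
#include <map>
#include <stdexcept>

enum class DType { Float32, Float64, Int32 };

struct Kernels {
    std::function<void()> forward;
    std::function<void()> backward; // may be empty, like the nullptr slots
};

std::map<DType, Kernels> &registry() {
    static std::map<DType, Kernels> r;
    return r;
}

// Called once per kernel at static-initialization time, much as REGISTRAR does.
bool registerKernel(DType t, Kernels k) {
    registry()[t] = std::move(k);
    return true;
}

const Kernels &resolve(DType t) {
    const auto it = registry().find(t);
    if (it == registry().end())
        throw std::runtime_error("no kernel registered for this spec");
    return it->second; // the real getBestMatch() also scores partial matches
}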
set due to python binding + std::set<ImplSpec> implSpecsSet = + Registrar<OperatorImpl_cpu>::getKeys(); return std::vector<ImplSpec>(implSpecsSet.begin(), implSpecsSet.end()); } void forward() override; void backward() override; }; -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_IMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp index bc0bd8cad3b630b89f728d78b59652f31bbcf410..33f3b7aa6dfc9798b02779090c5e722340e3260c 100644 --- a/include/aidge/backend/cpu/operator/PadImpl.hpp +++ b/include/aidge/backend/cpu/operator/PadImpl.hpp @@ -17,46 +17,47 @@ #include <tuple> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Pad.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { class Pad_ProdConso_cpu : public ProdConso { -public: - Pad_ProdConso_cpu(const Operator& op): ProdConso(op) {} + public: + Pad_ProdConso_cpu(const Operator &op) : ProdConso(op) {} - static std::unique_ptr<ProdConso> defaultModel(const Operator& op) { + static std::unique_ptr<ProdConso> defaultModel(const Operator &op) { return std::make_unique<Pad_ProdConso_cpu>(op); } - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t + getNbRequiredProtected(const IOIndex_t inputIdx) const override final; }; // Operator implementation entry point for the backend using Pad1D_Op = Pad_Op<1>; using PadImpl1D_cpu = OperatorImpl_cpu<Pad_Op<1>, - void(const std::array<DimSize_t, 2>&, - const PadBorderType, - const double, - const std::array<DimSize_t, 3> &, - const void *, - void *)>; + void(const std::array<DimSize_t, 2> &, + const PadBorderType, + const double, + const std::array<DimSize_t, 3> &, + const void *, + void *)>; using Pad2D_Op = Pad_Op<2>; using PadImpl2D_cpu = OperatorImpl_cpu<Pad_Op<2>, - void(const std::array<DimSize_t, 4>&, - const PadBorderType, - const double, - const std::array<DimSize_t, 4> &, - const void *, - void *)>; + void(const std::array<DimSize_t, 4> &, + const PadBorderType, + const double, + const std::array<DimSize_t, 4> &, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Pad1D_Op, "cpu", Aidge::PadImpl1D_cpu::create); REGISTRAR(Pad2D_Op, "cpu", Aidge::PadImpl2D_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_PADIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp index 6d218cb1d719e8576f6c013ac5a1b9c60a739852..08cb58fd2d137d16028222a8698a6387a9d703f5 100644 --- a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp @@ -12,10 +12,10 @@ #ifndef AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ -#include <algorithm> // std::max, std::min +#include <algorithm> // std::max, std::min #include <array> -#include <cstddef> // std::size_t -#include <cstdint> // std::int32_t +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/utils/Registrar.hpp" @@ -32,22 +32,23 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class O> -void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorders, - const PadBorderType borderType, - const double borderValue, - const std::array<DimSize_t, 3>& dims, - const void *input_, - void *output_) -{ +void PadImpl1D_cpu_forward_kernel( + const std::array<DimSize_t, 2> &beginEndBorders, + const PadBorderType borderType, + const double borderValue, + const std::array<DimSize_t, 3> &dims, + const void *input_, + void *output_) { const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); - const std::size_t oxSize = dims[2] + beginEndBorders[0] + beginEndBorders[1]; + const std::size_t oxSize = + dims[2] + beginEndBorders[0] + beginEndBorders[1]; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2]; - const std::size_t oIndex = (ch + batch*dims[1]) * oxSize; + const std::size_t iIndex = (ch + batch * dims[1]) * dims[2]; + const std::size_t oIndex = (ch + batch * dims[1]) * oxSize; for (unsigned int ox = 0; ox < oxSize; ++ox) { const std::size_t oIndexFull = oIndex + ox; @@ -55,19 +56,24 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder O outputValue = static_cast<O>(borderValue); if (borderType == PadBorderType::Constant) { - int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[0]); + int ix = static_cast<int>(ox) - + static_cast<int>(beginEndBorders[0]); - if (ix >= 0 && ix < static_cast<int>(dims[2])) { - outputValue = input[iIndex + static_cast<std::size_t>(ix)]; + if (ix >= 0 && ix < static_cast<int>(dims[2])) { + outputValue = + input[iIndex + static_cast<std::size_t>(ix)]; } - } - else if (borderType == PadBorderType::Edge) { - int ix = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(ox) - static_cast<int>(beginEndBorders[0]))); + } else if (borderType == PadBorderType::Edge) { + int ix = std::max( + 0, + std::min(static_cast<int>(dims[2]) - 1, + static_cast<int>(ox) - + static_cast<int>(beginEndBorders[0]))); outputValue = input[iIndex + static_cast<std::size_t>(ix)]; - } - else if (borderType == PadBorderType::Reflect) { - int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[0]); + } else if (borderType == PadBorderType::Reflect) { + int ix = static_cast<int>(ox) - + static_cast<int>(beginEndBorders[0]); if (ix < 0) ix = 0 - ix; @@ -75,9 +81,11 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder ix = static_cast<int>(dims[2]) - ix; outputValue = input[iIndex + static_cast<std::size_t>(ix)]; - } - else if (borderType == PadBorderType::Wrap) { - int ix = (static_cast<int>(dims[2]) + static_cast<int>(ox) - static_cast<int>(beginEndBorders[0])) % static_cast<int>(dims[2]); + } else if (borderType == PadBorderType::Wrap) { + int ix = + (static_cast<int>(dims[2]) + static_cast<int>(ox) - + static_cast<int>(beginEndBorders[0])) % + static_cast<int>(dims[2]); outputValue = input[iIndex + static_cast<std::size_t>(ix)]; } @@ -90,15 +98,26 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder // Kernels registration to implementation entry point REGISTRAR(PadImpl1D_cpu, - {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, - {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr}); + {{DataType::Float32, DataFormat::NCHW}, + {DataType::Float32, 
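// PadImpl1D_cpu_forward_kernel maps every output position ox back to a
// source index ix = ox - padLeft, then resolves out-of-range ix per border
// mode: Constant keeps the fill value, Edge clamps, Reflect mirrors, Wrap
// is modular. A compact sketch of just that index resolution, assuming
// padding smaller than the input width so a single fold suffices:

#include <algorithm>

enum class Border { Constant, Edge, Reflect, Wrap };

// Returns the input index to read for output position ox, or -1 when a
// Constant border should emit the fill value instead.
int padSourceIndex(int ox, int padLeft, int width, Border mode) {
    int ix = ox - padLeft;
    switch (mode) {
    case Border::Constant:
        return (ix >= 0 && ix < width) ? ix : -1;
    case Border::Edge:
        return std::max(0, std::min(width - 1, ix));
    case Border::Reflect:
        if (ix < 0) ix = -ix;              // mirror across the left edge
        if (ix >= width)                   // textbook mirror on the right;
            ix = 2 * (width - 1) - ix;     // the kernel folds with width - ix
        return ix;
    case Border::Wrap:
        return (width + ix) % width;
    }
    return -1;
}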
DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, + Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, + cpptype_t<DataType::Float32>>, + nullptr}); REGISTRAR(PadImpl1D_cpu, - {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, - {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr}); + {{DataType::Float64, DataFormat::NCHW}, + {DataType::Float64, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, + Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, + cpptype_t<DataType::Float64>>, + nullptr}); REGISTRAR(PadImpl1D_cpu, - {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, - {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr}); - + {{DataType::Int32, DataFormat::NCHW}, + {DataType::Int32, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, + Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, + cpptype_t<DataType::Int32>>, + nullptr}); /** * @brief Forward kernel for 2D Padding on CPU backend. @@ -110,47 +129,76 @@ REGISTRAR(PadImpl1D_cpu, * @param output_ Output Tensor. */ template <class I, class O> -void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorders, - const PadBorderType borderType, - const double borderValue, - const std::array<DimSize_t, 4> &dims, - const void *input_, - void *output_) -{ +void PadImpl2D_cpu_forward_kernel( + const std::array<DimSize_t, 4> &beginEndBorders, + const PadBorderType borderType, + const double borderValue, + const std::array<DimSize_t, 4> &dims, + const void *input_, + void *output_) { const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); - const std::size_t oySize = dims[2] + beginEndBorders[0] + beginEndBorders[2]; - const std::size_t oxSize = dims[3] + beginEndBorders[1] + beginEndBorders[3]; + const std::size_t oySize = + dims[2] + beginEndBorders[0] + beginEndBorders[2]; + const std::size_t oxSize = + dims[3] + beginEndBorders[1] + beginEndBorders[3]; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; + const std::size_t iIndex = + (ch + batch * dims[1]) * dims[2] * dims[3]; + const std::size_t oIndex = + (ch + batch * dims[1]) * oxSize * oySize; for (std::uint32_t oy = 0; oy < oySize; ++oy) { for (std::uint32_t ox = 0; ox < oxSize; ++ox) { - const std::size_t oIndexFull = oIndex + oy*oxSize + ox; + const std::size_t oIndexFull = oIndex + oy * oxSize + ox; O outputValue = static_cast<O>(borderValue); if (borderType == PadBorderType::Constant) { - std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1]); - std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0]); + std::int32_t ix = + static_cast<std::int32_t>(ox) - + static_cast<std::int32_t>(beginEndBorders[1]); + std::int32_t iy = + static_cast<std::int32_t>(oy) - + static_cast<std::int32_t>(beginEndBorders[0]); - if (ix >= 0 && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0 && iy < static_cast<std::int32_t>(dims[2])) { - outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)]; + if (ix >= 0 && + ix < static_cast<std::int32_t>(dims[3]) && + iy >= 0 && 
+ iy < static_cast<std::int32_t>(dims[2])) { + outputValue = + input[iIndex + + static_cast<std::size_t>(iy) * dims[3] + + static_cast<std::size_t>(ix)]; } - } - else if (borderType == PadBorderType::Edge) { - std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1]))); - std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0]))); + } else if (borderType == PadBorderType::Edge) { + std::int32_t ix = std::max( + 0, + std::min(static_cast<std::int32_t>(dims[3]) - 1, + static_cast<std::int32_t>(ox) - + static_cast<std::int32_t>( + beginEndBorders[1]))); + std::int32_t iy = std::max( + 0, + std::min(static_cast<std::int32_t>(dims[2]) - 1, + static_cast<std::int32_t>(oy) - + static_cast<std::int32_t>( + beginEndBorders[0]))); - outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)]; - } - else if (borderType == PadBorderType::Reflect) { - std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1]); - std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0]); + outputValue = + input[iIndex + + static_cast<std::size_t>(iy) * dims[3] + + static_cast<std::size_t>(ix)]; + } else if (borderType == PadBorderType::Reflect) { + std::int32_t ix = + static_cast<std::int32_t>(ox) - + static_cast<std::int32_t>(beginEndBorders[1]); + std::int32_t iy = + static_cast<std::int32_t>(oy) - + static_cast<std::int32_t>(beginEndBorders[0]); if (ix < 0) ix = 0 - ix; @@ -161,13 +209,26 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder if (iy >= static_cast<std::int32_t>(dims[2])) iy = static_cast<std::int32_t>(dims[2]) - iy; - outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)]; - } - else if (borderType == PadBorderType::Wrap) { - std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[3]); - std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0])) % static_cast<std::int32_t>(dims[2]); + outputValue = + input[iIndex + + static_cast<std::size_t>(iy) * dims[3] + + static_cast<std::size_t>(ix)]; + } else if (borderType == PadBorderType::Wrap) { + std::int32_t ix = + (static_cast<std::int32_t>(dims[3]) + + static_cast<std::int32_t>(ox) - + static_cast<std::int32_t>(beginEndBorders[1])) % + static_cast<std::int32_t>(dims[3]); + std::int32_t iy = + (static_cast<std::int32_t>(dims[2]) + + static_cast<std::int32_t>(oy) - + static_cast<std::int32_t>(beginEndBorders[0])) % + static_cast<std::int32_t>(dims[2]); - outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)]; + outputValue = + input[iIndex + + static_cast<std::size_t>(iy) * dims[3] + + static_cast<std::size_t>(ix)]; } output[oIndexFull] = outputValue; @@ -179,14 +240,26 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder // Kernels registration to implementation entry point REGISTRAR(PadImpl2D_cpu, - {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, - {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr}); 
+ {{DataType::Float32, DataFormat::NCHW}, + {DataType::Float32, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, + Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float32>, + cpptype_t<DataType::Float32>>, + nullptr}); REGISTRAR(PadImpl2D_cpu, - {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, - {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr}); + {{DataType::Float64, DataFormat::NCHW}, + {DataType::Float64, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, + Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float64>, + cpptype_t<DataType::Float64>>, + nullptr}); REGISTRAR(PadImpl2D_cpu, - {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, - {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr}); -} // namespace Aidge + {{DataType::Int32, DataFormat::NCHW}, + {DataType::Int32, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, + Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Int32>, + cpptype_t<DataType::Int32>>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp index cfbb8173d1f83162519016a8f2b3c3166977a5b7..973fe7fe553d60cca81f0e028bd26d58a2084cac 100644 --- a/include/aidge/backend/cpu/operator/PowImpl.hpp +++ b/include/aidge/backend/cpu/operator/PowImpl.hpp @@ -12,23 +12,34 @@ #ifndef AIDGE_CPU_OPERATOR_POWIMPL_H_ #define AIDGE_CPU_OPERATOR_POWIMPL_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Pow.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend using PowImpl_cpu = OperatorImpl_cpu<Pow_Op, - void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*), - void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, const void*, void*, void*)>; - + void(const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const void *, + const void *, + void *), + void(const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const void *, + const void *, + const void *, + void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_POWIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp index ab9b2ccc7b823842decd044b90a5c6364cedc9c9..78ca9a3086f34fd248cd4b3eb444184aedfa90b0 100644 --- a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp @@ -20,76 +20,100 @@ namespace Aidge { template <class I1, class I2, class O> -void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, - const std::vector<std::size_t>& input2Dims, - const std::vector<std::size_t>& outputDims, - const void* input1_, - const 
void* input2_, - void* output_) { +void PowImpl_cpu_forward_kernel(const std::vector<std::size_t> &input1Dims, + const std::vector<std::size_t> &input2Dims, + const std::vector<std::size_t> &outputDims, + const void *input1_, + const void *input2_, + void *output_) { - const I1* input_1 = static_cast<const I1*>(input1_); - const I2* input_2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); + const I1 *input_1 = static_cast<const I1 *>(input1_); + const I2 *input_2 = static_cast<const I2 *>(input2_); + O *output = static_cast<O *>(output_); - std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); - for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) - { - std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::size_t totalElements = + std::accumulate(outputDims.cbegin(), + outputDims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) { + std::vector<std::size_t> indexes = + getMultiDimIndices(outputDims, oIndex); + + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); - std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); - std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); - output[oIndex] = std::pow(input_1[idx1], input_2[idx2]); - } + } } template <class I1, class I2, class O> -void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims, - const std::vector<std::size_t>& input1Dims, - const std::vector<std::size_t>& outputDims, - const void* input0_, - const void* input1_, - const void* gradOutput_, - void* gradientInput0_, - void* gradientInput1_) { - const I1* input0 = static_cast<const I1*>(input0_); - I1* grad0 = static_cast<I1*>(gradientInput0_); - const I2* input1 = static_cast<const I2*>(input1_); - I2* grad1 = static_cast<I2*>(gradientInput1_); - const O* gradOut = static_cast<const O*>(gradOutput_); +void PowImpl_cpu_backward_kernel(const std::vector<std::size_t> &input0Dims, + const std::vector<std::size_t> &input1Dims, + const std::vector<std::size_t> &outputDims, + const void *input0_, + const void *input1_, + const void *gradOutput_, + void *gradientInput0_, + void *gradientInput1_) { + const I1 *input0 = static_cast<const I1 *>(input0_); + I1 *grad0 = static_cast<I1 *>(gradientInput0_); + const I2 *input1 = static_cast<const I2 *>(input1_); + I2 *grad1 = static_cast<I2 *>(gradientInput1_); + const O *gradOut = static_cast<const O *>(gradOutput_); // Fill input grads with zeros - std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); - std::fill(grad0, grad0 + input0Elements, I1(0)); - std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); - std::fill(grad1, grad1 + input1Elements, I2(0)); + std::size_t input0Elements = + std::accumulate(input0Dims.cbegin(), + input0Dims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + std::fill(grad0, grad0 + input0Elements, I1(0)); + std::size_t input1Elements = + std::accumulate(input1Dims.cbegin(), + input1Dims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + std::fill(grad1, grad1 + input1Elements, I2(0)); - std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); - for 
(size_t oIndex = 0; oIndex < totalElements; ++oIndex) - { + std::size_t totalElements = + std::accumulate(outputDims.cbegin(), + outputDims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + for (size_t oIndex = 0; oIndex < totalElements; ++oIndex) { // Compute indexes in inputs 0 and 1 to support broadcasting - std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::vector<std::size_t> indexes = + getMultiDimIndices(outputDims, oIndex); std::size_t idx0 = getFlattenedIndex(input0Dims, indexes); std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); // grad0 = grad_output * (input1 * pow(input0, (input1 -1))) - grad0[idx0] += gradOut[oIndex]*input1[idx1]* std::pow(input0[idx0], input1[idx1]-1); + grad0[idx0] += gradOut[oIndex] * input1[idx1] * + std::pow(input0[idx0], input1[idx1] - 1); // grad1 = grad_output * (output * ln(input0)) - grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * std::log(input0[idx0]); + grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * + std::log(input0[idx0]); } } // Kernels registration to implementation entry point REGISTRAR(PowImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, Aidge::PowImpl_cpu_backward_kernel<float, float, float>}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::PowImpl_cpu_forward_kernel<float, float, float>, + Aidge::PowImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(PowImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, Aidge::PowImpl_cpu_backward_kernel<double, double, double>}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::PowImpl_cpu_forward_kernel<double, double, double>, + Aidge::PowImpl_cpu_backward_kernel<double, double, double>}); REGISTRAR(PowImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, + Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp index 5b900618abce83ff1c3822d4f61cc62c93f5081f..366b81f20cc1ac3ea7757a9edebc73267b46c661 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp @@ -12,9 +12,9 @@ #ifndef AIDGE_CPU_OPERATOR_RELUIMPL_H_ #define AIDGE_CPU_OPERATOR_RELUIMPL_H_ -#include <cstddef> // std::size_t +#include <cstddef> // std::size_t #include <memory> -#include <tuple> // std::tuple +#include <tuple> // std::tuple #include <vector> #include "aidge/backend/cpu/operator/OperatorImpl.hpp" @@ -24,12 +24,13 @@ namespace Aidge { // Operator implementation entry point for the backend -using ReLUImpl_cpu = OperatorImpl_cpu<ReLU_Op, - void(const std::size_t, const void*, void*), - void(const std::size_t, const void*, const void*, void*)>; +using ReLUImpl_cpu = OperatorImpl_cpu< + ReLU_Op, + void(const std::size_t, const void *, void *), + void(const std::size_t, const void *, const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(ReLU_Op, "cpu", Aidge::ReLUImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge 
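// PowImpl_cpu_backward_kernel accumulates the two broadcast-aware
// gradients of z = a^b, dz/da = b * a^(b-1) and dz/db = a^b * ln(a), each
// scaled by the incoming gradient. A scalar sketch of that math, assuming
// a > 0 so the logarithm is defined (the kernel itself does not guard it):

#include <cmath>

struct PowGrads {
    double da; // contribution to the gradient of the base
    double db; // contribution to the gradient of the exponent
};

PowGrads powBackward(double a, double b, double gradOut) {
    return {
        gradOut * b * std::pow(a, b - 1.0),    // d(a^b)/da
        gradOut * std::pow(a, b) * std::log(a) // d(a^b)/db
    };
}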
#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp index e39e9b7decd91e392c5db7e9e9bc4ed0f366829d..246063a7295b0c31f1598700fe7da0d641ec1e46 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp @@ -12,9 +12,9 @@ #ifndef AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ -#include <cstddef> // std::size_t +#include <cstddef> // std::size_t #include <memory> -#include <tuple> // std::tuple +#include <tuple> // std::tuple #include <vector> #include "aidge/backend/cpu/operator/OperatorImpl.hpp" @@ -27,13 +27,13 @@ namespace Aidge { // Kernels template <class I, class O> void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); -//#pragma omp parallel for if (inputLenght > 1024) + // #pragma omp parallel for if (inputLenght > 1024) for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = (input[i] > 0) ? input[i] : 0; } @@ -41,11 +41,12 @@ void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, template <class I, class GI, class GO> void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* input_, const void* grad_output_, - void* grad_input_) { - const I* input = static_cast<const I*>(input_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); + const void *input_, + const void *grad_output_, + void *grad_input_) { + const I *input = static_cast<const I *>(input_); + const GO *grad_output = static_cast<const GO *>(grad_output_); + GI *grad_input = static_cast<GI *>(grad_input_); for (std::size_t i = 0; i < inputLenght; ++i) { grad_input[i] = (input[i] > 0) ? 
grad_output[i] : 0; } @@ -53,14 +54,20 @@ void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght, // Kernels registration to implementation entry point REGISTRAR(ReLUImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<float, float>, Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::ReLUImpl_cpu_forward_kernel<float, float>, + Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(ReLUImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<double, double>, Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::ReLUImpl_cpu_forward_kernel<double, double>, + Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>}); REGISTRAR(ReLUImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::ReLUImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::ReLUImpl_cpu_forward_kernel<int32_t, int32_t>, + Aidge::ReLUImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp index 1c50805d5af768dfc160488fda1e8fadfa798454..97f0cedc268fa825c068b90513a3bc8fc66d6532 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp @@ -24,15 +24,16 @@ namespace Aidge { // Operator implementation entry point for the backend -using ReduceMeanImpl_cpu = OperatorImpl_cpu<ReduceMean_Op, - void(const std::vector<std::int32_t>&, - DimSize_t, - const std::vector<DimSize_t>&, - const void *, - void *)>; +using ReduceMeanImpl_cpu = + OperatorImpl_cpu<ReduceMean_Op, + void(const std::vector<std::int32_t> &, + DimSize_t, + const std::vector<DimSize_t> &, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(ReduceMean_Op, "cpu", Aidge::ReduceMeanImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp index 5a143164d7e4fa2585ea72c38eaaa123f215d21a..339db53ad260f8b56fa2dbda71b8ce18460724c1 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp @@ -12,11 +12,11 @@ #ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_ -#include <algorithm> // std::for_each -#include <cstddef> // std::size_t -#include <cstdint> // std::int32_t -#include <functional> //std::multiplies -#include <numeric> //std::accumulate +#include <algorithm> // std::for_each +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t +#include <functional> //std::multiplies +#include <numeric> //std::accumulate #include <vector> #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp" @@ -26,24 +26,35 @@ namespace Aidge { template <class I, class O> -void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, - DimSize_t /*keepDims*/, - const std::vector<DimSize_t>& inputDims, - const void* input_, - void* output_) { +void 
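// The ReLU pair registered above is the textbook definition: the forward
// pass keeps positive values and zeroes the rest, and the backward pass
// gates the incoming gradient on the sign of the saved *input*, not the
// output. A minimal sketch over plain arrays:

#include <cstddef>

void reluForward(std::size_t n, const float *x, float *y) {
    for (std::size_t i = 0; i < n; ++i)
        y[i] = (x[i] > 0.0f) ? x[i] : 0.0f;
}

void reluBackward(std::size_t n, const float *x, const float *gy, float *gx) {
    // d(relu)/dx is 1 where the input was positive, 0 elsewhere.
    for (std::size_t i = 0; i < n; ++i)
        gx[i] = (x[i] > 0.0f) ? gy[i] : 0.0f;
}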
ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t> &axes, + DimSize_t /*keepDims*/, + const std::vector<DimSize_t> &inputDims, + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); const std::size_t nb_dims = inputDims.size(); - const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>()); + const std::size_t totalElements = + std::accumulate(inputDims.cbegin(), + inputDims.cend(), + 1, + std::multiplies<std::size_t>()); - if (axes.empty()){ - std::copy_n(input,totalElements, output); - } - else if (axes.size() == 1) { - const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>()); - const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>()); + if (axes.empty()) { + std::copy_n(input, totalElements, output); + } else if (axes.size() == 1) { + const std::size_t stride_pre = + std::accumulate(inputDims.cbegin(), + inputDims.cbegin() + axes[0], + 1, + std::multiplies<std::size_t>()); + const std::size_t stride_post = + std::accumulate(inputDims.crbegin(), + inputDims.crbegin() + nb_dims - 1 - axes[0], + 1, + std::multiplies<std::size_t>()); const std::size_t dim_i = inputDims[axes[0]]; for (std::size_t pre = 0; pre < stride_pre; ++pre) { @@ -53,54 +64,69 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, O mean = 0; for (std::size_t i = 0; i < dim_i; ++i) { // Single pass numerically stable mean, using the fmaf - mean = fmaf(input[idx_i + i*stride_post] - mean, 1.0f/(i+1), mean); + mean = fmaf(input[idx_i + i * stride_post] - mean, + 1.0f / (i + 1), + mean); } - output[idx_o] = mean; + output[idx_o] = mean; } } } else { std::size_t outputElements = totalElements; - auto stride_post = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); + auto stride_post = + std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); stride_post[nb_dims - 1] = 1; - for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) { - stride_post[i] = stride_post[i+1]*inputDims[i+1]; + for (std::size_t i = nb_dims - 2; i != static_cast<std::size_t>(-1); + --i) { + stride_post[i] = stride_post[i + 1] * inputDims[i + 1]; } - auto stride_pre = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); + auto stride_pre = + std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); stride_pre[0] = 1; for (std::size_t i = 1; i < nb_dims; ++i) { - stride_pre[i] = stride_pre[i-1]*inputDims[i-1]; + stride_pre[i] = stride_pre[i - 1] * inputDims[i - 1]; } - const I* inputAccumulation = input; - I* outputAccumulation = nullptr; + const I *inputAccumulation = input; + I *outputAccumulation = nullptr; - for (const auto& axisInt : axes) { + for (const auto &axisInt : axes) { const std::size_t a = static_cast<std::size_t>(axisInt); outputElements /= inputDims[a]; outputAccumulation = new I[outputElements]; const std::size_t dim_i = inputDims[a]; for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) { for (std::size_t post = 0; post < stride_post[a]; ++post) { - const std::size_t idx_i = pre * dim_i * stride_post[a] + post; + const std::size_t idx_i = + pre * dim_i * stride_post[a] + post; const std::size_t idx_o = pre * stride_post[a] + post; I mean = 0; for (std::size_t i = 0; i < dim_i; 
++i) { // Single pass numerically stable mean, using the fmaf - mean = fmaf(inputAccumulation[idx_i + i*stride_post[a]] - mean, 1.0f/(i+1), mean); + mean = fmaf( + inputAccumulation[idx_i + i * stride_post[a]] - + mean, + 1.0f / (i + 1), + mean); } outputAccumulation[idx_o] = mean; } } - std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; }); + std::for_each(stride_pre.get() + a + 1, + stride_pre.get() + nb_dims, + [dim_i](std::size_t &val) { val /= dim_i; }); if (inputAccumulation != input) { delete[] inputAccumulation; } inputAccumulation = outputAccumulation; } - // Copy elements from inputAccumulation to output while dividing by divisor - std::copy(inputAccumulation, inputAccumulation + outputElements, output); + // Copy elements from inputAccumulation to output while dividing by + // divisor + std::copy(inputAccumulation, + inputAccumulation + outputElements, + output); if (outputAccumulation) { delete[] outputAccumulation; } @@ -109,14 +135,20 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, // Kernels registration to implementation entry point REGISTRAR(ReduceMeanImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(ReduceMeanImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(ReduceMeanImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::ReduceMeanImpl_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp index 4138c62c24149c15cfad5e85e8f50889b2b6a433..7e01ef5a55c088d5f8e674c9142ea3a92b553927 100644 --- a/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp @@ -24,15 +24,16 @@ namespace Aidge { // Operator implementation entry point for the backend -using ReduceSumImpl_cpu = OperatorImpl_cpu<ReduceSum_Op, - void(const std::vector<std::int32_t>&, - DimSize_t, - const std::vector<DimSize_t>&, - const void *, - void *)>; +using ReduceSumImpl_cpu = + OperatorImpl_cpu<ReduceSum_Op, + void(const std::vector<std::int32_t> &, + DimSize_t, + const std::vector<DimSize_t> &, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(ReduceSum_Op, "cpu", Aidge::ReduceSumImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp index 72671421796a0d5e799e6f762dfcaf02457220f3..773d099ec7c2747a2d6fcf5c0b6e95c03d5ad605 100644 --- a/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp @@ -12,11 +12,11 @@ #ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_ 
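// For a single reduced axis a, ReduceMeanImpl_cpu_forward_kernel views the
// tensor as (stride_pre, dim_a, stride_post) and computes a one-pass
// running mean m += (x - m) / (i + 1) (via fmaf) instead of sum-then-divide;
// the multi-axis path repeats this one axis at a time through temporary
// buffers, rescaling the cached strides as each axis collapses. ReduceSum
// below follows the same decomposition with a plain sum. A sketch of the
// single-axis case, assuming a row-major float tensor:

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

std::vector<float> reduceMeanAxis(const std::vector<float> &in,
                                  const std::vector<std::size_t> &dims,
                                  std::size_t a) {
    const std::size_t pre =
        std::accumulate(dims.begin(), dims.begin() + a, std::size_t(1),
                        std::multiplies<std::size_t>());
    const std::size_t post =
        std::accumulate(dims.begin() + a + 1, dims.end(), std::size_t(1),
                        std::multiplies<std::size_t>());
    const std::size_t d = dims[a];
    std::vector<float> out(pre * post);
    for (std::size_t p = 0; p < pre; ++p) {
        for (std::size_t q = 0; q < post; ++q) {
            float m = 0.0f;
            for (std::size_t i = 0; i < d; ++i) // one-pass running mean
                m += (in[(p * d + i) * post + q] - m) / float(i + 1);
            out[p * post + q] = m;
        }
    }
    return out;
}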
-#include <algorithm> // std::for_each -#include <cstddef> // std::size_t -#include <cstdint> // std::int32_t -#include <functional> //std::multiplies -#include <numeric> //std::accumulate +#include <algorithm> // std::for_each +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t +#include <functional> //std::multiplies +#include <numeric> //std::accumulate #include <vector> #include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" @@ -26,24 +26,35 @@ namespace Aidge { template <class I, class O> -void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, - DimSize_t /*keepDims*/, - const std::vector<DimSize_t>& inputDims, - const void* input_, - void* output_) { +void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t> &axes, + DimSize_t /*keepDims*/, + const std::vector<DimSize_t> &inputDims, + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); const std::size_t nb_dims = inputDims.size(); - const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>()); + const std::size_t totalElements = + std::accumulate(inputDims.cbegin(), + inputDims.cend(), + 1, + std::multiplies<std::size_t>()); - if (axes.empty()){ - std::copy_n(input,totalElements, output); - } - else if (axes.size() == 1) { - const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>()); - const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>()); + if (axes.empty()) { + std::copy_n(input, totalElements, output); + } else if (axes.size() == 1) { + const std::size_t stride_pre = + std::accumulate(inputDims.cbegin(), + inputDims.cbegin() + axes[0], + 1, + std::multiplies<std::size_t>()); + const std::size_t stride_post = + std::accumulate(inputDims.crbegin(), + inputDims.crbegin() + nb_dims - 1 - axes[0], + 1, + std::multiplies<std::size_t>()); const std::size_t dim_i = inputDims[axes[0]]; for (std::size_t pre = 0; pre < stride_pre; ++pre) { @@ -52,53 +63,62 @@ void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, const std::size_t idx_o = pre * stride_post + post; O sum = 0; for (std::size_t i = 0; i < dim_i; ++i) { - sum +=input[idx_i + i*stride_post]; + sum += input[idx_i + i * stride_post]; } - output[idx_o] = sum; + output[idx_o] = sum; } } } else { std::size_t outputElements = totalElements; - auto stride_post = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); + auto stride_post = + std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); stride_post[nb_dims - 1] = 1; - for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) { - stride_post[i] = stride_post[i+1]*inputDims[i+1]; + for (std::size_t i = nb_dims - 2; i != static_cast<std::size_t>(-1); + --i) { + stride_post[i] = stride_post[i + 1] * inputDims[i + 1]; } - auto stride_pre = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); + auto stride_pre = + std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); stride_pre[0] = 1; for (std::size_t i = 1; i < nb_dims; ++i) { - stride_pre[i] = stride_pre[i-1]*inputDims[i-1]; + stride_pre[i] = stride_pre[i - 1] * inputDims[i - 1]; } - const I* inputAccumulation = input; - I* outputAccumulation = nullptr; + const I *inputAccumulation = input; 
+ I *outputAccumulation = nullptr; - for (const auto& axisInt : axes) { + for (const auto &axisInt : axes) { const std::size_t a = static_cast<std::size_t>(axisInt); outputElements /= inputDims[a]; outputAccumulation = new I[outputElements]; const std::size_t dim_i = inputDims[a]; for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) { for (std::size_t post = 0; post < stride_post[a]; ++post) { - const std::size_t idx_i = pre * dim_i * stride_post[a] + post; + const std::size_t idx_i = + pre * dim_i * stride_post[a] + post; const std::size_t idx_o = pre * stride_post[a] + post; I sum = 0; for (std::size_t i = 0; i < dim_i; ++i) { - sum += inputAccumulation[idx_i + i*stride_post[a]]; + sum += inputAccumulation[idx_i + i * stride_post[a]]; } outputAccumulation[idx_o] = sum; } } - std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; }); + std::for_each(stride_pre.get() + a + 1, + stride_pre.get() + nb_dims, + [dim_i](std::size_t &val) { val /= dim_i; }); if (inputAccumulation != input) { delete[] inputAccumulation; } inputAccumulation = outputAccumulation; } - // Copy elements from inputAccumulation to output while dividing by divisor - std::copy(inputAccumulation, inputAccumulation + outputElements, output); + // Copy elements from inputAccumulation to output while dividing by + // divisor + std::copy(inputAccumulation, + inputAccumulation + outputElements, + output); if (outputAccumulation) { delete[] outputAccumulation; } @@ -107,14 +127,20 @@ void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, // Kernels registration to implementation entry point REGISTRAR(ReduceSumImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(ReduceSumImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(ReduceSumImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::ReduceSumImpl_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp index c1cc247c548701d43e01b1e92d02f42a11cfc710..86ccc326ee15c47e07bb4870526b81782dc02c8d 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp @@ -12,27 +12,27 @@ #ifndef __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ #define __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Scaling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <array> #include <memory> #include <vector> -#include <array> namespace Aidge { // Operator implementation entry point for the backend using ScalingImpl_cpu = OperatorImpl_cpu<Scaling_Op, - void(const float, - const std::size_t, - const bool, - std::size_t, - const void*, - void*)>; + 
void(const float, + const std::size_t, + const bool, + std::size_t, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Scaling_Op, "cpu", Aidge::ScalingImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp index c758c9cf39e76bb370c6d03c28e3a670c280eefc..532d364c549c2084f8c03f0b1c19f66b215c8fb2 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp @@ -12,21 +12,20 @@ #ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ +#include "aidge/backend/cpu/operator/ScalingImpl.hpp" +#include "aidge/utils/Registrar.hpp" #include <cmath> #include <cstddef> -#include "aidge/utils/Registrar.hpp" -#include "aidge/backend/cpu/operator/ScalingImpl.hpp" -//TODO : improve propagate, n2d2 : +// TODO : improve propagate, n2d2 : /* template<typename T> -void N2D2::floatingPointScaling_propagate(const Tensor<T>& input, Tensor<T>& output, - std::size_t batchSize, std::size_t nbChannels, - std::size_t height, std::size_t width, - bool isClipped, - const std::vector<Float_T>& clippingFactorPerChannel, - const std::vector<Float_T>& scalingFactorPerChannel, - std::size_t quantizedNbBits, bool isOutputUnsigned) +void N2D2::floatingPointScaling_propagate(const Tensor<T>& input, Tensor<T>& +output, std::size_t batchSize, std::size_t nbChannels, std::size_t height, +std::size_t width, bool isClipped, const std::vector<Float_T>& +clippingFactorPerChannel, const std::vector<Float_T>& scalingFactorPerChannel, + std::size_t quantizedNbBits, bool +isOutputUnsigned) { std::size_t index = 0; for (std::size_t batch = 0; batch < batchSize; batch++) { @@ -34,12 +33,13 @@ void N2D2::floatingPointScaling_propagate(const Tensor<T>& input, Tensor<T>& out for(std::size_t y = 0; y < height; y++) { for(std::size_t x = 0; x < width; x++) { - T res = isClipped ? Clip(input(index), clippingFactorPerChannel[ch]) - : input(index); - res = Scale(res, scalingFactorPerChannel[ch]); + T res = isClipped ? Clip(input(index), +clippingFactorPerChannel[ch]) : input(index); res = Scale(res, +scalingFactorPerChannel[ch]); if(quantizedNbBits > 0) { - res = saturate(std::round(res), quantizedNbBits, isOutputUnsigned); + res = saturate(std::round(res), quantizedNbBits, +isOutputUnsigned); } output(index) = (T) res; index++; @@ -50,24 +50,22 @@ void N2D2::floatingPointScaling_propagate(const Tensor<T>& input, Tensor<T>& out } */ - namespace Aidge { -template <class O> -const O& clamp(const O& x, const O& min, const O& max) -{ +template <class O> const O &clamp(const O &x, const O &min, const O &max) { return (x < min) ? min : (x > max) ? max : x; } -template<class O> -O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutputUnsigned) { +template <class O> +O saturate(const O value, + const std::size_t quantizedNbBits, + const bool isOutputUnsigned) { // TODO: no assertions in kernel assert(quantizedNbBits > 0); - const O min = isOutputUnsigned ? 0 : - -(1ll << (quantizedNbBits - 1ll)); - const O max = isOutputUnsigned ? (1ll << quantizedNbBits) - 1ll : - (1ll << (quantizedNbBits - 1ll)) - 1ll; + const O min = isOutputUnsigned ? 0 : -(1ll << (quantizedNbBits - 1ll)); + const O max = isOutputUnsigned ? 
(1ll << quantizedNbBits) - 1ll + : (1ll << (quantizedNbBits - 1ll)) - 1ll; return clamp(value, min, max); } @@ -77,31 +75,39 @@ void ScalingImpl_cpu_forward_kernel(const float scalingFactor, const std::size_t quantizedNbBits, const bool isOutputUnsigned, std::size_t inputLenght, - const void* input_, - void* output_) { + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = static_cast<O>(input[i] * static_cast<I>(scalingFactor)); - if(quantizedNbBits > 0) { - output[i] = saturate(std::round(output[i]), quantizedNbBits, isOutputUnsigned); + if (quantizedNbBits > 0) { + output[i] = saturate(std::round(output[i]), + quantizedNbBits, + isOutputUnsigned); } } } // Kernels registration to implementation entry point REGISTRAR(ScalingImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::ScalingImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(ScalingImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::ScalingImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(ScalingImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::ScalingImpl_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp index ee1c36edecbe50cc1765da59737509a2b6333caf..cae00b0a662fd948bc0466b541a41deabdb59f14 100644 --- a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp +++ b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp @@ -12,22 +12,23 @@ #ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ #define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Sigmoid.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend -using SigmoidImpl_cpu = OperatorImpl_cpu<Sigmoid_Op, - void(const std::size_t, const void*, void*), - void(const std::size_t, const void*, const void*, void*)>; +using SigmoidImpl_cpu = OperatorImpl_cpu< + Sigmoid_Op, + void(const std::size_t, const void *, void *), + void(const std::size_t, const void *, const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(Sigmoid_Op, "cpu", Aidge::SigmoidImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp index dfd71ce0a878efbeb779f3a67ad4ccc762bb8363..6f796fdd0bd0ab59b2b0ad930a796c6a7b2e4bf0 100644 --- a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp +++ 
b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp @@ -19,29 +19,30 @@ namespace Aidge { template <class I, class O> void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); -//#pragma omp parallel for if (inputLenght > 1024) + // #pragma omp parallel for if (inputLenght > 1024) for (std::size_t i = 0; i < inputLenght; ++i) { - if (input[i] > I(0)) { - output[i] = O(1) / (O(1) + std::exp(-input[i])); - } else { - output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i])); - } + if (input[i] > I(0)) { + output[i] = O(1) / (O(1) + std::exp(-input[i])); + } else { + output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i])); + } } } template <class O, class GI, class GO> void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* output_, const void* grad_output_, - void* grad_input_) { - const O* output = static_cast<const O*>(output_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); + const void *output_, + const void *grad_output_, + void *grad_input_) { + const O *output = static_cast<const O *>(output_); + const GO *grad_output = static_cast<const GO *>(grad_output_); + GI *grad_input = static_cast<GI *>(grad_input_); for (std::size_t i = 0; i < inputLenght; ++i) { grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i]; } @@ -49,11 +50,15 @@ void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght, // Kernels registration to implementation entry point REGISTRAR(SigmoidImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>, Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::SigmoidImpl_cpu_forward_kernel<float, float>, + Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(SigmoidImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>, Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>}); -} // namespace Aidge + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::SigmoidImpl_cpu_forward_kernel<double, double>, + Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp index fd98b38d7117eaa14e35fe3cb89abf95b2913997..b224506169078a394f0c577eedf419237a2e3848 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp @@ -12,29 +12,29 @@ #ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_H__ #define AIDGE_CPU_OPERATOR_SLICEIMPL_H__ +#include <array> #include <memory> #include <vector> -#include <array> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Slice.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { // Operator implementation entry point for the backend using SliceImpl_cpu = OperatorImpl_cpu<Slice_Op, - void(const std::vector<std::int64_t>&, - const std::vector<std::int64_t>&, - 
const std::vector<std::int8_t>&, - const std::vector<std::int64_t>&, - const std::vector<DimSize_t>&, - const void*, - void*)>; + void(const std::vector<std::int64_t> &, + const std::vector<std::int64_t> &, + const std::vector<std::int8_t> &, + const std::vector<std::int64_t> &, + const std::vector<DimSize_t> &, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Slice_Op, "cpu", Aidge::SliceImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* __AIDGE_CPU_OPERATOR_SLICEIMPL_H__ */ diff --git a/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp index 1bf4c491723c570fa8bfd1774beca1630d2de9be..5b5396b15de65344a27e5a741b205d71b980431c 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp @@ -17,57 +17,76 @@ #include <cstddef> #include <iterator> -#include "aidge/utils/Registrar.hpp" #include "aidge/backend/cpu/operator/SliceImpl.hpp" +#include "aidge/utils/Registrar.hpp" namespace Aidge { -template<class I, class O> -void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts, - const std::vector<std::int64_t>& ends, - const std::vector<std::int8_t>& axes, - const std::vector<std::int64_t>& steps, - const std::vector<DimSize_t>& inputDims, - const void* input_, - void* output_) -{ - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); +template <class I, class O> +void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t> &starts, + const std::vector<std::int64_t> &ends, + const std::vector<std::int8_t> &axes, + const std::vector<std::int64_t> &steps, + const std::vector<DimSize_t> &inputDims, + const void *input_, + void *output_) { + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); const std::size_t nbDims = inputDims.size(); std::vector<DimSize_t> dims = inputDims; - DimSize_t totalSize = std::accumulate(inputDims.cbegin(), inputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); - const I* inputAccumulation = input; - I* outputAccumulation = nullptr; + DimSize_t totalSize = std::accumulate(inputDims.cbegin(), + inputDims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + const I *inputAccumulation = input; + I *outputAccumulation = nullptr; const std::size_t nbAxes = starts.size(); for (std::size_t i = 0; i < nbAxes; ++i) { - const DimIdx_t axis = axes[i] >= 0 ? - static_cast<DimIdx_t>(axes[i]) : - static_cast<DimIdx_t>(axes[i] + static_cast<DimIdx_t>(inputDims.size())); - const DimSize_t start = std::min(starts[i] >= 0 ? - static_cast<DimSize_t>(starts[i]) : - static_cast<DimSize_t>(starts[i] + static_cast<std::int64_t>(inputDims[axis])), - dims[axis]-1); - const DimSize_t end = ends[i] >= 0 ? - static_cast<DimSize_t>(ends[i]) : - static_cast<DimSize_t>(ends[i] + static_cast<std::int64_t>(inputDims[axis])); + const DimIdx_t axis = + axes[i] >= 0 + ? static_cast<DimIdx_t>(axes[i]) + : static_cast<DimIdx_t>( + axes[i] + static_cast<DimIdx_t>(inputDims.size())); + const DimSize_t start = std::min( + starts[i] >= 0 + ? static_cast<DimSize_t>(starts[i]) + : static_cast<DimSize_t>( + starts[i] + static_cast<std::int64_t>(inputDims[axis])), + dims[axis] - 1); + const DimSize_t end = + ends[i] >= 0 + ? 
static_cast<DimSize_t>(ends[i]) + : static_cast<DimSize_t>( + ends[i] + static_cast<std::int64_t>(inputDims[axis])); const std::int64_t step = steps[i]; - const std::size_t sliceSize = static_cast<std::size_t>(std::ceil((static_cast<float>(end) - static_cast<float>(start)) / static_cast<float>(step))); + const std::size_t sliceSize = static_cast<std::size_t>( + std::ceil((static_cast<float>(end) - static_cast<float>(start)) / + static_cast<float>(step))); outputAccumulation = new I[totalSize]; - const std::size_t stride_pre = std::accumulate(dims.cbegin(), dims.cbegin() + axis, 1, std::multiplies<std::size_t>()); - const std::size_t stride_post = std::accumulate(dims.crbegin(), dims.crbegin() + nbDims -1 - axis, 1, std::multiplies<std::size_t>()); - for (std::size_t outer = 0; outer < stride_pre; ++outer) - { - const std::size_t idx_in = outer * stride_post * dims[axis] + start * stride_post; + const std::size_t stride_pre = + std::accumulate(dims.cbegin(), + dims.cbegin() + axis, + 1, + std::multiplies<std::size_t>()); + const std::size_t stride_post = + std::accumulate(dims.crbegin(), + dims.crbegin() + nbDims - 1 - axis, + 1, + std::multiplies<std::size_t>()); + for (std::size_t outer = 0; outer < stride_pre; ++outer) { + const std::size_t idx_in = + outer * stride_post * dims[axis] + start * stride_post; const std::size_t idx_out = outer * stride_post * sliceSize; std::size_t addedSlices = 0; - for (std::size_t inner = 0; inner < sliceSize; ++inner) - { - std::copy_n(std::next(inputAccumulation, idx_in + inner * step * stride_post), + for (std::size_t inner = 0; inner < sliceSize; ++inner) { + std::copy_n(std::next(inputAccumulation, + idx_in + inner * step * stride_post), stride_post, - std::next(outputAccumulation, idx_out + addedSlices * stride_post)); + std::next(outputAccumulation, + idx_out + addedSlices * stride_post)); addedSlices++; } } @@ -79,7 +98,6 @@ void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts, delete[] inputAccumulation; } inputAccumulation = outputAccumulation; - } // Copy elements from inputAccumulation to output while dividing by divisor std::copy_n(inputAccumulation, totalSize, output); @@ -89,14 +107,20 @@ void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts, } REGISTRAR(SliceImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::SliceImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(SliceImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::SliceImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(SliceImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::SliceImpl_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp index ec2c2696ed6e2ba8cad1536519298d9331921c07..56e8d603ef2382b1725f46ba337cd9e65a132277 100644 --- a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp @@ -12,21 +12,22 @@ #ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ #define 
AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Softmax.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend -using SoftmaxImpl_cpu = OperatorImpl_cpu<Softmax_Op, - void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>; +using SoftmaxImpl_cpu = OperatorImpl_cpu< + Softmax_Op, + void(std::size_t, const std::vector<DimSize_t> &, const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(Softmax_Op, "cpu", Aidge::SoftmaxImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp index 07486a48f1b8cf29f6a6ef8aa934a9decdbafef7..ad569c1386f35eadc87707e089bccc802ae48278 100644 --- a/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp @@ -12,21 +12,23 @@ #ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ -#include "aidge/utils/Registrar.hpp" -#include <cstddef> -#include <cmath> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/data/Data.hpp" +#include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <cmath> +#include <cstddef> #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" namespace Aidge { template <class I, class O> -void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_) -{ - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); +void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, + const std::vector<DimSize_t> &inputDims, + const void *input_, + void *output_) { + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); std::size_t postAxisElems = 1; for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) { @@ -41,20 +43,23 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi for (std::size_t j = 0; j < postAxisElems; ++j) { I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j]; for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) { - std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; + std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + + k * postAxisElems + j; maxVal = std::max(maxVal, input[inIdx]); } // Calculate sum of exponentials within the axis I sumExp = 0; for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) { - std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; + std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + + k * postAxisElems + j; sumExp += std::exp(input[inIdx] - maxVal); } // Calculate softmax for the current slice along the axis - for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) { - std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; + for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) { + std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + + k * postAxisElems + j; output[inIdx] = std::exp(input[inIdx] - maxVal) / sumExp; } } @@ -62,14 +67,20 
@@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi } REGISTRAR(SoftmaxImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, + nullptr}); REGISTRAR(SoftmaxImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, + nullptr}); REGISTRAR(SoftmaxImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl.hpp b/include/aidge/backend/cpu/operator/SqrtImpl.hpp index dba75d1c58fb19ab2284ee0e98a32bff7ac58557..6f442cf15351c5f5c6da3ccff770fe9e90d241ca 100644 --- a/include/aidge/backend/cpu/operator/SqrtImpl.hpp +++ b/include/aidge/backend/cpu/operator/SqrtImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_H_ #define AIDGE_CPU_OPERATOR_SQRTIMPL_H_ -#include <cstddef> // std::size_t +#include <cstddef> // std::size_t #include <memory> #include <tuple> #include <vector> @@ -24,12 +24,13 @@ namespace Aidge { // Operator implementation entry point for the backend -using SqrtImpl_cpu = OperatorImpl_cpu<Sqrt_Op, - void(const std::size_t, const void*, void*), - void(const std::size_t, const void*, void*)>; +using SqrtImpl_cpu = + OperatorImpl_cpu<Sqrt_Op, + void(const std::size_t, const void *, void *), + void(const std::size_t, const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(Sqrt_Op, "cpu", Aidge::SqrtImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp index 0464119cad60742bc58c79da984b30776bc7932f..827383926f6848f1b599e110785299fba053b149 100644 --- a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp @@ -12,8 +12,8 @@ #ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ -#include <cmath> // std::sqrt -#include <cstddef> // std::size_t +#include <cmath> // std::sqrt +#include <cstddef> // std::size_t #include "aidge/utils/Registrar.hpp" @@ -22,11 +22,11 @@ namespace Aidge { template <class I, class O> void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght, - const void* input_, - void* output_) { + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i]))); @@ -35,26 +35,33 @@ void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght, template <class I, class O> void SqrtImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* input_, - void* output_) { + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output 
= static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = static_cast<O>(0.5/(std::sqrt(static_cast<float>(input[i])))); + output[i] = + static_cast<O>(0.5 / (std::sqrt(static_cast<float>(input[i])))); } } REGISTRAR(SqrtImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<float, float>, Aidge::SqrtImpl_cpu_backward_kernel<float, float>}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::SqrtImpl_cpu_forward_kernel<float, float>, + Aidge::SqrtImpl_cpu_backward_kernel<float, float>}); REGISTRAR(SqrtImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<double, double>, Aidge::SqrtImpl_cpu_backward_kernel<double, double>}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::SqrtImpl_cpu_forward_kernel<double, double>, + Aidge::SqrtImpl_cpu_backward_kernel<double, double>}); REGISTRAR(SqrtImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::SqrtImpl_cpu_backward_kernel<int32_t, int32_t>}); -} // namespace Aidge + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::SqrtImpl_cpu_forward_kernel<int32_t, int32_t>, + Aidge::SqrtImpl_cpu_backward_kernel<int32_t, int32_t>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SubImpl.hpp b/include/aidge/backend/cpu/operator/SubImpl.hpp index 2bb22bda74edf7db09404fd5613b6714ddcdf513..7fd0f95d0c0083ecee3a8e56b14ba7abefe721f1 100644 --- a/include/aidge/backend/cpu/operator/SubImpl.hpp +++ b/include/aidge/backend/cpu/operator/SubImpl.hpp @@ -12,21 +12,26 @@ #ifndef AIDGE_CPU_OPERATOR_SUBIMPL_H_ #define AIDGE_CPU_OPERATOR_SUBIMPL_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Sub.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend using SubImpl_cpu = OperatorImpl_cpu<Sub_Op, - void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>; + void(const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const std::vector<std::size_t> &, + const void *, + const void *, + void *)>; // Implementation entry point registration to Operator REGISTRAR(Sub_Op, "cpu", Aidge::SubImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SUBIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp index 0486ed2105b23e95f9cdfcda578e14900fcb2c8e..5db82774af6df10889205dab55d93197845c1f0a 100644 --- a/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp @@ -14,54 +14,64 @@ #include "aidge/utils/Registrar.hpp" -#include <cstddef> // std::size_t -#include <cstdint> // std::int32_t, std::int64_t +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t, std::int64_t #include <vector> #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/operator/SubImpl.hpp" - namespace Aidge { template <class I1, class I2, class O> -void SubImpl_cpu_forward_kernel(const 
std::vector<std::size_t>& input1Dims, - const std::vector<std::size_t>& input2Dims, - const std::vector<std::size_t>& outputDims, - const void* input1_, - const void* input2_, - void* output_) { +void SubImpl_cpu_forward_kernel(const std::vector<std::size_t> &input1Dims, + const std::vector<std::size_t> &input2Dims, + const std::vector<std::size_t> &outputDims, + const void *input1_, + const void *input2_, + void *output_) { - const I1* input_1 = static_cast<const I1*>(input1_); - const I2* input_2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); + const I1 *input_1 = static_cast<const I1 *>(input1_); + const I2 *input_2 = static_cast<const I2 *>(input2_); + O *output = static_cast<O *>(output_); size_t totalElements = 1; for (size_t dimSize : outputDims) { totalElements *= dimSize; } - for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) - { - std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); - std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); - std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); + for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) { + std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); output[oIndex] = input_1[idx1] - input_2[idx2]; - } + } } // Kernels registration to implementation entry point REGISTRAR(SubImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<float, float, float>, nullptr}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::SubImpl_cpu_forward_kernel<float, float, float>, + nullptr}); REGISTRAR(SubImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<double, double, double>, nullptr}); + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::SubImpl_cpu_forward_kernel<double, double, double>, + nullptr}); REGISTRAR(SubImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); + {DataType::Int32}, + {ProdConso::inPlaceModel, + Aidge::SubImpl_cpu_forward_kernel<std::int32_t, + std::int32_t, + std::int32_t>, + nullptr}); REGISTRAR(SubImpl_cpu, - {DataType::Int64}, - {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr}); -} // namespace Aidge + {DataType::Int64}, + {ProdConso::inPlaceModel, + Aidge::SubImpl_cpu_forward_kernel<std::int64_t, + std::int64_t, + std::int64_t>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/TanhImpl.hpp b/include/aidge/backend/cpu/operator/TanhImpl.hpp index b1c2217bd29805eca2cf7b7906316756b75a74e0..fd853184435d7e0dfc24fc3d799e4aa7843e490f 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl.hpp @@ -12,22 +12,23 @@ #ifndef AIDGE_CPU_OPERATOR_TANHIMPL_H_ #define AIDGE_CPU_OPERATOR_TANHIMPL_H_ +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Tanh.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include <memory> #include <vector> namespace Aidge { // Operator implementation entry point for the backend -using TanhImpl_cpu = OperatorImpl_cpu<Tanh_Op, - void(const std::size_t, const 
void*, void*), - void(const std::size_t, const void*, const void*, void*)>; +using TanhImpl_cpu = OperatorImpl_cpu< + Tanh_Op, + void(const std::size_t, const void *, void *), + void(const std::size_t, const void *, const void *, void *)>; // Implementation entry point registration to Operator REGISTRAR(Tanh_Op, "cpu", Aidge::TanhImpl_cpu::create); -} // namespace Aidge +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_TANHIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp index fdcac210484b11f2220dcc2a6813efed503d1913..939237c97d49c26c14e0895c7f1e84b19c8a8ff3 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp @@ -19,13 +19,13 @@ namespace Aidge { template <class I, class O> void TanhImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { + const void *input_, + void *output_) { - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); -//#pragma omp parallel for if (inputLenght > 1024) + // #pragma omp parallel for if (inputLenght > 1024) for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = std::tanh(input[i]); } @@ -33,11 +33,12 @@ void TanhImpl_cpu_forward_kernel(std::size_t inputLenght, template <class O, class GI, class GO> void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* output_, const void* grad_output_, - void* grad_input_) { - const O* output = static_cast<const O*>(output_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); + const void *output_, + const void *grad_output_, + void *grad_input_) { + const O *output = static_cast<const O *>(output_); + const GO *grad_output = static_cast<const GO *>(grad_output_); + GI *grad_input = static_cast<GI *>(grad_input_); for (std::size_t i = 0; i < inputLenght; ++i) { grad_input[i] = (O(1) - output[i] * output[i]) * grad_output[i]; } @@ -45,11 +46,15 @@ void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght, // Kernels registration to implementation entry point REGISTRAR(TanhImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<float, float>, Aidge::TanhImpl_cpu_backward_kernel<float, float, float>}); + {DataType::Float32}, + {ProdConso::inPlaceModel, + Aidge::TanhImpl_cpu_forward_kernel<float, float>, + Aidge::TanhImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(TanhImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<double, double>, Aidge::TanhImpl_cpu_backward_kernel<double, double, double>}); -} // namespace Aidge + {DataType::Float64}, + {ProdConso::inPlaceModel, + Aidge::TanhImpl_cpu_forward_kernel<double, double>, + Aidge::TanhImpl_cpu_backward_kernel<double, double, double>}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ */ diff --git a/include/aidge/utils/sys_info/CpuVersionInfo.hpp b/include/aidge/utils/sys_info/CpuVersionInfo.hpp index 887ce839e079349d9d64505f7184831ffc4cf1c2..0a34ad25777ec5388e19f59aa8b9d009d855d47c 100644 --- a/include/aidge/utils/sys_info/CpuVersionInfo.hpp +++ b/include/aidge/utils/sys_info/CpuVersionInfo.hpp @@ -12,24 +12,33 @@ namespace Aidge { #define GIT_COMMIT_HASH "" #endif void showCpuVersion() { - Log::info("Aidge backend CPU: {} ({}), {} {}", 
PROJECT_VERSION, GIT_COMMIT_HASH, __DATE__, __TIME__); - // Compiler version - #if defined(__clang__) + Log::info("Aidge backend CPU: {} ({}), {} {}", + PROJECT_VERSION, + GIT_COMMIT_HASH, + __DATE__, + __TIME__); + // Compiler version +#if defined(__clang__) /* Clang/LLVM. ---------------------------------------------- */ - Log::info("Clang/LLVM compiler version: {}.{}.{}\n", __clang_major__ , __clang_minor__, __clang_patchlevel__); - #elif defined(__ICC) || defined(__INTEL_COMPILER) + Log::info("Clang/LLVM compiler version: {}.{}.{}\n", + __clang_major__, + __clang_minor__, + __clang_patchlevel__); +#elif defined(__ICC) || defined(__INTEL_COMPILER) /* Intel ICC/ICPC. ------------------------------------------ */ - Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER); - #elif defined(__GNUC__) || defined(__GNUG__) + Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER); +#elif defined(__GNUC__) || defined(__GNUG__) /* GNU GCC/G++. --------------------------------------------- */ - Log::info("GNU GCC/G++ compiler version: {}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); - #elif defined(_MSC_VER) + Log::info("GNU GCC/G++ compiler version: {}.{}.{}", + __GNUC__, + __GNUC_MINOR__, + __GNUC_PATCHLEVEL__); +#elif defined(_MSC_VER) /* Microsoft Visual Studio. --------------------------------- */ - Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER); - #else - Log::info("Unknown compiler\n"); - #endif - + Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER); +#else + Log::info("Unknown compiler\n"); +#endif } -} // namespace Aidge -#endif // AIDGE_UTILS_SYS_INFO_CPU_VERSION_INFO_H +} // namespace Aidge +#endif // AIDGE_UTILS_SYS_INFO_CPU_VERSION_INFO_H diff --git a/python_binding/pybind_cpu.cpp b/python_binding/pybind_cpu.cpp index d5022e1d469ae4171e796baed6c1aa061dd95765..1fee8571de75f66d7fcbd05f2efbc39880d0081b 100644 --- a/python_binding/pybind_cpu.cpp +++ b/python_binding/pybind_cpu.cpp @@ -6,14 +6,13 @@ namespace py = pybind11; namespace Aidge { -void init_cpu_sys_info(py::module& m); +void init_cpu_sys_info(py::module &m); -void init_Aidge(py::module& m){ +void init_Aidge(py::module &m) { init_cpu_sys_info(m); } - PYBIND11_MODULE(aidge_backend_cpu, m) { init_Aidge(m); } -} +} // namespace Aidge diff --git a/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp b/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp index 573bee3659c65f90935e03c06eff5a2998bb9f5b..6540c09def9ff3bc763af3d00f5346b9b4b4717a 100644 --- a/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp +++ b/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp @@ -1,9 +1,9 @@ -#include <pybind11/pybind11.h> #include "aidge/utils/sys_info/CpuVersionInfo.hpp" +#include <pybind11/pybind11.h> namespace py = pybind11; namespace Aidge { -void init_cpu_sys_info(py::module& m){ +void init_cpu_sys_info(py::module &m) { m.def("show_cpu_version", &showCpuVersion); } -} +} // namespace Aidge diff --git a/src/data/Broadcasting.cpp b/src/data/Broadcasting.cpp index 22977aa772e3f3f4810a59ff1fc024cc21c66bd1..c198e9c6f29bf06156513e2881106a189f2f1e33 100644 --- a/src/data/Broadcasting.cpp +++ b/src/data/Broadcasting.cpp @@ -11,17 +11,20 @@ #include "aidge/backend/cpu/data/Broadcasting.hpp" -std::vector<std::size_t> Aidge::getBroadcastedDims(const std::vector<std::size_t>& outputDims, const std::vector<std::size_t>& dimsToBroadcast){ +std::vector<std::size_t> +Aidge::getBroadcastedDims(const std::vector<std::size_t> &outputDims, + const 
std::vector<std::size_t> &dimsToBroadcast) { std::vector<std::size_t> broadcastedDims(outputDims.size(), 1); - for(int j=dimsToBroadcast.size()-1; j>=0; --j) - { - std::size_t idx = outputDims.size() - (dimsToBroadcast.size()-j); - broadcastedDims[idx] = dimsToBroadcast[j]; - } + for (int j = dimsToBroadcast.size() - 1; j >= 0; --j) { + std::size_t idx = outputDims.size() - (dimsToBroadcast.size() - j); + broadcastedDims[idx] = dimsToBroadcast[j]; + } return broadcastedDims; } -std::vector<std::size_t> Aidge::getMultiDimIndices(const std::vector<std::size_t>& dimensions, std::size_t idx){ +std::vector<std::size_t> +Aidge::getMultiDimIndices(const std::vector<std::size_t> &dimensions, + std::size_t idx) { std::vector<std::size_t> indices(dimensions.size(), 0); for (int i = dimensions.size() - 1; i >= 0; --i) { @@ -32,15 +35,16 @@ std::vector<std::size_t> Aidge::getMultiDimIndices(const std::vector<std::size_t return indices; } -std::size_t Aidge::getFlattenedIndex(const std::vector<std::size_t>& dimensions, const std::vector<std::size_t>& indices){ +std::size_t +Aidge::getFlattenedIndex(const std::vector<std::size_t> &dimensions, + const std::vector<std::size_t> &indices) { std::size_t flattenedIdx = 0; std::size_t stride = 1; for (int i = dimensions.size() - 1; i >= 0; --i) { - std::size_t idx = dimensions[i]>1 ? indices[i] : 0; + std::size_t idx = dimensions[i] > 1 ? indices[i] : 0; flattenedIdx += idx * stride; stride *= dimensions[i]; } return flattenedIdx; } - diff --git a/src/operator/AbsImpl.cpp b/src/operator/AbsImpl.cpp index 130d6cf7a64e1e75b8ef128974101a477f802caf..a0671bacff84150e15388ba853d4271164503c55 100644 --- a/src/operator/AbsImpl.cpp +++ b/src/operator/AbsImpl.cpp @@ -19,22 +19,21 @@ #include "aidge/operator/Abs.hpp" #include "aidge/utils/Types.h" -template <> -void Aidge::AbsImpl_cpu::forward() { - const Abs_Op& op = static_cast<const Abs_Op&>(mOp); +template <> void Aidge::AbsImpl_cpu::forward() { + const Abs_Op &op = static_cast<const Abs_Op &>(mOp); // Find the correct kernel type - const auto impl = Registrar<AbsImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<AbsImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.forward( - op.getInput(0)->size(), - op.getInput(0)->getImpl()->rawPtr(), - op.getOutput(0)->getImpl()->rawPtr() - ); + impl.forward(op.getInput(0)->size(), + op.getInput(0)->getImpl()->rawPtr(), + op.getOutput(0)->getImpl()->rawPtr()); } -template <> -void Aidge::AbsImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Abs_Op on backend cpu"); +template <> void Aidge::AbsImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Abs_Op on backend cpu"); } diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index 457a0b17e531fac35ff873f9eedca7bbbe82d459..171e5333b4ff33a4bcf038d33da5c4e402a0430e 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -19,24 +19,30 @@ #include "aidge/backend/cpu/operator/AddImpl_kernels.hpp" #include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" -#include "aidge/utils/Types.h" #include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Types.h" -template <> -void Aidge::AddImpl_cpu::forward() { - const Add_Op& op = static_cast<const Add_Op&>(mOp); +template <> void Aidge::AddImpl_cpu::forward() { + const Add_Op &op = static_cast<const Add_Op &>(mOp); // Check inputs AIDGE_ASSERT(op.getInput(0), "missing input in Add operator"); - 
AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Add forward because the 0-th input has no implementation.");
+    AIDGE_ASSERT(op.getInput(0)->hasImpl(),
+                 "cannot run Add forward because the 0-th input has no "
+                 "implementation.");
     DataType datatypeFirstInput = op.getInput(0)->dataType();
     for (IOIndex_t i = 1; i < op.nbInputs(); ++i) {
         AIDGE_ASSERT(op.getInput(i), "missing input in Add operator");
-        AIDGE_ASSERT(op.getInput(i)->hasImpl(), "cannot run Add forward because the {}-th input has no implementation.", i);
-        AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput, "Cannot add inputs with two differents data type.");
+        AIDGE_ASSERT(op.getInput(i)->hasImpl(),
+                     "cannot run Add forward because the {}-th input has no "
+                     "implementation.",
+                     i);
+        AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput,
+                     "Cannot add inputs with two different data types.");
     }

     // Find the correct kernel type
-    const auto impl = Registrar<AddImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<AddImpl_cpu>::create(getBestMatch(getRequiredSpec()));

     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
@@ -44,29 +50,30 @@ void Aidge::AddImpl_cpu::forward() {
     // this class to avoid that.
     const std::size_t nbDims = op.getOutput(0)->nbDims();
     std::vector<std::vector<std::size_t>> inputsDims;
-    std::vector<const void*> opInputs;
+    std::vector<const void *> opInputs;
     std::vector<std::shared_ptr<Tensor>> inputsFallback(op.nbInputs());
     for (IOIndex_t i = 0; i < op.nbInputs(); ++i) {
         std::vector<std::size_t> inputDims(nbDims, 1);
         auto dims = op.getInput(i)->dims();
-        for(std::size_t j=dims.size()-1; j+1>0; --j)
-        {
-            std::size_t idx = nbDims - (dims.size()-j);
-            inputDims[idx] = dims[j];
-        }
+        for (std::size_t j = dims.size() - 1; j + 1 > 0; --j) {
+            std::size_t idx = nbDims - (dims.size() - j);
+            inputDims[idx] = dims[j];
+        }
         inputsDims.push_back(inputDims);
-        const auto& input = op.getInput(i)->refCastFrom(inputsFallback[i], *op.getOutput(0));
+        const auto &input =
+            op.getInput(i)->refCastFrom(inputsFallback[i], *op.getOutput(0));
         opInputs.push_back(input.getImpl()->rawPtr());
     }

     impl.forward(opInputs,
-        inputsDims,
-        op.getOutput(0)->size(),
-        op.getOutput(0)->dims(),
-        getCPUPtr(op.getRawOutput(0)));
+                 inputsDims,
+                 op.getOutput(0)->size(),
+                 op.getOutput(0)->dims(),
+                 getCPUPtr(op.getRawOutput(0)));
 }

-template <>
-void Aidge::AddImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Add_Op on backend cpu");
+template <> void Aidge::AddImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Add_Op on backend cpu");
 }
diff --git a/src/operator/AndImpl.cpp b/src/operator/AndImpl.cpp
index 2e0f59769ad86f6e4143ab59d089706e34792244..40f2c8997958d4253aa2efb215d47d2dde759d12 100644
--- a/src/operator/AndImpl.cpp
+++ b/src/operator/AndImpl.cpp
@@ -15,35 +15,37 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/operator/And.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/operator/And.hpp"
+#include "aidge/utils/Types.h"

 #include "aidge/backend/cpu/operator/AndImpl.hpp"
 #include "aidge/backend/cpu/operator/AndImpl_kernels.hpp"

-template <>
-void Aidge::AndImpl_cpu::forward() {
-    const std::vector<std::size_t> inputDims0 =
getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); - const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); - +template <> void Aidge::AndImpl_cpu::forward() { + const std::vector<std::size_t> inputDims0 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); + const std::vector<std::size_t> inputDims1 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); // Find the correct kernel type - const auto impl = Registrar<AndImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<AndImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(inputDims0, - inputDims1, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1)), - getCPUPtr(mOp.getRawOutput(0))); + inputDims1, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::AndImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for And_Op on backend cpu"); +template <> void Aidge::AndImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for And_Op on backend cpu"); } diff --git a/src/operator/ArgMaxImpl.cpp b/src/operator/ArgMaxImpl.cpp index b8fb85a7cd86a788cda69307d5ed8f363619f9f0..29bfc2f1d99b27fd82de1136be846f8920dbd853 100644 --- a/src/operator/ArgMaxImpl.cpp +++ b/src/operator/ArgMaxImpl.cpp @@ -14,26 +14,27 @@ #include <memory> #include <vector> -#include "aidge/utils/Types.h" -#include "aidge/operator/ArgMax.hpp" #include "aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/utils/Types.h" -template <> -void Aidge::ArgMaxImpl_cpu::forward() { - const ArgMax_Op& op_ = dynamic_cast<const ArgMax_Op&>(mOp); +template <> void Aidge::ArgMaxImpl_cpu::forward() { + const ArgMax_Op &op_ = dynamic_cast<const ArgMax_Op &>(mOp); // Find the correct kernel type - const auto impl = Registrar<ArgMaxImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ArgMaxImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.axis(), - op_.selectLastIndex(), - op_.getInput(0)->dims(), - op_.getInput(0)->getImpl()->rawPtr(), - op_.getOutput(0)->getImpl()->rawPtr()); + op_.selectLastIndex(), + op_.getInput(0)->dims(), + op_.getInput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->getImpl()->rawPtr()); } -template <> -void Aidge::ArgMaxImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ArgMax_Op on backend cpu"); +template <> void Aidge::ArgMaxImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for ArgMax_Op on backend cpu"); } diff --git a/src/operator/AtanImpl.cpp b/src/operator/AtanImpl.cpp index af3393e7eb13fad4b414172edc7d1ab32ffcc573..2a0210aa6c654596899c471b78c054bd9ba2c154 100644 --- a/src/operator/AtanImpl.cpp +++ b/src/operator/AtanImpl.cpp @@ -15,40 +15,45 @@ #include <thread> // 
std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Atan.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/AtanImpl.hpp" #include "aidge/backend/cpu/operator/AtanImpl_kernels.hpp" -template <> -void Aidge::AtanImpl_cpu::forward() { - const Atan_Op& op_ = dynamic_cast<const Atan_Op&>(mOp); +template <> void Aidge::AtanImpl_cpu::forward() { + const Atan_Op &op_ = dynamic_cast<const Atan_Op &>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); std::shared_ptr<Tensor> out0 = op_.getOutput(0); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(in0->size(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::AtanImpl_cpu::backward() { - const Atan_Op& op_ = dynamic_cast<const Atan_Op&>(mOp); - std::shared_ptr<Tensor> out0 = op_.getOutput(0); +template <> void Aidge::AtanImpl_cpu::backward() { + const Atan_Op &op_ = dynamic_cast<const Atan_Op &>(mOp); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); - AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); + AIDGE_ASSERT(out0, + "missing output #0 for current {} operator", + op_.type()); // Find the correct kernel type - const auto impl = Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + impl.backward(gra_int0->size(), + getCPUPtr(out0), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0)); } diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index 01a5e8cf1772161f5cf98d3a8bd52f43ac7a1d0d..4f896340a1a5bb3d5231625848974cee43e0d807 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -21,24 +21,24 @@ #include "aidge/operator/AvgPooling.hpp" #include "aidge/utils/Types.h" -template <> -void Aidge::AvgPoolingImpl2D_cpu::forward() { - const auto& op_ = dynamic_cast<const AvgPooling_Op<2>&>(mOp); +template <> void Aidge::AvgPoolingImpl2D_cpu::forward() { + const auto &op_ = dynamic_cast<const AvgPooling_Op<2> &>(mOp); assert(op_.getInput(0) && "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<AvgPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = Registrar<AvgPoolingImpl2D_cpu>::create( + getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.strideDims(), - op_.kernelDims(), - op_.getInput(0)->template dims<4>(), - getCPUPtr(op_.getInput(0)), - getCPUPtr(op_.getOutput(0))); + op_.kernelDims(), + op_.getInput(0)->template dims<4>(), + getCPUPtr(op_.getInput(0)), + getCPUPtr(op_.getOutput(0))); } -template <> -void Aidge::AvgPoolingImpl2D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for AvgPooling_Op<2> on backend cpu"); +template <> void Aidge::AvgPoolingImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for 
AvgPooling_Op<2> on backend cpu"); } - diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp index 9f1d986e63f14e6038c80054e5e3bc631ec24224..4781b0d61f63b7651c3076c39356f6f5b2768236 100644 --- a/src/operator/BatchNormImpl.cpp +++ b/src/operator/BatchNormImpl.cpp @@ -11,19 +11,17 @@ #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" - #include <numeric> // std::accumulate #include <vector> -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/BatchNorm.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp" -template <> -void Aidge::BatchNormImpl2D_cpu::forward() { - const auto& op_ = dynamic_cast<const BatchNorm_Op<2>&>(mOp); +template <> void Aidge::BatchNormImpl2D_cpu::forward() { + const auto &op_ = dynamic_cast<const BatchNorm_Op<2> &>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 for BatchNorm Operator"); AIDGE_ASSERT(op_.getInput(1), "missing input #1 for BatchNorm Operator"); AIDGE_ASSERT(op_.getInput(2), "missing input #2 for BatchNorm Operator"); @@ -33,22 +31,24 @@ void Aidge::BatchNormImpl2D_cpu::forward() { AIDGE_ASSERT(op_.getOutput(0)->nbDims() == 4, ""); // Find the correct kernel type - const auto impl = Registrar<BatchNormImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = Registrar<BatchNormImpl2D_cpu>::create( + getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.epsilon(), - op_.momentum(), - op_.getInput(0)->template dims<4>(), - getCPUPtr(op_.getRawInput(0)), - getCPUPtr(op_.getRawInput(1)), - getCPUPtr(op_.getRawInput(2)), - getCPUPtr(op_.getRawInput(3)), - getCPUPtr(op_.getRawInput(4)), - getCPUPtr(op_.getRawOutput(0)), - true); + op_.momentum(), + op_.getInput(0)->template dims<4>(), + getCPUPtr(op_.getRawInput(0)), + getCPUPtr(op_.getRawInput(1)), + getCPUPtr(op_.getRawInput(2)), + getCPUPtr(op_.getRawInput(3)), + getCPUPtr(op_.getRawInput(4)), + getCPUPtr(op_.getRawOutput(0)), + true); } -template <> -void Aidge::BatchNormImpl2D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BatchNorm_Op<2> on backend cpu"); +template <> void Aidge::BatchNormImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for BatchNorm_Op<2> on backend cpu"); } diff --git a/src/operator/BitShiftImpl.cpp b/src/operator/BitShiftImpl.cpp index 1e0f79fd29fd140f0b41c64d245b9b240da80028..80725e8201407444d4b6b1a26e3809189dd8b4ae 100644 --- a/src/operator/BitShiftImpl.cpp +++ b/src/operator/BitShiftImpl.cpp @@ -10,48 +10,46 @@ ********************************************************************************/ #include <cassert> -#include <chrono> // std::chrono::milliseconds +#include <chrono> // std::chrono::milliseconds #include <numeric> -#include <thread> // std::this_thread::sleep_for +#include <thread> // std::this_thread::sleep_for #include <vector> - -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/BitShiftImpl.hpp" #include "aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp" -template<> -void Aidge::BitShiftImpl_cpu::forward() { - - const auto& op_ = dynamic_cast<const BitShift_Op&>(mOp); +template <> void Aidge::BitShiftImpl_cpu::forward() { + const auto &op_ = dynamic_cast<const BitShift_Op &>(mOp); - const auto impl = 
Registrar<BitShiftImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<BitShiftImpl_cpu>::create(getBestMatch(getRequiredSpec())); - - const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); - const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); + const std::vector<std::size_t> inputDims0 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); + const std::vector<std::size_t> inputDims1 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); BitShift_Op::BitShiftDirection direction = op_.direction(); // Call kernel - impl.forward( - direction, - inputDims0, - inputDims1, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1)), - getCPUPtr(mOp.getRawOutput(0))); - + impl.forward(direction, + inputDims0, + inputDims1, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::BitShiftImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BitShift_Op on backend cpu"); +template <> void Aidge::BitShiftImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for BitShift_Op on backend cpu"); } \ No newline at end of file diff --git a/src/operator/ClipImpl.cpp b/src/operator/ClipImpl.cpp index 931d25426a8f6e08363bfc08d23f1714e934634c..cf539768c94443ffda5eae01a0e5c0f4c1b347ee 100644 --- a/src/operator/ClipImpl.cpp +++ b/src/operator/ClipImpl.cpp @@ -12,56 +12,54 @@ #include <memory> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Clip.hpp" -#include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/ClipImpl.hpp" #include "aidge/backend/cpu/operator/ClipImpl_kernels.hpp" -template<> -void Aidge::ClipImpl_cpu::forward() { +template <> void Aidge::ClipImpl_cpu::forward() { - const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp); + const Clip_Op &op_ = dynamic_cast<const Clip_Op &>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); std::shared_ptr<Tensor> out0 = op_.getOutput(0); AIDGE_ASSERT(in0, "missing input #0"); /*AIDGE_ASSERT(in1, "missing input #1 -> Min value empty shape Tensor"); AIDGE_ASSERT(in2, "missing input #2 -> Max value empty shape Tensor");*/ // Find the correct kernel type - const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.forward( - op_.min(), - op_.max(), - getCPUPtr(mOp.getRawInput(0)), - in0->size(), - getCPUPtr(mOp.getRawOutput(0)) - ); + impl.forward(op_.min(), + op_.max(), + getCPUPtr(mOp.getRawInput(0)), + in0->size(), + getCPUPtr(mOp.getRawOutput(0))); } -template<> -void Aidge::ClipImpl_cpu::backward() { +template <> void 
Aidge::ClipImpl_cpu::backward() { - const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp); - std::shared_ptr<Tensor> in0 = op_.getInput(0); - std::shared_ptr<Tensor> out0 = op_.getOutput(0); + const Clip_Op &op_ = dynamic_cast<const Clip_Op &>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); std::shared_ptr<Tensor> gra_in0 = op_.getInput(0)->grad(); - std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); - AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); - + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + AIDGE_ASSERT(out0, + "missing output #0 for current {} operator", + op_.type()); + // Find the correct kernel type - const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.backward( - op_.min(), - op_.max(), - gra_in0->size(), - getCPUPtr(in0), - getCPUPtr(gra_out0), - getCPUPtr(gra_in0) - ); + impl.backward(op_.min(), + op_.max(), + gra_in0->size(), + getCPUPtr(in0), + getCPUPtr(gra_out0), + getCPUPtr(gra_in0)); } diff --git a/src/operator/ConstantOfShapeImpl.cpp b/src/operator/ConstantOfShapeImpl.cpp index 16e4b762ba04e5f01bfccf965f6de3650fa2e734..03b5a9726a4a132e279cf53b034db496853afc31 100644 --- a/src/operator/ConstantOfShapeImpl.cpp +++ b/src/operator/ConstantOfShapeImpl.cpp @@ -23,22 +23,24 @@ #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -template <> -void Aidge::ConstantOfShapeImpl_cpu::forward() { - const ConstantOfShape_Op &op_ = static_cast<const ConstantOfShape_Op &>(mOp); - // Check if input is provided - AIDGE_ASSERT(op_.getInput(0), "{} : Missing input 0", __func__); +template <> void Aidge::ConstantOfShapeImpl_cpu::forward() { + const ConstantOfShape_Op &op_ = + static_cast<const ConstantOfShape_Op &>(mOp); + // Check if input is provided + AIDGE_ASSERT(op_.getInput(0), "{} : Missing input 0", __func__); // Find the correct kernel type - const auto impl = Registrar<ConstantOfShapeImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = Registrar<ConstantOfShapeImpl_cpu>::create( + getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.getOutput(0)->dims(), - op_.value(), - op_.getOutput(0)->getImpl()->rawPtr()); + op_.value(), + op_.getOutput(0)->getImpl()->rawPtr()); } -template <> -void Aidge::ConstantOfShapeImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConstantOfShape_Op on backend cpu"); +template <> void Aidge::ConstantOfShapeImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for ConstantOfShape_Op on backend cpu"); } diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp index d86bba8d1abf348eb25e2d9c69d04b5c33a8a176..35f0b8ad484ab8e165106ee5b4dcb7385d9511ae 100644 --- a/src/operator/ConvDepthWiseImpl.cpp +++ b/src/operator/ConvDepthWiseImpl.cpp @@ -21,78 +21,98 @@ #include "aidge/utils/Log.hpp" #include "aidge/utils/Types.h" -template <> -void Aidge::ConvDepthWiseImpl1D_cpu::forward() { - const auto& op_ = dynamic_cast<const ConvDepthWise_Op<1>&>(mOp); +template <> void Aidge::ConvDepthWiseImpl1D_cpu::forward() { + const auto &op_ = dynamic_cast<const ConvDepthWise_Op<1> &>(mOp); - AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator"); - AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator"); + 
AIDGE_ASSERT(op_.getInput(0),
+                 "missing input #0 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(1),
+                 "missing input #1 in ConvDepthWise Operator");
 
-    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 3), "support for 4-dimensions tensors only");
+    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 3),
+                 "support for 3-dimensional tensors only");
 
     // Find the correct kernel type
-    const auto impl = Registrar<ConvDepthWiseImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl = Registrar<ConvDepthWiseImpl1D_cpu>::create(
+        getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
-    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
-    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
-    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+    const auto &input0 =
+        op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto &input1 =
+        op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto &input2 =
+        (op_.getInput(2))
+            ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0))
+            : Tensor();
 
     // Call kernel
-    impl.forward(op_.strideDims(),
-            op_.dilationDims(),
-            op_.kernelDims(), // Conv attributes
-            op_.getInput(0)->template dims<3>(), // input dimensions
-            input0.getImpl()->rawPtr(), // input
-            input1.getImpl()->rawPtr(), // weight
-            (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr, // bias
-            getCPUPtr(mOp.getRawOutput(0)) // output
-            );
+    impl.forward(
+        op_.strideDims(),
+        op_.dilationDims(),
+        op_.kernelDims(), // Conv attributes
+        op_.getInput(0)->template dims<3>(), // input dimensions
+        input0.getImpl()->rawPtr(), // input
+        input1.getImpl()->rawPtr(), // weight
+        (op_.getInput(2)) ? 
input2.getImpl()->rawPtr() : nullptr, // bias + getCPUPtr(mOp.getRawOutput(0)) // output + ); } -template <> -void Aidge::ConvDepthWiseImpl1D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConvDepthWise_Op<1> on backend cpu"); +template <> void Aidge::ConvDepthWiseImpl1D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for ConvDepthWise_Op<1> on backend cpu"); } -template <> -void Aidge::ConvDepthWiseImpl2D_cpu::forward() { - const auto& op_ = dynamic_cast<const ConvDepthWise_Op<2>&>(mOp); +template <> void Aidge::ConvDepthWiseImpl2D_cpu::forward() { + const auto &op_ = dynamic_cast<const ConvDepthWise_Op<2> &>(mOp); - AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator"); - AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator"); - AIDGE_ASSERT(op_.getInput(2), "missing input #2 in ConvDepthWise Operator"); + AIDGE_ASSERT(op_.getInput(0), + "missing input #0 in ConvDepthWise Operator"); + AIDGE_ASSERT(op_.getInput(1), + "missing input #1 in ConvDepthWise Operator"); + AIDGE_ASSERT(op_.getInput(2), + "missing input #2 in ConvDepthWise Operator"); - AIDGE_ASSERT((op_.getInput(0)->nbDims() == 4), "support for 4-dimensions tensors only"); + AIDGE_ASSERT((op_.getInput(0)->nbDims() == 4), + "support for 4-dimensions tensors only"); // Find the correct kernel type - const auto impl = Registrar<ConvDepthWiseImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = Registrar<ConvDepthWiseImpl2D_cpu>::create( + getBestMatch(getRequiredSpec())); - // Convert input data (no overhead if not needed!) + // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each // call to forward(). We might put the following shared_ptr as members of // this class to avoid that. std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; - const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); - const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); - const auto& input2 = op_.getInput(2) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); + const auto &input0 = + op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); + const auto &input1 = + op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); + const auto &input2 = + op_.getInput(2) + ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) + : Tensor(); // Call kernel impl.forward(op_.strideDims(), - op_.dilationDims(), - op_.kernelDims(), - op_.getInput(0)->template dims<4>(), - input0.getImpl()->rawPtr(), - input1.getImpl()->rawPtr(), - op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, - getCPUPtr(op_.getRawOutput(0))); + op_.dilationDims(), + op_.kernelDims(), + op_.getInput(0)->template dims<4>(), + input0.getImpl()->rawPtr(), + input1.getImpl()->rawPtr(), + op_.getInput(2) ? 
input2.getImpl()->rawPtr() : nullptr, + getCPUPtr(op_.getRawOutput(0))); } -template <> -void Aidge::ConvDepthWiseImpl2D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConvDepthWise_Op<2> on backend cpu"); +template <> void Aidge::ConvDepthWiseImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for ConvDepthWise_Op<2> on backend cpu"); } diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index fdfe19fbf4bf3e71c86aa28b966cfb21a1b5ba40..8382c48e65d265c93a49c43f5edc927b98e877b6 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -22,78 +22,92 @@ #include "aidge/operator/Conv.hpp" #include "aidge/utils/Types.h" -template <> -void Aidge::ConvImpl1D_cpu::forward() { - const auto& op_ = static_cast<const Conv_Op<1>&>(mOp); +template <> void Aidge::ConvImpl1D_cpu::forward() { + const auto &op_ = static_cast<const Conv_Op<1> &>(mOp); // FIXME: uncomment the following code once memory handling will work AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); // Find the correct kernel type - const auto impl = Registrar<ConvImpl1D_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ConvImpl1D_cpu>::create(getBestMatch(getRequiredSpec())); // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each // call to forward(). We might put the following shared_ptr as members of // this class to avoid that. std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; - const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); - const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); - const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); + const auto &input0 = + op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); + const auto &input1 = + op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); + const auto &input2 = + (op_.getInput(2)) + ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) + : Tensor(); // Call kernel - impl.forward(op_.strideDims(), - op_.dilationDims(), - op_.kernelDims(), - op_.getInput(0)->template dims<3>(), // input dimensions - dynamic_cast<const Conv_Op<1>&>(mOp).outChannels(), // outChannels - input0.getImpl()->rawPtr(), // input - input1.getImpl()->rawPtr(), // weight - op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias - getCPUPtr(mOp.getRawOutput(0)) // output - ); + impl.forward( + op_.strideDims(), + op_.dilationDims(), + op_.kernelDims(), + op_.getInput(0)->template dims<3>(), // input dimensions + dynamic_cast<const Conv_Op<1> &>(mOp).outChannels(), // outChannels + input0.getImpl()->rawPtr(), // input + input1.getImpl()->rawPtr(), // weight + op_.getInput(2) ? 
input2.getImpl()->rawPtr() : nullptr, // bias + getCPUPtr(mOp.getRawOutput(0)) // output + ); } -template <> -void Aidge::ConvImpl1D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<1> on backend cpu"); +template <> void Aidge::ConvImpl1D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Conv_Op<1> on backend cpu"); } -template <> -void Aidge::ConvImpl2D_cpu::forward() { - const auto& op_ = dynamic_cast<const Conv_Op<2>&>(mOp); +template <> void Aidge::ConvImpl2D_cpu::forward() { + const auto &op_ = dynamic_cast<const Conv_Op<2> &>(mOp); // FIXME: uncomment the following code once memory handling will work AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); // Find the correct kernel type - const auto impl = Registrar<ConvImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ConvImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each // call to forward(). We might put the following shared_ptr as members of // this class to avoid that. std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; - const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); - const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); - const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); + const auto &input0 = + op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); + const auto &input1 = + op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); + const auto &input2 = + (op_.getInput(2)) + ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) + : Tensor(); // Call kernel - impl.forward(op_.strideDims(), - op_.dilationDims(), - op_.kernelDims(), - op_.getInput(0)->template dims<4>(), // input dimensions - dynamic_cast<const Conv_Op<2>&>(mOp).outChannels(), // outChannels - input0.getImpl()->rawPtr(), // input - input1.getImpl()->rawPtr(), // weight - op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias - getCPUPtr(mOp.getRawOutput(0)) // output - ); + impl.forward( + op_.strideDims(), + op_.dilationDims(), + op_.kernelDims(), + op_.getInput(0)->template dims<4>(), // input dimensions + dynamic_cast<const Conv_Op<2> &>(mOp).outChannels(), // outChannels + input0.getImpl()->rawPtr(), // input + input1.getImpl()->rawPtr(), // weight + op_.getInput(2) ? 
input2.getImpl()->rawPtr() : nullptr, // bias + getCPUPtr(mOp.getRawOutput(0)) // output + ); } -template <> -void Aidge::ConvImpl2D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<2> on backend cpu"); +template <> void Aidge::ConvImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Conv_Op<2> on backend cpu"); } diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp index 135b32b5005a961e55910e758f9b7102ca51b63c..0bf0b1678cfce0112b4b9914f40a9954ff16c966 100644 --- a/src/operator/DivImpl.cpp +++ b/src/operator/DivImpl.cpp @@ -19,20 +19,20 @@ #include "aidge/data/Tensor.hpp" #include "aidge/utils/Types.h" -template <> -void Aidge::DivImpl_cpu::forward() { +template <> void Aidge::DivImpl_cpu::forward() { // Find the correct kernel type // auto kernelFunc = Registrar<DivImplForward_cpu>::create({ // std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), // std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), // std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + // const std::vector<std::size_t> inputDims0 = + // getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), // std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); - // const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + // const std::vector<std::size_t> inputDims1 = + // getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), // std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); - // auto a = std::static_pointer_cast<Tensor>(mOp.getRawInput(0)); // auto b = std::static_pointer_cast<Tensor>(mOp.getRawInput(1)); @@ -44,41 +44,54 @@ void Aidge::DivImpl_cpu::forward() { // getCPUPtr(mOp.getRawInput(1)), // getCPUPtr(mOp.getRawOutput(0))); -///////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////// // [5,2,1,7] & [2,6,7] // 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7] // 2. Find the highest equal dimension -> 3 - // Exception: if the first diverging dimension is the last one, then -> 4 (dims.size()) + // Exception: if the first diverging dimension is the last one, then -> + // 4 (dims.size()) // 3. Compute the highest number of contiguous data -> 7 // 4. Compute stride and offset step for the broadcast mechanism // 5. 
Call a simple kernel - const auto& opTensor = static_cast<const Div_Op&>(mOp); + const auto &opTensor = static_cast<const Div_Op &>(mOp); // Find the correct kernel type - const auto impl = Registrar<DivImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<DivImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Compute compatible input dimensions - std::vector<std::size_t> dims0 = opTensor.getInput(0)->dims(); - std::vector<std::size_t> dims1 = opTensor.getInput(1)->dims(); - const std::vector<std::size_t>& outDims = opTensor.getOutput(0)->dims(); + std::vector<std::size_t> dims0 = opTensor.getInput(0)->dims(); + std::vector<std::size_t> dims1 = opTensor.getInput(1)->dims(); + const std::vector<std::size_t> &outDims = opTensor.getOutput(0)->dims(); - // special case for equal dimensions, the kernel is called with the entire arrays at once + // special case for equal dimensions, the kernel is called with the entire + // arrays at once if (dims0 == dims1) { - const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); - impl.forward(input0_contiguous_size, input0_contiguous_size, input0_contiguous_size, - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1)), - getCPUPtr(mOp.getRawOutput(0))); + const std::size_t input0_contiguous_size = + std::accumulate(dims0.cbegin(), + dims0.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + impl.forward(input0_contiguous_size, + input0_contiguous_size, + input0_contiguous_size, + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); return; } - // set dimensions to be of equal size by filling the smallest one with ones. + // set dimensions to be of equal size by filling the smallest one with + // ones. if (dims0.size() > dims1.size()) { - dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1)); - } - else if (dims1.size() > dims0.size()) { - dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1)); + dims1.insert(dims1.cbegin(), + dims0.size() - dims1.size(), + std::size_t(1)); + } else if (dims1.size() > dims0.size()) { + dims0.insert(dims0.cbegin(), + dims1.size() - dims0.size(), + std::size_t(1)); } const std::size_t nbDims = dims0.size(); @@ -87,11 +100,14 @@ void Aidge::DivImpl_cpu::forward() { // std::size_t contiguousIdx = nbDims - 1; std::size_t contiguousIdx = nbDims; while (contiguousIdx-- > 0) { - // for (; contiguousIdx+1 > 0; --contiguousIdx) { + // for (; contiguousIdx+1 > 0; --contiguousIdx) { if (dims0[contiguousIdx] != dims1[contiguousIdx]) { - if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1 - const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1; - while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) { + if (contiguousIdx == + (nbDims - 1)) { // last dimensions of one of the input Tensor + // are of size 1 + const std::vector<std::size_t> &dims = + (dims0[contiguousIdx] == 1) ? 
dims0 : dims1; + while ((contiguousIdx + 1 > 0) && (dims[contiguousIdx] == 1)) { --contiguousIdx; } } @@ -101,21 +117,41 @@ void Aidge::DivImpl_cpu::forward() { ++contiguousIdx; // Compute the highest number of contiguous data for each Tensor - const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); - const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); - const std::size_t output_contiguous_size = std::accumulate(outDims.cbegin()+contiguousIdx, outDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t input0_contiguous_size = + std::accumulate(dims0.cbegin() + contiguousIdx, + dims0.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + const std::size_t input1_contiguous_size = + std::accumulate(dims1.cbegin() + contiguousIdx, + dims1.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + const std::size_t output_contiguous_size = + std::accumulate(outDims.cbegin() + contiguousIdx, + outDims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); // initialize strides to iterate through data because of broadcasting - std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx); - std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx); - std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx); - std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_post0 = + std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_post1 = + std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step0 = + std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step1 = + std::make_unique<std::int32_t[]>(contiguousIdx); if (contiguousIdx > 0) { stride_post0[contiguousIdx - 1] = 1; stride_post1[contiguousIdx - 1] = 1; - for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) { - stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]); - stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]); + for (std::size_t i = contiguousIdx - 2; + i != static_cast<std::size_t>(-1); + --i) { + stride_post0[i] = + stride_post0[i + 1] * static_cast<std::int32_t>(dims0[i + 1]); + stride_post1[i] = + stride_post1[i + 1] * static_cast<std::int32_t>(dims1[i + 1]); } for (std::size_t i = 0; i != contiguousIdx; ++i) { stride_step0[i] = (dims0[i] == 1) ? 
1 - stride_post0[i] : 1;
@@ -128,17 +164,24 @@ void Aidge::DivImpl_cpu::forward() {
     std::size_t offsetIn1 = 0;
     std::size_t offsetOut = 0;
-    std::size_t dim = contiguousIdx - 1;
-    const std::size_t nbStacks = std::accumulate(outDims.cbegin(), outDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
+    std::size_t dim = contiguousIdx - 1;
+    const std::size_t nbStacks =
+        std::accumulate(outDims.cbegin(),
+                        outDims.cbegin() + contiguousIdx,
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
     for (std::size_t stack = 0; stack < nbStacks;) {
-        impl.forward(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
-                    getCPUPtr(mOp.getRawInput(0), offsetIn0*input0_contiguous_size),
-                    getCPUPtr(mOp.getRawInput(1), offsetIn1*input1_contiguous_size),
-                    getCPUPtr(mOp.getRawOutput(0), offsetOut*output_contiguous_size));
+        impl.forward(
+            input0_contiguous_size,
+            input1_contiguous_size,
+            output_contiguous_size,
+            getCPUPtr(mOp.getRawInput(0), offsetIn0 * input0_contiguous_size),
+            getCPUPtr(mOp.getRawInput(1), offsetIn1 * input1_contiguous_size),
+            getCPUPtr(mOp.getRawOutput(0),
+                      offsetOut * output_contiguous_size));
         if (++stack < nbStacks) {
             std::size_t tmp_stack = stack;
-            while(tmp_stack % outDims[dim] == 0) {
+            while (tmp_stack % outDims[dim] == 0) {
                 tmp_stack /= outDims[dim];
                 dim--;
             }
@@ -150,7 +193,8 @@ void Aidge::DivImpl_cpu::forward() {
     }
 }
 
-template <>
-void Aidge::DivImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Div_Op on backend cpu");
+template <> void Aidge::DivImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Div_Op on backend cpu");
 }
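The stride_post/stride_step mechanism above lets DivImpl_cpu::forward() advance through two broadcast inputs block by block instead of recomputing a multi-dimensional index for every element. The following self-contained sketch (illustrative code, not taken from the repository) spells out the index mapping it implements, reusing the [5,2,1,7] / [1,2,6,7] shapes from the comment above:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // Shapes already right-aligned to the same rank (step 1. above).
    const std::vector<std::size_t> dims0{5, 2, 1, 7};
    const std::vector<std::size_t> dims1{1, 2, 6, 7};
    const std::size_t nbDims = dims0.size();

    std::vector<std::size_t> out(nbDims), str0(nbDims), str1(nbDims);
    for (std::size_t i = 0; i < nbDims; ++i)
        out[i] = std::max(dims0[i], dims1[i]); // broadcast output shape

    // Row-major strides of each input.
    str0[nbDims - 1] = str1[nbDims - 1] = 1;
    for (std::size_t i = nbDims - 1; i-- > 0;) {
        str0[i] = str0[i + 1] * dims0[i + 1];
        str1[i] = str1[i + 1] * dims1[i + 1];
    }

    std::size_t total = 1;
    for (const auto d : out)
        total *= d;

    for (std::size_t flat = 0; flat < total; ++flat) {
        std::size_t r = flat, off0 = 0, off1 = 0;
        for (std::size_t i = nbDims; i-- > 0;) {
            const std::size_t c = r % out[i]; // output coordinate on dim i
            r /= out[i];
            off0 += (dims0[i] == 1 ? 0 : c) * str0[i]; // size-1 dims repeat
            off1 += (dims1[i] == 1 ? 0 : c) * str1[i];
        }
        if (flat < 3) // out[flat] = in0[off0] / in1[off1];
            std::cout << flat << " -> (" << off0 << ", " << off1 << ")\n";
    }
    return 0;
}

Each output element pairs with input offsets whose coordinates are clamped to 0 on size-1 dimensions; the kernel above reaches the same pairing by only adding precomputed steps whenever a dimension wraps, which is why it can hand whole contiguous blocks to a simple kernel per stack.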
diff --git a/src/operator/ErfImpl.cpp b/src/operator/ErfImpl.cpp
index 42c6ce878abe227f74d7df4a9bf31ebc4c63eb88..1be68647af052a08b9a2bae687caa1f91aa402c1 100644
--- a/src/operator/ErfImpl.cpp
+++ b/src/operator/ErfImpl.cpp
@@ -19,22 +19,21 @@
 #include "aidge/operator/Erf.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::ErfImpl_cpu::forward() {
-    const Erf_Op& op = static_cast<const Erf_Op&>(mOp);
+template <> void Aidge::ErfImpl_cpu::forward() {
+    const Erf_Op &op = static_cast<const Erf_Op &>(mOp);
 
     // Find the correct kernel type
-    const auto impl = Registrar<ErfImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ErfImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.forward(
-        op.getInput(0)->size(),
-        op.getInput(0)->getImpl()->rawPtr(),
-        op.getOutput(0)->getImpl()->rawPtr()
-    );
+    impl.forward(op.getInput(0)->size(),
+                 op.getInput(0)->getImpl()->rawPtr(),
+                 op.getOutput(0)->getImpl()->rawPtr());
 }
 
-template <>
-void Aidge::ErfImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Erf_Op on backend cpu");
+template <> void Aidge::ErfImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Erf_Op on backend cpu");
 }
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index 359452712f94be078122266089cc1da89baf50d5..d0e8a5463f949c8d4cff22fb52c1dbff5aab448c 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -11,7 +11,7 @@
 
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 
-#include <cstddef>  // std::size_t
+#include <cstddef> // std::size_t
 #include <functional>
 #include <memory>
 #include <tuple>
@@ -22,59 +22,72 @@
 #include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Types.h"
 
-
-template <>
-void Aidge::FCImpl_cpu::forward()
-{
-    const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
+template <> void Aidge::FCImpl_cpu::forward() {
+    const FC_Op &op_ = dynamic_cast<const FC_Op &>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1");
 
-    const auto impl = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
-    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
-    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0)));
-    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0))) : Tensor();
+    const auto &input0 =
+        op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
+    const auto &input1 =
+        op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0)));
+    const auto &input2 =
+        (op_.getInput(2))
+            ? op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0)))
+            : Tensor();
 
     // Call kernel
     const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
     impl.forward(batchSize,
-        input1.dims()[1], // nb input features
-        input1.dims()[0], // nb output features
-        input0.getImpl()->rawPtr(),
-        input1.getImpl()->rawPtr(),
-        (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr,
-        getCPUPtr(mOp.getRawOutput(0)));
+                 input1.dims()[1], // nb input features
+                 input1.dims()[0], // nb output features
+                 input0.getImpl()->rawPtr(),
+                 input1.getImpl()->rawPtr(),
+                 (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr,
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::FCImpl_cpu::backward()
-{
-    const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
-    const auto& fc_grad = op_.getOutput(0)->grad();
-    AIDGE_ASSERT(fc_grad, "missing ouput #0 gradient");
+template <> void Aidge::FCImpl_cpu::backward() {
+    const FC_Op &op_ = dynamic_cast<const FC_Op &>(mOp);
+    const auto &fc_grad = op_.getOutput(0)->grad();
+    AIDGE_ASSERT(fc_grad, "missing output #0 gradient");
     AIDGE_ASSERT(op_.getInput(0)->grad(), "missing input #0 gradient");
     AIDGE_ASSERT(op_.getInput(1)->grad(), "missing input #1 gradient");
 
-    const auto impl = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
    // TODO: right now, if needed, memory will be allocated/deallocated at each
    // call to forward(). We might put the following shared_ptr as members of
    // this class to avoid that.
-    std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
-    const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
-    const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
-    const auto& input2grad = (op_.getInput(2)) ? 
op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0))) : Tensor(); + std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, + input2gradFallback; + const auto &input0grad = + op_.getInput(0)->grad()->refCastFrom(input0gradFallback, + *(op_.getOutput(0))); + const auto &input1grad = + op_.getInput(1)->grad()->refCastFrom(input1gradFallback, + *(op_.getOutput(0))); + const auto &input2grad = + (op_.getInput(2)) + ? op_.getInput(2)->grad()->refCastFrom(input2gradFallback, + *(op_.getOutput(0))) + : Tensor(); // Call kernel - const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1; - impl.backward(batchSize, + const auto batchSize = + (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1; + impl.backward( + batchSize, input1grad.dims()[1], // nb input features input1grad.dims()[0], // nb output features getCPUPtr(fc_grad), diff --git a/src/operator/FoldImpl.cpp b/src/operator/FoldImpl.cpp index 10f3d7b50bac9a1fbfc403609bdccb67a79cceac..fde5bf744d344af9abf9fb395858da16c94c0e69 100644 --- a/src/operator/FoldImpl.cpp +++ b/src/operator/FoldImpl.cpp @@ -15,32 +15,34 @@ #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Conv.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/FoldImpl.hpp" #include "aidge/backend/cpu/operator/FoldImpl_kernels.hpp" -template <> -void Aidge::FoldImpl2D_cpu::forward() { - const auto& op_ = static_cast<const Fold_Op<2>&>(mOp); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); +template <> void Aidge::FoldImpl2D_cpu::forward() { + const auto &op_ = static_cast<const Fold_Op<2> &>(mOp); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<FoldImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<FoldImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.outputDims(), - op_.strideDims(), - op_.dilationDims(), - op_.kernelDims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + op_.strideDims(), + op_.dilationDims(), + op_.kernelDims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::FoldImpl2D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Fold_Op<2> on backend cpu"); +template <> void Aidge::FoldImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Fold_Op<2> on backend cpu"); } diff --git a/src/operator/GlobalAveragePoolingImpl.cpp b/src/operator/GlobalAveragePoolingImpl.cpp index c53f92e199aee30d55ddafe39b5ef121979acbf7..bb00cf25bcbfd7c2b263c6a9c5023fae823ec3a1 100644 --- a/src/operator/GlobalAveragePoolingImpl.cpp +++ b/src/operator/GlobalAveragePoolingImpl.cpp @@ -23,24 +23,24 @@ #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" - -template <> -void Aidge::GlobalAveragePoolingImpl_cpu::forward() -{ - const GlobalAveragePooling_Op& op_ = static_cast<const GlobalAveragePooling_Op&>(mOp); +template <> void Aidge::GlobalAveragePoolingImpl_cpu::forward() { + const GlobalAveragePooling_Op &op_ = + static_cast<const GlobalAveragePooling_Op &>(mOp); // Check if input is 
provided AIDGE_ASSERT(op_.getInput(0), "missing input 0"); // Find the correct kernel type - const auto impl = Registrar<GlobalAveragePoolingImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = Registrar<GlobalAveragePoolingImpl_cpu>::create( + getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.getInput(0)->dims(), - op_.getInput(0)->getImpl()->rawPtr(), - op_.getOutput(0)->getImpl()->rawPtr()); + op_.getInput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->getImpl()->rawPtr()); } -template <> -void Aidge::GlobalAveragePoolingImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for GlobalAveragePooling_Op on backend cpu"); +template <> void Aidge::GlobalAveragePoolingImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, + "Backward not yet implemented for " + "GlobalAveragePooling_Op on backend cpu"); } diff --git a/src/operator/GridSampleImpl.cpp b/src/operator/GridSampleImpl.cpp index 5b87390fc3de21d5d406d893e4827e80cce06c35..859e756bbc70d1a5906c15665c4ede4ead38dbdb 100644 --- a/src/operator/GridSampleImpl.cpp +++ b/src/operator/GridSampleImpl.cpp @@ -19,30 +19,33 @@ #include "aidge/operator/GridSample.hpp" #include "aidge/utils/Types.h" -template <> -void Aidge::GridSampleImpl_cpu::forward() { - const auto& op_ = static_cast<const GridSample_Op&>(mOp); +template <> void Aidge::GridSampleImpl_cpu::forward() { + const auto &op_ = static_cast<const GridSample_Op &>(mOp); // Find the correct kernel type - const auto impl = Registrar<GridSampleImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<GridSampleImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each // call to forward(). We might put the following shared_ptr as members of // this class to avoid that. 
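+    // As the "no overhead" comment above suggests, refCastFrom() is expected
+    // to return a reference aliasing the input when its data type and backend
+    // already match the output's, leaving the fallback empty; only when an
+    // actual conversion is needed is the fallback Tensor filled with the
+    // converted copy. Minimal usage sketch mirroring the calls below:
+    //   std::shared_ptr<Tensor> fallback;
+    //   const auto &ready =
+    //       op_.getInput(0)->refCastFrom(fallback, *op_.getOutput(0));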
std::shared_ptr<Tensor> input0Fallback, input1Fallback; - const auto& input0 = std::make_shared<Tensor>(op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0))); - const auto& input1 = std::make_shared<Tensor>(op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0))); + const auto &input0 = std::make_shared<Tensor>( + op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0))); + const auto &input1 = std::make_shared<Tensor>( + op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0))); // Call kernel impl.forward(op_, - input0, // input - input1, // grid - op_.getOutput(0) // output - ); + input0, // input + input1, // grid + op_.getOutput(0) // output + ); } -template <> -void Aidge::GridSampleImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for GridSample_Op on backend cpu"); +template <> void Aidge::GridSampleImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for GridSample_Op on backend cpu"); } diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp index 6c0802dd967d2a20b34a2f1ca91fc0640c063c83..0dfeb5520e1034b998186eb9d2fc19d693630502 100644 --- a/src/operator/LeakyReLUImpl.cpp +++ b/src/operator/LeakyReLUImpl.cpp @@ -18,41 +18,41 @@ #include "aidge/data/Tensor.hpp" #include "aidge/operator/LeakyReLU.hpp" #include "aidge/utils/Log.hpp" -#include "aidge/utils/Types.h" #include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" -template <> -void Aidge::LeakyReLUImpl_cpu::forward() { - const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp); +template <> void Aidge::LeakyReLUImpl_cpu::forward() { + const LeakyReLU_Op &op_ = dynamic_cast<const LeakyReLU_Op &>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); std::shared_ptr<Tensor> out0 = op_.getOutput(0); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.negativeSlope(), - in0->size(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + in0->size(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::LeakyReLUImpl_cpu::backward() { +template <> void Aidge::LeakyReLUImpl_cpu::backward() { // reversing in and out Data for backprop - const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp); - std::shared_ptr<Tensor> in0 = op_.getOutput(0)->grad(); + const LeakyReLU_Op &op_ = dynamic_cast<const LeakyReLU_Op &>(mOp); + std::shared_ptr<Tensor> in0 = op_.getOutput(0)->grad(); std::shared_ptr<Tensor> out0 = op_.getInput(0)->grad(); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.backward(op_.negativeSlope(), - in0->size(), - getCPUPtr(in0), - getCPUPtr(out0)); + in0->size(), + getCPUPtr(in0), + getCPUPtr(out0)); } \ No newline at end of file diff --git a/src/operator/LnImpl.cpp b/src/operator/LnImpl.cpp index 79df733963ea8826439530d3adccde6affc9dfa8..6e019de9b01d9e5e425eea5e2d9ba45592f3ab44 100644 --- a/src/operator/LnImpl.cpp +++ b/src/operator/LnImpl.cpp @@ -15,41 +15,46 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include 
"aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Ln.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/LnImpl.hpp" #include "aidge/backend/cpu/operator/LnImpl_kernels.hpp" -template <> -void Aidge::LnImpl_cpu::forward() { - const Ln_Op& op_ = static_cast<const Ln_Op&>(mOp); - std::shared_ptr<Tensor> in0 = op_.getInput(0); +template <> void Aidge::LnImpl_cpu::forward() { + const Ln_Op &op_ = static_cast<const Ln_Op &>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); std::shared_ptr<Tensor> out0 = op_.getOutput(0); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(in0->size(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::LnImpl_cpu::backward() { - const Ln_Op& op_ = dynamic_cast<const Ln_Op&>(mOp); - std::shared_ptr<Tensor> in0 = op_.getInput(0); - std::shared_ptr<Tensor> out0 = op_.getOutput(0); +template <> void Aidge::LnImpl_cpu::backward() { + const Ln_Op &op_ = dynamic_cast<const Ln_Op &>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); - AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); + AIDGE_ASSERT(out0, + "missing output #0 for current {} operator", + op_.type()); // Find the correct kernel type - const auto impl = Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.backward(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + impl.backward(gra_int0->size(), + getCPUPtr(in0), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0)); } diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp index ccd3265ed230e4f9cdc5ad85785a6473d9f131f0..21c01e9a24aa9107a6dd046f7014569c90f36f3a 100644 --- a/src/operator/MatMulImpl.cpp +++ b/src/operator/MatMulImpl.cpp @@ -9,9 +9,9 @@ * ********************************************************************************/ -#include <cstddef> // std::size_t -#include <cstdint> // std::int32_t -#include <numeric> // std::accumulate +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t +#include <numeric> // std::accumulate #include <vector> #include "aidge/backend/cpu/data/GetCPUPtr.h" @@ -21,18 +21,21 @@ #include "aidge/backend/cpu/operator/MatMulImpl.hpp" #include "aidge/backend/cpu/operator/MatMulImpl_kernels.hpp" -template <> -void Aidge::MatMulImpl_cpu::forward() -{ - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1"); +template <> void Aidge::MatMulImpl_cpu::forward() { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && + "missing input #1"); // Find the correct kernel type - const auto impl = Registrar<MatMulImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<MatMulImpl_cpu>::create(getBestMatch(getRequiredSpec())); // 
Compute compatible input dimensions - std::vector<std::size_t> dims0 = static_cast<const MatMul_Op&>(mOp).getInput(0)->dims(); - std::vector<std::size_t> dims1 = static_cast<const MatMul_Op&>(mOp).getInput(1)->dims(); + std::vector<std::size_t> dims0 = + static_cast<const MatMul_Op &>(mOp).getInput(0)->dims(); + std::vector<std::size_t> dims1 = + static_cast<const MatMul_Op &>(mOp).getInput(1)->dims(); // keep second-to-last dimension of dims0 const std::size_t keepDim0 = (dims0.size() > 1) ? 1 : 0; @@ -47,10 +50,13 @@ void Aidge::MatMulImpl_cpu::forward() } if (dims0.size() > dims1.size()) { - dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1)); - } - else if (dims1.size() > dims0.size()) { - dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1)); + dims1.insert(dims1.cbegin(), + dims0.size() - dims1.size(), + std::size_t(1)); + } else if (dims1.size() > dims0.size()) { + dims0.insert(dims0.cbegin(), + dims1.size() - dims0.size(), + std::size_t(1)); } // const std::size_t dims_size = std::max(dims0.size(), dims1.size()); @@ -58,25 +64,41 @@ void Aidge::MatMulImpl_cpu::forward() const std::size_t nbDims = dims0.size(); // initialize strides to iterate through data because of broadcasting - std::unique_ptr<std::size_t[]> stride_post0 = std::make_unique<std::size_t[]>(nbDims - 2); - std::unique_ptr<std::size_t[]> stride_post1 = std::make_unique<std::size_t[]>(nbDims - 2); - std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(nbDims - 2); - std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(nbDims - 2); + std::unique_ptr<std::size_t[]> stride_post0 = + std::make_unique<std::size_t[]>(nbDims - 2); + std::unique_ptr<std::size_t[]> stride_post1 = + std::make_unique<std::size_t[]>(nbDims - 2); + std::unique_ptr<std::int32_t[]> stride_step0 = + std::make_unique<std::int32_t[]>(nbDims - 2); + std::unique_ptr<std::int32_t[]> stride_step1 = + std::make_unique<std::int32_t[]>(nbDims - 2); if (nbDims > 2) { stride_post0[nbDims - 3] = 1; stride_post1[nbDims - 3] = 1; - for (std::size_t i = nbDims-4; i != static_cast<std::size_t>(-1); --i) { - stride_post0[i] = stride_post0[i+1]*dims0[i+1]; - stride_post1[i] = stride_post1[i+1]*dims1[i+1]; + for (std::size_t i = nbDims - 4; i != static_cast<std::size_t>(-1); + --i) { + stride_post0[i] = stride_post0[i + 1] * dims0[i + 1]; + stride_post1[i] = stride_post1[i + 1] * dims1[i + 1]; } - for (std::size_t i = 0; i != nbDims-2; ++i) { - stride_step0[i] = (dims0[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post0[i]) : 1; - stride_step1[i] = (dims1[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post1[i]) : 1; + for (std::size_t i = 0; i != nbDims - 2; ++i) { + stride_step0[i] = + (dims0[i] == 1) + ? 1 - static_cast<std::int32_t>(stride_post0[i]) + : 1; + stride_step1[i] = + (dims1[i] == 1) + ? 
1 - static_cast<std::int32_t>(stride_post1[i]) + : 1; } } - const std::vector<std::size_t>& outDims = static_cast<const MatMul_Op&>(mOp).getOutput(0)->dims(); - const std::size_t nbMatrices = std::accumulate(outDims.cbegin(), outDims.cend() - keepDim0 - keepDim1, 1, std::multiplies<std::size_t>()); + const std::vector<std::size_t> &outDims = + static_cast<const MatMul_Op &>(mOp).getOutput(0)->dims(); + const std::size_t nbMatrices = + std::accumulate(outDims.cbegin(), + outDims.cend() - keepDim0 - keepDim1, + 1, + std::multiplies<std::size_t>()); std::size_t dim = outDims.size() - 1 - keepDim0 - keepDim1; // variables for arrays offsets @@ -86,17 +108,20 @@ void Aidge::MatMulImpl_cpu::forward() const std::size_t n = dims0[nbDims - 2]; const std::size_t k = dims0[nbDims - 1]; const std::size_t m = dims1[nbDims - 1]; - const std::size_t matrix0Size = n*k; - const std::size_t matrix1Size = k*m; - const std::size_t matrixOutSize = n*m; + const std::size_t matrix0Size = n * k; + const std::size_t matrix1Size = k * m; + const std::size_t matrixOutSize = n * m; for (std::size_t stack = 0; stack < nbMatrices;) { - impl.forward(n, k, m, - getCPUPtr(mOp.getRawInput(0), offsetIn0*matrix0Size), - getCPUPtr(mOp.getRawInput(1), offsetIn1*matrix1Size), - getCPUPtr(mOp.getRawOutput(0), offsetOut*matrixOutSize)); + impl.forward( + n, + k, + m, + getCPUPtr(mOp.getRawInput(0), offsetIn0 * matrix0Size), + getCPUPtr(mOp.getRawInput(1), offsetIn1 * matrix1Size), + getCPUPtr(mOp.getRawOutput(0), offsetOut * matrixOutSize)); if (++stack < nbMatrices) { std::size_t tmp_stack = stack; - while(tmp_stack % outDims[dim] == 0) { + while (tmp_stack % outDims[dim] == 0) { tmp_stack /= outDims[dim]; dim--; } @@ -110,8 +135,9 @@ void Aidge::MatMulImpl_cpu::forward() // void Aidge::MatMulImpl_cpu::forward() // { -// assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); -// assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1"); +// assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing +// input #0"); assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) +// && "missing input #1"); // // Find the correct kernel type // auto kernelFunc = Registrar<MatMulImplForward_cpu>::create( @@ -126,7 +152,8 @@ void Aidge::MatMulImpl_cpu::forward() // getCPUPtr(mOp.getRawOutput(0))); // } -template <> -void Aidge::MatMulImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for MatMul_Op on backend cpu"); +template <> void Aidge::MatMulImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for MatMul_Op on backend cpu"); } diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp index 90075a397be3f082ef95fd4df074c99d926fd385..2e8616c48b6b49986ddc9317722298a19c7bb554 100644 --- a/src/operator/MaxPoolingImpl.cpp +++ b/src/operator/MaxPoolingImpl.cpp @@ -19,24 +19,25 @@ #include "aidge/utils/Log.hpp" #include "aidge/utils/Types.h" -template <> -void Aidge::MaxPoolingImpl2D_cpu::forward() { - const auto& op_ = dynamic_cast<const MaxPooling_Op<2>&>(mOp); +template <> void Aidge::MaxPoolingImpl2D_cpu::forward() { + const auto &op_ = dynamic_cast<const MaxPooling_Op<2> &>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator."); // Find the correct kernel type - const auto impl = Registrar<MaxPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = Registrar<MaxPoolingImpl2D_cpu>::create( + 
getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.strideDims(), - op_.kernelDims(), - op_.ceilMode(), - op_.getInput(0)->template dims<4>(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + op_.kernelDims(), + op_.ceilMode(), + op_.getInput(0)->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::MaxPoolingImpl2D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for MaxPooling_Op<2> on backend cpu"); +template <> void Aidge::MaxPoolingImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for MaxPooling_Op<2> on backend cpu"); } diff --git a/src/operator/MulImpl.cpp b/src/operator/MulImpl.cpp index ea5e3d3ab8ac24934a0cb6f9042858fa094700af..07c3fb27cf0fe7e8dcb24ad355821be5dd8bd15e 100644 --- a/src/operator/MulImpl.cpp +++ b/src/operator/MulImpl.cpp @@ -15,37 +15,38 @@ #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/operator/Mul.hpp" -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/operator/Mul.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/MulImpl.hpp" #include "aidge/backend/cpu/operator/MulImpl_kernels.hpp" -template <> -void Aidge::MulImpl_cpu::forward() { - const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); - const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); +template <> void Aidge::MulImpl_cpu::forward() { + const std::vector<std::size_t> inputDims0 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); + const std::vector<std::size_t> inputDims1 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); // Find the correct kernel type - const auto impl = Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(inputDims0, - inputDims1, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1)), - getCPUPtr(mOp.getRawOutput(0))); + inputDims1, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::MulImpl_cpu::backward() { - const Mul_Op& op_ = dynamic_cast<const Mul_Op&>(mOp); - +template <> void Aidge::MulImpl_cpu::backward() { + const Mul_Op &op_ = dynamic_cast<const Mul_Op &>(mOp); + auto in0 = op_.getInput(0); auto in1 = op_.getInput(1); auto in0grad = op_.getInput(0)->grad(); @@ -53,17 +54,18 @@ void Aidge::MulImpl_cpu::backward() { auto out0grad = op_.getOutput(0)->grad(); // Find the correct kernel type - const auto impl = Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.backward(/* input0Length */ in0grad->size(), - /* 
input1Length */ in1grad->size(),
-                /* grad0Length */ out0grad->size(),
-                /* input0Dims */ in0->dims(),
-                /* input1Dims */ in1->dims(),
-                getCPUPtr(in0),
-                getCPUPtr(in1),
-                getCPUPtr(out0grad),
-                getCPUPtr(in0grad),
-                getCPUPtr(in1grad));
+    impl.backward(/* input0Length */ in0grad->size(),
+                  /* input1Length */ in1grad->size(),
+                  /* grad0Length */ out0grad->size(),
+                  /* input0Dims */ in0->dims(),
+                  /* input1Dims */ in1->dims(),
+                  getCPUPtr(in0),
+                  getCPUPtr(in1),
+                  getCPUPtr(out0grad),
+                  getCPUPtr(in0grad),
+                  getCPUPtr(in1grad));
 }
diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp
index cdae21f8ed2757128f6a36b661b0897a4ba65f89..1ac74501de146464c2ef85d34f834d7b9f2a6153 100644
--- a/src/operator/PadImpl.cpp
+++ b/src/operator/PadImpl.cpp
@@ -11,66 +11,73 @@
 
 #include <vector>
 
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl_kernels.hpp"
 
-Aidge::Elts_t Aidge::Pad_ProdConso_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const {
-    AIDGE_ASSERT(inputIdx == 0, "input index out of range."
-        "{} Operator has only one input", mOp.type());
-    (void) inputIdx;
-
+Aidge::Elts_t Aidge::Pad_ProdConso_cpu::getNbRequiredProtected(
+    Aidge::IOIndex_t inputIdx) const {
+    AIDGE_ASSERT(inputIdx == 0,
+                 "input index out of range. "
+                 "{} Operator has only one input",
+                 mOp.type());
+    (void)inputIdx;
 
     // Padding cannot be in-place!
-    // We must ensure that we do not override data that has not been consummed yet.
-    const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size();
-    const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size();
+    // We must ensure that we do not overwrite data that has not been consumed
+    // yet.
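+    // Worked example (shapes assumed for illustration): zero-padding a
+    // [1,3,224,224] input by 1 on each spatial border produces a
+    // [1,3,226,226] output, so 1*3*226*226 - 1*3*224*224 = 2700 output
+    // elements must stay protected until the input has been consumed.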
+ const auto inputSize = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(); + const auto outputSize = + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size(); return Elts_t::DataElts(outputSize - inputSize); } -template <> -void Aidge::PadImpl1D_cpu::forward() { - const auto& op_ = dynamic_cast<const Pad_Op<1>&>(mOp); +template <> void Aidge::PadImpl1D_cpu::forward() { + const auto &op_ = dynamic_cast<const Pad_Op<1> &>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator."); // Find the correct kernel type - const auto impl = Registrar<PadImpl1D_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<PadImpl1D_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.beginEndBorders(), - op_.borderType(), - op_.borderValue(), - op_.getInput(0)->template dims<3>(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + op_.borderType(), + op_.borderValue(), + op_.getInput(0)->template dims<3>(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::PadImpl1D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<1> on backend cpu"); +template <> void Aidge::PadImpl1D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Pad_Op<1> on backend cpu"); } -template <> -void Aidge::PadImpl2D_cpu::forward() { - const auto& op_ = dynamic_cast<const Pad_Op<2>&>(mOp); +template <> void Aidge::PadImpl2D_cpu::forward() { + const auto &op_ = dynamic_cast<const Pad_Op<2> &>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator."); // Find the correct kernel type - const auto impl = Registrar<PadImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<PadImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.beginEndBorders(), - op_.borderType(), - op_.borderValue(), - op_.getInput(0)->template dims<4>(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + op_.borderType(), + op_.borderValue(), + op_.getInput(0)->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::PadImpl2D_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<2> on backend cpu"); +template <> void Aidge::PadImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Pad_Op<2> on backend cpu"); } diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp index 74a7be71e176ba8e1cb8851050e575d6aa7465df..3a8d27c28fcd5b372c6a3d396fe2bd3e92da60d0 100644 --- a/src/operator/PowImpl.cpp +++ b/src/operator/PowImpl.cpp @@ -15,36 +15,37 @@ #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/operator/Pow.hpp" -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/operator/Pow.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/PowImpl.hpp" #include "aidge/backend/cpu/operator/PowImpl_kernels.hpp" -template <> -void Aidge::PowImpl_cpu::forward() { - const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); - const std::vector<std::size_t> inputDims1 = 
getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); +template <> void Aidge::PowImpl_cpu::forward() { + const std::vector<std::size_t> inputDims0 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); + const std::vector<std::size_t> inputDims1 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); // Find the correct kernel type - const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(inputDims0, - inputDims1, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1)), - getCPUPtr(mOp.getRawOutput(0))); + inputDims1, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::PowImpl_cpu::backward() { - const Pow_Op& op_ = dynamic_cast<const Pow_Op&>(mOp); +template <> void Aidge::PowImpl_cpu::backward() { + const Pow_Op &op_ = dynamic_cast<const Pow_Op &>(mOp); auto in0 = op_.getInput(0); auto in1 = op_.getInput(1); @@ -52,21 +53,24 @@ void Aidge::PowImpl_cpu::backward() { auto in1grad = op_.getInput(1)->grad(); auto out0grad = op_.getOutput(0)->grad(); - const std::vector<std::size_t> input0gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad()->dims()); - const std::vector<std::size_t> input1gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->grad()->dims()); + const std::vector<std::size_t> input0gradDims = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad()->dims()); + const std::vector<std::size_t> input1gradDims = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->grad()->dims()); // Find the correct kernel type - const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.backward(input0gradDims, - input1gradDims, - out0grad->dims(), - getCPUPtr(in0), - getCPUPtr(in1), - getCPUPtr(out0grad), - getCPUPtr(in0grad), - getCPUPtr(in1grad)); + input1gradDims, + out0grad->dims(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(out0grad), + getCPUPtr(in0grad), + getCPUPtr(in1grad)); } \ No newline at end of file diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp index 832f91aad347fc081439ec487d06b14b0e2fe8da..e81c373b560c834bf7cb7af815751d7b9a93719e 100644 --- a/src/operator/ReLUImpl.cpp +++ b/src/operator/ReLUImpl.cpp @@ -12,43 +12,48 @@ #include <memory> #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/data/Tensor.hpp" #include "aidge/operator/ReLU.hpp" -#include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/utils/ErrorHandling.hpp" 
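+// Note for the backward() below: ReLU's gradient is taken from the forward
+// input (dL/dx = dL/dy when x > 0, and 0 otherwise), which is why the kernel
+// is handed in0 together with the output gradient.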
+#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl_kernels.hpp" -template <> -void Aidge::ReLUImpl_cpu::forward() { - const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp); +template <> void Aidge::ReLUImpl_cpu::forward() { + const ReLU_Op &op_ = dynamic_cast<const ReLU_Op &>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); std::shared_ptr<Tensor> out0 = op_.getOutput(0); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(in0->size(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::ReLUImpl_cpu::backward() { - const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp); - std::shared_ptr<Tensor> in0 = op_.getInput(0); - std::shared_ptr<Tensor> out0 = op_.getOutput(0); +template <> void Aidge::ReLUImpl_cpu::backward() { + const ReLU_Op &op_ = dynamic_cast<const ReLU_Op &>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); - std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); - AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + AIDGE_ASSERT(out0, + "missing output #0 for current {} operator", + op_.type()); // Find the correct kernel type - const auto impl = Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.backward(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + impl.backward(gra_int0->size(), + getCPUPtr(in0), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0)); } diff --git a/src/operator/ReduceMeanImpl.cpp b/src/operator/ReduceMeanImpl.cpp index 622672569372ff4e9f135e36255095f4246d5920..d6ae3fe10322f3caafb7219027bc1e335435c031 100644 --- a/src/operator/ReduceMeanImpl.cpp +++ b/src/operator/ReduceMeanImpl.cpp @@ -14,31 +14,31 @@ #include <memory> #include <vector> -#include "aidge/utils/Types.h" -#include "aidge/operator/ReduceMean.hpp" #include "aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp" +#include "aidge/operator/ReduceMean.hpp" +#include "aidge/utils/Types.h" -template <> -void Aidge::ReduceMeanImpl_cpu::forward() { - const ReduceMean_Op& op_ = dynamic_cast<const ReduceMean_Op&>(mOp); +template <> void Aidge::ReduceMeanImpl_cpu::forward() { + const ReduceMean_Op &op_ = dynamic_cast<const ReduceMean_Op &>(mOp); // Find the correct kernel type - const auto impl = Registrar<ReduceMeanImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ReduceMeanImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.axes(), - op_.keepDims(), - op_.getInput(0)->dims(), - op_.getInput(0)->getImpl()->rawPtr(), - op_.getOutput(0)->getImpl()->rawPtr()); + op_.keepDims(), + op_.getInput(0)->dims(), + op_.getInput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->getImpl()->rawPtr()); } -template <> -void Aidge::ReduceMeanImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ReduceMean_Op on backend cpu"); +template <> void 
Aidge::ReduceMeanImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for ReduceMean_Op on backend cpu"); } - // void Aidge::ReduceMeanImpl1D_cpu::forward() { // // Find the correct kernel type @@ -48,7 +48,8 @@ void Aidge::ReduceMeanImpl_cpu::backward() { // std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); // // Call kernel -// kernelFunc(dynamic_cast<const ReduceMean_Op<1>&>(mOp).getStaticAttributes(), +// kernelFunc(dynamic_cast<const +// ReduceMean_Op<1>&>(mOp).getStaticAttributes(), // std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), // std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), // std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); @@ -63,7 +64,8 @@ void Aidge::ReduceMeanImpl_cpu::backward() { // std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); // // Call kernel -// kernelFunc(dynamic_cast<const ReduceMean_Op<2>&>(mOp).getStaticAttributes(), +// kernelFunc(dynamic_cast<const +// ReduceMean_Op<2>&>(mOp).getStaticAttributes(), // std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), // std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), // std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); @@ -78,7 +80,8 @@ void Aidge::ReduceMeanImpl_cpu::backward() { // std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); // // Call kernel -// kernelFunc(dynamic_cast<const ReduceMean_Op<3>&>(mOp).getStaticAttributes(), +// kernelFunc(dynamic_cast<const +// ReduceMean_Op<3>&>(mOp).getStaticAttributes(), // std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), // std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), // std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); diff --git a/src/operator/ReduceSumImpl.cpp b/src/operator/ReduceSumImpl.cpp index aad0801835a74ecefb046f3dc64729ae1f8bd8bb..b0f7c575df00a88d3f53d5feb80ee240f444b8ea 100644 --- a/src/operator/ReduceSumImpl.cpp +++ b/src/operator/ReduceSumImpl.cpp @@ -14,26 +14,27 @@ #include <memory> #include <vector> -#include "aidge/utils/Types.h" -#include "aidge/operator/ReduceSum.hpp" #include "aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp" +#include "aidge/operator/ReduceSum.hpp" +#include "aidge/utils/Types.h" -template <> -void Aidge::ReduceSumImpl_cpu::forward() { - const ReduceSum_Op& op_ = dynamic_cast<const ReduceSum_Op&>(mOp); +template <> void Aidge::ReduceSumImpl_cpu::forward() { + const ReduceSum_Op &op_ = dynamic_cast<const ReduceSum_Op &>(mOp); // Find the correct kernel type - const auto impl = Registrar<ReduceSumImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ReduceSumImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.axes(), - op_.keepDims(), - op_.getInput(0)->dims(), - op_.getInput(0)->getImpl()->rawPtr(), - op_.getOutput(0)->getImpl()->rawPtr()); + op_.keepDims(), + op_.getInput(0)->dims(), + op_.getInput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->getImpl()->rawPtr()); } -template <> -void Aidge::ReduceSumImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ReduceSum_Op on backend cpu"); +template <> void Aidge::ReduceSumImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for ReduceSum_Op on backend cpu"); } diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp 
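The Scaling diff that follows rewraps a forward() that multiplies every
element by scalingFactor and can re-quantize the result to quantizedNbBits.
As a rough, hypothetical sketch of what such a kernel computes
(scaling_sketch and its saturation bounds are illustrative assumptions, not
the code in ScalingImpl_kernels.hpp):

#include <algorithm> // std::max, std::min
#include <cmath>     // std::ldexp, std::nearbyint
#include <cstddef>   // std::size_t

template <class I, class O>
void scaling_sketch(float scalingFactor,
                    std::size_t quantizedNbBits,
                    bool isOutputUnsigned,
                    std::size_t inputLength,
                    const I *input,
                    O *output) {
    for (std::size_t i = 0; i < inputLength; ++i) {
        double v = static_cast<double>(input[i]) * scalingFactor;
        if (quantizedNbBits > 0) {
            const int n = static_cast<int>(quantizedNbBits);
            // Saturate to [0, 2^n - 1] if unsigned, else to the two's
            // complement range [-2^(n-1), 2^(n-1) - 1].
            const double vMax = isOutputUnsigned
                                    ? std::ldexp(1.0, n) - 1.0
                                    : std::ldexp(1.0, n - 1) - 1.0;
            const double vMin =
                isOutputUnsigned ? 0.0 : -std::ldexp(1.0, n - 1);
            v = std::max(vMin, std::min(vMax, std::nearbyint(v)));
        }
        output[i] = static_cast<O>(v);
    }
}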
index 1e7a408f267c5eb2d60d188f0ed2ba0394222561..11ddb8ffbfceb109c0b03e5d4b3378fe6d60dc31 100644 --- a/src/operator/ScalingImpl.cpp +++ b/src/operator/ScalingImpl.cpp @@ -10,35 +10,36 @@ ********************************************************************************/ #include <cassert> -#include <numeric> // std::accumulate #include <functional> // std::multiplies +#include <numeric> // std::accumulate #include <vector> #include "aidge/operator/Scaling.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/ScalingImpl.hpp" #include "aidge/backend/cpu/operator/ScalingImpl_kernels.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" -template <> -void Aidge::ScalingImpl_cpu::forward() { - const auto& op_ = dynamic_cast<const Scaling_Op&>(mOp); +template <> void Aidge::ScalingImpl_cpu::forward() { + const auto &op_ = dynamic_cast<const Scaling_Op &>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Scaling Operator."); // Find the correct kernel type - const auto impl = Registrar<ScalingImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<ScalingImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.scalingFactor(), - op_.quantizedNbBits(), - op_.isOutputUnsigned(), - op_.getInput(0)->size(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + op_.quantizedNbBits(), + op_.isOutputUnsigned(), + op_.getInput(0)->size(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::ScalingImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Scaling_Op on backend cpu"); +template <> void Aidge::ScalingImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Scaling_Op on backend cpu"); } diff --git a/src/operator/SigmoidImpl.cpp b/src/operator/SigmoidImpl.cpp index cdcbac85df3a38fea9b7100324e0618949262fc9..7242ef15444df135ce9fa661980b9cbd3f2a906f 100644 --- a/src/operator/SigmoidImpl.cpp +++ b/src/operator/SigmoidImpl.cpp @@ -15,40 +15,45 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Sigmoid.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp" -template <> -void Aidge::SigmoidImpl_cpu::forward() { - const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp); +template <> void Aidge::SigmoidImpl_cpu::forward() { + const Sigmoid_Op &op_ = dynamic_cast<const Sigmoid_Op &>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); std::shared_ptr<Tensor> out0 = op_.getOutput(0); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(in0->size(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::SigmoidImpl_cpu::backward() { - const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp); - std::shared_ptr<Tensor> out0 = op_.getOutput(0); +template <> void Aidge::SigmoidImpl_cpu::backward() { + const Sigmoid_Op &op_ = dynamic_cast<const Sigmoid_Op &>(mOp); + 
std::shared_ptr<Tensor> out0 = op_.getOutput(0); std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); - std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); - AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + AIDGE_ASSERT(out0, + "missing output #0 for current {} operator", + op_.type()); // Find the correct kernel type - const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + impl.backward(gra_int0->size(), + getCPUPtr(out0), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0)); } diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp index 945c1bc752feb8e6a194b1aff99b26f01a6a0e69..eebf77b3c868a08c668d102722786c27f8ea3e2a 100644 --- a/src/operator/SliceImpl.cpp +++ b/src/operator/SliceImpl.cpp @@ -19,25 +19,26 @@ #include "aidge/utils/Log.hpp" #include "aidge/utils/Types.h" -template <> -void Aidge::SliceImpl_cpu::forward() { - const auto& op_ = dynamic_cast<const Slice_Op&>(mOp); +template <> void Aidge::SliceImpl_cpu::forward() { + const auto &op_ = dynamic_cast<const Slice_Op &>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Slice Operator."); // Find the correct kernel type - const auto impl = Registrar<SliceImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<SliceImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(op_.starts(), - op_.ends(), - op_.axes(), - op_.steps(), - op_.getInput(0)->dims(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + op_.ends(), + op_.axes(), + op_.steps(), + op_.getInput(0)->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::SliceImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Slice_Op on backend cpu"); +template <> void Aidge::SliceImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Slice_Op on backend cpu"); } diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp index 8b6933f22f3673476f4a9f1e261fbcdc09857300..a517a64f0e790151c4c6a1e077a34f26b6cb2c59 100644 --- a/src/operator/SoftmaxImpl.cpp +++ b/src/operator/SoftmaxImpl.cpp @@ -15,30 +15,37 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Softmax.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp" -template <> -void Aidge::SoftmaxImpl_cpu::forward() { - const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp); +template <> void Aidge::SoftmaxImpl_cpu::forward() { + const auto &op_ = dynamic_cast<const Softmax_Op &>(mOp); AIDGE_ASSERT(!op_.getInput(0)->empty(), "Softmax input empty"); - std::int32_t axis = (op_.axis() >= 0) ? op_.axis() : op_.getInput(0)->nbDims() + op_.axis(); + std::int32_t axis = (op_.axis() >= 0) + ? 
op_.axis() + : op_.getInput(0)->nbDims() + op_.axis(); // Find the correct kernel type - const auto impl = Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(static_cast<std::size_t>(axis), // axisIdx - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) + ->getImpl() + ->rawPtr(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)) + ->getImpl() + ->rawPtr()); } -template <> -void Aidge::SoftmaxImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Softmax_Op on backend cpu"); +template <> void Aidge::SoftmaxImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Softmax_Op on backend cpu"); } diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp index 25bdb42fd5140ef4f64d704fc3a5ccf237f17f81..e02700f5d51bcab47302fe077a4ea13ce6c35887 100644 --- a/src/operator/SqrtImpl.cpp +++ b/src/operator/SqrtImpl.cpp @@ -21,34 +21,34 @@ #include "aidge/backend/cpu/operator/SqrtImpl.hpp" #include "aidge/backend/cpu/operator/SqrtImpl_kernels.hpp" -template <> -void Aidge::SqrtImpl_cpu::forward() { - std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0)); - std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)); +template <> void Aidge::SqrtImpl_cpu::forward() { + std::shared_ptr<Tensor> in0 = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0)); + std::shared_ptr<Tensor> out0 = + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(in0->size(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::SqrtImpl_cpu::backward() { +template <> void Aidge::SqrtImpl_cpu::backward() { // reversing in and out Data for backprop - const Sqrt_Op& op_ = dynamic_cast<const Sqrt_Op&>(mOp); - std::shared_ptr<Tensor> out0grad = op_.getOutput(0)->grad(); + const Sqrt_Op &op_ = dynamic_cast<const Sqrt_Op &>(mOp); + std::shared_ptr<Tensor> out0grad = op_.getOutput(0)->grad(); std::shared_ptr<Tensor> in0grad = op_.getInput(0)->grad(); AIDGE_ASSERT(out0grad, "missing output #0"); // Find the correct kernel type - const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.backward(out0grad->size(), - getCPUPtr(out0grad), - getCPUPtr(in0grad)); + impl.backward(out0grad->size(), getCPUPtr(out0grad), getCPUPtr(in0grad)); } \ No newline at end of file diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp index d43771b967889183801cb93418c967ce9d9c8453..719dad900a0b6b7d54c0f62325602f6e1fad7e59 100644 --- a/src/operator/SubImpl.cpp +++ b/src/operator/SubImpl.cpp @@ -15,34 +15,37 @@ #include <thread> // std::this_thread::sleep_for 
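As with Pow earlier, the Sub forward() below first aligns each input's shape
to the output's rank through getBroadcastedDims() so that one element-wise
kernel can serve every broadcast combination. A minimal, hypothetical sketch
of that alignment step (broadcastDimsSketch is an assumed name, and
numpy-style left-padding with 1s is inferred from how the dims are consumed,
not confirmed against the implementation):

#include <algorithm> // std::copy
#include <cstddef>   // std::size_t
#include <vector>

// {3, 1} aligned against an output of {2, 3, 4} becomes {1, 3, 1}; a kernel
// can then clamp the index to 0 on every size-1 axis while iterating.
// Assumes inputDims.size() <= outputDims.size().
std::vector<std::size_t>
broadcastDimsSketch(const std::vector<std::size_t> &outputDims,
                    const std::vector<std::size_t> &inputDims) {
    std::vector<std::size_t> aligned(outputDims.size(), 1);
    std::copy(inputDims.cbegin(),
              inputDims.cend(),
              aligned.end() - inputDims.size());
    return aligned;
}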
#include <vector> -#include "aidge/operator/Sub.hpp" -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/operator/Sub.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/SubImpl.hpp" #include "aidge/backend/cpu/operator/SubImpl_kernels.hpp" -template <> -void Aidge::SubImpl_cpu::forward() { - const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); - const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); +template <> void Aidge::SubImpl_cpu::forward() { + const std::vector<std::size_t> inputDims0 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); + const std::vector<std::size_t> inputDims1 = getBroadcastedDims( + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); // Find the correct kernel type - const auto impl = Registrar<SubImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<SubImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(inputDims0, - inputDims1, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1)), - getCPUPtr(mOp.getRawOutput(0))); + inputDims1, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::SubImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Sub_Op on backend cpu"); +template <> void Aidge::SubImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for Sub_Op on backend cpu"); } diff --git a/src/operator/TanhImpl.cpp b/src/operator/TanhImpl.cpp index ed8dce08b9f710c9e5830b2c72ffef71013edb6e..2565402fa10a840d859befba4fbc58906c0d0f52 100644 --- a/src/operator/TanhImpl.cpp +++ b/src/operator/TanhImpl.cpp @@ -15,41 +15,45 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Tanh.hpp" #include "aidge/utils/Types.h" -#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/TanhImpl.hpp" #include "aidge/backend/cpu/operator/TanhImpl_kernels.hpp" -template <> -void Aidge::TanhImpl_cpu::forward() { - const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp); +template <> void Aidge::TanhImpl_cpu::forward() { + const Tanh_Op &op_ = dynamic_cast<const Tanh_Op &>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); std::shared_ptr<Tensor> out0 = op_.getOutput(0); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec())); + const auto impl = + Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel impl.forward(in0->size(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } -template <> -void Aidge::TanhImpl_cpu::backward() { - const Tanh_Op& op_ 
= dynamic_cast<const Tanh_Op&>(mOp);
-    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
-    std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
-    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
-    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
+template <> void Aidge::TanhImpl_cpu::backward() {
+    const Tanh_Op &op_ = dynamic_cast<const Tanh_Op &>(mOp);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
+    std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
+    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
+    AIDGE_ASSERT(out0,
+                 "missing output #0 for current {} operator",
+                 op_.type());

     // Find the correct kernel type
-    const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec()));

     // Call kernel
-    impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+    impl.backward(gra_int0->size(),
+                  getCPUPtr(out0),
+                  getCPUPtr(gra_out0),
+                  getCPUPtr(gra_int0));
 }
-
diff --git a/unit_tests/data/Test_TensorImpl.cpp b/unit_tests/data/Test_TensorImpl.cpp
index 4bfa10ab4e3d3f522015dbcb3654e105fbb14525..2c651a760fa01e814eb59bf60cd88b462c75d3b5 100644
--- a/unit_tests/data/Test_TensorImpl.cpp
+++ b/unit_tests/data/Test_TensorImpl.cpp
@@ -10,43 +10,46 @@
 ********************************************************************************/

 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>  // std::size_t
-#include <cstdint>  // std::uint16_t
 #include <chrono>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iostream>
 #include <memory>
-#include <numeric>  // std::accumulate
-#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random>  // std::random_device, std::mt19937, std::uniform_real_distribution

-#include "aidge/data/Tensor.hpp"
 #include "aidge/backend/cpu/data/TensorImpl.hpp"
-#include "aidge/operator/Add.hpp"
 #include "aidge/backend/cpu/operator/AddImpl.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Add.hpp"

 namespace Aidge {

-TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") {
+TEST_CASE("Test addition of Tensors", "[TensorImpl][Add]") {
     constexpr std::uint16_t NBTRIALS = 10;
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(10));
+    std::uniform_int_distribution<int> boolDist(0, 1);

     // Create Add Operator
     std::shared_ptr<Node> mySub = Add();
-    auto op = std::static_pointer_cast<OperatorTensor>(mySub-> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(mySub->getOperator());
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");

     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Float32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
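+    // The trial loop below checks Tensor::operator+ against a hand-built
+    // reference: every output coordinate (a, b, c, d) is mapped to flat
+    // input offsets with precomputed strides, and each coordinate is
+    // clamped to 0 on size-1 axes so that broadcasting is reproduced.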
T1->setDataType(DataType::Float32); T1->setBackend("cpu"); @@ -64,7 +67,8 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors - // handle dimensions, replace some dimensions with '1' to get broadcasting + // handle dimensions, replace some dimensions with '1' to get + // broadcasting constexpr std::size_t nbDims = 4; std::vector<std::size_t> dims; for (std::size_t i = 0; i < nbDims; ++i) { @@ -84,37 +88,51 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") { } // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; + float *array0 = new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + float *array1 = new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]]; + float *result = + new float[dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]]; - for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) { + for (std::size_t i = 0; i < dims0[0] * dims0[1] * dims0[2] * dims0[3]; + ++i) { array0[i] = valueDist(gen); } - for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) { + for (std::size_t i = 0; i < dims1[0] * dims1[1] * dims1[2] * dims1[3]; + ++i) { array1[i] = valueDist(gen); } // compute true result - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; + const std::size_t strides0[nbDims] = {dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = {dims1[1] * dims1[2] * dims1[3], + dims1[2] * dims1[3], + dims1[3], + 1}; for (std::size_t a = 0; a < dimsOut[0]; ++a) { for (std::size_t b = 0; b < dimsOut[1]; ++b) { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) - + strides1[1] * ((dims1[1] > 1) ? b : 0); + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = + strides1[0] * ((dims1[0] > 1) ? a : 0) + + strides1[1] * ((dims1[1] > 1) ? b : 0); for (std::size_t c = 0; c < dimsOut[2]; ++c) { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + const std::size_t idx_out = + dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); for (std::size_t d = 0; d < dimsOut[3]; ++d) { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1[2] > 1) ? c : 0) - + ((dims1[3] > 1) ? d : 0); + std::size_t idx0 = + idx0_0 + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = + idx1_0 + strides1[2] * ((dims1[2] > 1) ? c : 0) + + ((dims1[3] > 1) ? 
d : 0);
                     result[idx_out + d] = array0[idx0] + array1[idx1];
-                    // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                    // std::cout << "(" << idx0 << ", " << idx1 << ") -> "
+                    // << array0[idx0] << " - " << array1[idx1] << " -> "
+                    // << idx_out + d << std::endl;
                 }
             }
         }
@@ -123,34 +141,41 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") {
         // conversion to Aidge::Tensors
         // input0
         T0->resize(dims0);
-        T0->getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+        T0->getImpl()->setRawPtr(array0,
+                                 dims0[0] * dims0[1] * dims0[2] * dims0[3]);

         // input1
         T1->resize(dims1);
-        T1->getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
+        T1->getImpl()->setRawPtr(array1,
+                                 dims1[0] * dims1[1] * dims1[2] * dims1[3]);

         // results
         Tres.resize(dimsOut);
-        Tres.getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+        Tres.getImpl()->setRawPtr(
+            result,
+            dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);

         Tensor T2 = *T0 + *T1;
         REQUIRE(T2 == Tres);

-        // no implementation
+        // no implementation
         Tensor T3(T1->dims());
         REQUIRE_THROWS(*T0 + T3);

         // // wrong backend
-        // static Registrar<Add_Op> registrarAddImpl_custom("custom", [](const Add_Op& op) { return std::make_unique<AddImpl_cpu>(op); } );
-        // static Registrar<Tensor> registrarTensorImpl_custom_Int32({"custom", DataType::Int32},
+        // static Registrar<Add_Op> registrarAddImpl_custom("custom", [](const
+        // Add_Op& op) { return std::make_unique<AddImpl_cpu>(op); } ); static
+        // Registrar<Tensor> registrarTensorImpl_custom_Int32({"custom",
+        // DataType::Int32},
        //     [] (DeviceIdx_t device, std::vector<DimSize_t> dims) {
-        //         return std::make_shared<TensorImpl_cpu<int>>(device, dims);
+        //         return std::make_shared<TensorImpl_cpu<int>>(device,
+        //         dims);
        //     }
        // );
        // T1.setBackend("custom");
        // REQUIRE_THROWS(T0 + T1);

-        // wrong datatype
+        // wrong datatype
         Tensor T4(T1->dims());
         T4.setDataType(DataType::Float64);
         REQUIRE_THROWS(*T0 + T4);
@@ -161,34 +186,38 @@
     }
 }

-TEST_CASE("Test substraction of Tensors","[TensorImpl][Sub]") {
+TEST_CASE("Test subtraction of Tensors", "[TensorImpl][Sub]") {
     Tensor T0 = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
     Tensor T1 = Array3D<int, 2, 2, 2>{{{{7, 1}, {3, 7}}, {{54, 0}, {7, 12}}}};

     Tensor T2 = T0 - T1;
     T2.print();
-    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{{{{-6,1},{0,-3}},{{-49,6},{0,-4}}}}));
+    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{
+                      {{{-6, 1}, {0, -3}}, {{-49, 6}, {0, -4}}}}));

     Tensor T3(T1.dims());
     REQUIRE_THROWS(T0 - T3);
 }

-TEST_CASE("Test multiplication of Tensors","[TensorImpl][Mul]") {
+TEST_CASE("Test multiplication of Tensors", "[TensorImpl][Mul]") {
     Tensor T0 = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
     Tensor T1 = Array3D<int, 2, 2, 2>{{{{7, 2}, {3, 7}}, {{5, 6}, {7, 8}}}};

     Tensor T2 = T0 * T1;
     T2.print();
-    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{{{{7,4},{9,28}},{{25,36},{49,64}}}}));
+    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{
+                      {{{7, 4}, {9, 28}}, {{25, 36}, {49, 64}}}}));

     Tensor T3(T1.dims());
     REQUIRE_THROWS(T0 * T3);
 }

-TEST_CASE("Test division of Tensors","[TensorImpl][Div]") {
-    Tensor T0 = Array3D<int, 2, 2, 2>{{{{7,4},{9,28}},{{25,36},{49,64}}}};
+TEST_CASE("Test division of Tensors", "[TensorImpl][Div]") {
+    Tensor T0 =
+        Array3D<int, 2, 2, 2>{{{{7, 4}, {9, 28}}, {{25, 36}, {49, 64}}}};
     Tensor T1 = Array3D<int, 2, 2, 2>{{{{7, 2}, {3, 7}}, {{5, 6},
{7, 8}}}}; Tensor T2 = T0 / T1; T2.print(); - REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}})); + REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{ + {{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}})); Tensor T3(T1.dims()); REQUIRE_THROWS(T0 / T3); diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp index bca4025705cb1c851dcf3e9accbf016c4535120a..718f333a6ac278c0b4921b753c6e9d336a9a0089 100644 --- a/unit_tests/operator/Test_AddImpl.cpp +++ b/unit_tests/operator/Test_AddImpl.cpp @@ -19,49 +19,46 @@ using namespace Aidge; TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") { - std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { // - { // - {{20, 47},{21, 48},{22, 49}}, // - {{23, 50},{24, 51},{25, 52}}, // - {{26, 53},{27, 54},{28, 55}} // - }, // - { // - {{29, 56},{30, 57},{31, 58}}, // - {{32, 59},{33, 60},{34, 61}}, // - {{35, 62},{36, 63},{37, 64}} // - }, // - { // - {{38, 65},{39, 66},{40, 67}}, // - {{41, 68},{42, 69},{43, 70}}, // - {{44, 71},{45, 72},{46, 73}} // - } // - } // - }); // - - SECTION("Two inputs") { - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + std::shared_ptr<Tensor> input1 = + std::make_shared<Tensor>(Array4D<int, 3, 3, 3, 2>{ { + // { - {{40, 94},{42, 96},{44, 98}}, - {{46, 100},{48, 102},{50, 104}}, - {{52, 106},{54, 108},{56, 110}} - }, + // + {{20, 47}, {21, 48}, {22, 49}}, // + {{23, 50}, {24, 51}, {25, 52}}, // + {{26, 53}, {27, 54}, {28, 55}} // + }, // { - {{58, 112},{60, 114},{62, 116}}, - {{64, 118},{66, 120},{68, 122}}, - {{70, 124},{72, 126},{74, 128}} - }, + // + {{29, 56}, {30, 57}, {31, 58}}, // + {{32, 59}, {33, 60}, {34, 61}}, // + {{35, 62}, {36, 63}, {37, 64}} // + }, // { - {{76, 130},{78, 132},{80, 134}}, - {{82, 136},{84, 138},{86, 140}}, - {{88, 142},{90, 144},{92, 146}} - } - } - }); + // + {{38, 65}, {39, 66}, {40, 67}}, // + {{41, 68}, {42, 69}, {43, 70}}, // + {{44, 71}, {45, 72}, {46, 73}} // + } // + } // + }); // + + SECTION("Two inputs") { + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array4D<int, 3, 3, 3, 2>{{{{{40, 94}, {42, 96}, {44, 98}}, + {{46, 100}, {48, 102}, {50, 104}}, + {{52, 106}, {54, 108}, {56, 110}}}, + {{{58, 112}, {60, 114}, {62, 116}}, + {{64, 118}, {66, 120}, {68, 122}}, + {{70, 124}, {72, 126}, {74, 128}}}, + {{{76, 130}, {78, 132}, {80, 134}}, + {{82, 136}, {84, 138}, {86, 140}}, + {{88, 142}, {90, 144}, {92, 146}}}}}); std::shared_ptr<Node> myAdd = Add(); - auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator()); + auto op = + std::static_pointer_cast<OperatorTensor>(myAdd->getOperator()); op->associateInput(0, input1); op->associateInput(1, input1); op->setBackend("cpu"); @@ -72,54 +69,70 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") { } SECTION("Broadcasting") { - std::shared_ptr<Tensor> input_0 = std::make_shared<Tensor>(Array4D<int,3,1,3,2> { - { // - { // - {{0, 1},{2, 3},{4, 5}} // - }, // - { // - {{6, 7},{8, 9},{10, 11}} // - }, // - { // - {{12, 13},{14, 15},{16, 17}} // - } // - } // - }); // - std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { - { // - { // - {{20, 21},{22, 23},{24, 25}}, // - {{26, 27},{28, 29},{30, 31}}, // - {{32, 33},{34, 35},{36, 37}} // - } // - } // - }); // + std::shared_ptr<Tensor> input_0 = + std::make_shared<Tensor>(Array4D<int, 3, 1, 3, 2>{ + { + // + { + // + {{0, 1}, {2, 3}, {4, 5}} // + }, // + { + // + {{6, 7}, {8, 9}, {10, 11}} // + }, // + { + // + 
{{12, 13}, {14, 15}, {16, 17}} // + } // + } // + }); // + std::shared_ptr<Tensor> input_1 = + std::make_shared<Tensor>(Array4D<int, 1, 3, 3, 2>{ + { + // + { + // + {{20, 21}, {22, 23}, {24, 25}}, // + {{26, 27}, {28, 29}, {30, 31}}, // + {{32, 33}, {34, 35}, {36, 37}} // + } // + } // + }); // - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{100,200}}); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { // - { // - {{ 120, 222},{ 124, 226},{ 128, 230}}, // - {{ 126, 228},{ 130, 232},{ 134, 236}}, // - {{ 132, 234},{ 136, 238},{ 140, 242}} // - }, // - { // - {{ 126, 228},{ 130, 232},{ 134, 236}}, // - {{ 132, 234},{ 136, 238},{ 140, 242}}, // - {{ 138, 240},{ 142, 244},{ 146, 248}} // - }, // - { // - {{ 132, 234},{ 136, 238},{140, 242}}, // - {{ 138, 240},{ 142, 244},{146, 248}}, // - {{ 144, 246},{ 148, 250},{152, 254}} // - } // - } // - }); // + std::shared_ptr<Tensor> input_2 = + std::make_shared<Tensor>(Array1D<int, 2>{{100, 200}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array4D<int, 3, 3, 3, 2>{ + { + // + { + // + {{120, 222}, {124, 226}, {128, 230}}, // + {{126, 228}, {130, 232}, {134, 236}}, // + {{132, 234}, {136, 238}, {140, 242}} // + }, // + { + // + {{126, 228}, {130, 232}, {134, 236}}, // + {{132, 234}, {136, 238}, {140, 242}}, // + {{138, 240}, {142, 244}, {146, 248}} // + }, // + { + // + {{132, 234}, {136, 238}, {140, 242}}, // + {{138, 240}, {142, 244}, {146, 248}}, // + {{144, 246}, {148, 250}, {152, 254}} // + } // + } // + }); // std::shared_ptr<Node> myAdd_0 = Add(); std::shared_ptr<Node> myAdd_1 = Add(); - auto op_0 = std::static_pointer_cast<OperatorTensor>(myAdd_0 -> getOperator()); - auto op_1 = std::static_pointer_cast<OperatorTensor>(myAdd_1 -> getOperator()); + auto op_0 = + std::static_pointer_cast<OperatorTensor>(myAdd_0->getOperator()); + auto op_1 = + std::static_pointer_cast<OperatorTensor>(myAdd_1->getOperator()); op_0->associateInput(0, input_0); op_0->associateInput(1, input_1); diff --git a/unit_tests/operator/Test_AndImpl.cpp b/unit_tests/operator/Test_AndImpl.cpp index 053bb3ea4ed913bd388f3ae049c4d6402ad58d59..f78f719bbf66f4c2d732b1f7425cf383f58ce536 100644 --- a/unit_tests/operator/Test_AndImpl.cpp +++ b/unit_tests/operator/Test_AndImpl.cpp @@ -10,7 +10,7 @@ ********************************************************************************/ #include <catch2/catch_test_macros.hpp> -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution #include "aidge/data/Tensor.hpp" #include "aidge/operator/And.hpp" @@ -20,16 +20,20 @@ using namespace Aidge; TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { - SECTION("ForwardDims") - { + SECTION("ForwardDims") { constexpr std::uint16_t NBTRIALS = 10; // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); - std::uniform_int_distribution<int> boolDist(0,1); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist( + std::size_t(2), + std::size_t(10)); + 
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), + std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0, 1); SECTION("Same dimensions") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { @@ -39,18 +43,21 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { dims[i] = dimSizeDist(gen); } - std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput1 = + std::make_shared<Tensor>(dims); myInput1->setBackend("cpu"); myInput1->setDataType(DataType::Float32); myInput1->zeros(); - std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput2 = + std::make_shared<Tensor>(dims); myInput2->setBackend("cpu"); myInput2->setDataType(DataType::Float32); myInput2->zeros(); std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); - op->associateInput(0,myInput1); - op->associateInput(1,myInput2); + auto op = std::static_pointer_cast<OperatorTensor>( + myAnd->getOperator()); + op->associateInput(0, myInput1); + op->associateInput(1, myInput2); op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(); @@ -73,22 +80,24 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { if (boolDist(gen)) { dims2[i] = dim; } - expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + expectedOutDims.push_back(std::max(dims1[i], dims2[i])); } - - std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); + std::shared_ptr<Tensor> myInput1 = + std::make_shared<Tensor>(dims1); myInput1->setBackend("cpu"); myInput1->setDataType(DataType::Float32); myInput1->zeros(); - std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); + std::shared_ptr<Tensor> myInput2 = + std::make_shared<Tensor>(dims2); myInput2->setBackend("cpu"); myInput2->setDataType(DataType::Float32); myInput2->zeros(); std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); - op->associateInput(0,myInput1); - op->associateInput(1,myInput2); + auto op = std::static_pointer_cast<OperatorTensor>( + myAnd->getOperator()); + op->associateInput(0, myInput1); + op->associateInput(1, myInput2); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -100,66 +109,68 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { } } SECTION("Same size inputs") { - std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { // - { // - {{20, 15},{31, 11},{22, 49}}, // - {{41, 10},{24, 51},{27, 52}}, // - {{26, 53},{27, 54},{28, 55}} // - }, // - { // - {{29, 56},{30, 57},{31, 58}}, // - {{32, 59},{33, 60},{34, 61}}, // - {{35, 62},{36, 63},{37, 64}} // - }, // - { // - {{38, 65},{39, 66},{40, 67}}, // - {{41, 68},{42, 69},{43, 70}}, // - {{44, 71},{45, 72},{46, 73}} // - } // - } // - }); // - std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { // - { // - {{20, 47},{21, 48},{22, 49}}, // - {{23, 50},{24, 51},{25, 52}}, // - {{17, 53},{27, 26},{14, 33}} // - }, // - { // - {{29, 56},{30, 57},{31, 58}}, // - {{72, 44},{33, 20},{27, 55}}, // - {{35, 24},{25, 63},{28, 64}} // - }, // - { // - {{32, 65},{39, 66},{40, 70}}, // - {{41, 53},{42, 60},{34, 70}}, // - {{44, 71},{30, 12},{46, 73}} // - } // - } // - }); // - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { - { - {{1, 0},{0, 0},{1, 1}}, - {{0, 0},{1, 1},{0, 1}}, - {{0, 1},{1, 0},{0, 0}} - }, + std::shared_ptr<Tensor> input1 = + 
std::make_shared<Tensor>(Array4D<int, 3, 3, 3, 2>{ { - {{1, 1},{1, 1},{1, 1}}, - {{0, 0},{1, 0},{0, 0}}, - {{1, 0},{0, 1},{0, 1}} - }, + // + { + // + {{20, 15}, {31, 11}, {22, 49}}, // + {{41, 10}, {24, 51}, {27, 52}}, // + {{26, 53}, {27, 54}, {28, 55}} // + }, // + { + // + {{29, 56}, {30, 57}, {31, 58}}, // + {{32, 59}, {33, 60}, {34, 61}}, // + {{35, 62}, {36, 63}, {37, 64}} // + }, // + { + // + {{38, 65}, {39, 66}, {40, 67}}, // + {{41, 68}, {42, 69}, {43, 70}}, // + {{44, 71}, {45, 72}, {46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> input2 = + std::make_shared<Tensor>(Array4D<int, 3, 3, 3, 2>{ { - {{0, 1},{1, 1},{1, 0}}, - {{1, 0},{1, 0},{0, 1}}, - {{1, 1},{0, 0},{1, 1}} - } - } - }); + // + { + // + {{20, 47}, {21, 48}, {22, 49}}, // + {{23, 50}, {24, 51}, {25, 52}}, // + {{17, 53}, {27, 26}, {14, 33}} // + }, // + { + // + {{29, 56}, {30, 57}, {31, 58}}, // + {{72, 44}, {33, 20}, {27, 55}}, // + {{35, 24}, {25, 63}, {28, 64}} // + }, // + { + // + {{32, 65}, {39, 66}, {40, 70}}, // + {{41, 53}, {42, 60}, {34, 70}}, // + {{44, 71}, {30, 12}, {46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array4D<int, 3, 3, 3, 2>{{{{{1, 0}, {0, 0}, {1, 1}}, + {{0, 0}, {1, 1}, {0, 1}}, + {{0, 1}, {1, 0}, {0, 0}}}, + {{{1, 1}, {1, 1}, {1, 1}}, + {{0, 0}, {1, 0}, {0, 0}}, + {{1, 0}, {0, 1}, {0, 1}}}, + {{{0, 1}, {1, 1}, {1, 0}}, + {{1, 0}, {1, 0}, {0, 1}}, + {{1, 1}, {0, 0}, {1, 1}}}}}); std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + auto op = + std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); op->associateInput(0, input1); op->associateInput(1, input2); op->setBackend("cpu"); @@ -170,29 +181,37 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { } SECTION("Broadcasting") { - std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { - { // - { // - {{10, 20},{22, 23},{20, 20}}, // - {{10, 15},{10, 29},{20, 20}}, // - {{26, 25},{33, 20},{10, 20}} // - } // - } // - }); // - - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { - { // - { // - {{ 1, 1},{ 0, 0},{ 0, 1}}, // - {{ 1, 0},{ 1, 0},{ 0, 1}}, // - {{ 0, 0},{ 0, 1},{ 1, 1}} // - } // - } // - }); // + std::shared_ptr<Tensor> input_1 = + std::make_shared<Tensor>(Array4D<int, 1, 3, 3, 2>{ + { + // + { + // + {{10, 20}, {22, 23}, {20, 20}}, // + {{10, 15}, {10, 29}, {20, 20}}, // + {{26, 25}, {33, 20}, {10, 20}} // + } // + } // + }); // + + std::shared_ptr<Tensor> input_2 = + std::make_shared<Tensor>(Array1D<int, 2>{{10, 20}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array4D<int, 1, 3, 3, 2>{ + { + // + { + // + {{1, 1}, {0, 0}, {0, 1}}, // + {{1, 0}, {1, 0}, {0, 1}}, // + {{0, 0}, {0, 1}, {1, 1}} // + } // + } // + }); // std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + auto op = + std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); op->associateInput(0, input_1); op->associateInput(1, input_2); op->setDataType(DataType::Int32); diff --git a/unit_tests/operator/Test_ArgMaxImpl.cpp b/unit_tests/operator/Test_ArgMaxImpl.cpp index 9915d90423e976db1bdd2a694a2cfd7beb380cee..3890e5b1fdfa63d3df05c5e791174cebbdebb211 100644 --- a/unit_tests/operator/Test_ArgMaxImpl.cpp +++ b/unit_tests/operator/Test_ArgMaxImpl.cpp @@ -11,8 +11,8 @@ 
#include <catch2/catch_test_macros.hpp> #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution #include "aidge/data/Tensor.hpp" #include "aidge/operator/ArgMax.hpp" @@ -24,41 +24,48 @@ using namespace Aidge; TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { - SECTION("ForwardDims") - { + SECTION("ForwardDims") { constexpr std::uint16_t NBTRIALS = 10; // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); - std::uniform_int_distribution<int> boolDist(0,1); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist( + std::size_t(2), + std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), + std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0, 1); SECTION("KeepDims") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { DimSize_t nbDims = nbDimsDist(gen); std::vector<DimSize_t> dims(nbDims); std::vector<DimSize_t> expectedOutDims(nbDims); - std::uniform_int_distribution<std::int32_t> axisDist(std::int32_t(0), std::int32_t(nbDims-1)); + std::uniform_int_distribution<std::int32_t> axisDist( + std::int32_t(0), + std::int32_t(nbDims - 1)); std::int32_t axis = axisDist(gen); for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); if (i == axis) { expectedOutDims[i] = 1; - } - else { + } else { expectedOutDims[i] = dims[i]; } } - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); myInput->zeros(); std::shared_ptr<Node> myArgMax = ArgMax(axis); - auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myArgMax->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(); @@ -72,24 +79,28 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { DimSize_t nbDims = nbDimsDist(gen); std::vector<DimSize_t> dims(nbDims); std::vector<DimSize_t> expectedOutDims; - std::uniform_int_distribution<std::int32_t> axisDist(std::int32_t(0), std::int32_t(nbDims-1)); + std::uniform_int_distribution<std::int32_t> axisDist( + std::int32_t(0), + std::int32_t(nbDims - 1)); std::int32_t axis = axisDist(gen); for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); - if(i != axis) { + if (i != axis) { expectedOutDims.push_back(dims[i]); } } - if(expectedOutDims.empty()) { + if (expectedOutDims.empty()) { expectedOutDims.push_back(1); } - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); std::shared_ptr<Node> myArgMax = ArgMax(axis, false); - auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> 
getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myArgMax->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -101,40 +112,22 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { } } SECTION("3D Tensor") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,2,3,4> { - { - { - { 1.0, 2.0, 3.0, 4.0}, - { 8.0, 0.0, 17.0, 1.0}, - { 5.0, 10.0, 6.0, 0.0} - }, - { - { 7.0, 1.0, 9.0, 4.0}, - { 0.0, 8.0, 4.0, 2.0}, - { 9.0, 2.0, 0.0, 5.0} - } - } - }); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 2, 3, 4>{{{{1.0, 2.0, 3.0, 4.0}, + {8.0, 0.0, 17.0, 1.0}, + {5.0, 10.0, 6.0, 0.0}}, + {{7.0, 1.0, 9.0, 4.0}, + {0.0, 8.0, 4.0, 2.0}, + {9.0, 2.0, 0.0, 5.0}}}}); SECTION("Axis 2") { - Tensor myOutput = Tensor(Array3D<float,2,3, 1> { - { - { - {3.0}, - {2.0}, - {1.0} - }, - { - {2.0}, - {1.0}, - {0.0} - } - } - }); + Tensor myOutput = Tensor(Array3D<float, 2, 3, 1>{ + {{{3.0}, {2.0}, {1.0}}, {{2.0}, {1.0}, {0.0}}}}); std::shared_ptr<Node> myArgMax = ArgMax(2); - auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myArgMax->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myArgMax->forward(); @@ -143,16 +136,13 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { } SECTION("Axis 2 with keep_dims false") { - Tensor myOutput = Tensor(Array2D<float,2,3> { - { - { 3.0, 2.0, 1.0 }, - { 2.0, 1.0, 0.0 } - } - }); + Tensor myOutput = Tensor( + Array2D<float, 2, 3>{{{3.0, 2.0, 1.0}, {2.0, 1.0, 0.0}}}); - std::shared_ptr<Node> myArgMax = ArgMax(2,0); - auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); - op->associateInput(0,myInput); + std::shared_ptr<Node> myArgMax = ArgMax(2, 0); + auto op = std::static_pointer_cast<OperatorTensor>( + myArgMax->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myArgMax->forward(); @@ -160,20 +150,13 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { REQUIRE(*(op->getOutput(0)) == myOutput); } SECTION("Axis 1") { - Tensor myOutput = Tensor(Array3D<float,2,1,4> { - { - { - { 1.0, 2.0, 1.0, 0.0 } - }, - { - { 2.0, 1.0, 0.0, 2.0 } - } - } - }); + Tensor myOutput = Tensor(Array3D<float, 2, 1, 4>{ + {{{1.0, 2.0, 1.0, 0.0}}, {{2.0, 1.0, 0.0, 2.0}}}}); std::shared_ptr<Node> myArgMax = ArgMax(1); - auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myArgMax->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myArgMax->forward(); @@ -181,47 +164,42 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { REQUIRE(*(op->getOutput(0)) == myOutput); } SECTION("Axis 0") { - Tensor myOutput = Tensor(Array3D<float,1,3,4> { - { - { - { 1.0, 0.0, 1.0, 0.0 }, - { 0.0, 1.0, 0.0, 1.0 }, - { 1.0, 0.0, 0.0, 1.0 } - } - } - }); + Tensor myOutput = + Tensor(Array3D<float, 1, 3, 4>{{{{1.0, 0.0, 1.0, 0.0}, + {0.0, 1.0, 0.0, 1.0}, + {1.0, 0.0, 0.0, 1.0}}}}); std::shared_ptr<Node> myArgMax = ArgMax(0); - auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); - op->associateInput(0,myInput); + auto op = 
std::static_pointer_cast<OperatorTensor>( + myArgMax->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); - std::cout << " ............... "<< std::endl; + std::cout << " ............... " << std::endl; myArgMax->forward(); op->getOutput(0)->print(); - std::cout <<"------"<<std::endl; + std::cout << "------" << std::endl; myOutput.print(); REQUIRE(*(op->getOutput(0)) == myOutput); } } SECTION("Select_Last_Index") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array1D<float,10> { - { - 1.0, 5.0, 9.0, 0.0, 6.0, 2.0, 9.0, 4.0, 3.0, 9.0 - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {{9}}); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array1D<float, 10>{ + {1.0, 5.0, 9.0, 0.0, 6.0, 2.0, 9.0, 4.0, 3.0, 9.0}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array1D<float, 1>{{9}}); std::shared_ptr<Node> myArgMax = ArgMax(0, 1, 1); - auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); - op->associateInput(0,myInput); + auto op = + std::static_pointer_cast<OperatorTensor>(myArgMax->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myArgMax->forward(); op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == *myOutput); - } } \ No newline at end of file diff --git a/unit_tests/operator/Test_Atan.cpp b/unit_tests/operator/Test_Atan.cpp index 9548e35d81b0423125424a4198d82558c4e57df4..6adfb71c68512c6ec1aac4dcb10a42ca8b054823 100644 --- a/unit_tests/operator/Test_Atan.cpp +++ b/unit_tests/operator/Test_Atan.cpp @@ -21,57 +21,75 @@ using namespace Aidge; TEST_CASE("[cpu/operator] Atan(forward)") { - SECTION("1D Tensor") { - std::shared_ptr<Tensor> input0 = - std::make_shared<Tensor>(Array1D<float, 10>{ - {0.41384590, 0.43120754, 0.93762982, 0.31049860, 0.77547199, - 0.09514862, 0.16145366, 0.42776686, 0.43487436, 0.41170865}}); - std::shared_ptr<Tensor> expectedOutput = - std::make_shared<Tensor>(Array1D<float, 10>{ - {0.39238522, 0.40711672, 0.75322037, 0.30106049, 0.65960488, - 0.09486303, 0.16007232, 0.40421187, 0.4102045, 0.39055911}}); + SECTION("1D Tensor") { + std::shared_ptr<Tensor> input0 = + std::make_shared<Tensor>(Array1D<float, 10>{{0.41384590, + 0.43120754, + 0.93762982, + 0.31049860, + 0.77547199, + 0.09514862, + 0.16145366, + 0.42776686, + 0.43487436, + 0.41170865}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array1D<float, 10>{{0.39238522, + 0.40711672, + 0.75322037, + 0.30106049, + 0.65960488, + 0.09486303, + 0.16007232, + 0.40421187, + 0.4102045, + 0.39055911}}); - std::shared_ptr<Node> myAtan = Atan(); - auto op = std::static_pointer_cast<OperatorTensor>(myAtan->getOperator()); - op->associateInput(0, input0); - op->setDataType(DataType::Float32); - op->setBackend("cpu"); - myAtan->forward(); + std::shared_ptr<Node> myAtan = Atan(); + auto op = + std::static_pointer_cast<OperatorTensor>(myAtan->getOperator()); + op->associateInput(0, input0); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myAtan->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = - static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i < expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001); + float *resPtr = + static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr()); + float *expectedPtr = + static_cast<float 
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
     }
-  }
-  SECTION("3D Tensor") {
-    std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
-        Array3D<float, 2, 2, 3>{{{
-                                     {0.97037154, 0.86208081, 0.77767169},
-                                     {0.38160080, 0.11422747, 0.77284443},
-                                 },
-                                 {{0.51592529, 0.72543722, 0.54641193},
-                                  {0.93866944, 0.97767913, 0.34172094}}}});
-    std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
-        Array3D<float, 2, 2, 3>{{{{0.77036231, 0.71146592, 0.66097706},
-                                  {0.36454508, 0.11373451, 0.65796196}},
-                                 {{0.47630652, 0.62759472, 0.50008428},
-                                  {0.75377332, 0.77411225, 0.32928031}}}});
+    SECTION("3D Tensor") {
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array3D<float, 2, 2, 3>{{{
+                                         {0.97037154, 0.86208081, 0.77767169},
+                                         {0.38160080, 0.11422747, 0.77284443},
+                                     },
+                                     {{0.51592529, 0.72543722, 0.54641193},
+                                      {0.93866944, 0.97767913, 0.34172094}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array3D<float, 2, 2, 3>{{{{0.77036231, 0.71146592, 0.66097706},
+                                      {0.36454508, 0.11373451, 0.65796196}},
+                                     {{0.47630652, 0.62759472, 0.50008428},
+                                      {0.75377332, 0.77411225, 0.32928031}}}});
-    std::shared_ptr<Node> myAtan = Atan();
-    auto op = std::static_pointer_cast<OperatorTensor>(myAtan->getOperator());
-    op->associateInput(0, input0);
-    op->setDataType(DataType::Float32);
-    op->setBackend("cpu");
-    myAtan->forward();
+        std::shared_ptr<Node> myAtan = Atan();
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAtan->getOperator());
+        op->associateInput(0, input0);
+        op->setDataType(DataType::Float32);
+        op->setBackend("cpu");
+        myAtan->forward();
-    float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-    float* expectedPtr =
-        static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-    for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
-      REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
     }
-  }
 }
diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp
index aaa2757830c245275d02792a7a5a2eb1db32d7b8..b6f166d6d84096d95c3d7c6e729d310bf6a39b47 100644
--- a/unit_tests/operator/Test_AvgPoolingImpl.cpp
+++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp
@@ -10,8 +10,8 @@
 ********************************************************************************/
 #include <catch2/catch_test_macros.hpp>
-#include <memory>
 #include <cstdlib>
+#include <memory>
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/AvgPooling.hpp"
@@ -21,57 +21,40 @@ using namespace Aidge;
 TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") {
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,2,5,5> { //NCHW
-        {
-            {
-                {{  0,   1,   2,   3,   4},
-                 {  5,   6,   7,   8,   9},
-                 { 10,  11,  12,  13,  14},
-                 { 15,  16,  17,  18,  19},
-                 { 20,  21,  22,  23,  24}},
+    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+        Array4D<float, 2, 2, 5, 5>{// NCHW
+                                   {{{{0, 1, 2, 3, 4},
+                                      {5, 6, 7, 8, 9},
+                                      {10, 11, 12, 13, 14},
+                                      {15, 16, 17, 18, 19},
+                                      {20, 21, 22, 23, 24}},
-                {{ 25,  26,  27,  28,  29},
-                 { 30,  31,  32,  33,  34},
-                 { 35,  36,  37,  38,  39},
-                 { 40,  41,  42,  43,  44},
-                 { 45,  46,  47,  48,  49}}
-            },
-            {
-                {{100, 101, 102, 103, 104},
-                 {105, 106, 107, 108, 109},
-                 {110, 111, 112, 113, 114},
-                 {115, 116, 117, 118, 119},
-                 {120, 121, 122, 123, 124}},
+                                     {{25, 26, 27, 28, 29},
+                                      {30, 31, 32, 33, 34},
+                                      {35, 36, 37, 38, 39},
+                                      {40, 41, 42, 43, 44},
+                                      {45, 46, 47, 48, 49}}},
+                                    {{{100, 101, 102, 103, 104},
+                                      {105, 106, 107, 108, 109},
+                                      {110, 111, 112, 113, 114},
+                                      {115, 116, 117, 118, 119},
+                                      {120, 121, 122, 123, 124}},
-                {{125, 126, 127, 128, 129},
-                 {130, 131, 132, 133, 134},
-                 {135, 136, 137, 138, 139},
-                 {140, 141, 142, 143, 144},
-                 {145, 146, 147, 148, 149}}
-            }
-        }
-    });
+                                     {{125, 126, 127, 128, 129},
+                                      {130, 131, 132, 133, 134},
+                                      {135, 136, 137, 138, 139},
+                                      {140, 141, 142, 143, 144},
+                                      {145, 146, 147, 148, 149}}}}});
     SECTION("Stride") {
-        std::shared_ptr<Node> myAvgPool = AvgPooling({2,2}, "mycdw", {2,2});
-        auto op = std::static_pointer_cast<OperatorTensor>(myAvgPool -> getOperator());
+        std::shared_ptr<Node> myAvgPool = AvgPooling({2, 2}, "mycdw", {2, 2});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAvgPool->getOperator());
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> {
-            {
-                {
-                    {{  3,   5},
-                     { 13,  15}},
-                    {{ 28,  30},
-                     { 38,  40}}
-                },
-                {
-                    {{103, 105},
-                     {113, 115}},
-                    {{128, 130},
-                     {138, 140}}
-                }
-            }
-        });
-        op->associateInput(0,myInput);
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{
+                {{{{3, 5}, {13, 15}}, {{28, 30}, {38, 40}}},
+                 {{{103, 105}, {113, 115}}, {{128, 130}, {138, 140}}}}});
+        op->associateInput(0, myInput);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myAvgPool->forward();
@@ -80,31 +63,32 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") {
     }
     SECTION("Stride >= feature dim") {
-        std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> { //NCHW
-            {
-                {
-                    {{0.3745, 0.9507, 0.7320},
-                     {0.5987, 0.1560, 0.1560},
-                     {0.0581, 0.8662, 0.6011}}
-                }
-            }
-        });
-        std::shared_ptr<Node> myAvgPool = AvgPooling({3,3}, "mycdw", {3,3});
-        auto op = std::static_pointer_cast<OperatorTensor>(myAvgPool -> getOperator());
+        std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(
+            Array4D<float, 1, 1, 3, 3>{// NCHW
+                                       {{{{0.3745, 0.9507, 0.7320},
+                                          {0.5987, 0.1560, 0.1560},
+                                          {0.0581, 0.8662, 0.6011}}}}});
+        std::shared_ptr<Node> myAvgPool = AvgPooling({3, 3}, "mycdw", {3, 3});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAvgPool->getOperator());
-        Tensor myOutput = Array4D<float,1,1,1,1> {
-            {{{{(0.3745 + 0.9507 + 0.7320 + 0.5987 + 0.1560 + 0.1560 + 0.0581 + 0.8662 + 0.6011)/9.0}}}}
-        };
-        op->associateInput(0,myInput2);
+        Tensor myOutput = Array4D<float, 1, 1, 1, 1>{
+            {{{{(0.3745 + 0.9507 + 0.7320 + 0.5987 + 0.1560 + 0.1560 + 0.0581 +
+                 0.8662 + 0.6011) /
+                9.0}}}}};
+        op->associateInput(0, myInput2);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myAvgPool->forward();
         op->getOutput(0)->print();
-        float* outPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedOutPtr = static_cast<float*>(myOutput.getImpl()->rawPtr());
+        float *outPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedOutPtr =
+            static_cast<float *>(myOutput.getImpl()->rawPtr());
         for (std::size_t i = 0; i < 1; ++i) {
             REQUIRE(std::abs(outPtr[i] - expectedOutPtr[i]) < 0.00001);
         }
     }
-    // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] << std::endl;
+    // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] <<
+    // std::endl;
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_BatchNormImpl.cpp b/unit_tests/operator/Test_BatchNormImpl.cpp
index 1b42c90dd09d63cd319f19bd29751da816db06c0..2969faadc2391b48cbc93d01d552be612bbfbe66 100644
--- a/unit_tests/operator/Test_BatchNormImpl.cpp
+++ b/unit_tests/operator/Test_BatchNormImpl.cpp
@@ -21,78 +21,75 @@ using namespace Aidge;
 TEST_CASE("[cpu/operator] BatchNorm(forward)", "[BatchNorm][CPU]") {
-    std::shared_ptr<Node> myBatchNorm = BatchNorm<2>(3, 0.00001F, 0.1F, "mybatchnorm");
-    auto op = std::static_pointer_cast<OperatorTensor>(myBatchNorm -> getOperator());
-    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array1D<float,3> {{0.9044, 0.3028, 0.0218}});
-    std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float,3> {{0.1332, 0.7503, 0.0878}});
-    std::shared_ptr<Tensor> myMean = std::make_shared<Tensor>(Array1D<float,3> {{0.9931, 0.8421, 0.9936}});
-    std::shared_ptr<Tensor> myVar = std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}});
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { //NCHW
-        {
-            {
-                {{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
-                 {2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
-                 {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
-                {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
-                 {1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
-                 {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
-                {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
-                 {8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
-                 {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}
-            },
-            {
-                {{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
-                 {9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
-                 {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
-                {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
-                 {1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
-                 {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
-                {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
-                 {9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
-                 {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}
-            }
-        }
-    });
-    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
-        {
-            {
-                {{-0.08978321, -0.12890550, -0.21362889},
-                 {-0.88994324, -0.04425725,  0.13315639},
-                 {-0.98898154, -1.08899629, -0.80904692}},
-                {{ 0.41042271,  0.61188596,  0.40120730},
-                 { 0.36147383,  0.67813843,  0.28971246},
-                 { 0.68446606,  0.52936459,  0.56799078}},
-                {{ 0.07320327,  0.06596386,  0.07178652},
-                 { 0.08298140,  0.08225026,  0.07502592},
-                 { 0.08618324,  0.08781575,  0.06206840}}
-            },
-            {
-                {{-0.36870885, -1.17875028, -0.37389761},
-                 { 0.08613246, -1.18157220, -0.53974909},
-                 {-0.87087554, -0.60028774, -0.69565099}},
-                {{ 0.34390146,  0.56648612,  0.55713004},
-                 { 0.35095227,  0.70767546,  0.70558763},
-                 { 0.30519596,  0.52465916,  0.35959685}},
-                {{ 0.08685592,  0.08336888,  0.06698728},
-                 { 0.08673952,  0.07850984,  0.06349554},
-                 { 0.06723238,  0.07242157,  0.08574481}}
-            }
-        }
-    });
-    op->associateInput(0,myInput);
-    op->associateInput(1,myWeights);
-    op->associateInput(2,myBias);
-    op->associateInput(3,myMean);
-    op->associateInput(4,myVar);
+    std::shared_ptr<Node> myBatchNorm =
+        BatchNorm<2>(3, 0.00001F, 0.1F, "mybatchnorm");
+    auto op =
+        std::static_pointer_cast<OperatorTensor>(myBatchNorm->getOperator());
+    std::shared_ptr<Tensor> myWeights =
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.9044, 0.3028, 0.0218}});
+    std::shared_ptr<Tensor> myBias =
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.1332, 0.7503, 0.0878}});
+    std::shared_ptr<Tensor> myMean =
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.9931, 0.8421, 0.9936}});
+    std::shared_ptr<Tensor> myVar =
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.4470, 0.3064, 0.7061}});
+    std::shared_ptr<Tensor> myInput =
+        std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+            // NCHW
+            {{{{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
+               {2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
+               {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
+              {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
+               {1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
+               {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
+              {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
+               {8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
+               {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}},
+             {{{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
+               {9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
+               {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
+              {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
+               {1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
+               {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
+              {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
+               {9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
+               {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}}}});
+    std::shared_ptr<Tensor> myOutput =
+        std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+            {{{{-0.08978321, -0.12890550, -0.21362889},
+               {-0.88994324, -0.04425725, 0.13315639},
+               {-0.98898154, -1.08899629, -0.80904692}},
+              {{0.41042271, 0.61188596, 0.40120730},
+               {0.36147383, 0.67813843, 0.28971246},
+               {0.68446606, 0.52936459, 0.56799078}},
+              {{0.07320327, 0.06596386, 0.07178652},
+               {0.08298140, 0.08225026, 0.07502592},
+               {0.08618324, 0.08781575, 0.06206840}}},
+             {{{-0.36870885, -1.17875028, -0.37389761},
+               {0.08613246, -1.18157220, -0.53974909},
+               {-0.87087554, -0.60028774, -0.69565099}},
+              {{0.34390146, 0.56648612, 0.55713004},
+               {0.35095227, 0.70767546, 0.70558763},
+               {0.30519596, 0.52465916, 0.35959685}},
+              {{0.08685592, 0.08336888, 0.06698728},
+               {0.08673952, 0.07850984, 0.06349554},
+               {0.06723238, 0.07242157, 0.08574481}}}}});
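+    // Reference check (comment added while editing, verified by hand):
+    // each expected value is scale * (x - mean) / sqrt(var + eps) + bias,
+    // per channel; e.g. the first one is
+    // 0.9044 * (0.828257 - 0.9931) / sqrt(0.4470 + 1e-5) + 0.1332 ~ -0.0898.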
+    op->associateInput(0, myInput);
+    op->associateInput(1, myWeights);
+    op->associateInput(2, myBias);
+    op->associateInput(3, myMean);
+    op->associateInput(4, myVar);
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
     myBatchNorm->forward();
-    float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-    float* expectedPtr = static_cast<float*>(myOutput->getImpl()->rawPtr());
-    for (std::size_t i = 0; i< 54; ++i) {
-        REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+    float *resPtr =
+        static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+    float *expectedPtr = static_cast<float *>(myOutput->getImpl()->rawPtr());
+    for (std::size_t i = 0; i < 54; ++i) {
+        REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
     }
-    // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] << std::endl;
+    // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] <<
+    // std::endl;
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_BitShift.cpp b/unit_tests/operator/Test_BitShift.cpp
index a52990bc7991a325ce151cf6634b0d5a831992c8..cf6d1b4e593b423ecbba1e55189b6a7e1232a392 100644
--- a/unit_tests/operator/Test_BitShift.cpp
+++ b/unit_tests/operator/Test_BitShift.cpp
@@ -9,18 +9,18 @@
 *
 ********************************************************************************/
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/BitShift.hpp"
+#include "aidge/utils/TensorUtils.hpp"
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>  // std::size_t
-#include <cstdint>  // std::uint16_t
 #include <chrono>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
+#include <iomanip>
 #include <iostream>
 #include <memory>
-#include <numeric>
-#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution
-#include <iomanip>
-#include "aidge/data/Tensor.hpp"
-#include "aidge/operator/BitShift.hpp"
-#include "aidge/utils/TensorUtils.hpp"
+#include <numeric>
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 namespace Aidge {
@@ -29,31 +29,34 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_int_distribution<int> valueDist(-15, 15);
-    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
-    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_int_distribution<int> valueDist(-15, 15);
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(5));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                          std::size_t(3));
+    std::uniform_int_distribution<int> boolDist(0, 1);
-    BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left;
+    BitShift_Op::BitShiftDirection direction =
+        BitShift_Op::BitShiftDirection::left;
-    if(valueDist(gen) % 2 == 0)
-    {
+    if (valueDist(gen) % 2 == 0) {
         direction = BitShift_Op::BitShiftDirection::right;
     }
     // Create BitShift Operator
     std::shared_ptr<Node> myBitShift = BitShift(direction);
-    auto op = std::static_pointer_cast<OperatorTensor>(myBitShift-> getOperator());
+    auto op =
+        std::static_pointer_cast<OperatorTensor>(myBitShift->getOperator());
     op->setDataType(DataType::Int32);
     op->setBackend("cpu");
     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Int32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Int32);
     T1->setBackend("cpu");
@@ -62,7 +65,8 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
     Tres->setDataType(DataType::Int32);
     Tres->setBackend("cpu");
-    // To measure execution time of 'BitShift_Op::forward()' member function call
+    // To measure execution time of 'BitShift_Op::forward()' member function
+    // call
     std::chrono::time_point<std::chrono::system_clock> start;
     std::chrono::time_point<std::chrono::system_clock> end;
@@ -79,44 +83,48 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
             for (std::size_t i = 0; i < nbDims; ++i) {
                 dims.push_back(dimSizeDist(gen));
             }
-            const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+            const std::size_t nb_elements =
+                std::accumulate(dims.cbegin(),
+                                dims.cend(),
+                                std::size_t(1),
+                                std::multiplies<std::size_t>());
             number_of_operation += nb_elements;
             // without broadcasting
-            int* array0 = new int[nb_elements];
-            int* array1 = new int[nb_elements];
-            int* result = new int[nb_elements];
+            int *array0 = new int[nb_elements];
+            int *array1 = new int[nb_elements];
+            int *result = new int[nb_elements];
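+            // Editorial note: the loop below builds the expected output with
+            // plain C++ shift operators, so the kernel under test is checked
+            // against an Aidge-independent reference.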
             for (std::size_t i = 0; i < nb_elements; ++i) {
                 array0[i] = valueDist(gen);
-                array1[i] = std::abs(valueDist(gen)); // bitshift is impossible with negative value
-                if(direction == BitShift_Op::BitShiftDirection::left)
-                {
+                array1[i] = std::abs(valueDist(
+                    gen)); // shifting by a negative amount is undefined
+                if (direction == BitShift_Op::BitShiftDirection::left) {
                     result[i] = array0[i] << array1[i];
-                }
-                else
-                {
+                } else {
                     result[i] = array0[i] >> array1[i];
                 }
             }
             // input0
             T0->resize(dims);
-            T0 -> getImpl() -> setRawPtr(array0, nb_elements);
+            T0->getImpl()->setRawPtr(array0, nb_elements);
             // input1
             T1->resize(dims);
-            T1 -> getImpl() -> setRawPtr(array1, nb_elements);
+            T1->getImpl()->setRawPtr(array1, nb_elements);
             // results
             Tres->resize(dims);
-            Tres -> getImpl() -> setRawPtr(result, nb_elements);
+            Tres->getImpl()->setRawPtr(result, nb_elements);
             op->forwardDims();
             start = std::chrono::system_clock::now();
             myBitShift->forward();
             end = std::chrono::system_clock::now();
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration +=
+                std::chrono::duration_cast<std::chrono::microseconds>(
+                    end - start);
             bool is_eq = approxEq<int>(*(op->getOutput(0)), *Tres);
@@ -128,18 +136,19 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
             delete[] array0;
             delete[] array1;
             delete[] result;
-
         }
-        std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-        std::cout << "total time: " << duration.count() << "μs" << std::endl;
+        std::cout << "number of elements over time spent: "
+                  << (number_of_operation / duration.count()) << std::endl;
+        std::cout << "total time: " << duration.count() << "μs"
+                  << std::endl;
     }
     SECTION("Test BitShift kernels with Broadcasting") {
         std::size_t number_of_operation = 0;
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
             // generate 2 random Tensors
-            // handle dimensions, replace some dimensions with '1' to get broadcasting
+            // handle dimensions, replace some dimensions with '1' to get
+            // broadcasting
             constexpr std::size_t nbDims = 4;
             std::vector<std::size_t> dims;
             for (std::size_t i = 0; i < nbDims; ++i) {
@@ -159,42 +168,63 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
             }
             // create arrays and fill them with random values
-            int* array0 = new int[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-            int* array1 = new int[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
-            int* result = new int[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-            for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
+            int *array0 =
+                new int[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+            int *array1 =
+                new int[dims1[0] * dims1[1] * dims1[2] * dims1[3]];
+            int *result =
+                new int[dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]];
+
+            for (std::size_t i = 0;
+                 i < dims0[0] * dims0[1] * dims0[2] * dims0[3];
+                 ++i) {
                 array0[i] = valueDist(gen);
            }
-            for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
+            for (std::size_t i = 0;
+                 i < dims1[0] * dims1[1] * dims1[2] * dims1[3];
+                 ++i) {
                 array1[i] = std::abs(valueDist(gen));
            }
-            //True result with broadcast
-            const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-            const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
+            // True result with broadcast
+            const std::size_t strides0[nbDims] = {
+                dims0[1] * dims0[2] * dims0[3],
+                dims0[2] * dims0[3],
+                dims0[3],
+                1};
+            const std::size_t strides1[nbDims] = {
+                dims1[1] * dims1[2] * dims1[3],
+                dims1[2] * dims1[3],
+                dims1[3],
+                1};
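+            // Broadcast ground truth (editorial note): the loops below walk
+            // the output space; any input axis of size 1 keeps index 0 while
+            // the output index advances, which is what the
+            // `(dims > 1) ? idx : 0` selectors implement.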
             for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                 for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                    const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                    const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
-                                                + strides1[1] * ((dims1[1] > 1) ? b : 0);
+                    const std::size_t idx0_0 =
+                        strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                        strides0[1] * ((dims0[1] > 1) ? b : 0);
+                    const std::size_t idx1_0 =
+                        strides1[0] * ((dims1[0] > 1) ? a : 0) +
+                        strides1[1] * ((dims1[1] > 1) ? b : 0);
                     for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                        const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                        const std::size_t idx_out =
+                            dimsOut[3] *
+                            (c + dimsOut[2] * (b + dimsOut[1] * a));
                         for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                            std::size_t idx0 = idx0_0
-                                                + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                + ((dims0[3] > 1) ? d : 0);
-                            std::size_t idx1 = idx1_0
-                                                + strides1[2] * ((dims1[2] > 1) ? c : 0)
-                                                + ((dims1[3] > 1) ? d : 0);
-                            if(direction == BitShift_Op::BitShiftDirection::left)
-                            {
-                                result[idx_out + d] = array0[idx0] << array1[idx1];
-                            }
-                            else
-                            {
-                                result[idx_out + d] = array0[idx0] >> array1[idx1];
+                            std::size_t idx0 =
+                                idx0_0 +
+                                strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                ((dims0[3] > 1) ? d : 0);
+                            std::size_t idx1 =
+                                idx1_0 +
+                                strides1[2] * ((dims1[2] > 1) ? c : 0) +
+                                ((dims1[3] > 1) ? d : 0);
+                            if (direction ==
+                                BitShift_Op::BitShiftDirection::left) {
+                                result[idx_out + d] = array0[idx0]
+                                                      << array1[idx1];
+                            } else {
+                                result[idx_out + d] =
+                                    array0[idx0] >> array1[idx1];
                             }
                         }
                     }
@@ -204,27 +234,34 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
             // conversion to Aidge::Tensors
             // input0
             T0->resize(dims0);
-            T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+            T0->getImpl()->setRawPtr(
+                array0,
+                dims0[0] * dims0[1] * dims0[2] * dims0[3]);
             // input1
             T1->resize(dims1);
-            T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
+            T1->getImpl()->setRawPtr(
+                array1,
+                dims1[0] * dims1[1] * dims1[2] * dims1[3]);
             // results
             Tres->resize(dimsOut);
-            Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+            Tres->getImpl()->setRawPtr(
+                result,
+                dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
             // compute result
             op->forwardDims();
             start = std::chrono::system_clock::now();
             myBitShift->forward();
             end = std::chrono::system_clock::now();
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration +=
+                std::chrono::duration_cast<std::chrono::microseconds>(
+                    end - start);
             // comparison between truth and computed result
             bool equiv = (approxEq<int>(*(op->getOutput(0)), *Tres));
-            if(equiv == false)
-            {
+            if (equiv == false) {
                 std::cout << "Problem\n";
             }
             REQUIRE(equiv);
@@ -233,13 +270,18 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
             delete[] array1;
             delete[] result;
-            const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+            const std::size_t nb_elements =
+                std::accumulate(dimsOut.cbegin(),
+                                dimsOut.cend(),
+                                std::size_t(1),
+                                std::multiplies<std::size_t>());
             number_of_operation += nb_elements;
         }
-        std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-        std::cout << "total time: " << duration.count() << "μs" << std::endl;
+        std::cout << "number of elements over time spent: "
+                  << (number_of_operation / duration.count()) << std::endl;
+        std::cout << "total time: " << duration.count() << "μs"
+                  << std::endl;
     }
-
-}
+    }
 } // namespace Aidge
-}
\ No newline at end of file
+} // namespace Aidge
\ No newline at end of file
diff --git a/unit_tests/operator/Test_ClipImpl.cpp b/unit_tests/operator/Test_ClipImpl.cpp
index 45c8da5bf7ecc84fad6b3e694fe204540f579af3..ef8f10da06459a324c1b557d41f6062c0528f80a 100644
--- a/unit_tests/operator/Test_ClipImpl.cpp
+++ b/unit_tests/operator/Test_ClipImpl.cpp
@@ -9,24 +9,27 @@
 *
 ********************************************************************************/
+#include <algorithm>
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>  // std::size_t
-#include <cstdint>  // std::uint16_t
 #include <chrono>
-#include <iostream>
-#include <vector>
-#include <algorithm>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iomanip>
+#include <iostream>
 #include <memory>
-#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <vector>
+#include "aidge/backend/cpu.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Clip.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/TensorUtils.hpp"
-#include "aidge/backend/cpu.hpp"
-void ComputeClipBackward(const std::vector<float>& vec1, std::vector<float>& vec2, float min, float max) {
+void ComputeClipBackward(const std::vector<float> &vec1,
+                         std::vector<float> &vec2,
+                         float min,
+                         float max) {
     if (vec1.size() != vec2.size()) {
        std::cerr << "Vectors should have the same size." << std::endl;
        return;
@@ -38,23 +41,21 @@ void ComputeClipBackward(const std::vector<float>& vec1, std::vector<float>& vec
        }
    }
 }
-namespace Aidge
-{
-TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
- {
+namespace Aidge {
+TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") {
     const std::uint16_t NBTRIALS = 10;
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
     std::uniform_real_distribution<float> dis(0.0, 10.0);
-    std::uniform_real_distribution<float> dismin(0.0, 4.5);
-    std::uniform_real_distribution<float> dismax(5.5, 10.0);
-    std::uniform_int_distribution<std::size_t> distDims(5,15);
+    std::uniform_real_distribution<float> dismin(0.0, 4.5);
+    std::uniform_real_distribution<float> dismax(5.5, 10.0);
+    std::uniform_int_distribution<std::size_t> distDims(5, 15);
     std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);
     // Create Clip Operator
     std::shared_ptr<Node> myClip = Aidge::Clip("nop");
-    auto op = std::static_pointer_cast<OperatorTensor>(myClip -> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(myClip->getOperator());
     // To measure execution time of 'Clip_Op::forward()' member function call
     std::chrono::time_point<std::chrono::system_clock> start;
@@ -67,41 +68,44 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
             // generate Tensors dimensions
             const std::size_t dim0 = distDims(gen);
             const std::size_t dim1 = distDims(gen);
-            totalComputation += dim0*dim1;
+            totalComputation += dim0 * dim1;
             // Create and populate the array with random float values
-            float* Array = new float[dim0*dim1];
-            for (int i = 0; i < dim0*dim1; ++i) {
+            float *Array = new float[dim0 * dim1];
+            for (int i = 0; i < dim0 * dim1; ++i) {
                 Array[i] = dis(gen); // Generate random float value
             }
             // Convert Input to Tensor
-            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
-            TInput -> resize({dim0,dim1});
-            TInput -> setBackend("cpu");
-            TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
-
+            std::shared_ptr<Tensor> TInput =
+                std::make_shared<Tensor>(DataType::Float32);
+            TInput->resize({dim0, dim1});
+            TInput->setBackend("cpu");
+            TInput->getImpl()->setRawPtr(Array, dim0 * dim1);
+
             float min = dismin(gen);
-            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
-            Tmin -> resize({});
-            Tmin -> setBackend("cpu");
-            Tmin -> getImpl() -> setRawPtr(&min,1);
+            std::shared_ptr<Tensor> Tmin =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tmin->resize({});
+            Tmin->setBackend("cpu");
+            Tmin->getImpl()->setRawPtr(&min, 1);
             float max = dismax(gen);
-            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
-            Tmax -> resize({});
-            Tmax -> setBackend("cpu");
-            Tmax -> getImpl() -> setRawPtr(&max,1);
+            std::shared_ptr<Tensor> Tmax =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tmax->resize({});
+            Tmax->setBackend("cpu");
+            Tmax->getImpl()->setRawPtr(&max, 1);
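+            // Ground truth (editorial note): a copy of the raw values is
+            // clamped with std::max/std::min on a plain std::vector, then
+            // wrapped in a Tensor so forward() can be compared with approxEq
+            // against a reference that never touches the Clip kernel.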
             // convert res to Tensor
-            std::vector<float> GT(Array, Array + (dim0*dim1));
-            for (float& val : GT)
-            {
+            std::vector<float> GT(Array, Array + (dim0 * dim1));
+            for (float &val : GT) {
                 val = std::max(min, std::min(val, max));
             }
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dim0,dim1});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
+            std::shared_ptr<Tensor> Tres =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tres->resize({dim0, dim1});
+            Tres->setBackend("cpu");
+            Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);
             op->associateInput(0, TInput);
             op->associateInput(1, Tmin);
@@ -109,59 +113,65 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             op->forwardDims(true);
-
+
             start = std::chrono::system_clock::now();
             myClip->forward();
             end = std::chrono::system_clock::now();
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(
+                end - start);
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "elements over time spent: "
+                  << totalComputation / duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
-     }
+    }
     SECTION("Clip test with min >= max [Forward]") {
         std::size_t totalComputation = 0;
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
             // generate Tensors dimensions
             const std::size_t dim0 = distDims(gen);
             const std::size_t dim1 = distDims(gen);
-            totalComputation += dim0*dim1;
+            totalComputation += dim0 * dim1;
             // Create and populate the array with random float values
-            float* Array = new float[dim0*dim1];
-            for (int i = 0; i < dim0*dim1; ++i) {
+            float *Array = new float[dim0 * dim1];
+            for (int i = 0; i < dim0 * dim1; ++i) {
                 Array[i] = dis(gen); // Generate random float value
             }
             // Convert Input to Tensor
-            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
-            TInput -> resize({dim0,dim1});
-            TInput -> setBackend("cpu");
-            TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
-
+            std::shared_ptr<Tensor> TInput =
+                std::make_shared<Tensor>(DataType::Float32);
+            TInput->resize({dim0, dim1});
+            TInput->setBackend("cpu");
+            TInput->getImpl()->setRawPtr(Array, dim0 * dim1);
+
             float min = dismax(gen);
-            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
-            Tmin -> resize({});
setBackend("cpu"); - Tmin -> getImpl() -> setRawPtr(&min,1); - - float max = dismin(gen); //We generate max and min so that max is always <= min - std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32); - Tmax -> resize({}); - Tmax -> setBackend("cpu"); - Tmax -> getImpl() -> setRawPtr(&max,1); + std::shared_ptr<Tensor> Tmin = + std::make_shared<Tensor>(DataType::Float32); + Tmin->resize({}); + Tmin->setBackend("cpu"); + Tmin->getImpl()->setRawPtr(&min, 1); + + float max = dismin( + gen); // We generate max and min so that max is always <= min + std::shared_ptr<Tensor> Tmax = + std::make_shared<Tensor>(DataType::Float32); + Tmax->resize({}); + Tmax->setBackend("cpu"); + Tmax->getImpl()->setRawPtr(&max, 1); // convert res to Tensor - std::vector<float> GT(Array, Array + (dim0*dim1)); - for (float& val : GT) - { + std::vector<float> GT(Array, Array + (dim0 * dim1)); + for (float &val : GT) { val = max; } - std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32); - Tres -> resize({dim0,dim1}); - Tres -> setBackend("cpu"); - Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1); + std::shared_ptr<Tensor> Tres = + std::make_shared<Tensor>(DataType::Float32); + Tres->resize({dim0, dim1}); + Tres->setBackend("cpu"); + Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1); op->associateInput(0, TInput); op->associateInput(1, Tmin); @@ -169,56 +179,57 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(true); - + start = std::chrono::system_clock::now(); myClip->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; + std::cout << "multiplications over time spent: " + << totalComputation / duration.count() << std::endl; std::cout << "total time: " << duration.count() << std::endl; - } - SECTION("Clip with Clip Attr [Forward]") - { + } + SECTION("Clip with Clip Attr [Forward]") { std::size_t totalComputation = 0; - for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) - { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { float min = dismin(gen); float max = dismax(gen); - std::shared_ptr<Node> myCl = Aidge::Clip("",min,max); - auto op = std::static_pointer_cast<OperatorTensor>(myCl -> getOperator()); - + std::shared_ptr<Node> myCl = Aidge::Clip("", min, max); + auto op = + std::static_pointer_cast<OperatorTensor>(myCl->getOperator()); // generate Tensors dimensions const std::size_t dim0 = 3; const std::size_t dim1 = 3; - totalComputation += dim0*dim1; + totalComputation += dim0 * dim1; // Create and populate the array with random float values - float* Array = new float[dim0*dim1]; - for (int i = 0; i < dim0*dim1; ++i) { + float *Array = new float[dim0 * dim1]; + for (int i = 0; i < dim0 * dim1; ++i) { Array[i] = dis(gen); // Generate random float value } // Convert Input to Tensor - std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32); - TInput -> resize({dim0,dim1}); - TInput -> setBackend("cpu"); - TInput -> getImpl() -> setRawPtr(Array, dim0*dim1); + std::shared_ptr<Tensor> TInput = + std::make_shared<Tensor>(DataType::Float32); + TInput->resize({dim0, dim1}); + TInput->setBackend("cpu"); + TInput->getImpl()->setRawPtr(Array, dim0 * 
+            TInput->getImpl()->setRawPtr(Array, dim0 * dim1);
             // convert res to Tensor
-            std::vector<float> GT(Array, Array + (dim0*dim1));
-            for (float& val : GT)
-            {
+            std::vector<float> GT(Array, Array + (dim0 * dim1));
+            for (float &val : GT) {
                 val = std::max(min, std::min(val, max));
             }
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dim0,dim1});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
+            std::shared_ptr<Tensor> Tres =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tres->resize({dim0, dim1});
+            Tres->setBackend("cpu");
+            Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);
             op->associateInput(0, TInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
@@ -227,11 +238,13 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
             myCl->forward();
             end = std::chrono::system_clock::now();
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(
+                end - start);
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "elements over time spent: "
+                  << totalComputation / duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
     }
     SECTION("Simple clip test [Backward]") {
@@ -239,80 +252,90 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
         duration = std::chrono::duration<double, std::micro>::zero();
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
             std::size_t totalComputation = 0;
-            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
-            // generate Tensors dimensions
-            const std::size_t dim0 = distDims(gen);
-            const std::size_t dim1 = distDims(gen);
-
-            totalComputation += dim0*dim1;
-
-            // Create and populate the array with random float values
-            float* Array = new float[dim0*dim1];
-            float* gradArray = new float[dim0*dim1];
-            for (int i = 0; i < dim0*dim1; ++i) {
-                Array[i] = dis(gen); // Generate random float value
-                gradArray[i] = dis(gen);
+            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+                // generate Tensors dimensions
+                const std::size_t dim0 = distDims(gen);
+                const std::size_t dim1 = distDims(gen);
+
+                totalComputation += dim0 * dim1;
+
+                // Create and populate the array with random float values
+                float *Array = new float[dim0 * dim1];
+                float *gradArray = new float[dim0 * dim1];
+                for (int i = 0; i < dim0 * dim1; ++i) {
+                    Array[i] = dis(gen); // Generate random float value
+                    gradArray[i] = dis(gen);
+                }
+
+                std::shared_ptr<Tensor> TGrad =
+                    std::make_shared<Tensor>(DataType::Float32);
+                TGrad->resize({dim0, dim1});
+                TGrad->setBackend("cpu");
+                TGrad->getImpl()->setRawPtr(gradArray, dim0 * dim1);
+
+                // Convert Input to Tensor
+                std::shared_ptr<Tensor> TInput =
+                    std::make_shared<Tensor>(DataType::Float32);
+                TInput->resize({dim0, dim1});
+                TInput->setBackend("cpu");
+                TInput->getImpl()->setRawPtr(Array, dim0 * dim1);
+
+                float min = dismin(gen);
+                std::shared_ptr<Tensor> Tmin =
+                    std::make_shared<Tensor>(DataType::Float32);
+                Tmin->resize({});
+                Tmin->setBackend("cpu");
+                Tmin->getImpl()->setRawPtr(&min, 1);
+
+                float max = dismax(gen);
+                std::shared_ptr<Tensor> Tmax =
+                    std::make_shared<Tensor>(DataType::Float32);
+                Tmax->resize({});
+                Tmax->setBackend("cpu");
+                Tmax->getImpl()->setRawPtr(&max, 1);
+                // convert res to Tensor
+                std::vector<float> GT(Array, Array + (dim0 * dim1));
+                for (float &val : GT) {
+                    val = std::max(min, std::min(val, max)); // Clip operation
+                }
+                std::shared_ptr<Tensor> Tres =
+                    std::make_shared<Tensor>(DataType::Float32);
+                Tres->resize({dim0, dim1});
+                Tres->setBackend("cpu");
+                Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);
+
+                op->associateInput(0, TInput);
+                op->associateInput(1, Tmin);
+                op->associateInput(2, Tmax);
+                op->setDataType(DataType::Float32);
+                op->setBackend("cpu");
+                op->forwardDims(true);
+                myClip->forward();
+
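+                // ComputeClipBackward masks the incoming gradient: entries
+                // whose input lies below min or above max are zeroed, the
+                // others pass through unchanged.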
+                op->getOutput(0)->setGrad(TGrad);
+
+                start = std::chrono::system_clock::now();
+                REQUIRE_NOTHROW(myClip->backward());
+                end = std::chrono::system_clock::now();
+
+                auto GradTensor = op->getInput(0)->grad();
+                float *BackwardTensor =
+                    (float *)GradTensor->getImpl()->rawPtr();
+                std::vector<float> GT0(Array, Array + (dim0 * dim1));
+                std::vector<float> GT1(gradArray, gradArray + (dim0 * dim1));
+                std::vector<float> BackwardTensorVec(
+                    BackwardTensor,
+                    BackwardTensor + (dim0 * dim1));
+                ComputeClipBackward(GT0, GT1, min, max);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
+                REQUIRE(GT1 == BackwardTensorVec);
             }
-
-            std::shared_ptr<Tensor> TGrad = std::make_shared<Tensor>(DataType::Float32);
-            TGrad -> resize({dim0,dim1});
-            TGrad -> setBackend("cpu");
-            TGrad -> getImpl() -> setRawPtr(gradArray, dim0*dim1);
-
-            // Convert Input to Tensor
-            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
-            TInput -> resize({dim0,dim1});
-            TInput -> setBackend("cpu");
-            TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
-
-            float min = dismin(gen);
-            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
-            Tmin -> resize({});
-            Tmin -> setBackend("cpu");
-            Tmin -> getImpl() -> setRawPtr(&min,1);
-
-            float max = dismax(gen);
-            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
-            Tmax -> resize({});
-            Tmax -> setBackend("cpu");
-            Tmax -> getImpl() -> setRawPtr(&max,1);
-            // convert res to Tensor
-            std::vector<float> GT(Array, Array + (dim0*dim1));
-            for (float& val : GT)
-            {
-                val = std::max(min, std::min(val, max));//Clip operation
-            }
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dim0,dim1});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
-
-            op->associateInput(0, TInput);
-            op->associateInput(1, Tmin);
-            op->associateInput(2, Tmax);
-            op->setDataType(DataType::Float32);
-            op->setBackend("cpu");
-            op->forwardDims(true);
-            myClip->forward();
-
-            op->getOutput(0)->setGrad(TGrad);
-
-            start = std::chrono::system_clock::now();
-            REQUIRE_NOTHROW(myClip->backward());
-            end = std::chrono::system_clock::now();
-
-            auto GradTensor = op->getInput(0)->grad();
-            float* BackwardTensor = (float*)GradTensor->getImpl()->rawPtr();
-            std::vector<float> GT0(Array,Array+(dim0*dim1));
-            std::vector<float> GT1(gradArray,gradArray+(dim0*dim1));
-            std::vector<float> BackwardTensorVec(BackwardTensor,BackwardTensor+(dim0*dim1));
-            ComputeClipBackward(GT0,GT1,min,max);
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-            REQUIRE(GT1 == BackwardTensorVec);
+            std::cout << "elements over time spent: "
+                      << totalComputation / duration.count() << std::endl;
+            std::cout << "total time: " << duration.count() << std::endl;
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
-        std::cout << "total time: " << duration.count() << std::endl;
     }
-  }
-} // namespace Aidge
-}
\ No newline at end of file
+} // namespace Aidge
+} // namespace Aidge
\ No newline at end of file
diff --git a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
index 42505d385fde7e72e09531f1607287ffc6978f75..5c3a6a0d7e054dd4e6d694e6f2554c05475986a5 100644
--- a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
+++ b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
@@ -38,83 +38,90 @@ namespace Aidge {
 TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") {
-  constexpr std::uint16_t NBTRIALS = 10;
-  // Create a random number generator
-  auto random_seed = Catch::Generators::Detail::getSeed;
-  std::mt19937 gen(random_seed());
-  std::uniform_real_distribution<float> valueDist(
-      0.1f, 1.1f); // Random float distribution between 0 and 1
-  std::uniform_int_distribution<DimSize_t> input_tensor_size_dist(
-      std::size_t(1), std::size_t(10));
-  std::uniform_int_distribution<int64_t> input_tensor_values_dist(
-      std::size_t(1), std::size_t(7));
-  std::uniform_real_distribution<double> operator_attr_value_dist(-100., 100.);
+    constexpr std::uint16_t NBTRIALS = 10;
+    // Create a random number generator
+    auto random_seed = Catch::Generators::Detail::getSeed;
+    std::mt19937 gen(random_seed());
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0 and 1
+    std::uniform_int_distribution<DimSize_t> input_tensor_size_dist(
+        std::size_t(1),
+        std::size_t(10));
+    std::uniform_int_distribution<int64_t> input_tensor_values_dist(
+        std::size_t(1),
+        std::size_t(7));
+    std::uniform_real_distribution<double> operator_attr_value_dist(-100.,
+                                                                    100.);
-  ///////////////////////////////////////////////
-  // SETUP FUNCTIONS
-  auto generate_input_tensor =
-      [&gen, &input_tensor_size_dist,
-       &input_tensor_values_dist]() -> std::shared_ptr<Tensor> {
-    std::vector<DimSize_t> input_dims;
-    input_dims.push_back(input_tensor_size_dist(gen));
+    ///////////////////////////////////////////////
+    // SETUP FUNCTIONS
+    auto generate_input_tensor =
+        [&gen,
+         &input_tensor_size_dist,
+         &input_tensor_values_dist]() -> std::shared_ptr<Tensor> {
+        std::vector<DimSize_t> input_dims;
+        input_dims.push_back(input_tensor_size_dist(gen));
-    auto result = std::make_shared<Tensor>(input_dims);
-    result->setDataType(DataType::Int64);
-    result->setBackend("cpu");
-    for (DimSize_t i = 0; i < result->size(); ++i) {
-      result->set<int64_t>(i, input_tensor_values_dist(gen));
-    }
-    return result;
-  };
+        auto result = std::make_shared<Tensor>(input_dims);
+        result->setDataType(DataType::Int64);
+        result->setBackend("cpu");
+        for (DimSize_t i = 0; i < result->size(); ++i) {
+            result->set<int64_t>(i, input_tensor_values_dist(gen));
+        }
+        return result;
+    };
-  auto generate_random_operator =
-      [&gen,
-       &operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> {
-    auto node = ConstantOfShape(Tensor(operator_attr_value_dist(gen)));
-    auto op = std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator());
-    op->setDataType(DataType::Float64);
-    op->setBackend("cpu");
-    return op;
-  };
+    auto generate_random_operator =
+        [&gen,
+         &operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> {
+        auto node = ConstantOfShape(Tensor(operator_attr_value_dist(gen)));
+        auto op =
+            std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator());
+        op->setDataType(DataType::Float64);
+        op->setBackend("cpu");
+        return op;
+    };
-  auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor,
-                                   std::shared_ptr<ConstantOfShape_Op> op) {
-    std::vector<DimSize_t> output_dims;
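+    // Editorial note: this fixture builds the expected tensor directly from
+    // the semantics of ConstantOfShape — the output shape is read from the
+    // values of the 1-D int64 input, and every element is filled with the
+    // operator's `value` attribute.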
-    output_dims.reserve(input_tensor->size());
-    for (DimSize_t i = 0; i < input_tensor->size(); ++i) {
-      output_dims.push_back(input_tensor->get<int64_t>(i));
-    }
-    auto result = std::make_shared<Tensor>(output_dims);
-    result->setDataType(op->value().dataType());
-    result->setBackend("cpu");
-    constantFiller(result, op->value().get<double>(0));
-    return result;
-  };
+    auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor,
+                                     std::shared_ptr<ConstantOfShape_Op> op) {
+        std::vector<DimSize_t> output_dims;
+        output_dims.reserve(input_tensor->size());
+        for (DimSize_t i = 0; i < input_tensor->size(); ++i) {
+            output_dims.push_back(input_tensor->get<int64_t>(i));
+        }
+        auto result = std::make_shared<Tensor>(output_dims);
+        result->setDataType(op->value().dataType());
+        result->setBackend("cpu");
+        constantFiller(result, op->value().get<double>(0));
+        return result;
+    };
-  /////////////////////////////////////
-  // BENCHMARKING
-  std::chrono::time_point<std::chrono::system_clock> start;
-  std::chrono::time_point<std::chrono::system_clock> end;
-  std::chrono::duration<double, std::micro> duration{};
-  int number_of_operation{0};
+    /////////////////////////////////////
+    // BENCHMARKING
+    std::chrono::time_point<std::chrono::system_clock> start;
+    std::chrono::time_point<std::chrono::system_clock> end;
+    std::chrono::duration<double, std::micro> duration{};
+    int number_of_operation{0};
-  SECTION("ConstantOfShapeImpl_cpu::forward()") {
-    for (int i = 0; i < NBTRIALS; ++i) {
-      auto input_T = generate_input_tensor();
-      std::shared_ptr<ConstantOfShape_Op> op = generate_random_operator();
-      auto output_T = generate_output_tensor(input_T, op);
-      op->associateInput(0, input_T);
+    SECTION("ConstantOfShapeImpl_cpu::forward()") {
+        for (int i = 0; i < NBTRIALS; ++i) {
+            auto input_T = generate_input_tensor();
+            std::shared_ptr<ConstantOfShape_Op> op =
+                generate_random_operator();
+            auto output_T = generate_output_tensor(input_T, op);
+            op->associateInput(0, input_T);
-      REQUIRE(op->forwardDims(true));
-      REQUIRE_NOTHROW(op->forward());
+            REQUIRE(op->forwardDims(true));
+            REQUIRE_NOTHROW(op->forward());
-      CHECK(output_T->nbDims() == op->getOutput(0)->nbDims());
-      for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) {
-        CHECK(output_T->dims().at(i) == op->getOutput(0)->dims().at(i));
-      }
-      CHECK(approxEq<double>(*output_T, *op->getOutput(0)));
+            CHECK(output_T->nbDims() == op->getOutput(0)->nbDims());
+            for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) {
+                CHECK(output_T->dims().at(i) ==
+                      op->getOutput(0)->dims().at(i));
+            }
+            CHECK(approxEq<double>(*output_T, *op->getOutput(0)));
+        }
     }
-  }
 }
 } // namespace Aidge
-
diff --git a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
index f1594ef5a21070803a7b86861eac513708ec03a2..8750555bc2b64cae2740191195e3442b9ffe43e7 100644
--- a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
+++ b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
@@ -23,194 +23,174 @@ using namespace Aidge;
 TEST_CASE("[cpu/operator] ConvDepthWise(forward)", "[ConvDepthWise][CPU]") {
     SECTION("k[3,3]") {
-        std::shared_ptr<Node> myCDW = ConvDepthWise(4, {3,3}, "mycdw");
-        auto op = std::static_pointer_cast<OperatorTensor>(myCDW -> getOperator());
-        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,1,3,3> {
-            {
-                {{
-                    {  0,   1,   2},
-                    {  3,   4,   5},
-                    {  6,   7,   8}

-                }},
-                {{
-                    { 27,  28,  29},
-                    { 30,  31,  32},
-                    { 33,  34,  35}

-                }},
-                {{
-                    { 54,  55,  56},
-                    { 57,  58,  59},
-                    { 60,  61,  62}
-                }},
-                {{
-                    { 81,  82,  83},
-                    { 84,  85,
86},
-                    { 87,  88,  89}
-                }}
-            }
-        });
-        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                     {  5,   6,   7,   8,   9},
-                     { 10,  11,  12,  13,  14},
-                     { 15,  16,  17,  18,  19},
-                     { 20,  21,  22,  23,  24}},

-                    {{ 25,  26,  27,  28,  29},
-                     { 30,  31,  32,  33,  34},
-                     { 35,  36,  37,  38,  39},
-                     { 40,  41,  42,  43,  44},
-                     { 45,  46,  47,  48,  49}},

-                    {{ 50,  51,  52,  53,  54},
-                     { 55,  56,  57,  58,  59},
-                     { 60,  61,  62,  63,  64},
-                     { 65,  66,  67,  68,  69},
-                     { 70,  71,  72,  73,  74}},

-                    {{ 75,  76,  77,  78,  79},
-                     { 80,  81,  82,  83,  84},
-                     { 85,  86,  87,  88,  89},
-                     { 90,  91,  92,  93,  94},
-                     { 95,  96,  97,  98,  99}}
-                },
-                {
-                    {{100, 101, 102, 103, 104},
-                     {105, 106, 107, 108, 109},
-                     {110, 111, 112, 113, 114},
-                     {115, 116, 117, 118, 119},
-                     {120, 121, 122, 123, 124}},

-                    {{125, 126, 127, 128, 129},
-                     {130, 131, 132, 133, 134},
-                     {135, 136, 137, 138, 139},
-                     {140, 141, 142, 143, 144},
-                     {145, 146, 147, 148, 149}},

-                    {{150, 151, 152, 153, 154},
-                     {155, 156, 157, 158, 159},
-                     {160, 161, 162, 163, 164},
-                     {165, 166, 167, 168, 169},
-                     {170, 171, 172, 173, 174}},

-                    {{175, 176, 177, 178, 179},
-                     {180, 181, 182, 183, 184},
-                     {185, 186, 187, 188, 189},
-                     {190, 191, 192, 193, 194},
-                     {195, 196, 197, 198, 199}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
-            {
-                {
-                    {{   319,    355,    391},
-                     {   499,    535,    571},
-                     {   679,    715,    751}},

-                    {{  8745,   9024,   9303},
-                     { 10140,  10419,  10698},
-                     { 11535,  11814,  12093}},

-                    {{ 29337,  29859,  30381},
-                     { 31947,  32469,  32991},
-                     { 34557,  35079,  35601}},

-                    {{ 62061,  62826,  63591},
-                     { 65886,  66651,  67416},
-                     { 69711,  70476,  71241}}
-                },
-                {
-                    {{  3919,   3955,   3991},
-                     {  4099,   4135,   4171},
-                     {  4279,   4315,   4351}},

-                    {{ 36645,  36924,  37203},
-                     { 38040,  38319,  38598},
-                     { 39435,  39714,  39993}},

-                    {{ 81537,  82059,  82581},
-                     { 84147,  84669,  85191},
-                     { 86757,  87279,  87801}},

-                    {{138561, 139326, 140091},
-                     {142386, 143151, 143916},
-                     {146211, 146976, 147741}}
-                }
-            }
-        });
-        op -> associateInput(0, myInput);
-        op -> associateInput(1, myWeights);
-        op -> associateInput(2, myBias);
+        std::shared_ptr<Node> myCDW = ConvDepthWise(4, {3, 3}, "mycdw");
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myCDW->getOperator());
+        std::shared_ptr<Tensor> myWeights =
+            std::make_shared<Tensor>(Array4D<int, 4, 1, 3, 3>{
+                {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}
+
+                  }},
+                 {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}
+
+                  }},
+                 {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}}},
+                 {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}}}}});
+        std::shared_ptr<Tensor> myBias =
+            std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}});
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 4, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}},
+
+                                       {{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}}},
+                                      {{{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}},
+
+                                       {{150, 151, 152, 153, 154},
+                                        {155, 156, 157, 158, 159},
+                                        {160, 161, 162, 163, 164},
+                                        {165, 166, 167, 168, 169},
+                                        {170, 171, 172, 173, 174}},
+
+                                       {{175, 176, 177, 178, 179},
+                                        {180, 181, 182, 183, 184},
+                                        {185, 186, 187, 188, 189},
+                                        {190, 191, 192, 193, 194},
+                                        {195, 196, 197, 198, 199}}}}});
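+        // Depthwise semantics (editorial note, verified by hand): output
+        // channel c applies its own 3x3 kernel to input channel c only, then
+        // adds that channel's bias; e.g. the first expected value is
+        // 0*0 + 1*1 + 2*2 + 3*5 + 4*6 + 5*7 + 6*10 + 7*11 + 8*12 + 7 = 319.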
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
+                {{{{319, 355, 391}, {499, 535, 571}, {679, 715, 751}},
+
+                  {{8745, 9024, 9303},
+                   {10140, 10419, 10698},
+                   {11535, 11814, 12093}},
+
+                  {{29337, 29859, 30381},
+                   {31947, 32469, 32991},
+                   {34557, 35079, 35601}},
+
+                  {{62061, 62826, 63591},
+                   {65886, 66651, 67416},
+                   {69711, 70476, 71241}}},
+                 {{{3919, 3955, 3991}, {4099, 4135, 4171}, {4279, 4315, 4351}},
+
+                  {{36645, 36924, 37203},
+                   {38040, 38319, 38598},
+                   {39435, 39714, 39993}},
+
+                  {{81537, 82059, 82581},
+                   {84147, 84669, 85191},
+                   {86757, 87279, 87801}},
+
+                  {{138561, 139326, 140091},
+                   {142386, 143151, 143916},
+                   {146211, 146976, 147741}}}}});
+        op->associateInput(0, myInput);
+        op->associateInput(1, myWeights);
+        op->associateInput(2, myBias);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        myCDW -> forward();
-        op -> getOutput(0) -> print();
-        REQUIRE(*(op -> getOutput(0)) == *myOutput);
+        myCDW->forward();
+        op->getOutput(0)->print();
+        REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
     SECTION("point-wise") {
-        ConvDepthWise_Op<2> conv_op = ConvDepthWise_Op<2>({1,1});
-        std::shared_ptr<Tensor> weights = std::make_shared<Tensor>(std::vector<std::size_t>({3,1,1,1}));
-        weights -> setBackend("cpu");
-        std::shared_ptr<Tensor> biases = std::make_shared<Tensor>(std::vector<std::size_t>({3}));
-        biases -> setBackend("cpu");
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(std::vector<std::size_t>({2,3,5,5}));
-        input -> setBackend("cpu");
-        std::shared_ptr<Tensor> expected_output = std::make_shared<Tensor>(std::vector<std::size_t>({2,3,5,5}));
-        expected_output -> setBackend("cpu");
-
-        float weighst_array[3] {-0.0045, -0.4223, -0.9452};
+        ConvDepthWise_Op<2> conv_op = ConvDepthWise_Op<2>({1, 1});
+        std::shared_ptr<Tensor> weights =
+            std::make_shared<Tensor>(std::vector<std::size_t>({3, 1, 1, 1}));
+        weights->setBackend("cpu");
+        std::shared_ptr<Tensor> biases =
+            std::make_shared<Tensor>(std::vector<std::size_t>({3}));
+        biases->setBackend("cpu");
+        std::shared_ptr<Tensor> input =
+            std::make_shared<Tensor>(std::vector<std::size_t>({2, 3, 5, 5}));
+        input->setBackend("cpu");
+        std::shared_ptr<Tensor> expected_output =
+            std::make_shared<Tensor>(std::vector<std::size_t>({2, 3, 5, 5}));
+        expected_output->setBackend("cpu");
+
+        float weighst_array[3]{-0.0045, -0.4223, -0.9452};
         weights->getImpl()->setRawPtr(weighst_array, 3);
-        float biases_array[3] {-0.8595, 0.7062, -0.0062};
+        float biases_array[3]{-0.8595, 0.7062, -0.0062};
         biases->getImpl()->setRawPtr(biases_array, 3);
-        float input_array[2*3*5*5] {
-            0.6581, 0.2509, 0.2660, 0.8270, 0.8040, 0.3147, 0.5028, 0.2591, 0.8585,
-            0.7762, 0.9972, 0.0305, 0.1202, 0.2682, 0.9306, 0.7927, 0.1494, 0.0678,
-            0.5550, 0.4132, 0.4742, 0.6199, 0.1802, 0.6350, 0.2539, 0.5594, 0.0143,
-            0.8656, 0.7105, 0.1420, 0.2464, 0.7883, 0.5715, 0.7642, 0.5492, 0.6628,
-            0.4922, 0.7941, 0.8421, 0.7914, 0.0237, 0.8081, 0.0174, 0.6018, 0.7402,
-            0.3770, 0.8786, 0.3651, 0.5355, 0.4267, 0.4457, 0.6756, 0.9631, 0.0145,
-            0.4470, 0.5202, 0.2675, 0.5815, 0.3487, 0.3457, 0.7179, 0.0518, 0.1520,
-            0.0573, 0.9219, 0.3615, 0.0866, 0.5237, 0.4725, 0.2565, 0.8726, 0.6434,
-            0.6875, 0.2919, 0.3355, 0.1886, 0.1749, 0.0785, 0.4091, 0.1907, 0.4664,
-            0.2738, 0.4784, 0.7807, 0.0687, 0.3091, 0.4557, 0.2277, 0.2424, 0.8691,
-            0.1893, 0.2918, 0.5691, 0.1926, 0.2866, 0.0097, 0.5445, 0.5085, 0.1110,
-            0.7099, 0.8927, 0.6182, 0.2538, 0.8694, 0.7872, 0.3196, 0.0710, 0.2888,
-            0.0403, 0.1670, 0.6840, 0.7323, 0.4861, 0.3390, 0.1096, 0.5070, 0.3872,
-            0.7473, 0.6224, 0.6910, 0.7530, 0.0149, 0.0866, 0.9022, 0.5027, 0.3849,
-            0.5255, 0.1977, 0.0570, 0.9581, 0.5461, 0.4623, 0.0101, 0.2362, 0.5922,
-            0.8398, 0.1497, 0.5160, 0.2862, 0.5931, 0.9728, 0.1353, 0.7790, 0.9137,
             0.9351, 0.4036, 0.7638, 0.3873, 0.0494, 0.7450};
-        input->getImpl()->setRawPtr(input_array, 2*3*5*5);
-
-        float expected_output_array[2*3*5*5] {
-            -0.8624, -0.8606, -0.8607, -0.8632, -0.8631, -0.8609, -0.8617, -0.8606,
-            -0.8633, -0.8629, -0.8639, -0.8596, -0.8600, -0.8607, -0.8636, -0.8630,
-            -0.8601, -0.8598, -0.8620, -0.8613, -0.8616, -0.8622, -0.8603, -0.8623,
-            -0.8606,  0.4700,  0.7002,  0.3407,  0.4062,  0.6463,  0.6022,  0.3733,
-             0.4649,  0.3835,  0.4743,  0.4263,  0.4984,  0.3709,  0.3506,  0.3720,
-             0.6962,  0.3650,  0.6989,  0.4521,  0.3936,  0.5470,  0.3352,  0.5520,
-             0.4801,  0.5260, -0.4274, -0.6447, -0.9165, -0.0199, -0.4287, -0.4979,
-            -0.2590, -0.5559, -0.3358, -0.3329, -0.6847, -0.0552, -0.1499, -0.0603,
-            -0.8776, -0.3479, -0.0881, -0.5011, -0.4528, -0.2486, -0.8309, -0.6143,
-            -0.6561, -0.2821, -0.3233, -0.8603, -0.8603, -0.8598, -0.8613, -0.8603,
-            -0.8616, -0.8607, -0.8616, -0.8630, -0.8598, -0.8609, -0.8615, -0.8605,
-            -0.8606, -0.8634, -0.8603, -0.8608, -0.8620, -0.8603, -0.8608, -0.8595,
-            -0.8619, -0.8617, -0.8600, -0.8626,  0.3292,  0.4451,  0.5991,  0.3390,
-             0.3738,  0.5712,  0.6762,  0.5843,  0.6892,  0.6357,  0.4174,  0.3969,
-             0.5009,  0.5631,  0.6599,  0.4921,  0.5427,  0.3906,  0.4434,  0.4144,
-             0.3882,  0.6999,  0.6697,  0.3252,  0.4939, -0.3700, -0.5029, -0.1931,
-            -0.0601, -0.9118, -0.5224, -0.4432, -0.0157, -0.2294, -0.5660, -0.7999,
-            -0.1477, -0.4939, -0.2767, -0.5668, -0.9257, -0.1341, -0.7425, -0.8698,
-            -0.8900, -0.3877, -0.7282, -0.3722, -0.0529, -0.7103};
-        expected_output->getImpl()->setRawPtr(expected_output_array, 2*3*5*5);
+        input->getImpl()->setRawPtr(input_array, 2 * 3 * 5 * 5);
+
+        float expected_output_array[2 * 3 * 5 * 5]{
+            -0.8624, -0.8606, -0.8607, -0.8632, -0.8631, -0.8609, -0.8617,
-0.8633, -0.8629, -0.8639, -0.8596, -0.8600, -0.8607, + -0.8636, -0.8630, -0.8601, -0.8598, -0.8620, -0.8613, -0.8616, + -0.8622, -0.8603, -0.8623, -0.8606, 0.4700, 0.7002, 0.3407, + 0.4062, 0.6463, 0.6022, 0.3733, 0.4649, 0.3835, 0.4743, + 0.4263, 0.4984, 0.3709, 0.3506, 0.3720, 0.6962, 0.3650, + 0.6989, 0.4521, 0.3936, 0.5470, 0.3352, 0.5520, 0.4801, + 0.5260, -0.4274, -0.6447, -0.9165, -0.0199, -0.4287, -0.4979, + -0.2590, -0.5559, -0.3358, -0.3329, -0.6847, -0.0552, -0.1499, + -0.0603, -0.8776, -0.3479, -0.0881, -0.5011, -0.4528, -0.2486, + -0.8309, -0.6143, -0.6561, -0.2821, -0.3233, -0.8603, -0.8603, + -0.8598, -0.8613, -0.8603, -0.8616, -0.8607, -0.8616, -0.8630, + -0.8598, -0.8609, -0.8615, -0.8605, -0.8606, -0.8634, -0.8603, + -0.8608, -0.8620, -0.8603, -0.8608, -0.8595, -0.8619, -0.8617, + -0.8600, -0.8626, 0.3292, 0.4451, 0.5991, 0.3390, 0.3738, + 0.5712, 0.6762, 0.5843, 0.6892, 0.6357, 0.4174, 0.3969, + 0.5009, 0.5631, 0.6599, 0.4921, 0.5427, 0.3906, 0.4434, + 0.4144, 0.3882, 0.6999, 0.6697, 0.3252, 0.4939, -0.3700, + -0.5029, -0.1931, -0.0601, -0.9118, -0.5224, -0.4432, -0.0157, + -0.2294, -0.5660, -0.7999, -0.1477, -0.4939, -0.2767, -0.5668, + -0.9257, -0.1341, -0.7425, -0.8698, -0.8900, -0.3877, -0.7282, + -0.3722, -0.0529, -0.7103}; + expected_output->getImpl()->setRawPtr(expected_output_array, + 2 * 3 * 5 * 5); conv_op.associateInput(0, input); conv_op.associateInput(1, weights); @@ -224,6 +204,9 @@ TEST_CASE("[cpu/operator] ConvDepthWise(forward)", "[ConvDepthWise][CPU]") { conv_op.getOutput(0)->print(); - REQUIRE(approxEq<float>(*(conv_op.getOutput(0)),*expected_output, 1e-3f, 1e-4f)); + REQUIRE(approxEq<float>(*(conv_op.getOutput(0)), + *expected_output, + 1e-3f, + 1e-4f)); } } \ No newline at end of file diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp index e48d69c89eb0d6d52a834b3f32a41d8621fdd42b..477b3edc771b9fa038797d92a4452abb0317f8ef 100644 --- a/unit_tests/operator/Test_ConvImpl.cpp +++ b/unit_tests/operator/Test_ConvImpl.cpp @@ -23,134 +23,89 @@ using namespace Aidge; TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") { SECTION("Classic Conv") { - std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv"); - auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator()); - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { - { - { - {{ 0, 1, 2}, - { 3, 4, 5}, - { 6, 7, 8}}, - {{ 9, 10, 11}, - { 12, 13, 14}, - { 15, 16, 17}}, - {{ 18, 19, 20}, - { 21, 22, 23}, - { 24, 25, 26}} - }, - { - {{ 27, 28, 29}, - { 30, 31, 32}, - { 33, 34, 35}}, - {{ 36, 37, 38}, - { 39, 40, 41}, - { 42, 43, 44}}, - {{ 45, 46, 47}, - { 48, 49, 50}, - { 51, 52, 53}} - }, - { - {{ 54, 55, 56}, - { 57, 58, 59}, - { 60, 61, 62}}, - {{ 63, 64, 65}, - { 66, 67, 68}, - { 69, 70, 71}}, - {{ 72, 73, 74}, - { 75, 76, 77}, - { 78, 79, 80}} - }, - { - {{ 81, 82, 83}, - { 84, 85, 86}, - { 87, 88, 89}}, - {{ 90, 91, 92}, - { 93, 94, 95}, - { 96, 97, 98}}, - {{ 99, 100, 101}, - {102, 103, 104}, - {105, 106, 107}} - } - } - }); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, + std::shared_ptr<Node> myConv = Conv(3, 4, {3, 3}, "myconv"); + auto op = + std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); + std::shared_ptr<Tensor> myWeights = + 
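// The ConvDepthWise check above calls approxEq with a relative (1e-3f) and
// an absolute (1e-4f) tolerance. A minimal sketch of the element-wise test
// such a helper is assumed to perform -- approxEqSketch and its exact
// acceptance rule are illustrative assumptions, not aidge's verified
// implementation:

#include <cmath>
#include <cstddef>

bool approxEqSketch(const float *computed, const float *expected,
                    std::size_t size, float relative, float absolute) {
    for (std::size_t i = 0; i < size; ++i) {
        // Accept a value when its error stays under an absolute floor plus
        // a term that scales with the magnitude of the expected value.
        if (std::fabs(computed[i] - expected[i]) >
            absolute + relative * std::fabs(expected[i])) {
            return false;
        }
    }
    return true;
}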
std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{ + {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}, + {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}}, + {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}}, + {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}}, + {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}}, + {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}}, + {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}}, + {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}}, + {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}}, + {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}}, + {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}}, + {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}}); + std::shared_ptr<Tensor> myBias = + std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { - { - { - {{ 15226, 15577, 15928}, - { 16981, 17332, 17683}, - { 18736, 19087, 19438}}, - {{ 37818, 38898, 39978}, - { 43218, 44298, 45378}, - { 48618, 49698, 50778}}, - {{ 60426, 62235, 64044}, - { 69471, 71280, 73089}, - { 78516, 80325, 82134}}, - {{ 83016, 85554, 88092}, - { 95706, 98244, 100782}, - {108396, 110934, 113472}} - }, - { - {{ 41551, 41902, 42253}, - { 43306, 43657, 44008}, - { 45061, 45412, 45763}}, - {{118818, 119898, 120978}, - {124218, 125298, 126378}, - {129618, 130698, 131778}}, - {{196101, 197910, 199719}, - {205146, 206955, 208764}, - {214191, 216000, 217809}}, - {{273366, 275904, 278442}, - {286056, 288594, 291132}, - {298746, 301284, 303822}} - } - } - }); - op->associateInput(0,myInput); - op->associateInput(1,myWeights); - op->associateInput(2,myBias); + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array4D<int, 2, 4, 3, 3>{{{{{15226, 15577, 15928}, + {16981, 17332, 17683}, + {18736, 19087, 19438}}, + {{37818, 38898, 39978}, + {43218, 44298, 45378}, + {48618, 49698, 50778}}, + {{60426, 62235, 64044}, + {69471, 71280, 73089}, + {78516, 80325, 82134}}, + {{83016, 85554, 88092}, + {95706, 98244, 100782}, + {108396, 110934, 
113472}}}, + {{{41551, 41902, 42253}, + {43306, 43657, 44008}, + {45061, 45412, 45763}}, + {{118818, 119898, 120978}, + {124218, 125298, 126378}, + {129618, 130698, 131778}}, + {{196101, 197910, 199719}, + {205146, 206955, 208764}, + {214191, 216000, 217809}}, + {{273366, 275904, 278442}, + {286056, 288594, 291132}, + {298746, 301284, 303822}}}}}); + op->associateInput(0, myInput); + op->associateInput(1, myWeights); + op->associateInput(2, myBias); op->setDataType(DataType::Int32); op->setBackend("cpu"); myConv->forward(); @@ -158,241 +113,529 @@ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") { REQUIRE(*(op->getOutput(0)) == *myOutput); } SECTION("Point-wise") { - std::shared_ptr<Node> myConv = Conv(3,4,{1,1}, "myconv", {1,1}); - auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator()); - op->setInput(0, std::make_shared<Tensor>(Array4D<float,2,3,3,3> { - { - { - {{-1.38467371F, -0.87123615F, -0.22336592F}, - { 1.71736145F, 0.31888032F, -0.42451897F}, - { 0.30572093F, -0.77459252F, -1.55757248F}}, - {{ 0.99563611F, -0.87978584F, -0.60114205F}, - {-1.27415121F, 2.12278509F, -1.23465312F}, - {-0.48791388F, -0.91382301F, -0.65813726F}}, - {{ 0.07802387F, 0.52580875F, -0.48799172F}, - { 1.19136906F, -0.81400764F, -0.73599279F}, - {-1.40324783F, 0.03600367F, -0.06347727F}} - }, - { - {{ 0.67561489F, -0.09780689F, 1.84459400F}, - {-1.18453741F, 1.38354933F, 1.44513381F}, - { 0.85641253F, 2.21807575F, 0.52316552F}}, - {{ 0.34664667F, -0.19733144F, 1.14120162F}, - { 0.05164360F, 0.72810954F, -0.71064192F}, - {-0.60206831F, 0.96044880F, 0.40481427F}}, - {{-1.35434294F, 1.33470297F, 0.48353928F}, - {-0.19756168F, 1.26831138F, 1.22426283F}, - { 0.09811721F, 1.74225271F, -1.35267365F}} - } - } - })); - op->setInput(1, std::make_shared<Tensor>(Array4D<float,4,3,1,1> { - { - { - {{ 0.33669037F}}, - {{ 0.12880941F}}, - {{ 0.23446237F}} - }, - { - {{ 0.23033303F}}, - {{-1.12285638F}}, - {{-0.18632829F}} - }, - { - {{ 2.20820141F}}, - {{-0.63799703F}}, - {{ 0.46165723F}}}, - { - {{ 0.26735088F}}, - {{ 0.53490466F}}, - {{ 0.80935723F}} - } - } - })); - op->setInput(2, std::make_shared<Tensor>(Array1D<float,4> {{ 1.11029029F, -1.68979895F, -0.98895991F, 0.95797181F}})); - Tensor expectedOutput = Array4D<float,2,4,3,3> { - { - { - {{ 0.79062498F, 0.82691115F, 0.84323663F}, - { 1.80371785F, 1.30023468F, 0.63576132F}, - { 0.82136691F, 0.74022496F, 0.48621333F}}, - {{-3.14122939F, -1.00057328F, -0.97532475F}, - {-0.08553087F, -3.84826040F, -0.26410526F}, - {-0.81005937F, -0.84882969F, -1.29773819F}}, - {{-4.64579105F, -2.10878062F, -1.32395494F}, - { 4.16622877F, -2.01493120F, -1.47845459F}, - {-0.65039843F, -2.09977841F, -4.03780890F}}, - {{ 1.18349767F, 0.68001163F, 0.18174142F}, - { 1.69980371F, 1.51988935F, -0.41162649F}, - {-0.35700959F, 0.29121545F, 0.13813695F}} - }, - { - {{ 1.06487226F, 1.36487913F, 1.99171650F}, - { 0.67179936F, 1.96727657F, 1.79235911F}, - { 1.34408879F, 2.38930249F, 1.02142799F}}, - {{-1.67106462F, -1.73944509F, -2.63643050F}, - {-1.98381400F, -2.42500663F, -0.78710288F}, - {-0.83478457F, -2.58197999F, -1.77180362F}}, - {{-0.34346789F, -0.46286502F, 2.57942152F}, - {-3.72881150F, 2.18718910F, 3.22076392F}, - { 1.33158576F, 4.10055828F, -0.71644694F}}, - {{ 0.22787374F, 1.90652108F, 2.45291567F}, - { 0.50901115F, 2.74385118F, 1.95506990F}, - { 0.94429719F, 3.47482967F, 0.21958135F}} - } - } - }; + std::shared_ptr<Node> myConv = Conv(3, 4, {1, 1}, "myconv", {1, 1}); + auto op = + std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); + 
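// The expected shapes in this file all follow the standard convolution
// output-size rule. A small sketch, with convOutDim as a hypothetical helper
// (the tests use no padding, so the padding term is included only for
// completeness):

#include <cstddef>

std::size_t convOutDim(std::size_t in, std::size_t kernel, std::size_t stride,
                       std::size_t padding, std::size_t dilation) {
    // Dilation stretches the kernel's effective extent before striding.
    const std::size_t effKernel = dilation * (kernel - 1) + 1;
    return (in + 2 * padding - effKernel) / stride + 1;
}

// convOutDim(5, 3, 1, 0, 1) == 3 -> the 3x3 maps of "Classic Conv";
// convOutDim(3, 1, 1, 0, 1) == 3 -> "Point-wise" preserves spatial dims;
// convOutDim(8, 3, 3, 0, 2) == 2 -> the 2x2 maps of "Strided and dilated
// Conv" further below.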
op->setInput(0, + std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{ + {{{{-1.38467371F, -0.87123615F, -0.22336592F}, + {1.71736145F, 0.31888032F, -0.42451897F}, + {0.30572093F, -0.77459252F, -1.55757248F}}, + {{0.99563611F, -0.87978584F, -0.60114205F}, + {-1.27415121F, 2.12278509F, -1.23465312F}, + {-0.48791388F, -0.91382301F, -0.65813726F}}, + {{0.07802387F, 0.52580875F, -0.48799172F}, + {1.19136906F, -0.81400764F, -0.73599279F}, + {-1.40324783F, 0.03600367F, -0.06347727F}}}, + {{{0.67561489F, -0.09780689F, 1.84459400F}, + {-1.18453741F, 1.38354933F, 1.44513381F}, + {0.85641253F, 2.21807575F, 0.52316552F}}, + {{0.34664667F, -0.19733144F, 1.14120162F}, + {0.05164360F, 0.72810954F, -0.71064192F}, + {-0.60206831F, 0.96044880F, 0.40481427F}}, + {{-1.35434294F, 1.33470297F, 0.48353928F}, + {-0.19756168F, 1.26831138F, 1.22426283F}, + {0.09811721F, 1.74225271F, -1.35267365F}}}}})); + op->setInput( + 1, + std::make_shared<Tensor>(Array4D<float, 4, 3, 1, 1>{ + {{{{0.33669037F}}, {{0.12880941F}}, {{0.23446237F}}}, + {{{0.23033303F}}, {{-1.12285638F}}, {{-0.18632829F}}}, + {{{2.20820141F}}, {{-0.63799703F}}, {{0.46165723F}}}, + {{{0.26735088F}}, {{0.53490466F}}, {{0.80935723F}}}}})); + op->setInput( + 2, + std::make_shared<Tensor>(Array1D<float, 4>{ + {1.11029029F, -1.68979895F, -0.98895991F, 0.95797181F}})); + Tensor expectedOutput = Array4D<float, 2, 4, 3, 3>{ + {{{{0.79062498F, 0.82691115F, 0.84323663F}, + {1.80371785F, 1.30023468F, 0.63576132F}, + {0.82136691F, 0.74022496F, 0.48621333F}}, + {{-3.14122939F, -1.00057328F, -0.97532475F}, + {-0.08553087F, -3.84826040F, -0.26410526F}, + {-0.81005937F, -0.84882969F, -1.29773819F}}, + {{-4.64579105F, -2.10878062F, -1.32395494F}, + {4.16622877F, -2.01493120F, -1.47845459F}, + {-0.65039843F, -2.09977841F, -4.03780890F}}, + {{1.18349767F, 0.68001163F, 0.18174142F}, + {1.69980371F, 1.51988935F, -0.41162649F}, + {-0.35700959F, 0.29121545F, 0.13813695F}}}, + {{{1.06487226F, 1.36487913F, 1.99171650F}, + {0.67179936F, 1.96727657F, 1.79235911F}, + {1.34408879F, 2.38930249F, 1.02142799F}}, + {{-1.67106462F, -1.73944509F, -2.63643050F}, + {-1.98381400F, -2.42500663F, -0.78710288F}, + {-0.83478457F, -2.58197999F, -1.77180362F}}, + {{-0.34346789F, -0.46286502F, 2.57942152F}, + {-3.72881150F, 2.18718910F, 3.22076392F}, + {1.33158576F, 4.10055828F, -0.71644694F}}, + {{0.22787374F, 1.90652108F, 2.45291567F}, + {0.50901115F, 2.74385118F, 1.95506990F}, + {0.94429719F, 3.47482967F, 0.21958135F}}}}}; op->setDataType(DataType::Float32); op->setBackend("cpu"); myConv->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput.getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput.size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + float *resPtr = + static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr()); + float *expectedPtr = + static_cast<float *>(expectedOutput.getImpl()->rawPtr()); + for (std::size_t i = 0; i < expectedOutput.size(); ++i) { + REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001); } } SECTION("Strided and dilated Conv") { - std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv", {3,3},{2,2}); - auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,3,8,8> { - {{{ - {0.0107F, 0.5076F, 0.2293F, 0.0486F, 0.7375F, 0.2637F, 0.9615F, 0.9138F}, - {0.0678F, 0.5604F, 0.1940F, 0.0287F, 0.1029F, 0.2059F, 0.5058F, 0.9885F}, - {0.9904F, 
0.2890F, 0.4606F, 0.1055F, 0.9028F, 0.1654F, 0.6499F, 0.4775F}, - {0.9499F, 0.4695F, 0.1713F, 0.0731F, 0.4913F, 0.8921F, 0.1782F, 0.1111F}, - {0.2479F, 0.4669F, 0.1078F, 0.6153F, 0.0299F, 0.6484F, 0.2397F, 0.1814F}, - {0.3779F, 0.9032F, 0.5651F, 0.3896F, 0.8439F, 0.6404F, 0.3813F, 0.0841F}, - {0.5566F, 0.8950F, 0.1226F, 0.8881F, 0.9870F, 0.6256F, 0.6387F, 0.0628F}, - {0.2857F, 0.0579F, 0.6247F, 0.1286F, 0.0951F, 0.1268F, 0.9510F, 0.3789F}}, + std::shared_ptr<Node> myConv = + Conv(3, 4, {3, 3}, "myconv", {3, 3}, {2, 2}); + auto op = + std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<float, 2, 3, 8, 8>{{{{{0.0107F, + 0.5076F, + 0.2293F, + 0.0486F, + 0.7375F, + 0.2637F, + 0.9615F, + 0.9138F}, + {0.0678F, + 0.5604F, + 0.1940F, + 0.0287F, + 0.1029F, + 0.2059F, + 0.5058F, + 0.9885F}, + {0.9904F, + 0.2890F, + 0.4606F, + 0.1055F, + 0.9028F, + 0.1654F, + 0.6499F, + 0.4775F}, + {0.9499F, + 0.4695F, + 0.1713F, + 0.0731F, + 0.4913F, + 0.8921F, + 0.1782F, + 0.1111F}, + {0.2479F, + 0.4669F, + 0.1078F, + 0.6153F, + 0.0299F, + 0.6484F, + 0.2397F, + 0.1814F}, + {0.3779F, + 0.9032F, + 0.5651F, + 0.3896F, + 0.8439F, + 0.6404F, + 0.3813F, + 0.0841F}, + {0.5566F, + 0.8950F, + 0.1226F, + 0.8881F, + 0.9870F, + 0.6256F, + 0.6387F, + 0.0628F}, + {0.2857F, + 0.0579F, + 0.6247F, + 0.1286F, + 0.0951F, + 0.1268F, + 0.9510F, + 0.3789F}}, - {{0.7648F, 0.5340F, 0.1024F, 0.4098F, 0.9958F, 0.7941F, 0.1190F, 0.7328F}, - {0.4532F, 0.6598F, 0.9146F, 0.1690F, 0.6041F, 0.7230F, 0.5719F, 0.9282F}, - {0.2862F, 0.2329F, 0.7302F, 0.6717F, 0.1983F, 0.1876F, 0.4561F, 0.2126F}, - {0.7849F, 0.0239F, 0.7977F, 0.5935F, 0.9958F, 0.4703F, 0.4612F, 0.1627F}, - {0.6393F, 0.3544F, 0.8643F, 0.5039F, 0.8087F, 0.6521F, 0.5086F, 0.9331F}, - {0.7749F, 0.9798F, 0.6820F, 0.7869F, 0.5144F, 0.2941F, 0.8137F, 0.4561F}, - {0.6505F, 0.3974F, 0.6909F, 0.7019F, 0.2729F, 0.4240F, 0.0162F, 0.1536F}, - {0.3529F, 0.8821F, 0.1812F, 0.3426F, 0.3472F, 0.0300F, 0.8841F, 0.8088F}}, + {{0.7648F, + 0.5340F, + 0.1024F, + 0.4098F, + 0.9958F, + 0.7941F, + 0.1190F, + 0.7328F}, + {0.4532F, + 0.6598F, + 0.9146F, + 0.1690F, + 0.6041F, + 0.7230F, + 0.5719F, + 0.9282F}, + {0.2862F, + 0.2329F, + 0.7302F, + 0.6717F, + 0.1983F, + 0.1876F, + 0.4561F, + 0.2126F}, + {0.7849F, + 0.0239F, + 0.7977F, + 0.5935F, + 0.9958F, + 0.4703F, + 0.4612F, + 0.1627F}, + {0.6393F, + 0.3544F, + 0.8643F, + 0.5039F, + 0.8087F, + 0.6521F, + 0.5086F, + 0.9331F}, + {0.7749F, + 0.9798F, + 0.6820F, + 0.7869F, + 0.5144F, + 0.2941F, + 0.8137F, + 0.4561F}, + {0.6505F, + 0.3974F, + 0.6909F, + 0.7019F, + 0.2729F, + 0.4240F, + 0.0162F, + 0.1536F}, + {0.3529F, + 0.8821F, + 0.1812F, + 0.3426F, + 0.3472F, + 0.0300F, + 0.8841F, + 0.8088F}}, - {{0.5099F, 0.3323F, 0.1488F, 0.3424F, 0.1494F, 0.6225F, 0.8103F, 0.5995F}, - {0.9198F, 0.5635F, 0.8908F, 0.9378F, 0.6689F, 0.3176F, 0.3755F, 0.3883F}, - {0.0626F, 0.5309F, 0.0307F, 0.3955F, 0.2794F, 0.1420F, 0.4758F, 0.7558F}, - {0.6154F, 0.5280F, 0.2318F, 0.3832F, 0.4435F, 0.3490F, 0.4043F, 0.5872F}, - {0.3705F, 0.3848F, 0.2182F, 0.8332F, 0.4559F, 0.5310F, 0.4611F, 0.4236F}, - {0.6141F, 0.8103F, 0.2260F, 0.9907F, 0.5615F, 0.4520F, 0.6949F, 0.0175F}, - {0.3969F, 0.5021F, 0.0970F, 0.9937F, 0.9270F, 0.4302F, 0.2868F, 0.3891F}, - {0.8693F, 0.5170F, 0.5348F, 0.2676F, 0.9769F, 0.3356F, 0.9427F, 0.3908F}} - }, - { - {{0.4803F, 0.5223F, 0.6395F, 0.8402F, 0.4442F, 0.6377F, 0.7852F, 0.9063F}, - {0.0361F, 0.0470F, 0.3104F, 0.6921F, 0.0543F, 0.4490F, 0.9541F, 0.7395F}, - {0.3832F, 0.3828F, 0.2236F, 
0.2068F, 0.4369F, 0.7443F, 0.6952F, 0.6394F}, - {0.5309F, 0.8483F, 0.1991F, 0.9756F, 0.8969F, 0.7284F, 0.4657F, 0.5486F}, - {0.8839F, 0.3260F, 0.6892F, 0.4074F, 0.9473F, 0.5526F, 0.4147F, 0.4786F}, - {0.9674F, 0.0952F, 0.8379F, 0.2163F, 0.9420F, 0.4046F, 0.1339F, 0.5234F}, - {0.4213F, 0.8392F, 0.3184F, 0.4576F, 0.9349F, 0.8267F, 0.0931F, 0.8009F}, - {0.5570F, 0.5871F, 0.4175F, 0.5465F, 0.6679F, 0.9224F, 0.0049F, 0.9421F}}, + {{0.5099F, + 0.3323F, + 0.1488F, + 0.3424F, + 0.1494F, + 0.6225F, + 0.8103F, + 0.5995F}, + {0.9198F, + 0.5635F, + 0.8908F, + 0.9378F, + 0.6689F, + 0.3176F, + 0.3755F, + 0.3883F}, + {0.0626F, + 0.5309F, + 0.0307F, + 0.3955F, + 0.2794F, + 0.1420F, + 0.4758F, + 0.7558F}, + {0.6154F, + 0.5280F, + 0.2318F, + 0.3832F, + 0.4435F, + 0.3490F, + 0.4043F, + 0.5872F}, + {0.3705F, + 0.3848F, + 0.2182F, + 0.8332F, + 0.4559F, + 0.5310F, + 0.4611F, + 0.4236F}, + {0.6141F, + 0.8103F, + 0.2260F, + 0.9907F, + 0.5615F, + 0.4520F, + 0.6949F, + 0.0175F}, + {0.3969F, + 0.5021F, + 0.0970F, + 0.9937F, + 0.9270F, + 0.4302F, + 0.2868F, + 0.3891F}, + {0.8693F, + 0.5170F, + 0.5348F, + 0.2676F, + 0.9769F, + 0.3356F, + 0.9427F, + 0.3908F}}}, + {{{0.4803F, + 0.5223F, + 0.6395F, + 0.8402F, + 0.4442F, + 0.6377F, + 0.7852F, + 0.9063F}, + {0.0361F, + 0.0470F, + 0.3104F, + 0.6921F, + 0.0543F, + 0.4490F, + 0.9541F, + 0.7395F}, + {0.3832F, + 0.3828F, + 0.2236F, + 0.2068F, + 0.4369F, + 0.7443F, + 0.6952F, + 0.6394F}, + {0.5309F, + 0.8483F, + 0.1991F, + 0.9756F, + 0.8969F, + 0.7284F, + 0.4657F, + 0.5486F}, + {0.8839F, + 0.3260F, + 0.6892F, + 0.4074F, + 0.9473F, + 0.5526F, + 0.4147F, + 0.4786F}, + {0.9674F, + 0.0952F, + 0.8379F, + 0.2163F, + 0.9420F, + 0.4046F, + 0.1339F, + 0.5234F}, + {0.4213F, + 0.8392F, + 0.3184F, + 0.4576F, + 0.9349F, + 0.8267F, + 0.0931F, + 0.8009F}, + {0.5570F, + 0.5871F, + 0.4175F, + 0.5465F, + 0.6679F, + 0.9224F, + 0.0049F, + 0.9421F}}, - {{0.3739F, 0.6230F, 0.7613F, 0.1337F, 0.8527F, 0.0557F, 0.6424F, 0.8463F}, - {0.7179F, 0.5638F, 0.2457F, 0.4579F, 0.0487F, 0.8693F, 0.8216F, 0.0415F}, - {0.1724F, 0.5108F, 0.9103F, 0.0850F, 0.0080F, 0.8927F, 0.7706F, 0.3600F}, - {0.7751F, 0.8828F, 0.7872F, 0.4541F, 0.3181F, 0.1855F, 0.2486F, 0.0033F}, - {0.5558F, 0.3500F, 0.6034F, 0.1763F, 0.7418F, 0.5190F, 0.5147F, 0.4090F}, - {0.4476F, 0.1249F, 0.8116F, 0.9091F, 0.1738F, 0.6150F, 0.3285F, 0.3133F}, - {0.5657F, 0.4447F, 0.5049F, 0.3425F, 0.7443F, 0.2718F, 0.2466F, 0.5586F}, - {0.3684F, 0.7616F, 0.5165F, 0.9621F, 0.2864F, 0.7747F, 0.8110F, 0.7045F}}, + {{0.3739F, + 0.6230F, + 0.7613F, + 0.1337F, + 0.8527F, + 0.0557F, + 0.6424F, + 0.8463F}, + {0.7179F, + 0.5638F, + 0.2457F, + 0.4579F, + 0.0487F, + 0.8693F, + 0.8216F, + 0.0415F}, + {0.1724F, + 0.5108F, + 0.9103F, + 0.0850F, + 0.0080F, + 0.8927F, + 0.7706F, + 0.3600F}, + {0.7751F, + 0.8828F, + 0.7872F, + 0.4541F, + 0.3181F, + 0.1855F, + 0.2486F, + 0.0033F}, + {0.5558F, + 0.3500F, + 0.6034F, + 0.1763F, + 0.7418F, + 0.5190F, + 0.5147F, + 0.4090F}, + {0.4476F, + 0.1249F, + 0.8116F, + 0.9091F, + 0.1738F, + 0.6150F, + 0.3285F, + 0.3133F}, + {0.5657F, + 0.4447F, + 0.5049F, + 0.3425F, + 0.7443F, + 0.2718F, + 0.2466F, + 0.5586F}, + {0.3684F, + 0.7616F, + 0.5165F, + 0.9621F, + 0.2864F, + 0.7747F, + 0.8110F, + 0.7045F}}, - {{0.4570F, 0.4577F, 0.0373F, 0.6084F, 0.4632F, 0.3472F, 0.9917F, 0.2011F}, - {0.7921F, 0.2202F, 0.9525F, 0.7274F, 0.3357F, 0.0076F, 0.5786F, 0.3034F}, - {0.6510F, 0.0798F, 0.2757F, 0.1738F, 0.3046F, 0.2197F, 0.3872F, 0.5650F}, - {0.1532F, 0.3204F, 0.6094F, 0.3287F, 0.8903F, 0.9773F, 0.7950F, 0.2845F}, - {0.2482F, 0.3395F, 0.8795F, 0.4325F, 
0.1395F, 0.2457F, 0.2968F, 0.5424F}, - {0.8636F, 0.7426F, 0.2151F, 0.6900F, 0.3938F, 0.0062F, 0.4980F, 0.4098F}, - {0.8026F, 0.0464F, 0.2662F, 0.7835F, 0.8444F, 0.0688F, 0.8796F, 0.7625F}, - {0.2764F, 0.5341F, 0.1773F, 0.6671F, 0.7555F, 0.5235F, 0.7142F, 0.9423F}}}} - }); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float,4> {{ 0.1902F, -0.1789F, -0.0314F, -0.0589F}}); - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<float,4,3,3,3> { //NCHW - { - { - {{ 0.0039F, 0.1098F, -0.0834F}, - {-0.0890F, 0.0725F, -0.1178F}, - { 0.1056F, -0.0924F, -0.0574F}}, - {{ 0.0070F, -0.0730F, -0.0674F}, - {-0.0380F, -0.1025F, -0.0085F}, - {-0.1451F, -0.0656F, 0.1137F}}, - {{ 0.1020F, 0.1025F, -0.0678F}, - { 0.0028F, 0.1512F, -0.0871F}, - { 0.1563F, -0.1446F, -0.1636F}} - }, - { - {{ 0.1472F, 0.0025F, -0.0281F}, - { 0.0350F, 0.0296F, -0.1711F}, - {-0.1197F, -0.1198F, -0.1130F}}, - {{-0.1492F, 0.1554F, -0.1044F}, - { 0.1203F, -0.1596F, 0.0589F}, - {-0.0436F, -0.1876F, -0.0816F}}, - {{ 0.1572F, -0.0982F, 0.1293F}, - { 0.1358F, 0.1559F, 0.1322F}, - { 0.0296F, -0.0354F, -0.0632F}} - }, - { - {{-0.0941F, -0.0479F, 0.0908F}, - {-0.1319F, -0.1333F, 0.1223F}, - {-0.1098F, 0.1924F, 0.1075F}}, - {{ 0.1796F, 0.0213F, 0.0626F}, - { 0.0275F, 0.1883F, -0.0818F}, - { 0.0363F, 0.0684F, 0.1094F}}, - {{ 0.1131F, 0.1258F, -0.0558F}, - { 0.1498F, 0.0322F, -0.0186F}, - {-0.1801F, -0.0358F, 0.1727F}} - }, - { - {{-0.1500F, -0.0554F, -0.0994F}, - {-0.0818F, -0.1223F, 0.1365F}, - { 0.1281F, 0.1507F, -0.0890F}}, - {{-0.0444F, -0.1071F, -0.1632F}, - { 0.0757F, -0.1235F, 0.0408F}, - { 0.0401F, -0.1914F, 0.1772F}}, - {{-0.0714F, 0.1582F, -0.0065F}, - {-0.0119F, 0.1375F, -0.0727F}, - {-0.1532F, -0.1826F, -0.0417F}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,4,2,2> { - { - { - {{-0.2174F, -0.0778F}, - {-0.2584F, 0.2303F}}, - {{-0.7686F, -0.3879F}, - {-0.1775F, 0.0119F}}, - {{ 0.5180F, 0.5087F}, - { 0.5398F, 0.3476F}}, - {{-0.5258F, -0.3128F}, - {-0.6673F, -0.1827F}} - }, - { - {{-0.1902F, -0.0467F}, - {-0.3327F, -0.1701F}}, - {{-0.5505F, -0.4875F}, - {-0.4119F, -0.5726F}}, - {{ 0.5777F, 0.4428F}, - { 0.6121F, 0.7221F}}, - {{-0.6009F, -0.6335F}, - {-0.5159F, -0.3353F}} - } - } - }); - op->associateInput(0,myInput); - op->associateInput(1,myWeights); - op->associateInput(2,myBias); + {{0.4570F, + 0.4577F, + 0.0373F, + 0.6084F, + 0.4632F, + 0.3472F, + 0.9917F, + 0.2011F}, + {0.7921F, + 0.2202F, + 0.9525F, + 0.7274F, + 0.3357F, + 0.0076F, + 0.5786F, + 0.3034F}, + {0.6510F, + 0.0798F, + 0.2757F, + 0.1738F, + 0.3046F, + 0.2197F, + 0.3872F, + 0.5650F}, + {0.1532F, + 0.3204F, + 0.6094F, + 0.3287F, + 0.8903F, + 0.9773F, + 0.7950F, + 0.2845F}, + {0.2482F, + 0.3395F, + 0.8795F, + 0.4325F, + 0.1395F, + 0.2457F, + 0.2968F, + 0.5424F}, + {0.8636F, + 0.7426F, + 0.2151F, + 0.6900F, + 0.3938F, + 0.0062F, + 0.4980F, + 0.4098F}, + {0.8026F, + 0.0464F, + 0.2662F, + 0.7835F, + 0.8444F, + 0.0688F, + 0.8796F, + 0.7625F}, + {0.2764F, + 0.5341F, + 0.1773F, + 0.6671F, + 0.7555F, + 0.5235F, + 0.7142F, + 0.9423F}}}}}); + std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>( + Array1D<float, 4>{{0.1902F, -0.1789F, -0.0314F, -0.0589F}}); + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>( + Array4D<float, 4, 3, 3, 3>{// NCHW + {{{{0.0039F, 0.1098F, -0.0834F}, + {-0.0890F, 0.0725F, -0.1178F}, + {0.1056F, -0.0924F, -0.0574F}}, + {{0.0070F, -0.0730F, -0.0674F}, + {-0.0380F, -0.1025F, -0.0085F}, + {-0.1451F, -0.0656F, 0.1137F}}, + {{0.1020F, 0.1025F, -0.0678F}, + 
{0.0028F, 0.1512F, -0.0871F}, + {0.1563F, -0.1446F, -0.1636F}}}, + {{{0.1472F, 0.0025F, -0.0281F}, + {0.0350F, 0.0296F, -0.1711F}, + {-0.1197F, -0.1198F, -0.1130F}}, + {{-0.1492F, 0.1554F, -0.1044F}, + {0.1203F, -0.1596F, 0.0589F}, + {-0.0436F, -0.1876F, -0.0816F}}, + {{0.1572F, -0.0982F, 0.1293F}, + {0.1358F, 0.1559F, 0.1322F}, + {0.0296F, -0.0354F, -0.0632F}}}, + {{{-0.0941F, -0.0479F, 0.0908F}, + {-0.1319F, -0.1333F, 0.1223F}, + {-0.1098F, 0.1924F, 0.1075F}}, + {{0.1796F, 0.0213F, 0.0626F}, + {0.0275F, 0.1883F, -0.0818F}, + {0.0363F, 0.0684F, 0.1094F}}, + {{0.1131F, 0.1258F, -0.0558F}, + {0.1498F, 0.0322F, -0.0186F}, + {-0.1801F, -0.0358F, 0.1727F}}}, + {{{-0.1500F, -0.0554F, -0.0994F}, + {-0.0818F, -0.1223F, 0.1365F}, + {0.1281F, 0.1507F, -0.0890F}}, + {{-0.0444F, -0.1071F, -0.1632F}, + {0.0757F, -0.1235F, 0.0408F}, + {0.0401F, -0.1914F, 0.1772F}}, + {{-0.0714F, 0.1582F, -0.0065F}, + {-0.0119F, 0.1375F, -0.0727F}, + {-0.1532F, -0.1826F, -0.0417F}}}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array4D<float, 2, 4, 2, 2>{ + {{{{-0.2174F, -0.0778F}, {-0.2584F, 0.2303F}}, + {{-0.7686F, -0.3879F}, {-0.1775F, 0.0119F}}, + {{0.5180F, 0.5087F}, {0.5398F, 0.3476F}}, + {{-0.5258F, -0.3128F}, {-0.6673F, -0.1827F}}}, + {{{-0.1902F, -0.0467F}, {-0.3327F, -0.1701F}}, + {{-0.5505F, -0.4875F}, {-0.4119F, -0.5726F}}, + {{0.5777F, 0.4428F}, {0.6121F, 0.7221F}}, + {{-0.6009F, -0.6335F}, {-0.5159F, -0.3353F}}}}}); + op->associateInput(0, myInput); + op->associateInput(1, myWeights); + op->associateInput(2, myBias); op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(); myConv->forward(); op->getOutput(0)->print(); - REQUIRE(approxEq<float>(*(op->getOutput(0)),*myOutput, 1e-3f, 1e-4f)); + REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput, 1e-3f, 1e-4f)); } } \ No newline at end of file diff --git a/unit_tests/operator/Test_DivImpl.cpp b/unit_tests/operator/Test_DivImpl.cpp index 5d7dfdf12032d4c444e38cda6d2a4298fc552b14..1f3505ee14e78d58f4937104d3556c2ad5cb7692 100644 --- a/unit_tests/operator/Test_DivImpl.cpp +++ b/unit_tests/operator/Test_DivImpl.cpp @@ -10,13 +10,13 @@ ********************************************************************************/ #include <catch2/catch_test_macros.hpp> -#include <cstddef> // std::size_t -#include <cstdint> // std::uint16_t #include <chrono> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t #include <iostream> #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution #include "aidge/data/Tensor.hpp" #include "aidge/operator/Div.hpp" @@ -29,24 +29,28 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); - std::uniform_int_distribution<int> boolDist(0,1); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), + std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), + 
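// The expected 2x2 outputs of the "Strided and dilated Conv" section above
// can be reproduced with a naive reference loop. This sketch (naiveConv2d is
// a hypothetical name; NCHW layout assumed, as in the tests) only illustrates
// how stride and dilation enter the index arithmetic; it is not aidge's
// kernel:

#include <cstddef>

void naiveConv2d(const float *in, const float *weights, const float *bias,
                 float *out, std::size_t N, std::size_t C, std::size_t H,
                 std::size_t W, std::size_t K, std::size_t kH, std::size_t kW,
                 std::size_t sH, std::size_t sW, std::size_t dH,
                 std::size_t dW) {
    const std::size_t oH = (H - (dH * (kH - 1) + 1)) / sH + 1;
    const std::size_t oW = (W - (dW * (kW - 1) + 1)) / sW + 1;
    for (std::size_t n = 0; n < N; ++n)
        for (std::size_t k = 0; k < K; ++k)
            for (std::size_t oy = 0; oy < oH; ++oy)
                for (std::size_t ox = 0; ox < oW; ++ox) {
                    float acc = (bias != nullptr) ? bias[k] : 0.0f;
                    for (std::size_t c = 0; c < C; ++c)
                        for (std::size_t ky = 0; ky < kH; ++ky)
                            for (std::size_t kx = 0; kx < kW; ++kx) {
                                // Stride moves the window; dilation spaces
                                // the taps inside it.
                                const std::size_t iy = oy * sH + ky * dH;
                                const std::size_t ix = ox * sW + kx * dW;
                                acc += in[((n * C + c) * H + iy) * W + ix] *
                                       weights[((k * C + c) * kH + ky) * kW +
                                               kx];
                            }
                    out[((n * K + k) * oH + oy) * oW + ox] = acc;
                }
}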
std::size_t(5));
+    std::uniform_int_distribution<int> boolDist(0, 1);
 
     // Create Div Operator
     std::shared_ptr<Node> myDiv = Div();
-    auto op = std::static_pointer_cast<OperatorTensor>(myDiv-> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(myDiv->getOperator());
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
 
     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Float32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Float32);
     T1->setBackend("cpu");
@@ -61,12 +65,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
     std::chrono::duration<double, std::micro> duration{};
 
     SECTION("DivImpl_cpu::forward()") {
-        SECTION("Scalar / Scalar") {
-
-        }
-        SECTION("Scalar / +1-D Tensor") {
-
-        }
+        SECTION("Scalar / Scalar") {}
+        SECTION("Scalar / +1-D Tensor") {}
 
         SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
             std::size_t number_of_operation = 0;
@@ -77,13 +77,17 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 for (std::size_t i = 0; i < nbDims; ++i) {
                     dims.push_back(dimSizeDist(gen));
                }
-                const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dims.cbegin(),
+                                    dims.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
 
                 // without broadcasting
-                float* array0 = new float[nb_elements];
-                float* array1 = new float[nb_elements];
-                float* result = new float[nb_elements];
+                float *array0 = new float[nb_elements];
+                float *array1 = new float[nb_elements];
+                float *result = new float[nb_elements];
 
                 for (std::size_t i = 0; i < nb_elements; ++i) {
                     array0[i] = valueDist(gen);
@@ -93,21 +97,23 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
 
                 // input0
                 T0->resize(dims);
-                T0 -> getImpl() -> setRawPtr(array0, nb_elements);
+                T0->getImpl()->setRawPtr(array0, nb_elements);
 
                 // input1
                 T1->resize(dims);
-                T1 -> getImpl() -> setRawPtr(array1, nb_elements);
+                T1->getImpl()->setRawPtr(array1, nb_elements);
 
                 // results
                 Tres->resize(dims);
-                Tres -> getImpl() -> setRawPtr(result, nb_elements);
+                Tres->getImpl()->setRawPtr(result, nb_elements);
 
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myDiv->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
@@ -117,8 +123,10 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 // with broadcasting
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements over time spent: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
 
         SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
@@ -126,7 +134,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 // generate 2 random Tensors
-                // handle dimensions, replace some dimensions with '1' to get broadcasting
+                // handle dimensions, replace some dimensions with '1' to get
+
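// The reference loops below expand the broadcast by hand with explicit
// per-dimension strides. The same mapping, stated compactly: an input
// coordinate equals the output coordinate, clamped to 0 wherever that input
// dimension has size 1. A sketch under the assumption of NumPy/ONNX-style
// broadcasting, with flattenBroadcastIndex as a hypothetical helper (input
// dims already padded with leading 1s to the output rank, as dims1_tmp is
// further below):

#include <cstddef>
#include <vector>

std::size_t flattenBroadcastIndex(const std::vector<std::size_t> &inDims,
                                  const std::vector<std::size_t> &outCoord) {
    std::size_t idx = 0;
    for (std::size_t d = 0; d < inDims.size(); ++d) {
        // A size-1 dimension always contributes coordinate 0: its single
        // value is reused across the whole output extent.
        idx = idx * inDims[d] + ((inDims[d] > 1) ? outCoord[d] : 0);
    }
    return idx;
}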
broadcasting constexpr std::size_t nbDims = 4; std::vector<std::size_t> dims; for (std::size_t i = 0; i < nbDims; ++i) { @@ -146,37 +155,62 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { } // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + float *array1 = + new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < dims0[0] * dims0[1] * dims0[2] * dims0[3]; + ++i) { array0[i] = valueDist(gen); } - for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) { + for (std::size_t i = 0; + i < dims1[0] * dims1[1] * dims1[2] * dims1[3]; + ++i) { array1[i] = valueDist(gen); } // compute true result - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1[1] * dims1[2] * dims1[3], + dims1[2] * dims1[3], + dims1[3], + 1}; for (std::size_t a = 0; a < dimsOut[0]; ++a) { for (std::size_t b = 0; b < dimsOut[1]; ++b) { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) - + strides1[1] * ((dims1[1] > 1) ? b : 0); + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = + strides1[0] * ((dims1[0] > 1) ? a : 0) + + strides1[1] * ((dims1[1] > 1) ? b : 0); for (std::size_t c = 0; c < dimsOut[2]; ++c) { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); for (std::size_t d = 0; d < dimsOut[3]; ++d) { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1[2] > 1) ? c : 0) - + ((dims1[3] > 1) ? d : 0); - result[idx_out + d] = array0[idx0] / array1[idx1]; - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " / " << array1[idx1] << " -> " << idx_out + d << std::endl; + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = + idx1_0 + + strides1[2] * ((dims1[2] > 1) ? c : 0) + + ((dims1[3] > 1) ? 
d : 0); + result[idx_out + d] = + array0[idx0] / array1[idx1]; + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " / " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -185,22 +219,30 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]); + T1->getImpl()->setRawPtr( + array1, + dims1[0] * dims1[1] * dims1[2] * dims1[3]); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); myDiv->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -209,15 +251,23 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } SECTION("+1-D Tensor / 1-D Tensor") { std::size_t number_of_operation = 0; - std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3)); + std::uniform_int_distribution<std::size_t> nbRemovedDimsDist( + std::size_t(1), + std::size_t(3)); for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors @@ -234,15 +284,24 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { dims1[i] = 1; } } - dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen)); + dims1.erase(dims1.cbegin(), + dims1.cbegin() + nbRemovedDimsDist(gen)); // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); - float* array1 = new float[array1_size]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + std::size_t array1_size = + std::accumulate(dims1.cbegin(), + dims1.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + float *array1 = new float[array1_size]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]); + 
++i) { array0[i] = valueDist(gen); } for (std::size_t i = 0; i < array1_size; ++i) { @@ -251,27 +310,48 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { // compute true result auto dims1_tmp = dims1; - dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1)); - - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1}; + dims1_tmp.insert(dims1_tmp.cbegin(), + 4 - dims1_tmp.size(), + std::size_t(1)); + + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[3], + 1}; for (std::size_t a = 0; a < dimsOut[0]; ++a) { for (std::size_t b = 0; b < dimsOut[1]; ++b) { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) - + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = + strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) + + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); for (std::size_t c = 0; c < dimsOut[2]; ++c) { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); for (std::size_t d = 0; d < dimsOut[3]; ++d) { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0) - + ((dims1_tmp[3] > 1) ? d : 0); - result[idx_out + d] = array0[idx0] / array1[idx1]; - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " / " << array1[idx1] << " -> " << idx_out + d << std::endl; + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = + idx1_0 + + strides1[2] * + ((dims1_tmp[2] > 1) ? c : 0) + + ((dims1_tmp[3] > 1) ? 
d : 0); + result[idx_out + d] = + array0[idx0] / array1[idx1]; + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " / " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -280,22 +360,28 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, array1_size); + T1->getImpl()->setRawPtr(array1, array1_size); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); myDiv->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -304,12 +390,18 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } } } diff --git a/unit_tests/operator/Test_ErfImpl.cpp b/unit_tests/operator/Test_ErfImpl.cpp index 2826b5b57d431cf8296a9869f88f7d642c59c963..8203e79326ad321eb997eeae2f735a253f06c469 100644 --- a/unit_tests/operator/Test_ErfImpl.cpp +++ b/unit_tests/operator/Test_ErfImpl.cpp @@ -18,71 +18,80 @@ #include <memory> - using namespace Aidge; TEST_CASE("[cpu/operator] Erf(forward)") { SECTION("1D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> { - {0.41384590, 0.43120754, 0.93762982, 0.31049860, 0.77547199, 0.09514862, - 0.16145366, 0.42776686, 0.43487436, 0.41170865} - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<float,10> { - {0.44163144, 0.45801866, 0.81516320, 0.33941913, 0.72722000, 0.10704061, - 0.18061027, 0.45479023, 0.46144873, 0.43959764} - }); + std::shared_ptr<Tensor> input0 = + std::make_shared<Tensor>(Array1D<float, 10>{{0.41384590, + 0.43120754, + 0.93762982, + 0.31049860, + 0.77547199, + 0.09514862, + 0.16145366, + 0.42776686, + 0.43487436, + 0.41170865}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array1D<float, 10>{{0.44163144, + 0.45801866, + 0.81516320, + 0.33941913, + 0.72722000, + 0.10704061, + 0.18061027, + 0.45479023, + 0.46144873, + 0.43959764}}); std::shared_ptr<Node> myErf = Erf(); - auto op = std::static_pointer_cast<OperatorTensor>(myErf -> getOperator()); - op->associateInput(0,input0); + auto op = + 
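// The expected values in these Erf tests are consistent with the standard
// error function erf(x) = 2/sqrt(pi) * integral from 0 to x of exp(-t^2) dt,
// so reference arrays like the ones above can be regenerated with <cmath>.
// A self-contained sketch (the sample inputs are the first three entries of
// the 1D test):

#include <cmath>
#include <cstdio>

int main() {
    const float inputs[3] = {0.41384590f, 0.43120754f, 0.93762982f};
    for (float x : inputs) {
        // Prints 0.441631, 0.458019, 0.815163 -- matching expectedOutput
        // above within the tested 1e-5 tolerance.
        std::printf("erf(%.8f) = %.6f\n", x, std::erf(x));
    }
    return 0;
}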
std::static_pointer_cast<OperatorTensor>(myErf->getOperator()); + op->associateInput(0, input0); op->setDataType(DataType::Float32); op->setBackend("cpu"); myErf->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + float *resPtr = + static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr()); + float *expectedPtr = + static_cast<float *>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < expectedOutput->size(); ++i) { + REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001); } } SECTION("3D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<float,2,2,3> { - { - { - {0.97037154, 0.86208081, 0.77767169}, - {0.38160080, 0.11422747, 0.77284443}, - }, - { - {0.51592529, 0.72543722, 0.54641193}, - {0.93866944, 0.97767913, 0.34172094} - } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> { - { - { - {0.83003384, 0.77721894, 0.72857803}, - {0.41057193, 0.12833349, 0.72559172}, - }, - { - {0.53438270, 0.69507217, 0.56032562}, - {0.81564975, 0.83322692, 0.37109339} - } - } - }); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array3D<float, 2, 2, 3>{{{ + {0.97037154, 0.86208081, 0.77767169}, + {0.38160080, 0.11422747, 0.77284443}, + }, + {{0.51592529, 0.72543722, 0.54641193}, + {0.93866944, 0.97767913, 0.34172094}}}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array3D<float, 2, 2, 3>{{{ + {0.83003384, 0.77721894, 0.72857803}, + {0.41057193, 0.12833349, 0.72559172}, + }, + {{0.53438270, 0.69507217, 0.56032562}, + {0.81564975, 0.83322692, 0.37109339}}}}); std::shared_ptr<Node> myErf = Erf(); - auto op = std::static_pointer_cast<OperatorTensor>(myErf -> getOperator()); - op->associateInput(0,input0); + auto op = + std::static_pointer_cast<OperatorTensor>(myErf->getOperator()); + op->associateInput(0, input0); op->setDataType(DataType::Float32); op->setBackend("cpu"); myErf->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + float *resPtr = + static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr()); + float *expectedPtr = + static_cast<float *>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < expectedOutput->size(); ++i) { + REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001); } } } \ No newline at end of file diff --git a/unit_tests/operator/Test_FCImpl.cpp b/unit_tests/operator/Test_FCImpl.cpp index b2566f26d984fb1d89052745ec35870c6b935d48..9d7ca9113baec1855eb8ceac2c0565f64eb35824 100644 --- a/unit_tests/operator/Test_FCImpl.cpp +++ b/unit_tests/operator/Test_FCImpl.cpp @@ -20,91 +20,105 @@ using namespace Aidge; TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") { - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{ - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 
4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{ - {{23601, 23602, 23603, 23604, 23605}, {68601, 68602, 68603, 68604, 68605}}}); + std::shared_ptr<Tensor> myWeights = + std::make_shared<Tensor>(Array2D<int, 5, 75>{ + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); + std::shared_ptr<Tensor> myBias = + std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array2D<int, 2, 5>{{{23601, 23602, 23603, 23604, 23605}, + {68601, 68602, 68603, 68604, 68605}}}); std::shared_ptr<Node> myFC = FC(75, 5, false, "myfc"); - auto op = std::static_pointer_cast<OperatorTensor>(myFC -> getOperator()); - op -> associateInput(1, myWeights); - op -> associateInput(2, myBias); + auto op = std::static_pointer_cast<OperatorTensor>(myFC->getOperator()); + op->associateInput(1, myWeights); + op->associateInput(2, myBias); SECTION("2D input") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{ - {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74}, - {75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}}); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array2D<int, 2, 75>{ + {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74}, + {75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, + 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, + 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, + 141, 142, 143, 144, 145, 146, 147, 148, 149}}}); op->associateInput(0, myInput); - op -> setDataType(DataType::Int32); - op -> setBackend("cpu"); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); myFC->forward(); REQUIRE(*(op->getOutput(0)) == *myOutput); } SECTION("4D input") { - std::shared_ptr<Tensor> myInput = - std::make_shared<Tensor>(Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4}, - {5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14}, - {15, 16, 17, 18, 19}, - {20, 21, 22, 23, 24}}, - {{25, 26, 27, 28, 29}, - {30, 31, 32, 33, 34}, - {35, 36, 37, 38, 39}, - {40, 41, 42, 43, 44}, - {45, 46, 47, 48, 49}}, - {{50, 51, 52, 53, 54}, - {55, 56, 57, 58, 59}, - {60, 61, 62, 63, 64}, - {65, 66, 67, 68, 69}, - {70, 71, 72, 73, 74}}}, - {{{75, 76, 77, 78, 79}, - {80, 81, 82, 83, 84}, - {85, 86, 87, 88, 89}, - {90, 91, 92, 93, 94}, - {95, 96, 97, 98, 99}}, - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); op->associateInput(0, myInput); - op -> setDataType(DataType::Int32); - op -> setBackend("cpu"); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); myFC->forward(); REQUIRE(*(op->getOutput(0)) == *myOutput); } - // std::cout << static_cast<Tensor>((*myFC->getOperator())["weight"])[0][0][0][0] << 
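// The "2D input" and "4D input" sections expect the same myOutput because FC
// is assumed to view any input as (batch, inFeatures) -- here 2 x 75, with
// 75 = 3*5*5 -- before applying the affine map y = W.x + b. A reference
// sketch (fcForwardSketch is a hypothetical name):

#include <cstddef>

void fcForwardSketch(const int *x, const int *W, const int *b, int *y,
                     std::size_t batch, std::size_t inF, std::size_t outF) {
    for (std::size_t n = 0; n < batch; ++n)
        for (std::size_t o = 0; o < outF; ++o) {
            int acc = b[o];
            for (std::size_t i = 0; i < inF; ++i)
                acc += W[o * inF + i] * x[n * inF + i];
            y[n * outF + o] = acc;
        }
}

// Sanity check against the literals above: input row 0..74 against one
// "1..15 repeated" weight row sums to 23600; adding bias 1 gives 23601,
// the first entry of myOutput.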
std::endl; + // std::cout << + // static_cast<Tensor>((*myFC->getOperator())["weight"])[0][0][0][0] << + // std::endl; } \ No newline at end of file diff --git a/unit_tests/operator/Test_FoldImpl.cpp b/unit_tests/operator/Test_FoldImpl.cpp index 6832f5a42d796d9261495794e0758ce1b6df0346..bdd8b88e82f6abe90fad14699f023f6c7a94cd51 100644 --- a/unit_tests/operator/Test_FoldImpl.cpp +++ b/unit_tests/operator/Test_FoldImpl.cpp @@ -15,154 +15,111 @@ #include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/operator/Fold.hpp" -#include "aidge/operator/Unfold.hpp" #include "aidge/operator/MatMul.hpp" #include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Unfold.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/backend/cpu.hpp" using namespace Aidge; TEST_CASE("[cpu/operator] Fold(forward)", "[Fold][CPU]") { - std::shared_ptr<Node> myUnfold = Unfold({3,3}, "myunfold"); + std::shared_ptr<Node> myUnfold = Unfold({3, 3}, "myunfold"); std::shared_ptr<Node> myReshape = Reshape({4, 27}, "myreshape"); std::shared_ptr<Node> myMatMul = MatMul("mymatmul"); - std::shared_ptr<Node> myFold = Fold({3,3}, {1,1}, "myfold"); + std::shared_ptr<Node> myFold = Fold({3, 3}, {1, 1}, "myfold"); myUnfold->addChild(myMatMul, 0, 1); myReshape->addChild(myMatMul, 0, 0); myMatMul->addChild(myFold, 0, 0); - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { - { - { - {{ 0, 1, 2}, - { 3, 4, 5}, - { 6, 7, 8}}, - {{ 9, 10, 11}, - { 12, 13, 14}, - { 15, 16, 17}}, - {{ 18, 19, 20}, - { 21, 22, 23}, - { 24, 25, 26}} - }, - { - {{ 27, 28, 29}, - { 30, 31, 32}, - { 33, 34, 35}}, - {{ 36, 37, 38}, - { 39, 40, 41}, - { 42, 43, 44}}, - {{ 45, 46, 47}, - { 48, 49, 50}, - { 51, 52, 53}} - }, - { - {{ 54, 55, 56}, - { 57, 58, 59}, - { 60, 61, 62}}, - {{ 63, 64, 65}, - { 66, 67, 68}, - { 69, 70, 71}}, - {{ 72, 73, 74}, - { 75, 76, 77}, - { 78, 79, 80}} - }, - { - {{ 81, 82, 83}, - { 84, 85, 86}, - { 87, 88, 89}}, - {{ 90, 91, 92}, - { 93, 94, 95}, - { 96, 97, 98}}, - {{ 99, 100, 101}, - {102, 103, 104}, - {105, 106, 107}} - } - } - }); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, + std::shared_ptr<Tensor> myWeights = + std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{ + {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}, + {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}}, + {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}}, + {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}}, + {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}}, + {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}}, + {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}}, + {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}}, + {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}}, + {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}}, + {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}}, + {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 
62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { - { - { - {{ 15219, 15570, 15921}, - { 16974, 17325, 17676}, - { 18729, 19080, 19431}}, - {{ 37818, 38898, 39978}, - { 43218, 44298, 45378}, - { 48618, 49698, 50778}}, - {{ 60417, 62226, 64035}, - { 69462, 71271, 73080}, - { 78507, 80316, 82125}}, - {{ 83016, 85554, 88092}, - { 95706, 98244, 100782}, - { 108396, 110934, 113472}} - }, - { - {{ 41544, 41895, 42246}, - { 43299, 43650, 44001}, - { 45054, 45405, 45756}}, - {{ 118818, 119898, 120978}, - { 124218, 125298, 126378}, - { 129618, 130698, 131778}}, - {{ 196092, 197901, 199710}, - { 205137, 206946, 208755}, - { 214182, 215991, 217800}}, - {{ 273366, 275904, 278442}, - { 286056, 288594, 291132}, - { 298746, 301284, 303822}} - } - } - }); + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array4D<int, 2, 4, 3, 3>{{{{{15219, 15570, 15921}, + {16974, 17325, 17676}, + {18729, 19080, 19431}}, + {{37818, 38898, 39978}, + {43218, 44298, 45378}, + {48618, 49698, 50778}}, + {{60417, 62226, 64035}, + {69462, 71271, 73080}, + {78507, 80316, 82125}}, + {{83016, 85554, 88092}, + {95706, 98244, 100782}, + {108396, 110934, 113472}}}, + {{{41544, 41895, 42246}, + {43299, 43650, 44001}, + {45054, 45405, 45756}}, + {{118818, 119898, 120978}, + {124218, 125298, 126378}, + {129618, 130698, 131778}}, + {{196092, 197901, 199710}, + {205137, 206946, 208755}, + {214182, 215991, 217800}}, + {{273366, 275904, 278442}, + {286056, 288594, 291132}, + {298746, 301284, 303822}}}}}); - auto opUnfold = std::static_pointer_cast<OperatorTensor>(myUnfold -> getOperator()); - auto opReshape = std::static_pointer_cast<OperatorTensor>(myReshape -> getOperator()); - auto opMatMul = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); - auto opFold = std::static_pointer_cast<OperatorTensor>(myFold -> getOperator()); - opUnfold->associateInput(0,myInput); - opReshape->associateInput(0,myWeights); + auto opUnfold = + std::static_pointer_cast<OperatorTensor>(myUnfold->getOperator()); + auto opReshape = + std::static_pointer_cast<OperatorTensor>(myReshape->getOperator()); + auto opMatMul = + std::static_pointer_cast<OperatorTensor>(myMatMul->getOperator()); + auto opFold = + std::static_pointer_cast<OperatorTensor>(myFold->getOperator()); + opUnfold->associateInput(0, myInput); + opReshape->associateInput(0, myWeights); auto g = getConnectedGraphView(myMatMul); g->setDataType(DataType::Int32); @@ -173,6 +130,6 @@ TEST_CASE("[cpu/operator] 
Fold(forward)", "[Fold][CPU]") { SequentialScheduler scheduler(g); scheduler.forward(); - //opFold->getOutput(0)->print(); + // opFold->getOutput(0)->print(); REQUIRE(*(opFold->getOutput(0)) == *myOutput); } \ No newline at end of file diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp index d5f2065b624de431b43edef9a83bf079905129dd..9e6d93bf7d4129dc5055f7e901c1fde77da90987 100644 --- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp +++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp @@ -27,539 +27,584 @@ // debug print function void print_tensor(Aidge::Tensor &T) { - // Print tensors - std::cout << "Tensor : size =  ["; - for (auto &dim : T.dims()) { - std::cout << dim << " , "; - } - std::cout << "]" << std::endl; - T.print(); + // Print tensors + std::cout << "Tensor : size =  ["; + for (auto &dim : T.dims()) { + std::cout << dim << " , "; + } + std::cout << "]" << std::endl; + T.print(); } namespace Aidge { TEST_CASE("[cpu/operator] GlobalAveragePooling", "[GlobalAveragePooling][CPU]") { - constexpr std::uint16_t NBTRIALS = 10; - // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist( - 0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), - std::size_t(10)); - - std::uniform_int_distribution<std::size_t> nbLowDimsDist(std::size_t(1), - std::size_t(2)); - std::uniform_int_distribution<std::size_t> nbHighDimsDist(std::size_t(3), - std::size_t(7)); - - // Create MatGlobalAveragePooling Operator - std::shared_ptr<Node> globAvgPool = GlobalAveragePooling(); - auto op = - std::static_pointer_cast<OperatorTensor>(globAvgPool->getOperator()); - op->setDataType(DataType::Float32); - op->setBackend("cpu"); - - // Create the input Tensor - std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); - op->associateInput(0, T0); - T0->setDataType(DataType::Float32); - T0->setBackend("cpu"); - - // Create results Tensor - std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(); - Tres->setDataType(DataType::Float32); - Tres->setBackend("cpu"); - - // To measure execution time of 'MatGlobalAveragePooling_Op::forward()' member - // function call - std::chrono::time_point<std::chrono::system_clock> start; - std::chrono::time_point<std::chrono::system_clock> end; - std::chrono::duration<double, std::micro> duration{}; - int number_of_operation{0}; - - SECTION("GlobalAveragePoolingImpl_cpu::forward()") { - SECTION( - "1-2Dim > not enough dimensions leads to function throwing an error") { - // generate a random tensors - const std::size_t nbDims = nbLowDimsDist(gen); - std::vector<std::size_t> dims; - for (std::size_t i = 0; i < nbDims; ++i) { - dims.push_back(dimSizeDist(gen)); - } - const std::size_t nb_elements = - std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), - std::multiplies<std::size_t>()); - - float *array0 = new float[nb_elements]; - for (std::size_t i = 0; i < nb_elements; ++i) { - array0[i] = valueDist(gen); - } - // input0 - T0->resize(dims); - T0->getImpl()->setRawPtr(array0, nb_elements); - - REQUIRE_THROWS(globAvgPool->forward()); - delete[] array0; - } - - SECTION("3+Dim") { - SECTION("Fill a tensor with all values set as N will result with every " - "output being N") { - // generate the tensor - const std::size_t nbDims = nbHighDimsDist(gen); - std::vector<std::size_t> dims_in; - for (std::size_t i = 0; i < nbDims; ++i) { - 
dims_in.push_back(dimSizeDist(gen)); - } - // create in nb_elems - const std::size_t in_nb_elems = - std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1), - std::multiplies<std::size_t>()); - const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0]; - const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1]; - - number_of_operation += - in_nb_elems + - dims_in[1]; // averaging per channel : 1 addition per element in - // the channel + 1 division this for every batch - // create out nb_elems - std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]}; - const std::size_t out_nb_elems = - std::accumulate(dims_out.cbegin(), dims_out.cend(), std::size_t(1), - std::multiplies<std::size_t>()); - const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0]; - - // iterate over each batch/channel - float *array0 = new float[in_nb_elems]; - float *result = new float[out_nb_elems]; - float val = valueDist(gen); - for (std::size_t batch = 0; batch < dims_in[0]; ++batch) { - for (std::size_t channel = 0; channel < dims_in[1]; ++channel) { - for (std::size_t i = 0; i < in_channel_nb_elems; ++i) - - { - array0[batch * in_batch_nb_elems + channel * in_channel_nb_elems + - i] = val; + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), + std::size_t(10)); + + std::uniform_int_distribution<std::size_t> nbLowDimsDist(std::size_t(1), + std::size_t(2)); + std::uniform_int_distribution<std::size_t> nbHighDimsDist(std::size_t(3), + std::size_t(7)); + + // Create MatGlobalAveragePooling Operator + std::shared_ptr<Node> globAvgPool = GlobalAveragePooling(); + auto op = + std::static_pointer_cast<OperatorTensor>(globAvgPool->getOperator()); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + // Create the input Tensor + std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); + op->associateInput(0, T0); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + + // Create results Tensor + std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(); + Tres->setDataType(DataType::Float32); + Tres->setBackend("cpu"); + + // To measure execution time of 'MatGlobalAveragePooling_Op::forward()' + // member function call + std::chrono::time_point<std::chrono::system_clock> start; + std::chrono::time_point<std::chrono::system_clock> end; + std::chrono::duration<double, std::micro> duration{}; + int number_of_operation{0}; + + SECTION("GlobalAveragePoolingImpl_cpu::forward()") { + SECTION("1-2Dim > not enough dimensions leads to function throwing an " + "error") { + // generate a random tensors + const std::size_t nbDims = nbLowDimsDist(gen); + std::vector<std::size_t> dims; + for (std::size_t i = 0; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); } - result[batch * out_batch_nb_elems + channel] = val; - } - } - - // input0 - T0->resize(dims_in); - T0->getImpl()->setRawPtr(array0, in_nb_elems); - - // results - Tres->resize(dims_out); - Tres->getImpl()->setRawPtr(result, out_nb_elems); - - op->forwardDims(); - start = std::chrono::system_clock::now(); - REQUIRE_NOTHROW(globAvgPool->forward()); - end = std::chrono::system_clock::now(); - duration += - std::chrono::duration_cast<std::chrono::microseconds>(end - start); + const std::size_t nb_elements = + std::accumulate(dims.cbegin(), + dims.cend(), + 
std::size_t(1), + std::multiplies<std::size_t>()); + + float *array0 = new float[nb_elements]; + for (std::size_t i = 0; i < nb_elements; ++i) { + array0[i] = valueDist(gen); + } + // input0 + T0->resize(dims); + T0->getImpl()->setRawPtr(array0, nb_elements); - REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); - for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { - REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); + REQUIRE_THROWS(globAvgPool->forward()); + delete[] array0; } - REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); - - delete[] array0; - delete[] result; - } - - SECTION("random testing") { - for (int trial = 0; trial < NBTRIALS; ++trial) { - // generate the tensor - const std::size_t nbDims = nbHighDimsDist(gen); - std::vector<std::size_t> dims_in; - for (std::size_t i = 0; i < nbDims; ++i) { - dims_in.push_back(dimSizeDist(gen)); - } - // create in nb_elems - const std::size_t in_nb_elems = - std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1), - std::multiplies<std::size_t>()); - const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0]; - const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1]; - number_of_operation += - in_nb_elems + - dims_in[1]; // averaging per channel : 1 addition per element in - // the channel + 1 division this for every batch - - // create out nb_elems - std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]}; - const std::size_t out_nb_elems = - std::accumulate(dims_out.cbegin(), dims_out.cend(), - std::size_t(1), std::multiplies<std::size_t>()); - const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0]; - - // iterate over each batch/channel - float *array0 = new float[in_nb_elems]; - float *result = new float[out_nb_elems]; - for (std::size_t batch = 0; batch < dims_in[0]; ++batch) { - for (std::size_t channel = 0; channel < dims_in[1]; ++channel) { - float channel_sum = 0; - for (std::size_t i = 0; i < in_channel_nb_elems; ++i) - - { + SECTION("3+Dim") { + SECTION("Fill a tensor with all values set as N will result with " + "every " + "output being N") { + // generate the tensor + const std::size_t nbDims = nbHighDimsDist(gen); + std::vector<std::size_t> dims_in; + for (std::size_t i = 0; i < nbDims; ++i) { + dims_in.push_back(dimSizeDist(gen)); + } + // create in nb_elems + const std::size_t in_nb_elems = + std::accumulate(dims_in.cbegin(), + dims_in.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0]; + const DimSize_t in_channel_nb_elems = + in_batch_nb_elems / dims_in[1]; + + number_of_operation += + in_nb_elems + + dims_in[1]; // averaging per channel : 1 addition per + // element in the channel + 1 division this + // for every batch + // create out nb_elems + std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]}; + const std::size_t out_nb_elems = + std::accumulate(dims_out.cbegin(), + dims_out.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + const DimSize_t out_batch_nb_elems = + out_nb_elems / dims_out[0]; + + // iterate over each batch/channel + float *array0 = new float[in_nb_elems]; + float *result = new float[out_nb_elems]; float val = valueDist(gen); - array0[batch * in_batch_nb_elems + - channel * in_channel_nb_elems + i] = val; - channel_sum += val; - } - result[batch * out_batch_nb_elems + channel] = - channel_sum / in_channel_nb_elems; + for (std::size_t batch = 0; batch < dims_in[0]; ++batch) { + for (std::size_t channel = 0; channel < dims_in[1]; + ++channel) { + for 
(std::size_t i = 0; i < in_channel_nb_elems; ++i) + + { + array0[batch * in_batch_nb_elems + + channel * in_channel_nb_elems + i] = val; + } + result[batch * out_batch_nb_elems + channel] = val; + } + } + + // input0 + T0->resize(dims_in); + T0->getImpl()->setRawPtr(array0, in_nb_elems); + + // results + Tres->resize(dims_out); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + + op->forwardDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { + REQUIRE(Tres->dims().at(i) == + op->getOutput(0)->dims().at(i)); + } + + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + + delete[] array0; + delete[] result; } - } - // input0 - T0->resize(dims_in); - T0->getImpl()->setRawPtr(array0, in_nb_elems); - - // results - Tres->resize(dims_out); - Tres->getImpl()->setRawPtr(result, out_nb_elems); - - op->forwardDims(); - start = std::chrono::system_clock::now(); - REQUIRE_NOTHROW(globAvgPool->forward()); - end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>( - end - start); - - REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); - for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { - REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); - } - - REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres, 1e-4f)); - - delete[] array0; - delete[] result; - } - } - SECTION("Using result from a pytorch function as groundtruth") { - DimSize_t batch_size = 2; - DimSize_t channels = 3; - DimSize_t height = 4; - DimSize_t width = 3; - DimSize_t depth = 2; - - SECTION("2D_img") { - const std::vector<DimSize_t> in_dims{batch_size, channels, height, - width}; - const std::vector<DimSize_t> out_dims{batch_size, channels}; - DimSize_t in_nb_elems = batch_size * channels * height * width; - DimSize_t out_nb_elems = batch_size * channels; - number_of_operation += - in_nb_elems + - channels; // averaging per channel : 1 addition per element in - // the channel + 1 division this for every batch - auto input = new float[in_nb_elems]; - auto result = new float[out_nb_elems]; - input[0] = 0.1807716; - input[1] = -0.0699881; - input[2] = -0.3596235; - input[3] = -0.9152045; - input[4] = 0.6257653; - input[5] = 0.0255099; - input[6] = 0.9545137; - input[7] = 0.0643485; - input[8] = 0.3611506; - input[9] = 1.1678782; - input[10] = -1.3498932; - input[11] = -0.5101767; - input[12] = 0.2359577; - input[13] = -0.2397784; - input[14] = -0.9211147; - input[15] = 1.5432971; - input[16] = 1.3488258; - input[17] = -0.1396417; - input[18] = 0.2857972; - input[19] = 0.9651205; - input[20] = -2.0371499; - input[21] = 0.4931363; - input[22] = 1.4869986; - input[23] = 0.5910330; - input[24] = 0.1260297; - input[25] = -1.5626874; - input[26] = -1.1601028; - input[27] = -0.3348408; - input[28] = 0.4477722; - input[29] = -0.8016447; - input[30] = 1.5236114; - input[31] = 2.5085869; - input[32] = -0.6630959; - input[33] = -0.2512752; - input[34] = 1.0101448; - input[35] = 0.1215468; - input[36] = 0.1583993; - input[37] = 1.1340188; - input[38] = -1.1538976; - input[39] = -0.2983968; - input[40] = -0.5075365; - input[41] = -0.9239212; - input[42] = 0.5467061; - input[43] = -1.4947776; - input[44] = -1.2057148; - input[45] = 0.5718198; - input[46] = -0.5973545; - input[47] = 
-0.6936757; - input[48] = 1.6455388; - input[49] = -0.8029931; - input[50] = 1.3514109; - input[51] = -0.2759193; - input[52] = -1.5108346; - input[53] = 2.1047730; - input[54] = 2.7629590; - input[55] = -1.7465292; - input[56] = 0.8353187; - input[57] = -1.9560477; - input[58] = -0.8002653; - input[59] = -0.5044988; - input[60] = -0.0711742; - input[61] = -0.5130699; - input[62] = -1.0307810; - input[63] = 0.9154347; - input[64] = -0.2282317; - input[65] = -0.6884708; - input[66] = 0.1832259; - input[67] = 0.6003584; - input[68] = -1.5429375; - input[69] = -0.3465560; - input[70] = -0.1476223; - input[71] = 0.6469797; - - result[0] = 0.0145876; - result[1] = 0.3010401; - result[2] = 0.0803371; - - result[3] = -0.3720275; - result[4] = 0.0919094; - result[5] = -0.1852371; - - // input0 - T0->resize(in_dims); - T0->getImpl()->setRawPtr(input, in_nb_elems); - - // results - Tres->resize(out_dims); - Tres->getImpl()->setRawPtr(result, out_nb_elems); - op->forwardDims(); - start = std::chrono::system_clock::now(); - REQUIRE_NOTHROW(globAvgPool->forward()); - end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>( - end - start); - - REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); - for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { - REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); - } - REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); - delete[] input; - delete[] result; - } - SECTION("3D_img") { - const std::vector<DimSize_t> in_dims{batch_size, channels, height, - width, depth}; - const std::vector<DimSize_t> out_dims{batch_size, channels}; - DimSize_t in_nb_elems = - batch_size * channels * height * width * depth; - number_of_operation += - in_nb_elems + - channels; // averaging per channel : 1 addition per element in - // the channel + 1 division this for every batch - DimSize_t out_nb_elems = batch_size * channels; - auto input = new float[in_nb_elems]; - auto result = new float[out_nb_elems]; - input[0] = 0.0061403; - input[1] = -0.9665052; - input[2] = 0.3582928; - input[3] = 0.1072854; - input[4] = 1.2463317; - input[5] = 1.2460036; - input[6] = 0.3534451; - input[7] = 0.9425349; - input[8] = -0.2103887; - input[9] = -0.7959853; - input[10] = 0.1297970; - input[11] = -1.9445597; - input[12] = 0.0609514; - input[13] = -0.2379328; - input[14] = 1.9020044; - input[15] = -1.1762751; - input[16] = 0.3404147; - input[17] = 1.1685153; - input[18] = -0.6526139; - input[19] = 0.3767620; - input[20] = 0.1887376; - input[21] = 0.5154487; - input[22] = 0.6371427; - input[23] = -0.3948864; - input[24] = -1.1571540; - input[25] = 0.2896117; - input[26] = 0.6163548; - input[27] = -0.4370409; - input[28] = 0.6589766; - input[29] = 0.6587803; - input[30] = -1.3702172; - input[31] = -1.6210355; - input[32] = 0.5872851; - input[33] = 0.2860694; - input[34] = 0.0082870; - input[35] = -0.2523253; - input[36] = -1.3247224; - input[37] = 0.1891782; - input[38] = 0.0211001; - input[39] = 0.9404197; - input[40] = -0.5576900; - input[41] = -0.6939272; - input[42] = -0.3252473; - input[43] = 1.2439330; - input[44] = -1.1671864; - input[45] = -0.4091243; - input[46] = 1.2600617; - input[47] = -1.5630058; - input[48] = 1.1346143; - input[49] = -0.0823837; - input[50] = 0.2893163; - input[51] = 0.8357732; - input[52] = -0.2449911; - input[53] = 0.2712233; - input[54] = 0.0936364; - input[55] = -0.8834321; - input[56] = -0.3274170; - input[57] = 0.0783938; - input[58] = -0.3807656; - input[59] = 0.3775077; - input[60] = 
0.1119123; - input[61] = 2.3142793; - input[62] = -0.7989057; - input[63] = -0.5643027; - input[64] = -1.1346605; - input[65] = 0.1705271; - input[66] = 0.9946650; - input[67] = 1.2625724; - input[68] = 1.6218156; - input[69] = 1.0774711; - input[70] = 0.5947813; - input[71] = -1.5290873; - input[72] = 2.0437069; - input[73] = -0.1656267; - input[74] = 0.0870704; - input[75] = -0.5276564; - input[76] = -0.1002882; - input[77] = 1.0539219; - input[78] = -0.6230739; - input[79] = -1.5905718; - input[80] = -0.9741858; - input[81] = -0.1869211; - input[82] = 0.5816050; - input[83] = -2.6339815; - input[84] = -1.0764544; - input[85] = 2.5903966; - input[86] = 0.4940658; - input[87] = 0.4671729; - input[88] = 0.6588292; - input[89] = -0.7257792; - input[90] = 1.4280071; - input[91] = -1.2187740; - input[92] = 0.7380729; - input[93] = -1.1599953; - input[94] = -1.4355115; - input[95] = -1.5304037; - input[96] = 0.8474578; - input[97] = 0.0774260; - input[98] = 0.5433396; - input[99] = -0.8438400; - input[100] = -0.1089903; - input[101] = -0.6354192; - input[102] = 0.8772392; - input[103] = 0.2844733; - input[104] = 0.0975270; - input[105] = -0.9785872; - input[106] = -0.4320499; - input[107] = -1.4937501; - input[108] = -2.0644901; - input[109] = 0.0851217; - input[110] = 0.6644159; - input[111] = 0.4168026; - input[112] = 0.0958830; - input[113] = -1.5699565; - input[114] = 0.3739572; - input[115] = -0.1420672; - input[116] = -0.7864021; - input[117] = 0.2443752; - input[118] = -0.9811850; - input[119] = -0.0698569; - input[120] = 0.1463890; - input[121] = 0.2536245; - input[122] = 0.2136150; - input[123] = 0.3113698; - input[124] = 1.8353856; - input[125] = 1.4473228; - input[126] = -0.7373698; - input[127] = 0.2485314; - input[128] = -0.4789796; - input[129] = -0.3396149; - input[130] = 0.6438198; - input[131] = 0.7287521; - input[132] = -1.5119252; - input[133] = -0.1006494; - input[134] = 1.8955028; - input[135] = 1.0871323; - input[136] = 0.3620502; - input[137] = -0.8826663; - input[138] = 1.2220223; - input[139] = -1.2817260; - input[140] = 1.4153577; - input[141] = 0.4148015; - input[142] = 1.3458617; - input[143] = 1.9718349; - - result[0] = 0.1333608; - result[1] = -0.1716091; - result[2] = 0.2201060; - result[3] = -0.1585989; - result[4] = -0.2291074; - result[5] = 0.4254351; - - // input0 - T0->resize(in_dims); - T0->getImpl()->setRawPtr(input, in_nb_elems); - - // results - Tres->resize(out_dims); - Tres->getImpl()->setRawPtr(result, out_nb_elems); - op->forwardDims(); - start = std::chrono::system_clock::now(); - REQUIRE_NOTHROW(globAvgPool->forward()); - end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>( - end - start); - - REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); - for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { - REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); - } - REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); - delete[] input; - delete[] result; + SECTION("random testing") { + for (int trial = 0; trial < NBTRIALS; ++trial) { + // generate the tensor + const std::size_t nbDims = nbHighDimsDist(gen); + std::vector<std::size_t> dims_in; + for (std::size_t i = 0; i < nbDims; ++i) { + dims_in.push_back(dimSizeDist(gen)); + } + // create in nb_elems + const std::size_t in_nb_elems = + std::accumulate(dims_in.cbegin(), + dims_in.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + const DimSize_t in_batch_nb_elems = + in_nb_elems / dims_in[0]; + const DimSize_t 
in_channel_nb_elems = + in_batch_nb_elems / dims_in[1]; + number_of_operation += + in_nb_elems + + dims_in[1]; // averaging per channel : 1 addition per + // element in the channel + 1 division + // this for every batch + + // create out nb_elems + std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]}; + const std::size_t out_nb_elems = + std::accumulate(dims_out.cbegin(), + dims_out.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + const DimSize_t out_batch_nb_elems = + out_nb_elems / dims_out[0]; + + // iterate over each batch/channel + float *array0 = new float[in_nb_elems]; + float *result = new float[out_nb_elems]; + for (std::size_t batch = 0; batch < dims_in[0]; ++batch) { + for (std::size_t channel = 0; channel < dims_in[1]; + ++channel) { + float channel_sum = 0; + for (std::size_t i = 0; i < in_channel_nb_elems; + ++i) + + { + float val = valueDist(gen); + array0[batch * in_batch_nb_elems + + channel * in_channel_nb_elems + i] = + val; + channel_sum += val; + } + result[batch * out_batch_nb_elems + channel] = + channel_sum / in_channel_nb_elems; + } + } + + // input0 + T0->resize(dims_in); + T0->getImpl()->setRawPtr(array0, in_nb_elems); + + // results + Tres->resize(dims_out); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + + op->forwardDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); + ++i) { + REQUIRE(Tres->dims().at(i) == + op->getOutput(0)->dims().at(i)); + } + + REQUIRE( + approxEq<float>(*(op->getOutput(0)), *Tres, 1e-4f)); + + delete[] array0; + delete[] result; + } + } + SECTION("Using result from a pytorch function as groundtruth") { + DimSize_t batch_size = 2; + DimSize_t channels = 3; + DimSize_t height = 4; + DimSize_t width = 3; + DimSize_t depth = 2; + + SECTION("2D_img") { + const std::vector<DimSize_t> in_dims{batch_size, + channels, + height, + width}; + const std::vector<DimSize_t> out_dims{batch_size, + channels}; + DimSize_t in_nb_elems = + batch_size * channels * height * width; + DimSize_t out_nb_elems = batch_size * channels; + number_of_operation += + in_nb_elems + + channels; // averaging per channel : 1 addition per + // element in the channel + 1 division this + // for every batch + auto input = new float[in_nb_elems]; + auto result = new float[out_nb_elems]; + input[0] = 0.1807716; + input[1] = -0.0699881; + input[2] = -0.3596235; + input[3] = -0.9152045; + input[4] = 0.6257653; + input[5] = 0.0255099; + input[6] = 0.9545137; + input[7] = 0.0643485; + input[8] = 0.3611506; + input[9] = 1.1678782; + input[10] = -1.3498932; + input[11] = -0.5101767; + input[12] = 0.2359577; + input[13] = -0.2397784; + input[14] = -0.9211147; + input[15] = 1.5432971; + input[16] = 1.3488258; + input[17] = -0.1396417; + input[18] = 0.2857972; + input[19] = 0.9651205; + input[20] = -2.0371499; + input[21] = 0.4931363; + input[22] = 1.4869986; + input[23] = 0.5910330; + input[24] = 0.1260297; + input[25] = -1.5626874; + input[26] = -1.1601028; + input[27] = -0.3348408; + input[28] = 0.4477722; + input[29] = -0.8016447; + input[30] = 1.5236114; + input[31] = 2.5085869; + input[32] = -0.6630959; + input[33] = -0.2512752; + input[34] = 1.0101448; + input[35] = 0.1215468; + input[36] = 0.1583993; + input[37] = 1.1340188; + input[38] = -1.1538976; + input[39] = 
-0.2983968; + input[40] = -0.5075365; + input[41] = -0.9239212; + input[42] = 0.5467061; + input[43] = -1.4947776; + input[44] = -1.2057148; + input[45] = 0.5718198; + input[46] = -0.5973545; + input[47] = -0.6936757; + input[48] = 1.6455388; + input[49] = -0.8029931; + input[50] = 1.3514109; + input[51] = -0.2759193; + input[52] = -1.5108346; + input[53] = 2.1047730; + input[54] = 2.7629590; + input[55] = -1.7465292; + input[56] = 0.8353187; + input[57] = -1.9560477; + input[58] = -0.8002653; + input[59] = -0.5044988; + input[60] = -0.0711742; + input[61] = -0.5130699; + input[62] = -1.0307810; + input[63] = 0.9154347; + input[64] = -0.2282317; + input[65] = -0.6884708; + input[66] = 0.1832259; + input[67] = 0.6003584; + input[68] = -1.5429375; + input[69] = -0.3465560; + input[70] = -0.1476223; + input[71] = 0.6469797; + + result[0] = 0.0145876; + result[1] = 0.3010401; + result[2] = 0.0803371; + + result[3] = -0.3720275; + result[4] = 0.0919094; + result[5] = -0.1852371; + + // input0 + T0->resize(in_dims); + T0->getImpl()->setRawPtr(input, in_nb_elems); + + // results + Tres->resize(out_dims); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + op->forwardDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); + ++i) { + REQUIRE(Tres->dims().at(i) == + op->getOutput(0)->dims().at(i)); + } + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + delete[] input; + delete[] result; + } + SECTION("3D_img") { + const std::vector<DimSize_t> in_dims{batch_size, + channels, + height, + width, + depth}; + const std::vector<DimSize_t> out_dims{batch_size, + channels}; + DimSize_t in_nb_elems = + batch_size * channels * height * width * depth; + number_of_operation += + in_nb_elems + + channels; // averaging per channel : 1 addition per + // element in the channel + 1 division this + // for every batch + DimSize_t out_nb_elems = batch_size * channels; + auto input = new float[in_nb_elems]; + auto result = new float[out_nb_elems]; + input[0] = 0.0061403; + input[1] = -0.9665052; + input[2] = 0.3582928; + input[3] = 0.1072854; + input[4] = 1.2463317; + input[5] = 1.2460036; + input[6] = 0.3534451; + input[7] = 0.9425349; + input[8] = -0.2103887; + input[9] = -0.7959853; + input[10] = 0.1297970; + input[11] = -1.9445597; + input[12] = 0.0609514; + input[13] = -0.2379328; + input[14] = 1.9020044; + input[15] = -1.1762751; + input[16] = 0.3404147; + input[17] = 1.1685153; + input[18] = -0.6526139; + input[19] = 0.3767620; + input[20] = 0.1887376; + input[21] = 0.5154487; + input[22] = 0.6371427; + input[23] = -0.3948864; + input[24] = -1.1571540; + input[25] = 0.2896117; + input[26] = 0.6163548; + input[27] = -0.4370409; + input[28] = 0.6589766; + input[29] = 0.6587803; + input[30] = -1.3702172; + input[31] = -1.6210355; + input[32] = 0.5872851; + input[33] = 0.2860694; + input[34] = 0.0082870; + input[35] = -0.2523253; + input[36] = -1.3247224; + input[37] = 0.1891782; + input[38] = 0.0211001; + input[39] = 0.9404197; + input[40] = -0.5576900; + input[41] = -0.6939272; + input[42] = -0.3252473; + input[43] = 1.2439330; + input[44] = -1.1671864; + input[45] = -0.4091243; + input[46] = 1.2600617; + input[47] = -1.5630058; + input[48] = 1.1346143; + input[49] = -0.0823837; + input[50] = 0.2893163; + input[51] = 0.8357732; 
+ input[52] = -0.2449911; + input[53] = 0.2712233; + input[54] = 0.0936364; + input[55] = -0.8834321; + input[56] = -0.3274170; + input[57] = 0.0783938; + input[58] = -0.3807656; + input[59] = 0.3775077; + input[60] = 0.1119123; + input[61] = 2.3142793; + input[62] = -0.7989057; + input[63] = -0.5643027; + input[64] = -1.1346605; + input[65] = 0.1705271; + input[66] = 0.9946650; + input[67] = 1.2625724; + input[68] = 1.6218156; + input[69] = 1.0774711; + input[70] = 0.5947813; + input[71] = -1.5290873; + input[72] = 2.0437069; + input[73] = -0.1656267; + input[74] = 0.0870704; + input[75] = -0.5276564; + input[76] = -0.1002882; + input[77] = 1.0539219; + input[78] = -0.6230739; + input[79] = -1.5905718; + input[80] = -0.9741858; + input[81] = -0.1869211; + input[82] = 0.5816050; + input[83] = -2.6339815; + input[84] = -1.0764544; + input[85] = 2.5903966; + input[86] = 0.4940658; + input[87] = 0.4671729; + input[88] = 0.6588292; + input[89] = -0.7257792; + input[90] = 1.4280071; + input[91] = -1.2187740; + input[92] = 0.7380729; + input[93] = -1.1599953; + input[94] = -1.4355115; + input[95] = -1.5304037; + input[96] = 0.8474578; + input[97] = 0.0774260; + input[98] = 0.5433396; + input[99] = -0.8438400; + input[100] = -0.1089903; + input[101] = -0.6354192; + input[102] = 0.8772392; + input[103] = 0.2844733; + input[104] = 0.0975270; + input[105] = -0.9785872; + input[106] = -0.4320499; + input[107] = -1.4937501; + input[108] = -2.0644901; + input[109] = 0.0851217; + input[110] = 0.6644159; + input[111] = 0.4168026; + input[112] = 0.0958830; + input[113] = -1.5699565; + input[114] = 0.3739572; + input[115] = -0.1420672; + input[116] = -0.7864021; + input[117] = 0.2443752; + input[118] = -0.9811850; + input[119] = -0.0698569; + input[120] = 0.1463890; + input[121] = 0.2536245; + input[122] = 0.2136150; + input[123] = 0.3113698; + input[124] = 1.8353856; + input[125] = 1.4473228; + input[126] = -0.7373698; + input[127] = 0.2485314; + input[128] = -0.4789796; + input[129] = -0.3396149; + input[130] = 0.6438198; + input[131] = 0.7287521; + input[132] = -1.5119252; + input[133] = -0.1006494; + input[134] = 1.8955028; + input[135] = 1.0871323; + input[136] = 0.3620502; + input[137] = -0.8826663; + input[138] = 1.2220223; + input[139] = -1.2817260; + input[140] = 1.4153577; + input[141] = 0.4148015; + input[142] = 1.3458617; + input[143] = 1.9718349; + + result[0] = 0.1333608; + result[1] = -0.1716091; + result[2] = 0.2201060; + result[3] = -0.1585989; + result[4] = -0.2291074; + result[5] = 0.4254351; + + // input0 + T0->resize(in_dims); + T0->getImpl()->setRawPtr(input, in_nb_elems); + + // results + Tres->resize(out_dims); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + op->forwardDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); + ++i) { + REQUIRE(Tres->dims().at(i) == + op->getOutput(0)->dims().at(i)); + } + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + delete[] input; + delete[] result; + } + } + std::cout << "GlobalAveragePooling total execution time : " + << duration.count() << "µs" << std::endl; + std::cout << "Number of operations : " << number_of_operation + << std::endl; + std::cout << "Operation / µs = " + << number_of_operation / duration.count() << std::endl; } - } - std::cout << 
"GlobalAveragePooling total execution time : " - << duration.count() << "µs" << std::endl; - std::cout << "Number of operations : " << number_of_operation - << std::endl; - std::cout << "Operation / µs = " << number_of_operation / duration.count() - << std::endl; } - } } } // namespace Aidge diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp index 85dd9f99ee425216f8495e7813b35ce69be9c806..18901d3b73cb32421fd9156d5b02d2e3625f9e7f 100644 --- a/unit_tests/operator/Test_LeakyReLUImpl.cpp +++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp @@ -20,16 +20,15 @@ using namespace Aidge; TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") { SECTION("1D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> { - {0, 1, 2,-3, 4,-5,-6, 7, 8, 9} - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,10> { - {0, 1, 2, 0, 4, 0, 0, 7, 8, 9} - }); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array1D<int, 10>{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array1D<int, 10>{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}}); std::shared_ptr<Node> myLeakyReLU = LeakyReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator()); - op->associateInput(0,input0); + auto op = std::static_pointer_cast<OperatorTensor>( + myLeakyReLU->getOperator()); + op->associateInput(0, input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); myLeakyReLU->forward(); @@ -37,22 +36,17 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") { } SECTION("2D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,10> { - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - } - }); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array2D<int, 2, 10>{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array2D<int, 2, 10>{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, + {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}); std::shared_ptr<Node> myLeakyReLU = LeakyReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator()); - op->associateInput(0,input0); + auto op = std::static_pointer_cast<OperatorTensor>( + myLeakyReLU->getOperator()); + op->associateInput(0, input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); myLeakyReLU->forward(); @@ -60,34 +54,21 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") { } SECTION("3D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> { - { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,2,10> { - { - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - }, - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - } - } - }); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array3D<int, 2, 2, 10>{{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, 
-6, 7, -1, 10}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array3D<int, 2, 2, 10>{{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                     {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}},
+                                    {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                     {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}});
 
         std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myLeakyReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myLeakyReLU->forward();
@@ -95,58 +76,30 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
     }
 
     SECTION("4D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    },
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    },
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    }
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input0 =
+            std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{
+                {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}},
+                 {{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 2, 2, 2, 10>{{{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}},
+                                        {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}},
+                                       {{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}},
+                                        {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}}});
 
         std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myLeakyReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myLeakyReLU->forward();
@@ -154,16 +107,17 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
     }
 
     SECTION("Test construction attribute: negative_slope") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> {
-            {0.0f, 1.0f, 2.0f,-3.0f, 4.0f,-5.0f,-6.0f, 7.0f, 8.0f, 9.0f}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<float,10> {
-            {0.0f, 1.0f, 2.0f,-1.5f, 4.0f,-2.5f,-3.0f, 7.0f, 8.0f, 9.0f}
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<
+            Tensor>(Array1D<float, 10>{
+            {0.0f, 1.0f, 2.0f, -3.0f, 4.0f, -5.0f, -6.0f, 7.0f, 8.0f, 9.0f}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<
+            Tensor>(Array1D<float, 10>{
+            {0.0f, 1.0f, 2.0f, -1.5f, 4.0f, -2.5f, -3.0f, 7.0f, 8.0f, 9.0f}});
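+        // A quick check of the expected values: LeakyReLU with
+        // negative_slope = 0.5 keeps non-negative entries unchanged and
+        // halves the negative ones, e.g. -3.0f -> -1.5f and -5.0f -> -2.5f,
+        // which is exactly what expectedOutput above encodes.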
        std::shared_ptr<Node> myLeakyReLU = LeakyReLU(0.5f);
-        auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myLeakyReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myLeakyReLU->forward();
diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp
index d6e934b4dc8d84e8a595eb74d1af9d2c68c892d1..141126fd43144121f6e8a1c905806c9518cc1a7d 100644
--- a/unit_tests/operator/Test_MatMulImpl.cpp
+++ b/unit_tests/operator/Test_MatMulImpl.cpp
@@ -10,12 +10,12 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef> // std::size_t
-#include <cstdint> // std::uint16_t
 #include <chrono>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iostream>
 #include <memory>
-#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/MatMul.hpp"
@@ -31,13 +31,16 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> dis(0.0, 1.0); // Random float distribution between 0 and 1
+    std::uniform_real_distribution<float> dis(
+        0.0,
+        1.0); // Random float distribution between 0 and 1
     std::uniform_int_distribution<std::size_t> distDims(10, 100);
     std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);
 
     // Create MatMul Operator
     std::shared_ptr<Node> myMatMul = MatMul();
-    auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
+    auto op =
+        std::static_pointer_cast<OperatorTensor>(myMatMul->getOperator());
 
     // To measure execution time of 'MatMul_Op::forward()' member function call
     std::chrono::time_point<std::chrono::system_clock> start;
@@ -51,44 +54,47 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
         const std::size_t dim0 = distDims(gen);
         const std::size_t dim1 = distDims(gen);
         const std::size_t dim2 = distDims(gen);
-        totalComputation += dim0*dim1*dim2;
+        totalComputation += dim0 * dim1 * dim2;
 
         // Create and populate the array with random float values
-        float* bigArray1 = new float[dim0*dim1];
-        for (int i = 0; i < dim0*dim1; ++i) {
+        float *bigArray1 = new float[dim0 * dim1];
+        for (int i = 0; i < dim0 * dim1; ++i) {
             bigArray1[i] = dis(gen); // Generate random float value
         }
-        float* bigArray2 = new float[dim1*dim2];
-        for (int i = 0; i < dim1*dim2; ++i) {
+        float *bigArray2 = new float[dim1 * dim2];
+        for (int i = 0; i < dim1 * dim2; ++i) {
             bigArray2[i] = dis(gen); // Generate random float value
         }
-        float* res = new float[dim0*dim2];
+        float *res = new float[dim0 * dim2];
         for (int i = 0; i < dim0; ++i) {
             for (int j = 0; j < dim2; ++j) {
                 float sum = 0.0;
                 for (int k = 0; k < dim1; ++k) {
-                    sum += bigArray1[i*dim1+k] * bigArray2[k*dim2+j];
+                    sum +=
+                        bigArray1[i * dim1 + k] * bigArray2[k * dim2 + j];
                 }
-                res[i*dim2+j] = sum;
+                res[i * dim2 + j] = sum;
             }
         }
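+        // The reference result is laid out row-major: res[i * dim2 + j]
+        // accumulates the dot product of row i of bigArray1 (dim0 x dim1)
+        // with column j of bigArray2 (dim1 x dim2), i.e. a plain O(n^3)
+        // GEMM used as ground truth for MatMul_Op::forward().
 
-        // Convert bigArray1 to Tensor
-        std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
-        T1 -> resize({dim0,dim1});
-        T1 -> setBackend("cpu");
-        T1 -> getImpl() -> setRawPtr(bigArray1, dim0*dim1);
+        std::shared_ptr<Tensor> T1 =
+            std::make_shared<Tensor>(DataType::Float32);
+        T1->resize({dim0,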
dim1}); + T1->setBackend("cpu"); + T1->getImpl()->setRawPtr(bigArray1, dim0 * dim1); // Convert bigArray2 to Tensor - std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32); - T2 -> resize({dim1,dim2}); - T2 -> setBackend("cpu"); - T2 -> getImpl() -> setRawPtr(bigArray2, dim1*dim2); + std::shared_ptr<Tensor> T2 = + std::make_shared<Tensor>(DataType::Float32); + T2->resize({dim1, dim2}); + T2->setBackend("cpu"); + T2->getImpl()->setRawPtr(bigArray2, dim1 * dim2); // convert res to Tensor - std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32); - Tres -> resize({dim0,dim2}); - Tres -> setBackend("cpu"); - Tres -> getImpl() -> setRawPtr(res, dim0*dim2); + std::shared_ptr<Tensor> Tres = + std::make_shared<Tensor>(DataType::Float32); + Tres->resize({dim0, dim2}); + Tres->setBackend("cpu"); + Tres->getImpl()->setRawPtr(res, dim0 * dim2); op->associateInput(0, T1); op->associateInput(1, T2); @@ -98,7 +104,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { start = std::chrono::system_clock::now(); myMatMul->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -106,7 +113,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { delete[] bigArray2; delete[] res; } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; + std::cout << "multiplications over time spent: " + << totalComputation / duration.count() << std::endl; std::cout << "total time: " << duration.count() << std::endl; } @@ -119,44 +127,48 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { const std::size_t dim0 = distDims(gen); const std::size_t dim1 = distDims(gen); const std::size_t dim2 = distDims(gen); - totalComputation += dim0*dim1*dim2*dimNb; + totalComputation += dim0 * dim1 * dim2 * dimNb; // Create and populate the array with random float values - float* bigArray1 = new float[dimNb*dim0*dim1]; - for (std::size_t i = 0; i < dimNb*dim0*dim1; ++i) { + float *bigArray1 = new float[dimNb * dim0 * dim1]; + for (std::size_t i = 0; i < dimNb * dim0 * dim1; ++i) { bigArray1[i] = dis(gen); // Generate random float value } - float* bigArray2 = new float[dimNb*dim1*dim2]; - for (int i = 0; i < dimNb*dim1*dim2; ++i) { + float *bigArray2 = new float[dimNb * dim1 * dim2]; + for (int i = 0; i < dimNb * dim1 * dim2; ++i) { bigArray2[i] = dis(gen); // Generate random float value } - float* res = new float[dimNb*dim0*dim2]; + float *res = new float[dimNb * dim0 * dim2]; for (std::size_t n = 0; n < dimNb; ++n) { for (int i = 0; i < dim0; ++i) { for (int j = 0; j < dim2; ++j) { float sum = 0.0; for (int k = 0; k < dim1; ++k) { - sum += bigArray1[n*dim0*dim1 + i*dim1 + k] * bigArray2[n*dim2*dim1+k*dim2+j]; + sum += bigArray1[n * dim0 * dim1 + i * dim1 + k] * + bigArray2[n * dim2 * dim1 + k * dim2 + j]; } - res[n*dim0*dim2+i*dim2+j] = sum; + res[n * dim0 * dim2 + i * dim2 + j] = sum; } } } // Convert bigArray1 to Tensor - std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32); - T1 -> resize({dimNb,dim0,dim1}); - T1 -> setBackend("cpu"); - T1 -> getImpl() -> setRawPtr(bigArray1, dimNb*dim0*dim1); + std::shared_ptr<Tensor> T1 = + std::make_shared<Tensor>(DataType::Float32); + T1->resize({dimNb, dim0, dim1}); + T1->setBackend("cpu"); + T1->getImpl()->setRawPtr(bigArray1, dimNb * dim0 * 
dim1); // Convert bigArray2 to Tensor - std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32); - T2 -> resize({dimNb,dim1,dim2}); - T2 -> setBackend("cpu"); - T2 -> getImpl() -> setRawPtr(bigArray2, dimNb*dim1*dim2); + std::shared_ptr<Tensor> T2 = + std::make_shared<Tensor>(DataType::Float32); + T2->resize({dimNb, dim1, dim2}); + T2->setBackend("cpu"); + T2->getImpl()->setRawPtr(bigArray2, dimNb * dim1 * dim2); // convert res to Tensor - std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32); - Tres -> resize({dimNb,dim0,dim2}); - Tres -> setBackend("cpu"); - Tres -> getImpl() -> setRawPtr(res, dimNb*dim0*dim2); + std::shared_ptr<Tensor> Tres = + std::make_shared<Tensor>(DataType::Float32); + Tres->resize({dimNb, dim0, dim2}); + Tres->setBackend("cpu"); + Tres->getImpl()->setRawPtr(res, dimNb * dim0 * dim2); op->associateInput(0, T1); op->associateInput(1, T2); @@ -166,7 +178,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { start = std::chrono::system_clock::now(); myMatMul->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -174,7 +187,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { delete[] bigArray2; delete[] res; } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; + std::cout << "multiplications over time spent: " + << totalComputation / duration.count() << std::endl; std::cout << "total time: " << duration.count() << std::endl; } @@ -188,46 +202,55 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { const std::size_t dim0 = distDims(gen); const std::size_t dim1 = distDims(gen); const std::size_t dim2 = distDims(gen); - totalComputation += dim0*dim1*dim2*dimNb1*dimNb2; + totalComputation += dim0 * dim1 * dim2 * dimNb1 * dimNb2; // Create and populate the array with random float values - float* bigArray1 = new float[dimNb1*dimNb2*dim0*dim1]; - for (std::size_t i = 0; i < dimNb1*dimNb2*dim0*dim1; ++i) { + float *bigArray1 = new float[dimNb1 * dimNb2 * dim0 * dim1]; + for (std::size_t i = 0; i < dimNb1 * dimNb2 * dim0 * dim1; ++i) { bigArray1[i] = dis(gen); // Generate random float value } - float* bigArray2 = new float[dimNb1*dimNb2*dim1*dim2]; - for (std::size_t i = 0; i < dimNb1*dimNb2*dim1*dim2; ++i) { + float *bigArray2 = new float[dimNb1 * dimNb2 * dim1 * dim2]; + for (std::size_t i = 0; i < dimNb1 * dimNb2 * dim1 * dim2; ++i) { bigArray2[i] = dis(gen); // Generate random float value } - float* res = new float[dimNb1*dimNb2*dim0*dim2]; + float *res = new float[dimNb1 * dimNb2 * dim0 * dim2]; for (std::size_t n1 = 0; n1 < dimNb1; ++n1) { for (std::size_t n2 = 0; n2 < dimNb2; ++n2) { for (int i = 0; i < dim0; ++i) { for (int j = 0; j < dim2; ++j) { float sum = 0.0; for (int k = 0; k < dim1; ++k) { - sum += bigArray1[n1*dimNb2*dim0*dim1+n2*dim0*dim1+i*dim1+k] * bigArray2[n1*dimNb2*dim1*dim2+n2*dim1*dim2+k*dim2+j]; + sum += + bigArray1[n1 * dimNb2 * dim0 * dim1 + + n2 * dim0 * dim1 + i * dim1 + + k] * + bigArray2[n1 * dimNb2 * dim1 * dim2 + + n2 * dim1 * dim2 + k * dim2 + j]; } - res[n1*dimNb2*dim0*dim2+n2*dim0*dim2+i*dim2+j] = sum; + res[n1 * dimNb2 * dim0 * dim2 + n2 * dim0 * dim2 + + i * dim2 + j] = sum; } } } } // Convert bigArray1 to Tensor - std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32); - T1 
-> resize({dimNb1,dimNb2,dim0,dim1}); - T1 -> setBackend("cpu"); - T1 -> getImpl() -> setRawPtr(bigArray1, dimNb1*dimNb2*dim0*dim1); + std::shared_ptr<Tensor> T1 = + std::make_shared<Tensor>(DataType::Float32); + T1->resize({dimNb1, dimNb2, dim0, dim1}); + T1->setBackend("cpu"); + T1->getImpl()->setRawPtr(bigArray1, dimNb1 * dimNb2 * dim0 * dim1); // Convert bigArray2 to Tensor - std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32); - T2 -> resize({dimNb1,dimNb2,dim1,dim2}); - T2 -> setBackend("cpu"); - T2 -> getImpl() -> setRawPtr(bigArray2, dimNb1*dimNb2*dim1*dim2); + std::shared_ptr<Tensor> T2 = + std::make_shared<Tensor>(DataType::Float32); + T2->resize({dimNb1, dimNb2, dim1, dim2}); + T2->setBackend("cpu"); + T2->getImpl()->setRawPtr(bigArray2, dimNb1 * dimNb2 * dim1 * dim2); // convert res to Tensor - std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32); - Tres -> resize({dimNb1,dimNb2,dim0,dim2}); - Tres -> setBackend("cpu"); - Tres -> getImpl() -> setRawPtr(res, dimNb1*dimNb2*dim0*dim2); + std::shared_ptr<Tensor> Tres = + std::make_shared<Tensor>(DataType::Float32); + Tres->resize({dimNb1, dimNb2, dim0, dim2}); + Tres->setBackend("cpu"); + Tres->getImpl()->setRawPtr(res, dimNb1 * dimNb2 * dim0 * dim2); op->associateInput(0, T1); op->associateInput(1, T2); @@ -237,14 +260,16 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { start = std::chrono::system_clock::now(); myMatMul->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); delete[] bigArray1; delete[] bigArray2; delete[] res; } - std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; + std::cout << "multiplications over time spent: " + << totalComputation / duration.count() << std::endl; std::cout << "total time: " << duration.count() << std::endl; } @@ -252,18 +277,18 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { // allows to test both computation with a 1-D Tensor and broadcasting // input_0 std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); - op->associateInput(0,T0); + op->associateInput(0, T0); const std::size_t dim0 = distNbMatrix(gen); const std::size_t dim1 = distNbMatrix(gen) + 1; const std::size_t dim2 = distNbMatrix(gen); const std::size_t dim3 = distNbMatrix(gen); - T0->resize({dim0,dim1,dim2,dim3}); + T0->resize({dim0, dim1, dim2, dim3}); T0->setDataType(DataType::Float32); T0->setBackend("cpu"); // input_1 std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(); - op -> associateInput(1,T1); + op->associateInput(1, T1); T1->resize({dim3}); T1->setDataType(DataType::Float32); T1->setBackend("cpu"); @@ -272,7 +297,6 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { op->setBackend("cpu"); op->forwardDims(); myMatMul->forward(); - } } } // namespace Aidge \ No newline at end of file diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp index af04ede4e33c32ce785804e2484b6ba9ac5edc36..c026d2dc9dfca2f0faca77dd28601e4959ccca2c 100644 --- a/unit_tests/operator/Test_MaxPoolingImpl.cpp +++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp @@ -10,8 +10,8 @@ ********************************************************************************/ #include <catch2/catch_test_macros.hpp> -#include <memory> #include <cstdlib> +#include 
<memory>
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/MaxPooling.hpp"
@@ -20,59 +20,44 @@
 using namespace Aidge;
 
-
 TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,2,5,5> { //NCHW
-        {
-            {
-                {{-0.3848, 0.2166, -0.4373, 0.6142, 0.5277},
-                {0.7995, 0.3638, -1.4589, -1.0843, 1.0918},
-                {0.7147, 0.0936, -1.2902, 1.2037, 0.4874},
-                {-0.5981, 2.1184, -0.9175, 1.3859, 0.3305},
-                {-1.7700, 0.0563, -0.3914, 0.0538, -0.3955}},
+    std::shared_ptr<Tensor> myInput =
+        std::make_shared<Tensor>(Array4D<float, 2, 2, 5, 5>{
+            // NCHW
+            {{{{-0.3848, 0.2166, -0.4373, 0.6142, 0.5277},
+               {0.7995, 0.3638, -1.4589, -1.0843, 1.0918},
+               {0.7147, 0.0936, -1.2902, 1.2037, 0.4874},
+               {-0.5981, 2.1184, -0.9175, 1.3859, 0.3305},
+               {-1.7700, 0.0563, -0.3914, 0.0538, -0.3955}},
 
-                {{-3.1409, -0.4554, 0.0524, 2.2291, 0.4859},
-                {-0.7465, -0.6567, -2.3703, -0.6386, -1.4152},
-                { 2.2329, -0.5850, 0.0700, 1.2838, -1.7363},
-                { 0.2139, 0.0624, -1.0689, -0.8221, -0.8038},
-                { 0.1886, -0.7840, -0.2313, 0.2651, -1.6244}}
-            },
-            {
-                {{ 0.4371, 1.6417, 0.9129, 0.6325, 0.5438},
-                {-2.3552, -0.8850, -0.0232, -0.5462, -1.2011},
-                {1.7653, -1.6668, -1.0814, 0.6182, 1.2071},
-                {0.9541, -0.5133, 0.8664, -0.8892, 1.4585},
-                {1.0220, -0.5107, 0.1829, -0.2301, -0.4268}},
+              {{-3.1409, -0.4554, 0.0524, 2.2291, 0.4859},
+               {-0.7465, -0.6567, -2.3703, -0.6386, -1.4152},
+               {2.2329, -0.5850, 0.0700, 1.2838, -1.7363},
+               {0.2139, 0.0624, -1.0689, -0.8221, -0.8038},
+               {0.1886, -0.7840, -0.2313, 0.2651, -1.6244}}},
+             {{{0.4371, 1.6417, 0.9129, 0.6325, 0.5438},
+               {-2.3552, -0.8850, -0.0232, -0.5462, -1.2011},
+               {1.7653, -1.6668, -1.0814, 0.6182, 1.2071},
+               {0.9541, -0.5133, 0.8664, -0.8892, 1.4585},
+               {1.0220, -0.5107, 0.1829, -0.2301, -0.4268}},
 
-                {{ 1.0429, 0.6279, -0.2875, 0.7187, -0.1500},
-                {1.6041, 2.9635, 1.4172, -0.7517, 0.5441},
-                {-0.2276, 0.0857, 0.6776, -0.1389, -0.0614},
-                {-0.1547, -0.3435, 0.0650, -0.5095, -1.8073},
-                {1.7217, 0.3999, -0.5953, 1.0604, -0.4126}}
-            }
-        }
-    });
+              {{1.0429, 0.6279, -0.2875, 0.7187, -0.1500},
+               {1.6041, 2.9635, 1.4172, -0.7517, 0.5441},
+               {-0.2276, 0.0857, 0.6776, -0.1389, -0.0614},
+               {-0.1547, -0.3435, 0.0650, -0.5095, -1.8073},
+               {1.7217, 0.3999, -0.5953, 1.0604, -0.4126}}}}});
 
     SECTION("Stride") {
-        std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2});
-        auto op = std::static_pointer_cast<OperatorTensor>(myMaxPool -> getOperator());
+        std::shared_ptr<Node> myMaxPool = MaxPooling({2, 2}, "mycdw", {2, 2});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myMaxPool->getOperator());
 
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> {
-            {
-                {
-                    {{ 0.7995, 0.6142},
-                    { 2.1184, 1.3859}},
-                    {{ -0.4554, 2.2291},
-                    { 2.2329, 1.2838}}
-                },
-                {
-                    {{1.6417, 0.9129},
-                    {1.7653, 0.8664}},
-                    {{2.9635, 1.4172},
-                    {0.0857, 0.6776}}
-                }
-            }
-        });
-        myMaxPool->getOperator()->associateInput(0,myInput);
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{
+                {{{{0.7995, 0.6142}, {2.1184, 1.3859}},
+                  {{-0.4554, 2.2291}, {2.2329, 1.2838}}},
+                 {{{1.6417, 0.9129}, {1.7653, 0.8664}},
+                  {{2.9635, 1.4172}, {0.0857, 0.6776}}}}});
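+        // Sanity check on the expected values (sketch): with a 2x2 kernel
+        // and stride 2 over a 5x5 map, the windows do not overlap and the
+        // last row/column is dropped, giving a 2x2 output per channel.
+        // E.g. the first window of batch 0, channel 0 holds {-0.3848,
+        // 0.2166, 0.7995, 0.3638}, whose max is 0.7995 = myOutput[0][0][0][0].
+        myMaxPool->getOperator()->associateInput(0, myInput);
         myMaxPool->getOperator()->setDataType(DataType::Float32);
         myMaxPool->getOperator()->setBackend("cpu");
         myMaxPool->forward();
diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp
index 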
271a1e2f9860d92f840916f6b2e396993b0bea39..1ea4fe8c3195c89c51def462809107caac7da373 100644 --- a/unit_tests/operator/Test_MetaOperator.cpp +++ b/unit_tests/operator/Test_MetaOperator.cpp @@ -14,7 +14,6 @@ #include <cstdlib> #include <memory> -#include "aidge/utils/TensorUtils.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/data/Tensor.hpp" @@ -23,56 +22,60 @@ #include "aidge/operator/MetaOperatorDefs.hpp" #include "aidge/operator/Pad.hpp" #include "aidge/operator/Pop.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/scheduler/ParallelScheduler.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/utils/TensorUtils.hpp" using namespace Aidge; TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { - SECTION("PaddedConv(forward)") { - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>( - Array4D<double, 4, 3, 3, 3>{{{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02}, - {1.16492919e-01, 8.21634093e-02, 1.17413265e-01}, - {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}}, - {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01}, - {6.12586558e-01, 8.09918671e-02, 8.40649383e-01}, - {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}}, - {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01}, - {1.36960611e-01, 4.64806728e-01, 4.85150000e-01}, - {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}}, - - {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01}, - {1.56806559e-01, 6.22280998e-01, 3.15827594e-01}, - {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}}, - {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01}, - {1.67925807e-01, 2.68356150e-01, 6.28875602e-01}, - {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}}, - {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01}, - {7.63047502e-01, 5.12539506e-02, 9.77400493e-01}, - {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}}, - - {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01}, - {7.10897067e-02, 5.02579011e-01, 3.35236224e-01}, - {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}}, - {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01}, - {1.59875539e-01, 9.13163381e-01, 3.59806060e-01}, - {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}}, - {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01}, - {5.46298282e-01, 2.89698587e-01, 2.62612651e-01}, - {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}}, - - {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01}, - {8.67878485e-01, 2.93263422e-01, 8.03912714e-01}, - {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}}, - {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01}, - {5.80538620e-01, 6.63031275e-01, 2.07247191e-01}, - {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}}, - {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01}, - {7.34639028e-01, 2.84957200e-02, 9.70225217e-01}, - {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}}); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>( - Array1D<double, 4>{{0.16884905, 0.27994487, 0.57227465, 0.06435205}}); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<double, 2, 3, 5, 5>{ + SECTION("PaddedConv(forward)") { + std::shared_ptr<Tensor> myWeights = + std::make_shared<Tensor>(Array4D<double, 4, 3, 3, 3>{ + {{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02}, + {1.16492919e-01, 8.21634093e-02, 1.17413265e-01}, + {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}}, + {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01}, + {6.12586558e-01, 8.09918671e-02, 8.40649383e-01}, + {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}}, + 
{{1.76992844e-02, 7.78697112e-01, 8.14531592e-01}, + {1.36960611e-01, 4.64806728e-01, 4.85150000e-01}, + {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}}, + + {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01}, + {1.56806559e-01, 6.22280998e-01, 3.15827594e-01}, + {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}}, + {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01}, + {1.67925807e-01, 2.68356150e-01, 6.28875602e-01}, + {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}}, + {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01}, + {7.63047502e-01, 5.12539506e-02, 9.77400493e-01}, + {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}}, + + {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01}, + {7.10897067e-02, 5.02579011e-01, 3.35236224e-01}, + {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}}, + {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01}, + {1.59875539e-01, 9.13163381e-01, 3.59806060e-01}, + {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}}, + {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01}, + {5.46298282e-01, 2.89698587e-01, 2.62612651e-01}, + {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}}, + + {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01}, + {8.67878485e-01, 2.93263422e-01, 8.03912714e-01}, + {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}}, + {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01}, + {5.80538620e-01, 6.63031275e-01, 2.07247191e-01}, + {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}}, + {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01}, + {7.34639028e-01, 2.84957200e-02, 9.70225217e-01}, + {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}}); + std::shared_ptr<Tensor> myBias = + std::make_shared<Tensor>(Array1D<double, 4>{ + {0.16884905, 0.27994487, 0.57227465, 0.06435205}}); + std::shared_ptr<Tensor> myInput = std::make_shared< + Tensor>(Array4D<double, 2, 3, 5, 5>{ // NCHW {{{{0.43224481, 0.9047832, 0.18402257, 0.06162838, 0.52490127}, {0.27773404, 0.55402353, 0.9485062, 0.31197083, 0.80328607}, @@ -108,93 +111,106 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { {0.95873236, 0.6742374, 0.55679676, 0.6323497, 0.34072958}, {0.49694061, 0.79173045, 0.19738225, 0.14755281, 0.80818177}, {0.02332061, 0.74270703, 0.59415632, 0.08195934, 0.46295434}, - {0.71426058, 0.85032931, 0.90750818, 0.28768431, 0.4401146}}}}}); - - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( - Array4D<double, 2, 4, 5, 5>{{{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273}, - {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567}, - {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523}, - {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136}, - {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}}, - - {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890}, - {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475}, - {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442}, - {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438}, - {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}}, - - {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092}, - {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575}, - {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146}, - {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581}, - {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}}, - - {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740}, - {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107}, - {3.77927423, 6.82826376, 7.41777134, 7.57402420, 
5.13131523}, - {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123}, - {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}}, - - - {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229}, - {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444}, - {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241}, - {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706}, - {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}}, - - {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648}, - {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705}, - {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404}, - {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069}, - {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}}, - - {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888}, - {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179}, - {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316}, - {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387}, - {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}}, - - {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038}, - {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408}, - {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357}, - {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303}, - {3.16612267, 4.38248920, 5.23248482, 4.21292210, 2.86031270}}}}}); - - std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv"); - auto convOp = std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); - - std::shared_ptr<Node> myPad = + {0.71426058, + 0.85032931, + 0.90750818, + 0.28768431, + 0.4401146}}}}}); + + std::shared_ptr<Tensor> myOutput = std::make_shared< + Tensor>(Array4D<double, 2, 4, 5, 5>{ + {{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273}, + {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567}, + {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523}, + {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136}, + {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}}, + + {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890}, + {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475}, + {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442}, + {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438}, + {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}}, + + {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092}, + {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575}, + {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146}, + {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581}, + {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}}, + + {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740}, + {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107}, + {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523}, + {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123}, + {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}}, + + {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229}, + {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444}, + {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241}, + {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706}, + {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}}, + + {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648}, + {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705}, 
+ {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404}, + {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069}, + {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}}, + + {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888}, + {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179}, + {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316}, + {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387}, + {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}}, + + {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038}, + {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408}, + {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357}, + {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303}, + {3.16612267, + 4.38248920, + 5.23248482, + 4.21292210, + 2.86031270}}}}}); + + std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv"); + auto convOp = + std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); + + std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "myPad", PadBorderType::Constant, 0.0); - auto padOp = std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); - - convOp->setInput(1, myWeights); - convOp->setInput(2, myBias); - - myPad->addChild(myConv, 0, 0); - padOp->setInput(0, myInput); - - padOp->setDataType(DataType::Float64); - padOp->setBackend("cpu"); - convOp->setDataType(DataType::Float64); - convOp->setBackend("cpu"); - - myPad->forward(); - myConv->forward(); - convOp -> getOutput(0) -> print(); - - double* computedOutput = static_cast<double*>(convOp->getOutput(0)->getImpl()->rawPtr()); - double* expectedOutput = static_cast<double*>(myOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i < myOutput->size(); ++i) { - REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5); - } + auto padOp = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + + convOp->setInput(1, myWeights); + convOp->setInput(2, myBias); + + myPad->addChild(myConv, 0, 0); + padOp->setInput(0, myInput); + + padOp->setDataType(DataType::Float64); + padOp->setBackend("cpu"); + convOp->setDataType(DataType::Float64); + convOp->setBackend("cpu"); + + myPad->forward(); + myConv->forward(); + convOp->getOutput(0)->print(); + + double *computedOutput = + static_cast<double *>(convOp->getOutput(0)->getImpl()->rawPtr()); + double *expectedOutput = + static_cast<double *>(myOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < myOutput->size(); ++i) { + REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5); + } - std::shared_ptr<Node> myPaddedConv = + std::shared_ptr<Node> myPaddedConv = PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1}); - } + } SECTION("LSTM(forward)") { auto pop = Pop(); auto myLSTM = LSTM(32, 64, 0, true, "ltsm"); - auto op = std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + auto op = + std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); auto microGraph = op->getMicroGraph(); microGraph->save("lstm", false, true); @@ -209,14 +225,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { } REQUIRE(myLSTM->nbOutputs() == 2); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array2D<float, 16, 32>{}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 32, 64>{}); - std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( - Array2D<float, 64, 32>{}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 64, 64>{}); + 
std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array2D<float, 16, 32>{}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 32, 64>{}); + std::shared_ptr<Tensor> myInitW = + std::make_shared<Tensor>(Array2D<float, 64, 32>{}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 64, 64>{}); pop->addChild(myLSTM, 0, 0); pop->getOperator()->associateInput(0, myInput); @@ -246,7 +262,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { microGraph->save("lstm_dims", true, true); REQUIRE(op->dimsForwarded()); - auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler(); + auto microGraphScheduler = + std::dynamic_pointer_cast<MetaOperator_Op>(op) + ->getMicroGraphScheduler(); microGraphScheduler->saveSchedulingDiagram("lstm_scheduling"); REQUIRE(op->getNbConsumedData(0).data == 512); @@ -259,9 +277,11 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { } SECTION("LSTM(forward_values)") { auto myLSTM = LSTM(2, 3, 0, true, "ltsm"); - auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); + auto op = + std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); - auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph(); + auto microGraph = + std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph(); microGraph->save("lstm", false, false); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); @@ -276,12 +296,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); op->associateInput(0, myInput); op->associateInput(17, myInit); @@ -308,12 +330,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { microGraph->save("lstm_values_dims", false, true); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412}, - {0.25606447, 0.25606447, 0.25606447}, - {0.40323776, 0.40323776, 0.40323776}}}); - + Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412}, + {0.25606447, 0.25606447, 0.25606447}, + {0.40323776, 0.40323776, 0.40323776}}}); - auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler(); + auto microGraphScheduler = + std::dynamic_pointer_cast<MetaOperator_Op>(op) + ->getMicroGraphScheduler(); microGraphScheduler->saveSchedulingDiagram("lstm_values_scheduling"); op->getOutput(0)->print(); @@ -325,7 +348,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); auto myGraph = Sequential({pop, myLSTM}); - auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); + auto op = + 
std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data); @@ -338,13 +362,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); pop->getOperator()->associateInput(0, myInput); op->associateInput(17, myInit); @@ -371,9 +398,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { scheduler.saveSchedulingDiagram("lstm_seq_schedule"); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, - {0.49801484, 0.49801484, 0.49801484}, - {0.67162132, 0.67162132, 0.67162132}}}); + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); myGraph->save("lstm_seq_mygraph", true, true); @@ -385,7 +412,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { SECTION("LSTM(forward_values_seq_flatten)(sequential)") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); - auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + auto op = + std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); // Here we test LSTM as it is was flatten in the graph. // We just borrow its micro-graph into our larger myGraph graph. 
@@ -405,13 +433,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); pop->getOperator()->associateInput(0, myInput); op->associateInput(17, myInit); @@ -419,16 +450,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { // Weights X auto prodX = Producer(myInitW); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, + 0, + 1); // Weights H auto prodH = Producer(myInitR); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, + 0, + 1); myGraph->add({prodX, prodH}); myGraph->setDataType(DataType::Float32); @@ -436,9 +483,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { myGraph->save("lstm_seq_flatten", true, true); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, - {0.49801484, 0.49801484, 0.49801484}, - {0.67162132, 0.67162132, 0.67162132}}}); + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); auto scheduler = SequentialScheduler(myGraph); scheduler.generateScheduling(); @@ -454,7 +501,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { SECTION("LSTM(forward_values_seq_flatten)(parallel)") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); - auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + auto op = + 
std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); // Here we test LSTM as it is was flatten in the graph. // We just borrow its micro-graph into our larger myGraph graph. @@ -474,13 +522,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); pop->getOperator()->associateInput(0, myInput); op->associateInput(17, myInit); @@ -488,16 +539,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { // Weights X auto prodX = Producer(myInitW); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, + 0, + 1); // Weights H auto prodH = Producer(myInitR); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, + 0, + 1); myGraph->add({prodX, prodH}); myGraph->setDataType(DataType::Float32); @@ -505,9 +572,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { myGraph->save("lstm_seq_flatten", true, true); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, - {0.49801484, 0.49801484, 0.49801484}, - {0.67162132, 0.67162132, 0.67162132}}}); + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); auto scheduler = ParallelScheduler(myGraph); scheduler.generateScheduling(); diff --git a/unit_tests/operator/Test_MulImpl.cpp b/unit_tests/operator/Test_MulImpl.cpp index 
3378861d0d3d7e74e7867c2765a0b09069fa8caf..f228a42827fc218fae5e3fd1abcafe2c908d0215 100644 --- a/unit_tests/operator/Test_MulImpl.cpp +++ b/unit_tests/operator/Test_MulImpl.cpp @@ -10,13 +10,13 @@ ********************************************************************************/ #include <catch2/catch_test_macros.hpp> -#include <cstddef> // std::size_t -#include <cstdint> // std::uint16_t #include <chrono> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t #include <iostream> #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution #include "aidge/data/Tensor.hpp" #include "aidge/operator/Mul.hpp" @@ -24,360 +24,210 @@ namespace Aidge { - TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]") - { - std::shared_ptr<Node> myMul = Mul(); - auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator()); - op->setDataType(DataType::Float32); - op->setBackend("cpu"); - - SECTION("Case 1: 2D and 1D tensors") { - const auto T0 = std::make_shared<Tensor>(Array2D<float,2,3>( - { - { - {1,2,3},{4,5,6} - } - } - )); +TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]") { + std::shared_ptr<Node> myMul = Mul(); + auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator()); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); - const auto T1 = std::make_shared<Tensor>(Array1D<float,3>( - {0.1,0.2,0.3} - )); + SECTION("Case 1: 2D and 1D tensors") { + const auto T0 = std::make_shared<Tensor>( + Array2D<float, 2, 3>({{{1, 2, 3}, {4, 5, 6}}})); - T0->setDataType(DataType::Float32); - T0->setBackend("cpu"); - T1->setDataType(DataType::Float32); - T1->setBackend("cpu"); + const auto T1 = + std::make_shared<Tensor>(Array1D<float, 3>({0.1, 0.2, 0.3})); - op->getOutput(0)->setGrad(std::make_shared<Tensor>(Array2D<float,2,3>({{{1.0,1.0,1.0},{1.0,1.0,1.0}}}))); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + T1->setDataType(DataType::Float32); + T1->setBackend("cpu"); - op->associateInput(0,T0); - op->associateInput(1,T1); - op->forwardDims(); + op->getOutput(0)->setGrad(std::make_shared<Tensor>( + Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}))); - myMul->forward(); - myMul->backward(); + op->associateInput(0, T0); + op->associateInput(1, T1); + op->forwardDims(); - auto T0Grad = std::make_shared<Tensor>(Array2D<float, 2,3>({{{0.1,0.2,0.3},{0.1, 0.2, 0.3}}})); - auto T1Grad = std::make_shared<Tensor>(Array1D<float, 3>({5,7,9})); + myMul->forward(); + myMul->backward(); - REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *T0Grad)); - REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *T1Grad)); - } + auto T0Grad = std::make_shared<Tensor>( + Array2D<float, 2, 3>({{{0.1, 0.2, 0.3}, {0.1, 0.2, 0.3}}})); + auto T1Grad = std::make_shared<Tensor>(Array1D<float, 3>({5, 7, 9})); - SECTION("Case 2: 3D and 1D tensors") { - const auto T0 = std::make_shared<Tensor>(Array3D<float,2,2,3>( - { - { - { - {1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0} - }, - { - {7.0, 8.0, 9.0}, - {10.0, 11.0, 12.0} - } - } - } - )); - - const auto T1 = std::make_shared<Tensor>(Array1D<float, 3>({0.3,0.2,0.1})); - - const auto newGrad = std::make_shared<Tensor>(Array3D<float,2,2,3>( - { - { - { - {1, 1, 1}, - {1, 1, 1} - }, - { - {1, 1, 1}, - {1, 1, 1} - } - } - } - )); - - const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,2,3>( 
- { - { - { - {0.3, 0.2, 0.1}, - {0.3, 0.2, 0.1} - }, - { - {0.3, 0.2, 0.1}, - {0.3, 0.2, 0.1} - } - } - } - )); + REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *T0Grad)); + REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *T1Grad)); + } - const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float,3>( - {22.0, 26.0, 30.0} - )); + SECTION("Case 2: 3D and 1D tensors") { + const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}, + {{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}})); - for(auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1}) - { - T->setBackend("cpu") ; - T->setDataType(DataType::Float32); - } + const auto T1 = + std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1})); - op->associateInput(0, T0); - op->associateInput(1, T1); - op->getOutput(0)->setGrad(newGrad); - op->forwardDims(); + const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}})); - myMul->backward(); + const auto expectedGrad0 = std::make_shared<Tensor>( + Array3D<float, 2, 2, 3>({{{{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}}, + {{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}}}})); - REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); - REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); - } + const auto expectedGrad1 = + std::make_shared<Tensor>(Array1D<float, 3>({22.0, 26.0, 30.0})); - SECTION("Case 3: 4D and 2D tensors") { - const auto T0 = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>( - { - { - { - { - {1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0} - }, - { - {10.0, 11.0, 12.0}, - {13.0, 14.0, 15.0}, - {16.0, 17.0, 18.0} - } - }, - { - { - {19.0, 20.0, 21.0}, - {22.0, 23.0, 24.0}, - {25.0, 26.0, 27.0} - }, - { - {28.0, 29.0, 30.0}, - {31.0, 32.0, 33.0}, - {34.0, 35.0, 36.0} - } - } - } - } - )); - - const auto T1 = std::make_shared<Tensor>(Array2D<float, 3,3>( - { - { - {0.5,0.3,0.1}, - {0.4,0.2,0.6}, - {0.7,0.8,0.9} - } - } - )); - - const auto newGrad = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>( - { - { - { - { - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0} - }, - { - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0} - } - }, - { - { - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0} - }, - { - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0} - } - } - } - } - )); - - const auto expectedGrad0 = std::make_shared<Tensor>(Array4D<float,2,2,3,3>( - { - { - { - { - {0.5, 0.3, 0.1}, - {0.4, 0.2, 0.6}, - {0.7, 0.8, 0.9} - }, - { - {0.5, 0.3, 0.1}, - {0.4, 0.2, 0.6}, - {0.7, 0.8, 0.9} - } - }, - { - { - {0.5, 0.3, 0.1}, - {0.4, 0.2, 0.6}, - {0.7, 0.8, 0.9} - }, - { - {0.5, 0.3, 0.1}, - {0.4, 0.2, 0.6}, - {0.7, 0.8, 0.9} - } - } - } - } - )); - - const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 3>( - { - { - {58.0, 62.0, 66.0}, - {70.0, 74.0, 78.0}, - {82.0, 86.0, 90.0} - } - } - )); + for (auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) { + T->setBackend("cpu"); + T->setDataType(DataType::Float32); + } - for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1}) - { - T->setBackend("cpu") ; - T->setDataType(DataType::Float32); - } + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); - op->associateInput(0, T0); - op->associateInput(1, T1); - op->getOutput(0)->setGrad(newGrad); - op->forwardDims(); + myMul->backward(); - myMul->backward(); + REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); + 
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); + } - REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); - REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); + SECTION("Case 3: 4D and 2D tensors") { + const auto T0 = std::make_shared<Tensor>(Array4D<float, 2, 2, 3, 3>( + {{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}, + {{10.0, 11.0, 12.0}, {13.0, 14.0, 15.0}, {16.0, 17.0, 18.0}}}, + {{{19.0, 20.0, 21.0}, {22.0, 23.0, 24.0}, {25.0, 26.0, 27.0}}, + {{28.0, 29.0, 30.0}, + {31.0, 32.0, 33.0}, + {34.0, 35.0, 36.0}}}}})); + + const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 3>( + {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}})); + + const auto newGrad = + std::make_shared<Tensor>(Array4D<float, 2, 2, 3, 3>( + {{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}, + {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}, + {{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}, + {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}})); + + const auto expectedGrad0 = + std::make_shared<Tensor>(Array4D<float, 2, 2, 3, 3>( + {{{{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}, + {{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}, + {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}, + {{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}}})); + + const auto expectedGrad1 = std::make_shared<Tensor>( + Array2D<float, 3, 3>({{{58.0, 62.0, 66.0}, + {70.0, 74.0, 78.0}, + {82.0, 86.0, 90.0}}})); + + for (const auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) { + T->setBackend("cpu"); + T->setDataType(DataType::Float32); } - SECTION("Case 4: 3D and 2D tensors") { - const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 3, 4>( - { - { - { - {1.0, 2.0, 3.0, 4.0}, - {5.0, 6.0, 7.0, 8.0}, - {9.0, 10.0, 11.0, 12.0}, - }, - { - {13.0, 14.0, 15.0, 16.0}, - {17.0, 18.0, 19.0, 20.0}, - {21.0, 22.0, 23.0, 24.0}, - } - } - } - )); - - const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 4>( - { - { - {0.1, 0.2, 0.3, 0.4}, - {0.5, 0.6, 0.7, 0.8}, - {0.9, 1.0, 1.1, 1.2} - } - } - )); - - const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2,3,4>( - { - { - { - {1.0, 1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0, 1.0}, - }, - { - {1.0, 1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0, 1.0}, - {1.0, 1.0, 1.0, 1.0}, - } - } - } - )); - - const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,3,4>( - { - { - { - {0.1, 0.2, 0.3, 0.4}, - {0.5, 0.6, 0.7, 0.8}, - {0.9, 1.0, 1.1, 1.2} - }, - { - {0.1, 0.2, 0.3, 0.4}, - {0.5, 0.6, 0.7, 0.8}, - {0.9, 1.0, 1.1, 1.2} - } - } - } - )); - - const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 4>( - { - { - {14.0, 16.0, 18.0, 20.0}, - {22.0, 24.0, 26.0, 28.0}, - {30.0, 32.0, 34.0, 36.0} - } - } - )); - - for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1}) - { - T->setBackend("cpu") ; - T->setDataType(DataType::Float32); - } + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); - op->associateInput(0, T0); - op->associateInput(1, T1); - op->getOutput(0)->setGrad(newGrad); - op->forwardDims(); + myMul->backward(); - myMul->backward(); + REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); + REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); + } - REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); - REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); + SECTION("Case 4: 3D and 2D tensors") { + const 
auto T0 = std::make_shared<Tensor>( + Array3D<float, 2, 3, 4>({{{ + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }, + { + {13.0, 14.0, 15.0, 16.0}, + {17.0, 18.0, 19.0, 20.0}, + {21.0, 22.0, 23.0, 24.0}, + }}})); + + const auto T1 = std::make_shared<Tensor>( + Array2D<float, 3, 4>({{{0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2}}})); + + const auto newGrad = std::make_shared<Tensor>( + Array3D<float, 2, 3, 4>({{{ + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + }, + { + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + }}})); + + const auto expectedGrad0 = std::make_shared<Tensor>( + Array3D<float, 2, 3, 4>({{{{0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2}}, + {{0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2}}}})); + + const auto expectedGrad1 = std::make_shared<Tensor>( + Array2D<float, 3, 4>({{{14.0, 16.0, 18.0, 20.0}, + {22.0, 24.0, 26.0, 28.0}, + {30.0, 32.0, 34.0, 36.0}}})); + + for (const auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) { + T->setBackend("cpu"); + T->setDataType(DataType::Float32); } + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + myMul->backward(); + + REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); + REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); } +} TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { constexpr std::uint16_t NBTRIALS = 10; // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3)); - std::uniform_int_distribution<int> boolDist(0,1); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), + std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), + std::size_t(3)); + std::uniform_int_distribution<int> boolDist(0, 1); // Create MatMul Operator std::shared_ptr<Node> myMul = Mul(); - auto op = std::static_pointer_cast<OperatorTensor>(myMul-> getOperator()); + auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator()); op->setDataType(DataType::Float32); op->setBackend("cpu"); // Create 2 input Tensors std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); - op->associateInput(0,T0); + op->associateInput(0, T0); T0->setDataType(DataType::Float32); T0->setBackend("cpu"); std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(); - op -> associateInput(1,T1); + op->associateInput(1, T1); T1->setDataType(DataType::Float32); T1->setBackend("cpu"); @@ -391,14 +241,9 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { std::chrono::time_point<std::chrono::system_clock> end; std::chrono::duration<double, std::micro> duration{}; - SECTION("MulImpl_cpu::forward()") { - SECTION("Scalar / Scalar") { - - } - SECTION("Scalar / +1-D Tensor") { - - } + SECTION("Scalar / Scalar") {} + SECTION("Scalar / +1-D Tensor") {} SECTION("+1-D Tensor / +1-D Tensor - same dimensions") { std::size_t number_of_operation = 0; @@ -413,13 +258,17 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { dims.push_back(dimSizeDist(gen)); } - const auto 
nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const auto nb_elements = + std::accumulate(dims.cbegin(), + dims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; // without broadcasting - float* array0 = new float[nb_elements]; - float* array1 = new float[nb_elements]; - float* result = new float[nb_elements]; + float *array0 = new float[nb_elements]; + float *array1 = new float[nb_elements]; + float *result = new float[nb_elements]; for (std::size_t i = 0; i < nb_elements; ++i) { array0[i] = valueDist(gen); @@ -429,21 +278,23 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { // input0 T0->resize(dims); - T0 -> getImpl() -> setRawPtr(array0, nb_elements); + T0->getImpl()->setRawPtr(array0, nb_elements); // input1 T1->resize(dims); - T1 -> getImpl() -> setRawPtr(array1, nb_elements); + T1->getImpl()->setRawPtr(array1, nb_elements); // results Tres->resize(dims); - Tres -> getImpl() -> setRawPtr(result, nb_elements); + Tres->getImpl()->setRawPtr(result, nb_elements); op->forwardDims(); start = std::chrono::system_clock::now(); myMul->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -451,24 +302,25 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { delete[] array1; delete[] result; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } - SECTION("+1-D Tensor / +1-D Tensor - broadcasting") { std::size_t number_of_operation = 0; for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors - // handle dimensions, replace some dimensions with '1' to get broadcasting + // handle dimensions, replace some dimensions with '1' to get + // broadcasting constexpr std::size_t nbDims = 4; std::vector<std::size_t> dimensions; - for (std::size_t i = 0; i < nbDims; ++i) - { + for (std::size_t i = 0; i < nbDims; ++i) { dimensions.push_back(dimSizeDist(gen)); } @@ -476,77 +328,90 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { auto dims1 = dimensions; auto dimsOut = dimensions; - for (std::size_t i = 0; i < nbDims; ++i) - { - if (boolDist(gen)) - { + for (std::size_t i = 0; i < nbDims; ++i) { + if (boolDist(gen)) { dims0[i] = 1; } - if (boolDist(gen)) - { + if (boolDist(gen)) { dims1[i] = 1; } dimsOut[i] = (dims0[i] == 1) ? 
dims1[i] : dims0[i]; } - for(auto dim : dims0) - { + for (auto dim : dims0) { Log::info("Dimension of input 0 : {}", dim); } - for(auto dim : dims1) - { + for (auto dim : dims1) { Log::info("Dimension of input 1 : {}", dim); } // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - - for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) - { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + float *array1 = + new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < dims0[0] * dims0[1] * dims0[2] * dims0[3]; + ++i) { array0[i] = valueDist(gen); } - for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) - { + for (std::size_t i = 0; + i < dims1[0] * dims1[1] * dims1[2] * dims1[3]; + ++i) { array1[i] = valueDist(gen); } // compute true result - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; - - for (std::size_t a = 0; a < dimsOut[0]; ++a) - { - for (std::size_t b = 0; b < dimsOut[1]; ++b) - { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - - const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) - + strides1[1] * ((dims1[1] > 1) ? b : 0); - - for (std::size_t c = 0; c < dimsOut[2]; ++c) - { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); - - for (std::size_t d = 0; d < dimsOut[3]; ++d) - { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1[2] > 1) ? c : 0) - + ((dims1[3] > 1) ? d : 0); - - result[idx_out + d] = array0[idx0] * array1[idx1]; - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl; + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1[1] * dims1[2] * dims1[3], + dims1[2] * dims1[3], + dims1[3], + 1}; + + for (std::size_t a = 0; a < dimsOut[0]; ++a) { + for (std::size_t b = 0; b < dimsOut[1]; ++b) { + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + + const std::size_t idx1_0 = + strides1[0] * ((dims1[0] > 1) ? a : 0) + + strides1[1] * ((dims1[1] > 1) ? b : 0); + + for (std::size_t c = 0; c < dimsOut[2]; ++c) { + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); + + for (std::size_t d = 0; d < dimsOut[3]; ++d) { + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + + std::size_t idx1 = + idx1_0 + + strides1[2] * ((dims1[2] > 1) ? c : 0) + + ((dims1[3] > 1) ? 
d : 0); + + result[idx_out + d] = + array0[idx0] * array1[idx1]; + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " * " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -555,22 +420,30 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]); + T1->getImpl()->setRawPtr( + array1, + dims1[0] * dims1[1] * dims1[2] * dims1[3]); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); myMul->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -579,15 +452,23 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } SECTION("+1-D Tensor / 1-D Tensor") { std::size_t number_of_operation = 0; - std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3)); + std::uniform_int_distribution<std::size_t> nbRemovedDimsDist( + std::size_t(1), + std::size_t(3)); for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors @@ -604,15 +485,24 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { dims1[i] = 1; } } - dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen)); + dims1.erase(dims1.cbegin(), + dims1.cbegin() + nbRemovedDimsDist(gen)); // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); - float* array1 = new float[array1_size]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + std::size_t array1_size = + std::accumulate(dims1.cbegin(), + dims1.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + float *array1 = new float[array1_size]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]); + 
++i) { array0[i] = valueDist(gen); } for (std::size_t i = 0; i < array1_size; ++i) { @@ -621,27 +511,48 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { // compute true result auto dims1_tmp = dims1; - dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1)); - - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1}; + dims1_tmp.insert(dims1_tmp.cbegin(), + 4 - dims1_tmp.size(), + std::size_t(1)); + + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[3], + 1}; for (std::size_t a = 0; a < dimsOut[0]; ++a) { for (std::size_t b = 0; b < dimsOut[1]; ++b) { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) - + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = + strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) + + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); for (std::size_t c = 0; c < dimsOut[2]; ++c) { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); for (std::size_t d = 0; d < dimsOut[3]; ++d) { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0) - + ((dims1_tmp[3] > 1) ? d : 0); - result[idx_out + d] = array0[idx0] * array1[idx1]; - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl; + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = + idx1_0 + + strides1[2] * + ((dims1_tmp[2] > 1) ? c : 0) + + ((dims1_tmp[3] > 1) ? 
d : 0); + result[idx_out + d] = + array0[idx0] * array1[idx1]; + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " * " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -650,22 +561,28 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, array1_size); + T1->getImpl()->setRawPtr(array1, array1_size); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); myMul->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -674,12 +591,18 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } } } diff --git a/unit_tests/operator/Test_PadImpl.cpp b/unit_tests/operator/Test_PadImpl.cpp index cdd3a5f979085f3782776ce69ddd92c0d53150c4..77f0add738bef378a4e4e72f86a589e2f867d449 100644 --- a/unit_tests/operator/Test_PadImpl.cpp +++ b/unit_tests/operator/Test_PadImpl.cpp @@ -24,106 +24,98 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { SECTION("Symmetric Pad") { const int pv = 0; // pad value - std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv)); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 
148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW - { - { - {{ pv, pv, pv, pv, pv, pv, pv}, - { pv, 0, 1, 2, 3, 4, pv}, - { pv, 5, 6, 7, 8, 9, pv}, - { pv, 10, 11, 12, 13, 14, pv}, - { pv, 15, 16, 17, 18, 19, pv}, - { pv, 20, 21, 22, 23, 24, pv}, - { pv, pv, pv, pv, pv, pv, pv}}, - - {{ pv, pv, pv, pv, pv, pv, pv}, - { pv, 25, 26, 27, 28, 29, pv}, - { pv, 30, 31, 32, 33, 34, pv}, - { pv, 35, 36, 37, 38, 39, pv}, - { pv, 40, 41, 42, 43, 44, pv}, - { pv, 45, 46, 47, 48, 49, pv}, - { pv, pv, pv, pv, pv, pv, pv}}, - - {{ pv, pv, pv, pv, pv, pv, pv}, - { pv, 50, 51, 52, 53, 54, pv}, - { pv, 55, 56, 57, 58, 59, pv}, - { pv, 60, 61, 62, 63, 64, pv}, - { pv, 65, 66, 67, 68, 69, pv}, - { pv, 70, 71, 72, 73, 74, pv}, - { pv, pv, pv, pv, pv, pv, pv}} - }, - { - {{ pv, pv, pv, pv, pv, pv, pv}, - { pv, 75, 76, 77, 78, 79, pv}, - { pv, 80, 81, 82, 83, 84, pv}, - { pv, 85, 86, 87, 88, 89, pv}, - { pv, 90, 91, 92, 93, 94, pv}, - { pv, 95, 96, 97, 98, 99, pv}, - { pv, pv, pv, pv, pv, pv, pv}}, - - {{ pv, pv, pv, pv, pv, pv, pv}, - {pv, 100, 101, 102, 103, 104, pv}, - {pv, 105, 106, 107, 108, 109, pv}, - {pv, 110, 111, 112, 113, 114, pv}, - {pv, 115, 116, 117, 118, 119, pv}, - {pv, 120, 121, 122, 123, 124, pv}, - { pv, pv, pv, pv, pv, pv, pv}}, - - {{ pv, pv, pv, pv, pv, pv, pv}, - {pv, 125, 126, 127, 128, 129, pv}, - {pv, 130, 131, 132, 133, 134, pv}, - {pv, 135, 136, 137, 138, 139, pv}, - {pv, 140, 141, 142, 143, 144, pv}, - {pv, 145, 146, 147, 148, 149, pv}, - { pv, pv, pv, pv, pv, pv, pv}} - } - } - }); - - myPad->getOperator()->associateInput(0,myInput); + std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, + "mypad", + PadBorderType::Constant, + static_cast<double>(pv)); + auto op = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 7, 7>{// NCHW + {{{{pv, pv, pv, pv, pv, pv, pv}, + {pv, 0, 1, 2, 3, 4, pv}, + {pv, 5, 6, 7, 8, 9, pv}, + {pv, 10, 11, 12, 13, 14, pv}, + {pv, 15, 16, 17, 18, 19, pv}, + {pv, 20, 21, 22, 23, 24, pv}, + {pv, pv, pv, pv, pv, pv, pv}}, + + {{pv, pv, pv, pv, pv, pv, pv}, + {pv, 25, 26, 27, 28, 29, pv}, + {pv, 30, 31, 32, 33, 34, pv}, + {pv, 35, 36, 37, 38, 39, pv}, + {pv, 40, 41, 42, 43, 44, pv}, + {pv, 45, 46, 47, 48, 49, pv}, + {pv, pv, pv, pv, pv, pv, pv}}, + + {{pv, pv, pv, pv, pv, pv, pv}, + {pv, 50, 51, 52, 53, 54, pv}, + {pv, 55, 56, 57, 58, 59, pv}, + {pv, 60, 61, 62, 63, 64, pv}, + {pv, 65, 66, 67, 68, 69, pv}, + {pv, 70, 71, 72, 73, 74, pv}, + {pv, pv, pv, pv, pv, pv, pv}}}, + {{{pv, pv, pv, pv, pv, pv, pv}, + {pv, 75, 76, 77, 78, 79, 
pv}, + {pv, 80, 81, 82, 83, 84, pv}, + {pv, 85, 86, 87, 88, 89, pv}, + {pv, 90, 91, 92, 93, 94, pv}, + {pv, 95, 96, 97, 98, 99, pv}, + {pv, pv, pv, pv, pv, pv, pv}}, + + {{pv, pv, pv, pv, pv, pv, pv}, + {pv, 100, 101, 102, 103, 104, pv}, + {pv, 105, 106, 107, 108, 109, pv}, + {pv, 110, 111, 112, 113, 114, pv}, + {pv, 115, 116, 117, 118, 119, pv}, + {pv, 120, 121, 122, 123, 124, pv}, + {pv, pv, pv, pv, pv, pv, pv}}, + + {{pv, pv, pv, pv, pv, pv, pv}, + {pv, 125, 126, 127, 128, 129, pv}, + {pv, 130, 131, 132, 133, 134, pv}, + {pv, 135, 136, 137, 138, 139, pv}, + {pv, 140, 141, 142, 143, 144, pv}, + {pv, 145, 146, 147, 148, 149, pv}, + {pv, pv, pv, pv, pv, pv, pv}}}}}); + + myPad->getOperator()->associateInput(0, myInput); myPad->getOperator()->setDataType(DataType::Int32); myPad->getOperator()->setBackend("cpu"); myPad->forward(); @@ -134,100 +126,92 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { SECTION("Asymmetric Pad") { const int pv = 0; // pad value - std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv)); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,6,6> { //NCHW - { - { - {{ pv, pv, pv, pv, pv, pv}, - { 0, 1, 2, 3, 4, pv}, - { 5, 6, 7, 8, 9, pv}, - { 10, 11, 12, 13, 14, pv}, - { 15, 16, 17, 18, 19, pv}, - { 20, 21, 22, 23, 24, pv}}, - - {{ pv, pv, pv, pv, pv, pv}, - { 25, 26, 27, 28, 29, pv}, - { 30, 31, 32, 33, 34, pv}, - { 35, 36, 37, 38, 39, pv}, - { 40, 41, 42, 43, 44, pv}, - { 45, 46, 47, 48, 49, pv}}, - - {{ pv, pv, pv, pv, pv, pv}, - { 50, 51, 52, 53, 54, pv}, - { 55, 56, 57, 58, 59, pv}, - { 60, 61, 62, 63, 64, pv}, - { 65, 66, 67, 68, 69, pv}, - { 70, 71, 72, 73, 74, pv}} - }, - { - {{ pv, pv, pv, pv, pv, pv}, - { 75, 76, 77, 78, 79, pv}, - { 80, 81, 82, 83, 84, pv}, - { 85, 86, 87, 88, 89, pv}, - { 90, 91, 92, 93, 94, pv}, - { 95, 96, 97, 98, 99, pv}}, - - {{ pv, pv, pv, pv, pv, pv}, - { 100, 101, 102, 103, 104, pv}, - { 105, 106, 107, 108, 109, pv}, - { 110, 111, 112, 113, 114, pv}, - { 115, 116, 117, 118, 119, pv}, - { 120, 121, 122, 123, 124, pv}}, - - {{ pv, pv, pv, pv, pv, pv}, - { 125, 126, 127, 128, 129, pv}, - { 130, 131, 132, 133, 134, pv}, - { 135, 136, 137, 138, 139, pv}, - { 140, 141, 142, 143, 144, pv}, - { 145, 146, 147, 148, 149, pv}} - } - } - }); - - myPad->getOperator()->associateInput(0,myInput); + std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, + "mypad", + PadBorderType::Constant, + static_cast<double>(pv)); + auto op = + 
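Both Pad expectations above follow from the shape rule out = in + padBegin + padEnd along each spatial axis: {1, 1, 1, 1} turns each 5x5 map into 7x7, and the asymmetric {1, 0, 0, 1} case, which judging from the expected tensor pads one row on top and one column on the right, gives 6x6. A hedged sketch of constant padding for a single channel (illustrative names, not the Aidge kernel itself):

    #include <cstddef>
    #include <vector>

    // Constant-pad one HxW map; output is (H + top + bottom) x (W + left + right).
    std::vector<int> padConstant(const std::vector<int> &in,
                                 std::size_t H, std::size_t W,
                                 std::size_t top, std::size_t bottom,
                                 std::size_t left, std::size_t right,
                                 int padValue) {
        const std::size_t Ho = H + top + bottom;
        const std::size_t Wo = W + left + right;
        std::vector<int> out(Ho * Wo, padValue); // borders keep padValue
        for (std::size_t i = 0; i < H; ++i)
            for (std::size_t j = 0; j < W; ++j)
                out[(i + top) * Wo + (j + left)] = in[i * W + j];
        return out;
    }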
std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 6, 6>{// NCHW + {{{{pv, pv, pv, pv, pv, pv}, + {0, 1, 2, 3, 4, pv}, + {5, 6, 7, 8, 9, pv}, + {10, 11, 12, 13, 14, pv}, + {15, 16, 17, 18, 19, pv}, + {20, 21, 22, 23, 24, pv}}, + + {{pv, pv, pv, pv, pv, pv}, + {25, 26, 27, 28, 29, pv}, + {30, 31, 32, 33, 34, pv}, + {35, 36, 37, 38, 39, pv}, + {40, 41, 42, 43, 44, pv}, + {45, 46, 47, 48, 49, pv}}, + + {{pv, pv, pv, pv, pv, pv}, + {50, 51, 52, 53, 54, pv}, + {55, 56, 57, 58, 59, pv}, + {60, 61, 62, 63, 64, pv}, + {65, 66, 67, 68, 69, pv}, + {70, 71, 72, 73, 74, pv}}}, + {{{pv, pv, pv, pv, pv, pv}, + {75, 76, 77, 78, 79, pv}, + {80, 81, 82, 83, 84, pv}, + {85, 86, 87, 88, 89, pv}, + {90, 91, 92, 93, 94, pv}, + {95, 96, 97, 98, 99, pv}}, + + {{pv, pv, pv, pv, pv, pv}, + {100, 101, 102, 103, 104, pv}, + {105, 106, 107, 108, 109, pv}, + {110, 111, 112, 113, 114, pv}, + {115, 116, 117, 118, 119, pv}, + {120, 121, 122, 123, 124, pv}}, + + {{pv, pv, pv, pv, pv, pv}, + {125, 126, 127, 128, 129, pv}, + {130, 131, 132, 133, 134, pv}, + {135, 136, 137, 138, 139, pv}, + {140, 141, 142, 143, 144, pv}, + {145, 146, 147, 148, 149, pv}}}}}); + + myPad->getOperator()->associateInput(0, myInput); myPad->getOperator()->setDataType(DataType::Int32); myPad->getOperator()->setBackend("cpu"); myPad->forward(); @@ -236,106 +220,97 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { } SECTION("Pad Edge") { - std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Edge); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = 
std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW - { - { - {{ 0, 0, 1, 2, 3, 4, 4}, - { 0, 0, 1, 2, 3, 4, 4}, - { 5, 5, 6, 7, 8, 9, 9}, - { 10, 10, 11, 12, 13, 14, 14}, - { 15, 15, 16, 17, 18, 19, 19}, - { 20, 20, 21, 22, 23, 24, 24}, - { 20, 20, 21, 22, 23, 24, 24}}, - - {{ 25, 25, 26, 27, 28, 29, 29}, - { 25, 25, 26, 27, 28, 29, 29}, - { 30, 30, 31, 32, 33, 34, 34}, - { 35, 35, 36, 37, 38, 39, 39}, - { 40, 40, 41, 42, 43, 44, 44}, - { 45, 45, 46, 47, 48, 49, 49}, - { 45, 45, 46, 47, 48, 49, 49}}, - - {{ 50, 50, 51, 52, 53, 54, 54}, - { 50, 50, 51, 52, 53, 54, 54}, - { 55, 55, 56, 57, 58, 59, 59}, - { 60, 60, 61, 62, 63, 64, 64}, - { 65, 65, 66, 67, 68, 69, 69}, - { 70, 70, 71, 72, 73, 74, 74}, - { 70, 70, 71, 72, 73, 74, 74}} - }, - { - {{ 75, 75, 76, 77, 78, 79, 79}, - { 75, 75, 76, 77, 78, 79, 79}, - { 80, 80, 81, 82, 83, 84, 84}, - { 85, 85, 86, 87, 88, 89, 89}, - { 90, 90, 91, 92, 93, 94, 94}, - { 95, 95, 96, 97, 98, 99, 99}, - { 95, 95, 96, 97, 98, 99, 99}}, - - {{100, 100, 101, 102, 103, 104, 104}, - {100, 100, 101, 102, 103, 104, 104}, - {105, 105, 106, 107, 108, 109, 109}, - {110, 110, 111, 112, 113, 114, 114}, - {115, 115, 116, 117, 118, 119, 119}, - {120, 120, 121, 122, 123, 124, 124}, - {120, 120, 121, 122, 123, 124, 124}}, - - {{125, 125, 126, 127, 128, 129, 129}, - {125, 125, 126, 127, 128, 129, 129}, - {130, 130, 131, 132, 133, 134, 134}, - {135, 135, 136, 137, 138, 139, 139}, - {140, 140, 141, 142, 143, 144, 144}, - {145, 145, 146, 147, 148, 149, 149}, - {145, 145, 146, 147, 148, 149, 149}} - } - } - }); - - myPad->getOperator()->associateInput(0,myInput); + std::shared_ptr<Node> myPad = + Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Edge); + auto op = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array4D<int, 2, 3, 7, 7>{ + // NCHW + {{{{0, 0, 1, 2, 3, 4, 4}, + {0, 0, 1, 2, 3, 4, 4}, + {5, 5, 6, 7, 8, 9, 9}, + {10, 10, 11, 12, 13, 14, 14}, + {15, 15, 16, 17, 18, 19, 19}, + {20, 20, 21, 22, 23, 24, 24}, + {20, 20, 21, 22, 23, 24, 24}}, + + {{25, 25, 26, 27, 28, 29, 29}, + {25, 25, 26, 27, 28, 29, 29}, + {30, 30, 31, 32, 33, 34, 34}, + {35, 35, 36, 37, 38, 39, 39}, + {40, 40, 41, 42, 43, 44, 44}, + {45, 45, 46, 47, 48, 49, 49}, + {45, 45, 46, 47, 48, 49, 49}}, + + {{50, 50, 51, 52, 53, 54, 54}, + {50, 50, 51, 52, 53, 54, 54}, + {55, 55, 56, 57, 58, 59, 59}, + {60, 60, 61, 62, 63, 64, 64}, + {65, 65, 66, 67, 68, 69, 69}, + {70, 70, 71, 72, 73, 74, 74}, + {70, 70, 71, 72, 73, 74, 74}}}, + {{{75, 75, 76, 77, 78, 79, 79}, + {75, 75, 76, 77, 78, 79, 79}, + {80, 80, 81, 82, 83, 84, 84}, + {85, 85, 86, 87, 88, 89, 89}, + 
{90, 90, 91, 92, 93, 94, 94}, + {95, 95, 96, 97, 98, 99, 99}, + {95, 95, 96, 97, 98, 99, 99}}, + + {{100, 100, 101, 102, 103, 104, 104}, + {100, 100, 101, 102, 103, 104, 104}, + {105, 105, 106, 107, 108, 109, 109}, + {110, 110, 111, 112, 113, 114, 114}, + {115, 115, 116, 117, 118, 119, 119}, + {120, 120, 121, 122, 123, 124, 124}, + {120, 120, 121, 122, 123, 124, 124}}, + + {{125, 125, 126, 127, 128, 129, 129}, + {125, 125, 126, 127, 128, 129, 129}, + {130, 130, 131, 132, 133, 134, 134}, + {135, 135, 136, 137, 138, 139, 139}, + {140, 140, 141, 142, 143, 144, 144}, + {145, 145, 146, 147, 148, 149, 149}, + {145, 145, 146, 147, 148, 149, 149}}}}}); + + myPad->getOperator()->associateInput(0, myInput); myPad->getOperator()->setDataType(DataType::Int32); myPad->getOperator()->setBackend("cpu"); myPad->forward(); @@ -344,114 +319,93 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { } SECTION("Pad Reflect") { - std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Reflect); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW - { - { - { - { 6, 5, 6, 7, 8, 9, 5}, - { 1, 0, 1, 2, 3, 4, 0}, - { 6, 5, 6, 7, 8, 9, 5}, - { 11, 10, 11, 12, 13, 14, 10}, - { 16, 15, 16, 17, 18, 19, 15}, - { 21, 20, 21, 22, 23, 24, 20}, - { 1, 0, 1, 2, 3, 4, 0} - }, - { - { 31, 30, 31, 32, 33, 34, 30}, - { 26, 25, 26, 27, 28, 29, 25}, - { 31, 30, 31, 32, 33, 34, 30}, - { 36, 35, 36, 37, 38, 39, 35}, - { 41, 40, 41, 42, 43, 44, 40}, - { 46, 45, 46, 47, 48, 49, 45}, - { 26, 25, 26, 27, 28, 29, 25} - }, - { - { 56, 55, 56, 57, 58, 59, 55}, - { 51, 50, 51, 52, 53, 54, 50}, - { 56, 55, 56, 57, 58, 59, 55}, - { 61, 60, 61, 62, 63, 64, 60}, - { 66, 65, 66, 67, 68, 69, 65}, - { 71, 70, 71, 72, 73, 74, 70}, - { 51, 50, 51, 52, 53, 54, 50} - } - }, - { - { - { 81, 80, 81, 82, 83, 84, 80}, - { 76, 75, 76, 77, 78, 79, 75}, - { 81, 80, 81, 82, 83, 84, 80}, - { 86, 85, 86, 87, 88, 89, 85}, - { 91, 90, 91, 92, 93, 94, 90}, - { 96, 95, 96, 97, 98, 99, 95}, - { 76, 75, 76, 77, 78, 79, 75} - }, - { - { 106, 105, 106, 107, 108, 109, 105}, - { 101, 100, 101, 102, 103, 104, 100}, - { 106, 105, 106, 107, 108, 109, 105}, - { 111, 110, 111, 112, 113, 114, 110}, - { 116, 115, 116, 117, 118, 119, 115}, - { 121, 120, 121, 122, 123, 124, 120}, - { 101, 100, 101, 102, 103, 104, 100} - }, - { - { 131, 130, 131, 132, 133, 134, 130}, - { 126, 125, 126, 127, 128, 129, 125}, - { 131, 130, 131, 132, 133, 134, 130}, - { 136, 135, 136, 137, 138, 139, 135}, - { 141, 140, 141, 142, 143, 144, 140}, - { 146, 145, 
146, 147, 148, 149, 145}, - { 126, 125, 126, 127, 128, 129, 125} - } - } - } - }); - - myPad->getOperator()->associateInput(0,myInput); + std::shared_ptr<Node> myPad = + Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Reflect); + auto op = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array4D<int, 2, 3, 7, 7>{ + // NCHW + {{{{6, 5, 6, 7, 8, 9, 5}, + {1, 0, 1, 2, 3, 4, 0}, + {6, 5, 6, 7, 8, 9, 5}, + {11, 10, 11, 12, 13, 14, 10}, + {16, 15, 16, 17, 18, 19, 15}, + {21, 20, 21, 22, 23, 24, 20}, + {1, 0, 1, 2, 3, 4, 0}}, + {{31, 30, 31, 32, 33, 34, 30}, + {26, 25, 26, 27, 28, 29, 25}, + {31, 30, 31, 32, 33, 34, 30}, + {36, 35, 36, 37, 38, 39, 35}, + {41, 40, 41, 42, 43, 44, 40}, + {46, 45, 46, 47, 48, 49, 45}, + {26, 25, 26, 27, 28, 29, 25}}, + {{56, 55, 56, 57, 58, 59, 55}, + {51, 50, 51, 52, 53, 54, 50}, + {56, 55, 56, 57, 58, 59, 55}, + {61, 60, 61, 62, 63, 64, 60}, + {66, 65, 66, 67, 68, 69, 65}, + {71, 70, 71, 72, 73, 74, 70}, + {51, 50, 51, 52, 53, 54, 50}}}, + {{{81, 80, 81, 82, 83, 84, 80}, + {76, 75, 76, 77, 78, 79, 75}, + {81, 80, 81, 82, 83, 84, 80}, + {86, 85, 86, 87, 88, 89, 85}, + {91, 90, 91, 92, 93, 94, 90}, + {96, 95, 96, 97, 98, 99, 95}, + {76, 75, 76, 77, 78, 79, 75}}, + {{106, 105, 106, 107, 108, 109, 105}, + {101, 100, 101, 102, 103, 104, 100}, + {106, 105, 106, 107, 108, 109, 105}, + {111, 110, 111, 112, 113, 114, 110}, + {116, 115, 116, 117, 118, 119, 115}, + {121, 120, 121, 122, 123, 124, 120}, + {101, 100, 101, 102, 103, 104, 100}}, + {{131, 130, 131, 132, 133, 134, 130}, + {126, 125, 126, 127, 128, 129, 125}, + {131, 130, 131, 132, 133, 134, 130}, + {136, 135, 136, 137, 138, 139, 135}, + {141, 140, 141, 142, 143, 144, 140}, + {146, 145, 146, 147, 148, 149, 145}, + {126, 125, 126, 127, 128, 129, 125}}}}}); + + myPad->getOperator()->associateInput(0, myInput); myPad->getOperator()->setDataType(DataType::Int32); myPad->getOperator()->setBackend("cpu"); myPad->forward(); @@ -460,106 +414,97 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { } SECTION("Pad Wrap") { - std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Wrap); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 
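The Edge and Wrap expectations in the neighbouring sections reduce to how an out-of-range source index s is remapped into [0, n): Edge clamps to the nearest valid index, Wrap takes s modulo n. The Reflect tensors in this hunk mirror at the leading border (index -1 maps to 1) but wrap past the trailing border (index 5 maps to 0, where a textbook mirror would give 3), so only the clamp and wrap rules are sketched here (illustrative names, not the Aidge kernels):

    // Remap an out-of-range source index s into [0, n) for a border mode.
    long edgeIdx(long s, long n) {              // clamp to the nearest edge
        return s < 0 ? 0 : (s >= n ? n - 1 : s);
    }
    long wrapIdx(long s, long n) {              // periodic continuation
        return ((s % n) + n) % n;               // well-defined for s < 0 too
    }

For instance, the first Wrap output row {24, 20, 21, 22, 23, 24, 20} is source row wrapIdx(-1, 5) = 4 with its columns wrapped the same way.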
60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW - { - { - {{ 24, 20, 21, 22, 23, 24, 20}, - { 4, 0, 1, 2, 3, 4, 0}, - { 9, 5, 6, 7, 8, 9, 5}, - { 14, 10, 11, 12, 13, 14, 10}, - { 19, 15, 16, 17, 18, 19, 15}, - { 24, 20, 21, 22, 23, 24, 20}, - { 4, 0, 1, 2, 3, 4, 0}}, - - {{ 49, 45, 46, 47, 48, 49, 45}, - { 29, 25, 26, 27, 28, 29, 25}, - { 34, 30, 31, 32, 33, 34, 30}, - { 39, 35, 36, 37, 38, 39, 35}, - { 44, 40, 41, 42, 43, 44, 40}, - { 49, 45, 46, 47, 48, 49, 45}, - { 29, 25, 26, 27, 28, 29, 25}}, - - {{ 74, 70, 71, 72, 73, 74, 70}, - { 54, 50, 51, 52, 53, 54, 50}, - { 59, 55, 56, 57, 58, 59, 55}, - { 64, 60, 61, 62, 63, 64, 60}, - { 69, 65, 66, 67, 68, 69, 65}, - { 74, 70, 71, 72, 73, 74, 70}, - { 54, 50, 51, 52, 53, 54, 50}} - }, - { - {{ 99, 95, 96, 97, 98, 99, 95}, - { 79, 75, 76, 77, 78, 79, 75}, - { 84, 80, 81, 82, 83, 84, 80}, - { 89, 85, 86, 87, 88, 89, 85}, - { 94, 90, 91, 92, 93, 94, 90}, - { 99, 95, 96, 97, 98, 99, 95}, - { 79, 75, 76, 77, 78, 79, 75}}, - - {{124, 120, 121, 122, 123, 124, 120}, - {104, 100, 101, 102, 103, 104, 100}, - {109, 105, 106, 107, 108, 109, 105}, - {114, 110, 111, 112, 113, 114, 110}, - {119, 115, 116, 117, 118, 119, 115}, - {124, 120, 121, 122, 123, 124, 120}, - {104, 100, 101, 102, 103, 104, 100}}, - - {{149, 145, 146, 147, 148, 149, 145}, - {129, 125, 126, 127, 128, 129, 125}, - {134, 130, 131, 132, 133, 134, 130}, - {139, 135, 136, 137, 138, 139, 135}, - {144, 140, 141, 142, 143, 144, 140}, - {149, 145, 146, 147, 148, 149, 145}, - {129, 125, 126, 127, 128, 129, 125}} - } - } - }); - - myPad->getOperator()->associateInput(0,myInput); + std::shared_ptr<Node> myPad = + Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Wrap); + auto op = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array4D<int, 2, 3, 7, 7>{ + // NCHW + {{{{24, 20, 21, 22, 23, 24, 20}, + {4, 0, 1, 2, 3, 4, 0}, + {9, 5, 6, 7, 8, 9, 5}, + {14, 10, 11, 12, 13, 14, 10}, + {19, 15, 16, 17, 18, 19, 15}, + {24, 20, 21, 22, 23, 24, 20}, + {4, 0, 1, 2, 3, 4, 0}}, + + {{49, 45, 46, 47, 48, 49, 45}, + 
{29, 25, 26, 27, 28, 29, 25}, + {34, 30, 31, 32, 33, 34, 30}, + {39, 35, 36, 37, 38, 39, 35}, + {44, 40, 41, 42, 43, 44, 40}, + {49, 45, 46, 47, 48, 49, 45}, + {29, 25, 26, 27, 28, 29, 25}}, + + {{74, 70, 71, 72, 73, 74, 70}, + {54, 50, 51, 52, 53, 54, 50}, + {59, 55, 56, 57, 58, 59, 55}, + {64, 60, 61, 62, 63, 64, 60}, + {69, 65, 66, 67, 68, 69, 65}, + {74, 70, 71, 72, 73, 74, 70}, + {54, 50, 51, 52, 53, 54, 50}}}, + {{{99, 95, 96, 97, 98, 99, 95}, + {79, 75, 76, 77, 78, 79, 75}, + {84, 80, 81, 82, 83, 84, 80}, + {89, 85, 86, 87, 88, 89, 85}, + {94, 90, 91, 92, 93, 94, 90}, + {99, 95, 96, 97, 98, 99, 95}, + {79, 75, 76, 77, 78, 79, 75}}, + + {{124, 120, 121, 122, 123, 124, 120}, + {104, 100, 101, 102, 103, 104, 100}, + {109, 105, 106, 107, 108, 109, 105}, + {114, 110, 111, 112, 113, 114, 110}, + {119, 115, 116, 117, 118, 119, 115}, + {124, 120, 121, 122, 123, 124, 120}, + {104, 100, 101, 102, 103, 104, 100}}, + + {{149, 145, 146, 147, 148, 149, 145}, + {129, 125, 126, 127, 128, 129, 125}, + {134, 130, 131, 132, 133, 134, 130}, + {139, 135, 136, 137, 138, 139, 135}, + {144, 140, 141, 142, 143, 144, 140}, + {149, 145, 146, 147, 148, 149, 145}, + {129, 125, 126, 127, 128, 129, 125}}}}}); + + myPad->getOperator()->associateInput(0, myInput); myPad->getOperator()->setDataType(DataType::Int32); myPad->getOperator()->setBackend("cpu"); myPad->forward(); diff --git a/unit_tests/operator/Test_PaddedConv.cpp b/unit_tests/operator/Test_PaddedConv.cpp index b7584ad069336a270ed07c32d4c07552888b6587..3e2bad72bb9a874e5ce8af7753b22fd4076640bb 100644 --- a/unit_tests/operator/Test_PaddedConv.cpp +++ b/unit_tests/operator/Test_PaddedConv.cpp @@ -24,133 +24,88 @@ using namespace Aidge; TEST_CASE("[cpu/operator] PaddedConv(forward)", "[PaddedConv][CPU]") { SECTION("Classic Conv") { - std::shared_ptr<Node> myConv = PaddedConv(3,4,{3,3}, "myconv"); - auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator()); - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { - { - { - {{ 0, 1, 2}, - { 3, 4, 5}, - { 6, 7, 8}}, - {{ 9, 10, 11}, - { 12, 13, 14}, - { 15, 16, 17}}, - {{ 18, 19, 20}, - { 21, 22, 23}, - { 24, 25, 26}} - }, - { - {{ 27, 28, 29}, - { 30, 31, 32}, - { 33, 34, 35}}, - {{ 36, 37, 38}, - { 39, 40, 41}, - { 42, 43, 44}}, - {{ 45, 46, 47}, - { 48, 49, 50}, - { 51, 52, 53}} - }, - { - {{ 54, 55, 56}, - { 57, 58, 59}, - { 60, 61, 62}}, - {{ 63, 64, 65}, - { 66, 67, 68}, - { 69, 70, 71}}, - {{ 72, 73, 74}, - { 75, 76, 77}, - { 78, 79, 80}} - }, - { - {{ 81, 82, 83}, - { 84, 85, 86}, - { 87, 88, 89}}, - {{ 90, 91, 92}, - { 93, 94, 95}, - { 96, 97, 98}}, - {{ 99, 100, 101}, - {102, 103, 104}, - {105, 106, 107}} - } - } - }); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - 
{{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { - { - { - {{ 15226, 15577, 15928}, - { 16981, 17332, 17683}, - { 18736, 19087, 19438}}, - {{ 37818, 38898, 39978}, - { 43218, 44298, 45378}, - { 48618, 49698, 50778}}, - {{ 60426, 62235, 64044}, - { 69471, 71280, 73089}, - { 78516, 80325, 82134}}, - {{ 83016, 85554, 88092}, - { 95706, 98244, 100782}, - {108396, 110934, 113472}} - }, - { - {{ 41551, 41902, 42253}, - { 43306, 43657, 44008}, - { 45061, 45412, 45763}}, - {{118818, 119898, 120978}, - {124218, 125298, 126378}, - {129618, 130698, 131778}}, - {{196101, 197910, 199719}, - {205146, 206955, 208764}, - {214191, 216000, 217809}}, - {{273366, 275904, 278442}, - {286056, 288594, 291132}, - {298746, 301284, 303822}} - } - } - }); - - myConv->getOperator()->associateInput(0,myInput); + std::shared_ptr<Node> myConv = PaddedConv(3, 4, {3, 3}, "myconv"); + auto op = + std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); + std::shared_ptr<Tensor> myWeights = + std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{ + {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}, + {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}}, + {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}}, + {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}}, + {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}}, + {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}}, + {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}}, + {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}}, + {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}}, + {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}}, + {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}}, + {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}}); + std::shared_ptr<Tensor> myBias = + std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array4D<int, 2, 4, 3, 3>{{{{{15226, 15577, 15928}, + {16981, 17332, 17683}, + {18736, 19087, 19438}}, + {{37818, 38898, 39978}, + {43218, 44298, 45378}, + {48618, 49698, 50778}}, + {{60426, 62235, 64044}, + {69471, 71280, 73089}, + {78516, 80325, 82134}}, + {{83016, 85554, 88092}, + {95706, 98244, 100782}, + {108396, 110934, 113472}}}, + {{{41551, 41902, 42253}, + {43306, 43657, 44008}, + {45061, 45412, 45763}}, + {{118818, 119898, 120978}, + {124218, 125298, 126378}, + {129618, 130698, 131778}}, + {{196101, 197910, 199719}, + {205146, 206955, 208764}, + {214191, 216000, 217809}}, + {{273366, 275904, 278442}, + {286056, 288594, 291132}, + {298746, 301284, 303822}}}}}); + + myConv->getOperator()->associateInput(0, 
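The Classic Conv expectation above can be sanity-checked by hand: the first output value is the 3x3x3 dot product of the first filter with the top-left input patch plus its bias, i.e. 312 + 3723 + 11184 + 7 = 15226, and the 3x3 spatial extent follows from 5 - 3 + 1 = 3. The padded variant tested below only changes this shape arithmetic, since {1, 1, 1, 1} padding gives 5 + 2 - 3 + 1 = 5. A small sketch of that rule, assuming stride and dilation of 1:

    #include <cstddef>

    // Convolution output extent along one spatial axis (stride 1, dilation 1):
    //   out = in + padBegin + padEnd - kernel + 1
    constexpr std::size_t convOutDim(std::size_t in, std::size_t kernel,
                                     std::size_t padBegin, std::size_t padEnd) {
        return in + padBegin + padEnd - kernel + 1;
    }
    static_assert(convOutDim(5, 3, 0, 0) == 3, "classic conv: 3x3 output");
    static_assert(convOutDim(5, 3, 1, 1) == 5, "padded conv: 5x5 output");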
myInput); myConv->input(1).first->getOperator()->setOutput(0, myWeights); myConv->input(2).first->getOperator()->setOutput(0, myBias); @@ -164,155 +119,112 @@ TEST_CASE("[cpu/operator] PaddedConv(forward)", "[PaddedConv][CPU]") { REQUIRE(*(op->getOutput(0)) == *myOutput); } SECTION("test Padding") { - std::shared_ptr<Node> myConv = PaddedConv(3,4,{3,3}, "myconv", {1,1}, {1,1,1,1}); - auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator()); - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { - { - { - {{ 0, 1, 2}, - { 3, 4, 5}, - { 6, 7, 8}}, - {{ 9, 10, 11}, - { 12, 13, 14}, - { 15, 16, 17}}, - {{ 18, 19, 20}, - { 21, 22, 23}, - { 24, 25, 26}} - }, - { - {{ 27, 28, 29}, - { 30, 31, 32}, - { 33, 34, 35}}, - {{ 36, 37, 38}, - { 39, 40, 41}, - { 42, 43, 44}}, - {{ 45, 46, 47}, - { 48, 49, 50}, - { 51, 52, 53}} - }, - { - {{ 54, 55, 56}, - { 57, 58, 59}, - { 60, 61, 62}}, - {{ 63, 64, 65}, - { 66, 67, 68}, - { 69, 70, 71}}, - {{ 72, 73, 74}, - { 75, 76, 77}, - { 78, 79, 80}} - }, - { - {{ 81, 82, 83}, - { 84, 85, 86}, - { 87, 88, 89}}, - {{ 90, 91, 92}, - { 93, 94, 95}, - { 96, 97, 98}}, - {{ 99, 100, 101}, - {102, 103, 104}, - {105, 106, 107}} - } - } - }); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> { - { - { - {{ 6895, 10225, 10486, 10747, 7063}, - { 10303, 15226, 15577, 15928, 10429}, - { 11518, 16981, 17332, 17683, 11554}, - { 12733, 18736, 19087, 19438, 12679}, - { 8047, 11791, 11998, 12205, 7927}}, - - {{ 15960, 24069, 24816, 25563, 17100}, - { 25119, 37818, 38898, 39978, 26703}, - { 28764, 43218, 44298, 45378, 30258}, - { 32409, 48618, 49698, 50778, 33813}, - { 21972, 32925, 33618, 34311, 22824}}, - - {{ 25041, 37929, 39162, 40395, 27153}, - { 39951, 60426, 62235, 64044, 42993}, - { 46026, 69471, 71280, 73089, 48978}, - { 52101, 78516, 80325, 82134, 54963}, - { 35913, 54075, 55254, 56433, 37737}}, - - {{ 34104, 51771, 53490, 55209, 37188}, - { 54765, 83016, 85554, 88092, 59265}, - { 63270, 95706, 98244, 100782, 67680}, - { 71775, 108396, 110934, 113472, 76095}, - { 49836, 75207, 76872, 78537, 52632}} - }, - { - {{ 20395, 29800, 30061, 30322, 19663}, - { 28528, 41551, 41902, 42253, 27304}, - { 29743, 43306, 43657, 44008, 28429}, - { 30958, 45061, 45412, 45763, 29554}, - { 18847, 27316, 27523, 27730, 17827}}, - - {{ 53760, 80094, 80841, 81588, 54000}, - { 79794, 118818, 119898, 120978, 80028}, - { 83439, 124218, 125298, 126378, 83583}, - { 87084, 129618, 130698, 131778, 87138}, - { 57072, 84900, 85593, 
86286, 57024}}, - - {{ 87141, 130404, 131637, 132870, 88353}, - {131076, 196101, 197910, 199719, 132768}, - {137151, 205146, 206955, 208764, 138753}, - {143226, 214191, 216000, 217809, 144738}, - { 95313, 142500, 143679, 144858, 96237}}, - - {{120504, 180696, 182415, 184134, 122688}, - {182340, 273366, 275904, 278442, 185490}, - {190845, 286056, 288594, 291132, 193905}, - {199350, 298746, 301284, 303822, 202320}, - {133536, 200082, 201747, 203412, 135432}} - } - } - }); - - myConv->getOperator()->associateInput(0,myInput); + std::shared_ptr<Node> myConv = + PaddedConv(3, 4, {3, 3}, "myconv", {1, 1}, {1, 1, 1, 1}); + auto op = + std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); + std::shared_ptr<Tensor> myWeights = + std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{ + {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}, + {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}}, + {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}}, + {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}}, + {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}}, + {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}}, + {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}}, + {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}}, + {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}}, + {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}}, + {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}}, + {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}}); + std::shared_ptr<Tensor> myBias = + std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array4D<int, 2, 4, 5, 5>{ + {{{{6895, 10225, 10486, 10747, 7063}, + {10303, 15226, 15577, 15928, 10429}, + {11518, 16981, 17332, 17683, 11554}, + {12733, 18736, 19087, 19438, 12679}, + {8047, 11791, 11998, 12205, 7927}}, + + {{15960, 24069, 24816, 25563, 17100}, + {25119, 37818, 38898, 39978, 26703}, + {28764, 43218, 44298, 45378, 30258}, + {32409, 48618, 49698, 50778, 33813}, + {21972, 32925, 33618, 34311, 22824}}, + + {{25041, 37929, 39162, 40395, 27153}, + {39951, 60426, 62235, 64044, 42993}, + {46026, 69471, 71280, 73089, 48978}, + {52101, 78516, 80325, 82134, 54963}, + {35913, 54075, 55254, 56433, 37737}}, + + {{34104, 51771, 53490, 55209, 37188}, + {54765, 83016, 85554, 88092, 59265}, + {63270, 95706, 98244, 100782, 67680}, + {71775, 108396, 110934, 113472, 76095}, + {49836, 75207, 76872, 78537, 52632}}}, + {{{20395, 29800, 30061, 30322, 19663}, + {28528, 41551, 41902, 42253, 27304}, + {29743, 43306, 43657, 44008, 28429}, + {30958, 45061, 45412, 45763, 29554}, + {18847, 27316, 27523, 27730, 17827}}, + + {{53760, 80094, 80841, 81588, 54000}, + {79794, 118818, 119898, 120978, 80028}, + {83439, 124218, 125298, 126378, 83583}, + {87084, 
129618, 130698, 131778, 87138},
+                      {57072, 84900, 85593, 86286, 57024}},
+
+                     {{87141, 130404, 131637, 132870, 88353},
+                      {131076, 196101, 197910, 199719, 132768},
+                      {137151, 205146, 206955, 208764, 138753},
+                      {143226, 214191, 216000, 217809, 144738},
+                      {95313, 142500, 143679, 144858, 96237}},
+
+                     {{120504, 180696, 182415, 184134, 122688},
+                      {182340, 273366, 275904, 278442, 185490},
+                      {190845, 286056, 288594, 291132, 193905},
+                      {199350, 298746, 301284, 303822, 202320},
+                      {133536, 200082, 201747, 203412, 135432}}}}});
+
+        myConv->getOperator()->associateInput(0, myInput);
         myConv->input(1).first->getOperator()->setOutput(0, myWeights);
         myConv->input(2).first->getOperator()->setOutput(0, myBias);
diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp
index cb5d8872c9c7242bb4aa4efca388d53b578417f9..a833f0273835294bf897ac85805776b606c57df0 100644
--- a/unit_tests/operator/Test_PowImpl.cpp
+++ b/unit_tests/operator/Test_PowImpl.cpp
@@ -10,14 +10,14 @@
  ********************************************************************************/

 #include <catch2/catch_test_macros.hpp>
-#include <cmath>
-#include <cstddef>  // std::size_t
-#include <cstdint>  // std::uint16_t
 #include <chrono>
+#include <cmath>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iostream>
 #include <memory>
-#include <numeric>  // std::accumulate
-#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution

 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Pow.hpp"
@@ -30,24 +30,28 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(10));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                          std::size_t(5));
+    std::uniform_int_distribution<int> boolDist(0, 1);

-    // Create MatPow Operator
+    // Create Pow Operator
     std::shared_ptr<Node> myPow = Pow();
-    auto op = std::static_pointer_cast<OperatorTensor>(myPow-> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(myPow->getOperator());
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");

     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Float32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Float32);
     T1->setBackend("cpu");

@@ -62,12 +66,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
     std::chrono::duration<double, std::micro> duration{};

     SECTION("PowImpl_cpu::forward()") {
-        SECTION("Scalar / Scalar") {
-
-        }
-        SECTION("Scalar / +1-D Tensor") {
-
-        }
+        SECTION("Scalar / Scalar") {}
+        SECTION("Scalar / +1-D Tensor") {}
         SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
             std::size_t number_of_operation =
0; @@ -78,13 +78,17 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { for (std::size_t i = 0; i < nbDims; ++i) { dims.push_back(dimSizeDist(gen)); } - const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dims.cbegin(), + dims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; // without broadcasting - float* array0 = new float[nb_elements]; - float* array1 = new float[nb_elements]; - float* result = new float[nb_elements]; + float *array0 = new float[nb_elements]; + float *array1 = new float[nb_elements]; + float *result = new float[nb_elements]; for (std::size_t i = 0; i < nb_elements; ++i) { array0[i] = valueDist(gen); @@ -94,21 +98,23 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { // input0 T0->resize(dims); - T0 -> getImpl() -> setRawPtr(array0, nb_elements); + T0->getImpl()->setRawPtr(array0, nb_elements); // input1 T1->resize(dims); - T1 -> getImpl() -> setRawPtr(array1, nb_elements); + T1->getImpl()->setRawPtr(array1, nb_elements); // results Tres->resize(dims); - Tres -> getImpl() -> setRawPtr(result, nb_elements); + Tres->getImpl()->setRawPtr(result, nb_elements); op->forwardDims(); start = std::chrono::system_clock::now(); myPow->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -118,8 +124,10 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { // with broadcasting } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } SECTION("+1-D Tensor / +1-D Tensor - broadcasting") { @@ -127,7 +135,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors - // handle dimensions, replace some dimensions with '1' to get broadcasting + // handle dimensions, replace some dimensions with '1' to get + // broadcasting constexpr std::size_t nbDims = 4; std::vector<std::size_t> dims; for (std::size_t i = 0; i < nbDims; ++i) { @@ -147,37 +156,62 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { } // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + float *array1 = + new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < dims0[0] * dims0[1] * dims0[2] * dims0[3]; + ++i) { array0[i] = valueDist(gen); } - for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) { + for (std::size_t i = 0; + i < dims1[0] * dims1[1] * dims1[2] * dims1[3]; + ++i) { array1[i] = valueDist(gen); } // compute true result - const std::size_t strides0[nbDims] = 
{dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1[1] * dims1[2] * dims1[3], + dims1[2] * dims1[3], + dims1[3], + 1}; for (std::size_t a = 0; a < dimsOut[0]; ++a) { for (std::size_t b = 0; b < dimsOut[1]; ++b) { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) - + strides1[1] * ((dims1[1] > 1) ? b : 0); + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = + strides1[0] * ((dims1[0] > 1) ? a : 0) + + strides1[1] * ((dims1[1] > 1) ? b : 0); for (std::size_t c = 0; c < dimsOut[2]; ++c) { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); for (std::size_t d = 0; d < dimsOut[3]; ++d) { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1[2] > 1) ? c : 0) - + ((dims1[3] > 1) ? d : 0); - result[idx_out + d] = std::pow(array0[idx0], array1[idx1]); - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " ** " << array1[idx1] << " -> " << idx_out + d << std::endl; + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = + idx1_0 + + strides1[2] * ((dims1[2] > 1) ? c : 0) + + ((dims1[3] > 1) ? 
d : 0); + result[idx_out + d] = + std::pow(array0[idx0], array1[idx1]); + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " ** " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -186,22 +220,30 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]); + T1->getImpl()->setRawPtr( + array1, + dims1[0] * dims1[1] * dims1[2] * dims1[3]); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); myPow->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -210,15 +252,23 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } SECTION("+1-D Tensor / 1-D Tensor") { std::size_t number_of_operation = 0; - std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3)); + std::uniform_int_distribution<std::size_t> nbRemovedDimsDist( + std::size_t(1), + std::size_t(3)); for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors @@ -235,15 +285,24 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { dims1[i] = 1; } } - dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen)); + dims1.erase(dims1.cbegin(), + dims1.cbegin() + nbRemovedDimsDist(gen)); // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); - float* array1 = new float[array1_size]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + std::size_t array1_size = + std::accumulate(dims1.cbegin(), + dims1.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + float *array1 = new float[array1_size]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < (dims0[0] * dims0[1] * dims0[2] * 
dims0[3]); + ++i) { array0[i] = valueDist(gen); } for (std::size_t i = 0; i < array1_size; ++i) { @@ -252,27 +311,48 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { // compute true result auto dims1_tmp = dims1; - dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1)); - - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1}; + dims1_tmp.insert(dims1_tmp.cbegin(), + 4 - dims1_tmp.size(), + std::size_t(1)); + + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[3], + 1}; for (std::size_t a = 0; a < dimsOut[0]; ++a) { for (std::size_t b = 0; b < dimsOut[1]; ++b) { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) - + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = + strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) + + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); for (std::size_t c = 0; c < dimsOut[2]; ++c) { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); for (std::size_t d = 0; d < dimsOut[3]; ++d) { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0) - + ((dims1_tmp[3] > 1) ? d : 0); - result[idx_out + d] = std::pow(array0[idx0], array1[idx1]); - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " ** " << array1[idx1] << " -> " << idx_out + d << std::endl; + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = + idx1_0 + + strides1[2] * + ((dims1_tmp[2] > 1) ? c : 0) + + ((dims1_tmp[3] > 1) ? 
d : 0); + result[idx_out + d] = + std::pow(array0[idx0], array1[idx1]); + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " ** " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -281,22 +361,28 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, array1_size); + T1->getImpl()->setRawPtr(array1, array1_size); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); myPow->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -305,95 +391,51 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } } - SECTION("PowImpl_cpu::backward()") { SECTION("3D Tensors") { - const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( - { - { - { - {2.0, 3.0}, - {4.0, 5.0} - }, - { - {6.0, 7.0}, - {8.0, 9.0} - } - } - } - )); - const auto input1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( - { - { - { - {1.0, 2.0}, - {3.0, 2.0} - }, - { - {2.0, 3.0}, - {1.0, 0.5} - } - } - } - )); - const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( - { - { - { - {0.5, 1.0}, - {1.5, 2.0} - }, - { - {2.5, 3.0}, - {3.5, 4.0} - } - } - } - )); - const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( - { - { - { - {0.50000000, 6.00000000}, - {72.00000000, 20.00000000} - }, - { - {30.00000000, 441.00000000}, - {3.50000000, 0.66666669} - } - } - } - )); - const auto expectedGrad1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( - { - { - { - { 0.693147182, 9.88751030}, - {1.33084259e+02, 8.04718933e+01} - }, - { - {1.61258362e+02, 2.00234143e+03}, - {5.82243652e+01, 2.63666954e+01} - } - } - } - )); - for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1}) - { - T->setBackend("cpu") ; - T->setDataType(DataType::Float32); + const auto input0 = + std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + {{{{2.0, 3.0}, {4.0, 5.0}}, {{6.0, 7.0}, {8.0, 9.0}}}})); + const auto input1 = + std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + {{{{1.0, 2.0}, {3.0, 2.0}}, {{2.0, 3.0}, {1.0, 0.5}}}})); + const auto gradOut = + 
std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + {{{{0.5, 1.0}, {1.5, 2.0}}, {{2.5, 3.0}, {3.5, 4.0}}}})); + const auto expectedGrad0 = + std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + {{{{0.50000000, 6.00000000}, {72.00000000, 20.00000000}}, + {{30.00000000, 441.00000000}, + {3.50000000, 0.66666669}}}})); + const auto expectedGrad1 = + std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + {{{{0.693147182, 9.88751030}, + {1.33084259e+02, 8.04718933e+01}}, + {{1.61258362e+02, 2.00234143e+03}, + {5.82243652e+01, 2.63666954e+01}}}})); + for (const auto T : + {input0, input1, gradOut, expectedGrad0, expectedGrad1}) { + T->setBackend("cpu"); + T->setDataType(DataType::Float32); } std::shared_ptr<Node> powOp = Pow(); - auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator()); + auto opr = + std::static_pointer_cast<OperatorTensor>(powOp->getOperator()); opr->setDataType(DataType::Float32); opr->setBackend("cpu"); opr->associateInput(0, input0); @@ -402,71 +444,40 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { opr->forward(); powOp->backward(); - REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0)); - REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1)); + REQUIRE( + approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0)); + REQUIRE( + approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1)); } SECTION("Broadcasting") { - const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( - { - { - { - {1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0} - }, - { - {1.5, 2.5, 3.5}, - {4.5, 5.5, 6.5} - } - } - } - )); - const auto input1 = std::make_shared<Tensor>(Array1D<float, 3>( - { - {0.1, 0.2, 0.3} - } - )); - - const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( - { - { - { - {1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0} - }, - { - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0} - } - } - } - )); - const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( - { - { - { - {0.10000000, 0.22973967, 0.41711676}, - {0.11486985, 0.27594593, 0.51353097} - }, - { - {0.41655189, 0.48044977, 0.49926791}, - {0.07748720, 0.10227509, 0.08092485} - } - } - } - )); - const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>( - { - {14.14779854, 22.99299049, 33.56402588} - } - )); - - for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1}) - { - T->setBackend("cpu") ; - T->setDataType(DataType::Float32); + const auto input0 = + std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}, + {{1.5, 2.5, 3.5}, {4.5, 5.5, 6.5}}}})); + const auto input1 = + std::make_shared<Tensor>(Array1D<float, 3>({{0.1, 0.2, 0.3}})); + + const auto gradOut = + std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}, + {{6.0, 5.0, 4.0}, {3.0, 2.0, 1.0}}}})); + const auto expectedGrad0 = + std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{0.10000000, 0.22973967, 0.41711676}, + {0.11486985, 0.27594593, 0.51353097}}, + {{0.41655189, 0.48044977, 0.49926791}, + {0.07748720, 0.10227509, 0.08092485}}}})); + const auto expectedGrad1 = std::make_shared<Tensor>( + Array1D<float, 3>({{14.14779854, 22.99299049, 33.56402588}})); + + for (const auto T : + {input0, input1, gradOut, expectedGrad0, expectedGrad1}) { + T->setBackend("cpu"); + T->setDataType(DataType::Float32); } std::shared_ptr<Node> powOp = Pow(); - auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator()); + auto opr = + std::static_pointer_cast<OperatorTensor>(powOp->getOperator()); 
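The expectedGrad tensors in both backward sections follow from the analytic derivatives of z = x^y, with the gradient of a broadcast exponent additionally summed over its broadcast axes. A minimal per-element sketch (illustrative names, not the Aidge kernel):

    #include <cmath>

    // For z = pow(x, y): dz/dx = y * pow(x, y - 1), dz/dy = pow(x, y) * log(x).
    // The incoming gradient g is chained in; under broadcasting, the dy terms
    // are then reduced (summed) over the axes along which y was broadcast.
    float powGradX(float x, float y, float g) {
        return g * y * std::pow(x, y - 1.0f);
    }
    float powGradY(float x, float y, float g) {
        return g * std::pow(x, y) * std::log(x);
    }

For example, x = 2, y = 1, g = 0.5 gives powGradX = 0.5 and powGradY = 0.5 * 2 * ln 2 ≈ 0.6931, matching the first entries of expectedGrad0 and expectedGrad1 above; the broadcast case's expectedGrad1 entries are the corresponding sums over the 2x2 leading axes.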
opr->setDataType(DataType::Float32); opr->setBackend("cpu"); opr->associateInput(0, input0); @@ -475,8 +486,10 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { powOp->forward(); powOp->backward(); - REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0)); - REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1)); + REQUIRE( + approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0)); + REQUIRE( + approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1)); } } }
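The expected gradients in the Pow backward tests above follow directly from the analytic derivatives of z = x^y, namely dz/dx = y * x^(y-1) and dz/dy = x^y * ln(x), each scaled by the incoming gradient. The standalone sketch below (the file name is hypothetical; it is not part of the patch and depends only on the C++ standard library) recomputes the expectedGrad0/expectedGrad1 values of the "3D Tensors" case from those formulas:

// pow_backward_check.cpp -- standalone sanity check, not part of the patch.
// Recomputes the "3D Tensors" backward expectations from the analytic
// gradients of z = x^y:  dz/dx = y * x^(y-1) * g,  dz/dy = x^y * ln(x) * g.
#include <cassert>
#include <cmath>
#include <cstddef>

int main() {
    // Flattened input0, input1, gradOut and expected gradients from the test.
    const float x[8] = {2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
    const float y[8] = {1.f, 2.f, 3.f, 2.f, 2.f, 3.f, 1.f, 0.5f};
    const float g[8] = {0.5f, 1.f, 1.5f, 2.f, 2.5f, 3.f, 3.5f, 4.f};
    const float grad0[8] = {0.5f, 6.f, 72.f, 20.f, 30.f, 441.f, 3.5f, 0.66666669f};
    const float grad1[8] = {0.693147182f, 9.88751030f, 133.084259f, 80.4718933f,
                            161.258362f,  2002.34143f, 58.2243652f, 26.3666954f};
    for (std::size_t i = 0; i < 8; ++i) {
        const float dx = y[i] * std::pow(x[i], y[i] - 1.f) * g[i];
        const float dy = std::pow(x[i], y[i]) * std::log(x[i]) * g[i];
        // Loose relative tolerance, since the test values are float-rounded.
        assert(std::abs(dx - grad0[i]) <= 1e-4f * std::abs(grad0[i]) + 1e-6f);
        assert(std::abs(dy - grad1[i]) <= 1e-4f * std::abs(grad1[i]) + 1e-6f);
    }
    return 0;
}

For example, for x = 2, y = 1, g = 0.5 this gives dx = 1 * 2^0 * 0.5 = 0.5 and dy = 2 * ln(2) * 0.5 = ln(2) ≈ 0.693147, matching the first entries of expectedGrad0 and expectedGrad1.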
diff --git a/unit_tests/operator/Test_ReLUImpl.cpp b/unit_tests/operator/Test_ReLUImpl.cpp index 106d29ecfbf8ba785b4f9e5dba75daa272a86b26..b760929e32c97cfaa262099140641ea6ef8136e8 100644 --- a/unit_tests/operator/Test_ReLUImpl.cpp +++ b/unit_tests/operator/Test_ReLUImpl.cpp @@ -18,21 +18,19 @@ #include <memory> - using namespace Aidge; TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { SECTION("1D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> { - {0, 1, 2,-3, 4,-5,-6, 7, 8, 9} - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,10> { - {0, 1, 2, 0, 4, 0, 0, 7, 8, 9} - }); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array1D<int, 10>{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array1D<int, 10>{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}}); std::shared_ptr<Node> myReLU = ReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator()); - op->associateInput(0,input0); + auto op = + std::static_pointer_cast<OperatorTensor>(myReLU->getOperator()); + op->associateInput(0, input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); myReLU->forward(); @@ -40,22 +38,17 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { } SECTION("2D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,10> { - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - } - }); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array2D<int, 2, 10>{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array2D<int, 2, 10>{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, + {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}); std::shared_ptr<Node> myReLU = ReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator()); - op->associateInput(0,input0); + auto op = + std::static_pointer_cast<OperatorTensor>(myReLU->getOperator()); + op->associateInput(0, input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); myReLU->forward(); @@ -63,34 +56,21 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { } SECTION("3D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> { - { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,2,10> { - { - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - }, - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - } - } - }); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array3D<int, 2, 2, 10>{{{{0, 1, 2, 
-3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array3D<int, 2, 2, 10>{{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, + {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}, + {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, + {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}}); std::shared_ptr<Node> myReLU = ReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator()); - op->associateInput(0,input0); + auto op = + std::static_pointer_cast<OperatorTensor>(myReLU->getOperator()); + op->associateInput(0, input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); myReLU->forward(); @@ -98,58 +78,30 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") { } SECTION("4D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { - { - { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - }, - { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { - { - { - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - }, - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - } - }, - { - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - }, - { - { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, - { 0, 4, 2, 0, 4, 0, 0, 7, 0,10} - } - } - } - }); + std::shared_ptr<Tensor> input0 = + std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{ + {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}, + {{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array4D<int, 2, 2, 2, 10>{{{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, + {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}, + {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, + {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}, + {{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, + {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}, + {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}, + {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}}}); std::shared_ptr<Node> myReLU = ReLU(); - auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator()); - op->associateInput(0,input0); + auto op = + std::static_pointer_cast<OperatorTensor>(myReLU->getOperator()); + op->associateInput(0, input0); op->setDataType(DataType::Int32); op->setBackend("cpu"); myReLU->forward(); diff --git a/unit_tests/operator/Test_ReduceMeanImpl.cpp b/unit_tests/operator/Test_ReduceMeanImpl.cpp index dd647c7ba3f90fe7f3554aae7133e97ffa9c99ba..a414a6403625aadce45400654371e10252ac5f7f 100644 --- a/unit_tests/operator/Test_ReduceMeanImpl.cpp +++ b/unit_tests/operator/Test_ReduceMeanImpl.cpp @@ -11,12 +11,12 @@ #include <catch2/catch_test_macros.hpp> #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution #include "aidge/data/Tensor.hpp" -#include "aidge/operator/ReduceMean.hpp" #include 
"aidge/operator/Conv.hpp" +#include "aidge/operator/ReduceMean.hpp" #include "aidge/backend/cpu.hpp" #include "aidge/utils/TensorUtils.hpp" @@ -24,16 +24,20 @@ using namespace Aidge; TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { - SECTION("ForwardDims") - { + SECTION("ForwardDims") { constexpr std::uint16_t NBTRIALS = 10; // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); - std::uniform_int_distribution<int> boolDist(0,1); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist( + std::size_t(2), + std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), + std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0, 1); SECTION("KeepDims") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { @@ -44,22 +48,27 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); expectedOutDims[i] = dims[i]; - if(boolDist(gen)) { + if (boolDist(gen)) { axes.push_back(i); expectedOutDims[i] = 1; } } - if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions - std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1); + if (axes.empty()) { // Default behaviour if no axes are + // provided is to reduce all dimensions + std::fill(expectedOutDims.begin(), + expectedOutDims.end(), + 1); } - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); myInput->zeros(); std::shared_ptr<Node> myReduceMean = ReduceMean(axes, true); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(); @@ -76,23 +85,27 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { std::vector<std::int32_t> axes; for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); - if(boolDist(gen)) { + if (boolDist(gen)) { axes.push_back(i); - } - else { + } else { expectedOutDims.push_back(dims[i]); } } - if (axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions - expectedOutDims = std::vector<DimSize_t>{1}; + if (axes.empty() || + expectedOutDims + .empty()) { // Default behaviour if no axes are + // provided is to reduce all dimensions + expectedOutDims = std::vector<DimSize_t>{1}; } - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); std::shared_ptr<Node> myReduceMean = ReduceMean(axes, false); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + 
myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -109,12 +122,15 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); } - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); - std::shared_ptr<Node> myReduceMean = ReduceMean(std::vector<int32_t>{}, false, true); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + std::shared_ptr<Node> myReduceMean = + ReduceMean(std::vector<int32_t>{}, false, true); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -131,12 +147,15 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); } - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); - std::shared_ptr<Node> myReduceMean = ReduceMean({}, false, false); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + std::shared_ptr<Node> myReduceMean = + ReduceMean({}, false, false); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -149,34 +168,20 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { } SECTION("KeepDims") { SECTION("test 1") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { - { - { - { 5.0, 1.0 }, - { 20.0, 2.0 } - }, - { - { 30.0, 1.0 }, - { 40.0, 2.0 } - }, - { - { 55.0, 1.0 }, - { 60.0, 2.0 } - } - } - }); - Tensor myOutput = Tensor(Array3D<float,3,1,2> { - { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}}, + {{30.0, 1.0}, {40.0, 2.0}}, + {{55.0, 1.0}, {60.0, 2.0}}}}); + Tensor myOutput = Tensor(Array3D<float, 3, 1, 2>{{ - {{ 12.5, 1.5 }}, - {{ 35.0, 1.5 }}, - {{ 57.5, 1.5 }} - } - }); + {{12.5, 1.5}}, + {{35.0, 1.5}}, + {{57.5, 1.5}}}}); std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 1); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceMean->forward(); @@ -185,37 +190,21 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { REQUIRE(*(op->getOutput(0)) == myOutput); } SECTION("test 2") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> { - { - { - { 0.0, 0.0 }, - { 1.0, 1.0 }, - { 2.0, 2.0 } - }, - { - { 3.0, 3.0 }, - { 4.0, 4.0 }, - { 5.0, 5.0 } - }, - { - { 6.0, 6.0 }, - { 7.0, 7.0 }, - { 8.0, 8.0 } - } - } - }); - Tensor myOutput = Tensor(Array3D<float,3,1,1> { - { + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array3D<float, 3, 3, 2>{ + {{{0.0, 0.0}, {1.0, 1.0}, {2.0, 2.0}}, + {{3.0, 3.0}, {4.0, 
4.0}, {5.0, 5.0}}, + {{6.0, 6.0}, {7.0, 7.0}, {8.0, 8.0}}}}); + Tensor myOutput = Tensor(Array3D<float, 3, 1, 1>{{ - {{ 1.0 }}, - {{ 4.0 }}, - {{ 7.0 }} - } - }); + {{1.0}}, + {{4.0}}, + {{7.0}}}}); std::shared_ptr<Node> myReduceMean = ReduceMean({1, 2}, 1); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceMean->forward(); @@ -225,66 +214,37 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { } } SECTION("not_KeepDims") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { - { - { - { 5.0, 1.0 }, - { 20.0, 2.0 } - }, - { - { 30.0, 1.0 }, - { 40.0, 2.0 } - }, - { - { 55.0, 1.0 }, - { 60.0, 2.0 } - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> { - { - { 12.5, 1.5 }, - { 35.0, 1.5 }, - { 57.5, 1.5 } - } - }); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}}, + {{30.0, 1.0}, {40.0, 2.0}}, + {{55.0, 1.0}, {60.0, 2.0}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array2D<float, 3, 2>{{{12.5, 1.5}, {35.0, 1.5}, {57.5, 1.5}}}); std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 0); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceMean->forward(); op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == *myOutput); - } SECTION("all_axes") { SECTION("1") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { - { - { - { 5.0, 1.0 }, - { 20.0, 2.0 } - }, - { - { 30.0, 1.0 }, - { 40.0, 2.0 } - }, - { - { 55.0, 1.0 }, - { 60.0, 2.0 } - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> { - {18.25} - }); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}}, + {{30.0, 1.0}, {40.0, 2.0}}, + {{55.0, 1.0}, {60.0, 2.0}}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array1D<float, 1>{{18.25}}); std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceMean->forward(); @@ -293,20 +253,20 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { REQUIRE(*(op->getOutput(0)) == *myOutput); } SECTION("2") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<float,5,4> { - {{ 0.004232f, 0.105120f, 0.045124f, 0.009205f}, - { 0.000766f, 0.272162f, 0.503560f, 0.044163f}, - { 0.049755f, 0.000305f, 0.143634f, 0.013253f}, - { 0.096258f, 0.311231f, 0.358143f, 0.000452f}, - { 0.468617f, 0.015693f, 0.145316f, 0.000105f}} - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> { - {0.1293547f} - }); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array2D<float, 5, 4>{ + {{0.004232f, 0.105120f, 0.045124f, 
0.009205f}, + {0.000766f, 0.272162f, 0.503560f, 0.044163f}, + {0.049755f, 0.000305f, 0.143634f, 0.013253f}, + {0.096258f, 0.311231f, 0.358143f, 0.000452f}, + {0.468617f, 0.015693f, 0.145316f, 0.000105f}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array1D<float, 1>{{0.1293547f}}); std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceMean->forward(); @@ -314,26 +274,15 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput)); } SECTION("noop_with_empty_axes") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { - { - { - { 5.0, 1.0 }, - { 20.0, 2.0 } - }, - { - { 30.0, 1.0 }, - { 40.0, 2.0 } - }, - { - { 55.0, 1.0 }, - { 60.0, 2.0 } - } - } - }); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}}, + {{30.0, 1.0}, {40.0, 2.0}}, + {{55.0, 1.0}, {60.0, 2.0}}}}); std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0, 1); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceMean->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceMean->forward();
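The ReduceMean expectations above and the ReduceSum expectations below pin down the same arithmetic from two sides. The standalone sketch that follows (hypothetical file name, no Aidge dependencies) recomputes the "KeepDims / test 1" values of both tests by reducing axis 1 of the shared 3x2x2 input:

// reduce_axis1_check.cpp -- standalone sketch, not part of the patch.
// Reducing axis 1 of the 3x2x2 test input must give sums {25,3},{70,3},{115,3}
// (ReduceSum) and means {12.5,1.5},{35,1.5},{57.5,1.5} (ReduceMean).
#include <cassert>
#include <cstddef>

int main() {
    const float in[3][2][2] = {{{5.f, 1.f}, {20.f, 2.f}},
                               {{30.f, 1.f}, {40.f, 2.f}},
                               {{55.f, 1.f}, {60.f, 2.f}}};
    const float expectedMean[3][2] = {{12.5f, 1.5f}, {35.f, 1.5f}, {57.5f, 1.5f}};
    const float expectedSum[3][2] = {{25.f, 3.f}, {70.f, 3.f}, {115.f, 3.f}};
    for (std::size_t i = 0; i < 3; ++i) {
        for (std::size_t k = 0; k < 2; ++k) {
            float sum = 0.f;
            for (std::size_t j = 0; j < 2; ++j) // axis 1 is the reduced axis
                sum += in[i][j][k];
            // All values are exactly representable, so exact compares are safe.
            assert(sum == expectedSum[i][k]);
            assert(sum / 2.f == expectedMean[i][k]);
        }
    }
    return 0;
}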
diff --git a/unit_tests/operator/Test_ReduceSumImpl.cpp b/unit_tests/operator/Test_ReduceSumImpl.cpp index 49569d1f65ff6c51f9681632b16375605ab326e7..654227894cf543b307e7953309d063a4702b6757 100644 --- a/unit_tests/operator/Test_ReduceSumImpl.cpp +++ b/unit_tests/operator/Test_ReduceSumImpl.cpp @@ -11,12 +11,12 @@ #include <catch2/catch_test_macros.hpp> #include <memory> -#include <numeric> // std::accumulate -#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution #include "aidge/data/Tensor.hpp" -#include "aidge/operator/ReduceSum.hpp" #include "aidge/operator/Conv.hpp" +#include "aidge/operator/ReduceSum.hpp" #include "aidge/backend/cpu.hpp" #include "aidge/utils/TensorUtils.hpp" @@ -24,16 +24,20 @@ using namespace Aidge; TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { - SECTION("ForwardDims") - { + SECTION("ForwardDims") { constexpr std::uint16_t NBTRIALS = 10; // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); - std::uniform_int_distribution<int> boolDist(0,1); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f); // Random float distribution between 0.1 and 1.1 + std::uniform_int_distribution<std::size_t> dimSizeDist( + std::size_t(2), + std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), + std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0, 1); SECTION("KeepDims") { for (std::uint16_t trial = 0; trial < NBTRIALS; 
++trial) { @@ -44,22 +48,27 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); expectedOutDims[i] = dims[i]; - if(boolDist(gen)) { + if (boolDist(gen)) { axes.push_back(i); expectedOutDims[i] = 1; } } - if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions - std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1); + if (axes.empty()) { // Default behaviour if no axes are + // provided is to reduce all dimensions + std::fill(expectedOutDims.begin(), + expectedOutDims.end(), + 1); } - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); myInput->zeros(); std::shared_ptr<Node> myReduceSum = ReduceSum(axes, true); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(); @@ -76,23 +85,27 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { std::vector<std::int32_t> axes; for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); - if(boolDist(gen)) { + if (boolDist(gen)) { axes.push_back(i); - } - else { + } else { expectedOutDims.push_back(dims[i]); } } - if (axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions - expectedOutDims = std::vector<DimSize_t>{1}; + if (axes.empty() || + expectedOutDims + .empty()) { // Default behaviour if no axes are + // provided is to reduce all dimensions + expectedOutDims = std::vector<DimSize_t>{1}; } - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); std::shared_ptr<Node> myReduceSum = ReduceSum(axes, false); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -109,12 +122,15 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); } - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); - std::shared_ptr<Node> myReduceSum = ReduceSum(std::vector<int32_t>{}, false, true); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + std::shared_ptr<Node> myReduceSum = + ReduceSum(std::vector<int32_t>{}, false, true); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -131,12 +147,15 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { for (std::size_t i = 0; i < nbDims; i++) { dims[i] = dimSizeDist(gen); } - std::shared_ptr<Tensor> 
myInput = + std::make_shared<Tensor>(dims); myInput->setBackend("cpu"); myInput->setDataType(DataType::Float32); - std::shared_ptr<Node> myReduceSum = ReduceSum({}, false, false); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + std::shared_ptr<Node> myReduceSum = + ReduceSum({}, false, false); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -149,34 +168,20 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { } SECTION("KeepDims") { SECTION("test 1") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { - { - { - { 5.0, 1.0 }, - { 20.0, 2.0 } - }, - { - { 30.0, 1.0 }, - { 40.0, 2.0 } - }, - { - { 55.0, 1.0 }, - { 60.0, 2.0 } - } - } - }); - Tensor myOutput = Tensor(Array3D<float,3,1,2> { - { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}}, + {{30.0, 1.0}, {40.0, 2.0}}, + {{55.0, 1.0}, {60.0, 2.0}}}}); + Tensor myOutput = Tensor(Array3D<float, 3, 1, 2>{{ - {{ 25.0, 3.0 }}, - {{ 70.0, 3.0 }}, - {{ 115.0, 3.0 }} - } - }); + {{25.0, 3.0}}, + {{70.0, 3.0}}, + {{115.0, 3.0}}}}); std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 1); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceSum->forward(); @@ -185,37 +190,21 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { REQUIRE(*(op->getOutput(0)) == myOutput); } SECTION("test 2") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> { - { - { - { 0.0, 0.0 }, - { 1.0, 1.0 }, - { 2.0, 2.0 } - }, - { - { 3.0, 3.0 }, - { 4.0, 4.0 }, - { 5.0, 5.0 } - }, - { - { 6.0, 6.0 }, - { 7.0, 7.0 }, - { 8.0, 8.0 } - } - } - }); - Tensor myOutput = Tensor(Array3D<float,3,1,1> { - { + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array3D<float, 3, 3, 2>{ + {{{0.0, 0.0}, {1.0, 1.0}, {2.0, 2.0}}, + {{3.0, 3.0}, {4.0, 4.0}, {5.0, 5.0}}, + {{6.0, 6.0}, {7.0, 7.0}, {8.0, 8.0}}}}); + Tensor myOutput = Tensor(Array3D<float, 3, 1, 1>{{ - {{ 6.0 }}, - {{ 24.0 }}, - {{ 42.0 }} - } - }); + {{6.0}}, + {{24.0}}, + {{42.0}}}}); std::shared_ptr<Node> myReduceSum = ReduceSum({1, 2}, 1); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceSum->forward(); @@ -225,66 +214,37 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { } } SECTION("not_KeepDims") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { - { - { - { 5.0, 1.0 }, - { 20.0, 2.0 } - }, - { - { 30.0, 1.0 }, - { 40.0, 2.0 } - }, - { - { 55.0, 1.0 }, - { 60.0, 2.0 } - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> { - { - { 25.0, 3.0 }, - { 70.0, 3.0 }, - { 115.0, 3.0 } - } - }); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}}, + {{30.0, 1.0}, {40.0, 2.0}}, + {{55.0, 1.0}, {60.0, 
2.0}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array2D<float, 3, 2>{{{25.0, 3.0}, {70.0, 3.0}, {115.0, 3.0}}}); std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 0); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceSum->forward(); op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == *myOutput); - } SECTION("all_axes") { SECTION("1") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { - { - { - { 5.0, 1.0 }, - { 20.0, 2.0 } - }, - { - { 30.0, 1.0 }, - { 40.0, 2.0 } - }, - { - { 55.0, 1.0 }, - { 60.0, 2.0 } - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> { - {219.0} - }); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}}, + {{30.0, 1.0}, {40.0, 2.0}}, + {{55.0, 1.0}, {60.0, 2.0}}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array1D<float, 1>{{219.0}}); std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceSum->forward(); @@ -293,20 +253,20 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { REQUIRE(*(op->getOutput(0)) == *myOutput); } SECTION("2") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<float,5,4> { - {{ 0.004232f, 0.105120f, 0.045124f, 0.009205f}, - { 0.000766f, 0.272162f, 0.503560f, 0.044163f}, - { 0.049755f, 0.000305f, 0.143634f, 0.013253f}, - { 0.096258f, 0.311231f, 0.358143f, 0.000452f}, - { 0.468617f, 0.015693f, 0.145316f, 0.000105f}} - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> { - {2.587094f} - }); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array2D<float, 5, 4>{ + {{0.004232f, 0.105120f, 0.045124f, 0.009205f}, + {0.000766f, 0.272162f, 0.503560f, 0.044163f}, + {0.049755f, 0.000305f, 0.143634f, 0.013253f}, + {0.096258f, 0.311231f, 0.358143f, 0.000452f}, + {0.468617f, 0.015693f, 0.145316f, 0.000105f}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array1D<float, 1>{{2.587094f}}); std::shared_ptr<Node> myReduceSum = ReduceSum({0, 1}, 0); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceSum->forward(); @@ -314,26 +274,15 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput)); } SECTION("noop_with_empty_axes") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { - { - { - { 5.0, 1.0 }, - { 20.0, 2.0 } - }, - { - { 30.0, 1.0 }, - { 40.0, 2.0 } - }, - { - { 55.0, 1.0 }, - { 60.0, 2.0 } - } - } - }); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}}, + {{30.0, 1.0}, {40.0, 2.0}}, + {{55.0, 1.0}, {60.0, 
2.0}}}}); std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0, 1); - auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); - op->associateInput(0,myInput); + auto op = std::static_pointer_cast<OperatorTensor>( + myReduceSum->getOperator()); + op->associateInput(0, myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceSum->forward(); diff --git a/unit_tests/operator/Test_SliceImpl.cpp b/unit_tests/operator/Test_SliceImpl.cpp index 2b9f89e62c09c04a7f848c362336418ef62aecce..6bd5920f0c50410a5e699a6c0bbc019b50e79d76 100644 --- a/unit_tests/operator/Test_SliceImpl.cpp +++ b/unit_tests/operator/Test_SliceImpl.cpp @@ -18,22 +18,24 @@ using namespace Aidge; TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { SECTION("1D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> { - {0, 1, -2,-3, 4,-5,-6, 7, 8, 9} - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,3> { - {0, 1, -2} - }); - std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,1>{{0}}); - std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,1>{{3}}); - std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,1>{{0}}); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array1D<int, 10>{{0, 1, -2, -3, 4, -5, -6, 7, 8, 9}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array1D<int, 3>{{0, 1, -2}}); + std::shared_ptr<Tensor> starts = + std::make_shared<Tensor>(Array1D<int, 1>{{0}}); + std::shared_ptr<Tensor> ends = + std::make_shared<Tensor>(Array1D<int, 1>{{3}}); + std::shared_ptr<Tensor> axes = + std::make_shared<Tensor>(Array1D<int, 1>{{0}}); std::shared_ptr<Node> mySlice = Slice(); - auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); - mySlice->getOperator()->associateInput(0,input0); - mySlice->getOperator()->associateInput(1,starts); - mySlice->getOperator()->associateInput(2,ends); - mySlice->getOperator()->associateInput(3,axes); + auto op = + std::static_pointer_cast<OperatorTensor>(mySlice->getOperator()); + mySlice->getOperator()->associateInput(0, input0); + mySlice->getOperator()->associateInput(1, starts); + mySlice->getOperator()->associateInput(2, ends); + mySlice->getOperator()->associateInput(3, axes); mySlice->getOperator()->setDataType(DataType::Int32); mySlice->getOperator()->setBackend("cpu"); mySlice->forward(); @@ -44,28 +46,25 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { } SECTION("2D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,3> { - { - {-5,-6, 7}, - {-5,-6, 7} - } - }); - std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,2>{{0,5}}); - std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,2>{{2,8}}); - std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,2>{{0,1}}); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array2D<int, 2, 10>{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array2D<int, 2, 3>{{{-5, -6, 7}, {-5, -6, 7}}}); + std::shared_ptr<Tensor> starts = + std::make_shared<Tensor>(Array1D<int, 2>{{0, 5}}); + std::shared_ptr<Tensor> ends = + std::make_shared<Tensor>(Array1D<int, 2>{{2, 
8}}); + std::shared_ptr<Tensor> axes = + std::make_shared<Tensor>(Array1D<int, 2>{{0, 1}}); std::shared_ptr<Node> mySlice = Slice(); - auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); - mySlice->getOperator()->associateInput(0,input0); - mySlice->getOperator()->associateInput(1,starts); - mySlice->getOperator()->associateInput(2,ends); - mySlice->getOperator()->associateInput(3,axes); + auto op = + std::static_pointer_cast<OperatorTensor>(mySlice->getOperator()); + mySlice->getOperator()->associateInput(0, input0); + mySlice->getOperator()->associateInput(1, starts); + mySlice->getOperator()->associateInput(2, ends); + mySlice->getOperator()->associateInput(3, axes); mySlice->getOperator()->setDataType(DataType::Int32); mySlice->getOperator()->setBackend("cpu"); mySlice->forward(); @@ -76,35 +75,27 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { } SECTION("3D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> { - { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,1,1,3> { - { - { - { 4,-5,-6} - } - } - }); - std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,3>{{0,1,4}}); - std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,3>{{1,2,7}}); - std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,3>{{0,1,2}}); + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>( + Array3D<int, 2, 2, 10>{{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array3D<int, 1, 1, 3>{{{{4, -5, -6}}}}); + std::shared_ptr<Tensor> starts = + std::make_shared<Tensor>(Array1D<int, 3>{{0, 1, 4}}); + std::shared_ptr<Tensor> ends = + std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 7}}); + std::shared_ptr<Tensor> axes = + std::make_shared<Tensor>(Array1D<int, 3>{{0, 1, 2}}); std::shared_ptr<Node> mySlice = Slice(); - auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); - mySlice->getOperator()->associateInput(0,input0); - mySlice->getOperator()->associateInput(1,starts); - mySlice->getOperator()->associateInput(2,ends); - mySlice->getOperator()->associateInput(3,axes); + auto op = + std::static_pointer_cast<OperatorTensor>(mySlice->getOperator()); + mySlice->getOperator()->associateInput(0, input0); + mySlice->getOperator()->associateInput(1, starts); + mySlice->getOperator()->associateInput(2, ends); + mySlice->getOperator()->associateInput(3, axes); mySlice->getOperator()->setDataType(DataType::Int32); mySlice->getOperator()->setBackend("cpu"); mySlice->forward(); @@ -115,64 +106,40 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { } SECTION("4D Tensor") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { - { - { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - }, - { - { - { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3,11,-5,-6, 7,-1,10} - } - } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { - { - { - { - { 
0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - }, - { - { - { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3,11,-5,-6, 7,-1,10} - } - } - } - }); - std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,4>{{0,0,0,0}}); - std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,4>{{2,2,2,10}}); - std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,4>{{0,1,2,3}}); + std::shared_ptr<Tensor> input0 = + std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{ + {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}, + {{{0, 1, 2, -3, 6, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 11, -5, -6, 7, -1, 10}}}}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{ + {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}, + {{{0, 1, 2, -3, 6, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 11, -5, -6, 7, -1, 10}}}}}); + std::shared_ptr<Tensor> starts = + std::make_shared<Tensor>(Array1D<int, 4>{{0, 0, 0, 0}}); + std::shared_ptr<Tensor> ends = + std::make_shared<Tensor>(Array1D<int, 4>{{2, 2, 2, 10}}); + std::shared_ptr<Tensor> axes = + std::make_shared<Tensor>(Array1D<int, 4>{{0, 1, 2, 3}}); std::shared_ptr<Node> mySlice = Slice(); - auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); - mySlice->getOperator()->associateInput(0,input0); - mySlice->getOperator()->associateInput(1,starts); - mySlice->getOperator()->associateInput(2,ends); - mySlice->getOperator()->associateInput(3,axes); + auto op = + std::static_pointer_cast<OperatorTensor>(mySlice->getOperator()); + mySlice->getOperator()->associateInput(0, input0); + mySlice->getOperator()->associateInput(1, starts); + mySlice->getOperator()->associateInput(2, ends); + mySlice->getOperator()->associateInput(3, axes); mySlice->getOperator()->setDataType(DataType::Int32); mySlice->getOperator()->setBackend("cpu"); mySlice->forward(); @@ -183,43 +150,24 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { } SECTION("Attributes instead of inputs") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { - { - { - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - } - }, - { - { - { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} - }, - { - { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, - {-5, 4, 2,-3,11,-5,-6, 7,-1,10} - } - } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,1,1,5> { - { - { - { - { 0, 1, 2,-3, 4} - } - } - } - }); - - std::shared_ptr<Node> mySlice = Slice({0,0,0,0}, {1,1,1,5}, {0,1,2,3}, {1,1,1,1}); - auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); - mySlice->getOperator()->associateInput(0,input0); + std::shared_ptr<Tensor> input0 = + std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{ + {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + 
{-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}, + {{{0, 1, 2, -3, 6, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}, + {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}, + {-5, 4, 2, -3, 11, -5, -6, 7, -1, 10}}}}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array4D<int, 1, 1, 1, 5>{{{{{0, 1, 2, -3, 4}}}}}); + + std::shared_ptr<Node> mySlice = + Slice({0, 0, 0, 0}, {1, 1, 1, 5}, {0, 1, 2, 3}, {1, 1, 1, 1}); + auto op = + std::static_pointer_cast<OperatorTensor>(mySlice->getOperator()); + mySlice->getOperator()->associateInput(0, input0); mySlice->getOperator()->setDataType(DataType::Int32); mySlice->getOperator()->setBackend("cpu"); mySlice->forward(); @@ -230,44 +178,27 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { } SECTION("Different Steps") { - std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,4,2,8> { - { - { - { 0, 1, 2,-3, 4,-5,-6,7}, - {-5, 4, 2,-3, 4,-5,-6,-7} - }, - { - { 10, 11, 12,-13, 14,-15,-16,17}, - {-15, 14, 12,-13, 14,-15,-16,-17} - }, - { - { 20, 21, 22,-23, 24,-25,-26,27}, - {-25, 24, 22,-23, 24,-25,-26,-27} - }, - { - { 30, 31, 32,-33, 34,-35,-36,37}, - {-35, 34, 32,-33, 34,-35,-36,-37} - } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,1,3> { - { - { - { 7, 4, 1} - }, - { - { 27, 24, 21} - } - } - }); - - std::shared_ptr<Node> mySlice = Slice({0,0,7}, {4,1,0}, {0,1,2}, {2,1,-3}); + std::shared_ptr<Tensor> input0 = + std::make_shared<Tensor>(Array3D<int, 4, 2, 8>{ + {{{0, 1, 2, -3, 4, -5, -6, 7}, {-5, 4, 2, -3, 4, -5, -6, -7}}, + {{10, 11, 12, -13, 14, -15, -16, 17}, + {-15, 14, 12, -13, 14, -15, -16, -17}}, + {{20, 21, 22, -23, 24, -25, -26, 27}, + {-25, 24, 22, -23, 24, -25, -26, -27}}, + {{30, 31, 32, -33, 34, -35, -36, 37}, + {-35, 34, 32, -33, 34, -35, -36, -37}}}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array3D<int, 2, 1, 3>{{{{7, 4, 1}}, {{27, 24, 21}}}}); + + std::shared_ptr<Node> mySlice = + Slice({0, 0, 7}, {4, 1, 0}, {0, 1, 2}, {2, 1, -3}); // Steps are 2,1,-3 so the slice will be: // on Axis 0: from 0 to 4 by step of 2 // on Axis 1: from 0 to 1 by step of 1 // on Axis 2: from 7 to 0 by step of -3 (reverse the order of elements) - auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); - mySlice->getOperator()->associateInput(0,input0); + auto op = + std::static_pointer_cast<OperatorTensor>(mySlice->getOperator()); + mySlice->getOperator()->associateInput(0, input0); mySlice->getOperator()->setDataType(DataType::Int32); mySlice->getOperator()->setBackend("cpu"); mySlice->forward(); diff --git a/unit_tests/operator/Test_SoftmaxImpl.cpp b/unit_tests/operator/Test_SoftmaxImpl.cpp index da6c6f0d35a1db9ad9099a40b7e83459e14a20f5..17b384af123a5e55fe39e79f69844e97dfadb1ff 100644 --- a/unit_tests/operator/Test_SoftmaxImpl.cpp +++ b/unit_tests/operator/Test_SoftmaxImpl.cpp @@ -22,102 +22,121 @@ using namespace Aidge; TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") { SECTION("2D Tensor") { - std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<float,2,10> { - { - {-0.21908280, 0.62226844, -0.01738115, 0.49075750, 0.42159843, - -0.70403218, 0.95780319, 1.39435363, 0.25255841, 0.20038256}, - { 0.23626225, 1.84539008, 1.89050162, -0.64871430, 0.37908587, - 0.35077620, -0.78156322, -0.98952234, 0.04166317, 1.34357309} - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,10> { - { - {0.04883239, 0.11326669, 0.05974559, 0.09930880, 0.09267281, 0.03006749, - 
0.15842478, 0.24514021, 0.07825989, 0.07428131}, - {0.05429055, 0.27136859, 0.28389078, 0.02240700, 0.06262558, 0.06087753, - 0.01961952, 0.01593576, 0.04469007, 0.16429459} - } - }); + std::shared_ptr<Tensor> input = + std::make_shared<Tensor>(Array2D<float, 2, 10>{{{-0.21908280, + 0.62226844, + -0.01738115, + 0.49075750, + 0.42159843, + -0.70403218, + 0.95780319, + 1.39435363, + 0.25255841, + 0.20038256}, + {0.23626225, + 1.84539008, + 1.89050162, + -0.64871430, + 0.37908587, + 0.35077620, + -0.78156322, + -0.98952234, + 0.04166317, + 1.34357309}}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array2D<float, 2, 10>{{{0.04883239, + 0.11326669, + 0.05974559, + 0.09930880, + 0.09267281, + 0.03006749, + 0.15842478, + 0.24514021, + 0.07825989, + 0.07428131}, + {0.05429055, + 0.27136859, + 0.28389078, + 0.02240700, + 0.06262558, + 0.06087753, + 0.01961952, + 0.01593576, + 0.04469007, + 0.16429459}}}); std::shared_ptr<Node> mySoftmax = Softmax(1); - auto op = std::static_pointer_cast<OperatorTensor>(mySoftmax -> getOperator()); - op->associateInput(0,input); + auto op = + std::static_pointer_cast<OperatorTensor>(mySoftmax->getOperator()); + op->associateInput(0, input); op->setDataType(DataType::Float32); op->setBackend("cpu"); mySoftmax->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + float *resPtr = + static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr()); + float *expectedPtr = + static_cast<float *>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < expectedOutput->size(); ++i) { + REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001); } - } SECTION("4D Tensor") { - std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { - { - { - {{8.28257084e-01, 7.99335480e-01, 7.36702740e-01}, - {2.36729562e-01, 8.61912668e-01, 9.93067741e-01}, - {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}}, - {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01}, - {1.31294072e-01, 7.10182846e-01, 1.08420849e-04}, - {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}}, - {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01}, - {8.07861805e-01, 7.79679358e-01, 5.01209974e-01}, - {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}} - }, - { - {{6.22058094e-01, 2.32256651e-02, 6.18222237e-01}, - {9.58304763e-01, 2.11395025e-02, 4.95614648e-01}, - {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}}, - {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01}, - {1.12059772e-01, 7.64178872e-01, 7.60362148e-01}, - {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}}, - {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01}, - {9.52722490e-01, 6.35501027e-01, 5.67592978e-02}, - {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}} - } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { - { - { - {{0.45109013, 0.42849392, 0.43775153}, - {0.27246451, 0.35967633, 0.50454903}, - {0.20397615, 0.20457645, 0.33543545}}, - {{0.24571852, 0.34723747, 0.25694931}, - {0.24519968, 0.30904123, 0.18692467}, - {0.35646603, 0.28991172, 0.41476840}}, - {{0.30319133, 0.22426860, 0.30529919}, - {0.48233581, 0.33128241, 0.30852637}, - {0.43955776, 0.50551182, 0.24979614}} - }, - { - {{0.33434108, 0.20638679, 0.39505392}, - {0.41263384, 0.20198789, 0.33922729}, - {0.36339980, 0.34127754, 0.28713942}}, - {{0.19819947, 
0.33448750, 0.34715438}, - {0.17702937, 0.42464229, 0.44204772}, - {0.29093260, 0.33410171, 0.22306615}}, - {{0.46745953, 0.45912567, 0.25779176}, - {0.41033682, 0.37336978, 0.21872495}, - {0.34566763, 0.32462072, 0.48979440}} - } - } - }); + std::shared_ptr<Tensor> input = + std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{ + {{{{8.28257084e-01, 7.99335480e-01, 7.36702740e-01}, + {2.36729562e-01, 8.61912668e-01, 9.93067741e-01}, + {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}}, + {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01}, + {1.31294072e-01, 7.10182846e-01, 1.08420849e-04}, + {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}}, + {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01}, + {8.07861805e-01, 7.79679358e-01, 5.01209974e-01}, + {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}}, + {{{6.22058094e-01, 2.32256651e-02, 6.18222237e-01}, + {9.58304763e-01, 2.11395025e-02, 4.95614648e-01}, + {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}}, + {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01}, + {1.12059772e-01, 7.64178872e-01, 7.60362148e-01}, + {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}}, + {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01}, + {9.52722490e-01, 6.35501027e-01, 5.67592978e-02}, + {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}}}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{ + {{{{0.45109013, 0.42849392, 0.43775153}, + {0.27246451, 0.35967633, 0.50454903}, + {0.20397615, 0.20457645, 0.33543545}}, + {{0.24571852, 0.34723747, 0.25694931}, + {0.24519968, 0.30904123, 0.18692467}, + {0.35646603, 0.28991172, 0.41476840}}, + {{0.30319133, 0.22426860, 0.30529919}, + {0.48233581, 0.33128241, 0.30852637}, + {0.43955776, 0.50551182, 0.24979614}}}, + {{{0.33434108, 0.20638679, 0.39505392}, + {0.41263384, 0.20198789, 0.33922729}, + {0.36339980, 0.34127754, 0.28713942}}, + {{0.19819947, 0.33448750, 0.34715438}, + {0.17702937, 0.42464229, 0.44204772}, + {0.29093260, 0.33410171, 0.22306615}}, + {{0.46745953, 0.45912567, 0.25779176}, + {0.41033682, 0.37336978, 0.21872495}, + {0.34566763, 0.32462072, 0.48979440}}}}}); std::shared_ptr<Node> mySoftmax = Softmax(1); - auto op = std::static_pointer_cast<OperatorTensor>(mySoftmax -> getOperator()); - op->associateInput(0,input); + auto op = + std::static_pointer_cast<OperatorTensor>(mySoftmax->getOperator()); + op->associateInput(0, input); op->setDataType(DataType::Float32); op->setBackend("cpu"); mySoftmax->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + float *resPtr = + static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr()); + float *expectedPtr = + static_cast<float *>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < expectedOutput->size(); ++i) { + REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001); } } } \ No newline at end of file diff --git a/unit_tests/operator/Test_SqrtImpl.cpp b/unit_tests/operator/Test_SqrtImpl.cpp index d630c66c8b8085e6d382841da6b7cac2c88b1dd0..025ef6c30e28d96dba004c39a95ac1fb09e53c4e 100644 --- a/unit_tests/operator/Test_SqrtImpl.cpp +++ b/unit_tests/operator/Test_SqrtImpl.cpp @@ -22,100 +22,87 @@ using namespace Aidge; TEST_CASE("[cpu/operator] Sqrt(forward)", "[Sqrt][CPU]") { SECTION("2D Tensor") { - std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<float,2,2> { 
- { - {16.00000000, 0.62226844}, - { 0.00000000, 1.84539008} - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { - { - {4.00000000, 0.78883994}, - {0.00000000, 1.35845140} - } - }); + std::shared_ptr<Tensor> input = + std::make_shared<Tensor>(Array2D<float, 2, 2>{ + {{16.00000000, 0.62226844}, {0.00000000, 1.84539008}}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array2D<float, 2, 2>{ + {{4.00000000, 0.78883994}, {0.00000000, 1.35845140}}}); std::shared_ptr<Node> mySqrt = Sqrt(); - auto op = std::static_pointer_cast<OperatorTensor>(mySqrt -> getOperator()); - mySqrt->getOperator()->associateInput(0,input); + auto op = + std::static_pointer_cast<OperatorTensor>(mySqrt->getOperator()); + mySqrt->getOperator()->associateInput(0, input); mySqrt->getOperator()->setDataType(DataType::Float32); mySqrt->getOperator()->setBackend("cpu"); mySqrt->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< 4; ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + float *resPtr = + static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr()); + float *expectedPtr = + static_cast<float *>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < 4; ++i) { + REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001); } - } SECTION("4D Tensor") { - std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { - { - { - {{0.06218481, 0.46850157, 0.60914326}, - {0.57470602, 0.09943211, 0.59992820}, - {0.99623793, 0.54931718, 0.89343822}}, - {{0.75176072, 0.38237786, 0.84824580}, - {0.10619396, 0.11959118, 0.93499404}, - {0.65563291, 0.02913034, 0.17093092}}, - {{0.36303985, 0.92073035, 0.79146117}, - {0.88962847, 0.94561219, 0.92033130}, - {0.52903181, 0.13397896, 0.76086712}} - }, - { - {{0.31242222, 0.80526417, 0.48411584}, - {0.84375203, 0.65408552, 0.55028963}, - {0.77546734, 0.06203610, 0.83163154}}, - {{0.46342927, 0.53631741, 0.39145601}, - {0.14204198, 0.84214240, 0.94185621}, - {0.05068624, 0.99889028, 0.38464361}}, - {{0.37591159, 0.51769549, 0.30288595}, - {0.96883464, 0.35154045, 0.55648762}, - {0.13022375, 0.73467660, 0.02705121}} - } - } - }); + std::shared_ptr<Tensor> input = + std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{ + {{{{0.06218481, 0.46850157, 0.60914326}, + {0.57470602, 0.09943211, 0.59992820}, + {0.99623793, 0.54931718, 0.89343822}}, + {{0.75176072, 0.38237786, 0.84824580}, + {0.10619396, 0.11959118, 0.93499404}, + {0.65563291, 0.02913034, 0.17093092}}, + {{0.36303985, 0.92073035, 0.79146117}, + {0.88962847, 0.94561219, 0.92033130}, + {0.52903181, 0.13397896, 0.76086712}}}, + {{{0.31242222, 0.80526417, 0.48411584}, + {0.84375203, 0.65408552, 0.55028963}, + {0.77546734, 0.06203610, 0.83163154}}, + {{0.46342927, 0.53631741, 0.39145601}, + {0.14204198, 0.84214240, 0.94185621}, + {0.05068624, 0.99889028, 0.38464361}}, + {{0.37591159, 0.51769549, 0.30288595}, + {0.96883464, 0.35154045, 0.55648762}, + {0.13022375, 0.73467660, 0.02705121}}}}}); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { - { - { - {{0.24936883, 0.6844717, 0.7804763}, - {0.75809366, 0.31532857, 0.7745503}, - {0.9981172, 0.7411593, 0.9452186}}, - {{0.86704135, 0.6183671, 0.9210026}, - {0.32587415, 0.34581956, 0.9669509}, - {0.80971164, 0.17067613, 0.41343793}}, - {{0.60252786, 0.9595469, 0.88964105}, - {0.9432012, 
0.97242594, 0.95933896}, - {0.7273457, 0.36603138, 0.87227696}} - }, - { - {{0.55894744, 0.89736515, 0.69578433}, - {0.91855973, 0.8087555, 0.7418151}, - {0.88060623, 0.24907047, 0.91193837}}, - {{0.6807564, 0.73233694, 0.6256645}, - {0.37688458, 0.9176832, 0.9704928}, - {0.22513604, 0.99944496, 0.62019646}}, - {{0.6131163, 0.7195106, 0.5503507}, - {0.984294, 0.59290844, 0.745981}, - {0.3608653, 0.8571328, 0.16447252}} - } - } - }); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{ + {{{{0.24936883, 0.6844717, 0.7804763}, + {0.75809366, 0.31532857, 0.7745503}, + {0.9981172, 0.7411593, 0.9452186}}, + {{0.86704135, 0.6183671, 0.9210026}, + {0.32587415, 0.34581956, 0.9669509}, + {0.80971164, 0.17067613, 0.41343793}}, + {{0.60252786, 0.9595469, 0.88964105}, + {0.9432012, 0.97242594, 0.95933896}, + {0.7273457, 0.36603138, 0.87227696}}}, + {{{0.55894744, 0.89736515, 0.69578433}, + {0.91855973, 0.8087555, 0.7418151}, + {0.88060623, 0.24907047, 0.91193837}}, + {{0.6807564, 0.73233694, 0.6256645}, + {0.37688458, 0.9176832, 0.9704928}, + {0.22513604, 0.99944496, 0.62019646}}, + {{0.6131163, 0.7195106, 0.5503507}, + {0.984294, 0.59290844, 0.745981}, + {0.3608653, 0.8571328, 0.16447252}}}}}); std::shared_ptr<Node> mySqrt = Sqrt(); - auto op = std::static_pointer_cast<OperatorTensor>(mySqrt -> getOperator()); - mySqrt->getOperator()->associateInput(0,input); + auto op = + std::static_pointer_cast<OperatorTensor>(mySqrt->getOperator()); + mySqrt->getOperator()->associateInput(0, input); mySqrt->getOperator()->setDataType(DataType::Float32); mySqrt->getOperator()->setBackend("cpu"); mySqrt->forward(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< 54; ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + float *resPtr = + static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr()); + float *expectedPtr = + static_cast<float *>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < 54; ++i) { + REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001); } } } \ No newline at end of file
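Both the Pow broadcasting test above and the Sub broadcasting test below build their reference results with the same manual stride arithmetic: contiguous strides are computed per input, and any axis of size 1 has its index pinned to 0 so the single element is reused across that output axis. The standalone sketch below (hypothetical file name, not part of the patch) shows that indexing rule in isolation:

// broadcast_index_check.cpp -- standalone sketch, not part of the patch.
// Minimal version of the stride arithmetic the Pow and Sub tests use to
// build reference results: a size-1 axis is "broadcast" (index pinned to 0).
#include <cstddef>
#include <iostream>
#include <vector>

// Flattened offset of (a, b, c, d) into a 4-D tensor of shape `dims`,
// pinning every size-1 axis to index 0.
std::size_t broadcastOffset(const std::vector<std::size_t> &dims,
                            std::size_t a, std::size_t b,
                            std::size_t c, std::size_t d) {
    const std::size_t strides[4] = {dims[1] * dims[2] * dims[3],
                                    dims[2] * dims[3], dims[3], 1};
    const std::size_t idx[4] = {a, b, c, d};
    std::size_t offset = 0;
    for (std::size_t i = 0; i < 4; ++i)
        offset += strides[i] * ((dims[i] > 1) ? idx[i] : 0);
    return offset;
}

int main() {
    // Shape {1, 3, 1, 2} read at output coordinate (1, 2, 3, 1):
    // axes 0 and 2 are pinned to 0, axes 1 and 3 follow the coordinate,
    // so the offset is 2 * 2 + 1 * 1 = 5.
    const std::vector<std::size_t> dims{1, 3, 1, 2};
    std::cout << broadcastOffset(dims, 1, 2, 3, 1) << std::endl; // prints 5
    return 0;
}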
nbDimsDist(std::size_t(1), std::size_t(5));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(10));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                          std::size_t(5));
+    std::uniform_int_distribution<int> boolDist(0, 1);
 
-    // Create MatMul Operator
+    // Create Sub Operator
     std::shared_ptr<Node> mySub = Sub();
-    auto op = std::static_pointer_cast<OperatorTensor>(mySub-> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(mySub->getOperator());
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
 
     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Float32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Float32);
     T1->setBackend("cpu");
 
@@ -61,12 +65,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
     std::chrono::duration<double, std::micro> duration{};
 
     SECTION("SubImpl_cpu::forward()") {
-        SECTION("Scalar / Scalar") {
-
-        }
-        SECTION("Scalar / +1-D Tensor") {
-
-        }
+        SECTION("Scalar / Scalar") {}
+        SECTION("Scalar / +1-D Tensor") {}
         SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
             std::size_t number_of_operation = 0;
 
@@ -77,13 +77,17 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
             for (std::size_t i = 0; i < nbDims; ++i) {
                 dims.push_back(dimSizeDist(gen));
             }
-            const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+            const std::size_t nb_elements =
+                std::accumulate(dims.cbegin(),
+                                dims.cend(),
+                                std::size_t(1),
+                                std::multiplies<std::size_t>());
             number_of_operation += nb_elements;
 
             // without broadcasting
-            float* array0 = new float[nb_elements];
-            float* array1 = new float[nb_elements];
-            float* result = new float[nb_elements];
+            float *array0 = new float[nb_elements];
+            float *array1 = new float[nb_elements];
+            float *result = new float[nb_elements];
 
             for (std::size_t i = 0; i < nb_elements; ++i) {
                 array0[i] = valueDist(gen);
@@ -93,21 +97,23 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
 
             // input0
             T0->resize(dims);
-            T0 -> getImpl() -> setRawPtr(array0, nb_elements);
+            T0->getImpl()->setRawPtr(array0, nb_elements);
 
             // input1
             T1->resize(dims);
-            T1 -> getImpl() -> setRawPtr(array1, nb_elements);
+            T1->getImpl()->setRawPtr(array1, nb_elements);
 
             // results
             Tres->resize(dims);
-            Tres -> getImpl() -> setRawPtr(result, nb_elements);
+            Tres->getImpl()->setRawPtr(result, nb_elements);
 
             op->forwardDims();
             start = std::chrono::system_clock::now();
             mySub->forward();
             end = std::chrono::system_clock::now();
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration +=
+                std::chrono::duration_cast<std::chrono::microseconds>(
+                    end - start);
 
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
@@ -117,8 +123,10 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
             // with broadcasting
         }
-        std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-        std::cout << "total time: " << duration.count() << "μs" << std::endl;
+        std::cout << "number of elements over time spent: "
+                  << (number_of_operation / duration.count()) << std::endl;
+        std::cout << "total time: " << duration.count() << "μs"
+                  << 
std::endl; } SECTION("+1-D Tensor / +1-D Tensor - broadcasting") { @@ -126,7 +134,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors - // handle dimensions, replace some dimensions with '1' to get broadcasting + // handle dimensions, replace some dimensions with '1' to get + // broadcasting constexpr std::size_t nbDims = 4; std::vector<std::size_t> dims; for (std::size_t i = 0; i < nbDims; ++i) { @@ -146,37 +155,62 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { } // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + float *array1 = + new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < dims0[0] * dims0[1] * dims0[2] * dims0[3]; + ++i) { array0[i] = valueDist(gen); } - for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) { + for (std::size_t i = 0; + i < dims1[0] * dims1[1] * dims1[2] * dims1[3]; + ++i) { array1[i] = valueDist(gen); } // compute true result - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1[1] * dims1[2] * dims1[3], + dims1[2] * dims1[3], + dims1[3], + 1}; for (std::size_t a = 0; a < dimsOut[0]; ++a) { for (std::size_t b = 0; b < dimsOut[1]; ++b) { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) - + strides1[1] * ((dims1[1] > 1) ? b : 0); + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = + strides1[0] * ((dims1[0] > 1) ? a : 0) + + strides1[1] * ((dims1[1] > 1) ? b : 0); for (std::size_t c = 0; c < dimsOut[2]; ++c) { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); for (std::size_t d = 0; d < dimsOut[3]; ++d) { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1[2] > 1) ? c : 0) - + ((dims1[3] > 1) ? d : 0); - result[idx_out + d] = array0[idx0] - array1[idx1]; - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl; + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = + idx1_0 + + strides1[2] * ((dims1[2] > 1) ? c : 0) + + ((dims1[3] > 1) ? 
d : 0); + result[idx_out + d] = + array0[idx0] - array1[idx1]; + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " - " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -185,22 +219,30 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]); + T1->getImpl()->setRawPtr( + array1, + dims1[0] * dims1[1] * dims1[2] * dims1[3]); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); mySub->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -209,15 +251,23 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } SECTION("+1-D Tensor / 1-D Tensor") { std::size_t number_of_operation = 0; - std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3)); + std::uniform_int_distribution<std::size_t> nbRemovedDimsDist( + std::size_t(1), + std::size_t(3)); for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // generate 2 random Tensors @@ -234,15 +284,24 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { dims1[i] = 1; } } - dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen)); + dims1.erase(dims1.cbegin(), + dims1.cbegin() + nbRemovedDimsDist(gen)); // create arrays and fill them with random values - float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; - std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); - float* array1 = new float[array1_size]; - float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - - for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) { + float *array0 = + new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]]; + std::size_t array1_size = + std::accumulate(dims1.cbegin(), + dims1.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + float *array1 = new float[array1_size]; + float *result = new float[dimsOut[0] * dimsOut[1] * + dimsOut[2] * dimsOut[3]]; + + for (std::size_t i = 0; + i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]); + 
++i) { array0[i] = valueDist(gen); } for (std::size_t i = 0; i < array1_size; ++i) { @@ -251,27 +310,48 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { // compute true result auto dims1_tmp = dims1; - dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1)); - - const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; - const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1}; + dims1_tmp.insert(dims1_tmp.cbegin(), + 4 - dims1_tmp.size(), + std::size_t(1)); + + const std::size_t strides0[nbDims] = { + dims0[1] * dims0[2] * dims0[3], + dims0[2] * dims0[3], + dims0[3], + 1}; + const std::size_t strides1[nbDims] = { + dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[2] * dims1_tmp[3], + dims1_tmp[3], + 1}; for (std::size_t a = 0; a < dimsOut[0]; ++a) { for (std::size_t b = 0; b < dimsOut[1]; ++b) { - const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) - + strides0[1] * ((dims0[1] > 1) ? b : 0); - const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) - + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); + const std::size_t idx0_0 = + strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = + strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) + + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0); for (std::size_t c = 0; c < dimsOut[2]; ++c) { - const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + const std::size_t idx_out = + dimsOut[3] * + (c + dimsOut[2] * (b + dimsOut[1] * a)); for (std::size_t d = 0; d < dimsOut[3]; ++d) { - std::size_t idx0 = idx0_0 - + strides0[2] * ((dims0[2] > 1) ? c : 0) - + ((dims0[3] > 1) ? d : 0); - std::size_t idx1 = idx1_0 - + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0) - + ((dims1_tmp[3] > 1) ? d : 0); - result[idx_out + d] = array0[idx0] - array1[idx1]; - // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl; + std::size_t idx0 = + idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = + idx1_0 + + strides1[2] * + ((dims1_tmp[2] > 1) ? c : 0) + + ((dims1_tmp[3] > 1) ? 
d : 0); + result[idx_out + d] = + array0[idx0] - array1[idx1]; + // std::cout << "(" << idx0 << ", " << idx1 << + // ") -> " << array0[idx0] << " - " << + // array1[idx1] << " -> " << idx_out + d << + // std::endl; } } } @@ -280,22 +360,28 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { // conversion to Aidge::Tensors // input0 T0->resize(dims0); - T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + T0->getImpl()->setRawPtr( + array0, + dims0[0] * dims0[1] * dims0[2] * dims0[3]); // input1 T1->resize(dims1); - T1 -> getImpl() -> setRawPtr(array1, array1_size); + T1->getImpl()->setRawPtr(array1, array1_size); // results Tres->resize(dimsOut); - Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + Tres->getImpl()->setRawPtr( + result, + dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]); // compute result op->forwardDims(); start = std::chrono::system_clock::now(); mySub->forward(); end = std::chrono::system_clock::now(); - duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + duration += + std::chrono::duration_cast<std::chrono::microseconds>( + end - start); // comparison between truth and computed result REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); @@ -304,12 +390,18 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") { delete[] array1; delete[] result; - const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements = + std::accumulate(dimsOut.cbegin(), + dimsOut.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); number_of_operation += nb_elements; } - std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; - std::cout << "total time: " << duration.count() << "μs" << std::endl; + std::cout << "number of elements over time spent: " + << (number_of_operation / duration.count()) << std::endl; + std::cout << "total time: " << duration.count() << "μs" + << std::endl; } } } diff --git a/unit_tests/recipies/Test_ConstantFolding.cpp b/unit_tests/recipies/Test_ConstantFolding.cpp index cd035fd5336d3cb66fc70b1c0a4e5c82c9bef0d8..69e41c8d5b5a223201f8123ef0e2980ac3ae447a 100644 --- a/unit_tests/recipies/Test_ConstantFolding.cpp +++ b/unit_tests/recipies/Test_ConstantFolding.cpp @@ -11,11 +11,11 @@ #include <catch2/catch_test_macros.hpp> -#include "aidge/recipes/Recipes.hpp" +#include "aidge/graph/OpArgs.hpp" #include "aidge/operator/Add.hpp" #include "aidge/operator/MatMul.hpp" #include "aidge/operator/Producer.hpp" -#include "aidge/graph/OpArgs.hpp" +#include "aidge/recipes/Recipes.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/utils/TensorUtils.hpp" #include <cstddef> @@ -29,11 +29,34 @@ TEST_CASE("[ConstantFolding] forward", "[ConstantFolding][forward][CPU]") { auto matmul1 = MatMul("matmul1"); auto add1 = Add("add1"); - auto b0 = Producer(std::make_shared<Tensor>(Array1D<float,5>{{1, 2, 3, 4, 5}}), "B0", true); - auto w0 = Producer(std::make_shared<Tensor>(Array2D<float,5,5>{{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}}}), "W0", true); - auto b1 = Producer(std::make_shared<Tensor>(Array1D<float,5>{{1, 2, 3, 4, 5}}), "B1", true); - auto w1 = Producer(std::make_shared<Tensor>(Array2D<float,5,5>{{{6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}}}),"W1", true); - auto input = Producer(std::make_shared<Tensor>(Array2D<float,2,5>{{{1, 2, 3, 4, 5}, 
{6, 7, 8, 9, 0}}}), "input", true); + auto b0 = + Producer(std::make_shared<Tensor>(Array1D<float, 5>{{1, 2, 3, 4, 5}}), + "B0", + true); + auto w0 = Producer( + std::make_shared<Tensor>(Array2D<float, 5, 5>{{{1, 2, 3, 4, 5}, + {6, 7, 8, 9, 0}, + {1, 2, 3, 4, 5}, + {6, 7, 8, 9, 0}, + {1, 2, 3, 4, 5}}}), + "W0", + true); + auto b1 = + Producer(std::make_shared<Tensor>(Array1D<float, 5>{{1, 2, 3, 4, 5}}), + "B1", + true); + auto w1 = Producer( + std::make_shared<Tensor>(Array2D<float, 5, 5>{{{6, 7, 8, 9, 0}, + {1, 2, 3, 4, 5}, + {6, 7, 8, 9, 0}, + {1, 2, 3, 4, 5}, + {6, 7, 8, 9, 0}}}), + "W1", + true); + auto input = Producer(std::make_shared<Tensor>(Array2D<float, 2, 5>{ + {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}}}), + "input", + true); input->addChild(matmul0, 0, 0); w0->addChild(matmul0, 0, 1); @@ -54,32 +77,38 @@ TEST_CASE("[ConstantFolding] forward", "[ConstantFolding][forward][CPU]") { // Check original graph REQUIRE(g->getNodes() == - std::set<std::shared_ptr<Node>>({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1})); - REQUIRE(((matmul0->getParent(0) == input) && (matmul0->getParent(1) == w0))); + std::set<std::shared_ptr<Node>>( + {input, w0, matmul0, b0, add0, w1, matmul1, b1, add1})); + REQUIRE( + ((matmul0->getParent(0) == input) && (matmul0->getParent(1) == w0))); REQUIRE(((add0->getParent(0) == matmul0) && (add0->getParent(1) == b0))); - REQUIRE(((matmul1->getParent(0) == add0) && (matmul1->getParent(1) == w1))); + REQUIRE( + ((matmul1->getParent(0) == add0) && (matmul1->getParent(1) == w1))); REQUIRE(((add1->getParent(0) == matmul1) && (add1->getParent(1) == b1))); auto scheduler = SequentialScheduler(g); scheduler.forward(); - const std::shared_ptr<Tensor> result = std::make_shared<Tensor>(Array2D<float,2,5>{{ - { 1201.000000, 1532.000000, 1863.000000, 2194.000000, 785.000000}, - { 2501.000000, 3207.000000, 3913.000000, 4619.000000, 1735.000000} - }}); + const std::shared_ptr<Tensor> result = std::make_shared< + Tensor>(Array2D<float, 2, 5>{ + {{1201.000000, 1532.000000, 1863.000000, 2194.000000, 785.000000}, + {2501.000000, 3207.000000, 3913.000000, 4619.000000, 1735.000000}}}); auto add1Op = std::static_pointer_cast<Add_Op>(add1->getOperator()); REQUIRE(approxEq<float>(*(add1Op->getOutput(0)), *result)); - // Transform GraphView inplace + // Transform GraphView inplace constantFolding(g); - // Check new GraphView - std::set<std::shared_ptr<Node>> newNodes = g->getNodes(); - REQUIRE(newNodes != std::set<std::shared_ptr<Node>>({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1})); - REQUIRE(newNodes.size() == 1); - REQUIRE((*newNodes.cbegin())->type() == "Producer"); + // Check new GraphView + std::set<std::shared_ptr<Node>> newNodes = g->getNodes(); + REQUIRE(newNodes != + std::set<std::shared_ptr<Node>>( + {input, w0, matmul0, b0, add0, w1, matmul1, b1, add1})); + REQUIRE(newNodes.size() == 1); + REQUIRE((*newNodes.cbegin())->type() == "Producer"); - auto prodOp = std::static_pointer_cast<Producer_Op>((*newNodes.cbegin())->getOperator()); + auto prodOp = std::static_pointer_cast<Producer_Op>( + (*newNodes.cbegin())->getOperator()); REQUIRE(approxEq<float>(*(prodOp->getOutput(0)), *result)); } diff --git a/unit_tests/recipies/Test_ConvToMatMul.cpp b/unit_tests/recipies/Test_ConvToMatMul.cpp index 05c5eef83394ba8c965dfabae2bcd8c2b4502c79..4bcb4c9d7106c2045eccbf76461381c9b6546f21 100644 --- a/unit_tests/recipies/Test_ConvToMatMul.cpp +++ b/unit_tests/recipies/Test_ConvToMatMul.cpp @@ -11,12 +11,12 @@ #include <catch2/catch_test_macros.hpp> -#include "aidge/recipes/Recipes.hpp" 
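
The Test_ConstantFolding hunks just above all revolve around one pattern: forward the graph once to record a reference output, fold it in place, then check that the whole GraphView collapsed into a single constant Producer holding that output. As a reading aid, here is a minimal sketch of that pattern, using only calls that already appear in the test; `g` and `result` stand in for the test's graph and expected tensor:

// Sketch of the constant-folding check pattern from Test_ConstantFolding.
auto scheduler = SequentialScheduler(g);
scheduler.forward();  // record the reference output first

constantFolding(g);   // transforms the GraphView in place

const std::set<std::shared_ptr<Node>> folded = g->getNodes();
REQUIRE(folded.size() == 1); // everything collapsed into one node
REQUIRE((*folded.cbegin())->type() == "Producer");

auto prodOp = std::static_pointer_cast<Producer_Op>(
    (*folded.cbegin())->getOperator());
REQUIRE(approxEq<float>(*(prodOp->getOutput(0)), *result));
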
+#include "aidge/filler/Filler.hpp" +#include "aidge/graph/OpArgs.hpp" #include "aidge/operator/Conv.hpp" #include "aidge/operator/Producer.hpp" +#include "aidge/recipes/Recipes.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" -#include "aidge/filler/Filler.hpp" -#include "aidge/graph/OpArgs.hpp" #include <cstddef> using namespace Aidge; @@ -26,23 +26,43 @@ TEST_CASE("[ConvToMatMul] conv") { auto conv2 = Conv(4, 7, {3, 3}, "conv2", {1, 1}, {1, 1}, true); auto conv3 = Conv(7, 10, {1, 1}, "conv3", {2, 2}); - auto g1 = Sequential({ - Producer({2, 3, 13, 24}, "dataProvider"), - conv1, - conv2, - conv3 - }); + auto g1 = Sequential( + {Producer({2, 3, 13, 24}, "dataProvider"), conv1, conv2, conv3}); g1->setBackend("cpu"); g1->forwardDims(); // Random initialization of input and weights - uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(0), -10.0, 10.0); - uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(1), -10.0, 10.0); - uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(2), -10.0, 10.0); - uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv2->getOperator())->getInput(1), -10.0, 10.0); - uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(1), -10.0, 10.0); - uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(2), -10.0, 10.0); + uniformFiller<float>( + std::static_pointer_cast<OperatorTensor>(conv1->getOperator()) + ->getInput(0), + -10.0, + 10.0); + uniformFiller<float>( + std::static_pointer_cast<OperatorTensor>(conv1->getOperator()) + ->getInput(1), + -10.0, + 10.0); + uniformFiller<float>( + std::static_pointer_cast<OperatorTensor>(conv1->getOperator()) + ->getInput(2), + -10.0, + 10.0); + uniformFiller<float>( + std::static_pointer_cast<OperatorTensor>(conv2->getOperator()) + ->getInput(1), + -10.0, + 10.0); + uniformFiller<float>( + std::static_pointer_cast<OperatorTensor>(conv3->getOperator()) + ->getInput(1), + -10.0, + 10.0); + uniformFiller<float>( + std::static_pointer_cast<OperatorTensor>(conv3->getOperator()) + ->getInput(2), + -10.0, + 10.0); auto s1 = SequentialScheduler(g1); s1.forward(); @@ -52,7 +72,7 @@ TEST_CASE("[ConvToMatMul] conv") { auto g2 = g1->clone(); g2->forwardDims(); REQUIRE(convToMatMul(g2) == 3); - + g2->setBackend("cpu"); auto s2 = SequentialScheduler(g2); @@ -60,14 +80,19 @@ TEST_CASE("[ConvToMatMul] conv") { g2->save("convToMatMul_after"); - auto g1OutOp = std::static_pointer_cast<OperatorTensor>((*g1->outputNodes().cbegin())->getOperator()); - auto g2OutOp = std::static_pointer_cast<OperatorTensor>((*g1->outputNodes().cbegin())->getOperator()); + auto g1OutOp = std::static_pointer_cast<OperatorTensor>( + (*g1->outputNodes().cbegin())->getOperator()); + auto g2OutOp = std::static_pointer_cast<OperatorTensor>( + (*g1->outputNodes().cbegin())->getOperator()); REQUIRE(*(g1OutOp->getOutput(0)) == *(g2OutOp->getOutput(0))); - // Simplify the graph: freeze parameters to allow reshaping of the Producers + // Simplify the graph: freeze parameters to allow reshaping of the + // Producers for (auto node : g2->getNodes()) { - if (node->type() == Producer_Op::Type && node->name() != "dataProvider") { - std::static_pointer_cast<Producer_Op>(node->getOperator())->constant() = true; + if (node->type() == Producer_Op::Type && + node->name() != "dataProvider") { + std::static_pointer_cast<Producer_Op>(node->getOperator()) + ->constant() = 
true; } } diff --git a/unit_tests/recipies/Test_ExplicitCastMove.cpp b/unit_tests/recipies/Test_ExplicitCastMove.cpp index 27c788961b787c6f5248254f19ef7ac7a4366206..17af28a197e410d3ff129c9e561727c14160e37f 100644 --- a/unit_tests/recipies/Test_ExplicitCastMove.cpp +++ b/unit_tests/recipies/Test_ExplicitCastMove.cpp @@ -11,10 +11,10 @@ #include <catch2/catch_test_macros.hpp> -#include "aidge/recipes/Recipes.hpp" +#include "aidge/graph/OpArgs.hpp" #include "aidge/operator/Conv.hpp" #include "aidge/operator/Producer.hpp" -#include "aidge/graph/OpArgs.hpp" +#include "aidge/recipes/Recipes.hpp" #include <cstddef> using namespace Aidge; @@ -24,12 +24,8 @@ TEST_CASE("[ExplicitCastMove] conv") { auto conv2 = Conv(32, 64, {3, 3}, "conv2"); auto conv3 = Conv(64, 10, {1, 1}, "conv3", {2, 2}); - auto g1 = Sequential({ - Producer({16, 3, 224, 224}, "dataProvider"), - conv1, - conv2, - conv3 - }); + auto g1 = Sequential( + {Producer({16, 3, 224, 224}, "dataProvider"), conv1, conv2, conv3}); g1->setBackend("cpu"); conv1->getOperator()->setDataType(DataType::Int32); diff --git a/unit_tests/recipies/Test_FuseBatchNorm.cpp b/unit_tests/recipies/Test_FuseBatchNorm.cpp index 68a01541894ba25a8841343d2b3943ccc08c7a9d..754c6771f303031ccb635a4690043c333f59af1d 100644 --- a/unit_tests/recipies/Test_FuseBatchNorm.cpp +++ b/unit_tests/recipies/Test_FuseBatchNorm.cpp @@ -10,13 +10,13 @@ ********************************************************************************/ #include <catch2/catch_test_macros.hpp> -#include <memory> #include <cmath> +#include <memory> #include "aidge/graph/GraphView.hpp" #include "aidge/graph/OpArgs.hpp" -#include "aidge/operator/Conv.hpp" #include "aidge/operator/BatchNorm.hpp" +#include "aidge/operator/Conv.hpp" #include "aidge/operator/Producer.hpp" #include "aidge/recipes/Recipes.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" @@ -30,86 +30,80 @@ TEST_CASE("[core/recipes] FuseBatchNorm", "[recipes][FuseBatchNorm]") { auto myConv = Conv(3, 3, {1, 1}, "conv1"); auto myBN = BatchNorm<2>(32, 1.0e-5F, 0.1F, "batchnorm1"); - auto myProdOp = std::static_pointer_cast<Producer_Op>(myProd->getOperator()); - auto myConvOp = std::static_pointer_cast<Conv_Op<2>>(myConv->getOperator()); - auto myBNOp = std::static_pointer_cast<BatchNorm_Op<2>>(myBN->getOperator()); + auto myProdOp = + std::static_pointer_cast<Producer_Op>(myProd->getOperator()); + auto myConvOp = + std::static_pointer_cast<Conv_Op<2>>(myConv->getOperator()); + auto myBNOp = + std::static_pointer_cast<BatchNorm_Op<2>>(myBN->getOperator()); - myProdOp->setOutput(0, std::make_shared<Tensor>(Array4D<float,2,3,3,3> { //NCHW - { - { - {{8.28257084e-01, 7.99335480e-01, 7.36702740e-01}, - {2.36729562e-01, 8.61912668e-01, 9.93067741e-01}, - {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}}, - {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01}, - {1.31294072e-01, 7.10182846e-01, 1.08420849e-04}, - {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}}, - {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01}, - {8.07861805e-01, 7.79679358e-01, 5.01209974e-01}, - {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}} - }, - { - {{6.22058094e-01, 2.32256651e-02, 6.18222237e-01}, - {9.58304763e-01, 2.11395025e-02, 4.95614648e-01}, - {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}}, - {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01}, - {1.12059772e-01, 7.64178872e-01, 7.60362148e-01}, - {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}}, - {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01}, - {9.52722490e-01, 6.35501027e-01, 5.67592978e-02}, - 
{2.00799644e-01, 4.00822222e-01, 9.14380193e-01}} - } - } - })); - myConvOp -> setInput(1, std::make_shared<Tensor>(Array4D<float,3,3,1,1> { //NCHW - { - { - {{8.28257084e-01}}, - {{7.99335480e-01}}, - {{7.36702740e-01}} - }, - { - {{2.36729562e-01}}, - {{8.61912668e-01}}, - {{9.93067741e-01}} - }, - { - {{1.63514376e-01}}, - {{8.95773172e-02}}, - {{2.96533108e-01}} - } - } - })); - myConvOp -> setInput(2, std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}})); - myBNOp -> setInput(1, std::make_shared<Tensor>(Array1D<float,3> {{0.9044, 0.3028, 0.0218}})); - myBNOp -> setInput(2, std::make_shared<Tensor>(Array1D<float,3> {{0.1332, 0.7503, 0.0878}})); - myBNOp -> setInput(3, std::make_shared<Tensor>(Array1D<float,3> {{0.9931, 0.8421, 0.9936}})); - myBNOp -> setInput(4, std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}})); + myProdOp->setOutput( + 0, + std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{ + // NCHW + {{{{8.28257084e-01, 7.99335480e-01, 7.36702740e-01}, + {2.36729562e-01, 8.61912668e-01, 9.93067741e-01}, + {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}}, + {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01}, + {1.31294072e-01, 7.10182846e-01, 1.08420849e-04}, + {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}}, + {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01}, + {8.07861805e-01, 7.79679358e-01, 5.01209974e-01}, + {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}}, + {{{6.22058094e-01, 2.32256651e-02, 6.18222237e-01}, + {9.58304763e-01, 2.11395025e-02, 4.95614648e-01}, + {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}}, + {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01}, + {1.12059772e-01, 7.64178872e-01, 7.60362148e-01}, + {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}}, + {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01}, + {9.52722490e-01, 6.35501027e-01, 5.67592978e-02}, + {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}}}})); + myConvOp->setInput( + 1, + std::make_shared<Tensor>(Array4D<float, 3, 3, 1, 1>{ + // NCHW + {{{{8.28257084e-01}}, {{7.99335480e-01}}, {{7.36702740e-01}}}, + {{{2.36729562e-01}}, {{8.61912668e-01}}, {{9.93067741e-01}}}, + {{{1.63514376e-01}}, {{8.95773172e-02}}, {{2.96533108e-01}}}}})); + myConvOp->setInput( + 2, + std::make_shared<Tensor>(Array1D<float, 3>{{0.4470, 0.3064, 0.7061}})); + myBNOp->setInput( + 1, + std::make_shared<Tensor>(Array1D<float, 3>{{0.9044, 0.3028, 0.0218}})); + myBNOp->setInput( + 2, + std::make_shared<Tensor>(Array1D<float, 3>{{0.1332, 0.7503, 0.0878}})); + myBNOp->setInput( + 3, + std::make_shared<Tensor>(Array1D<float, 3>{{0.9931, 0.8421, 0.9936}})); + myBNOp->setInput( + 4, + std::make_shared<Tensor>(Array1D<float, 3>{{0.4470, 0.3064, 0.7061}})); - auto g1 = Sequential({ - myProd, - myConv, - myBN - }); - g1 -> setName("fuseBNGraph"); - g1 -> compile("cpu", DataType::Float32); + auto g1 = Sequential({myProd, myConv, myBN}); + g1->setName("fuseBNGraph"); + g1->compile("cpu", DataType::Float32); auto s = SequentialScheduler(g1); s.forward(); - std::shared_ptr<Tensor> res1 = std::make_shared<Tensor>(*(myBNOp -> getOutput(0))); + std::shared_ptr<Tensor> res1 = + std::make_shared<Tensor>(*(myBNOp->getOutput(0))); fuseBatchNorm(g1); s.resetScheduling(); s.forward(); - std::shared_ptr<Tensor> res2 = std::make_shared<Tensor>(*(myConvOp -> getOutput(0))); + std::shared_ptr<Tensor> res2 = + std::make_shared<Tensor>(*(myConvOp->getOutput(0))); - REQUIRE(g1 -> outputNodes().size() == 1); - REQUIRE(g1 -> inputNodes().size() == 0); + REQUIRE(g1->outputNodes().size() == 1); + 
REQUIRE(g1->inputNodes().size() == 0); bool eq = true; for (std::size_t i = 0; i < res1->size(); ++i) { - eq &= std::abs(res1->get<float>(i) - res2->get<float>(i)) < 1.0e-06; + eq &= std::abs(res1->get<float>(i) - res2->get<float>(i)) < 1.0e-06; } REQUIRE(eq); - } } // namespace Aidge diff --git a/unit_tests/recipies/Test_HorizontalTiling.cpp b/unit_tests/recipies/Test_HorizontalTiling.cpp index 7c127548417492141c3ea1eeb9374042befe75d2..e2611e8a3a5b4b3d128683ca268b234162125ba4 100644 --- a/unit_tests/recipies/Test_HorizontalTiling.cpp +++ b/unit_tests/recipies/Test_HorizontalTiling.cpp @@ -14,12 +14,11 @@ #include "aidge/graph/GraphView.hpp" #include "aidge/graph/OpArgs.hpp" +#include "aidge/operator/Concat.hpp" #include "aidge/operator/Conv.hpp" #include "aidge/operator/ReLU.hpp" #include "aidge/recipes/Recipes.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" -#include "aidge/operator/Concat.hpp" - namespace Aidge { @@ -29,136 +28,90 @@ TEST_CASE("[core/recipes] Tiling(transformation)", "[Tiling][Recipes]") { SECTION("Simple Node: Conv") { std::shared_ptr<Node> myReLU = ReLU("myReLU"); - std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv"); - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { - { - { - {{ 0, 1, 2}, - { 3, 4, 5}, - { 6, 7, 8}}, - {{ 9, 10, 11}, - { 12, 13, 14}, - { 15, 16, 17}}, - {{ 18, 19, 20}, - { 21, 22, 23}, - { 24, 25, 26}} - }, - { - {{ 27, 28, 29}, - { 30, 31, 32}, - { 33, 34, 35}}, - {{ 36, 37, 38}, - { 39, 40, 41}, - { 42, 43, 44}}, - {{ 45, 46, 47}, - { 48, 49, 50}, - { 51, 52, 53}} - }, - { - {{ 54, 55, 56}, - { 57, 58, 59}, - { 60, 61, 62}}, - {{ 63, 64, 65}, - { 66, 67, 68}, - { 69, 70, 71}}, - {{ 72, 73, 74}, - { 75, 76, 77}, - { 78, 79, 80}} - }, - { - {{ 81, 82, 83}, - { 84, 85, 86}, - { 87, 88, 89}}, - {{ 90, 91, 92}, - { 93, 94, 95}, - { 96, 97, 98}}, - {{ 99, 100, 101}, - {102, 103, 104}, - {105, 106, 107}} - } - } - }); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { - { - { - {{ 15226, 15577, 15928}, - { 16981, 17332, 17683}, - { 18736, 19087, 19438}}, - - {{ 37818, 38898, 39978}, - { 43218, 44298, 45378}, - { 48618, 49698, 50778}}, - - {{ 60426, 62235, 64044}, - { 69471, 71280, 73089}, - { 78516, 80325, 82134}}, - - {{ 83016, 85554, 88092}, - { 95706, 98244, 100782}, - {108396, 110934, 113472}} - }, - { - {{ 41551, 41902, 42253}, - { 43306, 43657, 44008}, - { 45061, 45412, 45763}}, - - {{118818, 119898, 120978}, - {124218, 125298, 126378}, - {129618, 130698, 131778}}, - - {{196101, 
197910, 199719}, - {205146, 206955, 208764}, - {214191, 216000, 217809}}, - - {{273366, 275904, 278442}, - {286056, 288594, 291132}, - {298746, 301284, 303822}} - } - } - }); + std::shared_ptr<Node> myConv = Conv(3, 4, {3, 3}, "myconv"); + std::shared_ptr<Tensor> myWeights = + std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{ + {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}, + {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}}, + {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}}, + {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}}, + {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}}, + {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}}, + {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}}, + {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}}, + {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}}, + {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}}, + {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}}, + {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}}); + std::shared_ptr<Tensor> myBias = + std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array4D<int, 2, 4, 3, 3>{{{{{15226, 15577, 15928}, + {16981, 17332, 17683}, + {18736, 19087, 19438}}, + + {{37818, 38898, 39978}, + {43218, 44298, 45378}, + {48618, 49698, 50778}}, + + {{60426, 62235, 64044}, + {69471, 71280, 73089}, + {78516, 80325, 82134}}, + + {{83016, 85554, 88092}, + {95706, 98244, 100782}, + {108396, 110934, 113472}}}, + {{{41551, 41902, 42253}, + {43306, 43657, 44008}, + {45061, 45412, 45763}}, + + {{118818, 119898, 120978}, + {124218, 125298, 126378}, + {129618, 130698, 131778}}, + + {{196101, 197910, 199719}, + {205146, 206955, 208764}, + {214191, 216000, 217809}}, + + {{273366, 275904, 278442}, + {286056, 288594, 291132}, + {298746, 301284, 303822}}}}}); myReLU->getOperator()->associateInput(0, myInput); myReLU->addChild(myConv, 0, 0); myConv->getOperator()->setInput(1, myWeights); @@ -167,18 +120,29 @@ TEST_CASE("[core/recipes] Tiling(transformation)", "[Tiling][Recipes]") { std::shared_ptr<GraphView> g = std::make_shared<GraphView>(); g->add({myReLU, myConv}); g->compile("cpu", DataType::Int32); - std::set<std::shared_ptr<Node>> tiledConv = getConvHorizontalTiling(myConv, 2, 3); + std::set<std::shared_ptr<Node>> tiledConv = + getConvHorizontalTiling(myConv, 2, 3); SequentialScheduler s(g); s.forward(); - REQUIRE(*(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->getOutput(0)) == *myOutput); - - GraphView::replace({myConv, myConv->getParent(1), myConv->getParent(2)}, tiledConv); - g->compile("cpu", DataType::Int32, 0, {{2,3,5,5}}); // changes myInput DataType from Int32 to Float32. Why?????? 
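
One hedged guess at the "Why??????" above, offered as an assumption rather than something verified in the Aidge sources: compile() with explicit input dimensions appears to re-create the graph's input tensor before the requested Int32 type is propagated, so the fresh tensor falls back to the default Float32. If that is the cause, re-attaching the original tensor after the second compile() should restore the expected type; both calls already appear earlier in this test:

// Hypothetical workaround (sketch only, unverified against Aidge internals):
g->compile("cpu", DataType::Int32, 0, {{2, 3, 5, 5}});
myReLU->getOperator()->associateInput(0, myInput); // myInput is Int32
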
+ REQUIRE( + *(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator()) + ->getOutput(0)) == *myOutput); + + GraphView::replace( + {myConv, myConv->getParent(1), myConv->getParent(2)}, + tiledConv); + g->compile("cpu", + DataType::Int32, + 0, + {{2, 3, 5, 5}}); // changes myInput DataType from Int32 + // to Float32. Why?????? s.resetScheduling(); s.forward(); - REQUIRE(*(std::dynamic_pointer_cast<OperatorTensor>((*g->outputNodes().begin())->getOperator())->getOutput(0)) == *myOutput); + REQUIRE(*(std::dynamic_pointer_cast<OperatorTensor>( + (*g->outputNodes().begin())->getOperator()) + ->getOutput(0)) == *myOutput); } } } diff --git a/unit_tests/scheduler/Test_CastMove.cpp b/unit_tests/scheduler/Test_CastMove.cpp index 5ca2cd9de4dcc9dab2c78f7ae1e1bf3090db8f2b..4c1bb18e4f6eb0682ab841a726f9d351971a5aef 100644 --- a/unit_tests/scheduler/Test_CastMove.cpp +++ b/unit_tests/scheduler/Test_CastMove.cpp @@ -14,79 +14,83 @@ #include <string> #include "aidge/data/Tensor.hpp" -#include "aidge/utils/TensorUtils.hpp" -#include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/graph/OpArgs.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/recipes/Recipes.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/utils/TensorUtils.hpp" #include "aidge/backend/cpu.hpp" using namespace Aidge; TEST_CASE("[cpu/castmove] CastMove(forward)") { - std::shared_ptr<Tensor> inputTensor = - std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4}, - {5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14}, - {15, 16, 17, 18, 19}, - {20, 21, 22, 23, 24}}}, - {{{25, 26, 27, 28, 29}, - {30, 31, 32, 33, 34}, - {35, 36, 37, 38, 39}, - {40, 41, 42, 43, 44}, - {45, 46, 47, 48, 49}}}}}); - - std::shared_ptr<Tensor> weight1 = std::make_shared<Tensor>( - Array4D<int, 3, 1, 3, 3>{{{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}}, - {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}}, - {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}}); - - std::shared_ptr<Tensor> bias1 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + std::shared_ptr<Tensor> inputTensor = std::make_shared<Tensor>( + Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}}, + {{{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}}}}); + + std::shared_ptr<Tensor> weight1 = + std::make_shared<Tensor>(Array4D<int, 3, 1, 3, 3>{ + {{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}}, + {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}}, + {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}}); + + std::shared_ptr<Tensor> bias1 = + std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); SECTION("Test implicit") { - std::shared_ptr<GraphView> g = - Sequential({ - Conv(1, 3, {3, 3}, "conv1"), - Conv(3, 4, {1, 1}, "conv2"), - Conv(4, 3, {1, 1}, "conv3"), - FC(27, 5, false, "fc")}); + std::shared_ptr<GraphView> g = Sequential({Conv(1, 3, {3, 3}, "conv1"), + Conv(3, 4, {1, 1}, "conv2"), + Conv(4, 3, {1, 1}, "conv3"), + FC(27, 5, false, "fc")}); g->getNode("conv1")->getOperator()->setInput(0, inputTensor); g->getNode("conv1")->getOperator()->setInput(1, weight1); g->getNode("conv1")->getOperator()->setInput(2, bias1); - std::shared_ptr<Tensor> weight2 = - std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, - {{{4}}, {{5}}, {{6}}}, - {{{7}}, {{8}}, {{9}}}, - {{{10}}, {{11}}, {{12}}}}}); - std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 
4>{{1, 2, 3, 4}}); + std::shared_ptr<Tensor> weight2 = std::make_shared<Tensor>( + Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, + {{{4}}, {{5}}, {{6}}}, + {{{7}}, {{8}}, {{9}}}, + {{{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias2 = + std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); g->getNode("conv2")->getOperator()->setInput(1, weight2); g->getNode("conv2")->getOperator()->setInput(2, bias2); // *(g->getNode("conv2")->getOperator()->input(1, weight2); std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>( - Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, - {{{5}}, {{6}}, {{7}}, {{8}}}, - {{{9}}, {{10}}, {{11}}, {{12}}}}}); - std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, + {{{5}}, {{6}}, {{7}}, {{8}}}, + {{{9}}, {{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias3 = + std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); g->getNode("conv3")->getOperator()->setInput(1, weight3); g->getNode("conv3")->getOperator()->setInput(2, bias3); - std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>( - Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, - {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, - {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, - {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); - std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + std::shared_ptr<Tensor> weightfc = + std::make_shared<Tensor>(Array2D<int, 5, 27>{ + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); + std::shared_ptr<Tensor> biasfc = + std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); g->getNode("fc")->getOperator()->setInput(1, weightfc); g->getNode("fc")->getOperator()->setInput(2, biasfc); @@ -101,94 +105,140 @@ TEST_CASE("[cpu/castmove] CastMove(forward)") { REQUIRE_NOTHROW(scheduler.forward()); scheduler.saveSchedulingDiagram("schedulingSequential"); - std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ - {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, - {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, - {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, - {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, - {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, - {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); - - std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{ - {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}}, - {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}}, - {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}}, - {{29160, 33480, 37800}, {50760, 55080, 59400}, 
{72360, 76680, 81000}}}, - {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}}, - {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}}, - {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}}, - {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}); - - std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ - {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}}, - {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}}, - {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}}, - {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}}, - {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}}, - {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}); + std::shared_ptr<Tensor> expectedOutput1 = std::make_shared< + Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, + {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, + {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, + {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, + {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, + {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); + + std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>( + Array4D<int, 2, 4, 3, 3>{{{{{6099, 7017, 7935}, + {10689, 11607, 12525}, + {15279, 16197, 17115}}, + {{13786, 15838, 17890}, + {24046, 26098, 28150}, + {34306, 36358, 38410}}, + {{21473, 24659, 27845}, + {37403, 40589, 43775}, + {53333, 56519, 59705}}, + {{29160, 33480, 37800}, + {50760, 55080, 59400}, + {72360, 76680, 81000}}}, + {{{29049, 29967, 30885}, + {33639, 34557, 35475}, + {38229, 39147, 40065}}, + {{65086, 67138, 69190}, + {75346, 77398, 79450}, + {85606, 87658, 89710}}, + {{101123, 104309, 107495}, + {117053, 120239, 123425}, + {132983, 136169, 139355}}, + {{137160, 141480, 145800}, + {158760, 163080, 167400}, + {180360, 184680, 189000}}}}}); + + std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>( + Array4D<int, 2, 3, 3, 3>{{{{{214731, 246591, 278451}, + {374031, 405891, 437751}, + {533331, 565191, 597051}}, + {{496804, 570568, 644332}, + {865624, 939388, 1013152}, + {1234444, 1308208, 1381972}}, + {{778877, 894545, 1010213}, + {1357217, 1472885, 1588553}, + {1935557, 2051225, 2166893}}}, + {{{1011231, 1043091, 1074951}, + {1170531, 1202391, 1234251}, + {1329831, 1361691, 1393551}}, + {{2340904, 2414668, 2488432}, + {2709724, 2783488, 2857252}, + {3078544, 3152308, 3226072}}, + {{3670577, 3786245, 3901913}, + {4248917, 4364585, 4480253}, + {4827257, 4942925, 5058593}}}}}); Tensor expectedOutput4 = Array2D<int, 2, 5>{ - {{205050376, 198925904, 181355097, 196978090, 238868348}, - {598467376, 561797804, 560823897, 593043790, 698672948}}}; - std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0); + {{205050376, 198925904, 181355097, 196978090, 238868348}, + {598467376, 561797804, 560823897, 593043790, 698672948}}}; + std::shared_ptr<Tensor> other1 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("conv1")->getOperator()) + ->getOutput(0); REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12)); - std::shared_ptr<Tensor> other2 = 
std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0); + std::shared_ptr<Tensor> other2 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("conv2")->getOperator()) + ->getOutput(0); REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12)); - std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0); - REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12)); - std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0); + std::shared_ptr<Tensor> other3 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("conv3")->getOperator()) + ->getOutput(0); + REQUIRE( + approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other4 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("fc")->getOperator()) + ->getOutput(0); REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12)); } SECTION("Half") { - Tensor refTensor = Array2D<float, 3, 2>{{{0.0, 1.0},{2.1, 3.4},{5000.0, 1.0e5}}}; + Tensor refTensor = + Array2D<float, 3, 2>{{{0.0, 1.0}, {2.1, 3.4}, {5000.0, 1.0e5}}}; Tensor tensor(DataType::Float16); tensor.copyCastFrom(refTensor); - REQUIRE(approxEq<float, half_float::half>(refTensor, tensor, 1.0e-3, 0.0)); + REQUIRE( + approxEq<float, half_float::half>(refTensor, tensor, 1.0e-3, 0.0)); } SECTION("Test explicit") { - std::shared_ptr<GraphView> g = - Sequential({ - Conv(1, 3, {3, 3}, "conv1"), - Conv(3, 4, {1, 1}, "conv2"), - Conv(4, 3, {1, 1}, "conv3"), - FC(27, 5, false, "fc")}); + std::shared_ptr<GraphView> g = Sequential({Conv(1, 3, {3, 3}, "conv1"), + Conv(3, 4, {1, 1}, "conv2"), + Conv(4, 3, {1, 1}, "conv3"), + FC(27, 5, false, "fc")}); g->getNode("conv1")->getOperator()->setInput(0, inputTensor); g->getNode("conv1")->getOperator()->setInput(1, weight1); g->getNode("conv1")->getOperator()->setInput(2, bias1); - std::shared_ptr<Tensor> weight2 = - std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, - {{{4}}, {{5}}, {{6}}}, - {{{7}}, {{8}}, {{9}}}, - {{{10}}, {{11}}, {{12}}}}}); - std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); + std::shared_ptr<Tensor> weight2 = std::make_shared<Tensor>( + Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, + {{{4}}, {{5}}, {{6}}}, + {{{7}}, {{8}}, {{9}}}, + {{{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias2 = + std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); g->getNode("conv2")->getOperator()->setInput(1, weight2); g->getNode("conv2")->getOperator()->setInput(2, bias2); // *(g->getNode("conv2")->getOperator()->input(1, weight2); std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>( - Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, - {{{5}}, {{6}}, {{7}}, {{8}}}, - {{{9}}, {{10}}, {{11}}, {{12}}}}}); - std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, + {{{5}}, {{6}}, {{7}}, {{8}}}, + {{{9}}, {{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias3 = + std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); g->getNode("conv3")->getOperator()->setInput(1, weight3); g->getNode("conv3")->getOperator()->setInput(2, bias3); - std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>( - Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, - {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, - 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, - {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, - {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); - std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + std::shared_ptr<Tensor> weightfc = + std::make_shared<Tensor>(Array2D<int, 5, 27>{ + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); + std::shared_ptr<Tensor> biasfc = + std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); g->getNode("fc")->getOperator()->setInput(1, weightfc); g->getNode("fc")->getOperator()->setInput(2, biasfc); @@ -205,42 +255,84 @@ TEST_CASE("[cpu/castmove] CastMove(forward)") { REQUIRE_NOTHROW(scheduler.forward()); scheduler.saveSchedulingDiagram("schedulingSequential"); - std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ - {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, - {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, - {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, - {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, - {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, - {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); - - std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{ - {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}}, - {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}}, - {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}}, - {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}}, - {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}}, - {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}}, - {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}}, - {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}); - - std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ - {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}}, - {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}}, - {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}}, - {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}}, - {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}}, - {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}); + std::shared_ptr<Tensor> expectedOutput1 = std::make_shared< + Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, + {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, + {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, + {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 
1987, 2032}}, + {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, + {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); + + std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>( + Array4D<int, 2, 4, 3, 3>{{{{{6099, 7017, 7935}, + {10689, 11607, 12525}, + {15279, 16197, 17115}}, + {{13786, 15838, 17890}, + {24046, 26098, 28150}, + {34306, 36358, 38410}}, + {{21473, 24659, 27845}, + {37403, 40589, 43775}, + {53333, 56519, 59705}}, + {{29160, 33480, 37800}, + {50760, 55080, 59400}, + {72360, 76680, 81000}}}, + {{{29049, 29967, 30885}, + {33639, 34557, 35475}, + {38229, 39147, 40065}}, + {{65086, 67138, 69190}, + {75346, 77398, 79450}, + {85606, 87658, 89710}}, + {{101123, 104309, 107495}, + {117053, 120239, 123425}, + {132983, 136169, 139355}}, + {{137160, 141480, 145800}, + {158760, 163080, 167400}, + {180360, 184680, 189000}}}}}); + + std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>( + Array4D<int, 2, 3, 3, 3>{{{{{214731, 246591, 278451}, + {374031, 405891, 437751}, + {533331, 565191, 597051}}, + {{496804, 570568, 644332}, + {865624, 939388, 1013152}, + {1234444, 1308208, 1381972}}, + {{778877, 894545, 1010213}, + {1357217, 1472885, 1588553}, + {1935557, 2051225, 2166893}}}, + {{{1011231, 1043091, 1074951}, + {1170531, 1202391, 1234251}, + {1329831, 1361691, 1393551}}, + {{2340904, 2414668, 2488432}, + {2709724, 2783488, 2857252}, + {3078544, 3152308, 3226072}}, + {{3670577, 3786245, 3901913}, + {4248917, 4364585, 4480253}, + {4827257, 4942925, 5058593}}}}}); Tensor expectedOutput4 = Array2D<int, 2, 5>{ - {{205050376, 198925904, 181355097, 196978090, 238868348}, - {598467376, 561797804, 560823897, 593043790, 698672948}}}; - std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0); + {{205050376, 198925904, 181355097, 196978090, 238868348}, + {598467376, 561797804, 560823897, 593043790, 698672948}}}; + std::shared_ptr<Tensor> other1 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("conv1")->getOperator()) + ->getOutput(0); REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12)); - std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0); + std::shared_ptr<Tensor> other2 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("conv2")->getOperator()) + ->getOutput(0); REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12)); - std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0); - REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12)); - std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0); + std::shared_ptr<Tensor> other3 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("conv3")->getOperator()) + ->getOutput(0); + REQUIRE( + approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other4 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("fc")->getOperator()) + ->getOutput(0); REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12)); } } diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index 78a10c308a60f026b83ea64cfbd25a848099eb90..7e2dd7a5227c132173525581e07c438bd08907f8 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -14,81 +14,84 @@ #include <string> 
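
The CastMove checks above, like the Scheduler checks below, all funnel through approxEq<T1, T2>(t1, t2, relative, absolute). A minimal sketch of the element-wise criterion such a mixed-precision helper typically implements; the name approxEqSketch and the exact tolerance formula are assumptions, not a quote of Aidge's implementation:

#include <cmath>   // std::abs
#include <cstddef> // std::size_t

// Each element pair must satisfy |a - b| <= absolute + relative * |b|.
template <typename T1, typename T2>
bool approxEqSketch(const T1 *a,
                    const T2 *b,
                    std::size_t n,
                    double relative,
                    double absolute) {
    for (std::size_t i = 0; i < n; ++i) {
        const double va = static_cast<double>(a[i]);
        const double vb = static_cast<double>(b[i]);
        if (std::abs(va - vb) > absolute + relative * std::abs(vb)) {
            return false; // outside the combined tolerance band
        }
    }
    return true;
}

With relative = 0.0, as in the integer comparisons above, this degenerates to a pure absolute-tolerance check; with absolute = 0.0, as in the Float16 section, it becomes purely relative.
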
#include "aidge/data/Tensor.hpp" -#include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/graph/OpArgs.hpp" #include "aidge/operator/Memorize.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/scheduler/ParallelScheduler.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/backend/cpu.hpp" #include "aidge/recipes/GraphViewHelper.hpp" - namespace Aidge { TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { - std::shared_ptr<Tensor> inputTensor = - std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4}, - {5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14}, - {15, 16, 17, 18, 19}, - {20, 21, 22, 23, 24}}}, - {{{25, 26, 27, 28, 29}, - {30, 31, 32, 33, 34}, - {35, 36, 37, 38, 39}, - {40, 41, 42, 43, 44}, - {45, 46, 47, 48, 49}}}}}); - - std::shared_ptr<Tensor> weight1 = std::make_shared<Tensor>( - Array4D<int, 3, 1, 3, 3>{{{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}}, - {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}}, - {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}}); - - std::shared_ptr<Tensor> bias1 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + std::shared_ptr<Tensor> inputTensor = std::make_shared<Tensor>( + Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}}, + {{{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}}}}); + + std::shared_ptr<Tensor> weight1 = + std::make_shared<Tensor>(Array4D<int, 3, 1, 3, 3>{ + {{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}}, + {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}}, + {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}}); + + std::shared_ptr<Tensor> bias1 = + std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); SECTION("Test Sequential graph") { - std::shared_ptr<GraphView> g = - Sequential({ - Conv(1, 3, {3, 3}, "conv1"), - Conv(3, 4, {1, 1}, "conv2"), - Conv(4, 3, {1, 1}, "conv3"), - FC(27, 5, false, "fc")}); + std::shared_ptr<GraphView> g = Sequential({Conv(1, 3, {3, 3}, "conv1"), + Conv(3, 4, {1, 1}, "conv2"), + Conv(4, 3, {1, 1}, "conv3"), + FC(27, 5, false, "fc")}); g->getNode("conv1")->getOperator()->setInput(0, inputTensor); g->getNode("conv1")->getOperator()->setInput(1, weight1); g->getNode("conv1")->getOperator()->setInput(2, bias1); - std::shared_ptr<Tensor> weight2 = - std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, - {{{4}}, {{5}}, {{6}}}, - {{{7}}, {{8}}, {{9}}}, - {{{10}}, {{11}}, {{12}}}}}); - std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); + std::shared_ptr<Tensor> weight2 = std::make_shared<Tensor>( + Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, + {{{4}}, {{5}}, {{6}}}, + {{{7}}, {{8}}, {{9}}}, + {{{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias2 = + std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); g->getNode("conv2")->getOperator()->setInput(1, weight2); g->getNode("conv2")->getOperator()->setInput(2, bias2); // *(g->getNode("conv2")->getOperator()->input(1, weight2); std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>( - Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, - {{{5}}, {{6}}, {{7}}, {{8}}}, - {{{9}}, {{10}}, {{11}}, {{12}}}}}); - std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, + {{{5}}, {{6}}, {{7}}, {{8}}}, + {{{9}}, {{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias3 = + 
std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); g->getNode("conv3")->getOperator()->setInput(1, weight3); g->getNode("conv3")->getOperator()->setInput(2, bias3); - std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>( - Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, - {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, - {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, - {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); - std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + std::shared_ptr<Tensor> weightfc = + std::make_shared<Tensor>(Array2D<int, 5, 27>{ + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); + std::shared_ptr<Tensor> biasfc = + std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); g->getNode("fc")->getOperator()->setInput(1, weightfc); g->getNode("fc")->getOperator()->setInput(2, biasfc); @@ -100,101 +103,150 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { REQUIRE_NOTHROW(scheduler.forward()); scheduler.saveSchedulingDiagram("schedulingSequential"); - std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ - {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, - {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, - {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, - {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, - {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, - {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); - - std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{ - {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}}, - {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}}, - {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}}, - {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}}, - {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}}, - {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}}, - {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}}, - {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}); - - std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ - {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}}, - {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}}, - {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}}, - {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}}, - {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}}, - {{3670577, 3786245, 3901913}, 
{4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}); + std::shared_ptr<Tensor> expectedOutput1 = std::make_shared< + Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, + {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, + {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, + {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, + {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, + {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); + + std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>( + Array4D<int, 2, 4, 3, 3>{{{{{6099, 7017, 7935}, + {10689, 11607, 12525}, + {15279, 16197, 17115}}, + {{13786, 15838, 17890}, + {24046, 26098, 28150}, + {34306, 36358, 38410}}, + {{21473, 24659, 27845}, + {37403, 40589, 43775}, + {53333, 56519, 59705}}, + {{29160, 33480, 37800}, + {50760, 55080, 59400}, + {72360, 76680, 81000}}}, + {{{29049, 29967, 30885}, + {33639, 34557, 35475}, + {38229, 39147, 40065}}, + {{65086, 67138, 69190}, + {75346, 77398, 79450}, + {85606, 87658, 89710}}, + {{101123, 104309, 107495}, + {117053, 120239, 123425}, + {132983, 136169, 139355}}, + {{137160, 141480, 145800}, + {158760, 163080, 167400}, + {180360, 184680, 189000}}}}}); + + std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>( + Array4D<int, 2, 3, 3, 3>{{{{{214731, 246591, 278451}, + {374031, 405891, 437751}, + {533331, 565191, 597051}}, + {{496804, 570568, 644332}, + {865624, 939388, 1013152}, + {1234444, 1308208, 1381972}}, + {{778877, 894545, 1010213}, + {1357217, 1472885, 1588553}, + {1935557, 2051225, 2166893}}}, + {{{1011231, 1043091, 1074951}, + {1170531, 1202391, 1234251}, + {1329831, 1361691, 1393551}}, + {{2340904, 2414668, 2488432}, + {2709724, 2783488, 2857252}, + {3078544, 3152308, 3226072}}, + {{3670577, 3786245, 3901913}, + {4248917, 4364585, 4480253}, + {4827257, 4942925, 5058593}}}}}); Tensor expectedOutput4 = Array2D<int, 2, 5>{ - {{205050376, 198925904, 181355097, 196978090, 238868348}, - {598467376, 561797804, 560823897, 593043790, 698672948}}}; - std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0); + {{205050376, 198925904, 181355097, 196978090, 238868348}, + {598467376, 561797804, 560823897, 593043790, 698672948}}}; + std::shared_ptr<Tensor> other1 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("conv1")->getOperator()) + ->getOutput(0); bool equal1 = (*other1 == *expectedOutput1); REQUIRE(equal1); - std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0); + std::shared_ptr<Tensor> other2 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("conv2")->getOperator()) + ->getOutput(0); bool equal2 = (*other2 == *expectedOutput2); REQUIRE(equal2); - std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0); + std::shared_ptr<Tensor> other3 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("conv3")->getOperator()) + ->getOutput(0); bool equal3 = (*other3 == *expectedOutput3); REQUIRE(equal3); - std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0); + std::shared_ptr<Tensor> other4 = + std::static_pointer_cast<OperatorTensor>( + g->getNode("fc")->getOperator()) + ->getOutput(0); bool equal4 = (*other4 == expectedOutput4); REQUIRE(equal4); } SECTION("Test Parallel graph") { - 
std::shared_ptr<GraphView> g = - Sequential({Conv(1, 3, {3, 3}, "inputConv"), - Parallel({ - Sequential({ - Parallel({ - Conv(3, 3, {1, 1}, "conv1.1"), - Conv(3, 3, {1, 1}, "conv1.2")}), - Add("add1")}), - Conv(3, 3, {1, 1}, "conv1.3")}), - Add("add2"), - Conv(3, 2, {1, 1}, "conv2"), - FC(18, 5, false, "out")}); + std::shared_ptr<GraphView> g = Sequential( + {Conv(1, 3, {3, 3}, "inputConv"), + Parallel({Sequential({Parallel({Conv(3, 3, {1, 1}, "conv1.1"), + Conv(3, 3, {1, 1}, "conv1.2")}), + Add("add1")}), + Conv(3, 3, {1, 1}, "conv1.3")}), + Add("add2"), + Conv(3, 2, {1, 1}, "conv2"), + FC(18, 5, false, "out")}); g->getNode("inputConv")->getOperator()->setInput(0, inputTensor); g->getNode("inputConv")->getOperator()->setInput(1, weight1); g->getNode("inputConv")->getOperator()->setInput(2, bias1); - std::shared_ptr<Tensor> conv11Weight = std::make_shared<Tensor>(Array4D<int, 3, 3, 1, 1>{ - {{{{1}}, {{2}}, {{3}}}, {{{4}}, {{5}}, {{6}}}, {{{7}}, {{8}}, {{9}}}}}); + std::shared_ptr<Tensor> conv11Weight = std::make_shared<Tensor>( + Array4D<int, 3, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, + {{{4}}, {{5}}, {{6}}}, + {{{7}}, {{8}}, {{9}}}}}); g->getNode("conv1.1")->getOperator()->setInput(1, conv11Weight); g->getNode("conv1.1")->getOperator()->setInput(2, bias1); - std::shared_ptr<Tensor> conv12Weight = std::make_shared<Tensor>(Array4D<int, 3, 3, 1, 1>{ - {{{{11}}, {{12}}, {{13}}}, {{{14}}, {{15}}, {{16}}}, {{{17}}, {{18}}, {{19}}}}}); + std::shared_ptr<Tensor> conv12Weight = std::make_shared<Tensor>( + Array4D<int, 3, 3, 1, 1>{{{{{11}}, {{12}}, {{13}}}, + {{{14}}, {{15}}, {{16}}}, + {{{17}}, {{18}}, {{19}}}}}); g->getNode("conv1.2")->getOperator()->setInput(1, conv12Weight); g->getNode("conv1.2")->getOperator()->setInput(2, bias1); - std::shared_ptr<Tensor> conv13Weight = std::make_shared<Tensor>(Array4D<int, 3, 3, 1, 1>{ - {{{{21}}, {{22}}, {{23}}}, {{{24}}, {{25}}, {{26}}}, {{{27}}, {{28}}, {{29}}}}}); + std::shared_ptr<Tensor> conv13Weight = std::make_shared<Tensor>( + Array4D<int, 3, 3, 1, 1>{{{{{21}}, {{22}}, {{23}}}, + {{{24}}, {{25}}, {{26}}}, + {{{27}}, {{28}}, {{29}}}}}); g->getNode("conv1.3")->getOperator()->setInput(1, conv13Weight); g->getNode("conv1.3")->getOperator()->setInput(2, bias1); - std::shared_ptr<Tensor> conv2Weight = std::make_shared<Tensor>( - Array4D<int, 2, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, {{{4}}, {{5}}, {{6}}}}}); - std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 2>{{1, 2}}); + std::shared_ptr<Tensor> conv2Weight = + std::make_shared<Tensor>(Array4D<int, 2, 3, 1, 1>{ + {{{{1}}, {{2}}, {{3}}}, {{{4}}, {{5}}, {{6}}}}}); + std::shared_ptr<Tensor> bias2 = + std::make_shared<Tensor>(Array1D<int, 2>{{1, 2}}); g->getNode("conv2")->getOperator()->setInput(1, conv2Weight); g->getNode("conv2")->getOperator()->setInput(2, bias2); - std::shared_ptr<Tensor> fcWeight = std::make_shared<Tensor>( - Array2D<int, 5, 18>{{{1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3}, - {4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1}, - {2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4}, - {5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2}, - {3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}}}); - std::shared_ptr<Tensor> fcBias = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + std::shared_ptr<Tensor> fcWeight = + std::make_shared<Tensor>(Array2D<int, 5, 18>{ + {{1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3}, + {4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1}, + {2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4}, + {5, 1, 2, 3, 
4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2}, + {3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}}}); + std::shared_ptr<Tensor> fcBias = + std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); g->getNode("out")->getOperator()->setInput(1, fcWeight); g->getNode("out")->getOperator()->setInput(2, fcBias); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( - Array2D<int, 2, 5>{{{124324368, 130692907, 133325056, 125044620, 142843879}, - {369195468, 394615207, 382643056, 379441320, 416291779}}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array2D<int, 2, 5>{ + {{124324368, 130692907, 133325056, 125044620, 142843879}, + {369195468, 394615207, 382643056, 379441320, 416291779}}}); g->setBackend("cpu"); g->setDataType(Aidge::DataType::Int32); @@ -202,22 +254,21 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { SequentialScheduler scheduler(g); REQUIRE_NOTHROW(scheduler.forward()); scheduler.saveSchedulingDiagram("schedulingSequential"); - std::shared_ptr<Tensor> result = - std::static_pointer_cast<Tensor>(g->getNode("out")->getOperator()->getRawOutput(0)); + std::shared_ptr<Tensor> result = std::static_pointer_cast<Tensor>( + g->getNode("out")->getOperator()->getRawOutput(0)); bool equal = (*result == *expectedOutput); REQUIRE(equal); } - SECTION("Test Residual graph") { - } + SECTION("Test Residual graph") {} SECTION("Test Recurrent graph (sequential)") { std::shared_ptr<Tensor> in = std::make_shared<Tensor>( - Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}}); + Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}}); std::shared_ptr<Tensor> initTensor = std::make_shared<Tensor>( - Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}}); + Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}}); std::shared_ptr<Tensor> biasTensor = std::make_shared<Tensor>( - Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}}); + Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}}); auto add1 = Add("add1"); auto mem = Memorize(3, "mem1"); @@ -245,23 +296,22 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { scheduler.saveSchedulingDiagram("schedulingRecurrent_seq"); std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( - Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}}); - std::shared_ptr<Tensor> result = - std::static_pointer_cast<Tensor>(g->getNode("add2")->getOperator()->getRawOutput(0)); + Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}}); + std::shared_ptr<Tensor> result = std::static_pointer_cast<Tensor>( + g->getNode("add2")->getOperator()->getRawOutput(0)); result->print(); expectedOutput->print(); bool equal = (*result == *expectedOutput); REQUIRE(equal); } - SECTION("Test Recurrent graph (parallel)") { std::shared_ptr<Tensor> in = std::make_shared<Tensor>( - Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}}); + Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}}); std::shared_ptr<Tensor> initTensor = std::make_shared<Tensor>( - Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}}); + Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}}); std::shared_ptr<Tensor> biasTensor = std::make_shared<Tensor>( - Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}}); + Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}}); auto add1 = Add("add1"); auto mem = Memorize(3, "mem1"); @@ -288,9 +338,9 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { scheduler.saveSchedulingDiagram("schedulingRecurrent_par"); std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( - Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}}); - std::shared_ptr<Tensor> result = - 
std::static_pointer_cast<Tensor>(g->getNode("add2")->getOperator()->getRawOutput(0)); + Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}}); + std::shared_ptr<Tensor> result = std::static_pointer_cast<Tensor>( + g->getNode("add2")->getOperator()->getRawOutput(0)); result->print(); expectedOutput->print(); bool equal = (*result == *expectedOutput); @@ -298,54 +348,57 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { } SECTION("Test ConnectInput graph") { - std::shared_ptr<GraphView> g = - Sequential({ - Conv(1, 3, {3, 3}, "conv1"), - Conv(3, 4, {1, 1}, "conv2"), - Conv(4, 3, {1, 1}, "conv3"), - FC(27, 5, false, "fc")}); + std::shared_ptr<GraphView> g = Sequential({Conv(1, 3, {3, 3}, "conv1"), + Conv(3, 4, {1, 1}, "conv2"), + Conv(4, 3, {1, 1}, "conv3"), + FC(27, 5, false, "fc")}); // g->getNode("conv1")->getOperator()->setInput(0, inputTensor); g->getNode("conv1")->getOperator()->setInput(1, weight1); g->getNode("conv1")->getOperator()->setInput(2, bias1); - std::shared_ptr<Tensor> weight2 = - std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, - {{{4}}, {{5}}, {{6}}}, - {{{7}}, {{8}}, {{9}}}, - {{{10}}, {{11}}, {{12}}}}}); - std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); + std::shared_ptr<Tensor> weight2 = std::make_shared<Tensor>( + Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, + {{{4}}, {{5}}, {{6}}}, + {{{7}}, {{8}}, {{9}}}, + {{{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias2 = + std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); g->getNode("conv2")->getOperator()->setInput(1, weight2); g->getNode("conv2")->getOperator()->setInput(2, bias2); // *(g->getNode("conv2")->getOperator()->input(1, weight2); std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>( - Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, - {{{5}}, {{6}}, {{7}}, {{8}}}, - {{{9}}, {{10}}, {{11}}, {{12}}}}}); - std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, + {{{5}}, {{6}}, {{7}}, {{8}}}, + {{{9}}, {{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias3 = + std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); g->getNode("conv3")->getOperator()->setInput(1, weight3); g->getNode("conv3")->getOperator()->setInput(2, bias3); - std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>( - Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, - {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, - {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, - {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); - std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + std::shared_ptr<Tensor> weightfc = + std::make_shared<Tensor>(Array2D<int, 5, 27>{ + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15}}}); + std::shared_ptr<Tensor> biasfc = + std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); g->getNode("fc")->getOperator()->setInput(1, weightfc); g->getNode("fc")->getOperator()->setInput(2, biasfc); // input->addChild(g); g->setDataType(Aidge::DataType::Int32); g->setBackend("cpu"); - std::vector<std::vector<Aidge::DimSize_t>> dims = {inputTensor->dims()}; + std::vector<std::vector<Aidge::DimSize_t>> dims = { + inputTensor->dims()}; g->forwardDims(dims); SequentialScheduler scheduler(g); @@ -354,87 +407,132 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { scheduler.saveSchedulingDiagram("schedulingSequential"); - std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ - {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, - {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, - {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, - {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, - {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, - {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); - - std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{ - {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}}, - {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}}, - {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}}, - {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}}, - {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}}, - {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}}, - {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}}, - {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}); - - std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ - {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}}, - {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}}, - {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}}, - {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}}, - {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}}, - {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}); + std::shared_ptr<Tensor> expectedOutput1 = std::make_shared< + Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, + {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, + {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, + {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, + {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, + {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); + + std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>( + Array4D<int, 2, 4, 3, 3>{{{{{6099, 7017, 7935}, + {10689, 11607, 12525}, + {15279, 16197, 17115}}, + {{13786, 15838, 17890}, + {24046, 26098, 28150}, + {34306, 36358, 38410}}, + {{21473, 24659, 27845}, + {37403, 40589, 43775}, + {53333, 56519, 59705}}, + {{29160, 33480, 37800}, + {50760, 55080, 59400}, + {72360, 76680, 81000}}}, + {{{29049, 29967, 30885}, + {33639, 34557, 35475}, + {38229, 39147, 40065}}, + {{65086, 67138, 69190}, + {75346, 77398, 79450}, + {85606, 87658, 89710}}, + {{101123, 104309, 107495}, + 
{117053, 120239, 123425},
+                                      {132983, 136169, 139355}},
+                                     {{137160, 141480, 145800},
+                                      {158760, 163080, 167400},
+                                      {180360, 184680, 189000}}}}});
+
+        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 3, 3>{{{{{214731, 246591, 278451},
+                                        {374031, 405891, 437751},
+                                        {533331, 565191, 597051}},
+                                       {{496804, 570568, 644332},
+                                        {865624, 939388, 1013152},
+                                        {1234444, 1308208, 1381972}},
+                                       {{778877, 894545, 1010213},
+                                        {1357217, 1472885, 1588553},
+                                        {1935557, 2051225, 2166893}}},
+                                      {{{1011231, 1043091, 1074951},
+                                        {1170531, 1202391, 1234251},
+                                        {1329831, 1361691, 1393551}},
+                                       {{2340904, 2414668, 2488432},
+                                        {2709724, 2783488, 2857252},
+                                        {3078544, 3152308, 3226072}},
+                                       {{3670577, 3786245, 3901913},
+                                        {4248917, 4364585, 4480253},
+                                        {4827257, 4942925, 5058593}}}}});

         Tensor expectedOutput4 = Array2D<int, 2, 5>{
-            {{205050376, 198925904, 181355097, 196978090, 238868348},
-             {598467376, 561797804, 560823897, 593043790, 698672948}}};
-        std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
+            {{205050376, 198925904, 181355097, 196978090, 238868348},
+             {598467376, 561797804, 560823897, 593043790, 698672948}}};
+        std::shared_ptr<Tensor> other1 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv1")->getOperator())
+                ->getOutput(0);
         bool equal1 = (*other1 == *expectedOutput1);
         REQUIRE(equal1);
-        std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other2 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv2")->getOperator())
+                ->getOutput(0);
         bool equal2 = (*other2 == *expectedOutput2);
         REQUIRE(equal2);
-        std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other3 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv3")->getOperator())
+                ->getOutput(0);
         bool equal3 = (*other3 == *expectedOutput3);
         REQUIRE(equal3);
-        std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other4 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("fc")->getOperator())
+                ->getOutput(0);
         bool equal4 = (*other4 == expectedOutput4);
         REQUIRE(equal4);
     }
 }

-TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward]") {
+TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)",
+          "[scheduler][backward]") {
     // create GraphView
-    std::shared_ptr<GraphView> gv = Sequential({ReLU("relu0"), Sqrt("srqt0"), ReLU("relu1")});
-
-    std::shared_ptr<Tensor> inputTensor =
-        std::make_shared<Tensor>(Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 2.0f, 3.0f, 4.0f},
-                                                               {5.0f, 6.0f, 7.0f, 8.0f, 9.0f},
-                                                               {10.0f, 11.0f, 12.0f, 13.0f, 14.0f},
-                                                               {15.0f, 16.0f, 17.0f, 18.0f, 19.0f},
-                                                               {20.0f, 21.0f, 22.0f, 23.0f, 24.0f}}},
-                                                             {{{25.0f, 26.0f, 27.0f, 28.0f, 29.0f},
-                                                              {30.0f, 31.0f, 32.0f, 33.0f, 34.0f},
-                                                              {35.0f, 36.0f, 37.0f, 38.0f, 39.0f},
-                                                              {40.0f, 41.0f, 42.0f, 43.0f, 44.0f},
-                                                              {45.0f, 46.0f, 47.0f, 48.0f, 49.0f}}}}});
+    std::shared_ptr<GraphView> gv =
+        Sequential({ReLU("relu0"), Sqrt("sqrt0"), ReLU("relu1")});
+
+    std::shared_ptr<Tensor> inputTensor = std::make_shared<Tensor>(
+        Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 2.0f, 3.0f, 4.0f},
+                                      {5.0f, 6.0f, 7.0f, 8.0f, 9.0f},
+                                      {10.0f, 11.0f, 12.0f, 13.0f, 14.0f},
+                                      {15.0f, 16.0f, 17.0f, 18.0f, 19.0f},
+                                      {20.0f, 21.0f, 22.0f, 23.0f, 24.0f}}},
+                                    {{{25.0f, 
26.0f, 27.0f, 28.0f, 29.0f}, + {30.0f, 31.0f, 32.0f, 33.0f, 34.0f}, + {35.0f, 36.0f, 37.0f, 38.0f, 39.0f}, + {40.0f, 41.0f, 42.0f, 43.0f, 44.0f}, + {45.0f, 46.0f, 47.0f, 48.0f, 49.0f}}}}}); auto label = inputTensor; // implem already set to default auto myProd = Producer(inputTensor, "prod"); - myProd -> addChild(gv); - gv -> compile("cpu", DataType::Float32); + myProd->addChild(gv); + gv->compile("cpu", DataType::Float32); SequentialScheduler scheduler(gv); scheduler.forward(); auto outNode = gv->getOrderedOutputs()[0].first; - std::shared_ptr<Tensor> predictedOutput = std::dynamic_pointer_cast<OperatorTensor>(outNode->getOperator())->getOutput(0); - std::shared_ptr<Tensor> targetOutput = - std::make_shared<Tensor>(Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 1.0f, 2.0f, 2.0f}, - {2.0f, 2.0f, 3.0f, 3.0f, 3.0f}, - {3.0f, 3.0f, 3.0f, 4.0f, 4.0f}, - {4.0f, 4.0f, 4.0f, 4.0f, 4.0f}, - {4.0f, 5.0f, 5.0f, 5.0f, 5.0f}}}, - {{{5.0f, 5.0f, 5.0f, 5.0f, 5.0f}, - {5.0f, 6.0f, 6.0f, 6.0f, 6.0f}, - {6.0f, 6.0f, 6.0f, 6.0f, 6.0f}, - {6.0f, 6.0f, 6.0f, 7.0f, 7.0f}, - {7.0f, 7.0f, 7.0f, 7.0f, 7.0f}}}}}); + std::shared_ptr<Tensor> predictedOutput = + std::dynamic_pointer_cast<OperatorTensor>(outNode->getOperator()) + ->getOutput(0); + std::shared_ptr<Tensor> targetOutput = std::make_shared<Tensor>( + Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 1.0f, 2.0f, 2.0f}, + {2.0f, 2.0f, 3.0f, 3.0f, 3.0f}, + {3.0f, 3.0f, 3.0f, 4.0f, 4.0f}, + {4.0f, 4.0f, 4.0f, 4.0f, 4.0f}, + {4.0f, 5.0f, 5.0f, 5.0f, 5.0f}}}, + {{{5.0f, 5.0f, 5.0f, 5.0f, 5.0f}, + {5.0f, 6.0f, 6.0f, 6.0f, 6.0f}, + {6.0f, 6.0f, 6.0f, 6.0f, 6.0f}, + {6.0f, 6.0f, 6.0f, 7.0f, 7.0f}, + {7.0f, 7.0f, 7.0f, 7.0f, 7.0f}}}}}); predictedOutput->setGrad(targetOutput); REQUIRE_NOTHROW(scheduler.backward()); }