diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp index ed838a94cc0c0238a870427c3b774b29f7818b09..d5e5561d02aacd8532f74d2bfd4ee2fb5a5b5dc3 100644 --- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp @@ -25,6 +25,40 @@ namespace Aidge { + +template <typename T> +typename std::enable_if<std::is_floating_point<T>::value, T>::type +stableMean(const T* vec, size_t size) { + T mean = 0; + for (size_t i = 0; i < size; ++i) { + mean = std::fma<T>(vec[i] - mean, 1.0f / (i + 1), mean); + } + return mean; +} + +// Overload for non-floating-point types: the mean is accumulated in double, then converted back to T on return +template <typename T> +typename std::enable_if<!std::is_floating_point<T>::value, T>::type +stableMean(const T* vec, size_t size) { + double mean = 0; + for (size_t i = 0; i < size; ++i) { + mean = std::fma<double>(vec[i] - mean, 1.0f / (i + 1), mean); + } + return mean; +} + +template <typename T> +typename std::enable_if<std::is_floating_point<T>::value, T>::type +castFromFloat(T value) { + return value; +} + +template <typename T> +typename std::enable_if<!std::is_floating_point<T>::value, T>::type +castFromFloat(double value) { + return static_cast<T>(std::nearbyint(value)); +} + template <class I, class O> void GlobalAveragePoolingImpl_cpu_forward_kernel( const std::vector<DimSize_t> &dims, const void *input_, void *output_) { @@ -49,12 +83,7 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel( for (DimSize_t channel = 0; channel < dims[1]; ++channel) { const I *filter_start = std::next( input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems)); - I mean = 0; - for (size_t i = 0; i < in_channel_nb_elems; ++i) { - // Single pass numerically stable mean, using the fmaf - mean = fmaf(filter_start[i] - mean, 1.0f/(i+1), mean); - } - output[batch * out_batch_nb_elems + channel] = 
mean; + output[batch * out_batch_nb_elems + channel] = castFromFloat<O>(stableMean<I>(filter_start, in_channel_nb_elems)); } } } diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp index 5a143164d7e4fa2585ea72c38eaaa123f215d21a..864b89c4fa4667b70e43ed7436382e30bc150745 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp @@ -25,6 +25,40 @@ #include "aidge/utils/Registrar.hpp" namespace Aidge { + +template <typename T> +typename std::enable_if<std::is_floating_point<T>::value, T>::type +stableMean(const T* vec, size_t len, size_t stride) { + T mean = 0; + for (size_t i = 0; i < len; ++i) { + mean = std::fma<T>(vec[i * stride] - mean, 1.0f / (i + 1), mean); + } + return mean; +} + +// Overload for non-floating-point types: the mean is accumulated in double, then converted back to T on return +template <typename T> +typename std::enable_if<!std::is_floating_point<T>::value, T>::type +stableMean(const T* vec, size_t len, size_t stride) { + double mean = 0; + for (size_t i = 0; i < len; ++i) { + mean = std::fma<double>(vec[i * stride] - mean, 1.0f / (i + 1), mean); + } + return mean; +} + +template <typename T> +typename std::enable_if<std::is_floating_point<T>::value, T>::type +castFromFloat(T value) { + return value; +} + +template <typename T> +typename std::enable_if<!std::is_floating_point<T>::value, T>::type +castFromFloat(double value) { + return static_cast<T>(std::nearbyint(value)); +} + template <class I, class O> void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, DimSize_t /*keepDims*/, @@ -50,12 +84,7 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, for (std::size_t post = 0; post < stride_post; ++post) { const std::size_t idx_i = pre * dim_i * stride_post + post; const std::size_t idx_o = pre * stride_post + post; - O mean = 0; - for (std::size_t i = 0; i < dim_i; 
++i) { - // Single pass numerically stable mean, using the fmaf - mean = fmaf(input[idx_i + i*stride_post] - mean, 1.0f/(i+1), mean); - } - output[idx_o] = mean; + output[idx_o] = castFromFloat<O>(stableMean(input + idx_i, dim_i, stride_post)); } } } else { @@ -72,8 +101,9 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, stride_pre[i] = stride_pre[i-1]*inputDims[i-1]; } - const I* inputAccumulation = input; - I* outputAccumulation = nullptr; + // Element type is the return type of stableMean<I>(), which is I for both overloads (floating point only when I is) + const decltype(stableMean<I>(input, 0, 0))* inputAccumulation = nullptr; + decltype(stableMean<I>(input, 0, 0))* outputAccumulation = nullptr; for (const auto& axisInt : axes) { const std::size_t a = static_cast<std::size_t>(axisInt); @@ -84,23 +114,23 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, for (std::size_t post = 0; post < stride_post[a]; ++post) { const std::size_t idx_i = pre * dim_i * stride_post[a] + post; const std::size_t idx_o = pre * stride_post[a] + post; - I mean = 0; - for (std::size_t i = 0; i < dim_i; ++i) { - // Single pass numerically stable mean, using the fmaf - mean = fmaf(inputAccumulation[idx_i + i*stride_post[a]] - mean, 1.0f/(i+1), mean); + if (inputAccumulation == nullptr) { + outputAccumulation[idx_o] = stableMean<I>(input + idx_i, dim_i, stride_post[a]); + } + else { + outputAccumulation[idx_o] = stableMean<I>(inputAccumulation + idx_i, dim_i, stride_post[a]); } - outputAccumulation[idx_o] = mean; } } std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; }); - if (inputAccumulation != input) { + if (inputAccumulation != nullptr) { delete[] inputAccumulation; } inputAccumulation = outputAccumulation; } - // Copy elements from inputAccumulation to output while dividing by divisor - std::copy(inputAccumulation, inputAccumulation + outputElements, output); + std::transform(inputAccumulation, 
inputAccumulation + outputElements, output, + [](auto value) { return castFromFloat<O>(value); }); if (outputAccumulation) { delete[] outputAccumulation; } diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp index 271a1e2f9860d92f840916f6b2e396993b0bea39..23bacda590dfed82eca623016787388e56ceed79 100644 --- a/unit_tests/operator/Test_MetaOperator.cpp +++ b/unit_tests/operator/Test_MetaOperator.cpp @@ -9,70 +9,79 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> #include <cmath> #include <cstdlib> #include <memory> +#include <random> + +#include <catch2/catch_test_macros.hpp> -#include "aidge/utils/TensorUtils.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/filler/Filler.hpp" #include "aidge/operator/Conv.hpp" +#include "aidge/operator/FC.hpp" +#include "aidge/operator/Identity.hpp" #include "aidge/operator/MetaOperator.hpp" #include "aidge/operator/MetaOperatorDefs.hpp" #include "aidge/operator/Pad.hpp" #include "aidge/operator/Pop.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/operator/Stack.hpp" #include "aidge/scheduler/ParallelScheduler.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/utils/TensorUtils.hpp" using namespace Aidge; TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { - SECTION("PaddedConv(forward)") { - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>( - Array4D<double, 4, 3, 3, 3>{{{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02}, - {1.16492919e-01, 8.21634093e-02, 1.17413265e-01}, - {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}}, - {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01}, - {6.12586558e-01, 8.09918671e-02, 8.40649383e-01}, - {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}}, - {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01}, 
- {1.36960611e-01, 4.64806728e-01, 4.85150000e-01}, - {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}}, - - {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01}, - {1.56806559e-01, 6.22280998e-01, 3.15827594e-01}, - {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}}, - {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01}, - {1.67925807e-01, 2.68356150e-01, 6.28875602e-01}, - {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}}, - {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01}, - {7.63047502e-01, 5.12539506e-02, 9.77400493e-01}, - {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}}, - - {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01}, - {7.10897067e-02, 5.02579011e-01, 3.35236224e-01}, - {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}}, - {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01}, - {1.59875539e-01, 9.13163381e-01, 3.59806060e-01}, - {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}}, - {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01}, - {5.46298282e-01, 2.89698587e-01, 2.62612651e-01}, - {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}}, - - {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01}, - {8.67878485e-01, 2.93263422e-01, 8.03912714e-01}, - {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}}, - {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01}, - {5.80538620e-01, 6.63031275e-01, 2.07247191e-01}, - {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}}, - {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01}, - {7.34639028e-01, 2.84957200e-02, 9.70225217e-01}, - {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}}); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>( - Array1D<double, 4>{{0.16884905, 0.27994487, 0.57227465, 0.06435205}}); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<double, 2, 3, 5, 5>{ + SECTION("PaddedConv(forward)") { + std::shared_ptr<Tensor> myWeights = + std::make_shared<Tensor>(Array4D<double, 4, 3, 3, 3>{ + {{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02}, + {1.16492919e-01, 8.21634093e-02, 
1.17413265e-01}, + {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}}, + {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01}, + {6.12586558e-01, 8.09918671e-02, 8.40649383e-01}, + {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}}, + {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01}, + {1.36960611e-01, 4.64806728e-01, 4.85150000e-01}, + {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}}, + + {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01}, + {1.56806559e-01, 6.22280998e-01, 3.15827594e-01}, + {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}}, + {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01}, + {1.67925807e-01, 2.68356150e-01, 6.28875602e-01}, + {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}}, + {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01}, + {7.63047502e-01, 5.12539506e-02, 9.77400493e-01}, + {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}}, + + {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01}, + {7.10897067e-02, 5.02579011e-01, 3.35236224e-01}, + {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}}, + {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01}, + {1.59875539e-01, 9.13163381e-01, 3.59806060e-01}, + {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}}, + {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01}, + {5.46298282e-01, 2.89698587e-01, 2.62612651e-01}, + {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}}, + + {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01}, + {8.67878485e-01, 2.93263422e-01, 8.03912714e-01}, + {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}}, + {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01}, + {5.80538620e-01, 6.63031275e-01, 2.07247191e-01}, + {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}}, + {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01}, + {7.34639028e-01, 2.84957200e-02, 9.70225217e-01}, + {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}}); + std::shared_ptr<Tensor> myBias = + std::make_shared<Tensor>(Array1D<double, 4>{ + {0.16884905, 0.27994487, 0.57227465, 0.06435205}}); + std::shared_ptr<Tensor> myInput = 
std::make_shared< + Tensor>(Array4D<double, 2, 3, 5, 5>{ // NCHW {{{{0.43224481, 0.9047832, 0.18402257, 0.06162838, 0.52490127}, {0.27773404, 0.55402353, 0.9485062, 0.31197083, 0.80328607}, @@ -108,93 +117,107 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { {0.95873236, 0.6742374, 0.55679676, 0.6323497, 0.34072958}, {0.49694061, 0.79173045, 0.19738225, 0.14755281, 0.80818177}, {0.02332061, 0.74270703, 0.59415632, 0.08195934, 0.46295434}, - {0.71426058, 0.85032931, 0.90750818, 0.28768431, 0.4401146}}}}}); - - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( - Array4D<double, 2, 4, 5, 5>{{{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273}, - {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567}, - {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523}, - {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136}, - {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}}, - - {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890}, - {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475}, - {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442}, - {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438}, - {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}}, - - {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092}, - {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575}, - {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146}, - {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581}, - {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}}, - - {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740}, - {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107}, - {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523}, - {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123}, - {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}}, - - - {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229}, - 
{6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444}, - {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241}, - {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706}, - {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}}, - - {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648}, - {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705}, - {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404}, - {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069}, - {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}}, - - {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888}, - {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179}, - {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316}, - {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387}, - {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}}, - - {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038}, - {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408}, - {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357}, - {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303}, - {3.16612267, 4.38248920, 5.23248482, 4.21292210, 2.86031270}}}}}); - - std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv"); - auto convOp = std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); - - std::shared_ptr<Node> myPad = + {0.71426058, + 0.85032931, + 0.90750818, + 0.28768431, + 0.4401146}}}}}); + + std::shared_ptr<Tensor> myOutput = std::make_shared< + Tensor>(Array4D<double, 2, 4, 5, 5>{ + {{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273}, + {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567}, + {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523}, + {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136}, + {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}}, + + {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890}, + {4.49693012, 
6.68929291, 5.53603029, 5.68874264, 4.28756475}, + {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442}, + {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438}, + {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}}, + + {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092}, + {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575}, + {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146}, + {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581}, + {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}}, + + {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740}, + {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107}, + {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523}, + {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123}, + {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}}, + + {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229}, + {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444}, + {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241}, + {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706}, + {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}}, + + {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648}, + {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705}, + {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404}, + {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069}, + {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}}, + + {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888}, + {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179}, + {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316}, + {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387}, + {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}}, + + {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038}, + {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408}, + 
{5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357}, + {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303}, + {3.16612267, + 4.38248920, + 5.23248482, + 4.21292210, + 2.86031270}}}}}); + + std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv"); + auto convOp = + std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); + + std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "myPad", PadBorderType::Constant, 0.0); - auto padOp = std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); - - convOp->setInput(1, myWeights); - convOp->setInput(2, myBias); - - myPad->addChild(myConv, 0, 0); - padOp->setInput(0, myInput); - - padOp->setDataType(DataType::Float64); - padOp->setBackend("cpu"); - convOp->setDataType(DataType::Float64); - convOp->setBackend("cpu"); - - myPad->forward(); - myConv->forward(); - convOp -> getOutput(0) -> print(); - - double* computedOutput = static_cast<double*>(convOp->getOutput(0)->getImpl()->rawPtr()); - double* expectedOutput = static_cast<double*>(myOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i < myOutput->size(); ++i) { - REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5); - } + auto padOp = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + + convOp->setInput(1, myWeights); + convOp->setInput(2, myBias); + + myPad->addChild(myConv, 0, 0); + padOp->setInput(0, myInput); + + padOp->setDataType(DataType::Float64); + padOp->setBackend("cpu"); + convOp->setDataType(DataType::Float64); + convOp->setBackend("cpu"); + + myPad->forward(); + myConv->forward(); + convOp->getOutput(0)->print(); + + double *computedOutput = + static_cast<double *>(convOp->getOutput(0)->getImpl()->rawPtr()); + double *expectedOutput = + static_cast<double *>(myOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < myOutput->size(); ++i) { + REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5); + } - std::shared_ptr<Node> myPaddedConv = + std::shared_ptr<Node> myPaddedConv = 
PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1}); - } + } SECTION("LSTM(forward)") { + auto pop = Pop(); auto myLSTM = LSTM(32, 64, 0, true, "ltsm"); - auto op = std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + auto op = + std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); auto microGraph = op->getMicroGraph(); microGraph->save("lstm", false, true); @@ -209,14 +232,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { } REQUIRE(myLSTM->nbOutputs() == 2); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array2D<float, 16, 32>{}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 32, 64>{}); - std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( - Array2D<float, 64, 32>{}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 64, 64>{}); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array2D<float, 16, 32>{}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 32, 64>{}); + std::shared_ptr<Tensor> myInitW = + std::make_shared<Tensor>(Array2D<float, 64, 32>{}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 64, 64>{}); pop->addChild(myLSTM, 0, 0); pop->getOperator()->associateInput(0, myInput); @@ -246,7 +269,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { microGraph->save("lstm_dims", true, true); REQUIRE(op->dimsForwarded()); - auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler(); + auto microGraphScheduler = + std::dynamic_pointer_cast<MetaOperator_Op>(op) + ->getMicroGraphScheduler(); microGraphScheduler->saveSchedulingDiagram("lstm_scheduling"); REQUIRE(op->getNbConsumedData(0).data == 512); @@ -257,11 +282,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(microGraphScheduler->getStaticScheduling(1).size() == 24); 
REQUIRE(microGraphScheduler->getStaticScheduling(15).size() == 24); } + SECTION("LSTM(forward_values)") { auto myLSTM = LSTM(2, 3, 0, true, "ltsm"); - auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); + auto op = + std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); - auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph(); + auto microGraph = + std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph(); microGraph->save("lstm", false, false); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); @@ -276,12 +304,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); op->associateInput(0, myInput); op->associateInput(17, myInit); @@ -308,12 +338,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { microGraph->save("lstm_values_dims", false, true); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412}, - {0.25606447, 0.25606447, 0.25606447}, - {0.40323776, 0.40323776, 0.40323776}}}); + Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412}, + {0.25606447, 0.25606447, 0.25606447}, + {0.40323776, 0.40323776, 
0.40323776}}}); - - auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler(); + auto microGraphScheduler = + std::dynamic_pointer_cast<MetaOperator_Op>(op) + ->getMicroGraphScheduler(); microGraphScheduler->saveSchedulingDiagram("lstm_values_scheduling"); op->getOutput(0)->print(); @@ -321,11 +352,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState)); } + SECTION("LSTM(forward_values_seq)") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); auto myGraph = Sequential({pop, myLSTM}); - auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); + auto op = + std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data); @@ -338,13 +371,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); pop->getOperator()->associateInput(0, 
myInput); op->associateInput(17, myInit); @@ -371,9 +407,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { scheduler.saveSchedulingDiagram("lstm_seq_schedule"); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, - {0.49801484, 0.49801484, 0.49801484}, - {0.67162132, 0.67162132, 0.67162132}}}); + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); myGraph->save("lstm_seq_mygraph", true, true); @@ -382,10 +418,12 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState)); } + SECTION("LSTM(forward_values_seq_flatten)(sequential)") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); - auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + auto op = + std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); // Here we test LSTM as it is was flatten in the graph. // We just borrow its micro-graph into our larger myGraph graph. 
@@ -405,13 +443,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); pop->getOperator()->associateInput(0, myInput); op->associateInput(17, myInit); @@ -419,16 +460,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { // Weights X auto prodX = Producer(myInitW); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, + 0, + 1); // Weights H auto prodH = Producer(myInitR); - 
prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, + 0, + 1); myGraph->add({prodX, prodH}); myGraph->setDataType(DataType::Float32); @@ -436,9 +493,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { myGraph->save("lstm_seq_flatten", true, true); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, - {0.49801484, 0.49801484, 0.49801484}, - {0.67162132, 0.67162132, 0.67162132}}}); + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); auto scheduler = SequentialScheduler(myGraph); scheduler.generateScheduling(); @@ -454,7 +511,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { SECTION("LSTM(forward_values_seq_flatten)(parallel)") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); - auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + auto op = + std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); // Here we test LSTM as it is was flatten in the graph. // We just borrow its micro-graph into our larger myGraph graph. 
@@ -474,13 +532,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); pop->getOperator()->associateInput(0, myInput); op->associateInput(17, myInit); @@ -488,16 +549,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { // Weights X auto prodX = Producer(myInitW); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, + 0, + 1); // Weights H auto prodH = Producer(myInitR); - 
prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, + 0, + 1); myGraph->add({prodX, prodH}); myGraph->setDataType(DataType::Float32); @@ -505,9 +582,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { myGraph->save("lstm_seq_flatten", true, true); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, - {0.49801484, 0.49801484, 0.49801484}, - {0.67162132, 0.67162132, 0.67162132}}}); + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); auto scheduler = ParallelScheduler(myGraph); scheduler.generateScheduling(); @@ -519,4 +596,308 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState)); } -} \ No newline at end of file + + SECTION("Leaky(forward)(fixed)") { + + constexpr auto inChannels = 10; + constexpr auto outChannels = 5; + + constexpr auto beta = 0.95; + constexpr auto threshold = 1.0; + constexpr auto nbTimeSteps = 2; + + auto myWeights = + std::make_shared<Tensor>(Array2D<float, outChannels, inChannels>{{ + {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, + {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1}, + {0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4}, + {0.4, 0.3, 0.2, 0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5}, + {0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 
0.0}, + }}); + + auto myWeights2 = + std::make_shared<Tensor>(Array2D<float, inChannels, outChannels>{{ + {0.1, 0.2, 0.3, 0.4, 0.5}, + {0.6, 0.7, 0.8, 0.9, 1.0}, + {1.0, 0.9, 0.8, 0.7, 0.6}, + {0.5, 0.4, 0.3, 0.2, 0.1}, + {0.5, 0.6, 0.7, 0.8, 0.9}, + {1.0, 0.1, 0.2, 0.3, 0.4}, + {0.4, 0.3, 0.2, 0.1, 0.0}, + {0.1, 0.2, 0.3, 0.4, 0.5}, + {0.9, 0.8, 0.7, 0.6, 0.5}, + {0.4, 0.3, 0.2, 0.1, 0.0}, + }}); + + auto myInput = std::make_shared<Tensor>(Array2D<float, 2, 10>{{ + {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, + {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1}, + }}); + + // py/snn Torch computed result, output of fc1 at time step 1 + auto expectedOutputlif1ts1 = + std::make_shared<Tensor>(Array2D<float, 2, 5>{{ + {3.850, 2.2000, 2.6500, 1.5000, 1.6500}, + {2.200, 3.8500, 3.4000, 1.2500, 3.3000}, + }}); + + auto expectedOutputfc2ts1 = + std::make_shared<Tensor>(Array2D<float, 2, 10>{{ + {1.5000, + 4.0000, + 4.0000, + 1.5000, + 3.5000, + 2.0000, + 1.0000, + 1.5000, + 3.5000, + 1.0000}, + {1.5000, + 4.0000, + 4.0000, + 1.5000, + 3.5000, + 2.0000, + 1.0000, + 1.5000, + 3.5000, + 1.0000}, + }}); + + auto expectedOutputlif1ts2 = + std::make_shared<Tensor>(Array2D<float, 2, 5>{{ + {6.5075, 3.2900, 4.1675, 1.9250, 2.2175}, + {3.2900, 6.5075, 5.6300, 1.4375, 5.4350}, + }}); + + // NOTE: Same output as before, because for all channels, we have a + // potential higher than threshold. Thus the lif neuron fires at every + // timestep for every channel. 
+ auto expectedOutputfc2ts2 = + std::make_shared<Tensor>(Array2D<float, 2, 10>{{ + {1.5000, + 4.0000, + 4.0000, + 1.5000, + 3.5000, + 2.0000, + 1.0000, + 1.5000, + 3.5000, + 1.0000}, + {1.5000, + 4.0000, + 4.0000, + 1.5000, + 3.5000, + 2.0000, + 1.0000, + 1.5000, + 3.5000, + 1.0000}, + }}); + + auto init = std::make_shared<Tensor>(Array2D<float, 2, 5>{}); + uniformFiller<float>(init, 0.0, 0.0); + + auto fc1 = FC(inChannels, outChannels, true, "myfc"); + auto fc2 = FC(outChannels, inChannels, true, "fc2"); + // NOTE: Account for init step by adding 1 to the max timestep + // parameter. + auto lif1 = Leaky(nbTimeSteps + 1, beta, threshold, "leaky"); + + // associateInput() does not work + fc1->input(1).first->getOperator()->setOutput(0, myWeights); + fc2->input(1).first->getOperator()->setOutput(0, myWeights2); + + auto fc1Op = + std::static_pointer_cast<OperatorTensor>(fc1->getOperator()); + auto lif1Op = + std::static_pointer_cast<MetaOperator_Op>(lif1->getOperator()); + auto fc2Op = + std::static_pointer_cast<OperatorTensor>(fc2->getOperator()); + + fc1Op->associateInput(0, myInput); + lif1Op->associateInput(1, init); + lif1Op->associateInput(2, init); + + fc1->addChild(lif1, 0, 0); + lif1->addChild(fc2, 1, 0); + + auto g = std::make_shared<GraphView>(); + g->add({fc1, lif1, fc2}); + g->compile("cpu", DataType::Float32); + auto scheduler = SequentialScheduler(g); + + // Forward 1 (simulate timestep 0) + scheduler.forward(true); + REQUIRE(approxEq<float>(*(lif1Op->getOutput(0)), + *(expectedOutputlif1ts1))); + REQUIRE( + approxEq<float>(*(fc2Op->getOutput(0)), *(expectedOutputfc2ts1))); + + // Forward 2 (simulate timestep 1) + scheduler.forward(true); + REQUIRE(approxEq<float>(*(lif1Op->getOutput(0)), + *(expectedOutputlif1ts2))); + REQUIRE( + approxEq<float>(*(fc2Op->getOutput(0)), *(expectedOutputfc2ts2))); + } + + SECTION("Leaky(forward)") { + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f);
// Random float distribution between 0.1 and 1.1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), + std::size_t(4)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(3), + std::size_t(3)); + std::uniform_int_distribution<int> boolDist(0, 1); + std::uniform_real_distribution<float> betaDist(0,1); + + const std::size_t nbDims = nbDimsDist(gen); + Log::info("Nbdims : {}", nbDims); + std::vector<std::size_t> dims; + for (std::size_t i = 0; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); + } + Log::info("timesteps : {}", dims[0]); + Log::info("dimensions : "); + for (auto dim : dims) { + Log::info("{}", dim); + } + + const auto nbTimeSteps = dims[0]; + const auto beta = betaDist(gen); + + auto myLeaky = Leaky(nbTimeSteps, beta, 1.0, "leaky"); + auto op = + std::static_pointer_cast<MetaOperator_Op>(myLeaky->getOperator()); + // auto stack = Stack(2); + auto mem_rec = Stack(nbTimeSteps, "mem_rec"); + auto spk_rec = Stack(nbTimeSteps, "spk_rec"); + auto pop = Pop("popinput"); + + // Here we test Leaky as if it was flattened in the graph. + // We just borrow its micro-graph into our larger myGraph graph.
+ auto myGraph = std::make_shared<GraphView>(); + + pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0); + // 0 for mem 1 for stack + op->getMicroGraph()->getOrderedOutputs()[1].first->addChild(mem_rec, + 0, + 0); + op->getMicroGraph()->getOrderedOutputs()[0].first->addChild(spk_rec, + 0, + 0); + for (auto node : op->getMicroGraph()->getOrderedOutputs()) { + Log::info("name of output {}", node.first->name()); + } + + myGraph->add(pop); + myGraph->add(op->getMicroGraph()); + myGraph->add(mem_rec); + myGraph->add(spk_rec); + myGraph->save("mg", true, true); + + // 3 inputs + REQUIRE(myLeaky->nbInputs() == 3); + REQUIRE(myLeaky->inputCategory(0) == InputCategory::Data); + // Two spikes connected to nothing, + the Add node real output + REQUIRE(myLeaky->nbOutputs() == 4); + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + + // std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + // Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + // {{2.0, 3.0}, {4.0, 5.0}, + // {6.0, 7.0}}}}); + + // Generate input + std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(); + expectedOutput->setDataType(DataType::Float32); + expectedOutput->setBackend("cpu"); + + const auto nb_elements = + std::accumulate(dims.cbegin(), + dims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + float *input = new float[nb_elements]; + float *result = new float[nb_elements]; + + for (std::size_t i = 0; i < nb_elements; ++i) { + input[i] = valueDist(gen); + } + T0->resize(dims); + T0->getImpl()->setRawPtr(input, nb_elements); + T0->print(); + + // Elements popped at each time step + auto nbElementsPerTimeStep = nb_elements / dims[0]; + + // Init + for (int i = 0; i < nbElementsPerTimeStep;
++i) { + result[i] = input[i]; + } + + // Recurrence + for (int i = 1; i < dims[0]; ++i) { + auto offset = nbElementsPerTimeStep * i; + auto prev = nbElementsPerTimeStep * (i - 1); + for (int j = 0; j < nbElementsPerTimeStep; ++j) { + auto reset = (result[prev + j] > 1.0 ? 1 : 0); + result[offset + j] = + result[prev + j] * beta + input[offset + j] - reset; + } + } + + expectedOutput->resize(dims); + expectedOutput->getImpl()->setRawPtr(result, nb_elements); + Log::info("Expected ouptut : "); + expectedOutput->print(); + + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + + auto initMemdims = + std::vector<std::size_t>(dims.begin() + 1, dims.end()); + Log::info("dimensions : "); + for (auto dim : initMemdims) { + Log::info("{}", dim); + } + std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( + Array2D<float, 3, 2>{{{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}}); + + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(initMemdims); + myInitR->setDataType(DataType::Float32); + myInitR->setBackend("cpu"); + uniformFiller<float>(myInitR, 0, 0); + + pop->getOperator()->associateInput(0, T0); + op->associateInput(1, myInitR); + op->associateInput(2, myInitR); + + myGraph->compile("cpu", DataType::Float32); + + auto scheduler = SequentialScheduler(myGraph); + REQUIRE_NOTHROW(scheduler.generateScheduling()); + REQUIRE_NOTHROW(scheduler.forward(true)); + + auto memOp = + std::static_pointer_cast<OperatorTensor>(spk_rec->getOperator()); + REQUIRE(approxEq<float>(*(memOp->getOutput(0)), *(expectedOutput))); + } +}