From 742b763f233004df607c59a65a5d881f41ab0f6d Mon Sep 17 00:00:00 2001 From: cmoineau <cyril.moineau@cea.fr> Date: Mon, 10 Feb 2025 09:47:43 +0000 Subject: [PATCH 01/22] Add back MR https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu/-/merge_requests/131. --- unit_tests/operator/Test_MetaOperator.cpp | 765 ++++++++++++++++------ 1 file changed, 573 insertions(+), 192 deletions(-) diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp index 271a1e2f..4fe39630 100644 --- a/unit_tests/operator/Test_MetaOperator.cpp +++ b/unit_tests/operator/Test_MetaOperator.cpp @@ -9,70 +9,79 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> #include <cmath> #include <cstdlib> #include <memory> +#include <random> + +#include <catch2/catch_test_macros.hpp> -#include "aidge/utils/TensorUtils.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/filler/Filler.hpp" #include "aidge/operator/Conv.hpp" +#include "aidge/operator/FC.hpp" +#include "aidge/operator/Identity.hpp" #include "aidge/operator/MetaOperator.hpp" #include "aidge/operator/MetaOperatorDefs.hpp" #include "aidge/operator/Pad.hpp" #include "aidge/operator/Pop.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/operator/Stack.hpp" #include "aidge/scheduler/ParallelScheduler.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/utils/TensorUtils.hpp" using namespace Aidge; TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { - SECTION("PaddedConv(forward)") { - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>( - Array4D<double, 4, 3, 3, 3>{{{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02}, - {1.16492919e-01, 8.21634093e-02, 1.17413265e-01}, - {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}}, - {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01}, - {6.12586558e-01, 8.09918671e-02, 8.40649383e-01}, - {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}}, - {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01}, - {1.36960611e-01, 4.64806728e-01, 4.85150000e-01}, - {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}}, - - {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01}, - {1.56806559e-01, 6.22280998e-01, 3.15827594e-01}, - {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}}, - {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01}, - {1.67925807e-01, 2.68356150e-01, 6.28875602e-01}, - {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}}, - {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01}, - {7.63047502e-01, 5.12539506e-02, 9.77400493e-01}, - {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}}, - - {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01}, - {7.10897067e-02, 5.02579011e-01, 3.35236224e-01}, - {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}}, - {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01}, - {1.59875539e-01, 9.13163381e-01, 3.59806060e-01}, - {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}}, - {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01}, - {5.46298282e-01, 2.89698587e-01, 2.62612651e-01}, - {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}}, - - {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01}, - {8.67878485e-01, 2.93263422e-01, 8.03912714e-01}, - {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}}, - {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01}, - {5.80538620e-01, 6.63031275e-01, 2.07247191e-01}, - {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}}, - 
{{3.43901418e-01, 4.47741636e-01, 6.59249367e-01}, - {7.34639028e-01, 2.84957200e-02, 9.70225217e-01}, - {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}}); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>( - Array1D<double, 4>{{0.16884905, 0.27994487, 0.57227465, 0.06435205}}); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<double, 2, 3, 5, 5>{ + SECTION("PaddedConv(forward)") { + std::shared_ptr<Tensor> myWeights = + std::make_shared<Tensor>(Array4D<double, 4, 3, 3, 3>{ + {{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02}, + {1.16492919e-01, 8.21634093e-02, 1.17413265e-01}, + {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}}, + {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01}, + {6.12586558e-01, 8.09918671e-02, 8.40649383e-01}, + {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}}, + {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01}, + {1.36960611e-01, 4.64806728e-01, 4.85150000e-01}, + {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}}, + + {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01}, + {1.56806559e-01, 6.22280998e-01, 3.15827594e-01}, + {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}}, + {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01}, + {1.67925807e-01, 2.68356150e-01, 6.28875602e-01}, + {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}}, + {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01}, + {7.63047502e-01, 5.12539506e-02, 9.77400493e-01}, + {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}}, + + {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01}, + {7.10897067e-02, 5.02579011e-01, 3.35236224e-01}, + {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}}, + {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01}, + {1.59875539e-01, 9.13163381e-01, 3.59806060e-01}, + {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}}, + {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01}, + {5.46298282e-01, 2.89698587e-01, 2.62612651e-01}, + {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}}, + + {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01}, + {8.67878485e-01, 2.93263422e-01, 8.03912714e-01}, + {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}}, + {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01}, + {5.80538620e-01, 6.63031275e-01, 2.07247191e-01}, + {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}}, + {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01}, + {7.34639028e-01, 2.84957200e-02, 9.70225217e-01}, + {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}}); + std::shared_ptr<Tensor> myBias = + std::make_shared<Tensor>(Array1D<double, 4>{ + {0.16884905, 0.27994487, 0.57227465, 0.06435205}}); + std::shared_ptr<Tensor> myInput = std::make_shared< + Tensor>(Array4D<double, 2, 3, 5, 5>{ // NCHW {{{{0.43224481, 0.9047832, 0.18402257, 0.06162838, 0.52490127}, {0.27773404, 0.55402353, 0.9485062, 0.31197083, 0.80328607}, @@ -108,93 +117,107 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { {0.95873236, 0.6742374, 0.55679676, 0.6323497, 0.34072958}, {0.49694061, 0.79173045, 0.19738225, 0.14755281, 0.80818177}, {0.02332061, 0.74270703, 0.59415632, 0.08195934, 0.46295434}, - {0.71426058, 0.85032931, 0.90750818, 0.28768431, 0.4401146}}}}}); - - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( - Array4D<double, 2, 4, 5, 5>{{{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273}, - {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567}, - {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523}, - {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136}, - {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}}, - - 
{{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890}, - {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475}, - {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442}, - {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438}, - {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}}, - - {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092}, - {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575}, - {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146}, - {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581}, - {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}}, - - {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740}, - {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107}, - {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523}, - {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123}, - {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}}, - - - {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229}, - {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444}, - {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241}, - {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706}, - {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}}, - - {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648}, - {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705}, - {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404}, - {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069}, - {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}}, - - {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888}, - {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179}, - {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316}, - {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387}, - {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}}, - - {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038}, - {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408}, - {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357}, - {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303}, - {3.16612267, 4.38248920, 5.23248482, 4.21292210, 2.86031270}}}}}); - - std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv"); - auto convOp = std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); - - std::shared_ptr<Node> myPad = + {0.71426058, + 0.85032931, + 0.90750818, + 0.28768431, + 0.4401146}}}}}); + + std::shared_ptr<Tensor> myOutput = std::make_shared< + Tensor>(Array4D<double, 2, 4, 5, 5>{ + {{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273}, + {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567}, + {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523}, + {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136}, + {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}}, + + {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890}, + {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475}, + {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442}, + {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438}, + {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}}, + + {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092}, + {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575}, + {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146}, + {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581}, + {2.79735112, 
4.80967140, 5.57630205, 5.38828325, 4.57078695}}, + + {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740}, + {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107}, + {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523}, + {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123}, + {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}}, + + {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229}, + {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444}, + {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241}, + {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706}, + {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}}, + + {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648}, + {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705}, + {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404}, + {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069}, + {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}}, + + {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888}, + {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179}, + {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316}, + {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387}, + {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}}, + + {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038}, + {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408}, + {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357}, + {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303}, + {3.16612267, + 4.38248920, + 5.23248482, + 4.21292210, + 2.86031270}}}}}); + + std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv"); + auto convOp = + std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); + + std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "myPad", PadBorderType::Constant, 0.0); - auto padOp = std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); - - convOp->setInput(1, myWeights); - convOp->setInput(2, myBias); - - myPad->addChild(myConv, 0, 0); - padOp->setInput(0, myInput); - - padOp->setDataType(DataType::Float64); - padOp->setBackend("cpu"); - convOp->setDataType(DataType::Float64); - convOp->setBackend("cpu"); - - myPad->forward(); - myConv->forward(); - convOp -> getOutput(0) -> print(); - - double* computedOutput = static_cast<double*>(convOp->getOutput(0)->getImpl()->rawPtr()); - double* expectedOutput = static_cast<double*>(myOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i < myOutput->size(); ++i) { - REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5); - } + auto padOp = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + + convOp->setInput(1, myWeights); + convOp->setInput(2, myBias); + + myPad->addChild(myConv, 0, 0); + padOp->setInput(0, myInput); + + padOp->setDataType(DataType::Float64); + padOp->setBackend("cpu"); + convOp->setDataType(DataType::Float64); + convOp->setBackend("cpu"); + + myPad->forward(); + myConv->forward(); + convOp->getOutput(0)->print(); + + double *computedOutput = + static_cast<double *>(convOp->getOutput(0)->getImpl()->rawPtr()); + double *expectedOutput = + static_cast<double *>(myOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < myOutput->size(); ++i) { + REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5); + } - std::shared_ptr<Node> myPaddedConv = + std::shared_ptr<Node> myPaddedConv = PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1}); - } + } 
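    // A minimal sketch (not part of this patch) of how the PaddedConv
    // meta-operator built just above could be run on the same data and checked
    // against the manual Pad -> Conv pipeline. It assumes the meta-operator
    // exposes its inputs in (data, weight, bias) order and that forwarding the
    // node executes its internal micro-graph:
    //
    //     auto paddedConvOp =
    //         std::static_pointer_cast<OperatorTensor>(myPaddedConv->getOperator());
    //     paddedConvOp->setInput(0, myInput);   // same input as the Pad -> Conv path
    //     paddedConvOp->setInput(1, myWeights);
    //     paddedConvOp->setInput(2, myBias);
    //     paddedConvOp->setDataType(DataType::Float64);
    //     paddedConvOp->setBackend("cpu");
    //     myPaddedConv->forward();
    //     REQUIRE(approxEq<double>(*(paddedConvOp->getOutput(0)), *myOutput));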
SECTION("LSTM(forward)") { + auto pop = Pop(); auto myLSTM = LSTM(32, 64, 0, true, "ltsm"); - auto op = std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + auto op = + std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); auto microGraph = op->getMicroGraph(); microGraph->save("lstm", false, true); @@ -209,14 +232,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { } REQUIRE(myLSTM->nbOutputs() == 2); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array2D<float, 16, 32>{}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 32, 64>{}); - std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( - Array2D<float, 64, 32>{}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 64, 64>{}); + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array2D<float, 16, 32>{}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 32, 64>{}); + std::shared_ptr<Tensor> myInitW = + std::make_shared<Tensor>(Array2D<float, 64, 32>{}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 64, 64>{}); pop->addChild(myLSTM, 0, 0); pop->getOperator()->associateInput(0, myInput); @@ -246,7 +269,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { microGraph->save("lstm_dims", true, true); REQUIRE(op->dimsForwarded()); - auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler(); + auto microGraphScheduler = + std::dynamic_pointer_cast<MetaOperator_Op>(op) + ->getMicroGraphScheduler(); microGraphScheduler->saveSchedulingDiagram("lstm_scheduling"); REQUIRE(op->getNbConsumedData(0).data == 512); @@ -257,11 +282,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(microGraphScheduler->getStaticScheduling(1).size() == 24); REQUIRE(microGraphScheduler->getStaticScheduling(15).size() == 24); } + SECTION("LSTM(forward_values)") { auto myLSTM = LSTM(2, 3, 0, true, "ltsm"); - auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); + auto op = + std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); - auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph(); + auto microGraph = + std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph(); microGraph->save("lstm", false, false); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); @@ -276,12 +304,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); op->associateInput(0, myInput); op->associateInput(17, myInit); @@ -308,12 +338,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { microGraph->save("lstm_values_dims", false, true); 
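        // Where the expected hidden state below comes from (derivation,
        // assuming the standard LSTM cell equations with zero biases and a
        // zero initial state): every gate weight is 0.1, so for an input row x
        // the pre-activation of every gate is s = 0.1 * sum(x), and
        //     i = f = o = sigmoid(s),  g = tanh(s),  c = i * g,  h = o * tanh(c)
        // Row {1, 2}: s = 0.3 -> h = sigmoid(0.3) * tanh(sigmoid(0.3) * tanh(0.3))
        //                          ~ 0.5744 * tanh(0.1673) ~ 0.0952
        // Row {3, 4}: s = 0.7 -> h ~ 0.2561
        // Row {5, 6}: s = 1.1 -> h ~ 0.4032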
std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412}, - {0.25606447, 0.25606447, 0.25606447}, - {0.40323776, 0.40323776, 0.40323776}}}); + Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412}, + {0.25606447, 0.25606447, 0.25606447}, + {0.40323776, 0.40323776, 0.40323776}}}); - - auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler(); + auto microGraphScheduler = + std::dynamic_pointer_cast<MetaOperator_Op>(op) + ->getMicroGraphScheduler(); microGraphScheduler->saveSchedulingDiagram("lstm_values_scheduling"); op->getOutput(0)->print(); @@ -321,11 +352,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState)); } + SECTION("LSTM(forward_values_seq)") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); auto myGraph = Sequential({pop, myLSTM}); - auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); + auto op = + std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data); @@ -338,13 +371,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); pop->getOperator()->associateInput(0, myInput); op->associateInput(17, myInit); @@ -371,9 +407,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { scheduler.saveSchedulingDiagram("lstm_seq_schedule"); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, - {0.49801484, 0.49801484, 0.49801484}, - {0.67162132, 0.67162132, 0.67162132}}}); + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); myGraph->save("lstm_seq_mygraph", true, true); @@ -382,10 +418,12 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState)); } + SECTION("LSTM(forward_values_seq_flatten)(sequential)") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); - auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + auto op = + std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); // Here we test LSTM as it is was flatten in the graph. // We just borrow its micro-graph into our larger myGraph graph. 
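        // Concretely, "flattening" means the enclosing GraphView is built from
        // the operator's internal micro-graph rather than from the LSTM node
        // itself, so its inner nodes are scheduled like any other node. A rough
        // sketch of that pattern (an illustration based on how the Leaky test
        // later in this file does it, not the literal code elided from this
        // hunk):
        //
        //     auto myGraph = std::make_shared<GraphView>();
        //     pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
        //     myGraph->add(pop);
        //     myGraph->add(op->getMicroGraph());  // borrow the LSTM micro-graph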
@@ -405,13 +443,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); pop->getOperator()->associateInput(0, myInput); op->associateInput(17, myInit); @@ -419,16 +460,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { // Weights X auto prodX = Producer(myInitW); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, + 0, + 1); // Weights H auto prodH = Producer(myInitR); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, + 0, + 1); myGraph->add({prodX, prodH}); myGraph->setDataType(DataType::Float32); @@ -436,9 +493,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { myGraph->save("lstm_seq_flatten", true, true); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, - {0.49801484, 0.49801484, 0.49801484}, - {0.67162132, 0.67162132, 0.67162132}}}); + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); auto scheduler = SequentialScheduler(myGraph); scheduler.generateScheduling(); @@ -454,7 +511,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { SECTION("LSTM(forward_values_seq_flatten)(parallel)") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); - auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + auto op = + 
std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); // Here we test LSTM as it is was flatten in the graph. // We just borrow its micro-graph into our larger myGraph graph. @@ -474,13 +532,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( - Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); - std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); - std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); pop->getOperator()->associateInput(0, myInput); op->associateInput(17, myInit); @@ -488,16 +549,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { // Weights X auto prodX = Producer(myInitW); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1); - prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, + 0, + 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, + 0, + 1); // Weights H auto prodH = Producer(myInitR); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1); - prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, + 0, + 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, + 0, + 1); myGraph->add({prodX, prodH}); myGraph->setDataType(DataType::Float32); @@ -505,9 +582,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { myGraph->save("lstm_seq_flatten", true, true); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( - Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, - {0.49801484, 0.49801484, 0.49801484}, - {0.67162132, 0.67162132, 0.67162132}}}); + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); auto scheduler = ParallelScheduler(myGraph); scheduler.generateScheduling(); @@ -519,4 +596,308 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState)); } -} \ No 
newline at end of file + + SECTION("Leaky(forward)(fixed)") { + + constexpr auto inChannels = 10; + constexpr auto outChannels = 5; + + constexpr auto beta = 0.95; + constexpr auto threshold = 1.0; + constexpr auto nbTimeSteps = 2; + + auto myWeights = + std::make_shared<Tensor>(Array2D<float, outChannels, inChannels>{{ + {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, + {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1}, + {0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4}, + {0.4, 0.3, 0.2, 0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5}, + {0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0}, + }}); + + auto myWeights2 = + std::make_shared<Tensor>(Array2D<float, inChannels, outChannels>{{ + {0.1, 0.2, 0.3, 0.4, 0.5}, + {0.6, 0.7, 0.8, 0.9, 1.0}, + {1.0, 0.9, 0.8, 0.7, 0.6}, + {0.5, 0.4, 0.3, 0.2, 0.1}, + {0.5, 0.6, 0.7, 0.8, 0.9}, + {1.0, 0.1, 0.2, 0.3, 0.4}, + {0.4, 0.3, 0.2, 0.1, 0.0}, + {0.1, 0.2, 0.3, 0.4, 0.5}, + {0.9, 0.8, 0.7, 0.6, 0.5}, + {0.4, 0.3, 0.2, 0.1, 0.0}, + }}); + + auto myInput = std::make_shared<Tensor>(Array2D<float, 2, 10>{{ + {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, + {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1}, + }}); + + // py/snn Torch computed result, output of fc1 at time step 1 + auto expectedOutputlif1ts1 = + std::make_shared<Tensor>(Array2D<float, 2, 5>{{ + {3.850, 2.2000, 2.6500, 1.5000, 1.6500}, + {2.200, 3.8500, 3.4000, 1.2500, 3.3000}, + }}); + + auto expectedOutputfc2ts1 = + std::make_shared<Tensor>(Array2D<float, 2, 10>{{ + {1.5000, + 4.0000, + 4.0000, + 1.5000, + 3.5000, + 2.0000, + 1.0000, + 1.5000, + 3.5000, + 1.0000}, + {1.5000, + 4.0000, + 4.0000, + 1.5000, + 3.5000, + 2.0000, + 1.0000, + 1.5000, + 3.5000, + 1.0000}, + }}); + + auto expectedOutputlif1ts2 = + std::make_shared<Tensor>(Array2D<float, 2, 5>{{ + {6.5075, 3.2900, 4.1675, 1.9250, 2.2175}, + {3.2900, 6.5075, 5.6300, 1.4375, 5.4350}, + }}); + + // NOTE: Same output as before, because for all channels, we have a + // potential higher than threshold. Thus the lif neuron fires at every + // timestep for every channel. + auto expectedOutputfc2ts2 = + std::make_shared<Tensor>(Array2D<float, 2, 10>{{ + {1.5000, + 4.0000, + 4.0000, + 1.5000, + 3.5000, + 2.0000, + 1.0000, + 1.5000, + 3.5000, + 1.0000}, + {1.5000, + 4.0000, + 4.0000, + 1.5000, + 3.5000, + 2.0000, + 1.0000, + 1.5000, + 3.5000, + 1.0000}, + }}); + + auto init = std::make_shared<Tensor>(Array2D<float, 2, 5>{}); + uniformFiller<float>(init, 0.0, 0.0); + + auto fc1 = FC(inChannels, outChannels, true, "myfc"); + auto fc2 = FC(outChannels, inChannels, true, "fc2"); + // NOTE: Account for init step by adding 1 to the max timestep + // parameter. 
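        // Where the expected tensors above come from (derivation, assuming the
        // reset-by-subtraction LIF update used in the random-value test below:
        // mem[t] = beta * mem[t-1] + in[t] - (mem[t-1] > threshold ? 1 : 0)):
        //   - fc1 applied to input row 0 gives {3.85, 2.20, 2.65, 1.50, 1.65};
        //     with a zero initial membrane this is also the lif1 potential at
        //     the first step (expectedOutputlif1ts1).
        //   - Every potential exceeds the threshold (1.0), so the spike vector
        //     is all ones and fc2 just sums each of its weight rows, e.g.
        //     0.1 + 0.2 + 0.3 + 0.4 + 0.5 = 1.5 for the first output channel.
        //   - Second step: mem = 0.95 * 3.85 + 3.85 - 1.0 = 6.5075, etc.
        //     (expectedOutputlif1ts2); the spike pattern, and therefore the
        //     fc2 output, is unchanged, matching the earlier NOTE about the
        //     neuron firing at every time step.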
+ auto lif1 = Leaky(nbTimeSteps + 1, beta, threshold, "leaky"); + + // associateInput() does not work + fc1->input(1).first->getOperator()->setOutput(0, myWeights); + fc2->input(1).first->getOperator()->setOutput(0, myWeights2); + + auto fc1Op = + std::static_pointer_cast<OperatorTensor>(fc1->getOperator()); + auto lif1Op = + std::static_pointer_cast<MetaOperator_Op>(lif1->getOperator()); + auto fc2Op = + std::static_pointer_cast<OperatorTensor>(fc2->getOperator()); + + fc1Op->associateInput(0, myInput); + lif1Op->associateInput(1, init); + lif1Op->associateInput(2, init); + + fc1->addChild(lif1, 0, 0); + lif1->addChild(fc2, 1, 0); + + auto g = std::make_shared<GraphView>(); + g->add({fc1, lif1, fc2}); + g->compile("cpu", DataType::Float32); + auto scheduler = SequentialScheduler(g); + + // Forward 1 (simulate timestep 0) + scheduler.forward(true); + REQUIRE(approxEq<float>(*(lif1Op->getOutput(0)), + *(expectedOutputlif1ts1))); + REQUIRE( + approxEq<float>(*(fc2Op->getOutput(0)), *(expectedOutputfc2ts1))); + + // Forward 1 (simulate timestep 1) + scheduler.forward(true); + REQUIRE(approxEq<float>(*(lif1Op->getOutput(0)), + *(expectedOutputlif1ts2))); + REQUIRE( + approxEq<float>(*(fc2Op->getOutput(0)), *(expectedOutputfc2ts2))); + } + + SECTION("Leaky(forward)") { + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist( + 0.1f, + 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), + std::size_t(4)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(3), + std::size_t(3)); + std::uniform_int_distribution<int> boolDist(0, 1); + std::uniform_real_distribution<float> betaDist(0,1); + + const std::size_t nbDims = nbDimsDist(gen); + Log::info("Nbdims : {}", nbDims); + std::vector<std::size_t> dims; + for (std::size_t i = 0; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); + } + Log::info("timesteps : {}", dims[0]); + Log::info("dimensions : "); + for (auto dim : dims) { + Log::info("{}", dim); + } + + const auto nbTimeSteps = dims[0]; + const auto beta = betaDist(gen); + + auto myLeaky = Leaky(nbTimeSteps, beta, 1.0, "leaky"); + auto op = + std::static_pointer_cast<MetaOperator_Op>(myLeaky->getOperator()); + // auto stack = Stack(2); + auto mem_rec = Stack(nbTimeSteps, "mem_rec"); + auto spk_rec = Stack(nbTimeSteps, "spk_rec"); + auto pop = Pop("popinput"); + + // Here we test LSTM as it is was flatten in the graph. + // We just borrow its micro-graph into our larger myGraph graph. 
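        // The reference output computed further below follows, per element j
        // and time step t:
        //     mem[t][j] = beta * mem[t-1][j] + x[t][j] - (mem[t-1][j] > 1.0 ? 1 : 0)
        // with mem[0] = x[0], which is exactly what the "Init" and
        // "Recurrence" loops below implement on the flattened buffers.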
+ auto myGraph = std::make_shared<GraphView>(); + + pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0); + // 0 for mem 1 for stack + op->getMicroGraph()->getOrderedOutputs()[1].first->addChild(mem_rec, + 0, + 0); + op->getMicroGraph()->getOrderedOutputs()[0].first->addChild(spk_rec, + 0, + 0); + for (auto node : op->getMicroGraph()->getOrderedOutputs()) { + Log::info("name of output {}", node.first->name()); + } + + myGraph->add(pop); + myGraph->add(op->getMicroGraph()); + myGraph->add(mem_rec); + myGraph->add(spk_rec); + myGraph->save("mg", true, true); + + // 3 outputs + REQUIRE(myLeaky->nbInputs() == 3); + REQUIRE(myLeaky->inputCategory(0) == InputCategory::Data); + // Two spikes connected to nothing, + the Add node real output + REQUIRE(myLeaky->nbOutputs() == 4); + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + + // std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + // Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, + // {{2.0, 3.0}, {4.0, 5.0}, + // {6.0, 7.0}}}}); + + // Generate input + std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(); + expectedOutput->setDataType(DataType::Float32); + expectedOutput->setBackend("cpu"); + + const auto nb_elements = + std::accumulate(dims.cbegin(), + dims.cend(), + std::size_t(1), + std::multiplies<std::size_t>()); + float *input = new float[nb_elements]; + float *result = new float[nb_elements]; + + for (std::size_t i = 0; i < nb_elements; ++i) { + input[i] = valueDist(gen); + } + T0->resize(dims); + T0->getImpl()->setRawPtr(input, nb_elements); + T0->print(); + + // Elements popped at each time step + auto nbElementsPerTimeStep = nb_elements / dims[0]; + + // Init + for (int i = 0; i < nbElementsPerTimeStep; ++i) { + result[i] = input[i]; + } + + // Reccurence + for (int i = 1; i < dims[0]; ++i) { + auto offset = nbElementsPerTimeStep * i; + auto prev = nbElementsPerTimeStep * (i - 1); + for (int j = 0; j < nbElementsPerTimeStep; ++j) { + auto reset = (result[prev + j] > 1.0 ? 
1 : 0); + result[offset + j] = + result[prev + j] * beta + input[offset + j] - reset; + } + } + + expectedOutput->resize(dims); + expectedOutput->getImpl()->setRawPtr(result, nb_elements); + Log::info("Expected ouptut : "); + expectedOutput->print(); + + std::shared_ptr<Tensor> myInit = + std::make_shared<Tensor>(Array2D<float, 3, 3>{ + {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + + auto initMemdims = + std::vector<std::size_t>(dims.begin() + 1, dims.end()); + Log::info("dimensions : "); + for (auto dim : initMemdims) { + Log::info("{}", dim); + } + std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( + Array2D<float, 3, 2>{{{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}}); + + std::shared_ptr<Tensor> myInitR = + std::make_shared<Tensor>(initMemdims); + myInitR->setDataType(DataType::Float32); + myInitR->setBackend("cpu"); + uniformFiller<float>(myInitR, 0, 0); + + pop->getOperator()->associateInput(0, T0); + op->associateInput(1, myInitR); + op->associateInput(2, myInitR); + + myGraph->compile("cpu", DataType::Float32); + + auto scheduler = SequentialScheduler(myGraph); + REQUIRE_NOTHROW(scheduler.generateScheduling()); + REQUIRE_NOTHROW(scheduler.forward(true)); + + auto memOp = + std::static_pointer_cast<OperatorTensor>(spk_rec->getOperator()); + REQUIRE(approxEq<float>(*(memOp->getOutput(0)), *(expectedOutput))); + } +} -- GitLab From ff3a3ed7ad4fc32ba3f0d80b35aeaaf5c1420a61 Mon Sep 17 00:00:00 2001 From: Jerome Hue <jerome.hue@cea.fr> Date: Thu, 6 Feb 2025 11:59:50 +0100 Subject: [PATCH 02/22] Implement backward function for Div operator --- .../aidge/backend/cpu/operator/DivImpl.hpp | 13 +- .../backend/cpu/operator/DivImpl_kernels.hpp | 61 +++- src/operator/DivImpl.cpp | 23 +- unit_tests/operator/Test_DivImpl.cpp | 271 ++++++++++++++++++ 4 files changed, 363 insertions(+), 5 deletions(-) diff --git a/include/aidge/backend/cpu/operator/DivImpl.hpp b/include/aidge/backend/cpu/operator/DivImpl.hpp index 40c1b678..a507690b 100644 --- a/include/aidge/backend/cpu/operator/DivImpl.hpp +++ b/include/aidge/backend/cpu/operator/DivImpl.hpp @@ -24,7 +24,18 @@ namespace Aidge { // Operator implementation entry point for the backend using DivImpl_cpu = OperatorImpl_cpu<Div_Op, - void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)>; + void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*), + void(const std::size_t, + const std::size_t, + const std::size_t, + const std::vector<std::size_t>, + const std::vector<std::size_t>, + const std::vector<std::size_t>, + const void*, + const void*, + const void*, + void*, + void*)>; // Implementation entry point registration to Operator REGISTRAR(Div_Op, "cpu", Aidge::DivImpl_cpu::create); diff --git a/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp b/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp index ed6e55a7..5d3ee7f6 100644 --- a/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp @@ -17,6 +17,7 @@ #include <cstdint> // std::int32_t, std::int64_t #include <functional> // std::multiplies +#include "aidge/backend/cpu/operator/MulImpl_kernels.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/backend/cpu/data/Broadcasting.hpp" @@ -69,16 +70,70 @@ constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_, } } + +template <class I1, class I2, class O> +void DivImpl_cpu_backward_kernel(const std::size_t input0Length, + const std::size_t input1Length, + const std::size_t 
gradOutputLength, + const std::vector<std::size_t>& dims0, + const std::vector<std::size_t>& dims1, + const std::vector<std::size_t>& outputDims, + const void* input0_, + const void* input1_, + const void* grad_output_, + void* gradientInput0_, + void* gradientInput1_) +{ + const I1* input0 = static_cast<const I1*>(input0_); // a + const I2* input1 = static_cast<const I2*>(input1_); // b + const O* grad_output = static_cast<const O*>(grad_output_); + auto* grad_input_0 = static_cast<I1*>(gradientInput0_); // gradient w.r.t. a + auto* grad_input_1 = static_cast<I2*>(gradientInput1_); // gradient w.r.t. b + + std::fill_n(grad_input_0, input0Length, static_cast<I1>(0)); + std::fill_n(grad_input_1, input1Length, static_cast<I2>(0)); + + // Broadcast dims0 and dims1 to match the shape of outputDims + auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0); + auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1); + + for (std::size_t i = 0; i < gradOutputLength; ++i) { + auto idxOutputGrad = getMultiDimIndices(outputDims, i); + std::vector<std::size_t> idxInput0(broadcastedDims0.size()); + std::vector<std::size_t> idxInput1(broadcastedDims1.size()); + + // Map output indices to input indices, considering broadcasting + for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) { + idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension]; + } + + for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) { + idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension]; + } + + auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0); + auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1); + + // grad_a = grad_output * (1/b) + grad_input_0[idx0] += static_cast<I1>(grad_output[i] / input1[idx1]); + + // grad_b = grad_output * (-a/b²) + grad_input_1[idx1] += static_cast<I2>(grad_output[i] * (-input0[idx0] / (input1[idx1] * input1[idx1]))); + } +} + + // Kernels registration to implementation entry point REGISTRAR(DivImpl_cpu, {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, nullptr}); + {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, Aidge::DivImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(DivImpl_cpu, {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, nullptr}); + {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, Aidge::DivImpl_cpu_backward_kernel<double, double, double>}); REGISTRAR(DivImpl_cpu, {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); + {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, + Aidge::DivImpl_cpu_backward_kernel<std::int32_t, std::int32_t, std::int32_t>}); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ */ diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp index 135b32b5..67444cb8 100644 --- a/src/operator/DivImpl.cpp +++ b/src/operator/DivImpl.cpp @@ -152,5 +152,26 @@ void Aidge::DivImpl_cpu::forward() { template <> void Aidge::DivImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Div_Op on backend cpu"); + const Div_Op& op_ = dynamic_cast<const Div_Op&>(mOp); + + auto in0 = op_.getInput(0); + auto in1 = op_.getInput(1); + auto in0grad = 
op_.getInput(0)->grad(); + auto in1grad = op_.getInput(1)->grad(); + auto out0grad = op_.getOutput(0)->grad(); + + const auto impl = Registrar<DivImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + impl.backward(in0grad->size(), + in1grad->size(), + out0grad->size(), + in0->dims(), + in1->dims(), + out0grad->dims(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(out0grad), + getCPUPtr(in0grad), + getCPUPtr(in1grad)); } + diff --git a/unit_tests/operator/Test_DivImpl.cpp b/unit_tests/operator/Test_DivImpl.cpp index 4037b2ad..4e7657ed 100644 --- a/unit_tests/operator/Test_DivImpl.cpp +++ b/unit_tests/operator/Test_DivImpl.cpp @@ -322,4 +322,275 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") { } } } + +TEST_CASE("[CPU/Operator] Div(Backward)", "[Div][CPU][Backward]") { + std::shared_ptr<Div_Op> op = std::make_shared<Div_Op>(); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + // NOTE: The first four tests use fixed values, the last one uses random values but static dimensions. + + SECTION("Case 1: 1D and 2D Tensors") { + const auto T0 = std::make_shared<Tensor>( + Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{1, 2, 3}, {4, 5, 6}}})); + + const auto T1 = + std::make_shared<Tensor>(Array1D<cpptype_t<DataType::Float32>, 3>({0.1, 0.2, 0.3})); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(std::make_shared<Tensor>( + Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}))); + op->forwardDims(); + + op->backward(); + + const Tensor expectedGrad0 = + Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{10, 5, 3.3333}, {10, 5, 3.3333}}}); + + const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({-500, -175, -100}); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } + + SECTION("Case 2: 3D and 1D tensors") { + const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}, + {{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}})); + + const auto T1 = + std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1})); + + const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}})); + + const Tensor expectedGrad0 = + Array3D<float, 2, 2, 3>({{{{3.3333, 5.0, 10}, {3.3333, 5.0, 10}}, + {{3.3333, 5.0, 10}, {3.3333, 5.0, 10}}}}); + + const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({-244.4444, -650.0, -3000.0}); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + op->backward(); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } + + SECTION("Case 3: 4D and 2D tensors") { + const auto T0 = std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>( + {{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}, + {{10.0, 11.0, 12.0}, {13.0, 14.0, 15.0}, {16.0, 17.0, 18.0}}}, + {{{19.0, 20.0, 21.0}, {22.0, 23.0, 24.0}, {25.0, 26.0, 27.0}}, + {{28.0, 29.0, 30.0}, + {31.0, 32.0, 33.0}, + {34.0, 35.0, 36.0}}}}})); + + const auto T1 = std::make_shared<Tensor>(Array2D<cpptype_t<DataType::Float32>, 3, 3>( + {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}})); + + const auto newGrad = + std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>( + 
{{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}, + {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}, + {{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}, + {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}})); + + const Tensor expectedGrad0 = + Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>( + {{{{{2, 3.3333, 10}, {2.5, 5.0, 1.66667}, {1.42857, 1.2500, 1.11111}}, + {{2, 3.3333, 10}, {2.5, 5.0, 1.66667}, {1.42857, 1.2500, 1.11111}}}, + {{{2, 3.3333, 10}, {2.5, 5.0, 1.66667}, {1.42857, 1.2500, 1.11111}}, + {{2, 3.3333, 10}, {2.5, 5.0, 1.66667}, {1.42857, 1.2500, 1.11111}}}}}); + + const Tensor expectedGrad1 = + Array2D<cpptype_t<DataType::Float32>, 3, 3>({{{-232.0, -688.888, -6600.0}, + {-437.5, -1850.0, -216.66667}, + {-167.3469, -134.3750, -111.111}}}); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + op->backward(); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } + + SECTION("Case 4: 3D and 2D tensors") { + const auto T0 = std::make_shared<Tensor>( + Array3D<float, 2, 3, 4>({{{ + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }, + { + {13.0, 14.0, 15.0, 16.0}, + {17.0, 18.0, 19.0, 20.0}, + {21.0, 22.0, 23.0, 24.0}, + }}})); + + const auto T1 = std::make_shared<Tensor>( + Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2}}})); + + const auto newGrad = std::make_shared<Tensor>( + Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{ + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + }, + { + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + }}})); + + const Tensor expectedGrad0 = + Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{ + {10, 5, 3.33333, 2.5}, + {2, 1.66667, 1.42857, 1.2500}, + {1.11111, 1.0, 0.90909, 0.83333}}, + {{10, 5, 3.33333, 2.5}, + {2, 1.66667, 1.42857, 1.2500}, + {1.11111, 1.0, 0.90909, 0.83333}}}}); + + const Tensor expectedGrad1 = + Array2D<cpptype_t<DataType::Float32>, 3, 4>({{ + {-1400.0, -400.0, -200.0, -125.0}, + {-88.0, -66.66667, -53.0612, -43.750}, + {-37.0370, -32.0, -28.0992, -25.00}}}); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + op->backward(); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } + + SECTION("Case 5: Tensors with random values") { + + // Use random values + const std::vector<std::size_t> dims0 = {5, 2, 1, 7}; // First tensor + const std::vector<std::size_t> dims1 = {2, 6, 7}; // Second tensor + const std::vector<std::size_t> outputDims = {5, 2, 6, 7}; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> dist(0.1f, 1.0f); + + auto T0 = std::make_shared<Tensor>(dims0); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + float* input0Data = static_cast<float*>(T0->getImpl()->rawPtr()); + // Fill with random values + for (std::size_t i = 0; i < T0->size(); ++i) { + input0Data[i] = dist(gen); + } + + auto T1 = std::make_shared<Tensor>(dims1); + T1->setDataType(DataType::Float32); + T1->setBackend("cpu"); + float* input1Data = static_cast<float*>(T1->getImpl()->rawPtr()); + // Fill with random values + for 
(std::size_t i = 0; i < T1->size(); ++i) { + input1Data[i] = dist(gen); + } + + op->associateInput(0, T0); + op->associateInput(1, T1); + + op->forwardDims(); + op->forward(); + + Tensor expectedOutput{outputDims}; + expectedOutput.setBackend("cpu"); + float* expectedOutputData = static_cast<float*>(expectedOutput.getImpl()->rawPtr()); + + for (std::size_t n = 0; n < 5; ++n) { + for (std::size_t c = 0; c < 2; ++c) { + for (std::size_t h = 0; h < 6; ++h) { + for (std::size_t w = 0; w < 7; ++w) { + std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n)); + std::size_t in0Idx = + w + 7 * (0 + 1 * (c + 2 * n)); // middle dim is 1 + std::size_t in1Idx = + w + 7 * (h + 6 * c); // no n dimension + + expectedOutputData[outIdx] = input0Data[in0Idx] / input1Data[in1Idx]; + } + } + } + } + + auto outputTensor = op->getOutput(0); + + REQUIRE(approxEq<float>(*outputTensor, expectedOutput)); + + // Backward pass + std::vector<float> gradOutputData(expectedOutput.size()); + for (auto &val : gradOutputData) { + val = dist(gen); + } + + op->getOutput(0)->setGrad(std::make_shared<Tensor>()); + op->getOutput(0)->grad()->resize(outputDims); + op->getOutput(0)->grad()->getImpl()->setRawPtr(gradOutputData.data(), + expectedOutput.size()); + + // Compute reference gradients + std::vector<float> expectedGrad0(T0->size(), 0.0f); + std::vector<float> expectedGrad1(T1->size(), 0.0f); + + for (std::size_t n = 0; n < 5; ++n) { + for (std::size_t c = 0; c < 2; ++c) { + for (std::size_t h = 0; h < 6; ++h) { + for (std::size_t w = 0; w < 7; ++w) { + std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n)); + std::size_t in0Idx = w + 7 * (0 + 1 * (c + 2 * n)); + std::size_t in1Idx = w + 7 * (h + 6 * c); + + expectedGrad0[in0Idx] += + gradOutputData[outIdx] * (1.0f / input1Data[in1Idx]); + + expectedGrad1[in1Idx] += + gradOutputData[outIdx] * (-input0Data[in0Idx] / (input1Data[in1Idx] * input1Data[in1Idx])); + } + } + } + } + + // Perform backward pass + op->backward(); + + auto expectedGrad0Tensor = std::make_shared<Tensor>(); + expectedGrad0Tensor->resize(T0->dims()); + expectedGrad0Tensor->setBackend("cpu"); + expectedGrad0Tensor->setDataType(DataType::Float32); + expectedGrad0Tensor->getImpl()->setRawPtr(expectedGrad0.data(), + expectedGrad0.size()); + + auto expectedGrad1Tensor = std::make_shared<Tensor>(T1->dims()); + expectedGrad1Tensor->setBackend("cpu"); + expectedGrad1Tensor->setDataType(DataType::Float32); + expectedGrad1Tensor->getImpl()->setRawPtr(expectedGrad1.data(), + expectedGrad1.size()); + + // Verify backward pass + REQUIRE(approxEq<float>(*T0->grad(), *expectedGrad0Tensor)); + REQUIRE(approxEq<float>(*T1->grad(), *expectedGrad1Tensor)); + } +} } // namespace Aidge -- GitLab From 8a6699936cc68401f588760e51b7382dfef32fc7 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Thu, 20 Feb 2025 11:10:50 +0100 Subject: [PATCH 03/22] Added /bigobj for unit tests on Windows --- unit_tests/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 6c7af9c3..e1f261d0 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -25,6 +25,10 @@ target_link_libraries(tests${module_name} PRIVATE ${module_name}) target_link_libraries(tests${module_name} PRIVATE Catch2::Catch2WithMain) +target_compile_options(tests${module_name} PRIVATE + $<$<CXX_COMPILER_ID:MSVC>: + /bigobj>) + list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) include(CTest) include(Catch) -- GitLab From 97d0996af09c481aec835064ec6ad30027a10c40 Mon 
Sep 17 00:00:00 2001 From: hrouis <houssemeddine.rouis92@gmail.com> Date: Fri, 24 Jan 2025 16:06:22 +0100 Subject: [PATCH 04/22] add Equal operator --- include/aidge/backend/cpu.hpp | 1 + .../aidge/backend/cpu/operator/EqualImpl.hpp | 32 +++ .../cpu/operator/EqualImpl_kernels.hpp | 163 ++++++++++++++ src/operator/EqualImpl.cpp | 61 ++++++ unit_tests/operator/Test_EqualImpl.cpp | 205 ++++++++++++++++++ 5 files changed, 462 insertions(+) create mode 100644 include/aidge/backend/cpu/operator/EqualImpl.hpp create mode 100644 include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp create mode 100644 src/operator/EqualImpl.cpp create mode 100644 unit_tests/operator/Test_EqualImpl.cpp diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 5db19a2b..ffc03ae5 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -29,6 +29,7 @@ #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" #include "aidge/backend/cpu/operator/DivImpl.hpp" +#include "aidge/backend/cpu/operator/EqualImpl.hpp" #include "aidge/backend/cpu/operator/ErfImpl.hpp" #include "aidge/backend/cpu/operator/ExpandImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/EqualImpl.hpp b/include/aidge/backend/cpu/operator/EqualImpl.hpp new file mode 100644 index 00000000..e2489096 --- /dev/null +++ b/include/aidge/backend/cpu/operator/EqualImpl.hpp @@ -0,0 +1,32 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_H_ +#define AIDGE_CPU_OPERATOR_EQUALIMPL_H_ + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/Equal.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> + +namespace Aidge { +// Operator implementation entry point for the backend +using EqualImpl_cpu = OperatorImpl_cpu<Equal_Op, + void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(Equal_Op, "cpu", Aidge::EqualImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp b/include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp new file mode 100644 index 00000000..3c8ff0f4 --- /dev/null +++ b/include/aidge/backend/cpu/operator/EqualImpl_kernels.hpp @@ -0,0 +1,163 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ + +#include "aidge/backend/cpu/operator/EqualImpl.hpp" +#include "aidge/utils/Registrar.hpp" + +namespace Aidge { + +namespace { +// suppose values are contiguous in memory +template <class I, class O> +void equal_contiguous_arrays(const std::size_t input1size, + const std::size_t input2size, + const std::size_t output1size, + const I* input1, + const I* input2, + O* output) +{ + for (std::size_t i = 0; i < output1size; ++i) + { + const std::size_t in1_id = (input1size != 1) ? i : 0; + const std::size_t in2_id = (input2size != 1) ? i : 0; + output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]); + } +} +} + + +template <class I, class O> +void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, + std::vector<std::size_t> dims1, + const std::vector<std::size_t>& outputDims, + const void* input0_, + const void* input1_, + void* output_) { + + const I* input_0 = static_cast<const I*>(input0_); + const I* input_1 = static_cast<const I*>(input1_); + O* output = static_cast<O*>(output_); + + // [5,2,1,7] & [2,6,7] + // 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7] + // 2. Find the highest equal dimension -> 3 + // Exception: if the first diverging dimension is the last one, then -> 4 (dims.size()) + // 3. Compute the highest number of contiguous data -> 7 + // 4. Compute stride and offset step for the broadcast mechanism + // 5. Call a simple kernel + + // special case for equal dimensions, the kernel is called with the entire arrays at once + if (dims0 == dims1) { + const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); + for (std::size_t i = 0; i < input0_contiguous_size; ++i) + { + output[i] = static_cast<O>(input_0[i] == input_1[i]); + } + return; + } + + // set dimensions to be of equal size by filling the smallest one with ones. + if (dims0.size() > dims1.size()) { + dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1)); + } + else if (dims1.size() > dims0.size()) { + dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1)); + } + + const std::size_t nbDims = dims0.size(); + + // Find the highest equal dimension + // std::size_t contiguousIdx = nbDims - 1; + std::size_t contiguousIdx = nbDims; + while (contiguousIdx-- > 0) { + // for (; contiguousIdx+1 > 0; --contiguousIdx) { + if (dims0[contiguousIdx] != dims1[contiguousIdx]) { + if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1 + const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? 
dims0 : dims1; + while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) { + --contiguousIdx; + } + } + break; + } + } + ++contiguousIdx; + + // Compute the highest number of contiguous data for each Tensor + const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + + // initialize strides to iterate through data because of broadcasting + std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx); + if (contiguousIdx > 0) { + stride_post0[contiguousIdx - 1] = 1; + stride_post1[contiguousIdx - 1] = 1; + for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) { + stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]); + stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]); + } + for (std::size_t i = 0; i != contiguousIdx; ++i) { + stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1; + } + } + + // variables for arrays offsets + std::size_t offsetIn0 = 0; + std::size_t offsetIn1 = 0; + std::size_t offsetOut = 0; + + + std::size_t dim = contiguousIdx - 1; + const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>()); + for (std::size_t stack = 0; stack < nbStacks;) { + equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size, + input_0 + offsetIn0*input0_contiguous_size, + input_1 + offsetIn1*input1_contiguous_size, + output + offsetOut*output_contiguous_size); + if (++stack < nbStacks) { + std::size_t tmp_stack = stack; + while(tmp_stack % outputDims[dim] == 0) { + tmp_stack /= outputDims[dim]; + dim--; + } + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = contiguousIdx - 1; + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(EqualImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}}, + {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_EQUALIMPL_KERNELS_H_ */ diff --git a/src/operator/EqualImpl.cpp 
b/src/operator/EqualImpl.cpp new file mode 100644 index 00000000..5926212e --- /dev/null +++ b/src/operator/EqualImpl.cpp @@ -0,0 +1,61 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> // std::accumulate +#include <thread> // std::this_thread::sleep_for +#include <vector> + +#include "aidge/operator/Equal.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +#include "aidge/backend/cpu/operator/EqualImpl.hpp" +#include "aidge/backend/cpu/operator/EqualImpl_kernels.hpp" + +template <> +void Aidge::EqualImpl_cpu::forward() { + const Equal_Op& op = static_cast<const Equal_Op&>(mOp); + // Check inputs + AIDGE_ASSERT(op.getInput(0), "missing input in Equal operator"); + AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Equal forward because the 0-th input has no implementation."); + + AIDGE_ASSERT(op.getInput(1), "missing input in Equal operator"); + AIDGE_ASSERT(op.getInput(1)->hasImpl(), "cannot run Equal forward because the 1st input has no implementation."); + + AIDGE_ASSERT(op.getInput(1)->dataType() == op.getInput(0)->dataType(), "Cannot Equal inputs with two different data types."); + + // Find the correct kernel type + const auto impl = Registrar<EqualImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; + const auto& input0 = op.getInput(0)->refCastFrom(input0Fallback, *op.getInput(0)); + const auto& input1 = op.getInput(1)->refCastFrom(input1Fallback, *op.getInput(1)); + + + impl.forward(op.getInput(0)->dims(), + op.getInput(1)->dims(), + op.getOutput(0)->dims(), + input0.getImpl()->rawPtr(), + input1.getImpl()->rawPtr(), + getCPUPtr(op.getRawOutput(0))); +} + +template <> +void Aidge::EqualImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Equal_Op on backend cpu"); +} diff --git a/unit_tests/operator/Test_EqualImpl.cpp b/unit_tests/operator/Test_EqualImpl.cpp new file mode 100644 index 00000000..a229b8ce --- /dev/null +++ b/unit_tests/operator/Test_EqualImpl.cpp @@ -0,0 +1,205 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Equal.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("Same dimensions") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); + } + } + SECTION("Broadcasting") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims1(nbDims, 1); + std::vector<DimSize_t> dims2(nbDims, 1); + std::vector<DimSize_t> expectedOutDims; + for (std::size_t i = 0; i < nbDims; i++) { + DimSize_t dim = dimSizeDist(gen); + if (boolDist(gen)) { + dims1[i] = dim; + } + if (boolDist(gen)) { + dims2[i] = dim; + } + expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + } + + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + } + SECTION("Same size inputs") { + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 15},{31, 11},{22, 49}}, // + {{41, 10},{24, 51},{27, 52}}, // + {{26, 53},{27, 54},{28, 55}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{32, 59},{33, 60},{34, 61}}, // + {{35, 62},{36, 63},{37, 64}} // + }, // + { // + {{38, 65},{39, 66},{40, 67}}, // + {{41, 68},{42, 69},{43, 70}}, // + 
{{44, 71},{45, 72},{46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 47},{21, 48},{22, 49}}, // + {{23, 50},{24, 51},{25, 52}}, // + {{17, 53},{27, 26},{14, 33}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{72, 44},{33, 20},{27, 55}}, // + {{35, 24},{25, 63},{28, 64}} // + }, // + { // + {{32, 65},{39, 66},{40, 70}}, // + {{41, 53},{42, 60},{34, 70}}, // + {{44, 71},{30, 12},{46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { + { + {{1, 0},{0, 0},{1, 1}}, + {{0, 0},{1, 1},{0, 1}}, + {{0, 1},{1, 0},{0, 0}} + }, + { + {{1, 1},{1, 1},{1, 1}}, + {{0, 0},{1, 0},{0, 0}}, + {{1, 0},{0, 1},{0, 1}} + }, + { + {{0, 1},{1, 1},{1, 0}}, + {{1, 0},{1, 0},{0, 1}}, + {{1, 1},{0, 0},{1, 1}} + } + } + }); + + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0, input1); + op->associateInput(1, input2); + op->setBackend("cpu"); + op->setDataType(DataType::Int32); + myEqual->forward(); + + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + } + + SECTION("Broadcasting") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{10, 20},{22, 23},{20, 20}}, // + {{10, 15},{10, 29},{20, 20}}, // + {{26, 25},{33, 20},{10, 20}} // + } // + } // + }); // + + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{ 1, 1},{ 0, 0},{ 0, 1}}, // + {{ 1, 0},{ 1, 0},{ 0, 1}}, // + {{ 0, 0},{ 0, 1},{ 1, 1}} // + } // + } // + }); // + + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0, input_1); + op->associateInput(1, input_2); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + myEqual->forward(); + op->getOutput(0)->print(); + expectedOutput->print(); + REQUIRE(*op->getOutput(0) == *expectedOutput); + } +} \ No newline at end of file -- GitLab From 8701618c638707e45478b781a78a1d64ec16f407 Mon Sep 17 00:00:00 2001 From: hrouis <houssemeddine.rouis92@gmail.com> Date: Fri, 24 Jan 2025 16:07:13 +0100 Subject: [PATCH 05/22] fix And operator --- .../backend/cpu/operator/AndImpl_kernels.hpp | 29 ++- unit_tests/operator/Test_AndImpl.cpp | 191 +++++++++--------- 2 files changed, 108 insertions(+), 112 deletions(-) diff --git a/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp index 73b710e0..d7c8ebcf 100644 --- a/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp @@ -20,7 +20,7 @@ namespace Aidge { namespace { // suppose values are contiguous in memory template <class I, class O> -void equal_contiguous_arrays(const std::size_t input1size, +void and_contiguous_arrays(const std::size_t input1size, const std::size_t input2size, const std::size_t output1size, const I* input1, @@ -31,14 +31,14 @@ void equal_contiguous_arrays(const std::size_t input1size, { const std::size_t in1_id = (input1size != 1) ? i : 0; const std::size_t in2_id = (input2size != 1) ? 
i : 0; - output[i] = static_cast<O>(input1[in1_id] == input2[in2_id]); + output[i] = static_cast<O>(input1[in1_id] && input2[in2_id]); } } } template <class I, class O> -void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, +void AndImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, std::vector<std::size_t> dims1, const std::vector<std::size_t>& outputDims, const void* input0_, @@ -60,9 +60,8 @@ void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, // special case for equal dimensions, the kernel is called with the entire arrays at once if (dims0 == dims1) { const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); - for (std::size_t i = 0; i < input0_contiguous_size; ++i) - { - output[i] = static_cast<O>(input_0[i] == input_1[i]); + for (std::size_t i = 0; i < input0_contiguous_size; ++i) { + output[i] = static_cast<O>(input_0[i] && input_1[i]); } return; } @@ -126,7 +125,7 @@ void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, std::size_t dim = contiguousIdx - 1; const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>()); for (std::size_t stack = 0; stack < nbStacks;) { - equal_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size, + and_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size, input_0 + offsetIn0*input0_contiguous_size, input_1 + offsetIn1*input1_contiguous_size, output + offsetOut*output_contiguous_size); @@ -146,17 +145,17 @@ void EqualImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, // Kernels registration to implementation entry point REGISTRAR(AndImpl_cpu, - {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<float, float>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<float, float>, nullptr}); REGISTRAR(AndImpl_cpu, - {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<double, double>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<double, double>, nullptr}); REGISTRAR(AndImpl_cpu, - {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); REGISTRAR(AndImpl_cpu, - {DataType::Int64}, - {ProdConso::inPlaceModel, Aidge::EqualImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); } // namespace Aidge diff --git a/unit_tests/operator/Test_AndImpl.cpp b/unit_tests/operator/Test_AndImpl.cpp index c2309dce..978a89e5 100644 --- a/unit_tests/operator/Test_AndImpl.cpp +++ b/unit_tests/operator/Test_AndImpl.cpp @@ -26,75 +26,92 @@ using namespace Aidge; TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { - SECTION("ForwardDims") - { + SECTION("ForwardDims") { constexpr std::uint16_t NBTRIALS = 10; // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - 
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); - std::uniform_int_distribution<int> boolDist(0,1); + std::uniform_int_distribution<int> boolDist(0, 1); // Use 0 for false, 1 for true + std::uniform_int_distribution<std::size_t> dimSizeDist(2, 10); + std::uniform_int_distribution<std::size_t> nbDimsDist(1, 5); SECTION("Same dimensions") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { DimSize_t nbDims = nbDimsDist(gen); std::vector<DimSize_t> dims(nbDims); - for (std::size_t i = 0; i < nbDims; i++) { + for (std::size_t i = 0; i < nbDims; ++i) { dims[i] = dimSizeDist(gen); } - + const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + float* array0 = new float[nb_elements]; + float* array1 = new float[nb_elements]; + for (std::size_t i = 0; i < nb_elements; ++i) { + array0[i] = boolDist(gen); + array1[i] = boolDist(gen); + } std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); - myInput1->setBackend("cpu"); - myInput1->setDataType(DataType::Float32); - myInput1->zeros(); std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); - myInput2->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); myInput2->setDataType(DataType::Float32); - myInput2->zeros(); + myInput1->setBackend("cpu"); + myInput2->setBackend("cpu"); + + myInput1 -> getImpl() -> setRawPtr(array0, nb_elements); + myInput2 -> getImpl() -> setRawPtr(array1, nb_elements); + std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); - op->associateInput(0,myInput1); - op->associateInput(1,myInput2); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); + op->associateInput(0, myInput1); + op->associateInput(1, myInput2); op->setDataType(DataType::Float32); op->setBackend("cpu"); op->forwardDims(); const auto outputDims = op->getOutput(0)->dims(); REQUIRE(outputDims == dims); + delete[] array0; + delete[] array1; } } + SECTION("Broadcasting") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { DimSize_t nbDims = nbDimsDist(gen); std::vector<DimSize_t> dims1(nbDims, 1); std::vector<DimSize_t> dims2(nbDims, 1); std::vector<DimSize_t> expectedOutDims; - for (std::size_t i = 0; i < nbDims; i++) { + for (std::size_t i = 0; i < nbDims; ++i) { DimSize_t dim = dimSizeDist(gen); - if (boolDist(gen)) { - dims1[i] = dim; - } - if (boolDist(gen)) { - dims2[i] = dim; - } - expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + if (boolDist(gen)) dims1[i] = dim; + if (boolDist(gen)) dims2[i] = dim; + expectedOutDims.push_back(std::max(dims1[i], dims2[i])); } + const std::size_t nb_elements0 = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t nb_elements1 = std::accumulate(dims2.cbegin(), dims2.cend(), std::size_t(1), std::multiplies<std::size_t>()); + float* array0 = new float[nb_elements0]; + float* array1 = new float[nb_elements1]; + for (std::size_t i = 0; i < nb_elements0; ++i) { + array0[i] = boolDist(gen); + } + for (std::size_t i = 0; i < nb_elements1; ++i) { + array1[i] = boolDist(gen); + } std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); - myInput1->setBackend("cpu"); - myInput1->setDataType(DataType::Float32); - 
myInput1->zeros(); std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); - myInput2->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); myInput2->setDataType(DataType::Float32); - myInput2->zeros(); + myInput1->setBackend("cpu"); + myInput2->setBackend("cpu"); + myInput1 -> getImpl() -> setRawPtr(array0, nb_elements0); + myInput2 -> getImpl() -> setRawPtr(array1, nb_elements1); + + std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); - op->associateInput(0,myInput1); - op->associateInput(1,myInput2); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); + op->associateInput(0, myInput1); + op->associateInput(1, myInput2); op->setDataType(DataType::Float32); op->setBackend("cpu"); @@ -102,80 +119,48 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { const auto outputDims = op->getOutput(0)->dims(); REQUIRE(outputDims == expectedOutDims); + delete[] array0; + delete[] array1; } } } + SECTION("Same size inputs") { - std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { // - { // - {{20, 15},{31, 11},{22, 49}}, // - {{41, 10},{24, 51},{27, 52}}, // - {{26, 53},{27, 54},{28, 55}} // - }, // - { // - {{29, 56},{30, 57},{31, 58}}, // - {{32, 59},{33, 60},{34, 61}}, // - {{35, 62},{36, 63},{37, 64}} // - }, // - { // - {{38, 65},{39, 66},{40, 67}}, // - {{41, 68},{42, 69},{43, 70}}, // - {{44, 71},{45, 72},{46, 73}} // - } // - } // - }); // - std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { // - { // - {{20, 47},{21, 48},{22, 49}}, // - {{23, 50},{24, 51},{25, 52}}, // - {{17, 53},{27, 26},{14, 33}} // - }, // - { // - {{29, 56},{30, 57},{31, 58}}, // - {{72, 44},{33, 20},{27, 55}}, // - {{35, 24},{25, 63},{28, 64}} // - }, // - { // - {{32, 65},{39, 66},{40, 70}}, // - {{41, 53},{42, 60},{34, 70}}, // - {{44, 71},{30, 12},{46, 73}} // - } // - } // - }); // - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{ { - { - {{1, 0},{0, 0},{1, 1}}, - {{0, 0},{1, 1},{0, 1}}, - {{0, 1},{1, 0},{0, 0}} - }, - { - {{1, 1},{1, 1},{1, 1}}, - {{0, 0},{1, 0},{0, 0}}, - {{1, 0},{0, 1},{0, 1}} - }, - { - {{0, 1},{1, 1},{1, 0}}, - {{1, 0},{1, 0},{0, 1}}, - {{1, 1},{0, 0},{1, 1}} - } - } - }); + {{{1, 0}, {0, 1}}, + {{1, 1}, {0, 0}}}, + {{{0, 1}, {1, 0}}, + {{1, 0}, {0, 1}}}} + }); + std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{ + { + {{{1, 1}, {0, 0}}, + {{0, 1}, {1, 1}}}, + {{{1, 1}, {0, 0}}, + {{0, 1}, {1, 0}}}} + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{ + { + {{{1, 0}, {0, 0}}, + {{0, 1}, {0, 0}}}, + {{{0, 1}, {0, 0}}, + {{0, 0}, {0, 0}}}} + }); std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); op->associateInput(0, input1); op->associateInput(1, input2); op->setBackend("cpu"); - op->setDataType(DataType::Int32); + op->setDataType(DataType::Float32); myAnd->forward(); - + op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == *expectedOutput); } SECTION("Broadcasting") { +<<<<<<< HEAD std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { { // { // @@ -196,16 +181,28 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { } // } // }); 
// +======= + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float, 1, 2, 2, 2>{ + { + {{{1, 0}, {1, 0}}, + {{1, 1}, {0, 0}}}} + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float, 2>{{1, 0}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float, 1, 2, 2, 2>{ + { + {{{1, 0}, {1, 0}}, + {{1, 0}, {0, 0}}}} + }); +>>>>>>> fix and kernel and unit tests std::shared_ptr<Node> myAnd = And(); - auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); op->associateInput(0, input_1); op->associateInput(1, input_2); - op->setDataType(DataType::Int32); + op->setDataType(DataType::Float32); op->setBackend("cpu"); myAnd->forward(); - op->getOutput(0)->print(); - expectedOutput->print(); - REQUIRE(*op->getOutput(0) == *expectedOutput); + + REQUIRE(*(op->getOutput(0)) == *expectedOutput); } -} \ No newline at end of file +} -- GitLab From 0fbfb571b52f38f02c1a691cecac20d3843cbf22 Mon Sep 17 00:00:00 2001 From: hrouis <houssemeddine.rouis92@gmail.com> Date: Fri, 24 Jan 2025 16:08:17 +0100 Subject: [PATCH 06/22] add dilations to maxpool --- .../backend/cpu/operator/MaxPoolingImpl.hpp | 1 + .../cpu/operator/MaxPoolingImpl_kernels.hpp | 126 ++---------------- src/operator/MaxPoolingImpl.cpp | 1 + unit_tests/operator/Test_MaxPoolingImpl.cpp | 35 +++++ 4 files changed, 49 insertions(+), 114 deletions(-) diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp index 68cc3621..062088a1 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp @@ -28,6 +28,7 @@ namespace Aidge { using MaxPooling2D_Op = MaxPooling_Op<2>; using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>, void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, const std::array<DimSize_t, 2>&, const bool, const std::array<DimSize_t, 4> &, diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp index 7b6f04f1..250b11b0 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp @@ -35,28 +35,23 @@ namespace Aidge { template <class I, class O> void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, const std::array<DimSize_t, 2>& kernelDims, + const std::array<DimSize_t, 2>& dilations, const bool /*ceilMode*/, const std::array<DimSize_t, 4> &dims, const void *input_, void *output_) { - // FIXME: missing convolution parameters as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / static_cast<float>(strideDims[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / static_cast<float>(strideDims[1]))); - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, ch, Xin, Yin) - // 
weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { @@ -77,12 +72,15 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD I poolValue(0.0); bool valid = false; - for (unsigned int channel = 0; channel < dims[1]; - ++channel){ - for (unsigned int sy = syMin; sy < syMax; ++sy) { - for (unsigned int sx = sxMin; sx < sxMax; ++sx) - { - const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + for (unsigned int sy = syMin; sy < syMax; ++sy) { + for (unsigned int sx = sxMin; sx < sxMax; ++sx) { + // Apply dilation factor to kernel indices + const std::size_t dilated_sx = sx * dilations[0]; + const std::size_t dilated_sy = sy * dilations[1]; + + // Ensure indices are within bounds + if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) { + const I value = input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]; if (!valid || value > poolValue) { poolValue = value; @@ -98,106 +96,6 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD } } -//N2D2 version -/* -template <class T> -void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha, - const Tensor<T>& - inputs, - const Descriptor& desc, - const T* beta, - Tensor<T>& outputs, - Tensor<ArgMax>& argMax, - bool useArgMax, - const Tensor<bool>& maps) -{ - const unsigned int size = inputs.dimB() * outputs.dimZ(); - -#if defined(_OPENMP) && _OPENMP >= 200805 -#pragma omp parallel for collapse(2) if (size > 16) -#else -#pragma omp parallel for if (inputs.dimB() > 4 && size > 16) -#endif - for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) { - for (unsigned int output = 0; output < outputs.dimZ(); ++output) { - for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) { - for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) { - const unsigned int sxMin = (unsigned int)std::max( - desc.padding[0] - (int)(ox * desc.stride[0]), 0); - const unsigned int syMin = (unsigned int)std::max( - desc.padding[1] - (int)(oy * desc.stride[1]), 0); - const unsigned int sxMax = Utils::clamp - <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0], - 0, - desc.pool[0]); - const unsigned int syMax = Utils::clamp - <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1], - 0, - desc.pool[1]); - - const int ix = (int)(ox * desc.stride[0]) - desc.padding[0]; - const int iy = (int)(oy * desc.stride[1]) - desc.padding[1]; - - T poolValue(0.0); - - // For each output, compute the pool value - if (useArgMax) { - const ArgMax inputMax - = argMax(ox, oy, output, batchPos); - - if (inputMax.valid) { - poolValue = inputs(inputMax.ix, - inputMax.iy, - inputMax.channel, - batchPos); - } - } - else { - unsigned int ixMax = 0; - unsigned int iyMax = 0; - unsigned int channelMax = 0; - bool valid = false; - - for (unsigned int channel = 0; channel < inputs.dimZ(); - ++channel) - { - if (!maps.empty() && !maps(output, channel)) - continue; - - for (unsigned int sy = syMin; sy < syMax; ++sy) { - for (unsigned int sx = sxMin; sx < sxMax; ++sx) - { - const T value = inputs(ix + sx, - iy + sy, - channel, - batchPos); - - if (!valid || value > poolValue) { - poolValue = value; - valid = true; - - ixMax = ix + sx; - iyMax = iy + sy; - channelMax = channel; - } - } - } - } - - argMax(ox, oy, output, batchPos) - = ArgMax(ixMax, iyMax, channelMax, valid); - } - - outputs(ox, oy, output, batchPos) - = 
(*alpha) * poolValue - + (*beta) * outputs(ox, oy, output, batchPos); - } - } - } - } -} - -*/ // Kernels registration to implementation entry point REGISTRAR(MaxPoolingImpl2D_cpu, diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp index 90075a39..13ef75b0 100644 --- a/src/operator/MaxPoolingImpl.cpp +++ b/src/operator/MaxPoolingImpl.cpp @@ -30,6 +30,7 @@ void Aidge::MaxPoolingImpl2D_cpu::forward() { // Call kernel impl.forward(op_.strideDims(), op_.kernelDims(), + op_.dilations(), op_.ceilMode(), op_.getInput(0)->template dims<4>(), getCPUPtr(mOp.getRawInput(0)), diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp index de02df2b..6b7e6d2f 100644 --- a/unit_tests/operator/Test_MaxPoolingImpl.cpp +++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp @@ -80,4 +80,39 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") { op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == myOutput); } + SECTION("Dilation") { + std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2}, {2,2}); // Dilation 2x2 + auto op = std::static_pointer_cast<OperatorTensor>(myMaxPool -> getOperator()); + + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> { + { + { + { + {0.71470, 0.52770}, + {0.71470, 0.48740} + }, + { + {2.23290, 0.48590}, + {2.23290, 0.07000} + } + }, + { + { + {1.76530, 1.20710}, + {1.76530, 1.20710} + }, + { + {1.04290, 0.67760}, + {1.72170, 0.67760} + } + } + } + }); + myMaxPool->getOperator()->associateInput(0,myInput); + myMaxPool->getOperator()->setDataType(DataType::Float32); + myMaxPool->getOperator()->setBackend("cpu"); + myMaxPool->forward(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput); + } } \ No newline at end of file -- GitLab From 96cea9f4cbbb9d07d8d1cf2ac439c04d860613fa Mon Sep 17 00:00:00 2001 From: hrouis <houssemeddine.rouis92@gmail.com> Date: Mon, 27 Jan 2025 15:21:08 +0100 Subject: [PATCH 07/22] add dilations and cielmode to AvgPooling --- .../backend/cpu/operator/AvgPoolingImpl.hpp | 2 + .../cpu/operator/AvgPoolingImpl_kernels.hpp | 76 ++++++++----------- src/operator/AvgPoolingImpl.cpp | 2 + unit_tests/operator/Test_AndImpl.cpp | 23 ------ 4 files changed, 36 insertions(+), 67 deletions(-) diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp index adea96ca..7c76657f 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp @@ -28,8 +28,10 @@ namespace Aidge { using AvgPooling2D_Op = AvgPooling_Op<2>; using AvgPoolingImpl2D_cpu = OperatorImpl_cpu<AvgPooling_Op<2>, void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, const std::array<DimSize_t, 2>&, const std::array<DimSize_t, 4>&, + bool, const void *, void *)>; diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp index f6da9dcb..68dbfbe7 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp @@ -35,66 +35,54 @@ namespace Aidge { template <class I, class O> void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, const std::array<DimSize_t, 2>& kernelDims, + const std::array<DimSize_t, 2>& dilations, const std::array<DimSize_t, 4> &dims, + bool ceilMode, const void *input_, void *output_) { - // FIXME: missing 
convolution attributes as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); + // Calculate output dimensions based on ceilMode and dilations + auto compute_output_size = [&](DimSize_t inputDim, DimSize_t kernelDim, DimSize_t stride, DimSize_t dilation) { + DimSize_t effectiveKernelDim = (kernelDim - 1) * dilation + 1; + float result = static_cast<float>(inputDim - effectiveKernelDim + stride) / static_cast<float>(stride); + return ceilMode ? static_cast<DimSize_t>(std::ceil(result)) : static_cast<DimSize_t>(std::floor(result)); + }; - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / - static_cast<float>(strideDims[0]))); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / - static_cast<float>(strideDims[1]))); + const std::size_t oxSize = compute_output_size(dims[2], kernelDims[0], strideDims[0], dilations[0]); + const std::size_t oySize = compute_output_size(dims[3], kernelDims[1], strideDims[1], dilations[1]); - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, ch, Xin, Yin) - // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { - const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0); + const std::size_t oIndex = (ch + batch * dims[1]) * oxSize * oySize; + const std::size_t iIndex = (ch + batch * dims[1]) * dims[2] * dims[3]; + std::fill(output + oIndex, output + (oIndex + oxSize * oySize), 0); + for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx); + const signedsize startx = static_cast<signedsize>(ox * strideDims[0]) - (dilations[0] - 1); + const std::size_t sxMin = static_cast<std::size_t>(std::max(startx, signedsize(0))); + const std::size_t sxMax = std::min(dims[2], static_cast<std::size_t>(startx + kernelDims[0] * dilations[0])); + for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? 
kernelDims[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const std::size_t ix = ox * strideDims[0]; - const std::size_t iy = oy * strideDims[1]; + const signedsize starty = static_cast<signedsize>(oy * strideDims[1]) - (dilations[1] - 1); + const std::size_t syMin = static_cast<std::size_t>(std::max(starty, signedsize(0))); + const std::size_t syMax = std::min(dims[3], static_cast<std::size_t>(starty + kernelDims[1] * dilations[1])); - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += static_cast<O>( - input[iIndex + (ix+0)*dims[3] + (iy+0)] + - input[iIndex + (ix+0)*dims[3] + (iy+1)] + - input[iIndex + (ix+0)*dims[3] + (iy+2)] + - input[iIndex + (ix+1)*dims[3] + (iy+0)] + - input[iIndex + (ix+1)*dims[3] + (iy+1)] + - input[iIndex + (ix+1)*dims[3] + (iy+2)] + - input[iIndex + (ix+2)*dims[3] + (iy+0)] + - input[iIndex + (ix+2)*dims[3] + (iy+1)] + - input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9); - } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; - } + const std::size_t oIndexFull = oIndex + ox * oySize + oy; + O sum = static_cast<O>(0); + std::size_t count = 0; + + for (std::size_t sx = sxMin; sx < sxMax; sx += dilations[0]) { + for (std::size_t sy = syMin; sy < syMax; sy += dilations[1]) { + sum += static_cast<O>(input[iIndex + sx * dims[3] + sy]); + ++count; } - // padding not used - output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin); } + + output[oIndexFull] = sum / static_cast<O>(count); } } } diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index 01a5e8cf..eb5ef87b 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -32,7 +32,9 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() { // Call kernel impl.forward(op_.strideDims(), op_.kernelDims(), + op_.dilations(), op_.getInput(0)->template dims<4>(), + op_.ceilMode(), getCPUPtr(op_.getInput(0)), getCPUPtr(op_.getOutput(0))); } diff --git a/unit_tests/operator/Test_AndImpl.cpp b/unit_tests/operator/Test_AndImpl.cpp index 978a89e5..148298d5 100644 --- a/unit_tests/operator/Test_AndImpl.cpp +++ b/unit_tests/operator/Test_AndImpl.cpp @@ -160,28 +160,6 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { } SECTION("Broadcasting") { -<<<<<<< HEAD - std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { - { // - { // - {{10, 20},{22, 23},{20, 20}}, // - {{10, 15},{10, 29},{20, 20}}, // - {{26, 25},{33, 20},{10, 20}} // - } // - } // - }); // - - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { - { // - { // - {{ 1, 1},{ 0, 0},{ 0, 1}}, // - {{ 1, 0},{ 1, 0},{ 0, 1}}, // - {{ 0, 0},{ 0, 1},{ 1, 1}} // - } // - } // - }); // -======= std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float, 1, 2, 2, 2>{ { {{{1, 0}, {1, 0}}, @@ -193,7 +171,6 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { {{{1, 0}, {1, 0}}, {{1, 0}, {0, 0}}}} }); ->>>>>>> fix and kernel and unit tests std::shared_ptr<Node> myAnd = And(); auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator()); -- GitLab From c12d2826171a88b4c87ce5b222363256a222b262 Mon Sep 17 00:00:00 2001 From: hrouis <houssemeddine.rouis92@gmail.com> Date: Mon, 3 Feb 2025 10:11:02 +0100 Subject: [PATCH 08/22] handle ceil_mode in pooling kernels --- 
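The ceil_mode handling below reduces to a single output-size formula per spatial axis. As an illustrative sketch (not part of the patch itself), the following standalone C++ snippet reproduces that formula with the same dilation and floor/ceil conventions as the kernels; the helper name poolOutputSize is made up for the example:

#include <cmath>
#include <cstddef>
#include <iostream>

// Output size along one pooling axis: (in - (k - 1) * d - 1 + s) / s,
// rounded down (floor) by default or up (ceil) when ceil_mode is set.
// (k - 1) * d + 1 is the effective window extent once dilation d is applied.
std::size_t poolOutputSize(std::size_t in, std::size_t k, std::size_t s,
                           std::size_t d, bool ceilMode) {
    const float r = static_cast<float>(in - (k - 1) * d - 1 + s) / static_cast<float>(s);
    return ceilMode ? static_cast<std::size_t>(std::ceil(r))
                    : static_cast<std::size_t>(std::floor(r));
}

int main() {
    // Same configuration as the 5x5 ceil-mode unit tests added in the next patch:
    // kernel 2, stride 2, dilation 1.
    std::cout << poolOutputSize(5, 2, 2, 1, false) << '\n'; // 2 -> 2x2 output (floor)
    std::cout << poolOutputSize(5, 2, 2, 1, true) << '\n';  // 3 -> 3x3 output (ceil)
    return 0;
}

With dilation greater than 1 the same expression also covers the dilation support introduced in the two previous patches, since only the effective window extent changes.
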
.../cpu/operator/AvgPoolingImpl_kernels.hpp | 56 ++++++++++++------- .../cpu/operator/MaxPoolingImpl_kernels.hpp | 20 ++++--- unit_tests/operator/Test_AvgPoolingImpl.cpp | 35 +++++++++++- 3 files changed, 82 insertions(+), 29 deletions(-) diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp index 68dbfbe7..78f8446a 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp @@ -43,15 +43,20 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); - // Calculate output dimensions based on ceilMode and dilations - auto compute_output_size = [&](DimSize_t inputDim, DimSize_t kernelDim, DimSize_t stride, DimSize_t dilation) { - DimSize_t effectiveKernelDim = (kernelDim - 1) * dilation + 1; - float result = static_cast<float>(inputDim - effectiveKernelDim + stride) / static_cast<float>(stride); - return ceilMode ? static_cast<DimSize_t>(std::ceil(result)) : static_cast<DimSize_t>(std::floor(result)); - }; - - const std::size_t oxSize = compute_output_size(dims[2], kernelDims[0], strideDims[0], dilations[0]); - const std::size_t oySize = compute_output_size(dims[3], kernelDims[1], strideDims[1], dilations[1]); + // output H size + const std::size_t oxSize = + ceilMode + ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / + static_cast<float>(strideDims[0]))) + : static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / + static_cast<float>(strideDims[0]))); + // output W size + const std::size_t oySize = + ceilMode + ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / + static_cast<float>(strideDims[1]))) + : static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / + static_cast<float>(strideDims[1]))); using signedsize = std::make_signed<std::size_t>::type; @@ -59,30 +64,39 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD for (std::size_t ch = 0; ch < dims[1]; ++ch) { const std::size_t oIndex = (ch + batch * dims[1]) * oxSize * oySize; const std::size_t iIndex = (ch + batch * dims[1]) * dims[2] * dims[3]; - std::fill(output + oIndex, output + (oIndex + oxSize * oySize), 0); for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize startx = static_cast<signedsize>(ox * strideDims[0]) - (dilations[0] - 1); - const std::size_t sxMin = static_cast<std::size_t>(std::max(startx, signedsize(0))); - const std::size_t sxMax = std::min(dims[2], static_cast<std::size_t>(startx + kernelDims[0] * dilations[0])); + const signedsize difx = static_cast<signedsize>(-ox * strideDims[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? 
kernelDims[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize starty = static_cast<signedsize>(oy * strideDims[1]) - (dilations[1] - 1); - const std::size_t syMin = static_cast<std::size_t>(std::max(starty, signedsize(0))); - const std::size_t syMax = std::min(dims[3], static_cast<std::size_t>(starty + kernelDims[1] * dilations[1])); + const signedsize dify = static_cast<signedsize>(-oy * strideDims[1]); + const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox * oySize + oy; + const std::size_t ix = ox * strideDims[0]; + const std::size_t iy = oy * strideDims[1]; + O sum = static_cast<O>(0); std::size_t count = 0; - for (std::size_t sx = sxMin; sx < sxMax; sx += dilations[0]) { - for (std::size_t sy = syMin; sy < syMax; sy += dilations[1]) { - sum += static_cast<O>(input[iIndex + sx * dims[3] + sy]); - ++count; + for (unsigned int sy = syMin; sy < syMax; ++sy) { + for (unsigned int sx = sxMin; sx < sxMax; ++sx) { + // Apply dilation factor + const std::size_t dilated_sx = sx * dilations[0]; + const std::size_t dilated_sy = sy * dilations[1]; + + // Ensure within bounds + if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) { + sum += static_cast<O>(input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]); + ++count; + } } } - output[oIndexFull] = sum / static_cast<O>(count); + output[oIndexFull] = count > 0 ? sum / static_cast<O>(count) : 0; } } } diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp index 250b11b0..d5ac02fe 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp @@ -36,7 +36,7 @@ template <class I, class O> void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, const std::array<DimSize_t, 2>& kernelDims, const std::array<DimSize_t, 2>& dilations, - const bool /*ceilMode*/, + const bool ceilMode, const std::array<DimSize_t, 4> &dims, const void *input_, void *output_) { @@ -44,13 +44,19 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD O *output = static_cast<O *>(output_); // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / - static_cast<float>(strideDims[0]))); + const std::size_t oxSize = + ceilMode + ? static_cast<std::size_t>(std::ceil(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / + static_cast<float>(strideDims[0]))) + : static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / - static_cast<float>(strideDims[1]))); + const std::size_t oySize = + ceilMode + ? 
static_cast<std::size_t>(std::ceil(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / + static_cast<float>(strideDims[1]))) + : static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / + static_cast<float>(strideDims[1]))); using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp index 372febc6..21a7a680 100644 --- a/unit_tests/operator/Test_AvgPoolingImpl.cpp +++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp @@ -110,5 +110,38 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") { REQUIRE(std::abs(outPtr[i] - expectedOutPtr[i]) < 0.00001); } } - // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] << std::endl; + SECTION("Dilations") { + std::shared_ptr<Tensor> myInput3 = std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW + { + { + {{ 1, 2, 3, 4, 5}, + { 6, 7, 8, 9, 10}, + {11, 12, 13, 14, 15}, + {16, 17, 18, 19, 20}, + {21, 22, 23, 24, 25}} + } + } + }); + + // Dilation of 2 means we take every second element in the window + std::shared_ptr<Node> myAvgPool = AvgPooling({2,2}, "mycdw", {1,1}, {2,2}); + auto op = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool -> getOperator()); + + std::shared_ptr<Tensor> myOutput3 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> { + { + { + {{ 7, 8, 9}, + { 12, 13, 14}, + { 17, 18, 19}} + } + } + }); + + op->associateInput(0, myInput3); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myAvgPool->forward(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput3); + } } \ No newline at end of file -- GitLab From b3ae66f75c1dfbc3b4ae3e08f198889cbf837937 Mon Sep 17 00:00:00 2001 From: hrouis <houssemeddine.rouis92@gmail.com> Date: Mon, 3 Feb 2025 15:09:26 +0100 Subject: [PATCH 09/22] add ceil_mode tests for Avg and Max Pooling --- .../cpu/operator/MaxPoolingImpl_kernels.hpp | 1 + unit_tests/operator/Test_AvgPoolingImpl.cpp | 57 +++++++++++++++++++ unit_tests/operator/Test_MaxPoolingImpl.cpp | 57 +++++++++++++++++++ 3 files changed, 115 insertions(+) diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp index d5ac02fe..027fc02a 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp @@ -16,6 +16,7 @@ #include <cmath> #include <tuple> + #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/data/Data.hpp" diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp index 21a7a680..f116934c 100644 --- a/unit_tests/operator/Test_AvgPoolingImpl.cpp +++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp @@ -144,4 +144,61 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") { op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == *myOutput3); } + SECTION("Ceil Mode") { + std::shared_ptr<Tensor> myInput4 = std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW + { + { + { + { 1, 2, 3, 4, 5}, + { 6, 7, 8, 9, 10}, + {11, 12, 13, 14, 15}, + {16, 17, 18, 19, 20}, + {21, 22, 23, 24, 25} + } + } + } + }); + + // AvgPool with ceil_mode = true + std::shared_ptr<Node> myAvgPool1 = AvgPooling({2,2}, "mycdw", {2,2}, {1,1}, true); + auto op1 = 
std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool1 -> getOperator()); + + std::shared_ptr<Tensor> myOutput4 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> { + { + { + { + { 4.0, 6.0, 7.5 }, + { 14.0, 16.0, 17.5 }, + { 21.5, 23.5, 25.0 } + } + } + } + }); + op1->associateInput(0, myInput4); + op1->setDataType(DataType::Float32); + op1->setBackend("cpu"); + myAvgPool1->forward(); + op1->getOutput(0)->print(); + REQUIRE(*(op1->getOutput(0)) == *myOutput4); + + // AvgPool with ceil_mode = false + std::shared_ptr<Node> myAvgPool2 = AvgPooling({2,2}, "mycdw", {2,2}, {1,1}, false); + auto op2 = std::static_pointer_cast<AvgPooling_Op<2>>(myAvgPool2 -> getOperator()); + std::shared_ptr<Tensor> myOutput5 = std::make_shared<Tensor>(Array4D<float,1,1,2,2> { + { + { + { + { 4.0, 6.0 }, + { 14.0, 16.0 } + } + } + } + }); + op2->associateInput(0, myInput4); + op2->setDataType(DataType::Float32); + op2->setBackend("cpu"); + myAvgPool2->forward(); + op2->getOutput(0)->print(); + REQUIRE(*(op2->getOutput(0)) == *myOutput5); + } } \ No newline at end of file diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp index 6b7e6d2f..d480fc30 100644 --- a/unit_tests/operator/Test_MaxPoolingImpl.cpp +++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp @@ -115,4 +115,61 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") { op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == *myOutput); } + SECTION("Ceil Mode") { + std::shared_ptr<Tensor> myInput4 = std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW + { + { + { + { 1, 2, 3, 4, 5}, + { 6, 7, 8, 9, 10}, + {11, 12, 13, 14, 15}, + {16, 17, 18, 19, 20}, + {21, 22, 23, 24, 25} + } + } + } + }); + + // MaxPool with ceil_mode = true + std::shared_ptr<Node> myMaxPool1 = MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, true); + auto op1 = std::static_pointer_cast<OperatorTensor>(myMaxPool1 -> getOperator()); + + std::shared_ptr<Tensor> myOutput4 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> { + { + { + { + { 7.0, 9.0, 10.0 }, + { 17.0, 19.0, 20.0 }, + { 22.0, 24.0, 25.0 } + } + } + } + }); + op1->associateInput(0, myInput4); + op1->setDataType(DataType::Float32); + op1->setBackend("cpu"); + myMaxPool1->forward(); + op1->getOutput(0)->print(); + REQUIRE(*(op1->getOutput(0)) == *myOutput4); + + // MaxPool with ceil_mode = false + std::shared_ptr<Node> myMaxPool2 = MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, false); + auto op2 = std::static_pointer_cast<OperatorTensor>(myMaxPool2 -> getOperator()); + std::shared_ptr<Tensor> myOutput5 = std::make_shared<Tensor>(Array4D<float,1,1,2,2> { + { + { + { + { 7.0, 9.0 }, + { 17.0, 19.0 } + } + } + } + }); + op2->associateInput(0, myInput4); + op2->setDataType(DataType::Float32); + op2->setBackend("cpu"); + myMaxPool2->forward(); + op2->getOutput(0)->print(); + REQUIRE(*(op2->getOutput(0)) == *myOutput5); + } } \ No newline at end of file -- GitLab From 40a34dc2c1910bba8e983adc28929204a4e62f45 Mon Sep 17 00:00:00 2001 From: hrouis <houssemeddine.rouis92@gmail.com> Date: Wed, 19 Feb 2025 10:32:39 +0100 Subject: [PATCH 10/22] separate fwdDims tests section from fwd section --- unit_tests/operator/Test_EqualImpl.cpp | 145 ++++++++++++------------- 1 file changed, 72 insertions(+), 73 deletions(-) diff --git a/unit_tests/operator/Test_EqualImpl.cpp b/unit_tests/operator/Test_EqualImpl.cpp index a229b8ce..013e16eb 100644 --- a/unit_tests/operator/Test_EqualImpl.cpp +++ b/unit_tests/operator/Test_EqualImpl.cpp @@ -19,86 +19,85 @@ using namespace Aidge; 
-TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") { - SECTION("ForwardDims") - { - constexpr std::uint16_t NBTRIALS = 10; - // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); - std::uniform_int_distribution<int> boolDist(0,1); - - SECTION("Same dimensions") { - for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { - DimSize_t nbDims = nbDimsDist(gen); - std::vector<DimSize_t> dims(nbDims); - for (std::size_t i = 0; i < nbDims; i++) { - dims[i] = dimSizeDist(gen); - } - - std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); - myInput1->setBackend("cpu"); - myInput1->setDataType(DataType::Float32); - myInput1->zeros(); - std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); - myInput2->setBackend("cpu"); - myInput2->setDataType(DataType::Float32); - myInput2->zeros(); - std::shared_ptr<Node> myEqual = Equal(); - auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); - op->associateInput(0,myInput1); - op->associateInput(1,myInput2); - op->setDataType(DataType::Float32); - op->setBackend("cpu"); - op->forwardDims(); - - const auto outputDims = op->getOutput(0)->dims(); - REQUIRE(outputDims == dims); +TEST_CASE("[cpu/operator] Equal(forwardDims)", "[Equal][CPU]") { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("Same dimensions") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); } + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); } - SECTION("Broadcasting") { - for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { - DimSize_t nbDims = nbDimsDist(gen); - std::vector<DimSize_t> dims1(nbDims, 1); - std::vector<DimSize_t> dims2(nbDims, 1); - std::vector<DimSize_t> expectedOutDims; - for (std::size_t i = 0; i < nbDims; i++) { - DimSize_t dim = dimSizeDist(gen); - if (boolDist(gen)) { - dims1[i] = dim; - } - if (boolDist(gen)) { - dims2[i] = dim; - } - expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + } + SECTION("Broadcasting") { + for (std::uint16_t trial = 0; trial < NBTRIALS; 
++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims1(nbDims, 1); + std::vector<DimSize_t> dims2(nbDims, 1); + std::vector<DimSize_t> expectedOutDims; + for (std::size_t i = 0; i < nbDims; i++) { + DimSize_t dim = dimSizeDist(gen); + if (boolDist(gen)) { + dims1[i] = dim; + } + if (boolDist(gen)) { + dims2[i] = dim; } + expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + } - std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); - myInput1->setBackend("cpu"); - myInput1->setDataType(DataType::Float32); - myInput1->zeros(); - std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); - myInput2->setBackend("cpu"); - myInput2->setDataType(DataType::Float32); - myInput2->zeros(); - std::shared_ptr<Node> myEqual = Equal(); - auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); - op->associateInput(0,myInput1); - op->associateInput(1,myInput2); - op->setDataType(DataType::Float32); - op->setBackend("cpu"); - - op->forwardDims(); - - const auto outputDims = op->getOutput(0)->dims(); - REQUIRE(outputDims == expectedOutDims); - } + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myEqual = Equal(); + auto op = std::static_pointer_cast<OperatorTensor>(myEqual -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); } } +} +TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") { SECTION("Same size inputs") { std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { { // -- GitLab From f3de3e10f342b3dd573609d8f835ed822950e5d7 Mon Sep 17 00:00:00 2001 From: hrouis <houssemeddine.rouis92@gmail.com> Date: Thu, 20 Feb 2025 11:35:10 +0100 Subject: [PATCH 11/22] remove unnecessary header in Equal tests --- unit_tests/operator/Test_EqualImpl.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/unit_tests/operator/Test_EqualImpl.cpp b/unit_tests/operator/Test_EqualImpl.cpp index 013e16eb..bd9fa94f 100644 --- a/unit_tests/operator/Test_EqualImpl.cpp +++ b/unit_tests/operator/Test_EqualImpl.cpp @@ -15,8 +15,6 @@ #include "aidge/data/Tensor.hpp" #include "aidge/operator/Equal.hpp" -#include "aidge/backend/cpu.hpp" - using namespace Aidge; TEST_CASE("[cpu/operator] Equal(forwardDims)", "[Equal][CPU]") { @@ -137,7 +135,7 @@ TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") { } // } // }); // - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + Tensor expectedOutput =Tensor(Array4D<int,3,3,3,2> { { { {{1, 0},{0, 0},{1, 1}}, @@ -165,7 +163,7 @@ TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") { op->setDataType(DataType::Int32); myEqual->forward(); - REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(*(op->getOutput(0)) == expectedOutput); } SECTION("Broadcasting") { @@ -180,7 +178,7 @@ TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") { }); // std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); - std::shared_ptr<Tensor> expectedOutput = 
std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + Tensor expectedOutput = Tensor(Array4D<int,1,3,3,2> { { // { // {{ 1, 1},{ 0, 0},{ 0, 1}}, // @@ -198,7 +196,7 @@ TEST_CASE("[cpu/operator] Equal(forward)", "[Equal][CPU]") { op->setBackend("cpu"); myEqual->forward(); op->getOutput(0)->print(); - expectedOutput->print(); - REQUIRE(*op->getOutput(0) == *expectedOutput); + + REQUIRE(*op->getOutput(0) == expectedOutput); } } \ No newline at end of file -- GitLab From dcbd4ebd1fe6d4eb065496f8ab0b62b771b42589 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Fri, 14 Feb 2025 17:57:38 +0100 Subject: [PATCH 12/22] Add Mod --- .../aidge/backend/cpu/operator/ModImpl.hpp | 33 +++++ .../backend/cpu/operator/ModImpl_kernels.hpp | 77 ++++++++++ src/operator/ModImpl.cpp | 131 ++++++++++++++++++ 3 files changed, 241 insertions(+) create mode 100644 include/aidge/backend/cpu/operator/ModImpl.hpp create mode 100644 include/aidge/backend/cpu/operator/ModImpl_kernels.hpp create mode 100644 src/operator/ModImpl.cpp diff --git a/include/aidge/backend/cpu/operator/ModImpl.hpp b/include/aidge/backend/cpu/operator/ModImpl.hpp new file mode 100644 index 00000000..96ff599b --- /dev/null +++ b/include/aidge/backend/cpu/operator/ModImpl.hpp @@ -0,0 +1,33 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MODIMPL_H_ +#define AIDGE_CPU_OPERATOR_MODIMPL_H_ + +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/Mod.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using ModImpl_cpu = OperatorImpl_cpu<Mod_Op, + void(bool, const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(Mod_Op, "cpu", Aidge::ModImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MODIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ModImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ModImpl_kernels.hpp new file mode 100644 index 00000000..940fa482 --- /dev/null +++ b/include/aidge/backend/cpu/operator/ModImpl_kernels.hpp @@ -0,0 +1,77 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MODIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_MODIMPL_KERNELS_H_ + +#include <numeric> // std::accumulate +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t, std::int64_t +#include <functional> // std::multiplies + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/operator/ModImpl.hpp" + +namespace Aidge { + +template <typename T, + typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +static inline T modulus(T a, T b) { + return a % b; +} + +template <typename T, + typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr> +static inline T modulus(T /*a*/, T /*b*/) { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Mod Operator with fmod attribute set to false only supports integer types."); +} + +template <class I1, class I2, class O> +constexpr void ModImpl_cpu_forward_kernel(bool fmod, + const std::size_t input1size_, + const std::size_t input2size_, + const std::size_t output1size_, + const void* input1_, + const void* input2_, + void* output_) { + + const I1* input_1 = static_cast<const I1*>(input1_); + const I2* input_2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + +// suppose values are contiguous in memory + for (std::size_t i = 0; i < output1size_; ++i) { + const std::size_t in1_id = (input1size_ != 1) ? i : 0; + const std::size_t in2_id = (input2size_ != 1) ? i : 0; + if (fmod) { + output[i] = static_cast<O>(std::fmod(input_1[in1_id], input_2[in2_id])); + } + else { + output[i] = static_cast<O>(modulus(input_1[in1_id], input_2[in2_id])); + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(ModImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::ModImpl_cpu_forward_kernel<float, float, float>, nullptr}); +REGISTRAR(ModImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::ModImpl_cpu_forward_kernel<double, double, double>, nullptr}); +REGISTRAR(ModImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::ModImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MODIMPL_KERNELS_H_ */ diff --git a/src/operator/ModImpl.cpp b/src/operator/ModImpl.cpp new file mode 100644 index 00000000..161f7bc1 --- /dev/null +++ b/src/operator/ModImpl.cpp @@ -0,0 +1,131 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/ModImpl.hpp" +#include "aidge/backend/cpu/operator/ModImpl_kernels.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Types.h" + +template <> +void Aidge::ModImpl_cpu::forward() { + // 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7] + // 2. Find the highest equal dimension -> 3 + // Exception: if the first diverging dimension is the last one, then -> 4 (dims.size()) + // 3. 
Compute the highest number of contiguous data -> 7 + // 4. Compute stride and offset step for the broadcast mechanism + // 5. Call a simple kernel + const auto& opTensor = static_cast<const Mod_Op&>(mOp); + + // Find the correct kernel type + const auto impl = Registrar<ModImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Compute compatible input dimensions + std::vector<std::size_t> dims0 = opTensor.getInput(0)->dims(); + std::vector<std::size_t> dims1 = opTensor.getInput(1)->dims(); + const std::vector<std::size_t>& outDims = opTensor.getOutput(0)->dims(); + + // special case for equal dimensions, the kernel is called with the entire arrays at once + if (dims0 == dims1) { + const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); + impl.forward(opTensor.fmod(), + input0_contiguous_size, input0_contiguous_size, input0_contiguous_size, + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); + return; + } + + // set dimensions to be of equal size by filling the smallest one with ones. + if (dims0.size() > dims1.size()) { + dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1)); + } + else if (dims1.size() > dims0.size()) { + dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1)); + } + + const std::size_t nbDims = dims0.size(); + + // Find the highest equal dimension + // std::size_t contiguousIdx = nbDims - 1; + std::size_t contiguousIdx = nbDims; + while (contiguousIdx-- > 0) { + // for (; contiguousIdx+1 > 0; --contiguousIdx) { + if (dims0[contiguousIdx] != dims1[contiguousIdx]) { + if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1 + const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1; + while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) { + --contiguousIdx; + } + } + break; + } + } + ++contiguousIdx; + + // Compute the highest number of contiguous data for each Tensor + const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const std::size_t output_contiguous_size = std::accumulate(outDims.cbegin()+contiguousIdx, outDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + + // initialize strides to iterate through data because of broadcasting + std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx); + std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx); + if (contiguousIdx > 0) { + stride_post0[contiguousIdx - 1] = 1; + stride_post1[contiguousIdx - 1] = 1; + for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) { + stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]); + stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]); + } + for (std::size_t i = 0; i != contiguousIdx; ++i) { + stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (dims1[i] == 1) ? 
1 - stride_post1[i] : 1; + } + } + + // variables for arrays offsets + std::size_t offsetIn0 = 0; + std::size_t offsetIn1 = 0; + std::size_t offsetOut = 0; + + + std::size_t dim = contiguousIdx - 1; + const std::size_t nbStacks = std::accumulate(outDims.cbegin(), outDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>()); + for (std::size_t stack = 0; stack < nbStacks;) { + impl.forward(opTensor.fmod(), input0_contiguous_size, input1_contiguous_size, output_contiguous_size, + getCPUPtr(mOp.getRawInput(0), offsetIn0*input0_contiguous_size), + getCPUPtr(mOp.getRawInput(1), offsetIn1*input1_contiguous_size), + getCPUPtr(mOp.getRawOutput(0), offsetOut*output_contiguous_size)); + if (++stack < nbStacks) { + std::size_t tmp_stack = stack; + while(tmp_stack % outDims[dim] == 0) { + tmp_stack /= outDims[dim]; + dim--; + } + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = contiguousIdx - 1; + } + } +} + +template <> +void Aidge::ModImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Mod_Op on backend cpu"); +} -- GitLab From 1c023cb2c416ad660acef30a6e913b8b40c1c8d5 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Sun, 16 Feb 2025 16:39:05 +0100 Subject: [PATCH 13/22] Fixed typo --- include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp | 4 ++-- .../aidge/backend/cpu/operator/AtanImpl_kernels.hpp | 8 ++++---- include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp | 4 ++-- .../backend/cpu/operator/HeavisideImpl_kernels.hpp | 4 ++-- .../backend/cpu/operator/LeakyReLUImpl_kernels.hpp | 8 ++++---- include/aidge/backend/cpu/operator/LnImpl_kernels.hpp | 10 +++++----- .../aidge/backend/cpu/operator/ReLUImpl_kernels.hpp | 10 +++++----- .../aidge/backend/cpu/operator/RoundImpl_kernels.hpp | 4 ++-- .../aidge/backend/cpu/operator/ScalingImpl_kernels.hpp | 4 ++-- .../aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp | 10 +++++----- .../aidge/backend/cpu/operator/SqrtImpl_kernels.hpp | 8 ++++---- .../aidge/backend/cpu/operator/TanhImpl_kernels.hpp | 10 +++++----- 12 files changed, 42 insertions(+), 42 deletions(-) diff --git a/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp index 16e5f9de..e6474cf2 100644 --- a/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp @@ -20,14 +20,14 @@ namespace Aidge { template <class I, class O> -void AbsImpl_cpu_forward_kernel(std::size_t inputLenght, +void AbsImpl_cpu_forward_kernel(std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = std::abs(input[i]); } } diff --git a/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp index 2a786339..141e5b60 100644 --- a/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp @@ -20,20 +20,20 @@ namespace Aidge { template <class I, class O> -void AtanImpl_cpu_forward_kernel(std::size_t inputLenght, +void AtanImpl_cpu_forward_kernel(std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - for (size_t i = 0; i < inputLenght; ++i) { + for (size_t i = 0; i < 
inputLength; ++i) { output[i] = static_cast<O>(atan(input[i])); } } template <class O, class GI, class GO> -void AtanImpl_cpu_backward_kernel(const std::size_t inputLenght, +void AtanImpl_cpu_backward_kernel(const std::size_t inputLength, const void* output_, const void* grad_output_, void* grad_input_) { const O* output = static_cast<const O*>(output_); @@ -41,7 +41,7 @@ void AtanImpl_cpu_backward_kernel(const std::size_t inputLenght, GI* grad_input = static_cast<GI*>(grad_input_); // Apply the derivative of atan for each element in the input array - for (size_t i = 0; i < inputLenght; ++i) { + for (size_t i = 0; i < inputLength; ++i) { // dx = dy * (1 / (1 + x^2)) grad_input[i] = grad_output[i] * static_cast<O>(1.0 / (1.0 + output[i] * output[i])); } diff --git a/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp index 02041f55..709f4a6f 100644 --- a/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp @@ -20,14 +20,14 @@ namespace Aidge { template <class I, class O> -void ErfImpl_cpu_forward_kernel(std::size_t inputLenght, +void ErfImpl_cpu_forward_kernel(std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = std::erf(input[i]); } } diff --git a/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp b/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp index 3fd6ca7d..06d7fff8 100644 --- a/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp @@ -23,14 +23,14 @@ namespace Aidge { template <class I, class O> -void HeavisideImplCpuForwardKernel(std::size_t inputLenght, +void HeavisideImplCpuForwardKernel(std::size_t inputLength, const void *input_, void *output_, const float value) { const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = (input[i] > 0) ? 1 : (input[i] == 0 ? value : 0); } } diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp index bc856f70..7afd8298 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp @@ -19,7 +19,7 @@ namespace Aidge { template <class I, class O> void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_, - std::size_t inputLenght, + std::size_t inputLength, const void* input_, void* output_) { @@ -27,14 +27,14 @@ void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_, O* output = static_cast<O*>(output_); const I negativeSlope = static_cast<const I>(negativeSlope_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = (input[i] >= 0) ? 
input[i] : input[i] * negativeSlope; } } template <class I, class O> void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_, - std::size_t inputLenght, + std::size_t inputLength, const void* input_, void* output_) { @@ -42,7 +42,7 @@ void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_, O* output = static_cast<O*>(output_); const I negativeSlope = static_cast<const I>(negativeSlope_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = (input[i] > 0) ? input[i] : negativeSlope*input[i]; } } diff --git a/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp index b30b05bb..ee2864b6 100755 --- a/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp @@ -18,7 +18,7 @@ namespace Aidge { template <class I, class O> -void LnImpl_cpu_forward_kernel(std::size_t inputLenght, +void LnImpl_cpu_forward_kernel(std::size_t inputLength, const void* input_, void* output_) { @@ -26,8 +26,8 @@ void LnImpl_cpu_forward_kernel(std::size_t inputLenght, O* output = static_cast<O*>(output_); const float eps = 1.0e-20f; -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { +//#pragma omp parallel for if (inputLength > 1024) + for (std::size_t i = 0; i < inputLength; ++i) { if (input[i] > I(eps)) { output[i] = std::log(input[i]); } else { @@ -37,7 +37,7 @@ void LnImpl_cpu_forward_kernel(std::size_t inputLenght, } template <class I, class GI, class GO> -void LnImpl_cpu_backward_kernel(const std::size_t inputLenght, +void LnImpl_cpu_backward_kernel(const std::size_t inputLength, const void* input_, const void* grad_output_, void* grad_input_) { @@ -46,7 +46,7 @@ void LnImpl_cpu_backward_kernel(const std::size_t inputLenght, GI* grad_input = static_cast<GI*>(grad_input_); const float eps = 1.0e-20f; - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { if (input[i] > I(eps)) { grad_input[i] = grad_output[i] / input[i]; } else { diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp index e39e9b7d..bb5d7cc3 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp @@ -26,27 +26,27 @@ namespace Aidge { // Kernels template <class I, class O> -void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, +void ReLUImpl_cpu_forward_kernel(std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { +//#pragma omp parallel for if (inputLength > 1024) + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = (input[i] > 0) ? input[i] : 0; } } template <class I, class GI, class GO> -void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght, +void ReLUImpl_cpu_backward_kernel(const std::size_t inputLength, const void* input_, const void* grad_output_, void* grad_input_) { const I* input = static_cast<const I*>(input_); const GO* grad_output = static_cast<const GO*>(grad_output_); GI* grad_input = static_cast<GI*>(grad_input_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { grad_input[i] = (input[i] > 0) ? 
grad_output[i] : 0; } } diff --git a/include/aidge/backend/cpu/operator/RoundImpl_kernels.hpp b/include/aidge/backend/cpu/operator/RoundImpl_kernels.hpp index ba9c63bc..7ac4319b 100644 --- a/include/aidge/backend/cpu/operator/RoundImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/RoundImpl_kernels.hpp @@ -21,14 +21,14 @@ namespace Aidge { template <class I, class O> -void RoundImpl_cpu_forward_kernel(const std::size_t inputLenght, +void RoundImpl_cpu_forward_kernel(const std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { //std::round would not work since it doesn't follow the halves rules (See ONNX Round) output[i] = static_cast<O>(std::nearbyint(static_cast<float>(input[i]))); } diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp index c758c9cf..f9ca00b7 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp @@ -76,14 +76,14 @@ template <class I, class O> void ScalingImpl_cpu_forward_kernel(const float scalingFactor, const std::size_t quantizedNbBits, const bool isOutputUnsigned, - std::size_t inputLenght, + std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = static_cast<O>(input[i] * static_cast<I>(scalingFactor)); if(quantizedNbBits > 0) { diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp index dfd71ce0..83ad4575 100644 --- a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp @@ -18,15 +18,15 @@ namespace Aidge { template <class I, class O> -void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght, +void SigmoidImpl_cpu_forward_kernel(std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { +//#pragma omp parallel for if (inputLength > 1024) + for (std::size_t i = 0; i < inputLength; ++i) { if (input[i] > I(0)) { output[i] = O(1) / (O(1) + std::exp(-input[i])); } else { @@ -36,13 +36,13 @@ void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght, } template <class O, class GI, class GO> -void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght, +void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLength, const void* output_, const void* grad_output_, void* grad_input_) { const O* output = static_cast<const O*>(output_); const GO* grad_output = static_cast<const GO*>(grad_output_); GI* grad_input = static_cast<GI*>(grad_input_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i]; } } diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp index 0464119c..1ce1ef9b 100644 --- a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp +++ 
b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp @@ -21,27 +21,27 @@ namespace Aidge { template <class I, class O> -void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght, +void SqrtImpl_cpu_forward_kernel(const std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i]))); } } template <class I, class O> -void SqrtImpl_cpu_backward_kernel(const std::size_t inputLenght, +void SqrtImpl_cpu_backward_kernel(const std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = static_cast<O>(0.5/(std::sqrt(static_cast<float>(input[i])))); } } diff --git a/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp index fdcac210..49cfe9cb 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp @@ -18,27 +18,27 @@ namespace Aidge { template <class I, class O> -void TanhImpl_cpu_forward_kernel(std::size_t inputLenght, +void TanhImpl_cpu_forward_kernel(std::size_t inputLength, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { +//#pragma omp parallel for if (inputLength > 1024) + for (std::size_t i = 0; i < inputLength; ++i) { output[i] = std::tanh(input[i]); } } template <class O, class GI, class GO> -void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght, +void TanhImpl_cpu_backward_kernel(const std::size_t inputLength, const void* output_, const void* grad_output_, void* grad_input_) { const O* output = static_cast<const O*>(output_); const GO* grad_output = static_cast<const GO*>(grad_output_); GI* grad_input = static_cast<GI*>(grad_input_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { grad_input[i] = (O(1) - output[i] * output[i]) * grad_output[i]; } } -- GitLab From 06c6f8b120b9cbc772ae367ea9827a0e7f8bf040 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Sun, 16 Feb 2025 16:42:03 +0100 Subject: [PATCH 14/22] Fixed missing include --- unit_tests/operator/Test_MetaOperator.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp index 4fe39630..adc548b9 100644 --- a/unit_tests/operator/Test_MetaOperator.cpp +++ b/unit_tests/operator/Test_MetaOperator.cpp @@ -18,6 +18,7 @@ #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp" +#include "aidge/backend/cpu/operator/TanhImpl.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/filler/Filler.hpp" #include "aidge/operator/Conv.hpp" -- GitLab From 79e60036680239d2ee9d41c5e56bb6742f740585 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Sun, 16 Feb 2025 17:53:50 +0100 Subject: [PATCH 15/22] Working concept --- CMakeLists.txt | 12 ++++ include/aidge/backend/cpu.hpp | 2 + .../backend/cpu/operator/CryptoHashImpl.hpp | 36 +++++++++++ 
.../cpu/operator/CryptoHashImpl_kernels.hpp | 52 ++++++++++++++++ .../backend/cpu/operator/ModImpl_kernels.hpp | 3 + src/operator/CryptoHashImpl.cpp | 46 +++++++++++++++ unit_tests/operator/Test_CryptoHash.cpp | 56 ++++++++++++++++++ unit_tests/scheduler/Test_Scheduler.cpp | 59 +++++++++++++++++++ 8 files changed, 266 insertions(+) create mode 100644 include/aidge/backend/cpu/operator/CryptoHashImpl.hpp create mode 100644 include/aidge/backend/cpu/operator/CryptoHashImpl_kernels.hpp create mode 100644 src/operator/CryptoHashImpl.cpp create mode 100644 unit_tests/operator/Test_CryptoHash.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 66ef8ff2..2d4bc8ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,14 @@ if(NOT $ENV{AIDGE_INSTALL} STREQUAL "") endif() find_package(aidge_core REQUIRED) +find_package(OpenSSL QUIET) +if(OpenSSL_FOUND) + message(STATUS "OpenSSL found: ${OPENSSL_VERSION}") + add_definitions(-DWITH_OPENSSL) +else() + message(WARNING "OpenSSL not found, SHA256 will not be available.") +endif() + ############################################## # Create target and set properties file(GLOB_RECURSE src_files "src/*.cpp") @@ -112,6 +120,10 @@ target_include_directories(${module_name} ${CMAKE_CURRENT_SOURCE_DIR}/src ) +if(OpenSSL_FOUND) + target_link_libraries(${module_name} PRIVATE OpenSSL::SSL OpenSSL::Crypto) +endif() + target_compile_features(${module_name} PRIVATE cxx_std_14) target_compile_options(${module_name} PRIVATE diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index ffc03ae5..80574b4a 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -28,6 +28,7 @@ #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" +#include "aidge/backend/cpu/operator/CryptoHashImpl.hpp" #include "aidge/backend/cpu/operator/DivImpl.hpp" #include "aidge/backend/cpu/operator/EqualImpl.hpp" #include "aidge/backend/cpu/operator/ErfImpl.hpp" @@ -40,6 +41,7 @@ #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" #include "aidge/backend/cpu/operator/LnImpl.hpp" #include "aidge/backend/cpu/operator/MatMulImpl.hpp" +#include "aidge/backend/cpu/operator/ModImpl.hpp" #include "aidge/backend/cpu/operator/MulImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/backend/cpu/operator/PaddedConvImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/CryptoHashImpl.hpp b/include/aidge/backend/cpu/operator/CryptoHashImpl.hpp new file mode 100644 index 00000000..d7f07f99 --- /dev/null +++ b/include/aidge/backend/cpu/operator/CryptoHashImpl.hpp @@ -0,0 +1,36 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_H_ +#define AIDGE_CPU_OPERATOR_TANHIMPL_H_ + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/CryptoHash.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> + +#ifdef WITH_OPENSSL +#include <openssl/sha.h> + +namespace Aidge { +// Operator implementation entry point for the backend +using CryptoHashImpl_cpu = OperatorImpl_cpu<CryptoHash_Op, + void(const std::size_t, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(CryptoHash_Op, "cpu", Aidge::CryptoHashImpl_cpu::create); +} // namespace Aidge +#endif + +#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/CryptoHashImpl_kernels.hpp b/include/aidge/backend/cpu/operator/CryptoHashImpl_kernels.hpp new file mode 100644 index 00000000..cd596b69 --- /dev/null +++ b/include/aidge/backend/cpu/operator/CryptoHashImpl_kernels.hpp @@ -0,0 +1,52 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_CRYPTOHASHIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_CRYPTOHASHIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/CryptoHashImpl.hpp" + +#ifdef WITH_OPENSSL +namespace Aidge { +template <class I, class O> +void CryptoHashImpl_cpu_forward_kernel(std::size_t inputLength, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + // output must be at least SHA256_DIGEST_LENGTH bytes length + SHA256(reinterpret_cast<const uint8_t*>(input), inputLength * sizeof(I), reinterpret_cast<uint8_t*>(output)); +} + +// Kernels registration to implementation entry point +REGISTRAR(CryptoHashImpl_cpu, + {{DataType::UInt8, DataFormat::Any}, {DataType::UInt8}}, + {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<uint8_t, uint8_t>, nullptr}); +REGISTRAR(CryptoHashImpl_cpu, + {{DataType::UInt8, DataFormat::Any}, {DataType::UInt64}}, + {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<uint8_t, uint64_t>, nullptr}); +REGISTRAR(CryptoHashImpl_cpu, + {{DataType::Float32, DataFormat::Any}, {DataType::UInt8}}, + {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<float, uint8_t>, nullptr}); +REGISTRAR(CryptoHashImpl_cpu, + {{DataType::Float32, DataFormat::Any}, {DataType::UInt64}}, + {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<float, uint64_t>, nullptr}); +REGISTRAR(CryptoHashImpl_cpu, + {{DataType::Float64, DataFormat::Any}, {DataType::UInt8}}, + {ProdConso::inPlaceModel, Aidge::CryptoHashImpl_cpu_forward_kernel<double, uint8_t>, nullptr}); +} // namespace Aidge +#endif + +#endif /* AIDGE_CPU_OPERATOR_CRYPTOHASHIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ModImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ModImpl_kernels.hpp index 940fa482..15d18bf4 100644 --- 
a/include/aidge/backend/cpu/operator/ModImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ModImpl_kernels.hpp @@ -72,6 +72,9 @@ REGISTRAR(ModImpl_cpu, REGISTRAR(ModImpl_cpu, {DataType::Int32}, {ProdConso::inPlaceModel, Aidge::ModImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(ModImpl_cpu, + {DataType::UInt64}, + {ProdConso::inPlaceModel, Aidge::ModImpl_cpu_forward_kernel<std::uint64_t, std::uint64_t, std::uint64_t>, nullptr}); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MODIMPL_KERNELS_H_ */ diff --git a/src/operator/CryptoHashImpl.cpp b/src/operator/CryptoHashImpl.cpp new file mode 100644 index 00000000..10d82dd0 --- /dev/null +++ b/src/operator/CryptoHashImpl.cpp @@ -0,0 +1,46 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> // std::accumulate +#include <thread> // std::this_thread::sleep_for +#include <vector> + +#include "aidge/operator/CryptoHash.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +#include "aidge/backend/cpu/operator/CryptoHashImpl.hpp" +#include "aidge/backend/cpu/operator/CryptoHashImpl_kernels.hpp" + +#ifdef WITH_OPENSSL +template <> +void Aidge::CryptoHashImpl_cpu::forward() { + const CryptoHash_Op& op_ = dynamic_cast<const CryptoHash_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + AIDGE_ASSERT(in0, "missing input #0"); + + // Find the correct kernel type + const auto impl = Registrar<CryptoHashImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(in0->size(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); +} + +template <> +void Aidge::CryptoHashImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not available for CryptoHash_Op"); +} +#endif diff --git a/unit_tests/operator/Test_CryptoHash.cpp b/unit_tests/operator/Test_CryptoHash.cpp new file mode 100644 index 00000000..7453ea19 --- /dev/null +++ b/unit_tests/operator/Test_CryptoHash.cpp @@ -0,0 +1,56 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cmath> // std::abs +#include <cstddef> // std::size_t +#include <memory> + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/backend/cpu/operator/CryptoHashImpl.hpp" +#include "aidge/data/Data.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/CryptoHash.hpp" +#include "aidge/utils/ArrayHelpers.hpp" + +using namespace Aidge; + +#ifdef WITH_OPENSSL +TEST_CASE("[cpu/operator] CryptoHash(forward)") { + SECTION("1D Tensor") { + std::shared_ptr<Tensor> input0 = + std::make_shared<Tensor>(Array1D<uint8_t, 5>{ + {'a', 'b', 'c', 'd', 'e'}}); + std::shared_ptr<Tensor> expectedOutput = + std::make_shared<Tensor>(Array1D<uint8_t, 32>{ + {0x36, 0xbb, 0xe5, 0x0e, 0xd9, 0x68, 0x41, 0xd1, + 0x04, 0x43, 0xbc, 0xb6, 0x70, 0xd6, 0x55, 0x4f, + 0x0a, 0x34, 0xb7, 0x61, 0xbe, 0x67, 0xec, 0x9c, + 0x4a, 0x8a, 0xd2, 0xc0, 0xc4, 0x4c, 0xa4, 0x2c}}); + + std::shared_ptr<Node> myCryptoHash = CryptoHash(); + auto op = std::static_pointer_cast<CryptoHash_Op>(myCryptoHash->getOperator()); + op->associateInput(0, input0); + op->setDataType(DataType::UInt8); + op->setBackend("cpu"); + myCryptoHash->forward(); + + REQUIRE(op->getOutput(0)->size() == 32); + + uint8_t* resPtr = static_cast<uint8_t*>(op->getOutput(0)->getImpl()->rawPtr()); + uint8_t* expectedPtr = static_cast<uint8_t*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < expectedOutput->size(); ++i) { + REQUIRE(resPtr[i] == expectedPtr[i]); + } + } +} +#endif diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index 956169c3..5bd86eec 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -21,6 +21,10 @@ #include "aidge/operator/Pop.hpp" #include "aidge/operator/Stack.hpp" #include "aidge/operator/Identity.hpp" +#include "aidge/operator/CryptoHash.hpp" +#include "aidge/operator/Mod.hpp" +#include "aidge/operator/Tanh.hpp" +#include "aidge/operator/Select.hpp" #include "aidge/operator/MetaOperator.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/scheduler/ParallelScheduler.hpp" @@ -30,6 +34,9 @@ #include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/SqrtImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/backend/cpu/operator/CryptoHashImpl.hpp" +#include "aidge/backend/cpu/operator/ModImpl.hpp" +#include "aidge/backend/cpu/operator/TanhImpl.hpp" #include "aidge/recipes/GraphViewHelper.hpp" @@ -512,4 +519,56 @@ TEST_CASE("[cpu/scheduler] Accumulate", "[scheduler]") { std::shared_ptr<Tensor> output = std::static_pointer_cast<OperatorTensor>(pop_o->getOperator())->getOutput(0); REQUIRE(*output == *expectedOutput); } + +#ifdef WITH_OPENSSL +TEST_CASE("[cpu/scheduler] Select", "[scheduler]") { + std::shared_ptr<Tensor> in = std::make_shared<Tensor>( + Array2D<float, 2, 3>{{{1, 2, 3}, {4, 5, 6}}}); + + std::shared_ptr<GraphView> g = Sequential({ + Producer(in, "input"), + Parallel({ + Sequential({ + CryptoHash("hash"), + Mod("mod") + }), + ReLU("relu"), + Tanh("tanh"), + Sqrt("sqrt") + }), + Select(3, "select") + }); + + auto modProd = Producer(std::make_shared<Tensor>(Array1D<uint64_t, 1>{{3}})); + modProd->addChild(g->getNode("mod"), 0, 1); + g->add(modProd); + + g->getNode("hash")->getOperator()->setDataType(DataType::UInt64); + 
g->getNode("mod")->getOperator()->setDataType(DataType::UInt64); + g->setBackend("cpu"); + g->save("select"); + + auto scheduler = SequentialScheduler(g); + scheduler.generateScheduling(); + scheduler.saveStaticSchedulingDiagram("select_scheduling"); + REQUIRE_NOTHROW(scheduler.forward(true)); + + g->save("select_forwarded"); + + auto expectedOutputHash = std::make_shared<Tensor>( + Array1D<uint64_t, 4>{{0x1b7cf58dfe2dae24, 0x3bac903def4ce580, 0x5f5a347389d97f41, 0x2c2dc759abc6b61}}); + auto outputHash = std::static_pointer_cast<OperatorTensor>(g->getNode("hash")->getOperator())->getOutput(0); + REQUIRE(*outputHash == *expectedOutputHash); + + auto expectedOutputMod = std::make_shared<Tensor>( + Array1D<uint64_t, 4>{{2, 1, 1, 2}}); + auto outputMod = std::static_pointer_cast<OperatorTensor>(g->getNode("mod")->getOperator())->getOutput(0); + REQUIRE(*outputMod == *expectedOutputMod); + + auto expectedOutput = std::make_shared<Tensor>( + Array2D<float, 2, 3>{{{std::sqrt(1), std::sqrt(2), std::sqrt(3)}, {std::sqrt(4), std::sqrt(5), std::sqrt(6)}}}); + auto output = std::static_pointer_cast<OperatorTensor>(g->getNode("select")->getOperator())->getOutput(0); + REQUIRE(*output == *expectedOutput); +} +#endif } // namespace Aidge -- GitLab From 652eb2810fd6a0a0360f8881cd3e9b41343d8340 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Sun, 16 Feb 2025 23:46:45 +0100 Subject: [PATCH 16/22] Working concept of with tagConditionalNodes() --- unit_tests/scheduler/Test_Scheduler.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index 5bd86eec..54e57ec4 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -569,6 +569,24 @@ TEST_CASE("[cpu/scheduler] Select", "[scheduler]") { Array2D<float, 2, 3>{{{std::sqrt(1), std::sqrt(2), std::sqrt(3)}, {std::sqrt(4), std::sqrt(5), std::sqrt(6)}}}); auto output = std::static_pointer_cast<OperatorTensor>(g->getNode("select")->getOperator())->getOutput(0); REQUIRE(*output == *expectedOutput); + + scheduler.resetScheduling(); + scheduler.tagConditionalNodes(); + + REQUIRE(g->getNode("relu")->attributes()->hasAttr("schedule.cond")); + REQUIRE(g->getNode("relu")->attributes()->getAttr<std::set<std::pair<NodePtr, size_t>>>("schedule.cond") + == std::set<std::pair<NodePtr, size_t>>{{g->getNode("select"), 0}}); + REQUIRE(g->getNode("tanh")->attributes()->hasAttr("schedule.cond")); + REQUIRE(g->getNode("tanh")->attributes()->getAttr<std::set<std::pair<NodePtr, size_t>>>("schedule.cond") + == std::set<std::pair<NodePtr, size_t>>{{g->getNode("select"), 1}}); + REQUIRE(g->getNode("sqrt")->attributes()->hasAttr("schedule.cond")); + REQUIRE(g->getNode("sqrt")->attributes()->getAttr<std::set<std::pair<NodePtr, size_t>>>("schedule.cond") + == std::set<std::pair<NodePtr, size_t>>{{g->getNode("select"), 2}}); + REQUIRE(!g->getNode("input")->attributes()->hasAttr("schedule.cond")); + + scheduler.generateScheduling(); + scheduler.saveStaticSchedulingDiagram("select_scheduling_tag"); + REQUIRE_NOTHROW(scheduler.forward(true)); } #endif } // namespace Aidge -- GitLab From e13b5fa521c5f979602e41236ece5795eaed8635 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Thu, 20 Feb 2025 09:09:30 +0100 Subject: [PATCH 17/22] Export OpenSSL dependency --- CMakeLists.txt | 2 ++ aidge_backend_cpu-config.cmake.in | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 2d4bc8ec..729853ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -120,8 +120,10 @@ target_include_directories(${module_name} ${CMAKE_CURRENT_SOURCE_DIR}/src ) +set(AIDGE_REQUIRES_OPENSSL FALSE) if(OpenSSL_FOUND) target_link_libraries(${module_name} PRIVATE OpenSSL::SSL OpenSSL::Crypto) + set(AIDGE_REQUIRES_OPENSSL TRUE) endif() target_compile_features(${module_name} PRIVATE cxx_std_14) diff --git a/aidge_backend_cpu-config.cmake.in b/aidge_backend_cpu-config.cmake.in index d8e1372b..7582102c 100644 --- a/aidge_backend_cpu-config.cmake.in +++ b/aidge_backend_cpu-config.cmake.in @@ -2,6 +2,10 @@ include(CMakeFindDependencyMacro) find_dependency(aidge_core) +set(AIDGE_REQUIRES_OPENSSL @AIDGE_REQUIRES_OPENSSL@) +if (AIDGE_REQUIRES_OPENSSL) + find_dependency(OpenSSL) +endif() include(CMakeFindDependencyMacro) -- GitLab From f53364a0301fc933ec2e48d6c7f5488a76470d77 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Fri, 21 Feb 2025 14:49:00 +0100 Subject: [PATCH 18/22] Renaming --- aidge_backend_cpu/unit_tests/test_scheduler.py | 8 ++++---- unit_tests/operator/Test_MetaOperator.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/aidge_backend_cpu/unit_tests/test_scheduler.py b/aidge_backend_cpu/unit_tests/test_scheduler.py index 494f3456..b60ff3f0 100644 --- a/aidge_backend_cpu/unit_tests/test_scheduler.py +++ b/aidge_backend_cpu/unit_tests/test_scheduler.py @@ -57,9 +57,9 @@ class test_scheduler(unittest.TestCase): scheduler = aidge_core.SequentialScheduler(graph_view) scheduler.generate_scheduling() - self.assertEqual(len(scheduler.get_static_scheduling()), 10) + self.assertEqual(len(scheduler.get_sequential_static_scheduling()), 10) # Do not care about the order of execution of the producers - self.assertListEqual([i.name() for i in scheduler.get_static_scheduling()[-3:]], EXPECTED_SCHEDULE) + self.assertListEqual([i.name() for i in scheduler.get_sequential_static_scheduling()[-3:]], EXPECTED_SCHEDULE) def test_parallel_scheduling(self): @@ -83,9 +83,9 @@ class test_scheduler(unittest.TestCase): scheduler = aidge_core.SequentialScheduler(graph_view) scheduler.generate_scheduling() - self.assertEqual(len(scheduler.get_static_scheduling()), 11) + self.assertEqual(len(scheduler.get_sequential_static_scheduling()), 11) # Do not care about the order of execution of the producers - self.assertTrue([i.name() for i in scheduler.get_static_scheduling()[-4:]] in EXPECTED_SCHEDULE) + self.assertTrue([i.name() for i in scheduler.get_sequential_static_scheduling()[-4:]] in EXPECTED_SCHEDULE) if __name__ == '__main__': unittest.main() diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp index adc548b9..bb9027d3 100644 --- a/unit_tests/operator/Test_MetaOperator.cpp +++ b/unit_tests/operator/Test_MetaOperator.cpp @@ -279,9 +279,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(op->getNbConsumedData(1).data == 32768); REQUIRE(op->getNbProducedData(0).data == 34816); REQUIRE(op->getNbProducedData(1).data == 34816); - REQUIRE(microGraphScheduler->getStaticScheduling(0).size() == 26); - REQUIRE(microGraphScheduler->getStaticScheduling(1).size() == 24); - REQUIRE(microGraphScheduler->getStaticScheduling(15).size() == 24); + REQUIRE(microGraphScheduler->getSequentialStaticScheduling(0).size() == 26); + REQUIRE(microGraphScheduler->getSequentialStaticScheduling(1).size() == 24); + REQUIRE(microGraphScheduler->getSequentialStaticScheduling(15).size() == 24); } 
SECTION("LSTM(forward_values)") { -- GitLab From 47bb2b3f69b7642f5d22b62d81559347ebd4b6ff Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Mon, 24 Feb 2025 13:49:17 +0000 Subject: [PATCH 19/22] Fix some imports following aidge_core update --- src/operator/PadImpl.cpp | 6 +++--- src/operator/ReduceSumImpl.cpp | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp index cdae21f8..9a54437f 100644 --- a/src/operator/PadImpl.cpp +++ b/src/operator/PadImpl.cpp @@ -9,14 +9,14 @@ * ********************************************************************************/ +#include <cstddef> #include <vector> -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/operator/Conv.hpp" - #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl_kernels.hpp" +#include "aidge/operator/Pad.hpp" +#include "aidge/utils/Types.h" Aidge::Elts_t Aidge::Pad_ProdConso_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const { AIDGE_ASSERT(inputIdx == 0, "input index out of range." diff --git a/src/operator/ReduceSumImpl.cpp b/src/operator/ReduceSumImpl.cpp index aad08018..93a89a34 100644 --- a/src/operator/ReduceSumImpl.cpp +++ b/src/operator/ReduceSumImpl.cpp @@ -12,11 +12,14 @@ #include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" #include <memory> +#include <stdexcept> #include <vector> -#include "aidge/utils/Types.h" -#include "aidge/operator/ReduceSum.hpp" #include "aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/ReduceSum.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Types.h" template <> void Aidge::ReduceSumImpl_cpu::forward() { -- GitLab From 3f4cd6e77aae54e674ab3f9aec0e0675cbd6860d Mon Sep 17 00:00:00 2001 From: Jerome Hue <jerome.hue@cea.fr> Date: Thu, 6 Feb 2025 10:49:15 +0100 Subject: [PATCH 20/22] Implement backward function of Add operator --- .../aidge/backend/cpu/operator/AddImpl.hpp | 14 +- .../backend/cpu/operator/AddImpl_kernels.hpp | 64 +++- src/operator/AddImpl.cpp | 25 +- unit_tests/operator/Test_AddImpl.cpp | 275 +++++++++++++++++- 4 files changed, 368 insertions(+), 10 deletions(-) diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp index e39c35b4..ca04dff9 100644 --- a/include/aidge/backend/cpu/operator/AddImpl.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl.hpp @@ -25,7 +25,19 @@ namespace Aidge { // Operator implementation entry point for the backend using AddImpl_cpu = OperatorImpl_cpu<Add_Op, - void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*)>; + void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*), + void(const std::size_t, + const std::size_t, + const std::size_t, + const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const void*, + const void*, + const void*, + void*, + void*) +>; // Implementation entry point registration to Operator REGISTRAR(Add_Op, "cpu", Aidge::AddImpl_cpu::create); diff --git a/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp index e6d13fcf..d6fff9b5 100644 --- a/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp @@ -147,25 
+147,75 @@ void AddImpl_cpu_forward_kernel(std::vector<std::size_t> dims0, } } +template <class I, class O> +void AddImpl_cpu_backward_kernel(const std::size_t input0Length, + const std::size_t input1Length, + const std::size_t gradOutputLength, + const std::vector<std::size_t>& dims0, + const std::vector<std::size_t>& dims1, + const std::vector<std::size_t>& outputDims, + const void* input0_, + const void* input1_, + const void* grad_output_, + void* gradientInput0_, + void* gradientInput1_) +{ + // TODO: Remove input0/1 from the function + const I* input0 = static_cast<const I*>(input0_); + const I* input1 = static_cast<const I*>(input1_); + const O* gradOutput = static_cast<const O*>(grad_output_); + auto* gradInput0 = static_cast<I*>(gradientInput0_); + auto* gradInput1 = static_cast<I*>(gradientInput1_); + + std::fill_n(gradInput0, input0Length, static_cast<I>(0)); + std::fill_n(gradInput1, input1Length, static_cast<I>(0)); + + auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0); + auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1); + + for (std::size_t i = 0; i < gradOutputLength; ++i) { + auto idxOutputGrad = getMultiDimIndices(outputDims, i); + std::vector<std::size_t> idxInput0(broadcastedDims0.size()); + std::vector<std::size_t> idxInput1(broadcastedDims1.size()); + + for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) { + idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension]; + } + + for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) { + idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension]; + } + + auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0); + auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1); + + // For addition: gradient of both inputs is just the output gradient + // (unlike multiplication where we need to multiply by the other input, + // or subtraction where we need to negate one of them) + gradInput0[idx0] += static_cast<I>(gradOutput[i]); + gradInput1[idx1] += static_cast<I>(gradOutput[i]); + } +} + // Kernels registration to implementation entry point REGISTRAR(AddImpl_cpu, {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, nullptr}); + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, Aidge::AddImpl_cpu_backward_kernel<float, float>}); REGISTRAR(AddImpl_cpu, {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, nullptr}); + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, Aidge::AddImpl_cpu_backward_kernel<double, double>}); REGISTRAR(AddImpl_cpu, {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int8}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int8_t, std::int8_t>, nullptr}); + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int8_t, std::int8_t>, Aidge::AddImpl_cpu_backward_kernel<std::int8_t, std::int8_t>}); REGISTRAR(AddImpl_cpu, {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::UInt8}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::uint8_t, std::uint8_t>, nullptr}); + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::uint8_t, std::uint8_t>, Aidge::AddImpl_cpu_backward_kernel<std::uint8_t, std::uint8_t>}); REGISTRAR(AddImpl_cpu, 
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, Aidge::AddImpl_cpu_backward_kernel<std::int32_t, std::int32_t>}); REGISTRAR(AddImpl_cpu, {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}}, - {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, Aidge::AddImpl_cpu_backward_kernel<std::int64_t, std::int64_t>}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */ diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index 101743ec..b027fb87 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -55,5 +55,28 @@ void Aidge::AddImpl_cpu::forward() { template <> void Aidge::AddImpl_cpu::backward() { - AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Add_Op on backend cpu"); + const Add_Op& op_ = dynamic_cast<const Add_Op&>(mOp); + + auto in0 = op_.getInput(0); + auto in1 = op_.getInput(1); + auto in0grad = op_.getInput(0)->grad(); + auto in1grad = op_.getInput(1)->grad(); + auto out0grad = op_.getOutput(0)->grad(); + + // Find the correct kernel type + const auto impl = Registrar<AddImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward(in0grad->size(), + in1grad->size(), + out0grad->size(), + in0->dims(), + in1->dims(), + out0grad->dims(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(out0grad), + getCPUPtr(in0grad), + getCPUPtr(in1grad)); + } diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp index bff9629b..4538b322 100644 --- a/unit_tests/operator/Test_AddImpl.cpp +++ b/unit_tests/operator/Test_AddImpl.cpp @@ -10,6 +10,7 @@ ********************************************************************************/ #include <memory> +#include <random> #include <catch2/catch_test_macros.hpp> @@ -19,6 +20,7 @@ #include "aidge/graph/Node.hpp" #include "aidge/operator/Add.hpp" #include "aidge/utils/ArrayHelpers.hpp" +#include "aidge/utils/TensorUtils.hpp" using namespace Aidge; @@ -139,4 +141,275 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") { Log::info("Expected Add_1 Tensor:\n{}", expectedOutput); REQUIRE(*op_1->getOutput(0) == expectedOutput); } -} \ No newline at end of file +} + +TEST_CASE("[cpu/operator] Add(backward)", "[Add][CPU]") { + std::shared_ptr<Add_Op> op = std::make_shared<Add_Op>(); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + // NOTE: The first four tests use fixed values, the last one uses random values but static dimensions. 
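+    // Expected behaviour (see AddImpl_cpu_backward_kernel above): each input's gradient is the
+    // output gradient accumulated over every axis on which that input was broadcast, so the
+    // SECTIONs below compare the kernel against exactly that reduction.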
+ + SECTION("Case 1: 1D and 2D Tensors") { + const auto T0 = std::make_shared<Tensor>( + Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{1, 2, 3}, {4, 5, 6}}})); + + const auto T1 = + std::make_shared<Tensor>(Array1D<cpptype_t<DataType::Float32>, 3>({0.1, 0.2, 0.3})); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(std::make_shared<Tensor>( + Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}))); + op->forwardDims(); + + op->backward(); + + const Tensor expectedGrad0 = + Array2D<cpptype_t<DataType::Float32>, 2, 3>({{{1, 1, 1}, {1, 1, 1}}}); + + const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({2, 2, 2}); + + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } + + SECTION("Case 2: 3D and 1D tensors") { + const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}, + {{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}})); + + const auto T1 = + std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1})); + + const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + {{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}})); + + const Tensor expectedGrad0 = + Array3D<float, 2, 2, 3>({{{{1, 1, 1}, {1, 1, 1}}, + {{1, 1, 1}, {1, 1, 1}}}}); + + const Tensor expectedGrad1 = Array1D<cpptype_t<DataType::Float32>, 3>({4, 4, 4}); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + op->backward(); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } + + SECTION("Case 3: 4D and 2D tensors") { + const auto T0 = std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>( + {{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}, + {{10.0, 11.0, 12.0}, {13.0, 14.0, 15.0}, {16.0, 17.0, 18.0}}}, + {{{19.0, 20.0, 21.0}, {22.0, 23.0, 24.0}, {25.0, 26.0, 27.0}}, + {{28.0, 29.0, 30.0}, + {31.0, 32.0, 33.0}, + {34.0, 35.0, 36.0}}}}})); + + const auto T1 = std::make_shared<Tensor>(Array2D<cpptype_t<DataType::Float32>, 3, 3>( + {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}})); + + const auto newGrad = + std::make_shared<Tensor>(Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>( + {{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}, + {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}, + {{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}, + {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}})); + + const Tensor expectedGrad0 = + Array4D<cpptype_t<DataType::Float32>, 2, 2, 3, 3>( + {{{{{1, 1, 1}, {1, 1, 1}, {1, 1, 1}}, + {{1, 1, 1}, {1, 1, 1}, {1, 1, 1}}}, + {{{1, 1, 1}, {1, 1, 1}, {1, 1, 1}}, + {{1, 1, 1}, {1, 1, 1}, {1, 1, 1}}}}}); + + const Tensor expectedGrad1 = + Array2D<cpptype_t<DataType::Float32>, 3, 3>({{ + {4.0, 4.0, 4.0}, + {4.0, 4.0, 4.0}, + {4.0, 4.0, 4.0}}}); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + op->backward(); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } + + SECTION("Case 4: 3D and 2D tensors") { + const auto T0 = std::make_shared<Tensor>( + Array3D<float, 2, 3, 4>({{{ + {1.0, 2.0, 3.0, 
4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }, + { + {13.0, 14.0, 15.0, 16.0}, + {17.0, 18.0, 19.0, 20.0}, + {21.0, 22.0, 23.0, 24.0}, + }}})); + + const auto T1 = std::make_shared<Tensor>( + Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2}}})); + + const auto newGrad = std::make_shared<Tensor>( + Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{ + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + }, + { + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + }}})); + + const Tensor expectedGrad0 = + Array3D<cpptype_t<DataType::Float32>, 2, 3, 4>({{{{1, 1, 1, 1}, + {1, 1, 1, 1}, + {1, 1, 1, 1}}, + {{1, 1, 1, 1}, + {1, 1, 1, 1}, + {1, 1, 1, 1}}}}); + + const Tensor expectedGrad1 = + Array2D<cpptype_t<DataType::Float32>, 3, 4>({{{2.0, 2.0, 2.0, 2.0}, + {2.0, 2.0, 2.0, 2.0}, + {2.0, 2.0, 2.0, 2.0}}}); + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + op->backward(); + + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(0)->grad()), expectedGrad0)); + REQUIRE(approxEq<cpptype_t<DataType::Float32>>(*(op->getInput(1)->grad()), expectedGrad1)); + } + + SECTION("Case 5: Tensors with random values") { + + // Use random values + const std::vector<std::size_t> dims0 = {5, 2, 1, 7}; // First tensor + const std::vector<std::size_t> dims1 = {2, 6, 7}; // Second tensor + const std::vector<std::size_t> outputDims = {5, 2, 6, 7}; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> dist(0.1f, 1.0f); + + auto T0 = std::make_shared<Tensor>(dims0); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + float* input0Data = static_cast<float*>(T0->getImpl()->rawPtr()); + // Fill with random values + for (std::size_t i = 0; i < T0->size(); ++i) { + input0Data[i] = dist(gen); + } + + auto T1 = std::make_shared<Tensor>(dims1); + T1->setDataType(DataType::Float32); + T1->setBackend("cpu"); + float* input1Data = static_cast<float*>(T1->getImpl()->rawPtr()); + // Fill with random values + for (std::size_t i = 0; i < T1->size(); ++i) { + input1Data[i] = dist(gen); + } + + op->associateInput(0, T0); + op->associateInput(1, T1); + + op->forwardDims(); + op->forward(); + + Tensor expectedOutput{outputDims}; + expectedOutput.setBackend("cpu"); + float* expectedOutputData = static_cast<float*>(expectedOutput.getImpl()->rawPtr()); + + for (std::size_t n = 0; n < 5; ++n) { + for (std::size_t c = 0; c < 2; ++c) { + for (std::size_t h = 0; h < 6; ++h) { + for (std::size_t w = 0; w < 7; ++w) { + std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n)); + std::size_t in0Idx = + w + 7 * (0 + 1 * (c + 2 * n)); // middle dim is 1 + std::size_t in1Idx = + w + 7 * (h + 6 * c); // no n dimension + + expectedOutputData[outIdx] = input0Data[in0Idx] + input1Data[in1Idx]; + } + } + } + } + + auto outputTensor = op->getOutput(0); + + REQUIRE(approxEq<float>(*outputTensor, expectedOutput)); + + // Backward pass + std::vector<float> gradOutputData(expectedOutput.size()); + for (auto &val : gradOutputData) { + val = dist(gen); + } + + op->getOutput(0)->setGrad(std::make_shared<Tensor>()); + op->getOutput(0)->grad()->resize(outputDims); + op->getOutput(0)->grad()->getImpl()->setRawPtr(gradOutputData.data(), + expectedOutput.size()); + + // Compute reference gradients + std::vector<float> expectedGrad0(T0->size(), 0.0f); + std::vector<float> expectedGrad1(T1->size(), 0.0f); + + for (std::size_t n = 
0; n < 5; ++n) { + for (std::size_t c = 0; c < 2; ++c) { + for (std::size_t h = 0; h < 6; ++h) { + for (std::size_t w = 0; w < 7; ++w) { + std::size_t outIdx = w + 7 * (h + 6 * (c + 2 * n)); + std::size_t in0Idx = w + 7 * (0 + 1 * (c + 2 * n)); + std::size_t in1Idx = w + 7 * (h + 6 * c); + + // Gradient for input0: just accumulate grad_output + expectedGrad0[in0Idx] += gradOutputData[outIdx]; + + // Gradient for input1: just accumulate grad_output + expectedGrad1[in1Idx] += gradOutputData[outIdx]; + } + } + } + } + + // Perform backward pass + op->backward(); + + auto expectedGrad0Tensor = std::make_shared<Tensor>(); + expectedGrad0Tensor->resize(T0->dims()); + expectedGrad0Tensor->setBackend("cpu"); + expectedGrad0Tensor->setDataType(DataType::Float32); + expectedGrad0Tensor->getImpl()->setRawPtr(expectedGrad0.data(), + expectedGrad0.size()); + + auto expectedGrad1Tensor = std::make_shared<Tensor>(T1->dims()); + expectedGrad1Tensor->setBackend("cpu"); + expectedGrad1Tensor->setDataType(DataType::Float32); + expectedGrad1Tensor->getImpl()->setRawPtr(expectedGrad1.data(), + expectedGrad1.size()); + + // Verify backward pass + REQUIRE(approxEq<float>(*T0->grad(), *expectedGrad0Tensor)); + REQUIRE(approxEq<float>(*T1->grad(), *expectedGrad1Tensor)); + } +} + -- GitLab From 393fb207a6599cdfbbbe141e3cb29a3a5cae8246 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 26 Feb 2025 14:48:17 +0000 Subject: [PATCH 21/22] [upd] ConstantOfShape kernel to use Tensor as inputs and avoid redundant size computation --- .../cpu/operator/ConstantOfShapeImpl.hpp | 8 +++----- .../operator/ConstantOfShapeImpl_kernels.hpp | 17 ++++------------- src/operator/ConstantOfShapeImpl.cpp | 9 +++------ 3 files changed, 10 insertions(+), 24 deletions(-) diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp index 83e7e030..b595ec93 100644 --- a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp @@ -12,23 +12,21 @@ #ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ #define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ -#include <cstddef> #include <memory> -#include <vector> #include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/ConstantOfShape.hpp" #include "aidge/utils/Registrar.hpp" -#include "aidge/utils/Types.h" namespace Aidge { + +class Tensor; // Operator implementation entry point for the backend using ConstantOfShapeImpl_cpu = OperatorImpl_cpu<ConstantOfShape_Op, - void(const std::vector<DimSize_t>, const Tensor&, void *)>; + void(const std::shared_ptr<Tensor>&, const Tensor&)>; // Implementation entry point registration to Operator REGISTRAR(ConstantOfShape_Op, "cpu", Aidge::ConstantOfShapeImpl_cpu::create); } // namespace Aidge #endif /* _AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ */ - diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp index 18ab9c0a..c42cc76a 100644 --- a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp @@ -30,20 +30,11 @@ namespace Aidge { template <class O> void ConstantOfShapeimpl_cpu_forward_kernel( - const std::vector<DimSize_t> output_dims, const Tensor &value, - void *output_) { + const std::shared_ptr<Tensor>& output_, const Tensor &value) { - O *output = static_cast<O *>(output_); - O val; - 
std::copy(static_cast<O *>(value.getImpl()->hostPtr()), - static_cast<O *>(value.getImpl()->hostPtr()) + - static_cast<NbElts_t>(1), - &val); - const size_t output_size = std::accumulate( - output_dims.begin(), output_dims.end(), 1, std::multiplies<DimSize_t>()); - for (size_t i = 0; i < output_size; ++i) { - output[i] = val; - } + O* output = static_cast<O*>(output_->getImpl()->hostPtr()); + const O val = *reinterpret_cast<O*>(value.getImpl()->hostPtr()); + std::fill_n(output, output_->size(), val); } // Kernels registration to implementation entry point diff --git a/src/operator/ConstantOfShapeImpl.cpp b/src/operator/ConstantOfShapeImpl.cpp index 16e4b762..1d41160b 100644 --- a/src/operator/ConstantOfShapeImpl.cpp +++ b/src/operator/ConstantOfShapeImpl.cpp @@ -13,15 +13,14 @@ #include <functional> #include <memory> -#include <vector> +#include <stdexcept> // std::runtime_error #include "aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp" -#include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/ConstantOfShape.hpp" +#include "aidge/backend/OperatorImpl.hpp" // Aidge::getBestMatch, Aidge::getRequiredSpec #include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" -#include "aidge/utils/Types.h" template <> void Aidge::ConstantOfShapeImpl_cpu::forward() { @@ -33,9 +32,7 @@ void Aidge::ConstantOfShapeImpl_cpu::forward() { const auto impl = Registrar<ConstantOfShapeImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.forward(op_.getOutput(0)->dims(), - op_.value(), - op_.getOutput(0)->getImpl()->rawPtr()); + impl.forward(op_.getOutput(0), op_.value()); } template <> -- GitLab From 9d9647aa0f91f637c5cd063b78b8a68075c2294e Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 26 Feb 2025 14:51:38 +0000 Subject: [PATCH 22/22] [upd] tests following 'aidge_core' changes --- .../operator/Test_ConstantOfShapeImpl.cpp | 139 +++++++++--------- .../recipies/Test_FoldConstantOfShape.cpp | 50 +++++++ 2 files changed, 119 insertions(+), 70 deletions(-) create mode 100644 unit_tests/recipies/Test_FoldConstantOfShape.cpp diff --git a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp index 8ec1669b..6833d836 100644 --- a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp +++ b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp @@ -27,89 +27,88 @@ #include "aidge/data/Tensor.hpp" #include "aidge/filler/Filler.hpp" #include "aidge/operator/ConstantOfShape.hpp" -#include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/TensorUtils.hpp" #include "aidge/utils/Types.h" namespace Aidge { -TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") { - constexpr std::uint16_t NBTRIALS = 10; - // Create a random number generator - auto random_seed = Catch::Generators::Detail::getSeed; - std::mt19937 gen(random_seed()); - std::uniform_real_distribution<float> valueDist( - 0.1f, 1.1f); // Random float distribution between 0 and 1 - std::uniform_int_distribution<DimSize_t> input_tensor_size_dist( - std::size_t(1), std::size_t(10)); - std::uniform_int_distribution<int64_t> input_tensor_values_dist( - std::size_t(1), std::size_t(7)); - std::uniform_real_distribution<double> operator_attr_value_dist(-100., 100.); - /////////////////////////////////////////////// - // SETUP FUNCTIONS - auto generate_input_tensor = - [&gen, &input_tensor_size_dist, - &input_tensor_values_dist]() -> std::shared_ptr<Tensor> { - std::vector<DimSize_t> input_dims; - 
input_dims.push_back(input_tensor_size_dist(gen)); +TEST_CASE("[cpu/operator] ConstantOfShape(forward)", "[ConstantOfShape][CPU][forward]") { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + auto random_seed = Catch::Generators::Detail::getSeed; + std::mt19937 gen(random_seed()); + std::uniform_real_distribution<float> valueDist( + 0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<DimSize_t> input_tensor_size_dist( + std::size_t(1), std::size_t(10)); + std::uniform_int_distribution<int64_t> input_tensor_values_dist( + std::size_t(1), std::size_t(7)); + std::uniform_real_distribution<double> operator_attr_value_dist(-100., 100.); - auto result = std::make_shared<Tensor>(input_dims); - result->setDataType(DataType::Int64); - result->setBackend("cpu"); - for (DimSize_t i = 0; i < result->size(); ++i) { - result->set<std::int64_t>(i, input_tensor_values_dist(gen)); - } - return result; - }; + /////////////////////////////////////////////// + // SETUP FUNCTIONS + auto generate_input_tensor = + [&gen, &input_tensor_size_dist, + &input_tensor_values_dist]() -> std::shared_ptr<Tensor> { + std::vector<DimSize_t> input_dims; + input_dims.push_back(input_tensor_size_dist(gen)); - auto generate_random_operator = - [&gen, - &operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> { - auto node = ConstantOfShape(Tensor(operator_attr_value_dist(gen))); - auto op = std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator()); - op->setDataType(DataType::Float64); - op->setBackend("cpu"); - return op; - }; + auto result = std::make_shared<Tensor>(input_dims); + result->setDataType(DataType::Int64); + result->setBackend("cpu"); + for (DimSize_t i = 0; i < result->size(); ++i) { + result->set<std::int64_t>(i, input_tensor_values_dist(gen)); + } + return result; + }; - auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor, - std::shared_ptr<ConstantOfShape_Op> op) { - std::vector<DimSize_t> output_dims; - output_dims.reserve(input_tensor->size()); - for (DimSize_t i = 0; i < input_tensor->size(); ++i) { - output_dims.push_back(input_tensor->get<int64_t>(i)); - } - auto result = std::make_shared<Tensor>(output_dims); - result->setDataType(op->value().dataType()); - result->setBackend("cpu"); - constantFiller(result, op->value().get<double>(0)); - return result; - }; + auto generate_random_operator = + [&gen, + &operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> { + std::shared_ptr<ConstantOfShape_Op> op = std::make_shared<ConstantOfShape_Op>(Tensor(operator_attr_value_dist(gen))); + op->setDataType(DataType::Float64); + op->setBackend("cpu"); + return op; + }; + + auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor, + std::shared_ptr<ConstantOfShape_Op> op) { + std::vector<DimSize_t> output_dims; + output_dims.reserve(input_tensor->size()); + for (DimSize_t i = 0; i < input_tensor->size(); ++i) { + output_dims.push_back(input_tensor->get<std::int64_t>(i)); + } + auto result = std::make_shared<Tensor>(output_dims); + result->setDataType(op->value().dataType()); + result->setBackend("cpu"); + constantFiller(result, op->value().get<double>(0)); + return result; + }; - ///////////////////////////////////// - // BENCHMARKING - std::chrono::time_point<std::chrono::system_clock> start; - std::chrono::time_point<std::chrono::system_clock> end; - std::chrono::duration<double, std::micro> duration{}; - int number_of_operation{0}; + ///////////////////////////////////// + // 
BENCHMARKING + std::chrono::time_point<std::chrono::system_clock> start; + std::chrono::time_point<std::chrono::system_clock> end; + std::chrono::duration<double, std::micro> duration{}; + int number_of_operation{0}; - SECTION("ConstantOfShapeImpl_cpu::forward()") { - for (int i = 0; i < NBTRIALS; ++i) { - auto input_T = generate_input_tensor(); - std::shared_ptr<ConstantOfShape_Op> op = generate_random_operator(); - auto output_T = generate_output_tensor(input_T, op); - op->associateInput(0, input_T); + SECTION("ConstantOfShapeImpl_cpu::forward()") { + for (int i = 0; i < NBTRIALS; ++i) { + auto input_T = generate_input_tensor(); + std::shared_ptr<ConstantOfShape_Op> op = generate_random_operator(); + auto output_T = generate_output_tensor(input_T, op); + op->associateInput(0, input_T); - REQUIRE(op->forwardDims(true)); - REQUIRE_NOTHROW(op->forward()); + REQUIRE(op->forwardDims(true)); + REQUIRE_NOTHROW(op->forward()); - CHECK(output_T->nbDims() == op->getOutput(0)->nbDims()); - for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) { - CHECK(output_T->dims().at(i) == op->getOutput(0)->dims().at(i)); - } - CHECK(approxEq<double>(*output_T, *op->getOutput(0))); + CHECK(output_T->nbDims() == op->getOutput(0)->nbDims()); + for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) { + CHECK(output_T->dims().at(i) == op->getOutput(0)->dims().at(i)); + } + CHECK(approxEq<double>(*output_T, *op->getOutput(0))); + } } - } } } // namespace Aidge diff --git a/unit_tests/recipies/Test_FoldConstantOfShape.cpp b/unit_tests/recipies/Test_FoldConstantOfShape.cpp new file mode 100644 index 00000000..a1c09b15 --- /dev/null +++ b/unit_tests/recipies/Test_FoldConstantOfShape.cpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + + #include "aidge/graph/GraphView.hpp" + #include "aidge/operator/Identity.hpp" + #include "aidge/recipes/Recipes.hpp" + + #include <cstdint> // std::int64_t + #include <memory> + + #include <catch2/catch_test_macros.hpp> + + #include "aidge/graph/OpArgs.hpp" + #include "aidge/operator/ConstantOfShape.hpp" + #include "aidge/operator/Conv.hpp" + #include "aidge/operator/Producer.hpp" + #include "aidge/operator/ReLU.hpp" + #include "aidge/recipes/Recipes.hpp" + #include "aidge/utils/ArrayHelpers.hpp" + #include "aidge/utils/Types.h" + + namespace Aidge { + + TEST_CASE("[cpu/recipes] foldConstantOfShape", + "[ConstantOfShape][foldConstantOfShape][recipes]") { + auto input_T = std::make_shared<Tensor>(Array1D<std::int64_t, 4>({1, 1, 3, 3})); + + auto model = std::make_shared<GraphView>(); + SECTION("Sequential model") { + model = Sequential({ + Producer(input_T, "prod_0", true), + ConstantOfShape(3, "constantOfShape_0"), + Conv(1, 1, {3, 3}, "Conv_0"), + ReLU("ReLU_1") + }); + // aidge_backend_cpu loaded. Recipe should work + REQUIRE(foldConstantOfShape(model) == 1); + CHECK(model->forwardDims()); + } + } + + } // namespace Aidge -- GitLab
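
The Add backward kernel added in PATCH 20/22 follows a single rule: each input's gradient is the output gradient summed over the axes on which that input was broadcast, which is why "Case 1: 1D and 2D Tensors" expects {2, 2, 2} for the 1D input. Below is a minimal standalone sketch of that reduction rule in plain C++, with no Aidge types; the shapes mirror Case 1 and the names are illustrative only, so this is not part of the patches above.

    #include <array>
    #include <cstddef>
    #include <iostream>

    int main() {
        constexpr std::size_t rows = 2, cols = 3;      // x0 has shape {2, 3}; x1 has shape {3}, broadcast over rows
        std::array<float, rows * cols> gradOutput{};   // dL/dz, all ones as in Case 1
        gradOutput.fill(1.0f);

        std::array<float, rows * cols> gradX0{};       // same shape as x0: a plain copy of dL/dz
        std::array<float, cols> gradX1{};              // broadcast input: accumulated over the row axis

        for (std::size_t r = 0; r < rows; ++r) {
            for (std::size_t c = 0; c < cols; ++c) {
                gradX0[r * cols + c] += gradOutput[r * cols + c];
                gradX1[c]            += gradOutput[r * cols + c];
            }
        }

        for (float g : gradX1) std::cout << g << ' ';  // prints: 2 2 2
        std::cout << '\n';
        return 0;
    }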