diff --git a/.gitlab/ci/_global.gitlab-ci.yml b/.gitlab/ci/_global.gitlab-ci.yml index 1615b8974db11d93cb3305ce800e46cf5377bc33..331373fe0f27e7750183eb2e76fe83300cf316a8 100644 --- a/.gitlab/ci/_global.gitlab-ci.yml +++ b/.gitlab/ci/_global.gitlab-ci.yml @@ -9,6 +9,14 @@ variables: GIT_SSL_NO_VERIFY: 1 DEBIAN_FRONTEND: noninteractive +# See https://docs.gitlab.com/ee/ci/yaml/workflow.html#switch-between-branch-pipelines-and-merge-request-pipelines +workflow: + rules: + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + - if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS + when: never + - if: $CI_COMMIT_BRANCH + default: image: nvidia/cuda:12.2.0-devel-ubuntu22.04 before_script: diff --git a/.gitlab/ci/build.gitlab-ci.yml b/.gitlab/ci/build.gitlab-ci.yml index 68fcb6b4bc0dac08c4f0029ec1f2d3404226c1c2..e996def7984908ea751c5e033814e81b18a3d51b 100644 --- a/.gitlab/ci/build.gitlab-ci.yml +++ b/.gitlab/ci/build.gitlab-ci.yml @@ -23,17 +23,105 @@ build:ubuntu_cpp: - build_cpp/ - install_cpp/ -build:ubuntu_python: +build:ubuntu_cpp_g++10: + stage: build + needs: [] + tags: + - docker + + script: + # Download dependencies + # aidge_core + - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"' + - unzip -o build_artifacts.zip -d . + - rm -rf build_cpp + + # Build current module + - export CMAKE_PREFIX_PATH=../install_cpp + - apt install -y g++-10 + - mkdir -p build_cpp + - mkdir -p install_cpp + - cd build_cpp + - export CXX=/usr/bin/g++-10 + - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. + - make -j4 all install + +build:ubuntu_cpp_g++12: + stage: build + needs: [] + tags: + - docker + + script: + # Download dependencies + # aidge_core + - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"' + - unzip -o build_artifacts.zip -d . + - rm -rf build_cpp + + # Build current module + - export CMAKE_PREFIX_PATH=../install_cpp + - apt install -y g++-12 + - mkdir -p build_cpp + - mkdir -p install_cpp + - cd build_cpp + - export CXX=/usr/bin/g++-12 + - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. + - make -j4 all install + +build:ubuntu_cpp_clang12: + stage: build + needs: [] + tags: + - docker + + script: + # Download dependencies + # aidge_core + - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"' + - unzip -o build_artifacts.zip -d . + - rm -rf build_cpp + + # Build current module + - export CMAKE_PREFIX_PATH=../install_cpp + - apt install -y clang-12 + - mkdir -p build_cpp + - mkdir -p install_cpp + - cd build_cpp + - export CXX=/usr/bin/clang++-12 + - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. + - make -j4 all install + +build:ubuntu_cpp_clang15: stage: build needs: [] tags: - docker + script: # Download dependencies - # aidge_core (CPP) + # aidge_core - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"' - unzip -o build_artifacts.zip -d . 
- rm -rf build_cpp + + # Build current module + - export CMAKE_PREFIX_PATH=../install_cpp + - apt install -y clang-15 + - mkdir -p build_cpp + - mkdir -p install_cpp + - cd build_cpp + - export CXX=/usr/bin/clang++-15 + - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. + - make -j4 all install + +build:ubuntu_python: + stage: build + needs: [] + tags: + - docker + script: + # Download dependencies # aidge_core (Python) - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_python"' - unzip -o build_artifacts.zip -d . @@ -41,8 +129,7 @@ build:ubuntu_python: - python3 -m pip install virtualenv - virtualenv venv - source venv/bin/activate - - export AIDGE_INSTALL=`pwd`/install - - export CMAKE_PREFIX_PATH=../install_cpp + - python3 -m pip install -r requirements.txt - python3 -m pip install . artifacts: expire_in: 1 week @@ -84,3 +171,35 @@ build:windows_cpp: paths: - build_cpp/ - install_cpp/ + +build:windows_python: + stage: build + needs: [] + tags: + - windows + + image: buildtools + before_script: + # Install Chocolatey + - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) + # Install dependencies + - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y + - choco install git -Y + - choco install python -Y + # Update PATH + - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") + script: + # Download dependencies + # aidge_core (Python) + - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip' + - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force + + - python -m pip install virtualenv + - virtualenv venv + - venv\Scripts\Activate.ps1 + - python -m pip install -r requirements.txt + - python -m pip install . + artifacts: + expire_in: 1 week + paths: + - venv/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 51ee1f6d5df771fcccd1b05a45861eb2f1d3bbbe..51a6ebe10d7b8d03fcb94898de55734dbabf9b0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,11 +12,6 @@ set(module_name _${project}) # target name project(${project}) -############################################## -# Import utils CMakeLists -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -include(PybindModuleCreation) - ############################################## # Define options option(PYBIND "python binding" ON) diff --git a/README.md b/README.md index 74eb50826bf6f88a0ded363138adba04827390d0..865cb08a17ebf8638cb2ac56773a4f464860b8ae 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,11 @@ So far be sure to have the correct requirements to use this library ## Pip installation -You will need to install first the aidge_core library before installing aidge_cpu. -Also, make sure that the install path was set before installing aidge_core library. -Then run in your python environnement : +You will first need to install the ``aidge_core`` library before installing ``aidge_backend_cpu``. + +If you have set a custom install path for the ``aidge_core`` library, make sure to use the same one here. 
+ +Then run in your Python environment: ``` bash pip install . -v ``` @@ -46,4 +48,4 @@ Important: this command can also be run with `make`. To compile the CPU library with the python binding + the associated unitary tests, run ``` make cpu_with_pybind_tests -``` \ No newline at end of file +``` diff --git a/aidge_backend_cpu/unit_tests/test_recipies.py b/aidge_backend_cpu/unit_tests/test_recipies.py new file mode 100644 index 0000000000000000000000000000000000000000..60949adf245f4f4a7ed316879fb307131f70739a --- /dev/null +++ b/aidge_backend_cpu/unit_tests/test_recipies.py @@ -0,0 +1,77 @@ +""" +Copyright (c) 2023 CEA-List + +This program and the accompanying materials are made available under the +terms of the Eclipse Public License 2.0 which is available at +http://www.eclipse.org/legal/epl-2.0. + +SPDX-License-Identifier: EPL-2.0 +""" + +import unittest +import aidge_core +import aidge_backend_cpu + +from functools import reduce +import numpy as np + +class test_recipies(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def test_fuse_batchnorm(self): + dims = [1, 1, 10, 10] + size = reduce((lambda x, y: x*y), dims) + + input_data = np.arange(size).reshape(dims).astype(np.float32) + input_tensor = aidge_core.Tensor(input_data) + + input_node = aidge_core.Producer(input_tensor, "X") + conv = aidge_core.Conv2D(1, 1, [3, 3], name="Conv0") + bn = aidge_core.BatchNorm2D(name="Add0") + + graph_view = aidge_core.sequential([conv, bn]) + + input_node.add_child(graph_view) + input_node.get_operator().set_datatype(aidge_core.DataType.Float32) + input_node.get_operator().set_backend("cpu") + graph_view.set_datatype(aidge_core.DataType.Float32) + graph_view.set_backend("cpu") + + # Set arbitrary values for the Conv and BatchNorm parameters + np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32) + np_bias = np.arange(1).reshape([1, 1]).astype(np.float32) + + np_scale = np.array([0.05]).astype(np.float32) + np_shift = np.array([0.05]).astype(np.float32) + np_mean = np.array([0.05]).astype(np.float32) + np_var = np.array([0.05]).astype(np.float32) + conv.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_weights)) + conv.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_bias)) + bn.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_scale)) + bn.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_shift)) + bn.input(3)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_mean)) + bn.input(4)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_var)) + scheduler0 = aidge_core.SequentialScheduler(graph_view) + scheduler0.forward() + + for outNode in graph_view.get_output_nodes(): + output_aidge0 = outNode.get_operator().output(0) + + aidge_core.fuse_batchnorm(graph_view) + scheduler1 = aidge_core.SequentialScheduler(graph_view) + scheduler1.forward() + + for outNode in graph_view.get_output_nodes(): + output_aidge1 = outNode.get_operator().output(0) + + self.assertTrue(aidge_core.approx_eq(output_aidge0, output_aidge1, 0.000001, 0.0001)) + +if __name__ == '__main__': + unittest.main() + + + diff --git a/aidge_backend_cpu/unit_tests/test_scheduler.py b/aidge_backend_cpu/unit_tests/test_scheduler.py index d8cf3e164da4bd34273905b0b0e156cf057635a5..3449ff513ef618e24788419c835b7277a1e751f1 100644 --- a/aidge_backend_cpu/unit_tests/test_scheduler.py +++ b/aidge_backend_cpu/unit_tests/test_scheduler.py @@ -55,6 +55,8 @@ class test_scheduler(unittest.TestCase): 
graph_view.set_datatype(aidge_core.DataType.Float32) graph_view.set_backend("cpu") + graph_view.forward_dims() + scheduler = aidge_core.SequentialScheduler(graph_view) scheduler.generate_scheduling() @@ -80,6 +82,8 @@ class test_scheduler(unittest.TestCase): graph_view.set_datatype(aidge_core.DataType.Float32) graph_view.set_backend("cpu") + graph_view.forward_dims() + scheduler = aidge_core.SequentialScheduler(graph_view) scheduler.generate_scheduling() diff --git a/aidge_backend_cpu/unit_tests/test_tensor.py b/aidge_backend_cpu/unit_tests/test_tensor.py index 1d12fc0cbadf71f04226a98e2e65984abc7e3254..438b6acd51791a52c9e308fb1aceaefb2a45fb29 100644 --- a/aidge_backend_cpu/unit_tests/test_tensor.py +++ b/aidge_backend_cpu/unit_tests/test_tensor.py @@ -45,5 +45,17 @@ class test_tensor(unittest.TestCase): self.assertTrue(i_t == i_n) # TODO : May need to change this to a difference for i,j in zip(t.dims(), np_array.shape): self.assertEqual(i,j) + + def test_get_set(self): + dims = [2,2,2] + + np_array = np.arange(8).reshape(dims) + # Numpy -> Tensor + t = aidge_core.Tensor(np_array) + for i in range(8): + self.assertEqual(t[i], i) + t[i] = 5 + self.assertEqual(t[i], 5) + if __name__ == '__main__': unittest.main() diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 95b2f7b8e2ff70c9b9224bea1137ad74e469ffb8..5a7ac3958b76e94c8389b0287fdac40c8c3a5ad8 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -15,13 +15,22 @@ #include "aidge/backend/cpu/data/TensorImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" +#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/DivImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" +#include "aidge/backend/cpu/operator/MulImpl.hpp" +#include "aidge/backend/cpu/operator/PadImpl.hpp" +#include "aidge/backend/cpu/operator/PowImpl.hpp" #include "aidge/backend/cpu/operator/ProducerImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl.hpp" +#include "aidge/backend/cpu/operator/ScalingImpl.hpp" +#include "aidge/backend/cpu/operator/SqrtImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" +#include "aidge/backend/cpu/operator/SubImpl.hpp" #endif /* AIDGE_CPU_IMPORTS_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp index 014939e106e5891c86b007f4bd3905c765ec8754..967b42333a8748999f99afcb6e52b9a15bf936e3 100644 --- a/include/aidge/backend/cpu/data/TensorImpl.hpp +++ b/include/aidge/backend/cpu/data/TensorImpl.hpp @@ -47,6 +47,10 @@ class TensorImpl_cpu : public TensorImpl { return mData.data(); }; + void* getRaw(std::size_t idx){ + return static_cast<void*>(static_cast<T *>(rawPtr()) + idx); + }; + virtual ~TensorImpl_cpu() = default; void setRawPtr(void *ptr) override final { diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp index 6e1cd03a3af81ee85f4f9e0e212af7c02089734e..9dbd21501462c010384248544b81bb9f26346604 100644 --- a/include/aidge/backend/cpu/operator/AddImpl.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl.hpp @@ -53,152 +53,51 @@ class AddImplBackward_cpu<3> 
template <DimIdx_t NUM> class AddImpl_cpu : public OperatorImpl { - private: - const Add_Op<NUM>& mOp; - std::array<NbElts_t, NUM> mNbConsumedData = {}; - std::array<NbElts_t, 1> mNbProducedData = {}; - - public: - AddImpl_cpu(const Add_Op<NUM>& op) : mOp(op) {} +public: + AddImpl_cpu(const Add_Op<NUM>& op) : OperatorImpl(op) {} static std::unique_ptr<AddImpl_cpu<NUM>> create(const Add_Op<NUM>& op) { return std::make_unique<AddImpl_cpu<NUM>>(op); } - - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final { - assert(mOp.getInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); - } - - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! - return 0; - } - - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); - } - - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final { - assert(inputIdx < mNbConsumedData.size()); - return mNbConsumedData[inputIdx]; - } - - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final { - assert(outputIdx < mNbProducedData.size()); - return mNbProducedData[outputIdx]; - } - void updateConsummerProducer() override final; - - void forward() { - // nothing - } - - void backward() { printf("Not implemented yet.\n"); } }; template <> class AddImpl_cpu<1> : public OperatorImpl { - private: - const Add_Op<1>& mOp; - std::array<NbElts_t, 1> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - AddImpl_cpu(const Add_Op<1>& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {} +public: + AddImpl_cpu(const Add_Op<1>& op) : OperatorImpl(op) {} static std::unique_ptr<AddImpl_cpu<1>> create(const Add_Op<1>& op) { return std::make_unique<AddImpl_cpu<1>>(op); } - public: - NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, - const std::vector<DimSize_t> &/*inputsSize*/) const override final; - - NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; - - NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final; - void updateConsummerProducer() override final; - - void forward(); - - void backward(); + void forward() override; }; template <> class AddImpl_cpu<2> : public OperatorImpl { - private: - const Add_Op<2>& mOp; - std::array<NbElts_t, 2> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - AddImpl_cpu(const Add_Op<2>& op) : mOp(op), mNbConsumedData({0, 0}), mNbProducedData({0}) {} +public: + AddImpl_cpu(const Add_Op<2>& op) : OperatorImpl(op) {} static std::unique_ptr<AddImpl_cpu<2>> create(const Add_Op<2>& op) { return std::make_unique<AddImpl_cpu<2>>(op); } - public: - NbElts_t 
getNbRequiredData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, - const std::vector<DimSize_t>& /*inputsSize*/) const override final; - - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - - NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final; - void updateConsummerProducer() override final; - - void forward(); - - void backward(); + void forward() override; }; template <> class AddImpl_cpu<3> : public OperatorImpl { - private: - const Add_Op<3>& mOp; - std::array<NbElts_t, 3> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - AddImpl_cpu(const Add_Op<3>& op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {} +public: + AddImpl_cpu(const Add_Op<3>& op) : OperatorImpl(op) {} static std::unique_ptr<AddImpl_cpu<3>> create(const Add_Op<3>& op) { return std::make_unique<AddImpl_cpu<3>>(op); } - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final; - - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward(); - - void backward(); + void forward() override; }; namespace { diff --git a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp index 490598599aedf24b26865ce6a1ddb3fe32044b1b..221e36dcfac44e21d1b1a35674ca21403b4b57ab 100644 --- a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp @@ -20,7 +20,7 @@ namespace Aidge { template <class I1, class O> void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) { - // FIXME: missing Add parameters as arguments + // FIXME: missing Add attributes as arguments const I1* input1 = static_cast<const I1*>(input1_); O* output = static_cast<O*>(output_); @@ -32,7 +32,7 @@ void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* inp template <class I1, class I2, class O> void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, void* output_) { - // FIXME: missing Add parameters as arguments + // FIXME: missing Add attributes as arguments const I1* input1 = static_cast<const I1*>(input1_); const I2* input2 = static_cast<const I2*>(input2_); O* output = static_cast<O*>(output_); @@ -45,7 +45,7 @@ void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* inp template <class I1, class I2, class I3, class O> void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, const void* input3_, void* output_) { - // FIXME: missing Add parameters as arguments + // FIXME: missing Add attributes as arguments const I1* input1 = static_cast<const I1*>(input1_); const I2* input2 = static_cast<const I2*>(input2_); const I3* input3 = static_cast<const I3*>(input3_); diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp index 
8373cb84a550efd8741a2dbc04c1e94ad37fe611..e3c3a6a28b08386a3b93702f8ce64df68f703119 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp @@ -29,36 +29,22 @@ namespace Aidge { class AvgPoolingImpl2DForward_cpu : public Registrable<AvgPoolingImpl2DForward_cpu, std::tuple<DataType, DataType>, - void(const AvgPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; + void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; class AvgPoolingImpl2DBackward_cpu : public Registrable<AvgPoolingImpl2DBackward_cpu, std::tuple<DataType, DataType>, - void(const AvgPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; + void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; class AvgPoolingImpl2D_cpu : public OperatorImpl { - private: - const AvgPooling_Op<2> &mOp; - std::array<NbElts_t, 1> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - AvgPoolingImpl2D_cpu(const AvgPooling_Op<2> &op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {} +public: + AvgPoolingImpl2D_cpu(const AvgPooling_Op<2> &op) : OperatorImpl(op) {} static std::unique_ptr<AvgPoolingImpl2D_cpu> create(const AvgPooling_Op<2> &op) { return std::make_unique<AvgPoolingImpl2D_cpu>(op); } - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final; - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward(); - - void backward(); + void forward() override; }; namespace { diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp index 776e020f1a20056db345c8e845fd73bb31b4138b..ea46a540ad04b6227d6ec01c965e2eb99806d5e1 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp @@ -26,51 +26,51 @@ namespace Aidge { * @brief Forward kernel for 2D AvgPoolingolution on CPU backend. * @tparam I Input data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param params tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param output_ Output Tensor. 
*/ template <class I, class O> -void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters &params, +void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, void *output_) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, ch, Xin, Yin) // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account + // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]); + const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]); + const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? std::get<1>(params)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? 
std::get<1>(attrs)[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; + const std::size_t ix = ox * std::get<0>(attrs)[0]; + const std::size_t iy = oy * std::get<0>(attrs)[1]; if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { output[oIndexFull] += static_cast<O>( diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp index d9f25b4a8e38510f82fc5afe9ed4b656197a47d5..060e19b135c12832e8a7e8cc9c0db828d4a204d1 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp @@ -29,7 +29,7 @@ namespace Aidge { class BatchNormImpl2DForward_cpu : public Registrable<BatchNormImpl2DForward_cpu, std::tuple<DataType, DataType, DataType>, - void(const BatchNorm_Op<2>::Parameters &, + void(const BatchNorm_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, @@ -41,7 +41,7 @@ class BatchNormImpl2DForward_cpu class BatchNormImpl2DBackward_cpu : public Registrable<BatchNormImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType>, - void(const BatchNorm_Op<2>::Parameters &, + void(const BatchNorm_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, @@ -51,29 +51,15 @@ class BatchNormImpl2DBackward_cpu void *)> {}; class BatchNormImpl2D_cpu : public OperatorImpl { - private: - const BatchNorm_Op<2> &mOp; - std::array<NbElts_t, 5> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - BatchNormImpl2D_cpu(const BatchNorm_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0, 0, 0}), mNbProducedData({0}) {} +public: + BatchNormImpl2D_cpu(const BatchNorm_Op<2> &op) : OperatorImpl(op) {} static std::unique_ptr<BatchNormImpl2D_cpu> create(const BatchNorm_Op<2> &op) { return std::make_unique<BatchNormImpl2D_cpu>(op); } - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final; - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward(); - - void backward(); + void forward() override; }; namespace { diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp index eedb80bde60d65b53bac70cc33ca83eb4f0121e7..486829e782ae2173332a7efa6646bb7bba322252 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp @@ -27,7 +27,7 @@ namespace Aidge { * @tparam W Weight data type. * @tparam B Bias data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param params tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param scale_ const scale Tensor. @@ -37,9 +37,9 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class P, class O> -void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims, +void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const P *scale = static_cast<const P *>(scale_); const P *shift = static_cast<const P *>(shift_); @@ -52,12 +52,12 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters &param const DimSize_t featureMapSize = dims[2]*dims[3]; - if ((freeze == true) || (std::get<1>(params) == 0.0f)) { + if ((freeze == true) || (std::get<1>(attrs) == 0.0f)) { for (std::size_t batch = 0; batch < nbBatch; ++batch) { for (std::size_t ch = 0; ch < nbChannels; ++ch) { const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); - const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params))); + const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(attrs))); for (std::size_t feature = 0; feature<featureMapSize; ++feature) { output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; @@ -81,10 +81,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters &param const I inputMean = sum / static_cast<I>(nbDataPerChannel); const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; - batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params); - batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params); + batchMean[ch] = batchMean[ch]*(1-std::get<1>(attrs)) + inputMean*std::get<1>(attrs); + batchVar[ch] = batchVar[ch]*(1-std::get<1>(attrs)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(attrs); - const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params))); + const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(attrs))); for (std::size_t batch = 0; batch < nbBatch; ++batch) { const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; for (std::size_t feature = 0; feature<featureMapSize; ++feature) { diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp index 0d21c676d797b2fc4e95c4aea47674c8fca5eef4..7b5dbfb0801fb314d91da15c8a9c4b80fe62eb35 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp @@ -29,38 +29,24 @@ namespace Aidge { class ConvDepthWiseImpl2DForward_cpu : public Registrable<ConvDepthWiseImpl2DForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const ConvDepthWise_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, + void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvDepthWiseImpl2DBackward_cpu : public Registrable<ConvDepthWiseImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const ConvDepthWise_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const 
void *, + void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvDepthWiseImpl2D_cpu : public OperatorImpl { - private: - const ConvDepthWise_Op<2> &mOp; - std::array<NbElts_t, 3> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - ConvDepthWiseImpl2D_cpu(const ConvDepthWise_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {} +public: + ConvDepthWiseImpl2D_cpu(const ConvDepthWise_Op<2> &op) : OperatorImpl(op) {} static std::unique_ptr<ConvDepthWiseImpl2D_cpu> create(const ConvDepthWise_Op<2> &op) { return std::make_unique<ConvDepthWiseImpl2D_cpu>(op); } - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final; - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward(); - - void backward(); + void forward() override; }; namespace { diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp index ee2d82e00376c5a2cc5a075565e35eb8885c021e..5aa29ac55740d46bba873bb9d85a04cd004cc3bd 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp @@ -9,7 +9,7 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMP_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ #define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ #include "aidge/utils/Registrar.hpp" @@ -27,7 +27,7 @@ namespace Aidge { * @tparam W Weight data type. * @tparam B Bias data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param params tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param weights_ const weight Tensor. @@ -35,9 +35,9 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class W, class B, class O> -void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims, +void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); const B *biases = static_cast<const B *>(biases_); @@ -46,52 +46,52 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameter // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, ch, Xin, Yin) // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account + // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) { - const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize; + for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) { + const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize; B biasVal = (biases != nullptr) ? biases[ch] : B(0); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1]; + const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]); + const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? 
std::get<3>(attrs)[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]); + const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? std::get<3>(params)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1]; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]); + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]); if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + output[oIndexFull] += (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + 
static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); } else { for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] * + output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] * input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; } } diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp index 1f3dffe43b966bc37887f267cc56760a899476f9..3db91ab507456244676c990427287e5755ab019b 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -29,38 +29,25 @@ namespace Aidge { class ConvImpl2DForward_cpu : public Registrable<ConvImpl2DForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, + void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvImpl2DBackward_cpu : public Registrable<ConvImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, + void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvImpl2D_cpu : public OperatorImpl { - private: - const Conv_Op<2> &mOp; - std::array<NbElts_t, 3> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - public: - ConvImpl2D_cpu(const Conv_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {} + ConvImpl2D_cpu(const Conv_Op<2>& op) : OperatorImpl(op) {} static std::unique_ptr<ConvImpl2D_cpu> create(const Conv_Op<2> &op) { return std::make_unique<ConvImpl2D_cpu>(op); } public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final; - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward(); - - void backward(); + void forward() override; }; namespace { diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp index bc2f10099f42cba91be8d089b66dc176fdeb7c10..03e2c35170432181c7a9b3934d61f0bd18471876 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp @@ -27,7 +27,7 @@ namespace Aidge { * @tparam W Weight data type. * @tparam B Bias data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param params tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param weights_ const weight Tensor. @@ -35,9 +35,9 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class W, class B, class O> -void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims, +void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); const B *biases = static_cast<const B *>(biases_); @@ -45,34 +45,34 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters &params, const s /* // output H size const std::size_t oxSize = - static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0])); + static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0])); // output W size const std::size_t oySize = - static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1])); + static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1])); // TODO: kernel computation // output (Xout, Yout, outCh, batch) // input (Xin, Yin, inCh, batch) // weight (kernelX, kernelY, inCh, outCh) - // does not take Dilation parameter into account + // does not take Dilation attribute into account for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t oy = 0; oy < oySize; ++oy) { - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; + const std::size_t ix = ox * std::get<0>(attrs)[0]; + const std::size_t iy = oy * std::get<0>(attrs)[1]; - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox)); + for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) { + const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox)); B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); for (std::size_t batch = 0; batch < dims[3]; ++batch) { output[oIndex + batch] = biasVal; } for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { - for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) { - for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) { + for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) { + for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) { const std::size_t wIndex = - outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx)); + outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx)); std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx))); for (std::size_t batch = 0; batch < dims[3]; ++batch) { output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; @@ -88,53 +88,53 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters &params, const s // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, inCh, Xin, Yin) // weight (outCh, inCh, kernelX, kernelY) - // does not take Dilation parameter into account + // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize; + for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) { + const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize; B biasVal = (biases != nullptr) ? biases[outCh] : B(0); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) { const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1]; + const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]); + const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? std::get<4>(params)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? 
std::get<4>(attrs)[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]); + const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? std::get<4>(params)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1]; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]); + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]); if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + 
static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); } else { for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] * + output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] * input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; } } diff --git a/include/aidge/backend/cpu/operator/DivImpl.hpp b/include/aidge/backend/cpu/operator/DivImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..655a9f6c8accb80fc85d8bc7bd9bf378d4f48a6b --- /dev/null +++ b/include/aidge/backend/cpu/operator/DivImpl.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_H_ +#define AIDGE_CPU_OPERATOR_DIVIMPL_H_ + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Div.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include <memory> +#include <vector> + +namespace Aidge { +// class Div_Op; + +// compute kernel registry for forward and backward +class DivImplForward_cpu + : public Registrable<DivImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*,void*)> { +}; +class DivImplBackward_cpu + : public Registrable<DivImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> { +}; + +class DivImpl_cpu : public OperatorImpl { +public: + DivImpl_cpu(const Div_Op& op) : OperatorImpl(op) {} + + static std::unique_ptr<DivImpl_cpu> create(const Div_Op& op) { + return std::make_unique<DivImpl_cpu>(op); + } + + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +static Registrar<Div_Op> registrarDivImpl_cpu("cpu", Aidge::DivImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e2ead9ca8de3ed8328b659906336766fbfbb6a47 --- /dev/null +++ b/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp @@ -0,0 +1,64 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/DivImpl.hpp" + +namespace Aidge { +template <class I1, class I2, class O> +void DivImpl_cpu_forward_kernel(std::size_t input1Length, + std::size_t input2Length, + const void* input1_, + const void* input2_, + void* output_) { + + const I1* input_1 = static_cast<const I1*>(input1_); + const I2* input_2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + if (input2Length == input1Length) + { + for (std::size_t i = 0; i < input1Length; ++i) { + output[i] = input_1[i] / input_2[i]; + } + } + else if (input2Length == 1) + { + for (std::size_t i = 0; i < input1Length; ++i) { + output[i] = input_1[i] / input_2[0]; + } + } + else // input_2 is 1d and of size the number of channels of input_1 + { + for (std::size_t i = 0; i < input1Length; ++i) { + std::size_t channelIdx = i % input2Length; + output[i] = input_1[i] / input_2[channelIdx]; + } + } +} + +namespace { +static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::DivImpl_cpu_forward_kernel<float, float, float>); +static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::DivImpl_cpu_forward_kernel<int, int, int>); +static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::DivImpl_cpu_forward_kernel<double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp index c69cc0b08a58877108c78d6f12c29e9089c2f665..5d79369077d06288e218b9002274e7e3d1880b59 100644 --- a/include/aidge/backend/cpu/operator/FCImpl.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl.hpp @@ -26,34 +26,22 @@ namespace Aidge { // compute kernel registry for forward and backward class FCImplForward_cpu : public Registrable<FCImplForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t, + void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t, const void *, const void *, const void *, void *)> {}; class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t, + void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t, const void *, const void *, const void *, void *)> {}; class FCImpl_cpu : public OperatorImpl { - private: - const FC_Op &mOp; - std::array<NbElts_t, 3> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; +public: + FCImpl_cpu(const FC_Op &op) : OperatorImpl(op) {} - public: - FCImpl_cpu(const FC_Op &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {} + static std::unique_ptr<FCImpl_cpu> create(const FC_Op &op) { + return std::make_unique<FCImpl_cpu>(op); + } - static std::unique_ptr<FCImpl_cpu> create(const FC_Op &op) { return std::make_unique<FCImpl_cpu>(op); } - - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbRequiredProtected(const 
IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final; - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - void forward(); - - void backward(); + void forward() override; }; namespace { diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp index d6acb7dfea3415a8d67384745e16ecdd8bf06324..91e2558a7ef1079cbc9fb11f78fab53ef4246149 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp @@ -19,17 +19,17 @@ namespace Aidge { // template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims, +// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 4>& dims, // const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments +// // FIXME: missing FC attributes as arguments // const I* input = static_cast<const I*>(input_); // const W* weights = static_cast<const W*>(weights_); // const B* biases = static_cast<const B*>(biases_); // O* output = static_cast<O*>(output_); -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) { // std::size_t oIndex = outIdx * dims[3]; -// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; +// const B bias = std::get<1>(attrs) ? 
B(0) : biases[outIdx]; // for (std::size_t batch = 0; batch < dims[3]; ++batch) { // output[oIndex + batch] = bias; // } @@ -39,10 +39,10 @@ namespace Aidge { // for (std::size_t iy = 0; iy < dims[1]; ++iy) { // for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { // const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { +// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) { // const std::size_t oIndex = dims[3] * outCh; -// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) + -// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) + +// outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3]; // for (std::size_t batch = 0; batch < dims[3]; ++batch) { // output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; // } @@ -53,9 +53,9 @@ namespace Aidge { // } // template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims, +// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 2>& dims, // const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments +// // FIXME: missing FC attributes as arguments // const I* input = static_cast<const I*>(input_); // const W* weights = static_cast<const W*>(weights_); // const B* biases = static_cast<const B*>(biases_); @@ -63,9 +63,9 @@ namespace Aidge { // // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N] -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) { // std::size_t oIndex = outIdx * dims[0]; -// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; +// const B bias = std::get<1>(attrs) ? 
B(0) : biases[outIdx]; // for (std::size_t batch = 0; batch < dims[0]; ++batch) { // output[oIndex + batch] = bias; // } @@ -74,8 +74,8 @@ namespace Aidge { // for (std::size_t batch = 0; batch < dims[0]; ++batch) { // const std::size_t oIndex = dims[1] * batch; // for (std::size_t i = 0; i < dims[1]; ++i) { -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { -// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) { +// std::size_t wIndex = i * std::get<0>(attrs) + outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3]; // output[oIndex + outCh] += weights[wIndex] * input[i + batch]; // } // } @@ -83,29 +83,29 @@ namespace Aidge { // } template <class I, class W, class B, class O> -void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize, +void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize, const void* input_, const void* weights_, const void* biases_, void* output_) { - // FIXME: missing FC parameters as arguments + // FIXME: missing FC attributes as arguments const I* input = static_cast<const I*>(input_); const W* weights = static_cast<const W*>(weights_); const B* biases = static_cast<const B*>(biases_); O* output = static_cast<O*>(output_); - if (std::get<1>(params)) { - std::fill(output, output+(batchSize*std::get<0>(params)), B(0)); + if (std::get<1>(attrs)) { + std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0)); } else { for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params))); + std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs))); } } for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t out = 0; out < std::get<0>(params); ++out) { - output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize, + for (std::size_t out = 0; out < std::get<0>(attrs); ++out) { + output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize, input + (batch + 1)*oneInputSize, weights + out*oneInputSize, - output[out + batch*std::get<0>(params)]); + output[out + batch*std::get<0>(attrs)]); } } } diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp index abe167bea16de01f861beb9701f747d39f265d9d..371e2905a81d1dc2e114f6044388d7e6686122f8 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp @@ -24,35 +24,22 @@ namespace Aidge { // compute kernel registry for forward and backward class LeakyReLUImplForward_cpu - : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> { }; class LeakyReLUImplBackward_cpu - : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> { }; class LeakyReLUImpl_cpu : public OperatorImpl { - private: - const 
LeakyReLU_Op& mOp; - std::array<NbElts_t, 1> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - LeakyReLUImpl_cpu(const LeakyReLU_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {} +public: + LeakyReLUImpl_cpu(const LeakyReLU_Op& op) : OperatorImpl(op) {} static std::unique_ptr<LeakyReLUImpl_cpu> create(const LeakyReLU_Op& op) { return std::make_unique<LeakyReLUImpl_cpu>(op); } - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final; - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - void forward(); - - void backward(); + void forward() override; }; namespace { diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp index ff9a8ac6a8f968f244429b330401d794f16fac01..761b9579c3c3dc187e4b0fac24812fa77f916e65 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp @@ -18,14 +18,14 @@ namespace Aidge { template <class I, class O> -void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params, +void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs, std::size_t inputLenght, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - I negativeSlope = static_cast<I>(std::get<0>(params)); + I negativeSlope = static_cast<I>(std::get<0>(attrs)); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope; diff --git a/include/aidge/backend/cpu/operator/MatMulImpl.hpp b/include/aidge/backend/cpu/operator/MatMulImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2e4b3157360065b0fa857a8bcdd85f1b7442ee63 --- /dev/null +++ b/include/aidge/backend/cpu/operator/MatMulImpl.hpp @@ -0,0 +1,53 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
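Every kernel in this patch uses the same type-erased calling convention: buffers travel as const void*/void* and each template instantiation casts them back to its element types, so the registry can store plain function pointers keyed by DataType. A self-contained sketch of that convention with a toy kernel (not one of the registered ones):

```cpp
#include <cstddef>
#include <iostream>

// Toy kernel with the same erased signature as the ReLU/Sqrt kernels:
// void(const std::size_t, const void*, void*).
template <class I, class O>
void reluKernel(const std::size_t n, const void* in_, void* out_) {
    const I* in = static_cast<const I*>(in_);
    O* out = static_cast<O*>(out_);
    for (std::size_t i = 0; i < n; ++i)
        out[i] = in[i] >= 0 ? in[i] : O(0);
}

int main() {
    const float in[3] = {-2.f, 0.f, 3.f};
    float out[3];
    // What the registry hands back is just such a function pointer:
    void (*kernel)(const std::size_t, const void*, void*) = reluKernel<float, float>;
    kernel(3, in, out);
    std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << '\n'; // -> 0 0 3
}
```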
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_H_ +#define AIDGE_CPU_OPERATOR_MATMULIMPL_H_ + +#include <array> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/MatMul.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// class MatMul_Op; + +// compute kernel registry for forward and backward +class MatMulImplForward_cpu + : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>, + void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t, + const void *, const void *, void *)> {}; +class MatMulImplBackward_cpu + : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, + void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t, + const void *, const void *, void *)> {}; + +class MatMulImpl_cpu : public OperatorImpl { +public: + MatMulImpl_cpu(const MatMul_Op &op): OperatorImpl(op) {} + + static std::unique_ptr<MatMulImpl_cpu> create(const MatMul_Op &op) { + return std::make_unique<MatMulImpl_cpu>(op); + } + + void forward() override; +}; + +namespace { +static Registrar<MatMul_Op> registrarMatMulImpl_cpu("cpu", Aidge::MatMulImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bc52779eff274379a853ea84fb839c9486652433 --- /dev/null +++ b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp @@ -0,0 +1,58 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
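The static Registrar objects that close each kernel header run at program start-up and populate a map from a DataType tuple to a kernel function pointer; the impl's forward() then looks the kernel up by the tensors' actual data types. A toy model of that mechanism (simplified names and types, not the real Aidge Registrar):

```cpp
#include <cstddef>
#include <iostream>
#include <map>
#include <tuple>

// Simplified stand-in for the Registrable/Registrar pair above.
enum class DataType { Float32, Int32 };
using Key    = std::tuple<DataType, DataType, DataType>;
using Kernel = void (*)(std::size_t, const void*, const void*, void*);

std::map<Key, Kernel>& registry() { static std::map<Key, Kernel> r; return r; }

struct Registrar {
    Registrar(const Key& key, Kernel fn) { registry()[key] = fn; } // runs before main()
};

void matmulF32(std::size_t, const void*, const void*, void*) { /* kernel body */ }

static Registrar reg({DataType::Float32, DataType::Float32, DataType::Float32}, matmulF32);

int main() {
    // forward() resolves the kernel from the data types at run time:
    Kernel k = registry().at({DataType::Float32, DataType::Float32, DataType::Float32});
    std::cout << (k == &matmulF32) << '\n'; // -> 1
}
```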
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include <algorithm> + +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" + +namespace Aidge { + +template <class I, class W, class O> +void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize, + const void* input_, const void* weights_, void* output_) { + // FIXME: missing MatMul parameters as arguments + const I* input = static_cast<const I*>(input_); + const W* weights = static_cast<const W*>(weights_); + O* output = static_cast<O*>(output_); + + + std::fill(output, output+(batchSize*std::get<0>(attrs)), O(0)); + + for (std::size_t batch = 0; batch < batchSize; ++batch) { + for (std::size_t out = 0; out < std::get<0>(attrs); ++out) { + output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize, + input + (batch + 1)*oneInputSize, + weights + out*oneInputSize, + output[out + batch*std::get<0>(attrs)]); + } + } +} + + +namespace { +static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>); +static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>); +static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>); +} // namespace + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a96fcc226b927b135465ef9cf395d10f844a2646 --- /dev/null +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp @@ -0,0 +1,56 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
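Despite its name, the MatMul kernel above is a batched vector-matrix product: for each batch row it takes the inner product of the input row with each row-major weight row, one row per output channel (std::get<0>(attrs)). A worked example with hypothetical data:

```cpp
#include <cstddef>
#include <iostream>
#include <numeric>

int main() {
    const float input[3]   = {1.f, 2.f, 3.f};   // batchSize = 1, oneInputSize = 3
    const float weights[6] = {1.f, 0.f, 0.f,    // weight row for output channel 0
                              0.f, 1.f, 1.f};   // weight row for output channel 1
    float output[2] = {0.f, 0.f};               // kernel first zero-fills the output

    for (std::size_t out = 0; out < 2; ++out)   // 2 == std::get<0>(attrs)
        output[out] = std::inner_product(input, input + 3,
                                         weights + out * 3, output[out]);

    std::cout << output[0] << ' ' << output[1] << '\n'; // -> 1 5
}
```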
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ +#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/MaxPooling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// class MaxPooling_Op; + +// compute kernel registry for forward and backward +class MaxPoolingImpl2DForward_cpu + : public Registrable<MaxPoolingImpl2DForward_cpu, + std::tuple<DataType, DataType>, + void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; +class MaxPoolingImpl2DBackward_cpu + : public Registrable<MaxPoolingImpl2DBackward_cpu, + std::tuple<DataType, DataType>, + void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; + +class MaxPoolingImpl2D_cpu : public OperatorImpl { +public: + MaxPoolingImpl2D_cpu(const MaxPooling_Op<2> &op) : OperatorImpl(op) {} + + static std::unique_ptr<MaxPoolingImpl2D_cpu> create(const MaxPooling_Op<2> &op) { + return std::make_unique<MaxPoolingImpl2D_cpu>(op); + } + + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +// add cpu backend to MaxPooling_Op<2> implementation registry +static Registrar<MaxPooling_Op<2>> registrarMaxPoolingImpl2D_cpu("cpu", Aidge::MaxPoolingImpl2D_cpu::create); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..caa99e8678a72c7fd3c77fe8b7579ea739ac64c7 --- /dev/null +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp @@ -0,0 +1,215 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" +#include "aidge/utils/Types.h" +#include "aidge/data/Data.hpp" +#include <array> +#include <tuple> +#include <cmath> + +namespace Aidge { +/** + * @brief Forward kernel for 2D MaxPooling on CPU backend. + * @tparam I Input data type. + * @tparam O Output data type. + * @param attrs tuple of Attributes from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param output_ Output Tensor.
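+ * @note Output spatial sizes follow floor((in - kernel + stride) / stride):
+ * e.g. in = 5, kernel = 3, stride = 2 gives floor(4 / 2) = 2 positions, so
+ * every window lies fully inside the input and no padding is involved here.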
+ */ +template <class I, class O> +void MaxPoolingImpl2D_cpu_forward_kernel(const MaxPooling_Op<2>::Attrs &attrs, + const std::array<DimSize_t, 4> &dims, + const void *input_, + void *output_) { + // FIXME: missing MaxPooling attributes as arguments + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + + std::array<DimSize_t, 2> strideDims = std::get<0>(attrs); + std::array<DimSize_t, 2> kernelDims = std::get<1>(attrs); + + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / + static_cast<float>(strideDims[0]))); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / + static_cast<float>(strideDims[1]))); + + // kernel computation: keep the max of each pooling window + // output (batch, ch, Xout, Yout) + // input (batch, ch, Xin, Yin) + // does not take Dilation parameter into account + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t ch = 0; ch < dims[1]; ++ch) { + const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; + const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx); + for (std::size_t oy = 0; oy < oySize; ++oy) { + const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); + const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ?
kernelDims[1] : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const std::size_t ix = ox * strideDims[0]; + const std::size_t iy = oy * strideDims[1]; + + I poolValue(0.0); + bool valid = false; + + // scan the pooling window and keep the running max (iIndex already + // selects the current channel, so no extra channel loop is needed) + for (unsigned int sy = syMin; sy < syMax; ++sy) { + for (unsigned int sx = sxMin; sx < sxMax; ++sx) + { + const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + + if (!valid || value > poolValue) { + poolValue = value; + valid = true; + } + } + } + output[oIndexFull] = poolValue; + } + } + } + } +} + +//N2D2 version +/* +template <class T> +void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha, + const Tensor<T>& + inputs, + const Descriptor& desc, + const T* beta, + Tensor<T>& outputs, + Tensor<ArgMax>& argMax, + bool useArgMax, + const Tensor<bool>& maps) +{ + const unsigned int size = inputs.dimB() * outputs.dimZ(); + +#if defined(_OPENMP) && _OPENMP >= 200805 +#pragma omp parallel for collapse(2) if (size > 16) +#else +#pragma omp parallel for if (inputs.dimB() > 4 && size > 16) +#endif + for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) { + for (unsigned int output = 0; output < outputs.dimZ(); ++output) { + for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) { + for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) { + const unsigned int sxMin = (unsigned int)std::max( + desc.padding[0] - (int)(ox * desc.stride[0]), 0); + const unsigned int syMin = (unsigned int)std::max( + desc.padding[1] - (int)(oy * desc.stride[1]), 0); + const unsigned int sxMax = Utils::clamp + <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0], + 0, + desc.pool[0]); + const unsigned int syMax = Utils::clamp + <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1], + 0, + desc.pool[1]); + + const int ix = (int)(ox * desc.stride[0]) - desc.padding[0]; + const int iy = (int)(oy * desc.stride[1]) - desc.padding[1]; + + T poolValue(0.0); + + // For each output, compute the pool value + if (useArgMax) { + const ArgMax inputMax + = argMax(ox, oy, output, batchPos); + + if (inputMax.valid) { + poolValue = inputs(inputMax.ix, + inputMax.iy, + inputMax.channel, + batchPos); + } + } + else { + unsigned int ixMax = 0; + unsigned int iyMax = 0; + unsigned int channelMax = 0; + bool valid = false; + + for (unsigned int channel = 0; channel < inputs.dimZ(); + ++channel) + { + if (!maps.empty() && !maps(output, channel)) + continue; + + for (unsigned int sy = syMin; sy < syMax; ++sy) { + for (unsigned int sx = sxMin; sx < sxMax; ++sx) + { + const T value = inputs(ix + sx, + iy + sy, + channel, + batchPos); + + if (!valid || value > poolValue) { + poolValue = value; + valid = true; + + ixMax = ix + sx; + iyMax = iy + sy; + channelMax = channel; + } + } + } + } + + argMax(ox, oy, output, batchPos) + = ArgMax(ixMax, iyMax, channelMax, valid); + } + + outputs(ox, oy, output, batchPos) + = (*alpha) * poolValue + + (*beta) * outputs(ox, oy, output, batchPos); + } + } + } + } +} + +*/ + +namespace { +static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float32( + std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}), + Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>); +static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, + Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int, int>); +static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, + Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MulImpl.hpp b/include/aidge/backend/cpu/operator/MulImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..54361e4f5f7a361032c9f4928392f18f183724ac --- /dev/null +++ b/include/aidge/backend/cpu/operator/MulImpl.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MULIMPL_H_ +#define AIDGE_CPU_OPERATOR_MULIMPL_H_ + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Mul.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include <memory> +#include <vector> + +namespace Aidge { +// class Mul_Op; + +// compute kernel registry for forward and backward +class MulImplForward_cpu + : public Registrable<MulImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*,void*)> { +}; +class MulImplBackward_cpu + : public Registrable<MulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> { +}; + +class MulImpl_cpu : public OperatorImpl { +public: + MulImpl_cpu(const Mul_Op& op) : OperatorImpl(op) {} + + static std::unique_ptr<MulImpl_cpu> create(const Mul_Op& op) { + return std::make_unique<MulImpl_cpu>(op); + } + + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +static Registrar<Mul_Op> registrarMulImpl_cpu("cpu", Aidge::MulImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MULIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9caef8b88af3ca779309b60eba984a72db35f84a --- /dev/null +++ b/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp @@ -0,0 +1,64 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
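For the max-pooling kernel registered a few lines above, a worked example helps: with a 1×1×4×4 input, a 2×2 kernel and stride 2, each output value is the maximum of one disjoint 2×2 window. A standalone sketch with hypothetical data:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>

int main() {
    const float in[16] = { 1,  2,  3,  4,
                           5,  6,  7,  8,
                           9, 10, 11, 12,
                          13, 14, 15, 16};       // dims = {1, 1, 4, 4}
    float out[4];                                // oxSize = oySize = 2

    for (std::size_t ox = 0; ox < 2; ++ox)
        for (std::size_t oy = 0; oy < 2; ++oy) {
            float best = in[(2 * ox) * 4 + 2 * oy];
            for (std::size_t sx = 0; sx < 2; ++sx)       // kernel window
                for (std::size_t sy = 0; sy < 2; ++sy)
                    best = std::max(best, in[(2 * ox + sx) * 4 + (2 * oy + sy)]);
            out[ox * 2 + oy] = best;
        }

    std::cout << out[0] << ' ' << out[1] << ' '
              << out[2] << ' ' << out[3] << '\n';        // -> 6 8 14 16
}
```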
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MULIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_MULIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/MulImpl.hpp" + +namespace Aidge { +template <class I1, class I2, class O> +void MulImpl_cpu_forward_kernel(std::size_t input1Length, + std::size_t input2Length, + const void* input1_, + const void* input2_, + void* output_) { + + const I1* input_1 = static_cast<const I1*>(input1_); + const I2* input_2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + if (input2Length == input1Length) + { + for (std::size_t i = 0; i < input1Length; ++i) { + output[i] = input_1[i] * input_2[i]; + } + } + else if (input2Length == 1) + { + for (std::size_t i = 0; i < input1Length; ++i) { + output[i] = input_1[i] * input_2[0]; + } + } + else // input_2 is 1d and of size the number of channels of input_1 + { + for (std::size_t i = 0; i < input1Length; ++i) { + std::size_t channelIdx = i % input2Length; + output[i] = input_1[i] * input_2[channelIdx]; + } + } +} + +namespace { +static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::MulImpl_cpu_forward_kernel<float, float, float>); +static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::MulImpl_cpu_forward_kernel<int, int, int>); +static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::MulImpl_cpu_forward_kernel<double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MULIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9d93828f5817043f4f5cb07166db213c02866ca1 --- /dev/null +++ b/include/aidge/backend/cpu/operator/PadImpl.hpp @@ -0,0 +1,58 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
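The Mul kernel above, like Div, Pow and Sub, dispatches on input2Length to cover three shapes for the second operand: same length (elementwise), length one (scalar broadcast), and anything else, where input_2 is cycled via i % input2Length. A standalone sketch of the three cases with hypothetical data:

```cpp
#include <cstddef>
#include <iostream>

// Same dispatch as the Mul/Div/Pow/Sub kernels, for one operation.
template <class T>
void mulBroadcast(std::size_t n1, std::size_t n2, const T* a, const T* b, T* out) {
    if (n2 == n1)                                  // elementwise
        for (std::size_t i = 0; i < n1; ++i) out[i] = a[i] * b[i];
    else if (n2 == 1)                              // scalar broadcast
        for (std::size_t i = 0; i < n1; ++i) out[i] = a[i] * b[0];
    else                                           // cycle b over flat indices
        for (std::size_t i = 0; i < n1; ++i) out[i] = a[i] * b[i % n2];
}

int main() {
    const int a[4] = {1, 2, 3, 4};
    const int s[1] = {10};
    const int c[2] = {10, 100};
    int out[4];
    mulBroadcast(4, 1, a, s, out);  // -> 10 20 30 40
    mulBroadcast(4, 2, a, c, out);  // -> 10 200 30 400
    std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << ' ' << out[3] << '\n';
}
```

As the kernel's own comment notes, the fallback case is intended for a 1-D input_2 holding one value per channel; the modulo simply walks that vector cyclically over the flattened element index.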
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_PADIMPL_H_ +#define AIDGE_CPU_OPERATOR_PADIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Pad.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// class Pad_Op; + +// compute kernel registry for forward and backward +class PadImpl2DForward_cpu + : public Registrable<PadImpl2DForward_cpu, + std::tuple<DataType, DataType>, + void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, + void *)> {}; +class PadImpl2DBackward_cpu + : public Registrable<PadImpl2DBackward_cpu, + std::tuple<DataType, DataType>, + void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, + void *)> {}; + +class PadImpl2D_cpu : public OperatorImpl { +public: + PadImpl2D_cpu(const Pad_Op<2> &op) : OperatorImpl(op) {} + + static std::unique_ptr<PadImpl2D_cpu> create(const Pad_Op<2> &op) { + return std::make_unique<PadImpl2D_cpu>(op); + } + + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +// add cpu backend to Pad_Op<2> implementation registry +static Registrar<Pad_Op<2>> registrarPadImpl2D_cpu("cpu", Aidge::PadImpl2D_cpu::create); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_PADIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8b793257d2f3f126793316d463fe2542512da939 --- /dev/null +++ b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp @@ -0,0 +1,110 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/PadImpl.hpp" +#include "aidge/utils/Types.h" +#include <cmath> +#include <array> +#include <algorithm> + +namespace Aidge { +/** + * @brief Forward kernel for 2D Padding on CPU backend. + * @tparam I Input data type. + * @tparam O Output data type. + * @param attrs tuple of Attributes from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param output_ Output Tensor.
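+ * @note Border handling maps each output coordinate back to an input one:
+ * Constant keeps the fill value (std::get<2>(attrs)) outside the input,
+ * Edge clamps the index to the nearest border, Reflect mirrors it (a
+ * negative ix becomes -ix), and Wrap takes it modulo the input size. For
+ * instance with a padding of 2, Reflect reads input index 2 - ox while
+ * ox < 2, then ox - 2 onwards.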
+ */ +template <class I, class O> +void PadImpl2D_cpu_forward_kernel(const Pad_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, + const void *input_, void *output_) +{ + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + + const std::size_t oySize = dims[2] + std::get<0>(attrs)[0] + std::get<0>(attrs)[1]; + const std::size_t oxSize = dims[3] + std::get<0>(attrs)[2] + std::get<0>(attrs)[3]; + + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t ch = 0; ch < dims[1]; ++ch) { + const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; + const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; + + for (unsigned int oy = 0; oy < oySize; ++oy) { + for (unsigned int ox = 0; ox < oxSize; ++ox) { + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + + O outputValue = std::get<2>(attrs); + + if (std::get<1>(attrs) == PadBorderType::Constant) { + int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]); + int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]); + + if (ix >= 0 && ix < static_cast<int>(dims[3]) && iy >= 0 && iy < static_cast<int>(dims[2])) { + outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; + } + } + else if (std::get<1>(attrs) == PadBorderType::Edge) { + int ix = std::max(0, std::min(static_cast<int>(dims[3]) - 1, static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]))); + int iy = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]))); + + outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; + } + else if (std::get<1>(attrs) == PadBorderType::Reflect) { + int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]); + int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]); + + if (ix < 0) + ix = 0 - ix; + if (iy < 0) + iy = 0 - iy; + if (ix >= static_cast<int>(dims[3])) + ix = static_cast<int>(dims[3]) - ix; + if (iy >= static_cast<int>(dims[2])) + iy = static_cast<int>(dims[2]) - iy; + + outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; + } + else if (std::get<1>(attrs) == PadBorderType::Wrap) { + int ix = (static_cast<int>(dims[3]) + static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3])) % static_cast<int>(dims[3]); + int iy = (static_cast<int>(dims[2]) + static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1])) % static_cast<int>(dims[2]); + + outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; + } + + output[oIndexFull] = outputValue; + } + } + } + } +} + +namespace { +static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, + Aidge::PadImpl2D_cpu_forward_kernel<float, float>); +static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, + Aidge::PadImpl2D_cpu_forward_kernel<int, int>); +static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, + Aidge::PadImpl2D_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..c33fbf0ed4adf4a0206ce8ed32ffdce2cd9ad17c --- /dev/null +++ b/include/aidge/backend/cpu/operator/PowImpl.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_POWIMPL_H_ +#define AIDGE_CPU_OPERATOR_POWIMPL_H_ + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Pow.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include <memory> +#include <vector> + +namespace Aidge { +// class Pow_Op; + +// compute kernel registry for forward and backward +class PowImplForward_cpu + : public Registrable<PowImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*,void*)> { +}; +class PowImplBackward_cpu + : public Registrable<PowImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> { +}; + +class PowImpl_cpu : public OperatorImpl { +public: + PowImpl_cpu(const Pow_Op& op) : OperatorImpl(op) {} + + static std::unique_ptr<PowImpl_cpu> create(const Pow_Op& op) { + return std::make_unique<PowImpl_cpu>(op); + } + + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +static Registrar<Pow_Op> registrarPowImpl_cpu("cpu", Aidge::PowImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_POWIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c9c5db7e9aef07d24ba8f80c94b8f2494865e004 --- /dev/null +++ b/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp @@ -0,0 +1,66 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include <cmath> + +#include "aidge/backend/cpu/operator/PowImpl.hpp" + +namespace Aidge { +template <class I1, class I2, class O> +void PowImpl_cpu_forward_kernel(std::size_t input1Length, + std::size_t input2Length, + const void* input1_, + const void* input2_, + void* output_) { + + const I1* input_1 = static_cast<const I1*>(input1_); + const I2* input_2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + + if (input2Length == input1Length) + { + for (std::size_t i = 0; i < input1Length; ++i) { + output[i] = std::pow(input_1[i], input_2[i]); + } + } + else if (input2Length == 1) + { + for (std::size_t i = 0; i < input1Length; ++i) { + output[i] = std::pow(input_1[i], input_2[0]); + } + } + else // input_2 is 1d and of size the number of channels of input_1 + { + for (std::size_t i = 0; i < input1Length; ++i) { + std::size_t channelIdx = i % input2Length; + output[i] = std::pow(input_1[i], input_2[channelIdx]); + } + } +} + +namespace { +static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::PowImpl_cpu_forward_kernel<float, float, float>); +static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::PowImpl_cpu_forward_kernel<int, int, int>); +static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::PowImpl_cpu_forward_kernel<double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ProducerImpl.hpp b/include/aidge/backend/cpu/operator/ProducerImpl.hpp index 032172dbf0995fc62ce631aa5eba1cabf2374ad3..19361f1903e8737562dba63b24f3410e6eba1e5b 100644 --- a/include/aidge/backend/cpu/operator/ProducerImpl.hpp +++ b/include/aidge/backend/cpu/operator/ProducerImpl.hpp @@ -21,31 +21,19 @@ namespace Aidge { class ProducerImpl_cpu : public OperatorImpl { - private: - const Producer_Op &mOp; - - public: - ProducerImpl_cpu(const Producer_Op &op) : mOp(op) {} +public: + ProducerImpl_cpu(const Producer_Op &op) : OperatorImpl(op) {} static std::unique_ptr<ProducerImpl_cpu> create(const Producer_Op &op) { return std::make_unique<ProducerImpl_cpu>(op); } - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final; - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward(); - - void backward(); + void forward() override; }; namespace { -static Registrar<Producer_Op> registrarProducer1DImpl_cpu("cpu", Aidge::ProducerImpl_cpu::create); +static Registrar<Producer_Op> registrarProducerImpl_cpu("cpu", Aidge::ProducerImpl_cpu::create); } // namespace } // namespace Aidge diff --git a/include/aidge/backend/cpu/operator/ReLUImpl.hpp 
b/include/aidge/backend/cpu/operator/ReLUImpl.hpp index 537bdeeaf89b388a82e819330649c2ae3445c590..6596c1c9052ca8f919c3cb2fa7ef5a2fa1f823d4 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp @@ -31,28 +31,15 @@ class ReLUImplBackward_cpu }; class ReLUImpl_cpu : public OperatorImpl { - protected: - const ReLU_Op& mOp; - std::array<NbElts_t, 1> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - ReLUImpl_cpu(const ReLU_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {} +public: + ReLUImpl_cpu(const ReLU_Op& op) : OperatorImpl(op) {} static std::unique_ptr<ReLUImpl_cpu> create(const ReLU_Op& op) { return std::make_unique<ReLUImpl_cpu>(op); } - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final; - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - void forward(); - - void backward(); + void forward() override; }; namespace { diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e336adb003769afd97770fd3dd65796b5bbf6a2d --- /dev/null +++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp @@ -0,0 +1,51 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
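The ReLU, Softmax, FC, LeakyReLU and Producer hunks all apply the same refactor: the per-implementation mOp, mNbConsumedData and mNbProducedData members and the consumer/producer queries move into the OperatorImpl base class, leaving each backend class with little more than a constructor, a create() factory and forward(). A sketch of what a new implementation looks like after this change, with a hypothetical Foo_Op and the same includes as the headers above:

```cpp
// Hypothetical post-refactor implementation skeleton (Foo_Op is made up).
class FooImpl_cpu : public OperatorImpl {
public:
    FooImpl_cpu(const Foo_Op& op) : OperatorImpl(op) {}  // base keeps mOp and the counters

    static std::unique_ptr<FooImpl_cpu> create(const Foo_Op& op) {
        return std::make_unique<FooImpl_cpu>(op);
    }

    // Declared by most impls above; returning 0 is how Add/Mul/... advertise
    // that the kernel may run in-place. MatMul omits it, so the base class
    // presumably supplies a default.
    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;

    void forward() override;                             // dispatch to the registered kernel
};

namespace {
static Registrar<Foo_Op> registrarFooImpl_cpu("cpu", FooImpl_cpu::create);
}
```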
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ +#define __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Scaling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include <memory> +#include <vector> +#include <array> + +namespace Aidge { +// class Scaling_Op; + +// compute kernel registry for forward and backward +class ScalingImplForward_cpu + : public Registrable<ScalingImplForward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> { +}; +class ScalingImplBackward_cpu + : public Registrable<ScalingImplBackward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> { +}; + +class ScalingImpl_cpu : public OperatorImpl { +public: + ScalingImpl_cpu(const Scaling_Op& op) : OperatorImpl(op) {} + + static std::unique_ptr<ScalingImpl_cpu> create(const Scaling_Op& op) { + return std::make_unique<ScalingImpl_cpu>(op); + } + + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +static Registrar<Scaling_Op> registrarScalingImpl_cpu("cpu", Aidge::ScalingImpl_cpu::create); +} +} // namespace Aidge + +#endif /* __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8fe13bce3a4c470d77b083603d3b889a46fda71f --- /dev/null +++ b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp @@ -0,0 +1,45 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef __AIDGE_CPU_OPERATOR_ScalingIMPL_FORWARD_KERNEL_H__ +#define __AIDGE_CPU_OPERATOR_ScalingIMPL_FORWARD_KERNEL_H__ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/ScalingImpl.hpp" + +namespace Aidge { +template <class I, class O> +void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Attrs& attrs, + std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + const I& scalingFactor = static_cast<const I&>(std::get<0>(attrs)); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = input[i] * scalingFactor; + } +} + +namespace { +static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::ScalingImpl_cpu_forward_kernel<float, float>); +static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::ScalingImpl_cpu_forward_kernel<int, int>); +static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::ScalingImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* __AIDGE_CPU_OPERATOR_ScalingIMPL_FORWARD_KERNEL_H__ */ diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp index 08567ab98e55233f1f578e82cb39ac5681f0a839..995f57f7c0168328e1982315358201c9f8940235 100644 --- a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp @@ -31,28 +31,15 @@ class SoftmaxImplBackward_cpu }; class SoftmaxImpl_cpu : public OperatorImpl { - private: - const Softmax_Op& mOp; - std::array<NbElts_t, 1> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - SoftmaxImpl_cpu(const Softmax_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {} +public: + SoftmaxImpl_cpu(const Softmax_Op& op) : OperatorImpl(op) {} static std::unique_ptr<SoftmaxImpl_cpu> create(const Softmax_Op& op) { return std::make_unique<SoftmaxImpl_cpu>(op); } - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final; - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - void forward(); - - void backward(); + void forward() override; }; namespace { diff --git a/include/aidge/backend/cpu/operator/SqrtImpl.hpp b/include/aidge/backend/cpu/operator/SqrtImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1880408cd52f537c6d4965438ece88151d4df6e3 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SqrtImpl.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
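One subtlety in the Scaling kernel above: the factor is converted to the input type I before the loop (static_cast<const I&>(std::get<0>(attrs))), so in the Int32 instantiation a fractional factor is truncated before any element is multiplied. Assuming the attribute is stored as a float, the effect is:

```cpp
#include <iostream>

int main() {
    const float attrFactor = 0.5f;                           // assumed float attribute
    const int scalingFactor = static_cast<int>(attrFactor);  // truncated to 0
    const int in[3] = {10, 20, 30};
    int out[3];
    for (int i = 0; i < 3; ++i)
        out[i] = in[i] * scalingFactor;
    std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << '\n'; // -> 0 0 0
}
```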
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_H_ +#define AIDGE_CPU_OPERATOR_SQRTIMPL_H_ + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Sqrt.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include <memory> +#include <vector> + +namespace Aidge { +// class Sqrt_Op; + +// compute kernel registry for forward and backward +class SqrtImplForward_cpu + : public Registrable<SqrtImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { +}; +class SqrtImplBackward_cpu + : public Registrable<SqrtImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { +}; + +class SqrtImpl_cpu : public OperatorImpl { +public: + SqrtImpl_cpu(const Sqrt_Op& op) : OperatorImpl(op) {} + + static std::unique_ptr<SqrtImpl_cpu> create(const Sqrt_Op& op) { + return std::make_unique<SqrtImpl_cpu>(op); + } + + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +static Registrar<Sqrt_Op> registrarSqrtImpl_cpu("cpu", Aidge::SqrtImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a180fc2cc206ef27b52d506a981f9f50f7bf8a3e --- /dev/null +++ b/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp @@ -0,0 +1,44 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include <cmath> + +#include "aidge/backend/cpu/operator/SqrtImpl.hpp" + +namespace Aidge { +template <class I, class O> +void SqrtImpl_cpu_forward_kernel(std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = std::sqrt(input[i]); + } +} + +namespace { +static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::SqrtImpl_cpu_forward_kernel<float, float>); +static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::SqrtImpl_cpu_forward_kernel<int, int>); +static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::SqrtImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SubImpl.hpp b/include/aidge/backend/cpu/operator/SubImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..08ec69e509b2b6c02e30f613abd83208de254f75 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SubImpl.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
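The Sqrt kernel just above is registered for Int32 as well; there std::sqrt promotes the int to double and the assignment narrows the result back, so integer outputs are truncated:

```cpp
#include <cmath>
#include <iostream>

int main() {
    const int in[3] = {4, 8, 15};
    int out[3];
    for (int i = 0; i < 3; ++i)
        out[i] = std::sqrt(in[i]);   // 2.0, 2.828..., 3.872... truncated to int
    std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << '\n'; // -> 2 2 3
}
```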
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_H_ +#define AIDGE_CPU_OPERATOR_SUBIMPL_H_ + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Sub.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include <memory> +#include <vector> + +namespace Aidge { +// class Sub_Op; + +// compute kernel registry for forward and backward +class SubImplForward_cpu + : public Registrable<SubImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*,void*)> { +}; +class SubImplBackward_cpu + : public Registrable<SubImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> { +}; + +class SubImpl_cpu : public OperatorImpl { +public: + SubImpl_cpu(const Sub_Op& op) : OperatorImpl(op) {} + + static std::unique_ptr<SubImpl_cpu> create(const Sub_Op& op) { + return std::make_unique<SubImpl_cpu>(op); + } + + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +static Registrar<Sub_Op> registrarSubImpl_cpu("cpu", Aidge::SubImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..08f2e24fa38d2739943279666187a55d7076a89b --- /dev/null +++ b/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp @@ -0,0 +1,65 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/SubImpl.hpp"
+
+namespace Aidge {
+template <class I1, class I2, class O>
+void SubImpl_cpu_forward_kernel(std::size_t input1Length,
+                                std::size_t input2Length,
+                                const void* input1_,
+                                const void* input2_,
+                                void* output_) {
+
+    const I1* input_1 = static_cast<const I1*>(input1_);
+    const I2* input_2 = static_cast<const I2*>(input2_);
+    O* output = static_cast<O*>(output_);
+
+    if (input2Length == input1Length)
+    {
+        for (std::size_t i = 0; i < input1Length; ++i) {
+            output[i] = input_1[i] - input_2[i];
+        }
+    }
+    else if (input2Length == 1)
+    {
+        // input_2 is a scalar: subtract it from every element of input_1
+        for (std::size_t i = 0; i < input1Length; ++i) {
+            output[i] = input_1[i] - input_2[0];
+        }
+    }
+    else // input_2 is 1-D and its length matches the last dimension of input_1
+    {
+        for (std::size_t i = 0; i < input1Length; ++i) {
+            std::size_t broadcastIdx = i % input2Length;
+            output[i] = input_1[i] - input_2[broadcastIdx];
+        }
+    }
+}
+
+namespace {
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::SubImpl_cpu_forward_kernel<float, float, float>);
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::SubImpl_cpu_forward_kernel<int, int, int>);
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::SubImpl_cpu_forward_kernel<double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_ */
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..24ce15ab7ead32f98c7ac3edcd34bb2010ff4326
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+numpy
diff --git a/setup.py b/setup.py
index 0b0f66e9132d66cdb6385d7f8c6c69ae0cc5d0e3..80500f3165dd87eb7b6dd73c78b89806cc8a874a 100644
--- a/setup.py
+++ b/setup.py
@@ -62,15 +62,17 @@ class CMakeBuild(build_ext):
         os.chdir(str(build_temp))
 
-        # Impose to use the executable of the python
+        # Impose to use the executable of the python
         # used to launch setup.py to setup PythonInterp
         param_py = "-DPYTHON_EXECUTABLE=" + sys.executable
-
-        install_path = f"{build_temp}/install" if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"]
-        self.spawn(['cmake', str(cwd), param_py, '-DTEST=OFF', f'-DCMAKE_INSTALL_PREFIX:PATH={install_path}'])
+        compile_type = 'Debug'
+        install_path = os.path.join(sys.prefix, "lib", "libAidge") if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"]
+
+        self.spawn(['cmake', str(cwd), param_py, '-DTEST=OFF', f'-DCMAKE_INSTALL_PREFIX:PATH={install_path}', f'-DCMAKE_BUILD_TYPE={compile_type}'])
         if not self.dry_run:
-            self.spawn(['make', 'all', 'install', '-j', max_jobs])
+            self.spawn(['cmake', '--build', '.', '--config', compile_type, '-j', max_jobs])
+            self.spawn(['cmake', '--install', '.', '--config', compile_type])
         os.chdir(str(cwd))
 
         aidge_package = build_lib / (get_project_name())
@@ -81,13 +83,13 @@ class CMakeBuild(build_ext):
         # Copy all shared object files from build_temp/lib to aidge_package
         for root, _, files in os.walk(build_temp.absolute()):
             for file in files:
-                if 
file.endswith('.so') and (root != str(aidge_package.absolute())): + if (file.endswith('.so') or file.endswith('.pyd')) and (root != str(aidge_package.absolute())): currentFile=os.path.join(root, file) - shutil.copy(currentFile, str(aidge_package.absolute())) + shutil.copy(currentFile, str(aidge_package.absolute())) # Copy version.txt in aidge_package os.chdir(os.path.dirname(__file__)) - shutil.copy("version.txt", str(aidge_package.absolute())) + shutil.copy("version.txt", str(aidge_package.absolute())) if __name__ == '__main__': @@ -100,13 +102,13 @@ if __name__ == '__main__': long_description_content_type="text/markdown", long_description="\n".join(DOCLINES[2:]), classifiers=[c for c in CLASSIFIERS.split('\n') if c], - platforms=["Linux"], packages=find_packages(where="."), include_package_data=True, ext_modules=[CMakeExtension(get_project_name())], cmdclass={ 'build_ext': CMakeBuild, }, + install_requires=['aidge_core'], zip_safe=False, ) diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index d3da42185237a59146af17199e34a00dbebd6d96..4be0078199671bc09af73a5f9dbfcd0ff2e61bed 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -25,38 +25,12 @@ // AddImpl_cpu<1> ////////////////////////////////// -Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const { - assert(mOp.getInput(0) && "requires valid input"); - // Requires the whole tensors - return static_cast<int>(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size()); -} - Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! + // this implementation can be in-place return 0; } -Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - return std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size(); -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} -void Aidge::AddImpl_cpu<1>::updateConsummerProducer(){ - // Update producer-consumer data - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); -} void Aidge::AddImpl_cpu<1>::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); // Find the correct kernel type @@ -68,11 +42,6 @@ void Aidge::AddImpl_cpu<1>::forward() { kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(), mOp.getInput(0)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - -} - -void Aidge::AddImpl_cpu<1>::backward() { - printf("Not implemented yet.\n"); } @@ -80,67 +49,26 @@ void Aidge::AddImpl_cpu<1>::backward() { // AddImpl_cpu<2> ////////////////////////////////// - -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = 
std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), - NbElts_t(1), std::multiplies<NbElts_t>()); -} - Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! + // this implementation of add can be in-place return 0; } -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), - NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { - assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); - return mNbConsumedData[static_cast<std::size_t>(inputIdx)]; -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} -void Aidge::AddImpl_cpu<2>::updateConsummerProducer(){ - // Update producer-consumer data - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); -} void Aidge::AddImpl_cpu<2>::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); - assert(mOp.mInputs[1] && "missing input #1"); + assert(mOp.getInput(1) && "missing input #1"); // Find the correct kernel type auto kernelFunc = Registrar<AddImplForward_cpu<2>>::create({ mOp.getInput(0)->dataType(), - mOp.mInputs[1]->dataType(), + mOp.getInput(1)->dataType(), mOp.getOutput(0)->dataType()}); // Call kernel kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(), mOp.getInput(0)->getImpl()->rawPtr(), - mOp.mInputs[1]->getImpl()->rawPtr(), + mOp.getInput(1)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - -} - -void Aidge::AddImpl_cpu<2>::backward() { - printf("Not implemented yet.\n"); } @@ -148,69 +76,27 @@ void Aidge::AddImpl_cpu<2>::backward() { // AddImpl_cpu<3> ////////////////////////////////// - -Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), - Aidge::NbElts_t(1), std::multiplies<Aidge::NbElts_t>()); -} - Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! 
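A note on the `getNbRequiredProtected()` changes throughout this patch: returning 0 tells the scheduler that no output element risks overwriting input data that is still needed, which is what makes these element-wise implementations safe to run in place. A minimal sketch of why, assuming contiguous buffers (illustrative code, not part of the patch):

#include <cstddef>

// For a purely element-wise op, output[i] depends only on input[i], so the
// output buffer may alias the input buffer: element i is fully consumed
// before element i is written. Hence 0 "protected" elements are required.
void reluInPlaceSketch(float* data, std::size_t len) {
    for (std::size_t i = 0; i < len; ++i) {
        data[i] = (data[i] > 0.0f) ? data[i] : 0.0f;
    }
}
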
+ // this implementation of add can be in-place return 0; } -Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), - NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { - assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); - return mNbConsumedData[inputIdx]; -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbProducedData(Aidge::IOIndex_t outputIdx) const { - assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size()); - return mNbProducedData[static_cast<std::size_t>(outputIdx)]; -} -void Aidge::AddImpl_cpu<3>::updateConsummerProducer(){ - // Update producer-consumer data - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); -} void Aidge::AddImpl_cpu<3>::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); - assert(mOp.mInputs[1] && "missing input #1"); - assert(mOp.mInputs[2] && "missing input #2"); + assert(mOp.getInput(1) && "missing input #1"); + assert(mOp.getInput(2) && "missing input #2"); // Find the correct kernel type auto kernelFunc = Registrar<AddImplForward_cpu<3>>::create({ mOp.getInput(0)->dataType(), - mOp.mInputs[1]->dataType(), - mOp.mInputs[2]->dataType(), + mOp.getInput(1)->dataType(), + mOp.getInput(2)->dataType(), mOp.getOutput(0)->dataType()}); // Call kernel kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(), mOp.getInput(0)->getImpl()->rawPtr(), - mOp.mInputs[1]->getImpl()->rawPtr(), - mOp.mInputs[2]->getImpl()->rawPtr(), + mOp.getInput(1)->getImpl()->rawPtr(), + mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - -} - -void Aidge::AddImpl_cpu<3>::backward() { - printf("Not implemented yet.\n"); } diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index 6c434a5c38853a1dee66db5be95b6b1bfdde8162..ae93934c23ce9bbc97d071be2f258e04ec8ae877 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -20,49 +20,12 @@ #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>()); -} - Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if - // there is no padding! 
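For context on the `Registrar<AddImplForward_cpu<3>>::create({...})` call above: it resolves a kernel from the tuple of input/output `DataType`s that static `Registrar` objects recorded at program start-up. A rough sketch of that mechanism, with simplified stand-in names (`DT`, `kernelMap`, `RegisterKernel`, `pickKernel` are illustrative, not the Aidge API):

#include <cstddef>
#include <functional>
#include <map>
#include <stdexcept>
#include <tuple>

// Simplified stand-in for the Registrable/Registrar machinery.
enum class DT { Float32, Float64, Int32 };
using Key = std::tuple<DT, DT>;   // e.g. (input data type, output data type)
using Kernel = std::function<void(std::size_t, const void*, void*)>;

std::map<Key, Kernel>& kernelMap() {
    static std::map<Key, Kernel> m;   // populated during static initialization
    return m;
}

struct RegisterKernel {               // plays the role of Registrar<...>
    RegisterKernel(Key k, Kernel f) { kernelMap().emplace(k, std::move(f)); }
};

Kernel pickKernel(const Key& k) {     // plays the role of Registrar<...>::create
    const auto it = kernelMap().find(k);
    if (it == kernelMap().end()) {
        throw std::runtime_error("no kernel registered for this type combination");
    }
    return it->second;
}
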
+ // this implementation can be in-place return 0; } -Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { - assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); - return mNbConsumedData[static_cast<std::size_t>(inputIdx)]; -} - -Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const { - assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size())); - return mNbProducedData[static_cast<std::size_t>(outputIdx)]; -} -void Aidge::AvgPoolingImpl2D_cpu::updateConsummerProducer(){ - // Update producer-consumer data - for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum - // amount for a forward pass - mNbProducedData[0] += getRequiredMemory(0, {}); -} void Aidge::AvgPoolingImpl2D_cpu::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); // Find the correct kernel type @@ -70,11 +33,8 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() { Registrar<AvgPoolingImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()}); // Call kernel - kernelFunc(mOp.getParams(), + kernelFunc(dynamic_cast<const AvgPooling_Op<2>&>(mOp).getStaticAttributes(), mOp.getInput(0)->dims<4>(), mOp.getInput(0)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - } - -void Aidge::AvgPoolingImpl2D_cpu::backward() { printf("Not implemented yet.\n"); } diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp index a0d4d032ded9ede1b2dba307aa967af330167d25..c9d52b767b03008d19209e34fa9a6f2749a63450 100644 --- a/src/operator/BatchNormImpl.cpp +++ b/src/operator/BatchNormImpl.cpp @@ -19,50 +19,12 @@ #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>()); -} - Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if - // there is no padding! 
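The switch from `mOp.getParams()` to `dynamic_cast<const AvgPooling_Op<2>&>(mOp).getStaticAttributes()` above recovers the typed attribute set from the generic operator reference held by the implementation. A hedged sketch of the pattern with placeholder types (`PoolingLikeOp` and its fields are invented for illustration):

#include <array>
#include <utility>

// Placeholder types; the real Aidge operators and attribute tuples differ.
struct Operator { virtual ~Operator() = default; };

struct PoolingLikeOp : Operator {
    std::array<int, 2> strideDims{2, 2};
    std::array<int, 2> kernelDims{2, 2};
    auto getStaticAttributes() const { return std::make_pair(strideDims, kernelDims); }
};

void forwardSketch(const Operator& mOp) {
    // The reference dynamic_cast throws std::bad_cast if this implementation
    // was attached to an operator of the wrong concrete type.
    const auto& op = dynamic_cast<const PoolingLikeOp&>(mOp);
    const auto attrs = op.getStaticAttributes();
    (void)attrs;  // would be forwarded to the kernel selected via the registrar
}
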
+ // this implementation can be in-place return 0; } -Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { - assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); - return mNbConsumedData[static_cast<std::size_t>(inputIdx)]; -} - -Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const { - assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size())); - return mNbProducedData[static_cast<std::size_t>(outputIdx)]; -} -void Aidge::BatchNormImpl2D_cpu::updateConsummerProducer(){ - // Update producer-consumer data - for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum - // amount for a forward pass - - mNbProducedData[0] += getRequiredMemory(0, {}); -} void Aidge::BatchNormImpl2D_cpu::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); assert(mOp.getInput(1) && "missing input #1"); assert(mOp.getInput(2) && "missing input #2"); @@ -76,7 +38,7 @@ void Aidge::BatchNormImpl2D_cpu::forward() { mOp.getOutput(0)->dataType()}); // Call kernel - kernelFunc(mOp.getParams(), + kernelFunc(dynamic_cast<const BatchNorm_Op<2>&>(mOp).getStaticAttributes(), mOp.getInput(0)->dims<4>(), mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(), @@ -85,8 +47,4 @@ void Aidge::BatchNormImpl2D_cpu::forward() { mOp.getInput(4)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr(), true); - - } - -void Aidge::BatchNormImpl2D_cpu::backward() { printf("Not implemented yet.\n"); } diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp index 3e920cf68366b82bce8df29c8aea0c838e6a1364..5ac109e2f282ce55c8a274597be08561c2baf5c8 100644 --- a/src/operator/ConvDepthWiseImpl.cpp +++ b/src/operator/ConvDepthWiseImpl.cpp @@ -21,50 +21,12 @@ #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>()); -} - Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if - // there is no padding! 
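For readers tracing the five raw pointers wired into the batch-norm kernel above: by position they appear to be the input followed by scale, shift, running mean and running variance, and the kernel presumably evaluates the usual affine normalization. A per-channel sketch of that computation (layout and epsilon handling are assumptions, not read from the patch):

#include <cmath>
#include <cstddef>

// Inference-time batch norm: y = scale * (x - mean) / sqrt(var + eps) + shift.
void batchNormChannelSketch(const float* x, float* y, std::size_t n,
                            float scale, float shift, float mean, float var,
                            float eps = 1e-5f) {
    const float invStd = 1.0f / std::sqrt(var + eps);
    for (std::size_t i = 0; i < n; ++i) {
        y[i] = scale * (x[i] - mean) * invStd + shift;
    }
}
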
+ // this implementation can be in-place return 0; } -Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { - assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); - return mNbConsumedData[static_cast<std::size_t>(inputIdx)]; -} - -Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const { - assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size())); - return mNbProducedData[static_cast<std::size_t>(outputIdx)]; -} -void Aidge::ConvDepthWiseImpl2D_cpu::updateConsummerProducer(){ - // Update producer-consumer data - for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum - // amount for a forward pass - - mNbProducedData[0] += getRequiredMemory(0, {}); -} void Aidge::ConvDepthWiseImpl2D_cpu::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); assert(mOp.getInput(1) && "missing input #1"); assert(mOp.getInput(2) && "missing input #2"); @@ -77,9 +39,7 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() { mOp.getInput(2)->dataType(), mOp.getOutput(0)->dataType()}); // Call kernel - kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(), + kernelFunc(dynamic_cast<const ConvDepthWise_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(), mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(), mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); } - -void Aidge::ConvDepthWiseImpl2D_cpu::backward() { printf("Not implemented yet.\n"); } diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index b4ddf80929923a9c2c5998ac8614ebb0d3afe000..347d427908502b9976c2943417775bcbf0d3b344 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -21,48 +21,11 @@ #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>()); -} - Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if - // there is no padding! 
+ // this implementation can be in-place return 0; } -Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { - assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); - return mNbConsumedData[static_cast<std::size_t>(inputIdx)]; -} - -Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const { - assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size())); - return mNbProducedData[static_cast<std::size_t>(outputIdx)]; -} -void Aidge::ConvImpl2D_cpu::updateConsummerProducer(){ - // Update producer-consumer data - for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum - // amount for a forward pass - - mNbProducedData[0] += getRequiredMemory(0, {}); -} void Aidge::ConvImpl2D_cpu::forward() { // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); @@ -75,11 +38,7 @@ void Aidge::ConvImpl2D_cpu::forward() { mOp.getInput(2)->dataType(), mOp.getOutput(0)->dataType()}); // Call kernel - kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(), + kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(), mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(), mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - - } - -void Aidge::ConvImpl2D_cpu::backward() { printf("Not implemented yet.\n"); } diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f7cbc7d20b9126ab318a6989ebf627491cb247aa --- /dev/null +++ b/src/operator/DivImpl.cpp @@ -0,0 +1,51 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Div.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/DivImpl.hpp"
+#include "aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::DivImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+void Aidge::DivImpl_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(1) && "missing input #1");
+
+    assert(((mOp.getInput(1)->size() == 1) ||
+            (mOp.getInput(1)->size() == mOp.getInput(0)->size()) ||
+            (mOp.getInput(1)->nbDims() == 1 && mOp.getInput(1)->size() == mOp.getInput(0)->dims()[mOp.getInput(0)->nbDims()-1])
+           ) &&
+           "input #1 must either have a single element, have the same size as input #0, or be 1-D with as many elements as the last dimension of input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<DivImplForward_cpu>::create({
+        mOp.getInput(0)->dataType(),
+        mOp.getInput(1)->dataType(),
+        mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
+        std::static_pointer_cast<Tensor>(mOp.getInput(1))->size(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index 086902be0ab1c2027a8c62c143bc27921e5e9e1b..77ce50281cf4db94a492fce88a6d73eabde1bae5 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -21,84 +21,23 @@
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
 
-Aidge::NbElts_t Aidge::FCImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const
-{
-    assert(mOp.getInput(inputIdx) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto &inputDims
-        = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
-
-    return std::accumulate(
-        inputDims.begin(),
-        inputDims.end(),
-        Aidge::NbElts_t(1),
-        std::multiplies<Aidge::NbElts_t>());
-}
-
-Aidge::NbElts_t
-    Aidge::FCImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const
-{
-    // for the direct convolution algorithm, convolutions can be in-place, if
-    // there is no padding!
- return 0; -} - -Aidge::NbElts_t Aidge::FCImpl_cpu::getRequiredMemory( - const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const -{ - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate( - outputDims.begin(), - outputDims.end(), - static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::FCImpl_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const -{ - assert((inputIdx != gk_IODefaultIndex) && (inputIdx < mNbConsumedData.size())); - return mNbConsumedData[static_cast<std::size_t>(inputIdx)]; -} - -Aidge::NbElts_t Aidge::FCImpl_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const -{ - assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size()); - return mNbProducedData[static_cast<std::size_t>(outputIdx)]; -} - -void Aidge::FCImpl_cpu::updateConsummerProducer(){ - // Update producer-consumer data - for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx] - += getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum - // amount for a forward pass - - mNbProducedData[0] += getRequiredMemory(0, {}); -} - void Aidge::FCImpl_cpu::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); - assert(mOp.mInputs[1] && "missing input #1"); - assert(mOp.mInputs[2] && "missing input #2"); + assert(mOp.getInput(1) && "missing input #1"); + assert(mOp.getInput(2) && "missing input #2"); // Find the correct kernel type auto kernelFunc = Registrar<FCImplForward_cpu>::create( {mOp.getInput(0)->dataType(), - mOp.mInputs[1]->dataType(), - mOp.mInputs[2]->dataType(), + mOp.getInput(1)->dataType(), + mOp.getInput(2)->dataType(), mOp.getOutput(0)->dataType()}); // Call kernel // if (mOp.getInput(0)->nbDims() == 4) { // kernelFunc( - // mOp.getParams(), + // mOp.getStaticAttributes(), // std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(), // mOp.getInput(0)->getImpl()->rawPtr(), // mOp.mInputs[1]->getImpl()->rawPtr(), @@ -107,18 +46,11 @@ void Aidge::FCImpl_cpu::forward() // } // else kernelFunc( - mOp.getParams(), + dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), mOp.getInput(0)->dims()[0], mOp.getInput(0)->sizeM1(), mOp.getInput(0)->getImpl()->rawPtr(), - mOp.mInputs[1]->getImpl()->rawPtr(), - mOp.mInputs[2]->getImpl()->rawPtr(), + mOp.getInput(1)->getImpl()->rawPtr(), + mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - - -} - -void Aidge::FCImpl_cpu::backward() -{ - printf("Not implemented yet.\n"); } diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp index f6a44d381081c7c7f1dcbbf02d91212168cc07aa..c81acf60f0171bd819bfd760565e59d361401e29 100644 --- a/src/operator/LeakyReLUImpl.cpp +++ b/src/operator/LeakyReLUImpl.cpp @@ -21,42 +21,12 @@ #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp" -// FIXME: replace whole Tensor with minimum needed data quantity -Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const { - assert(mOp.getInput(0) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = mOp.getInput(0)->dims(); - - return 
std::accumulate(inputDims.begin(), inputDims.end(), - static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); -} - Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! + // this implementation can be in-place return 0; } -Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { - const auto& outputDims = mOp.getOutput(0)->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), - static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; -} - -Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} -void Aidge::LeakyReLUImpl_cpu::updateConsummerProducer(){ - mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); -} void Aidge::LeakyReLUImpl_cpu::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); // Find the correct kernel type @@ -65,12 +35,8 @@ void Aidge::LeakyReLUImpl_cpu::forward() { mOp.getOutput(0)->dataType()}); // Call kernel - kernelFunc(mOp.getParams(), + kernelFunc(dynamic_cast<const LeakyReLU_Op&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(), mOp.getInput(0)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); } - -void Aidge::LeakyReLUImpl_cpu::backward() { - printf("Not implemented yet.\n"); -} diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f4812629c4bcf7b699d3eca66ff4e884df0c04d6 --- /dev/null +++ b/src/operator/MatMulImpl.cpp @@ -0,0 +1,55 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> // std::accumulate +#include <thread> // std::this_thread::sleep_for +#include <vector> + +#include "aidge/operator/MatMul.hpp" +#include "aidge/utils/Types.h" + +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" +#include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp" + +void Aidge::MatMulImpl_cpu::forward() +{ + assert(mOp.getInput(0) && "missing input #0"); + assert(mOp.getInput(1) && "missing input #1"); + + // Find the correct kernel type + auto kernelFunc = Registrar<MatMulImplForward_cpu>::create( + {mOp.getInput(0)->dataType(), + mOp.getInput(1)->dataType(), + mOp.getOutput(0)->dataType()}); + + // Call kernel + // if (mOp.getInput(0)->nbDims() == 4) { + // kernelFunc( + // mOp.getStaticAttributes(), + // std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(), + // mOp.getInput(0)->getImpl()->rawPtr(), + // mOp.mInputs[1]->getImpl()->rawPtr(), + // mOp.mInputs[2]->getImpl()->rawPtr(), + // mOp.getOutput(0)->getImpl()->rawPtr()); + // } + // else + kernelFunc( + dynamic_cast<const MatMul_Op&>(mOp).getStaticAttributes(), + mOp.getInput(0)->dims()[0], + mOp.getInput(0)->sizeM1(), + mOp.getInput(0)->getImpl()->rawPtr(), + mOp.getInput(1)->getImpl()->rawPtr(), + mOp.getOutput(0)->getImpl()->rawPtr()); + + +} diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c5127c1e4577b3da44716cdc34358a8906b9cbb0 --- /dev/null +++ b/src/operator/MaxPoolingImpl.cpp @@ -0,0 +1,40 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <numeric>
+#include <thread>
+#include <vector>
+
+#include "aidge/utils/Types.h"
+#include "aidge/operator/MaxPooling.hpp"
+
+#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
+#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+void Aidge::MaxPoolingImpl2D_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc =
+        Registrar<MaxPoolingImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(dynamic_cast<const MaxPooling_Op<2>&>(mOp).getStaticAttributes(),
+               mOp.getInput(0)->dims<4>(),
+               mOp.getInput(0)->getImpl()->rawPtr(),
+               mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/src/operator/MulImpl.cpp b/src/operator/MulImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b6eb245cf0b1afc8893dfbab13d3294b945b3e0e
--- /dev/null
+++ b/src/operator/MulImpl.cpp
@@ -0,0 +1,51 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Mul.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/MulImpl.hpp"
+#include "aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::MulImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+void Aidge::MulImpl_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(1) && "missing input #1");
+
+    assert(((mOp.getInput(1)->size() == 1) ||
+            (mOp.getInput(1)->size() == mOp.getInput(0)->size()) ||
+            (mOp.getInput(1)->nbDims() == 1 && mOp.getInput(1)->size() == mOp.getInput(0)->dims()[mOp.getInput(0)->nbDims()-1])
+           ) &&
+           "input #1 must either have a single element, have the same size as input #0, or be 1-D with as many elements as the last dimension of input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<MulImplForward_cpu>::create({
+        mOp.getInput(0)->dataType(),
+        mOp.getInput(1)->dataType(),
+        mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
+        std::static_pointer_cast<Tensor>(mOp.getInput(1))->size(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+}
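`Mul` above, like `Div`, `Pow` and `Sub`, accepts exactly three shapes for input #1, and the kernels reduce all three cases to a single modulo index over the flattened tensor, i.e. broadcasting along the last, contiguous dimension. A compact sketch of the dispatch, assuming float data (illustrative, not the Aidge kernel itself):

#include <cassert>
#include <cstddef>

// Input #1 shapes accepted by these binary kernels, all handled by one index:
//   len1 == 1        -> scalar broadcast       (i % 1 == 0)
//   len1 == len0     -> plain element-wise op  (i % len0 == i)
//   len1 == last dim -> values repeated along the last, contiguous dimension
void mulKernelSketch(std::size_t len0, std::size_t len1,
                     const float* in0, const float* in1, float* out) {
    assert(len1 != 0 && len0 % len1 == 0);
    for (std::size_t i = 0; i < len0; ++i) {
        out[i] = in0[i] * in1[i % len1];
    }
}
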
diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7c2af9e2161ddc4567b702690b8f268fe1af1b6c
--- /dev/null
+++ b/src/operator/PadImpl.cpp
@@ -0,0 +1,45 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/utils/Types.h"
+#include "aidge/operator/Conv.hpp"
+
+#include "aidge/backend/cpu/operator/PadImpl.hpp"
+#include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const {
+    assert(inputIdx == 0 && "operator has only one input");
+    (void) inputIdx;
+
+    // Padding cannot be in-place!
+    // We must ensure that we do not overwrite data that has not been consumed yet.
+    const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getInput(0))->size();
+    const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size();
+    return (outputSize - inputSize);
+}
+
+void Aidge::PadImpl2D_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc =
+        Registrar<PadImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(dynamic_cast<const Pad_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+               mOp.getInput(0)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
+}
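`PadImpl2D_cpu::getNbRequiredProtected()` above is the one place in this patch where in-place execution is refused: the padded output is larger than the input, so `outputSize - inputSize` elements must be protected from being overwritten. A worked example of that arithmetic, with shapes invented for illustration:

#include <cstddef>
#include <iostream>

int main() {
    // Hypothetical NCHW tensor 2x3x5x5 padded by 1 on every spatial border
    // becomes 2x3x7x7.
    const std::size_t inputSize  = 2 * 3 * 5 * 5;   // 150 elements
    const std::size_t outputSize = 2 * 3 * 7 * 7;   // 294 elements
    // Output elements that must not overwrite unconsumed input data:
    std::cout << (outputSize - inputSize) << '\n';  // prints 144
    return 0;
}
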
diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..52a4f46956e0d0f348583a23772c519a64ca857d
--- /dev/null
+++ b/src/operator/PowImpl.cpp
@@ -0,0 +1,51 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Pow.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/PowImpl.hpp"
+#include "aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::PowImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+void Aidge::PowImpl_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(1) && "missing input #1");
+
+    assert(((mOp.getInput(1)->size() == 1) ||
+            (mOp.getInput(1)->size() == mOp.getInput(0)->size()) ||
+            (mOp.getInput(1)->nbDims() == 1 && mOp.getInput(1)->size() == mOp.getInput(0)->dims()[mOp.getInput(0)->nbDims()-1])
+           ) &&
+           "input #1 must either have a single element, have the same size as input #0, or be 1-D with as many elements as the last dimension of input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<PowImplForward_cpu>::create({
+        mOp.getInput(0)->dataType(),
+        mOp.getInput(1)->dataType(),
+        mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
+        std::static_pointer_cast<Tensor>(mOp.getInput(1))->size(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/src/operator/ProducerImpl.cpp b/src/operator/ProducerImpl.cpp
index 664f3745414380fbaf5654ab035ba2ab957da87b..404d95ef685fea3c5796e396a2c5e17c60ce53bc 100644
--- a/src/operator/ProducerImpl.cpp
+++ b/src/operator/ProducerImpl.cpp
@@ -19,57 +19,16 @@
 #include "aidge/backend/cpu/operator/ProducerImpl.hpp"
 
-
-std::size_t Aidge::ProducerImpl_cpu::getNbRequiredData(
-    Aidge::IOIndex_t /*inputIdx*/) const
-{
-    return 0;
-}
-
-
-Aidge::DimSize_t Aidge::ProducerImpl_cpu::getNbConsumedData(
-    Aidge::IOIndex_t /*inputIdx*/) const
-{
-    return 0;
-}
-
-
-std::size_t Aidge::ProducerImpl_cpu::getNbRequiredProtected(
-    Aidge::IOIndex_t /*inputIdx*/) const
-{
-    return 0;
-}
-
-
-std::size_t Aidge::ProducerImpl_cpu::getRequiredMemory(
-    const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const
+Aidge::DimSize_t Aidge::ProducerImpl_cpu::getNbProducedData(
+    Aidge::IOIndex_t outputIdx) const
 {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
     (void) outputIdx;
 
-    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(
-        outputDims.begin(),
-        outputDims.end(),
-        NbElts_t(1),
-        std::multiplies<NbElts_t>());
-}
-
-Aidge::DimSize_t Aidge::ProducerImpl_cpu::getNbProducedData(
-    Aidge::IOIndex_t /*outputIdx*/) const
-{
-    return getRequiredMemory(0, {});
-}
-void Aidge::ProducerImpl_cpu::updateConsummerProducer(){
+    return std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size();
 }
 
 void Aidge::ProducerImpl_cpu::forward()
 {
 }
-
-
-void Aidge::ProducerImpl_cpu::backward()
-{
-    printf("Not implemented yet.\n");
-}
diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp
index cea50bc1e72cfa8e60cdd0f1839c03bcd568e052..647898d3f0495a74fe7c1dd48dba446bd92cb7b5 100644
--- a/src/operator/ReLUImpl.cpp
+++ b/src/operator/ReLUImpl.cpp
@@ -21,42 +21,12 @@
 #include 
"aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp" -// FIXME: replace whole Tensor with minimum needed data quantity -Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const { - assert(mOp.getInput(0) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), - static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); -} - Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! + // this implementation can be in-place return 0; } -Aidge::NbElts_t Aidge::ReLUImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), - static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; -} - -Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} -void Aidge::ReLUImpl_cpu::updateConsummerProducer(){ - mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); -} void Aidge::ReLUImpl_cpu::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); // Find the correct kernel type @@ -68,9 +38,4 @@ void Aidge::ReLUImpl_cpu::forward() { kernelFunc(mOp.getInput(0)->size(), mOp.getInput(0)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - -} - -void Aidge::ReLUImpl_cpu::backward() { - printf("Not implemented yet.\n"); } diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..39c1326dd677a704795f625440e385d3f3a6465c --- /dev/null +++ b/src/operator/ScalingImpl.cpp @@ -0,0 +1,41 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <numeric> // std::accumulate +#include <functional> // std::multiplies + +#include "aidge/operator/Scaling.hpp" + +#include "aidge/backend/cpu/operator/ScalingImpl.hpp" +#include "aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp" +#include "aidge/utils/Types.h" +#include <vector> + +Aidge::NbElts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; +} + +void Aidge::ScalingImpl_cpu::forward() { + assert(mOp.getInput(0) && "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = Registrar<ScalingImplForward_cpu>::create({ + mOp.getInput(0)->dataType(), + mOp.getOutput(0)->dataType()}); + + // Call kernel + kernelFunc(dynamic_cast<const Scaling_Op&>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(), + mOp.getInput(0)->getImpl()->rawPtr(), + mOp.getOutput(0)->getImpl()->rawPtr()); +} diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp index 03e8f9305617f6a7ced878470e3c94ba625f5b22..45b455a3f361587848e33864872f497493315a78 100644 --- a/src/operator/SoftmaxImpl.cpp +++ b/src/operator/SoftmaxImpl.cpp @@ -21,45 +21,12 @@ #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp" -// FIXME: replace whole Tensor with minimum needed data quantity -Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const { - assert(mOp.getInput(0) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), - static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); -} - Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! 
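The Softmax hunk continuing below strips the producer-consumer bookkeeping and keeps only the kernel dispatch; the kernel body is not part of this diff, but the `featureSize`-based call suggests a row-wise softmax. A numerically stable sketch of what such a kernel typically computes (illustrative, not the Aidge implementation):

#include <algorithm>
#include <cmath>
#include <cstddef>

// Stable softmax over one feature vector of length featureSize.
void softmaxRowSketch(const float* in, float* out, std::size_t featureSize) {
    const float maxVal = *std::max_element(in, in + featureSize);
    float sum = 0.0f;
    for (std::size_t i = 0; i < featureSize; ++i) {
        out[i] = std::exp(in[i] - maxVal);  // subtract max to avoid overflow
        sum += out[i];
    }
    for (std::size_t i = 0; i < featureSize; ++i) {
        out[i] /= sum;
    }
}
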
+ // this implementation can be in-place return 0; } -Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), - static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; -} - -Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} -void Aidge::SoftmaxImpl_cpu::updateConsummerProducer(){ - // Update producer-consumer data - for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum - // amount for a forward pass - - mNbProducedData[0] += getRequiredMemory(0, {}); -} void Aidge::SoftmaxImpl_cpu::forward() { - // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); assert(mOp.getInput(0)->nbDims()>1); @@ -77,13 +44,4 @@ void Aidge::SoftmaxImpl_cpu::forward() { featureSize, mOp.getInput(0)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - - - mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); -} - -void Aidge::SoftmaxImpl_cpu::backward() { - printf("Not implemented yet.\n"); } diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..75d1d2fb20b6748c931124847198b3168d9bdba7 --- /dev/null +++ b/src/operator/SqrtImpl.cpp @@ -0,0 +1,41 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Sqrt.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
+#include "aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::SqrtImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+void Aidge::SqrtImpl_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<SqrtImplForward_cpu>::create({
+        mOp.getInput(0)->dataType(),
+        mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(mOp.getInput(0)->size(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+}
\ No newline at end of file
diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6d87821d89ff84aa1046a9ecf0fdd83dcc5dda53
--- /dev/null
+++ b/src/operator/SubImpl.cpp
@@ -0,0 +1,51 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Sub.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/SubImpl.hpp"
+#include "aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::SubImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+void Aidge::SubImpl_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(1) && "missing input #1");
+
+    assert(((mOp.getInput(1)->size() == 1) ||
+            (mOp.getInput(1)->size() == mOp.getInput(0)->size()) ||
+            (mOp.getInput(1)->nbDims() == 1 && mOp.getInput(1)->size() == mOp.getInput(0)->dims()[mOp.getInput(0)->nbDims()-1])
+           ) &&
+           "input #1 must either have a single element, have the same size as input #0, or be 1-D with as many elements as the last dimension of input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<SubImplForward_cpu>::create({
+        mOp.getInput(0)->dataType(),
+        mOp.getInput(1)->dataType(),
+        mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
+        std::static_pointer_cast<Tensor>(mOp.getInput(1))->size(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/unit_tests/Test_TensorImpl.cpp b/unit_tests/data/Test_TensorImpl.cpp
similarity index 86%
rename from unit_tests/Test_TensorImpl.cpp
rename to unit_tests/data/Test_TensorImpl.cpp
index f55e25f55359cbcbcb9a53e916b345d1fb5a6b22..ceedd8c10f22c2afb0331eccafa11c748628fd7d 100644
--- 
a/unit_tests/Test_TensorImpl.cpp +++ b/unit_tests/data/Test_TensorImpl.cpp @@ -41,12 +41,12 @@ TEST_CASE("Tensor creation") { } SECTION("get function") { - REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 0, 0})) == 1); - REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 0, 1})) == 2); - REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 1, 1})) == 4); - REQUIRE(x.get<int>(std::array<std::size_t, 3>({1, 1, 0})) == 7); - x.get<int>(std::array<std::size_t, 3>({1, 1, 1})) = 36; - REQUIRE(x.get<int>(std::array<std::size_t, 3>({1, 1, 1})) == 36); + REQUIRE(x.get<int>({0, 0, 0}) == 1); + REQUIRE(x.get<int>({0, 0, 1}) == 2); + REQUIRE(x.get<int>({0, 1, 1}) == 4); + REQUIRE(x.get<int>({1, 1, 0}) == 7); + x.get<int>({1, 1, 1}) = 36; + REQUIRE(x.get<int>({1, 1, 1}) == 36); } SECTION("Pretty printing for debug") { REQUIRE_NOTHROW(x.print()); } diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp index e24d7ac6bd97586ebdeddce5ccb75807ddf530f0..18d98d169ddcb74310c5153d7c2c95103c395bb7 100644 --- a/unit_tests/operator/Test_AddImpl.cpp +++ b/unit_tests/operator/Test_AddImpl.cpp @@ -19,25 +19,25 @@ using namespace Aidge; TEST_CASE("[cpu/operator] Add(forward)") { - std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { - { - { - {{20, 47},{21, 48},{22, 49}}, - {{23, 50},{24, 51},{25, 52}}, - {{26, 53},{27, 54},{28, 55}} - }, - { - {{29, 56},{30, 57},{31, 58}}, - {{32, 59},{33, 60},{34, 61}}, - {{35, 62},{36, 63},{37, 64}} - }, - { - {{38, 65},{39, 66},{40, 67}}, - {{41, 68},{42, 69},{43, 70}}, - {{44, 71},{45, 72},{46, 73}} - } - } - }); + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 47},{21, 48},{22, 49}}, // + {{23, 50},{24, 51},{25, 52}}, // + {{26, 53},{27, 54},{28, 55}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{32, 59},{33, 60},{34, 61}}, // + {{35, 62},{36, 63},{37, 64}} // + }, // + { // + {{38, 65},{39, 66},{40, 67}}, // + {{41, 68},{42, 69},{43, 70}}, // + {{44, 71},{45, 72},{46, 73}} // + } // + } // + }); // SECTION("One input") { std::shared_ptr<Node> myAdd = Add<1>(); @@ -51,7 +51,7 @@ TEST_CASE("[cpu/operator] Add(forward)") { } SECTION("Two inputs") { - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { { { {{40, 94},{42, 96},{44, 98}}, @@ -81,9 +81,9 @@ TEST_CASE("[cpu/operator] Add(forward)") { REQUIRE(*std::static_pointer_cast<Tensor>(myAdd->getOperator()->getOutput(0)) == *expectedOutput); } - + SECTION("Three inputs") { - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { { { {{ 60, 141},{ 63, 144},{ 66, 147}}, diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp index 23ff1aaebcfb79a4d4b1abc4f1a77f1c6de63b21..891f0e94b02d07d41751728e83fa9b42e4b89be8 100644 --- a/unit_tests/operator/Test_ConvImpl.cpp +++ b/unit_tests/operator/Test_ConvImpl.cpp @@ -156,165 +156,8 @@ TEST_CASE("[cpu/operator] Conv(forward)") { // myConv->getOperator()->getOutput(0)->print(); REQUIRE(*(myConv->getOperator()->getOutput(0)) == *myOutput); } - SECTION("test Padding") { - std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv", {1,1}, {1,1,1,1}); - myConv->getOperator()->setDatatype(DataType::Int32); - myConv->getOperator()->setBackend("cpu"); - std::shared_ptr<Tensor> myWeights = 
std::make_shared<Tensor>(Array4D<int,4,3,3,3> { - { - { - {{ 0, 1, 2}, - { 3, 4, 5}, - { 6, 7, 8}}, - {{ 9, 10, 11}, - { 12, 13, 14}, - { 15, 16, 17}}, - {{ 18, 19, 20}, - { 21, 22, 23}, - { 24, 25, 26}} - }, - { - {{ 27, 28, 29}, - { 30, 31, 32}, - { 33, 34, 35}}, - {{ 36, 37, 38}, - { 39, 40, 41}, - { 42, 43, 44}}, - {{ 45, 46, 47}, - { 48, 49, 50}, - { 51, 52, 53}} - }, - { - {{ 54, 55, 56}, - { 57, 58, 59}, - { 60, 61, 62}}, - {{ 63, 64, 65}, - { 66, 67, 68}, - { 69, 70, 71}}, - {{ 72, 73, 74}, - { 75, 76, 77}, - { 78, 79, 80}} - }, - { - {{ 81, 82, 83}, - { 84, 85, 86}, - { 87, 88, 89}}, - {{ 90, 91, 92}, - { 93, 94, 95}, - { 96, 97, 98}}, - {{ 99, 100, 101}, - {102, 103, 104}, - {105, 106, 107}} - } - } - }); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> { - { - { - {{ 6895, 10225, 10486, 10747, 7063}, - { 10303, 15226, 15577, 15928, 10429}, - { 11518, 16981, 17332, 17683, 11554}, - { 12733, 18736, 19087, 19438, 12679}, - { 8047, 11791, 11998, 12205, 7927}}, - - {{ 15960, 24069, 24816, 25563, 17100}, - { 25119, 37818, 38898, 39978, 26703}, - { 28764, 43218, 44298, 45378, 30258}, - { 32409, 48618, 49698, 50778, 33813}, - { 21972, 32925, 33618, 34311, 22824}}, - - {{ 25041, 37929, 39162, 40395, 27153}, - { 39951, 60426, 62235, 64044, 42993}, - { 46026, 69471, 71280, 73089, 48978}, - { 52101, 78516, 80325, 82134, 54963}, - { 35913, 54075, 55254, 56433, 37737}}, - - {{ 34104, 51771, 53490, 55209, 37188}, - { 54765, 83016, 85554, 88092, 59265}, - { 63270, 95706, 98244, 100782, 67680}, - { 71775, 108396, 110934, 113472, 76095}, - { 49836, 75207, 76872, 78537, 52632}} - }, - { - {{ 20395, 29800, 30061, 30322, 19663}, - { 28528, 41551, 41902, 42253, 27304}, - { 29743, 43306, 43657, 44008, 28429}, - { 30958, 45061, 45412, 45763, 29554}, - { 18847, 27316, 27523, 27730, 17827}}, - - {{ 53760, 80094, 80841, 81588, 54000}, - { 79794, 118818, 119898, 120978, 80028}, - { 83439, 124218, 125298, 126378, 83583}, - { 87084, 129618, 130698, 131778, 87138}, - { 57072, 84900, 85593, 86286, 57024}}, - - {{ 87141, 130404, 131637, 132870, 88353}, - {131076, 196101, 197910, 199719, 132768}, - {137151, 205146, 206955, 208764, 138753}, - {143226, 214191, 216000, 217809, 144738}, - { 95313, 142500, 143679, 144858, 96237}}, - - {{120504, 180696, 182415, 184134, 122688}, - {182340, 273366, 275904, 278442, 185490}, - {190845, 286056, 288594, 291132, 193905}, - {199350, 298746, 301284, 303822, 202320}, - {133536, 200082, 201747, 203412, 135432}} - } - } - }); - 
myConv->getOperator()->associateInput(0,myInput); - myConv->getOperator()->associateInput(1,myWeights); - myConv->getOperator()->associateInput(2,myBias); - myConv->getOperator()->computeOutputDims(); - myConv->forward(); - - REQUIRE(*(myConv->getOperator()->getOutput(0)) == *myOutput); - } SECTION("Point-wise") { - std::shared_ptr<Node> myConv = Conv(3,4,{1,1}, "myconv", {1,1}, {0,0,0,0}); + std::shared_ptr<Node> myConv = Conv(3,4,{1,1}, "myconv", {1,1}); myConv->getOperator()->setDatatype(DataType::Float32); myConv->getOperator()->setBackend("cpu"); myConv->getOperator()->input(0) = Array4D<float,2,3,3,3> { diff --git a/unit_tests/operator/Test_DivImpl.cpp b/unit_tests/operator/Test_DivImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c33319c88b63ee834bbcb388bbbe0775699edbd7 --- /dev/null +++ b/unit_tests/operator/Test_DivImpl.cpp @@ -0,0 +1,207 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Div.hpp" + +#include "aidge/backend/cpu.hpp" + +#include <memory> + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Div(forward)") { + SECTION("2D Tensor by Singleton") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.07607108, 0.44075000}, + {0.19494885, 0.20071143} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,1,1>{{0.5}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.15214217, 0.88150001}, + {0.38989770, 0.40142286} + } + }); + + std::shared_ptr<Node> myDiv = Div(); + myDiv->getOperator()->setDatatype(DataType::Float32); + myDiv->getOperator()->setBackend("cpu"); + myDiv->getOperator()->associateInput(0, input_1); + myDiv->getOperator()->associateInput(1, input_2); + myDiv->getOperator()->computeOutputDims(); + myDiv->forward(); + + float* resPtr = static_cast<float*>(myDiv->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 4; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } + + SECTION("2D Tensors") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.79780143, 0.49322051}, + {0.84239346, 0.83737719} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{ + { + {0.59088874, 0.78858775}, + {0.42879432, 0.17615074} + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {1.35017204, 0.62544787}, + {1.96456301, 4.75375366} + } + }); + + std::shared_ptr<Node> myDiv = Div(); + myDiv->getOperator()->setDatatype(DataType::Float32); + myDiv->getOperator()->setBackend("cpu"); + myDiv->getOperator()->associateInput(0, input_1); + myDiv->getOperator()->associateInput(1, input_2); + myDiv->getOperator()->computeOutputDims(); + myDiv->forward(); + + float* resPtr = static_cast<float*>(myDiv->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = 
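+        // expected element-wise quotients of the two 2x2 tensors, precomputed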
static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 4; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } + + SECTION("3D Tensor by 1D Tensor") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> { + { + {{0.24180168, 0.44319558, 0.06437260}, + {0.21270001, 0.34570599, 0.44151264}}, + + {{0.62294692, 0.98043168, 0.18628585}, + {0.33591706, 0.03432965, 0.32130069}} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,3>{ + {0.63475525, 0.58620811, 0.69340748} + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> { + { + {{0.38093686, 0.75603795, 0.09283517}, + {0.33508980, 0.58973253, 0.63672900}}, + + {{0.98139703, 1.67249763, 0.26865280}, + {0.52920723, 0.05856223, 0.46336490}} + } + }); + + std::shared_ptr<Node> myDiv = Div(); + myDiv->getOperator()->setDatatype(DataType::Float32); + myDiv->getOperator()->setBackend("cpu"); + myDiv->getOperator()->associateInput(0, input_1); + myDiv->getOperator()->associateInput(1, input_2); + myDiv->getOperator()->computeOutputDims(); + myDiv->forward(); + + float* resPtr = static_cast<float*>(myDiv->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 12; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } + + SECTION("4D Tensor") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { + { + { + {{0.25675946, 0.36265653, 0.22386390}, + {0.30483031, 0.97449398, 0.73871714}, + {0.36169255, 0.04510212, 0.27525920}}, + + {{0.73255682, 0.03885978, 0.24181491}, + {0.14465559, 0.86070061, 0.88848090}, + {0.74408931, 0.87412918, 0.19800508}}, + + {{0.43551809, 0.73437816, 0.37513995}, + {0.25414777, 0.06396711, 0.98708153}, + {0.02140611, 0.84974837, 0.62108254}} + }, + { + {{0.86227137, 0.69357753, 0.41814715}, + {0.76048166, 0.46306920, 0.05907208}, + {0.76625377, 0.91793799, 0.92988223}}, + + {{0.34362513, 0.85009813, 0.21107805}, + {0.65575773, 0.38140792, 0.48540717}, + {0.10045588, 0.85803932, 0.23778951}}, + + {{0.30316389, 0.04176688, 0.17290735}, + {0.07942408, 0.48647392, 0.39440966}, + {0.26543915, 0.92589515, 0.83948994}} + } + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,1,1>{{3.0}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { + { + { + {{0.08558649, 0.12088551, 0.07462130}, + {0.10161010, 0.32483134, 0.24623905}, + {0.12056419, 0.01503404, 0.09175307}}, + + {{0.24418561, 0.01295326, 0.08060497}, + {0.04821853, 0.28690019, 0.29616031}, + {0.24802977, 0.29137638, 0.06600169}}, + + {{0.14517270, 0.24479271, 0.12504666}, + {0.08471593, 0.02132237, 0.32902718}, + {0.00713537, 0.28324947, 0.20702751}} + }, + { + {{0.28742379, 0.23119251, 0.13938238}, + {0.25349388, 0.15435641, 0.01969069}, + {0.25541791, 0.30597934, 0.30996075}}, + + {{0.11454171, 0.28336605, 0.07035935}, + {0.21858591, 0.12713598, 0.16180240}, + {0.03348529, 0.28601310, 0.07926317}}, + + {{0.10105463, 0.01392229, 0.05763578}, + {0.02647469, 0.16215797, 0.13146989}, + {0.08847972, 0.30863172, 0.27982998}} + } + } + }); + + std::shared_ptr<Node> myDiv = Div(); + myDiv->getOperator()->setDatatype(DataType::Float32); + myDiv->getOperator()->setBackend("cpu"); + myDiv->getOperator()->associateInput(0, input_1); + myDiv->getOperator()->associateInput(1, input_2); + 
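+        // the 1x1 divisor broadcasts as a scalar: each of the 54 elements is divided by 3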
myDiv->getOperator()->computeOutputDims(); + myDiv->forward(); + + float* resPtr = static_cast<float*>(myDiv->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 54; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + } +} \ No newline at end of file diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp index 7096962e196c2ace4abf2b0b14aca8dfa37d3441..d5bd91ff75404a7b928c8919c64e06315b78206f 100644 --- a/unit_tests/operator/Test_LeakyReLUImpl.cpp +++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp @@ -153,7 +153,7 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)") { REQUIRE(*myLeakyReLU->getOperator()->getOutput(0) == *expectedOutput); } - SECTION("Test construction parameter: negative_slop") { + SECTION("Test construction attribute: negative_slop") { std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> { {0.0f, 1.0f, 2.0f,-3.0f, 4.0f,-5.0f,-6.0f, 7.0f, 8.0f, 9.0f} }); diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0da01b3287043e07e5b967df8882960cfb814f8f --- /dev/null +++ b/unit_tests/operator/Test_MatMulImpl.cpp @@ -0,0 +1,108 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <memory> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/MatMul.hpp" + +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul]") { + // Test MatMul forward with batch size = 2 and feature size = 75 + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{ + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); + 
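+    // every weight row repeats the same 1..15 pattern, so the 5 outputs of each batch item are identical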
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{ + {{23600, 23600, 23600, 23600, 23600}, {68600, 68600, 68600, 68600, 68600}}}); + + std::shared_ptr<Node> myMatMul = MatMul(5, "mymatmul"); + myMatMul->getOperator()->setDatatype(DataType::Int32); + myMatMul->getOperator()->setBackend("cpu"); + myMatMul->getOperator()->associateInput(1, myWeights); + + SECTION("2D input") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{ + {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74}, + {75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}}); + myMatMul->getOperator()->associateInput(0, myInput); + myMatMul->getOperator()->computeOutputDims(); + myMatMul->forward(); + REQUIRE(*std::static_pointer_cast<Tensor>(myMatMul->getOperator()->getOutput(0)) == *myOutput); + } + SECTION("4D input") { + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + myMatMul->getOperator()->associateInput(0, myInput); + myMatMul->getOperator()->computeOutputDims(); + myMatMul->forward(); + REQUIRE(*std::static_pointer_cast<Tensor>(myMatMul->getOperator()->getOutput(0)) == *myOutput); + } + + // std::cout << static_cast<Tensor>((*myMatMul->getOperator())["weight"])[0][0][0][0] << std::endl; +} \ No newline at end of file diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..83fa7eaa670399c8d6c085a14db08fa35df9de8c --- /dev/null +++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp @@ -0,0 +1,82 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <memory> +#include <cstdlib> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/MaxPooling.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + + +TEST_CASE("[cpu/operator] MaxPooling(forward)") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,2,5,5> { //NCHW + { + { + {{-0.3848, 0.2166, -0.4373, 0.6142, 0.5277}, + {0.7995, 0.3638, -1.4589, -1.0843, 1.0918}, + {0.7147, 0.0936, -1.2902, 1.2037, 0.4874}, + {-0.5981, 2.1184, -0.9175, 1.3859, 0.3305}, + {-1.7700, 0.0563, -0.3914, 0.0538, -0.3955}}, + + {{-3.1409, -0.4554, 0.0524, 2.2291, 0.4859}, + {-0.7465, -0.6567, -2.3703, -0.6386, -1.4152}, + { 2.2329, -0.5850, 0.0700, 1.2838, -1.7363}, + { 0.2139, 0.0624, -1.0689, -0.8221, -0.8038}, + { 0.1886, -0.7840, -0.2313, 0.2651, -1.6244}} + }, + { + {{ 0.4371, 1.6417, 0.9129, 0.6325, 0.5438}, + {-2.3552, -0.8850, -0.0232, -0.5462, -1.2011}, + {1.7653, -1.6668, -1.0814, 0.6182, 1.2071}, + {0.9541, -0.5133, 0.8664, -0.8892, 1.4585}, + {1.0220, -0.5107, 0.1829, -0.2301, -0.4268}}, + + {{ 1.0429, 0.6279, -0.2875, 0.7187, -0.1500}, + {1.6041, 2.9635, 1.4172, -0.7517, 0.5441}, + {-0.2276, 0.0857, 0.6776, -0.1389, -0.0614}, + {-0.1547, -0.3435, 0.0650, -0.5095, -1.8073}, + {1.7217, 0.3999, -0.5953, 1.0604, -0.4126}} + } + } + }); + SECTION("Stride") { + std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2}); + myMaxPool->getOperator()->setDatatype(DataType::Float32); + myMaxPool->getOperator()->setBackend("cpu"); + + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> { + { + { + {{ 0.7995, 0.6142}, + { 2.1184, 1.3859}}, + {{ -0.4554, 2.2291}, + { 2.2329, 1.2838}} + }, + { + {{1.6417, 0.9129}, + {1.7653, 0.8664}}, + {{2.9635, 1.4172}, + {0.0857, 0.6776}} + } + } + }); + myMaxPool->getOperator()->associateInput(0,myInput); + myMaxPool->getOperator()->computeOutputDims(); + myMaxPool->forward(); + myMaxPool->getOperator()->getOutput(0)->print(); + REQUIRE(*(myMaxPool->getOperator()->getOutput(0)) == *myOutput); + } +} \ No newline at end of file diff --git a/unit_tests/operator/Test_MulImpl.cpp b/unit_tests/operator/Test_MulImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cea62f998cfc538d1d5800639e461eb4d15cb270 --- /dev/null +++ b/unit_tests/operator/Test_MulImpl.cpp @@ -0,0 +1,129 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Mul.hpp" + +#include "aidge/backend/cpu.hpp" + +#include <memory> + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Mul(forward)") { + SECTION("2D Tensor by Singleton") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.38977361, 0.34064174}, + {0.00427264, 0.90872520} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,1,1>{{3.0}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {1.16932082, 1.02192521}, + {0.01281792, 2.72617555} + } + }); + + std::shared_ptr<Node> myMul = Mul(); + myMul->getOperator()->setDatatype(DataType::Float32); + myMul->getOperator()->setBackend("cpu"); + myMul->getOperator()->associateInput(0, input_1); + myMul->getOperator()->associateInput(1, input_2); + myMul->getOperator()->computeOutputDims(); + myMul->forward(); + + float* resPtr = static_cast<float*>(myMul->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 4; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } + + SECTION("2D Tensors") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.38977361, 0.34064174}, + {0.00427264, 0.90872520} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{ + { + {0.02362096, 0.24084556}, + {0.94690859, 0.13512510} + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.00920683, 0.08204205}, + {0.00404580, 0.12279158} + } + }); + + std::shared_ptr<Node> myMul = Mul(); + myMul->getOperator()->setDatatype(DataType::Float32); + myMul->getOperator()->setBackend("cpu"); + myMul->getOperator()->associateInput(0, input_1); + myMul->getOperator()->associateInput(1, input_2); + myMul->getOperator()->computeOutputDims(); + myMul->forward(); + + float* resPtr = static_cast<float*>(myMul->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 4; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } + + SECTION("3D Tensor by 1D Tensor") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> { + { + {{0.33647752, 0.89360154, 0.46586215}, + {0.71518236, 0.71481097, 0.97991812}}, + + {{0.17393428, 0.56849813, 0.18489265}, + {0.78397650, 0.00348300, 0.65758008}} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,3>{ + {0.15380561, 0.51063120, 0.93031412} + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> { + { + {{0.05175213, 0.45630082, 0.43339813}, + {0.10999906, 0.36500478, 0.91163164}}, + + {{0.02675207, 0.29029289, 0.17200825}, + {0.12057999, 0.00177853, 0.61175603}} + } + }); + + std::shared_ptr<Node> myMul = Mul(); + myMul->getOperator()->setDatatype(DataType::Float32); + myMul->getOperator()->setBackend("cpu"); + myMul->getOperator()->associateInput(0, input_1); + myMul->getOperator()->associateInput(1, input_2); + myMul->getOperator()->computeOutputDims(); + myMul->forward(); + + float* resPtr = 
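+        // raw output buffer: the 3-element multiplier was broadcast along the last axis of the 2x2x3 input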
static_cast<float*>(myMul->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 12; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } +} \ No newline at end of file diff --git a/unit_tests/operator/Test_PadImpl.cpp b/unit_tests/operator/Test_PadImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b603e165392f1a861dc1b40d50b70a53c9256870 --- /dev/null +++ b/unit_tests/operator/Test_PadImpl.cpp @@ -0,0 +1,569 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <cstdlib> +#include <memory> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Pad.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Pad(forward)") { + SECTION("Symmetric Pad") { + const int pv = 0; // pad value + + std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv)); + myPad->getOperator()->setDatatype(DataType::Int32); + myPad->getOperator()->setBackend("cpu"); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW + { + { + {{ pv, pv, pv, pv, pv, pv, pv}, + { pv, 0, 1, 2, 3, 4, pv}, + { pv, 5, 6, 7, 8, 9, pv}, + { pv, 10, 11, 12, 13, 14, pv}, + { pv, 15, 16, 17, 18, 19, pv}, + { pv, 20, 21, 22, 23, 24, pv}, + { pv, pv, pv, pv, pv, pv, pv}}, + + {{ pv, pv, pv, pv, pv, pv, pv}, + { pv, 25, 26, 27, 28, 29, pv}, + { pv, 30, 31, 32, 33, 34, pv}, + { pv, 35, 36, 37, 38, 39, pv}, + { pv, 40, 41, 42, 43, 44, pv}, + { pv, 45, 46, 47, 48, 49, pv}, + { pv, pv, pv, pv, pv, pv, pv}}, + + {{ pv, pv, pv, pv, pv, pv, pv}, + { pv, 50, 51, 52, 53, 54, pv}, + { pv, 55, 56, 57, 58, 59, pv}, + { pv, 60, 61, 62, 63, 64, pv}, + { pv, 65, 66, 67, 68, 69, pv}, + { pv, 70, 71, 72, 73, 74, pv}, + { pv, pv, pv, pv, pv, pv, pv}} + }, + { + {{ pv, pv, pv, pv, pv, pv, pv}, + { pv, 75, 76, 77, 78, 79, pv}, + { pv, 80, 81, 82, 83, 84, pv}, + { pv, 85, 86, 87, 88, 89, pv}, + { pv, 90, 91, 92, 93, 94, pv}, + { pv, 95, 96, 97, 98, 99, pv}, + { pv, pv, pv, pv, pv, pv, pv}}, + + {{ pv, pv, pv, pv, pv, pv, pv}, + {pv, 100, 101, 102, 
103, 104, pv}, + {pv, 105, 106, 107, 108, 109, pv}, + {pv, 110, 111, 112, 113, 114, pv}, + {pv, 115, 116, 117, 118, 119, pv}, + {pv, 120, 121, 122, 123, 124, pv}, + { pv, pv, pv, pv, pv, pv, pv}}, + + {{ pv, pv, pv, pv, pv, pv, pv}, + {pv, 125, 126, 127, 128, 129, pv}, + {pv, 130, 131, 132, 133, 134, pv}, + {pv, 135, 136, 137, 138, 139, pv}, + {pv, 140, 141, 142, 143, 144, pv}, + {pv, 145, 146, 147, 148, 149, pv}, + { pv, pv, pv, pv, pv, pv, pv}} + } + } + }); + + myPad->getOperator()->associateInput(0,myInput); + myPad->getOperator()->computeOutputDims(); + myPad->forward(); + // myPad->getOperator()->getOutput(0)->print(); + REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput); + } + + SECTION("Asymmetric Pad") { + const int pv = 0; // pad value + + std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv)); + myPad->getOperator()->setDatatype(DataType::Int32); + myPad->getOperator()->setBackend("cpu"); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,6,6> { //NCHW + { + { + {{ pv, pv, pv, pv, pv, pv}, + { 0, 1, 2, 3, 4, pv}, + { 5, 6, 7, 8, 9, pv}, + { 10, 11, 12, 13, 14, pv}, + { 15, 16, 17, 18, 19, pv}, + { 20, 21, 22, 23, 24, pv}}, + + {{ pv, pv, pv, pv, pv, pv}, + { 25, 26, 27, 28, 29, pv}, + { 30, 31, 32, 33, 34, pv}, + { 35, 36, 37, 38, 39, pv}, + { 40, 41, 42, 43, 44, pv}, + { 45, 46, 47, 48, 49, pv}}, + + {{ pv, pv, pv, pv, pv, pv}, + { 50, 51, 52, 53, 54, pv}, + { 55, 56, 57, 58, 59, pv}, + { 60, 61, 62, 63, 64, pv}, + { 65, 66, 67, 68, 69, pv}, + { 70, 71, 72, 73, 74, pv}} + }, + { + {{ pv, pv, pv, pv, pv, pv}, + { 75, 76, 77, 78, 79, pv}, + { 80, 81, 82, 83, 84, pv}, + { 85, 86, 87, 88, 89, pv}, + { 90, 91, 92, 93, 94, pv}, + { 95, 96, 97, 98, 99, pv}}, + + {{ pv, pv, pv, pv, pv, pv}, + { 100, 101, 102, 103, 104, pv}, + { 105, 106, 107, 108, 109, pv}, + { 110, 111, 112, 113, 114, pv}, + { 115, 116, 117, 118, 119, pv}, + { 120, 121, 122, 123, 124, pv}}, + + {{ pv, pv, pv, pv, pv, pv}, + { 125, 126, 127, 128, 129, pv}, + { 130, 131, 132, 133, 134, pv}, + { 135, 136, 137, 138, 139, pv}, + { 140, 141, 142, 143, 144, pv}, + { 145, 146, 147, 148, 149, pv}} + } + } + }); + + myPad->getOperator()->associateInput(0,myInput); + myPad->getOperator()->computeOutputDims(); + myPad->forward(); + // myPad->getOperator()->getOutput(0)->print(); + REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput); + } + + SECTION("Pad Edge") { + std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Edge); + myPad->getOperator()->setDatatype(DataType::Int32); + 
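+        // Edge mode replicates the outermost input rows/columns instead of inserting a constant value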
myPad->getOperator()->setBackend("cpu"); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW + { + { + {{ 0, 0, 1, 2, 3, 4, 4}, + { 0, 0, 1, 2, 3, 4, 4}, + { 5, 5, 6, 7, 8, 9, 9}, + { 10, 10, 11, 12, 13, 14, 14}, + { 15, 15, 16, 17, 18, 19, 19}, + { 20, 20, 21, 22, 23, 24, 24}, + { 20, 20, 21, 22, 23, 24, 24}}, + + {{ 25, 25, 26, 27, 28, 29, 29}, + { 25, 25, 26, 27, 28, 29, 29}, + { 30, 30, 31, 32, 33, 34, 34}, + { 35, 35, 36, 37, 38, 39, 39}, + { 40, 40, 41, 42, 43, 44, 44}, + { 45, 45, 46, 47, 48, 49, 49}, + { 45, 45, 46, 47, 48, 49, 49}}, + + {{ 50, 50, 51, 52, 53, 54, 54}, + { 50, 50, 51, 52, 53, 54, 54}, + { 55, 55, 56, 57, 58, 59, 59}, + { 60, 60, 61, 62, 63, 64, 64}, + { 65, 65, 66, 67, 68, 69, 69}, + { 70, 70, 71, 72, 73, 74, 74}, + { 70, 70, 71, 72, 73, 74, 74}} + }, + { + {{ 75, 75, 76, 77, 78, 79, 79}, + { 75, 75, 76, 77, 78, 79, 79}, + { 80, 80, 81, 82, 83, 84, 84}, + { 85, 85, 86, 87, 88, 89, 89}, + { 90, 90, 91, 92, 93, 94, 94}, + { 95, 95, 96, 97, 98, 99, 99}, + { 95, 95, 96, 97, 98, 99, 99}}, + + {{100, 100, 101, 102, 103, 104, 104}, + {100, 100, 101, 102, 103, 104, 104}, + {105, 105, 106, 107, 108, 109, 109}, + {110, 110, 111, 112, 113, 114, 114}, + {115, 115, 116, 117, 118, 119, 119}, + {120, 120, 121, 122, 123, 124, 124}, + {120, 120, 121, 122, 123, 124, 124}}, + + {{125, 125, 126, 127, 128, 129, 129}, + {125, 125, 126, 127, 128, 129, 129}, + {130, 130, 131, 132, 133, 134, 134}, + {135, 135, 136, 137, 138, 139, 139}, + {140, 140, 141, 142, 143, 144, 144}, + {145, 145, 146, 147, 148, 149, 149}, + {145, 145, 146, 147, 148, 149, 149}} + } + } + }); + + myPad->getOperator()->associateInput(0,myInput); + myPad->getOperator()->computeOutputDims(); + myPad->forward(); + // myPad->getOperator()->getOutput(0)->print(); + REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput); + } + + SECTION("Pad Reflect") { + std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Reflect); + myPad->getOperator()->setDatatype(DataType::Int32); + myPad->getOperator()->setBackend("cpu"); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 
89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW + { + { + { + { 6, 5, 6, 7, 8, 9, 5}, + { 1, 0, 1, 2, 3, 4, 0}, + { 6, 5, 6, 7, 8, 9, 5}, + { 11, 10, 11, 12, 13, 14, 10}, + { 16, 15, 16, 17, 18, 19, 15}, + { 21, 20, 21, 22, 23, 24, 20}, + { 1, 0, 1, 2, 3, 4, 0} + }, + { + { 31, 30, 31, 32, 33, 34, 30}, + { 26, 25, 26, 27, 28, 29, 25}, + { 31, 30, 31, 32, 33, 34, 30}, + { 36, 35, 36, 37, 38, 39, 35}, + { 41, 40, 41, 42, 43, 44, 40}, + { 46, 45, 46, 47, 48, 49, 45}, + { 26, 25, 26, 27, 28, 29, 25} + }, + { + { 56, 55, 56, 57, 58, 59, 55}, + { 51, 50, 51, 52, 53, 54, 50}, + { 56, 55, 56, 57, 58, 59, 55}, + { 61, 60, 61, 62, 63, 64, 60}, + { 66, 65, 66, 67, 68, 69, 65}, + { 71, 70, 71, 72, 73, 74, 70}, + { 51, 50, 51, 52, 53, 54, 50} + } + }, + { + { + { 81, 80, 81, 82, 83, 84, 80}, + { 76, 75, 76, 77, 78, 79, 75}, + { 81, 80, 81, 82, 83, 84, 80}, + { 86, 85, 86, 87, 88, 89, 85}, + { 91, 90, 91, 92, 93, 94, 90}, + { 96, 95, 96, 97, 98, 99, 95}, + { 76, 75, 76, 77, 78, 79, 75} + }, + { + { 106, 105, 106, 107, 108, 109, 105}, + { 101, 100, 101, 102, 103, 104, 100}, + { 106, 105, 106, 107, 108, 109, 105}, + { 111, 110, 111, 112, 113, 114, 110}, + { 116, 115, 116, 117, 118, 119, 115}, + { 121, 120, 121, 122, 123, 124, 120}, + { 101, 100, 101, 102, 103, 104, 100} + }, + { + { 131, 130, 131, 132, 133, 134, 130}, + { 126, 125, 126, 127, 128, 129, 125}, + { 131, 130, 131, 132, 133, 134, 130}, + { 136, 135, 136, 137, 138, 139, 135}, + { 141, 140, 141, 142, 143, 144, 140}, + { 146, 145, 146, 147, 148, 149, 145}, + { 126, 125, 126, 127, 128, 129, 125} + } + } + } + }); + + myPad->getOperator()->associateInput(0,myInput); + myPad->getOperator()->computeOutputDims(); + myPad->forward(); + myPad->getOperator()->getOutput(0)->print(); + REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput); + } + + SECTION("Pad Wrap") { + std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Wrap); + myPad->getOperator()->setDatatype(DataType::Int32); + myPad->getOperator()->setBackend("cpu"); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW + { + { + {{ 24, 20, 21, 22, 23, 24, 20}, + { 4, 0, 1, 2, 3, 4, 0}, + { 9, 5, 6, 7, 8, 9, 5}, + { 14, 10, 11, 
12, 13, 14, 10}, + { 19, 15, 16, 17, 18, 19, 15}, + { 24, 20, 21, 22, 23, 24, 20}, + { 4, 0, 1, 2, 3, 4, 0}}, + + {{ 49, 45, 46, 47, 48, 49, 45}, + { 29, 25, 26, 27, 28, 29, 25}, + { 34, 30, 31, 32, 33, 34, 30}, + { 39, 35, 36, 37, 38, 39, 35}, + { 44, 40, 41, 42, 43, 44, 40}, + { 49, 45, 46, 47, 48, 49, 45}, + { 29, 25, 26, 27, 28, 29, 25}}, + + {{ 74, 70, 71, 72, 73, 74, 70}, + { 54, 50, 51, 52, 53, 54, 50}, + { 59, 55, 56, 57, 58, 59, 55}, + { 64, 60, 61, 62, 63, 64, 60}, + { 69, 65, 66, 67, 68, 69, 65}, + { 74, 70, 71, 72, 73, 74, 70}, + { 54, 50, 51, 52, 53, 54, 50}} + }, + { + {{ 99, 95, 96, 97, 98, 99, 95}, + { 79, 75, 76, 77, 78, 79, 75}, + { 84, 80, 81, 82, 83, 84, 80}, + { 89, 85, 86, 87, 88, 89, 85}, + { 94, 90, 91, 92, 93, 94, 90}, + { 99, 95, 96, 97, 98, 99, 95}, + { 79, 75, 76, 77, 78, 79, 75}}, + + {{124, 120, 121, 122, 123, 124, 120}, + {104, 100, 101, 102, 103, 104, 100}, + {109, 105, 106, 107, 108, 109, 105}, + {114, 110, 111, 112, 113, 114, 110}, + {119, 115, 116, 117, 118, 119, 115}, + {124, 120, 121, 122, 123, 124, 120}, + {104, 100, 101, 102, 103, 104, 100}}, + + {{149, 145, 146, 147, 148, 149, 145}, + {129, 125, 126, 127, 128, 129, 125}, + {134, 130, 131, 132, 133, 134, 130}, + {139, 135, 136, 137, 138, 139, 135}, + {144, 140, 141, 142, 143, 144, 140}, + {149, 145, 146, 147, 148, 149, 145}, + {129, 125, 126, 127, 128, 129, 125}} + } + } + }); + + myPad->getOperator()->associateInput(0,myInput); + myPad->getOperator()->computeOutputDims(); + myPad->forward(); + // myPad->getOperator()->getOutput(0)->print(); + REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput); + } +} \ No newline at end of file diff --git a/unit_tests/operator/Test_PaddedConv.cpp b/unit_tests/operator/Test_PaddedConv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e41be85ab00faae1af7239c43b74a34f558a663c --- /dev/null +++ b/unit_tests/operator/Test_PaddedConv.cpp @@ -0,0 +1,319 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <cstdlib> +#include <memory> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/MetaOperator.hpp" +#include "aidge/operator/MetaOperatorDefs.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] PaddedConv(forward)") { + SECTION("Classic Conv") { + std::shared_ptr<Node> myConv = PaddedConv(3,4,{3,3}, "myconv"); + myConv->getOperator()->setDatatype(DataType::Int32); + myConv->getOperator()->setBackend("cpu"); + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { + { + { + {{ 0, 1, 2}, + { 3, 4, 5}, + { 6, 7, 8}}, + {{ 9, 10, 11}, + { 12, 13, 14}, + { 15, 16, 17}}, + {{ 18, 19, 20}, + { 21, 22, 23}, + { 24, 25, 26}} + }, + { + {{ 27, 28, 29}, + { 30, 31, 32}, + { 33, 34, 35}}, + {{ 36, 37, 38}, + { 39, 40, 41}, + { 42, 43, 44}}, + {{ 45, 46, 47}, + { 48, 49, 50}, + { 51, 52, 53}} + }, + { + {{ 54, 55, 56}, + { 57, 58, 59}, + { 60, 61, 62}}, + {{ 63, 64, 65}, + { 66, 67, 68}, + { 69, 70, 71}}, + {{ 72, 73, 74}, + { 75, 76, 77}, + { 78, 79, 80}} + }, + { + {{ 81, 82, 83}, + { 84, 85, 86}, + { 87, 88, 89}}, + {{ 90, 91, 92}, + { 93, 94, 95}, + { 96, 97, 98}}, + {{ 99, 100, 101}, + {102, 103, 104}, + {105, 106, 107}} + } + } + }); + std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { + { + { + {{ 15226, 15577, 15928}, + { 16981, 17332, 17683}, + { 18736, 19087, 19438}}, + {{ 37818, 38898, 39978}, + { 43218, 44298, 45378}, + { 48618, 49698, 50778}}, + {{ 60426, 62235, 64044}, + { 69471, 71280, 73089}, + { 78516, 80325, 82134}}, + {{ 83016, 85554, 88092}, + { 95706, 98244, 100782}, + {108396, 110934, 113472}} + }, + { + {{ 41551, 41902, 42253}, + { 43306, 43657, 44008}, + { 45061, 45412, 45763}}, + {{118818, 119898, 120978}, + {124218, 125298, 126378}, + {129618, 130698, 131778}}, + {{196101, 197910, 199719}, + {205146, 206955, 208764}, + {214191, 216000, 217809}}, + {{273366, 275904, 278442}, + {286056, 288594, 291132}, + {298746, 301284, 303822}} + } + } + }); + + myConv->getOperator()->associateInput(0,myInput); + myConv->getOperator()->associateInput(1,myWeights); + myConv->getOperator()->associateInput(2,myBias); + myConv->getOperator()->computeOutputDims(); + myConv->forward(); + + REQUIRE(*(myConv->getOperator()->getOutput(0)) == *myOutput); + } + SECTION("test Padding") { + std::shared_ptr<Node> myConv = 
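+            // stride {1,1} with padding {1,1,1,1}: the 3x3 kernel keeps the 5x5 spatial size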
PaddedConv(3,4,{3,3}, "myconv", {1,1}, {1,1,1,1}); + myConv->getOperator()->setDatatype(DataType::Int32); + myConv->getOperator()->setBackend("cpu"); + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { + { + { + {{ 0, 1, 2}, + { 3, 4, 5}, + { 6, 7, 8}}, + {{ 9, 10, 11}, + { 12, 13, 14}, + { 15, 16, 17}}, + {{ 18, 19, 20}, + { 21, 22, 23}, + { 24, 25, 26}} + }, + { + {{ 27, 28, 29}, + { 30, 31, 32}, + { 33, 34, 35}}, + {{ 36, 37, 38}, + { 39, 40, 41}, + { 42, 43, 44}}, + {{ 45, 46, 47}, + { 48, 49, 50}, + { 51, 52, 53}} + }, + { + {{ 54, 55, 56}, + { 57, 58, 59}, + { 60, 61, 62}}, + {{ 63, 64, 65}, + { 66, 67, 68}, + { 69, 70, 71}}, + {{ 72, 73, 74}, + { 75, 76, 77}, + { 78, 79, 80}} + }, + { + {{ 81, 82, 83}, + { 84, 85, 86}, + { 87, 88, 89}}, + {{ 90, 91, 92}, + { 93, 94, 95}, + { 96, 97, 98}}, + {{ 99, 100, 101}, + {102, 103, 104}, + {105, 106, 107}} + } + } + }); + std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> { + { + { + {{ 6895, 10225, 10486, 10747, 7063}, + { 10303, 15226, 15577, 15928, 10429}, + { 11518, 16981, 17332, 17683, 11554}, + { 12733, 18736, 19087, 19438, 12679}, + { 8047, 11791, 11998, 12205, 7927}}, + + {{ 15960, 24069, 24816, 25563, 17100}, + { 25119, 37818, 38898, 39978, 26703}, + { 28764, 43218, 44298, 45378, 30258}, + { 32409, 48618, 49698, 50778, 33813}, + { 21972, 32925, 33618, 34311, 22824}}, + + {{ 25041, 37929, 39162, 40395, 27153}, + { 39951, 60426, 62235, 64044, 42993}, + { 46026, 69471, 71280, 73089, 48978}, + { 52101, 78516, 80325, 82134, 54963}, + { 35913, 54075, 55254, 56433, 37737}}, + + {{ 34104, 51771, 53490, 55209, 37188}, + { 54765, 83016, 85554, 88092, 59265}, + { 63270, 95706, 98244, 100782, 67680}, + { 71775, 108396, 110934, 113472, 76095}, + { 49836, 75207, 76872, 78537, 52632}} + }, + { + {{ 20395, 29800, 30061, 30322, 19663}, + { 28528, 41551, 41902, 42253, 27304}, + { 29743, 43306, 43657, 44008, 28429}, + { 30958, 45061, 45412, 45763, 29554}, + { 18847, 27316, 27523, 27730, 17827}}, + + {{ 53760, 80094, 80841, 81588, 54000}, + { 79794, 118818, 119898, 120978, 80028}, + { 83439, 124218, 125298, 126378, 83583}, + { 87084, 129618, 130698, 131778, 87138}, + { 57072, 84900, 85593, 86286, 57024}}, + + {{ 87141, 130404, 131637, 132870, 88353}, + {131076, 196101, 197910, 199719, 132768}, + {137151, 205146, 206955, 208764, 138753}, + {143226, 214191, 216000, 217809, 144738}, + { 95313, 142500, 143679, 144858, 96237}}, + + {{120504, 180696, 182415, 184134, 122688}, + {182340, 273366, 275904, 
278442, 185490}, + {190845, 286056, 288594, 291132, 193905}, + {199350, 298746, 301284, 303822, 202320}, + {133536, 200082, 201747, 203412, 135432}} + } + } + }); + + myConv->getOperator()->associateInput(0,myInput); + myConv->getOperator()->associateInput(1,myWeights); + myConv->getOperator()->associateInput(2,myBias); + myConv->getOperator()->computeOutputDims(); + myConv->forward(); + + REQUIRE(*(myConv->getOperator()->getOutput(0)) == *myOutput); + } +} diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7293198f411510904ee73aced47b69dfc37374af --- /dev/null +++ b/unit_tests/operator/Test_PowImpl.cpp @@ -0,0 +1,203 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Pow.hpp" + +#include "aidge/backend/cpu.hpp" + +#include <memory> + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Pow(forward)") { + SECTION("2D Tensor by Singleton") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.42139274, 0.51524192}, + {0.85247433, 0.13432795} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,1,1>{{2.0}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.17757183, 0.26547423}, + {0.72671247, 0.01804400} + } + }); + + std::shared_ptr<Node> myPow = Pow(); + myPow->getOperator()->setDatatype(DataType::Float32); + myPow->getOperator()->setBackend("cpu"); + myPow->getOperator()->associateInput(0, input_1); + myPow->getOperator()->associateInput(1, input_2); + myPow->getOperator()->computeOutputDims(); + myPow->forward(); + + float* resPtr = static_cast<float*>(myPow->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 4; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } + + SECTION("3D Tensor by 1D Tensor") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> { + { + {{0.87519985, 0.10536593, 0.20268351}, + {0.75532353, 0.95977652, 0.03897029}}, + + {{0.67554104, 0.35499334, 0.27741563}, + {0.94270861, 0.48397779, 0.35532343}} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,3>{ + {0.39333701, 0.08719915, 0.16713941} + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> { + { + {{0.94891787, 0.82182676, 0.76584703}, + {0.89549923, 0.99642646, 0.58137459}}, + + {{0.85702944, 0.91364944, 0.80709606}, + {0.97706109, 0.93867886, 0.84118503}} + } + }); + + std::shared_ptr<Node> myPow = Pow(); + myPow->getOperator()->setDatatype(DataType::Float32); + myPow->getOperator()->setBackend("cpu"); + myPow->getOperator()->associateInput(0, input_1); + myPow->getOperator()->associateInput(1, input_2); + myPow->getOperator()->computeOutputDims(); + myPow->forward(); + + float* resPtr = static_cast<float*>(myPow->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* 
expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 12; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } + + SECTION("2D Tensors") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.79780143, 0.49322051}, + {0.84239346, 0.83737719} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{ + { + {0.59088874, 0.78858775}, + {0.42879432, 0.17615074} + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.87504572, 0.57271165}, + {0.92909741, 0.96922028} + } + }); + + std::shared_ptr<Node> myPow = Pow(); + myPow->getOperator()->setDatatype(DataType::Float32); + myPow->getOperator()->setBackend("cpu"); + myPow->getOperator()->associateInput(0, input_1); + myPow->getOperator()->associateInput(1, input_2); + myPow->getOperator()->computeOutputDims(); + myPow->forward(); + + float* resPtr = static_cast<float*>(myPow->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 4; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } + + SECTION("4D Tensor") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { + { + { + {{0.80191749, 0.45388508, 0.86550850}, + {0.47226250, 0.55809456, 0.59451854}, + {0.45497441, 0.02653158, 0.44041735}}, + {{0.30726379, 0.73146582, 0.46462774}, + {0.30268502, 0.78075552, 0.65154958}, + {0.91332406, 0.62448132, 0.53238851}}, + {{0.13917381, 0.43061519, 0.30198061}, + {0.12880909, 0.08995515, 0.29609048}, + {0.46449280, 0.47559714, 0.24193990}} + }, + { + {{0.87349969, 0.51625526, 0.16921073}, + {0.95035923, 0.10066575, 0.56729180}, + {0.84686232, 0.05965143, 0.03635806}}, + {{0.61107808, 0.59954077, 0.45627308}, + {0.84114522, 0.77186388, 0.37427086}, + {0.13415480, 0.00617349, 0.84260136}}, + {{0.55090177, 0.57292056, 0.29158932}, + {0.67131883, 0.96988875, 0.69545972}, + {0.80979776, 0.18238151, 0.19527155}} + } + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,1,1>{{2.0}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { + { + { + {{6.43071651e-01, 2.06011668e-01, 7.49104977e-01}, + {2.23031864e-01, 3.11469525e-01, 3.53452295e-01}, + {2.07001716e-01, 7.03924568e-04, 1.93967447e-01}}, + + {{9.44110379e-02, 5.35042226e-01, 2.15878934e-01}, + {9.16182250e-02, 6.09579206e-01, 4.24516857e-01}, + {8.34160864e-01, 3.89976919e-01, 2.83437520e-01}}, + + {{1.93693489e-02, 1.85429439e-01, 9.11922902e-02}, + {1.65917836e-02, 8.09192937e-03, 8.76695737e-02}, + {2.15753555e-01, 2.26192638e-01, 5.85349165e-02}} + }, + { + {{7.63001740e-01, 2.66519487e-01, 2.86322720e-02}, + {9.03182685e-01, 1.01335924e-02, 3.21819991e-01}, + {7.17175782e-01, 3.55829368e-03, 1.32190844e-03}}, + + {{3.73416424e-01, 3.59449148e-01, 2.08185121e-01}, + {7.07525253e-01, 5.95773816e-01, 1.40078679e-01}, + {1.79975089e-02, 3.81119971e-05, 7.09977031e-01}}, + + {{3.03492755e-01, 3.28237981e-01, 8.50243345e-02}, + {4.50668961e-01, 9.40684199e-01, 4.83664215e-01}, + {6.55772448e-01, 3.32630165e-02, 3.81309800e-02}} + } + } + }); + + std::shared_ptr<Node> myPow = Pow(); + myPow->getOperator()->setDatatype(DataType::Float32); + myPow->getOperator()->setBackend("cpu"); + myPow->getOperator()->associateInput(0, input_1); + myPow->getOperator()->associateInput(1, input_2); + 
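+        // the 1x1 exponent broadcasts as a scalar, squaring each of the 54 elements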
myPow->getOperator()->computeOutputDims(); + myPow->forward(); + + float* resPtr = static_cast<float*>(myPow->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 54; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + } +} \ No newline at end of file diff --git a/unit_tests/operator/Test_SqrtImpl.cpp b/unit_tests/operator/Test_SqrtImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cf17499aba50359547218adc6b3938176e729ed3 --- /dev/null +++ b/unit_tests/operator/Test_SqrtImpl.cpp @@ -0,0 +1,121 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Sqrt.hpp" + +#include "aidge/backend/cpu.hpp" + +#include <memory> + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Sqrt(forward)") { + SECTION("2D Tensor") { + std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {16.00000000, 0.62226844}, + { 0.00000000, 1.84539008} + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {4.00000000, 0.78883994}, + {0.00000000, 1.35845140} + } + }); + + std::shared_ptr<Node> mySqrt = Sqrt(); + mySqrt->getOperator()->setDatatype(DataType::Float32); + mySqrt->getOperator()->setBackend("cpu"); + mySqrt->getOperator()->associateInput(0,input); + mySqrt->getOperator()->computeOutputDims(); + mySqrt->forward(); + + float* resPtr = static_cast<float*>(mySqrt->getOperator()->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< 4; ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } + + SECTION("4D Tensor") { + std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { + { + { + {{0.06218481, 0.46850157, 0.60914326}, + {0.57470602, 0.09943211, 0.59992820}, + {0.99623793, 0.54931718, 0.89343822}}, + {{0.75176072, 0.38237786, 0.84824580}, + {0.10619396, 0.11959118, 0.93499404}, + {0.65563291, 0.02913034, 0.17093092}}, + {{0.36303985, 0.92073035, 0.79146117}, + {0.88962847, 0.94561219, 0.92033130}, + {0.52903181, 0.13397896, 0.76086712}} + }, + { + {{0.31242222, 0.80526417, 0.48411584}, + {0.84375203, 0.65408552, 0.55028963}, + {0.77546734, 0.06203610, 0.83163154}}, + {{0.46342927, 0.53631741, 0.39145601}, + {0.14204198, 0.84214240, 0.94185621}, + {0.05068624, 0.99889028, 0.38464361}}, + {{0.37591159, 0.51769549, 0.30288595}, + {0.96883464, 0.35154045, 0.55648762}, + {0.13022375, 0.73467660, 0.02705121}} + } + } + }); + + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { + { + { + {{0.24936883, 0.6844717, 0.7804763}, + {0.75809366, 0.31532857, 0.7745503}, + {0.9981172, 0.7411593, 0.9452186}}, + {{0.86704135, 0.6183671, 0.9210026}, + {0.32587415, 0.34581956, 0.9669509}, + {0.80971164, 0.17067613, 0.41343793}}, + {{0.60252786, 0.9595469, 0.88964105}, + {0.9432012, 0.97242594, 0.95933896}, + {0.7273457, 0.36603138, 0.87227696}} + }, + { + 
+                    {{0.55894744, 0.89736515, 0.69578433},
+                     {0.91855973, 0.8087555, 0.7418151},
+                     {0.88060623, 0.24907047, 0.91193837}},
+                    {{0.6807564, 0.73233694, 0.6256645},
+                     {0.37688458, 0.9176832, 0.9704928},
+                     {0.22513604, 0.99944496, 0.62019646}},
+                    {{0.6131163, 0.7195106, 0.5503507},
+                     {0.984294, 0.59290844, 0.745981},
+                     {0.3608653, 0.8571328, 0.16447252}}
+                }
+            }
+        });
+
+        std::shared_ptr<Node> mySqrt = Sqrt();
+        mySqrt->getOperator()->setDatatype(DataType::Float32);
+        mySqrt->getOperator()->setBackend("cpu");
+        mySqrt->getOperator()->associateInput(0, input);
+        mySqrt->getOperator()->computeOutputDims();
+        mySqrt->forward();
+
+        float* resPtr = static_cast<float*>(mySqrt->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 54; ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
+    }
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_SubImpl.cpp b/unit_tests/operator/Test_SubImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d741602cf02958a88bb41bbd2927577027acb069
--- /dev/null
+++ b/unit_tests/operator/Test_SubImpl.cpp
@@ -0,0 +1,129 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Sub.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+#include <memory>
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Sub(forward)") {
+    SECTION("2D Tensor by Singleton") {
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.34234560, 0.92812711},
+                {0.73706615, 0.69953883}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,1,1>{{2.5}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {-2.15765429, -1.57187295},
+                {-1.76293385, -1.80046117}
+            }
+        });
+
+        std::shared_ptr<Node> mySub = Sub();
+        mySub->getOperator()->setDatatype(DataType::Float32);
+        mySub->getOperator()->setBackend("cpu");
+        mySub->getOperator()->associateInput(0, input_1);
+        mySub->getOperator()->associateInput(1, input_2);
+        mySub->getOperator()->computeOutputDims();
+        mySub->forward();
+
+        float* resPtr = static_cast<float*>(mySub->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 4; ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
+
+    }
+
+    SECTION("2D Tensors") {
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.34234560, 0.92812711},
+                {0.73706615, 0.69953883}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{
+            {
+                {0.61729127, 0.83004373},
+                {0.72002089, 0.52473849}
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {-0.27494568, 0.09808338},
+                {0.01704526, 0.17480034}
+            }
+        });
+
+        std::shared_ptr<Node> mySub = Sub();
+        mySub->getOperator()->setDatatype(DataType::Float32);
+        mySub->getOperator()->setBackend("cpu");
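+        // Both operands share the same 2x2 shape here, so the subtraction is
+        // purely element-wise (no broadcasting involved in this section).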
+        mySub->getOperator()->associateInput(0, input_1);
+        mySub->getOperator()->associateInput(1, input_2);
+        mySub->getOperator()->computeOutputDims();
+        mySub->forward();
+
+        float* resPtr = static_cast<float*>(mySub->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 4; ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
+
+    }
+
+    SECTION("3D Tensor by 1D Tensor") {
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> {
+            {
+                {{0.84181279, 0.20655948, 0.09750116},
+                 {0.37723488, 0.73120135, 0.04666907}},
+
+                {{0.91483921, 0.93985939, 0.58823180},
+                 {0.39963132, 0.67879969, 0.33209187}}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,3>{
+            {0.04784805, 0.91903114, 0.38606840}
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> {
+            {
+                {{0.79396474, -0.71247166, -0.28856725},
+                 {0.32938683, -0.18782979, -0.33939934}},
+
+                {{0.86699116, 0.02082825, 0.20216340},
+                 {0.35178328, -0.24023145, -0.05397654}}
+            }
+        });
+
+        std::shared_ptr<Node> mySub = Sub();
+        mySub->getOperator()->setDatatype(DataType::Float32);
+        mySub->getOperator()->setBackend("cpu");
+        mySub->getOperator()->associateInput(0, input_1);
+        mySub->getOperator()->associateInput(1, input_2);
+        mySub->getOperator()->computeOutputDims();
+        mySub->forward();
+
+        float* resPtr = static_cast<float*>(mySub->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 12; ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/unit_tests/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
similarity index 100%
rename from unit_tests/Test_Scheduler.cpp
rename to unit_tests/scheduler/Test_Scheduler.cpp
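
Editor's note: the three new test files above repeat the same forward-then-compare
boilerplate. A helper along the following lines could factor it out; this is a
sketch only (not part of the patch), and the approxEquals name, its suggested
location, and the default tolerance are assumptions.

    // unit_tests/TestUtils.hpp (hypothetical helper, sketch only)
    #include <cmath>
    #include <cstddef>
    #include <memory>

    #include "aidge/data/Tensor.hpp"

    namespace Aidge {
    // Compare two float tensors element by element with an absolute tolerance,
    // mirroring the rawPtr() access pattern used in the tests above.
    inline bool approxEquals(const std::shared_ptr<Tensor>& result,
                             const std::shared_ptr<Tensor>& expected,
                             std::size_t size, float absTol = 0.00001f) {
        const float* resPtr = static_cast<float*>(result->getImpl()->rawPtr());
        const float* expectedPtr = static_cast<float*>(expected->getImpl()->rawPtr());
        for (std::size_t i = 0; i < size; ++i) {
            if (std::abs(resPtr[i] - expectedPtr[i]) >= absTol) {
                return false;
            }
        }
        return true;
    }
    } // namespace Aidge

Each REQUIRE loop would then reduce to a single assertion, e.g.
REQUIRE(approxEquals(mySub->getOperator()->getOutput(0), expectedOutput, 12));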