diff --git a/.gitlab/ci/build.gitlab-ci.yml b/.gitlab/ci/build.gitlab-ci.yml index e996def7984908ea751c5e033814e81b18a3d51b..b9fdd937b358b714fd83a36d8417ad2b417d0385 100644 --- a/.gitlab/ci/build.gitlab-ci.yml +++ b/.gitlab/ci/build.gitlab-ci.yml @@ -136,70 +136,70 @@ build:ubuntu_python: paths: - venv/ -build:windows_cpp: - stage: build - needs: [] - tags: - - windows - - image: buildtools - before_script: - # Install Chocolatey - - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) - # Install dependencies - - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y - - choco install git -Y - - choco install python -Y - # Update PATH - - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") - script: - # Download dependencies - # aidge_core - - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip' - - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force - - Remove-Item .\build_cpp\ -Recurse - - - $env:CMAKE_PREFIX_PATH = '../install_cpp' - - mkdir -p build_cpp - - cd build_cpp - - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug .. - - cmake --build . -j2 - - cmake --install . --config Debug - - artifacts: - expire_in: 1 week - paths: - - build_cpp/ - - install_cpp/ - -build:windows_python: - stage: build - needs: [] - tags: - - windows - - image: buildtools - before_script: - # Install Chocolatey - - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) - # Install dependencies - - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y - - choco install git -Y - - choco install python -Y - # Update PATH - - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") - script: - # Download dependencies - # aidge_core (Python) - - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip' - - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force - - - python -m pip install virtualenv - - virtualenv venv - - venv\Scripts\Activate.ps1 - - python -m pip install -r requirements.txt - - python -m pip install . - artifacts: - expire_in: 1 week - paths: - - venv/ +# build:windows_cpp: +# stage: build +# needs: [] +# tags: +# - windows + +# image: buildtools +# before_script: +# # Install Chocolatey +# - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) +# # Install dependencies +# - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y +# - choco install git -Y +# - choco install python -Y +# # Update PATH +# - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") +# script: +# # Download dependencies +# # aidge_core +# - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip' +# - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force +# - Remove-Item .\build_cpp\ -Recurse + +# - $env:CMAKE_PREFIX_PATH = '../install_cpp' +# - mkdir -p build_cpp +# - cd build_cpp +# - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug .. +# - cmake --build . -j2 +# - cmake --install . --config Debug + +# artifacts: +# expire_in: 1 week +# paths: +# - build_cpp/ +# - install_cpp/ + +# build:windows_python: +# stage: build +# needs: [] +# tags: +# - windows + +# image: buildtools +# before_script: +# # Install Chocolatey +# - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) +# # Install dependencies +# - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y +# - choco install git -Y +# - choco install python -Y +# # Update PATH +# - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") +# script: +# # Download dependencies +# # aidge_core (Python) +# - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip' +# - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force + +# - python -m pip install virtualenv +# - virtualenv venv +# - venv\Scripts\Activate.ps1 +# - python -m pip install -r requirements.txt +# - python -m pip install . +# artifacts: +# expire_in: 1 week +# paths: +# - venv/ diff --git a/.gitlab/ci/test.gitlab-ci.yml b/.gitlab/ci/test.gitlab-ci.yml index 05f567dd7430b0d3a801612ca5353a39288285d2..8f6b1e54109c4c2dcfa026fd477a93b6c0a1c641 100644 --- a/.gitlab/ci/test.gitlab-ci.yml +++ b/.gitlab/ci/test.gitlab-ci.yml @@ -27,23 +27,23 @@ test:ubuntu_python: reports: junit: ${CI_PROJECT_NAME}/xmlrunner-results.xml -test:windows_cpp: - stage: test - needs: ["build:windows_cpp"] - tags: - - windows - image: buildtools - before_script: - # Install Chocolatey - - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) - # Install dependencies - - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y - - choco install python -Y - # Update PATH - - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") - script: - - cd build_cpp - - ctest --output-junit ctest-results.xml --output-on-failure - artifacts: - reports: - junit: build_cpp/ctest-results.xml +# test:windows_cpp: +# stage: test +# needs: ["build:windows_cpp"] +# tags: +# - windows +# image: buildtools +# before_script: +# # Install Chocolatey +# - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) +# # Install dependencies +# - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y +# - choco install python -Y +# # Update PATH +# - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") +# script: +# - cd build_cpp +# - ctest --output-junit ctest-results.xml --output-on-failure +# artifacts: +# reports: +# junit: build_cpp/ctest-results.xml diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000000000000000000000000000000000000..82e90519cc6546e5fa2c2dfa76bc32893d7cad64 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,3 @@ +# Version 0.1.0 (January 23, 2024) + +Initial release diff --git a/aidge_backend_cpu/unit_tests/test_tensor.py b/aidge_backend_cpu/unit_tests/test_tensor.py index 438b6acd51791a52c9e308fb1aceaefb2a45fb29..37531b43cf7755dfb760e575450b70bfa9a6ff68 100644 --- a/aidge_backend_cpu/unit_tests/test_tensor.py +++ b/aidge_backend_cpu/unit_tests/test_tensor.py @@ -16,7 +16,7 @@ class test_tensor(unittest.TestCase): self.assertTrue("cpu" in aidge_core.Tensor.get_available_backends()) def test_numpy_int_to_tensor(self): - np_array = np.arange(9).reshape(1,1,3,3) + np_array = np.arange(9).reshape(1,1,3,3).astype(np.int32) # Numpy -> Tensor t = aidge_core.Tensor(np_array) self.assertEqual(t.dtype(), aidge_core.DataType.Int32) @@ -35,6 +35,16 @@ class test_tensor(unittest.TestCase): for i,j in zip(t.dims(), nnarray.shape): self.assertEqual(i,j) + def test_numpy_int64_to_tensor(self): + np_array = np.arange(9).reshape(1,1,3,3).astype(np.int64) + # Numpy -> Tensor + t = aidge_core.Tensor(np_array) + self.assertEqual(t.dtype(), aidge_core.DataType.Int64) + for i_t, i_n in zip(t, np_array.flatten()): + self.assertTrue(i_t == i_n) + for i,j in zip(t.dims(), np_array.shape): + self.assertEqual(i,j) + def test_numpy_float_to_tensor(self): t = aidge_core.Tensor() np_array = np.random.rand(1, 1, 3, 3).astype(np.float32) @@ -49,7 +59,7 @@ class test_tensor(unittest.TestCase): def test_get_set(self): dims = [2,2,2] - np_array = np.arange(8).reshape(dims) + np_array = np.arange(8).reshape(dims).astype(np.int32) # Numpy -> Tensor t = aidge_core.Tensor(np_array) for i in range(8): diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp index 012ff5af1c15e73fe76114a23ec62f9ef023bce2..c451b4a5beccacb7980c834d56b979c1b76cdd3f 100644 --- a/include/aidge/backend/cpu/data/TensorImpl.hpp +++ b/include/aidge/backend/cpu/data/TensorImpl.hpp @@ -1,29 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + #ifndef AIDGE_CPU_DATA_TENSORIMPL_H_ #define AIDGE_CPU_DATA_TENSORIMPL_H_ #include "aidge/backend/TensorImpl.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/data/half.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/future_std/span.hpp" namespace Aidge { + template <class T> class TensorImpl_cpu : public TensorImpl { - private: +private: const Tensor &mTensor; // Impl needs to access Tensor information, but is not // supposed to change it! - std::vector<T> mData; + /// Pointer to the data and its capacity + future_std::span<T> mData; + /// If this instance own the data, std::unique_ptr manages it + std::unique_ptr<T[]> mDataOwner; - public: +public: static constexpr const char *Backend = "cpu"; TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {} bool operator==(const TensorImpl &otherImpl) const override final { + const auto& typedOtherImpl = reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl); + AIDGE_INTERNAL_ASSERT(typedOtherImpl.size() >= mTensor.size()); + std::size_t i = 0; for (; i < mTensor.size() && - mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i]; - ++i) { + *(mData.data()+i) == *static_cast<const T*>(typedOtherImpl.rawPtr(i)); + ++i) { } return i == mTensor.size(); } @@ -32,37 +53,130 @@ class TensorImpl_cpu : public TensorImpl { return std::make_unique<TensorImpl_cpu<T>>(tensor); } - // native interface - const std::vector<T> &data() const { return mData; } + inline std::size_t size() const noexcept override final { return mData.size(); } + inline std::size_t scalarSize() const noexcept override final { return sizeof(T); } - std::size_t scalarSize() const override { return sizeof(T); } + void setDevice(DeviceIdx_t device) override final { + AIDGE_ASSERT(device == 0, "device cannot be != 0 for CPU backend"); + } - void copy(const void *src, NbElts_t length) override { + void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override final { + AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity"); std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, - static_cast<T *>(rawPtr())); + static_cast<T *>(rawPtr()) + offset); + } + + void copyCast(const void *src, NbElts_t length, const DataType srcDt) override final { + if (length == 0) { + return; + } + + AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity"); + switch (srcDt) + { + case DataType::Float64: + std::copy(static_cast<const double*>(src), static_cast<const double*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::Float32: + std::copy(static_cast<const float*>(src), static_cast<const float*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::Float16: + std::copy(static_cast<const half_float::half*>(src), static_cast<const half_float::half*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::Int64: + std::copy(static_cast<const int64_t*>(src), static_cast<const int64_t*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::UInt64: + std::copy(static_cast<const uint64_t*>(src), static_cast<const uint64_t*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::Int32: + std::copy(static_cast<const int32_t*>(src), static_cast<const int32_t*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::UInt32: + std::copy(static_cast<const uint32_t*>(src), static_cast<const uint32_t*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::Int16: + std::copy(static_cast<const int16_t*>(src), static_cast<const int16_t*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::UInt16: + std::copy(static_cast<const uint16_t*>(src), static_cast<const uint16_t*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::Int8: + std::copy(static_cast<const int8_t*>(src), static_cast<const int8_t*>(src) + length, + static_cast<T *>(rawPtr())); + break; + case DataType::UInt8: + std::copy(static_cast<const uint8_t*>(src), static_cast<const uint8_t*>(src) + length, + static_cast<T *>(rawPtr())); + break; + default: + AIDGE_THROW_OR_ABORT(std::runtime_error, "Unsupported data type."); + break; + } } - void *rawPtr() override { - lazyInit(mData); - return mData.data(); + void copyFromDevice(const void *src, NbElts_t length, const std::pair<std::string, DeviceIdx_t>& device) override final { + AIDGE_ASSERT(device.first == Backend, "backend must match"); + AIDGE_ASSERT(device.second == 0, "device cannot be != 0 for CPU backend"); + copy(src, length); + } + + inline void copyFromHost(const void *src, NbElts_t length) override final { + copy(src, length); + } + + void copyToHost(void *dst, NbElts_t length) const override final { + AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity"); + const T* src = static_cast<const T*>(rawPtr()); + std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, + static_cast<T *>(dst)); + } + + void *rawPtr(NbElts_t offset = 0) override final { + lazyInit(); + return (mData.data() + offset); }; - void* getRaw(std::size_t idx){ - return static_cast<void*>(static_cast<T *>(rawPtr()) + idx); - }; + const void *rawPtr(NbElts_t offset = 0) const override final { + AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const rawPtr"); + return (mData.data() + offset); + }; - virtual ~TensorImpl_cpu() = default; + void *hostPtr(NbElts_t offset = 0) override final { + lazyInit(); + return (mData.data() + offset); + }; - void setRawPtr(void *ptr) override final { - T *newPtr = static_cast<T *>(ptr); - mData = std::vector<T>(newPtr, newPtr + mTensor.size()); + const void *hostPtr(NbElts_t offset = 0) const override final { + AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const hostPtr"); + return (mData.data() + offset); }; - private: - void lazyInit(std::vector<T> &data) { - assert(mTensor.dataType() == NativeType<T>::type); + void setRawPtr(void *ptr, NbElts_t length) override final { + AIDGE_ASSERT(length >= mTensor.size(), "trying to set raw pointer of insufficient capacity"); + mData = future_std::span<T>(static_cast<T *>(ptr), length); + mDataOwner.reset(); + }; - if (data.size() != mTensor.size()) data.resize(mTensor.size()); + virtual ~TensorImpl_cpu() = default; + +private: + void lazyInit() { + if (mData.size() < mTensor.size()) { + // Need more data, a re-allocation will occur + AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "trying to enlarge non-owned data"); + mDataOwner.reset(new T[mTensor.size()]); + mData = future_std::span<T>(mDataOwner.get(), mTensor.size()); + } } }; @@ -71,8 +185,12 @@ static Registrar<Tensor> registrarTensorImpl_cpu_Float64( {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); static Registrar<Tensor> registrarTensorImpl_cpu_Float32( {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); +static Registrar<Tensor> registrarTensorImpl_cpu_Float16( + {"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create); static Registrar<Tensor> registrarTensorImpl_cpu_Int32( {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create); +static Registrar<Tensor> registrarTensorImpl_cpu_Int64( + {"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<long>::create); } // namespace } // namespace Aidge diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp index f5974f1509edd35ee1850ba707e6c23cd68f2b52..57669c628b4fa650f137c2b28c8c0a4584bf6c35 100644 --- a/include/aidge/backend/cpu/operator/AddImpl.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl.hpp @@ -39,19 +39,7 @@ public: return std::make_unique<AddImpl_cpu>(op); } -public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final; - - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - - void updateConsummerProducer() override final; - void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp index 5598cc9cdfd463b6e40e6801b74203b911a318e6..d6950e11e935a3f6d5548148d1c393a5340af224 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp @@ -61,6 +61,7 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, for (std::size_t ch = 0; ch < dims[1]; ++ch) { const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; + std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0); for (std::size_t ox = 0; ox < oxSize; ++ox) { const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp index cbd784698fcce5152c0bb42a192c327abb2b10dd..83607f280f53e5e477db7d8bbbbd1634dd9c584d 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp @@ -14,6 +14,7 @@ #include "aidge/utils/Registrar.hpp" +#include "aidge/data/half.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" @@ -151,6 +152,9 @@ namespace { static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32( {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>); +static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float16( + {DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16}, + Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>); static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32( {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>); diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp index 80e2f0fcef83a369561095f8e55a437f7acc9675..1cba5906064c51a4f0da2f1f3682b0828a080d43 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp @@ -13,7 +13,6 @@ #define AIDGE_CPU_OPERATOR_SLICEIMPL_H_ #include <memory> -#include <tuple> #include <vector> #include "aidge/backend/OperatorImpl.hpp" @@ -39,7 +38,6 @@ class SliceImplBackward_cpu const void*, void*)> {}; - class SliceImpl_cpu : public OperatorImpl { public: SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op) {} @@ -48,7 +46,6 @@ public: return std::make_unique<SliceImpl_cpu>(op); } -public: NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; NbElts_t getRequiredMemory(const IOIndex_t outputIdx, @@ -58,14 +55,12 @@ public: void updateConsummerProducer() override final; void forward() override; - void backward() override; }; - namespace { static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create); -} // namespace +} } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp index 7eb4b9dc2cb8dddc8b7fdaf4d63b8f1d39d879b0..9f08fab758a1d8c717ccb5f0a0357f94fd86e5e4 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp @@ -12,57 +12,73 @@ #ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ #define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ -#include "aidge/utils/Registrar.hpp" -#include "aidge/operator/Slice.hpp" -#include "aidge/backend/cpu/operator/SliceImpl.hpp" -#include <vector> #include <cstddef> +#include <vector> +#include "aidge/backend/cpu/operator/SliceImpl.hpp" #include "aidge/data/Data.hpp" +#include "aidge/operator/Slice.hpp" +#include "aidge/utils/Registrar.hpp" namespace Aidge { template <class I> void SliceImpl_cpu_forward_kernel(const typename Slice_Op::Attrs& attrs, - const std::vector<std::size_t> inputDims, - const void* input_, - void* output_) { + const std::vector<std::size_t> inputDims, + const void* input_, + void* output_) { + std::vector<std::size_t> slicedDims = inputDims; + + std::size_t beginning = 0; + DimSize_t nbAxes = std::get<2>(attrs).size(); + for (std::size_t i = 0; i < nbAxes; ++i) { + // For each slice operation get the params and cast them to size_t + const std::int64_t axis_ = std::get<2>(attrs)[i]; + const std::int64_t start_ = std::get<0>(attrs)[i]; + const std::int64_t end_ = std::get<1>(attrs)[i]; + const std::size_t axis = axis_ >= 0 ? axis_ : static_cast<std::size_t>(axis_ + static_cast<std::int32_t>(inputDims.size())); + const std::size_t start = start_ >= 0 ? start_ : start_ + inputDims[axis]; + const std::size_t end = end_ >= 0 ? end_ : end_ + inputDims[axis]; + std::size_t stride = 1; + for (std::size_t j = inputDims.size() - 1; j > axis; --j) stride *= inputDims[j]; + beginning += start * stride; + const std::size_t sliceLength = end - start + 1; + slicedDims[axis] = sliceLength; + } - const I* input = static_cast<const I*>(input_) + std::get<0>(attrs); + const I* input = static_cast<const I*>(input_) + beginning; I* output = static_cast<I*>(output_); - const std::vector<std::size_t> slicedDims = std::get<1>(attrs); const std::size_t nbDims = slicedDims.size(); - // for inputDims = {4,5,5,3} & slicedDims = {3,2,2,1}, substractDims = {1,5,5,3} + // for inputDims = {4,5,5,3} & slicedDims = {3,2,2,1}, substractDims = {1,5,5,3} std::vector<std::size_t> substractedDims = std::vector<std::size_t>(nbDims); for (std::size_t i = 0; i < nbDims; ++i) { substractedDims[i] = inputDims[i] - slicedDims[i]; } - // for slicedDims = {3,2,2,1}, prodSlicedDims = {12,4,2,1} + // for slicedDims = {3,2,2,1}, prodSlicedDims = {12,4,2,1} std::vector<std::size_t> prodSlicedDims = std::vector<std::size_t>(nbDims); - std::vector<std::size_t> prodInputDims = std::vector<std::size_t>(nbDims+1); - prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1]; - prodInputDims[nbDims - 1] = inputDims[nbDims - 1]; - prodInputDims[nbDims] = 1; - for (std::size_t i = 2; i <= nbDims; ++i) { - prodSlicedDims[nbDims - i] = prodSlicedDims[nbDims - i + 1]*slicedDims[nbDims - i]; - prodInputDims[nbDims - i] = prodInputDims[nbDims - i + 1]*inputDims[nbDims - i]; - } + std::vector<std::size_t> prodInputDims = std::vector<std::size_t>(nbDims + 1); + prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1]; + prodInputDims[nbDims - 1] = inputDims[nbDims - 1]; + prodInputDims[nbDims] = 1; + for (std::size_t i = 2; i <= nbDims; ++i) { + prodSlicedDims[nbDims - i] = prodSlicedDims[nbDims - i + 1] * slicedDims[nbDims - i]; + prodInputDims[nbDims - i] = prodInputDims[nbDims - i + 1] * inputDims[nbDims - i]; + } - std::size_t j = 0; - std::size_t i = 0; - for (; j < prodSlicedDims[0];) { - output[j] = input[i++]; + std::size_t j = 0; + std::size_t i = 0; + for (; j < prodSlicedDims[0];) { + output[j] = input[i++]; ++j; - for (std::size_t idx = nbDims - 1; idx > 0; --idx) { - i += j % prodSlicedDims[idx] == 0 ? substractedDims[idx]*prodInputDims[idx+1] : 0; - } - } + for (std::size_t idx = nbDims - 1; idx > 0; --idx) { + i += j % prodSlicedDims[idx] == 0 ? substractedDims[idx] * prodInputDims[idx + 1] : 0; + } + } } namespace { -// DIM = 1 static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32( {DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float>); static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32( diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index 915109461cec4ce9e1db8a3de25526d7dc87f24d..7355ebcb3e8fb68bf74dbd1ce831bf471d285cb7 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -21,46 +21,11 @@ #include "aidge/backend/cpu/operator/AddImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getRawInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims(); - return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); -} - Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! + // this implementation can be in-place return 0; } -Aidge::NbElts_t Aidge::AddImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::AddImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t inputIdx) const { - assert(inputIdx < mNbConsumedData.size()); - return mNbConsumedData[inputIdx]; -} - -Aidge::NbElts_t Aidge::AddImpl_cpu::getNbProducedData(const Aidge::IOIndex_t outputIdx) const { - assert(outputIdx < mNbProducedData.size()); - return mNbProducedData[outputIdx]; -} - -void Aidge::AddImpl_cpu::updateConsummerProducer() { - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); - -} - void Aidge::AddImpl_cpu::forward() { assert(mOp.getRawInput(0) && "missing input in Add operator"); DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(); @@ -69,13 +34,31 @@ void Aidge::AddImpl_cpu::forward() { assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput); } - auto kernelFunc = Registrar<AddImplForward_cpu>::create({ + // Find the correct kernel type + const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType(); + const Registrar<AddImplForward_cpu>::registrar_key registrarKey = { datatypeFirstInput, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + outputDataType}; + Registrar<AddImplForward_cpu>::registrar_type kernelFunc; + if (Registrar<AddImplForward_cpu>::exists(registrarKey)) { + // One exists with the right inputs/output types + kernelFunc = Registrar<AddImplForward_cpu>::create(registrarKey); + } + else { + // Otherwise, fallback to the kernel with all types matching output type + kernelFunc = Registrar<AddImplForward_cpu>::create({ + outputDataType, outputDataType}); + } + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. std::size_t nbDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->nbDims(); std::vector<std::vector<std::size_t>> inputsDims; std::vector<const void*> opInputs; + std::vector<std::shared_ptr<Tensor>> inputsFallback(mOp.nbInputs()); for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) { std::vector<std::size_t> inputDims(nbDims, 1); auto dims = std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dims(); @@ -85,7 +68,8 @@ void Aidge::AddImpl_cpu::forward() { inputDims[idx] = dims[j]; } inputsDims.push_back(inputDims); - opInputs.push_back(getCPUPtr(mOp.getRawInput(i))); + const auto& input = std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->refCastFrom(inputsFallback[i], *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + opInputs.push_back(input.getImpl()->rawPtr()); } kernelFunc(opInputs, @@ -93,4 +77,4 @@ void Aidge::AddImpl_cpu::forward() { std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), getCPUPtr(mOp.getRawOutput(0))); -} \ No newline at end of file +} diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index d476f84717c0ed6f7bd45d68bd24b4d7ada6cbbd..b849142dd3abe0131fb0c6c448530a7669ce27dc 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -34,14 +34,35 @@ void Aidge::ConvImpl2D_cpu::forward() { assert(mOp.getRawInput(2) && "missing input #2"); // Find the correct kernel type - auto kernelFunc = - Registrar<ConvImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType(); + const Registrar<ConvImpl2DForward_cpu>::registrar_key registrarKey = { + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), + outputDataType}; + + Registrar<ConvImpl2DForward_cpu>::registrar_type kernelFunc; + if (Registrar<ConvImpl2DForward_cpu>::exists(registrarKey)) { + // One exists with the right inputs/output types + kernelFunc = Registrar<ConvImpl2DForward_cpu>::create(registrarKey); + } + else { + // Otherwise, fallback to the kernel with all types matching output type + kernelFunc = Registrar<ConvImpl2DForward_cpu>::create({ + outputDataType, outputDataType, outputDataType, outputDataType}); + } + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; + const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(input0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); // Call kernel kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), - getCPUPtr(mOp.getRawInput(2)), getCPUPtr(mOp.getRawOutput(0))); + input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(), + getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp index 14f59f6f7baff57602ad71c8c08023038963b5f0..bc4a7a7cab91049c623e9a9e95ee63367da00722 100644 --- a/src/operator/FCImpl.cpp +++ b/src/operator/FCImpl.cpp @@ -29,29 +29,37 @@ void Aidge::FCImpl_cpu::forward() assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(2)) && "missing input #2"); // Find the correct kernel type - auto kernelFunc = Registrar<FCImplForward_cpu>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType(); + const Registrar<FCImplForward_cpu>::registrar_key registrarKey = { + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), + outputDataType}; + + Registrar<FCImplForward_cpu>::registrar_type kernelFunc; + if (Registrar<FCImplForward_cpu>::exists(registrarKey)) { + // One exists with the right inputs/output types + kernelFunc = Registrar<FCImplForward_cpu>::create(registrarKey); + } + else { + // Otherwise, fallback to the kernel with all types matching output type + kernelFunc = Registrar<FCImplForward_cpu>::create({ + outputDataType, outputDataType, outputDataType, outputDataType}); + } + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; + const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(input0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); + const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); // Call kernel - // if (std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->nbDims() == 4) { - // kernelFunc( - // mOp.getStaticAttributes(), - // std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - // getCPUPtr(mOp.getRawInput(0), - // mOp.mInputs[1]->getImpl()->rawPtr(), - // mOp.mInputs[2]->getImpl()->rawPtr(), - // mOp.getOutput(0)->getImpl()->rawPtr()); - // } - // else - kernelFunc( - dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0], - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1)), - getCPUPtr(mOp.getRawInput(2)), + kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), + input0.dims()[0], + input0.size() / input0.dims()[0], + input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp index 1abd75db070bbd3b197519318f5bf23c7b46ee5a..f02effb3172e2c0624c6c7532513a2b794ee3a89 100644 --- a/src/operator/MatMulImpl.cpp +++ b/src/operator/MatMulImpl.cpp @@ -47,7 +47,7 @@ void Aidge::MatMulImpl_cpu::forward() kernelFunc( dynamic_cast<const MatMul_Op&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0], - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size() / std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0], getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), getCPUPtr(mOp.getRawOutput(0))); diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp index 428d32fc7a4c1a2b639d4f78601c78ab41376b47..c3086d8f9067996b9b0a8546b6deb3e281c777b4 100644 --- a/src/operator/SoftmaxImpl.cpp +++ b/src/operator/SoftmaxImpl.cpp @@ -38,7 +38,7 @@ void Aidge::SoftmaxImpl_cpu::forward() { DimSize_t batchSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0]; DimSize_t channelSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[1]; - DimSize_t featureSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1()/channelSize; + DimSize_t featureSize = (std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size()/batchSize)/channelSize; // Call kernel kernelFunc(batchSize, channelSize, diff --git a/unit_tests/data/Test_TensorImpl.cpp b/unit_tests/data/Test_TensorImpl.cpp index 6c75c4dc19ff1b646308858ad262441d43390122..b75c49077f190ed61486fea8eaa18152423a73ed 100644 --- a/unit_tests/data/Test_TensorImpl.cpp +++ b/unit_tests/data/Test_TensorImpl.cpp @@ -45,7 +45,7 @@ TEST_CASE("Tensor creation") { REQUIRE(x.get<int>({0, 0, 1}) == 2); REQUIRE(x.get<int>({0, 1, 1}) == 4); REQUIRE(x.get<int>({1, 1, 0}) == 7); - x.get<int>({1, 1, 1}) = 36; + x.set<int>({1, 1, 1}, 36); REQUIRE(x.get<int>({1, 1, 1}) == 36); } diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..71646c92fa7f041d695a89858cf21ab0d0336f2c --- /dev/null +++ b/unit_tests/operator/Test_MetaOperator.cpp @@ -0,0 +1,190 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <cmath> +#include <cstdlib> +#include <memory> + +#include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/PadImpl.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Conv.hpp" +#include "aidge/operator/MetaOperator.hpp" +#include "aidge/operator/MetaOperatorDefs.hpp" +#include "aidge/operator/Pad.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] MetaOperator/PaddedConv(forward)", "[MetaOperator][PaddedConv][CPU]") { + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>( + Array4D<double, 4, 3, 3, 3>{{{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02}, + {1.16492919e-01, 8.21634093e-02, 1.17413265e-01}, + {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}}, + {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01}, + {6.12586558e-01, 8.09918671e-02, 8.40649383e-01}, + {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}}, + {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01}, + {1.36960611e-01, 4.64806728e-01, 4.85150000e-01}, + {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}}, + + {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01}, + {1.56806559e-01, 6.22280998e-01, 3.15827594e-01}, + {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}}, + {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01}, + {1.67925807e-01, 2.68356150e-01, 6.28875602e-01}, + {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}}, + {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01}, + {7.63047502e-01, 5.12539506e-02, 9.77400493e-01}, + {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}}, + + {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01}, + {7.10897067e-02, 5.02579011e-01, 3.35236224e-01}, + {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}}, + {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01}, + {1.59875539e-01, 9.13163381e-01, 3.59806060e-01}, + {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}}, + {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01}, + {5.46298282e-01, 2.89698587e-01, 2.62612651e-01}, + {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}}, + + {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01}, + {8.67878485e-01, 2.93263422e-01, 8.03912714e-01}, + {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}}, + {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01}, + {5.80538620e-01, 6.63031275e-01, 2.07247191e-01}, + {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}}, + {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01}, + {7.34639028e-01, 2.84957200e-02, 9.70225217e-01}, + {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}}); + std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>( + Array1D<double, 4>{{0.16884905, 0.27994487, 0.57227465, 0.06435205}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<double, 2, 3, 5, 5>{ + // NCHW + {{{{0.43224481, 0.9047832, 0.18402257, 0.06162838, 0.52490127}, + {0.27773404, 0.55402353, 0.9485062, 0.31197083, 0.80328607}, + {0.85065842, 0.88226201, 0.54971951, 0.23360494, 0.53907884}, + {0.33423098, 0.79564312, 0.80419414, 0.76839638, 0.87248221}, + {0.77328729, 0.65749407, 0.47277589, 0.32889198, 0.93970518}}, + + {{0.66669145, 0.64193351, 0.45315988, 0.32794057, 0.38461822}, + {0.72295814, 0.18395073, 0.85909664, 0.30010301, 0.56065865}, + {0.34777938, 0.77869746, 0.33159421, 0.19540932, 0.77767906}, + {0.5778391, 0.08218411, 0.27758371, 0.99017749, 0.61827997}, + {0.10440745, 0.3197831, 0.89157608, 0.12216887, 0.950232}}, + + {{0.68073443, 0.2681118, 0.51848834, 0.62864493, 0.36717478}, + {0.64106244, 0.43779425, 0.02771029, 0.78275231, 0.45693104}, + {0.6487417, 0.01603838, 0.73869997, 0.96494221, 0.39588782}, + {0.5975827, 0.90913292, 0.55036969, 0.4747373, 0.62460509}, + {0.79675124, 0.02807549, 0.53227602, 0.88805927, 0.96646591}}}, + + {{{0.81851935, 0.21267665, 0.01580692, 0.54907998, 0.89010049}, + {0.80165784, 0.55195592, 0.20740314, 0.22782844, 0.89205031}, + {0.94217108, 0.58434542, 0.20738313, 0.79065873, 0.9371597}, + {0.02254708, 0.95539178, 0.95165758, 0.53736666, 0.49100362}, + {0.08018625, 0.69108027, 0.00329741, 0.74565761, 0.30899213}}, + + {{0.34868638, 0.12792604, 0.37382248, 0.0374756, 0.50653087}, + {0.59614405, 0.64820746, 0.31470307, 0.62460364, 0.29253268}, + {0.92864889, 0.51014224, 0.08921206, 0.11094072, 0.64691121}, + {0.50586371, 0.6686477, 0.72511169, 0.41681783, 0.6325049}, + {0.71594137, 0.73382767, 0.36589439, 0.03255165, 0.75006865}}, + + {{0.6294127, 0.85548534, 0.0902963, 0.28915773, 0.36564289}, + {0.95873236, 0.6742374, 0.55679676, 0.6323497, 0.34072958}, + {0.49694061, 0.79173045, 0.19738225, 0.14755281, 0.80818177}, + {0.02332061, 0.74270703, 0.59415632, 0.08195934, 0.46295434}, + {0.71426058, 0.85032931, 0.90750818, 0.28768431, 0.4401146}}}}}); + + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array4D<double, 2, 4, 5, 5>{{{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273}, + {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567}, + {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523}, + {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136}, + {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}}, + + {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890}, + {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475}, + {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442}, + {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438}, + {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}}, + + {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092}, + {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575}, + {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146}, + {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581}, + {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}}, + + {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740}, + {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107}, + {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523}, + {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123}, + {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}}, + + + {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229}, + {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444}, + {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241}, + {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706}, + {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}}, + + {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648}, + {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705}, + {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404}, + {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069}, + {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}}, + + {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888}, + {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179}, + {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316}, + {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387}, + {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}}, + + {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038}, + {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408}, + {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357}, + {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303}, + {3.16612267, 4.38248920, 5.23248482, 4.21292210, 2.86031270}}}}}); + + std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv"); + auto convOp = std::static_pointer_cast<OperatorTensor>(myConv->getOperator()); + + std::shared_ptr<Node> myPad = + Pad<2>({1, 1, 1, 1}, "myPad", PadBorderType::Constant, 0.0); + auto padOp = std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + + convOp->setInput(1, myWeights); + convOp->setInput(2, myBias); + + myPad->addChild(myConv, 0, 0); + padOp->setInput(0, myInput); + + padOp->setDataType(DataType::Float64); + padOp->setBackend("cpu"); + padOp->computeOutputDims(); + convOp->setDataType(DataType::Float64); + convOp->setBackend("cpu"); + convOp->computeOutputDims(); + + myPad->forward(); + myConv->forward(); + convOp -> getOutput(0) -> print(); + + double* computedOutput = static_cast<double*>(convOp->getOutput(0)->getImpl()->rawPtr()); + double* expectedOutput = static_cast<double*>(myOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i < myOutput->size(); ++i) { + REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5); + } + + std::shared_ptr<Node> myPaddedConv = + PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1}); +} \ No newline at end of file diff --git a/unit_tests/operator/Test_SliceImpl.cpp b/unit_tests/operator/Test_SliceImpl.cpp index 3e25c28f9caac61c64d38fa70879af79d20392bc..7a71f31e9850852cadd659c91683c30ddcbe9849 100644 --- a/unit_tests/operator/Test_SliceImpl.cpp +++ b/unit_tests/operator/Test_SliceImpl.cpp @@ -27,14 +27,14 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { {0, 1, 2,-3} }); - std::shared_ptr<Node> mySlice = Slice(0, {4}); + std::shared_ptr<Node> mySlice = Slice({0}, {3}, {0}); auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); mySlice->getOperator()->associateInput(0,input0); mySlice->getOperator()->setDataType(DataType::Int32); mySlice->getOperator()->setBackend("cpu"); op->computeOutputDims(); mySlice->forward(); - // mySlice->getOperator()->output(0).print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); @@ -54,7 +54,7 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { } }); - std::shared_ptr<Node> mySlice = Slice(5, {2,3}); + std::shared_ptr<Node> mySlice = Slice({0,5}, {1,7}, {0,1}); auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); mySlice->getOperator()->associateInput(0,input0); mySlice->getOperator()->setDataType(DataType::Int32); @@ -88,7 +88,7 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { } }); - std::shared_ptr<Node> mySlice = Slice(14, {1,1,3}); + std::shared_ptr<Node> mySlice = Slice({0,1,4}, {0,1,6}, {0,1,2}); auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); mySlice->getOperator()->associateInput(0,input0); mySlice->getOperator()->setDataType(DataType::Int32); @@ -151,7 +151,7 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { } }); - std::shared_ptr<Node> mySlice = Slice(0, {2,2,2,10}); + std::shared_ptr<Node> mySlice = Slice({0,0,0,0}, {1,1,1,9}, {0,1,2,3}); auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); mySlice->getOperator()->associateInput(0,input0); mySlice->getOperator()->setDataType(DataType::Int32); diff --git a/unit_tests/operator/Test_SoftmaxImpl.cpp b/unit_tests/operator/Test_SoftmaxImpl.cpp index 3d3c9fe4a0de0183e9069b814084aa80019adf0f..360b7440599030dbd93954e345f0d5986eb83b15 100644 --- a/unit_tests/operator/Test_SoftmaxImpl.cpp +++ b/unit_tests/operator/Test_SoftmaxImpl.cpp @@ -39,7 +39,7 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") { } }); - std::shared_ptr<Node> mySoftmax = Softmax(); + std::shared_ptr<Node> mySoftmax = Softmax(1); auto op = std::static_pointer_cast<OperatorTensor>(mySoftmax -> getOperator()); mySoftmax->getOperator()->associateInput(0,input); mySoftmax->getOperator()->setDataType(DataType::Float32); @@ -108,7 +108,7 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") { } }); - std::shared_ptr<Node> mySoftmax = Softmax(); + std::shared_ptr<Node> mySoftmax = Softmax(1); auto op = std::static_pointer_cast<OperatorTensor>(mySoftmax -> getOperator()); mySoftmax->getOperator()->associateInput(0,input); mySoftmax->getOperator()->setDataType(DataType::Float32); diff --git a/unit_tests/recipies/Test_ExplicitCastMove.cpp b/unit_tests/recipies/Test_ExplicitCastMove.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7d169ba9ba949ead0bf96f80e53a47e1ca6c24d9 --- /dev/null +++ b/unit_tests/recipies/Test_ExplicitCastMove.cpp @@ -0,0 +1,46 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/recipies/Recipies.hpp" +#include "aidge/operator/Conv.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/graph/OpArgs.hpp" +#include <cstddef> + +using namespace Aidge; + +TEST_CASE("[ExplicitCastMove] conv") { + auto conv1 = Conv(3, 32, {3, 3}, "conv1"); + auto conv2 = Conv(32, 64, {3, 3}, "conv2"); + auto conv3 = Conv(64, 10, {1, 1}, "conv3", {2, 2}); + + auto g1 = Sequential({ + Producer({16, 3, 224, 224}, "dataProvider"), + conv1, + conv2, + conv3 + }); + + g1->setBackend("cpu"); + conv1->getOperator()->setDataType(DataType::Int32); + conv3->getOperator()->setDataType(DataType::Float64); + + g1->save("explicitCastMove_before"); + REQUIRE(g1->getNodes().size() == 10); + + g1->forwardDims(); + explicitCastMove(g1); + + g1->save("explicitCastMove_after"); + REQUIRE(g1->getNodes().size() == 13); +} diff --git a/unit_tests/recipies/Test_HorizontalTiling.cpp b/unit_tests/recipies/Test_HorizontalTiling.cpp index b71a01d130a783caf5c643dfb0c3757b1c524e5e..268d94cc55821c41f9c3d4a8451b5730ecaf1bd0 100644 --- a/unit_tests/recipies/Test_HorizontalTiling.cpp +++ b/unit_tests/recipies/Test_HorizontalTiling.cpp @@ -183,26 +183,4 @@ TEST_CASE("[core/recipies] Tiling(transformation)", "[Tiling][Recipies]") { } } } -} - // std::shared_ptr<GraphView> g = Sequential({ - // Conv(3, 16, {3,3}, "conv1"), - // ReLU("relu1"), - // Conv(16, 32, {1,1}, "conv2"), - // Conv(32, 16, {1,1}, "conv3"), - // Conv(16, 10, {3,3}, "conv4"), - // ReLU("relu2") - // }); - - // for (auto& individualConv : g->match("Conv")) { - // auto tiledConv = horizontalTiling(individualConv); - // g->replace(individualConv, tiledConv); - // } - // } - - // SECTION("Create the GraphView with tiled layers") { - // std::shared_ptr<GraphView> g; - // g->addChild(horizontalTiling(Conv())) - // } - -// } -// } // namespace Aidge \ No newline at end of file +} // namespace Aidge \ No newline at end of file diff --git a/unit_tests/scheduler/Test_CastMove.cpp b/unit_tests/scheduler/Test_CastMove.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a52b2b06901818f01117273d181d5d5388348f95 --- /dev/null +++ b/unit_tests/scheduler/Test_CastMove.cpp @@ -0,0 +1,246 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/TensorUtils.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/graph/OpArgs.hpp" +#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/recipies/Recipies.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/castmove] CastMove(forward)") { + std::shared_ptr<Tensor> inputTensor = + std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}}, + {{{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}}}}); + + std::shared_ptr<Tensor> weight1 = std::make_shared<Tensor>( + Array4D<int, 3, 1, 3, 3>{{{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}}, + {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}}, + {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}}); + + std::shared_ptr<Tensor> bias1 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + + SECTION("Test implicit") { + std::shared_ptr<GraphView> g = + Sequential({ + Conv(1, 3, {3, 3}, "conv1"), + Conv(3, 4, {1, 1}, "conv2"), + Conv(4, 3, {1, 1}, "conv3"), + FC(27, 5, false, "fc")}); + + g->getNode("conv1")->getOperator()->setInput(0, inputTensor); + g->getNode("conv1")->getOperator()->setInput(1, weight1); + g->getNode("conv1")->getOperator()->setInput(2, bias1); + + std::shared_ptr<Tensor> weight2 = + std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, + {{{4}}, {{5}}, {{6}}}, + {{{7}}, {{8}}, {{9}}}, + {{{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); + g->getNode("conv2")->getOperator()->setInput(1, weight2); + g->getNode("conv2")->getOperator()->setInput(2, bias2); + // *(g->getNode("conv2")->getOperator()->input(1, weight2); + + std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>( + Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, + {{{5}}, {{6}}, {{7}}, {{8}}}, + {{{9}}, {{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + g->getNode("conv3")->getOperator()->setInput(1, weight3); + g->getNode("conv3")->getOperator()->setInput(2, bias3); + + std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>( + Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); + std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + g->getNode("fc")->getOperator()->setInput(1, weightfc); + g->getNode("fc")->getOperator()->setInput(2, biasfc); + + // input->addChild(g); + g->setDataType(Aidge::DataType::Int32); + g->getNode("conv1")->getOperator()->setDataType(DataType::Float32); + g->getNode("conv3")->getOperator()->setDataType(DataType::Float64); + + g->setBackend("cpu"); + g->forwardDims(); + SequentialScheduler scheduler(g); + REQUIRE_NOTHROW(scheduler.forward()); + scheduler.saveSchedulingDiagram("schedulingSequential"); + + std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, + {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, + {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, + {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, + {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, + {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); + + std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{ + {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}}, + {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}}, + {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}}, + {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}}, + {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}}, + {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}}, + {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}}, + {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}); + + std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}}, + {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}}, + {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}}, + {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}}, + {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}}, + {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}); + + Tensor expectedOutput4 = Array2D<int, 2, 5>{ + {{205050376, 198925904, 181355097, 196978090, 238868348}, + {598467376, 561797804, 560823897, 593043790, 698672948}}}; + std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0); + REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0); + REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0); + REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0); + REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12)); + } + + SECTION("Half") { + Tensor refTensor = Array2D<float, 3, 2>{{{0.0, 1.0},{2.1, 3.4},{5000.0, 1.0e5}}}; + Tensor tensor(DataType::Float16); + tensor.copyCastFrom(refTensor); + REQUIRE(approxEq<float, half_float::half>(refTensor, tensor, 1.0e-3, 0.0)); + } + + SECTION("Test explicit") { + std::shared_ptr<GraphView> g = + Sequential({ + Conv(1, 3, {3, 3}, "conv1"), + Conv(3, 4, {1, 1}, "conv2"), + Conv(4, 3, {1, 1}, "conv3"), + FC(27, 5, false, "fc")}); + + g->getNode("conv1")->getOperator()->setInput(0, inputTensor); + g->getNode("conv1")->getOperator()->setInput(1, weight1); + g->getNode("conv1")->getOperator()->setInput(2, bias1); + + std::shared_ptr<Tensor> weight2 = + std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, + {{{4}}, {{5}}, {{6}}}, + {{{7}}, {{8}}, {{9}}}, + {{{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}}); + g->getNode("conv2")->getOperator()->setInput(1, weight2); + g->getNode("conv2")->getOperator()->setInput(2, bias2); + // *(g->getNode("conv2")->getOperator()->input(1, weight2); + + std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>( + Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}}, + {{{5}}, {{6}}, {{7}}, {{8}}}, + {{{9}}, {{10}}, {{11}}, {{12}}}}}); + std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}}); + g->getNode("conv3")->getOperator()->setInput(1, weight3); + g->getNode("conv3")->getOperator()->setInput(2, bias3); + + std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>( + Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); + std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}}); + g->getNode("fc")->getOperator()->setInput(1, weightfc); + g->getNode("fc")->getOperator()->setInput(2, biasfc); + + // input->addChild(g); + g->setDataType(Aidge::DataType::Int32); + g->getNode("conv1")->getOperator()->setDataType(DataType::Float32); + g->getNode("conv3")->getOperator()->setDataType(DataType::Float64); + + explicitCastMove(g); + g->setBackend("cpu"); + g->forwardDims(); + + SequentialScheduler scheduler(g); + REQUIRE_NOTHROW(scheduler.forward()); + scheduler.saveSchedulingDiagram("schedulingSequential"); + + std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}}, + {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}}, + {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}}, + {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}}, + {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}}, + {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}}); + + std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{ + {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}}, + {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}}, + {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}}, + {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}}, + {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}}, + {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}}, + {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}}, + {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}}); + + std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{ + {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}}, + {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}}, + {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}}, + {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}}, + {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}}, + {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}}); + + Tensor expectedOutput4 = Array2D<int, 2, 5>{ + {{205050376, 198925904, 181355097, 196978090, 238868348}, + {598467376, 561797804, 560823897, 593043790, 698672948}}}; + std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0); + REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0); + REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0); + REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12)); + std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0); + REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12)); + } +}