Commit c450869a authored by Maxence Naud

Merge branch 'dev' into matmul_rework

parents 204f80b0 8521b218
2 merge requests: !50 version 0.2.0, !34 Matmul rework
Showing 1002 additions and 174 deletions
+include:
+  - remote: 'https://gitlab.eclipse.org/eclipse/aidge/gitlab_shared_files/-/raw/main/.gitlab/ci/shared_script.gitlab-ci.yml'
+
 build:ubuntu_cpp:
   stage: build
   needs: []
@@ -6,9 +9,9 @@ build:ubuntu_cpp:
   script:
     # Download dependencies
     # aidge_core
-    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
-    - unzip -o build_artifacts.zip -d .
-    - rm -rf build_cpp
+    - DEPENDENCY_NAME="aidge_core"
+    - DEPENDENCY_JOB="build:ubuntu_cpp"
+    - !reference [.download_dependency, script]
     # Build current module
     - export CMAKE_PREFIX_PATH=../install_cpp
@@ -32,9 +35,9 @@ build:ubuntu_cpp_g++10:
   script:
     # Download dependencies
     # aidge_core
-    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
-    - unzip -o build_artifacts.zip -d .
-    - rm -rf build_cpp
+    - DEPENDENCY_NAME="aidge_core"
+    - DEPENDENCY_JOB="build:ubuntu_cpp"
+    - !reference [.download_dependency, script]
     # Build current module
     - export CMAKE_PREFIX_PATH=../install_cpp
@@ -55,9 +58,9 @@ build:ubuntu_cpp_g++12:
   script:
     # Download dependencies
     # aidge_core
-    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
-    - unzip -o build_artifacts.zip -d .
-    - rm -rf build_cpp
+    - DEPENDENCY_NAME="aidge_core"
+    - DEPENDENCY_JOB="build:ubuntu_cpp"
+    - !reference [.download_dependency, script]
     # Build current module
     - export CMAKE_PREFIX_PATH=../install_cpp
@@ -78,9 +81,9 @@ build:ubuntu_cpp_clang12:
   script:
     # Download dependencies
     # aidge_core
-    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
-    - unzip -o build_artifacts.zip -d .
-    - rm -rf build_cpp
+    - DEPENDENCY_NAME="aidge_core"
+    - DEPENDENCY_JOB="build:ubuntu_cpp"
+    - !reference [.download_dependency, script]
     # Build current module
     - export CMAKE_PREFIX_PATH=../install_cpp
@@ -101,9 +104,9 @@ build:ubuntu_cpp_clang15:
   script:
     # Download dependencies
     # aidge_core
-    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
-    - unzip -o build_artifacts.zip -d .
-    - rm -rf build_cpp
+    - DEPENDENCY_NAME="aidge_core"
+    - DEPENDENCY_JOB="build:ubuntu_cpp"
+    - !reference [.download_dependency, script]
     # Build current module
     - export CMAKE_PREFIX_PATH=../install_cpp
@@ -120,86 +123,92 @@ build:ubuntu_python:
   needs: []
   tags:
     - docker
   script:
     # Download dependencies
     # aidge_core (Python)
-    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_python"'
-    - unzip -o build_artifacts.zip -d .
+    - DEPENDENCY_NAME="aidge_core"
+    - DEPENDENCY_JOB="build:ubuntu_python"
+    - !reference [.download_dependency, script]
     - python3 -m pip install virtualenv
     - virtualenv venv
     - source venv/bin/activate
     - python3 -m pip install -r requirements.txt
     - python3 -m pip install .
+    - python3 -m pip install numpy unittest-xml-reporting
+    - python3 -m pip list
   artifacts:
     expire_in: 1 week
     paths:
       - venv/

-# build:windows_cpp:
-# stage: build
-# needs: []
-# tags:
-# - windows
-# image: buildtools
-# before_script:
-# # Install Chocolatey
-# - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
-# # Install dependencies
-# - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
-# - choco install git -Y
-# - choco install python -Y
-# # Update PATH
-# - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-# script:
-# # Download dependencies
-# # aidge_core
-# - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip'
-# - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
-# - Remove-Item .\build_cpp\ -Recurse
-# - $env:CMAKE_PREFIX_PATH = '../install_cpp'
-# - mkdir -p build_cpp
-# - cd build_cpp
-# - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug ..
-# - cmake --build . -j2
-# - cmake --install . --config Debug
-# artifacts:
-# expire_in: 1 week
-# paths:
-# - build_cpp/
-# - install_cpp/
+build:windows_cpp:
+  stage: build
+  needs: []
+  tags:
+    - windows
+  image: buildtools
+  before_script:
+    # Install Chocolatey
+    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+    # Install dependencies
+    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+    - choco install git -Y
+    - choco install python -Y
+    # Update PATH
+    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  script:
+    # Download dependencies
+    # aidge_core
+    - $DEPENDENCY_NAME="aidge_core"
+    - $DEPENDENCY_JOB="build:windows_cpp"
+    - !reference [.download_dependency_windows, script]
+    - Remove-Item .\build_cpp\ -Recurse -Force -ErrorAction Ignore
+    - $env:CMAKE_PREFIX_PATH = '../install_cpp'
+    - mkdir -p build_cpp
+    - cd build_cpp
+    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug ..
+    - cmake --build . -j2
+    - cmake --install . --config Debug
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - build_cpp/
+      - install_cpp/

-# build:windows_python:
-# stage: build
-# needs: []
-# tags:
-# - windows
-# image: buildtools
-# before_script:
-# # Install Chocolatey
-# - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
-# # Install dependencies
-# - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
-# - choco install git -Y
-# - choco install python -Y
-# # Update PATH
-# - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-# script:
-# # Download dependencies
-# # aidge_core (Python)
-# - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip'
-# - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
-# - python -m pip install virtualenv
-# - virtualenv venv
-# - venv\Scripts\Activate.ps1
-# - python -m pip install -r requirements.txt
-# - python -m pip install .
-# artifacts:
-# expire_in: 1 week
-# paths:
-# - venv/
+build:windows_python:
+  stage: build
+  needs: []
+  tags:
+    - windows
+  image: buildtools
+  before_script:
+    # Install Chocolatey
+    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+    # Install dependencies
+    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+    - choco install git -Y
+    - choco install python -Y
+    # Update PATH
+    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  script:
+    # Download dependencies
+    # aidge_core (Python)
+    - $DEPENDENCY_NAME="aidge_core"
+    - $DEPENDENCY_JOB="build:windows_python"
+    - !reference [.download_dependency_windows, script]
+    - python -m pip install virtualenv
+    - virtualenv venv
+    - venv\Scripts\Activate.ps1
+    - python -m pip install -r requirements.txt
+    - python -m pip install .
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - venv/
@@ -18,32 +18,31 @@ test:ubuntu_python:
   script:
     - source venv/bin/activate
     - cd ${CI_PROJECT_NAME}
-    - python3 -m pip install numpy unittest-xml-reporting
-    - python3 -m pip list
     # Run on discovery all tests located in core/unit_tests/python and discard the stdout
     # only to show the errors/warnings and the results of the tests
     - python3 -m xmlrunner discover -s unit_tests/ -v -b --output-file xmlrunner-results.xml
   artifacts:
     reports:
       junit: ${CI_PROJECT_NAME}/xmlrunner-results.xml

-# test:windows_cpp:
-# stage: test
-# needs: ["build:windows_cpp"]
-# tags:
-# - windows
-# image: buildtools
-# before_script:
-# # Install Chocolatey
-# - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
-# # Install dependencies
-# - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
-# - choco install python -Y
-# # Update PATH
-# - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-# script:
-# - cd build_cpp
-# - ctest --output-junit ctest-results.xml --output-on-failure
-# artifacts:
-# reports:
-# junit: build_cpp/ctest-results.xml
+test:windows_cpp:
+  stage: test
+  needs: ["build:windows_cpp"]
+  tags:
+    - windows
+  image: buildtools
+  before_script:
+    # Install Chocolatey
+    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+    # Install dependencies
+    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+    - choco install python -Y
+    # Update PATH
+    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  script:
+    - cd build_cpp
+    - ctest --output-junit ctest-results.xml --output-on-failure
+  artifacts:
+    reports:
+      junit: build_cpp/ctest-results.xml
@@ -21,18 +21,23 @@
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
 #include "aidge/backend/cpu/operator/DivImpl.hpp"
+#include "aidge/backend/cpu/operator/ErfImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
+#include "aidge/backend/cpu/operator/GatherImpl.hpp"
 #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
 #include "aidge/backend/cpu/operator/MulImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/backend/cpu/operator/PowImpl.hpp"
 #include "aidge/backend/cpu/operator/ProducerImpl.hpp"
+#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
 #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/ReshapeImpl.hpp"
 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
 #include "aidge/backend/cpu/operator/SliceImpl.hpp"
 #include "aidge/backend/cpu/operator/SqrtImpl.hpp"
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
+#include "aidge/backend/cpu/operator/TransposeImpl.hpp"
 #endif /* AIDGE_CPU_IMPORTS_H_ */
\ No newline at end of file
@@ -16,7 +16,8 @@
 namespace Aidge {
 inline void *getCPUPtr(std::shared_ptr<Aidge::Data> const &data) {
-    return std::static_pointer_cast<Tensor>(data)->getImpl()->rawPtr();
+    const auto tensor = std::static_pointer_cast<Tensor>(data);
+    return tensor->getImpl()->hostPtr(tensor->getImplOffset());
 }
 } // namespace Aidge
...
@@ -25,8 +25,6 @@ namespace Aidge {
 template <class T>
 class TensorImpl_cpu : public TensorImpl {
 private:
-    const Tensor &mTensor; // Impl needs to access Tensor information, but is not
-                           // supposed to change it!
     /// Pointer to the data and its capacity
     future_std::span<T> mData;
     /// If this instance own the data, std::unique_ptr manages it
@@ -35,88 +33,87 @@ private:
 public:
     static constexpr const char *Backend = "cpu";

-    TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {}
+    TensorImpl_cpu(DeviceIdx_t device, NbElts_t length) : TensorImpl(Backend, device, length) {}

     bool operator==(const TensorImpl &otherImpl) const override final {
         const auto& typedOtherImpl = reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl);
-        AIDGE_INTERNAL_ASSERT(typedOtherImpl.size() >= mTensor.size());
+        AIDGE_INTERNAL_ASSERT(typedOtherImpl.size() >= mNbElts);

         std::size_t i = 0;
-        for (; i < mTensor.size() &&
+        for (; i < mNbElts &&
                *(mData.data()+i) == *static_cast<const T*>(typedOtherImpl.rawPtr(i));
                ++i) {
         }
-        return i == mTensor.size();
+        return i == mNbElts;
     }

-    static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) {
-        return std::make_unique<TensorImpl_cpu<T>>(tensor);
+    static std::shared_ptr<TensorImpl_cpu> create(DeviceIdx_t device, NbElts_t length) {
+        return std::make_shared<TensorImpl_cpu<T>>(device, length);
     }

-    inline std::size_t size() const noexcept override final { return mData.size(); }
     inline std::size_t scalarSize() const noexcept override final { return sizeof(T); }

+    void setDevice(DeviceIdx_t device) override final {
+        AIDGE_ASSERT(device == 0, "device cannot be != 0 for CPU backend");
+    }

     void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
-        AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity");
-        std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length,
-                  static_cast<T *>(rawPtr()) + offset);
+        const T* srcT = static_cast<const T *>(src);
+        T* dstT = static_cast<T *>(rawPtr(offset));
+
+        AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "copy length is above capacity");
+        AIDGE_ASSERT(dstT < srcT || dstT >= srcT + length, "overlapping copy is not supported");
+        std::copy(srcT, srcT + length, dstT);
     }

-    void copyCast(const void *src, NbElts_t length, const DataType srcDt) override final {
+    void copyCast(const void *src, const DataType srcDt, NbElts_t length, NbElts_t offset = 0) override final {
         if (length == 0) {
             return;
         }

-        AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity");
+        T* dstT = static_cast<T *>(rawPtr(offset));
+        AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "copy length is above capacity");
         switch (srcDt)
         {
             case DataType::Float64:
                 std::copy(static_cast<const double*>(src), static_cast<const double*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::Float32:
                 std::copy(static_cast<const float*>(src), static_cast<const float*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::Float16:
                 std::copy(static_cast<const half_float::half*>(src), static_cast<const half_float::half*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::Int64:
                 std::copy(static_cast<const int64_t*>(src), static_cast<const int64_t*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::UInt64:
                 std::copy(static_cast<const uint64_t*>(src), static_cast<const uint64_t*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::Int32:
                 std::copy(static_cast<const int32_t*>(src), static_cast<const int32_t*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::UInt32:
                 std::copy(static_cast<const uint32_t*>(src), static_cast<const uint32_t*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::Int16:
                 std::copy(static_cast<const int16_t*>(src), static_cast<const int16_t*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::UInt16:
                 std::copy(static_cast<const uint16_t*>(src), static_cast<const uint16_t*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::Int8:
                 std::copy(static_cast<const int8_t*>(src), static_cast<const int8_t*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             case DataType::UInt8:
                 std::copy(static_cast<const uint8_t*>(src), static_cast<const uint8_t*>(src) + length,
-                        static_cast<T *>(rawPtr()));
+                        dstT);
                 break;
             default:
                 AIDGE_THROW_OR_ABORT(std::runtime_error, "Unsupported data type.");
@@ -124,21 +121,20 @@ public:
         }
     }

-    void copyFromDevice(const void *src, NbElts_t length, const std::pair<std::string, DeviceIdx_t>& device) override final {
+    void copyFromDevice(const void *src, const std::pair<std::string, DeviceIdx_t>& device, NbElts_t length, NbElts_t offset = 0) override final {
         AIDGE_ASSERT(device.first == Backend, "backend must match");
         AIDGE_ASSERT(device.second == 0, "device cannot be != 0 for CPU backend");
-        copy(src, length);
+        copy(src, length, offset);
     }

-    inline void copyFromHost(const void *src, NbElts_t length) override final {
-        copy(src, length);
+    inline void copyFromHost(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
+        copy(src, length, offset);
     }

-    void copyToHost(void *dst, NbElts_t length) const override final {
-        AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity");
-        const T* src = static_cast<const T*>(rawPtr());
-        std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length,
-                  static_cast<T *>(dst));
+    void copyToHost(void *dst, NbElts_t length, NbElts_t offset = 0) const override final {
+        const T* src = static_cast<const T*>(rawPtr(offset));
+        AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "copy length is above capacity");
+        std::copy(src, src + length, static_cast<T *>(dst));
     }

     void *rawPtr(NbElts_t offset = 0) override final {
@@ -147,7 +143,7 @@ public:
     };

     const void *rawPtr(NbElts_t offset = 0) const override final {
-        AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const rawPtr");
+        AIDGE_ASSERT(mData.size() >= mNbElts, "accessing uninitialized const rawPtr");
         return (mData.data() + offset);
     };
@@ -157,12 +153,12 @@ public:
     };

     const void *hostPtr(NbElts_t offset = 0) const override final {
-        AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const hostPtr");
+        AIDGE_ASSERT(mData.size() >= mNbElts, "accessing uninitialized const hostPtr");
         return (mData.data() + offset);
     };

     void setRawPtr(void *ptr, NbElts_t length) override final {
-        AIDGE_ASSERT(length >= mTensor.size(), "trying to set raw pointer of insufficient capacity");
+        AIDGE_ASSERT(length >= mNbElts, "trying to set raw pointer of insufficient capacity");
         mData = future_std::span<T>(static_cast<T *>(ptr), length);
         mDataOwner.reset();
     };
@@ -171,11 +167,11 @@ public:
 private:
     void lazyInit() {
-        if (mData.size() < mTensor.size()) {
+        if (mData.size() < mNbElts) {
             // Need more data, a re-allocation will occur
             AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "trying to enlarge non-owned data");
-            mDataOwner.reset(new T[mTensor.size()]);
-            mData = future_std::span<T>(mDataOwner.get(), mTensor.size());
+            mDataOwner.reset(new T[mNbElts]);
+            mData = future_std::span<T>(mDataOwner.get(), mNbElts);
         }
     }
 };
...
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Erf.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Erf_Op;
// compute kernel registry for forward and backward
class ErfImplForward_cpu
: public Registrable<ErfImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class ErfImplBackward_cpu
: public Registrable<ErfImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class ErfImpl_cpu : public OperatorImpl {
public:
ErfImpl_cpu(const Erf_Op& op) : OperatorImpl(op) {}
static std::unique_ptr<ErfImpl_cpu> create(const Erf_Op& op) {
return std::make_unique<ErfImpl_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
static Registrar<Erf_Op> registrarErfImpl_cpu("cpu", Aidge::ErfImpl_cpu::create);
}
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_
#include <cmath>
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ErfImpl.hpp"
namespace Aidge {
template <class I, class O>
void ErfImpl_cpu_forward_kernel(std::size_t inputLenght,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = std::erf(input[i]);
}
}
namespace {
static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::ErfImpl_cpu_forward_kernel<float, float>);
static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::ErfImpl_cpu_forward_kernel<int, int>);
static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::ErfImpl_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GATHERIMPL_H_
#define AIDGE_CPU_OPERATOR_GATHERIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Gather.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Gather_Op;
// compute kernel registry for forward and backward
class GatherImplForward_cpu
: public Registrable<GatherImplForward_cpu, std::tuple<DataType, DataType>, void(const typename Gather_Op::Attrs&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class GatherImplBackward_cpu
: public Registrable<GatherImplBackward_cpu, std::tuple<DataType, DataType>, void(const typename Gather_Op::Attrs&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class GatherImpl_cpu : public OperatorImpl {
public:
GatherImpl_cpu(const Gather_Op& op) : OperatorImpl(op) {}
static std::unique_ptr<GatherImpl_cpu> create(const Gather_Op& op) {
return std::make_unique<GatherImpl_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
static Registrar<Gather_Op> registrarGatherImpl_cpu("cpu", Aidge::GatherImpl_cpu::create);
}
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GATHERIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef>
#include <cmath>
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/operator/GatherImpl.hpp"
namespace Aidge {
template <class I, class O>
void GatherImpl_cpu_forward_kernel(const typename Gather_Op::Attrs& attrs, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const std::size_t axisIdx = std::get<2>(attrs)>=0 ?
std::get<2>(attrs) :
static_cast<std::size_t>(std::get<2>(attrs)) + inputDims.size();
std::size_t postAxisElems = 1;
for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
postAxisElems *= inputDims[i];
}
std::size_t preAxisElems = 1;
for (std::size_t i = 0; i < axisIdx; ++i) {
preAxisElems *= inputDims[i];
}
const std::vector<std::int64_t> indices = std::get<0>(attrs);
for (std::size_t i=0; i<preAxisElems; ++i)
{
for(std::size_t j=0; j<indices.size(); ++j)
{
const std::size_t idx = indices[j] >= 0 ? indices[j] : static_cast<std::size_t>(indices[j]) + inputDims[axisIdx];
const I* startPtr = std::next(input, i * postAxisElems * inputDims[axisIdx] + idx * postAxisElems);
std::copy_n(startPtr, postAxisElems, output);
output += postAxisElems;
}
}
}
namespace {
static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::GatherImpl_cpu_forward_kernel<float, float>);
static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::GatherImpl_cpu_forward_kernel<int, int>);
static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::GatherImpl_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_ */
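For reference, a minimal standalone sketch of the pre-/post-axis indexing used by the Gather kernel above, written in plain C++ without the Aidge attribute tuple or registrar; the 2x3 input, the indices {2, 0} and axis 1 are made up for illustration:

// Standalone illustration of the gather indexing scheme (not part of this commit).
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const std::vector<std::size_t> inputDims{2, 3};
    const std::vector<float> input{0.f, 1.f, 2.f, 3.f, 4.f, 5.f}; // row-major 2x3
    const std::vector<long> indices{2, 0};                        // gather along axis 1
    const std::size_t axis = 1;

    // Number of contiguous elements after the axis, and number of slices before it
    std::size_t postAxisElems = 1;
    for (std::size_t i = axis + 1; i < inputDims.size(); ++i) postAxisElems *= inputDims[i];
    std::size_t preAxisElems = 1;
    for (std::size_t i = 0; i < axis; ++i) preAxisElems *= inputDims[i];

    std::vector<float> output;
    for (std::size_t i = 0; i < preAxisElems; ++i) {
        for (std::size_t j = 0; j < indices.size(); ++j) {
            const std::size_t idx = static_cast<std::size_t>(indices[j]);
            const std::size_t start = i * postAxisElems * inputDims[axis] + idx * postAxisElems;
            output.insert(output.end(), input.begin() + start, input.begin() + start + postAxisElems);
        }
    }
    // Gathering columns 2 and 0 of each row yields the 2x2 result: 2 0 5 3
    for (float v : output) std::printf("%g ", v);
    std::printf("\n");
    return 0;
}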
@@ -25,7 +25,7 @@ void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs,
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);

-    I negativeSlope = static_cast<I>(std::get<0>(attrs));
+    const I negativeSlope = static_cast<const I>(std::get<0>(attrs));

     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope;
...
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_
#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// class ReduceMean_Op;
// compute kernel registry for forward and backward
// DIM 1
class ReduceMeanImpl1DForward_cpu
: public Registrable<ReduceMeanImpl1DForward_cpu,
std::tuple<DataType, DataType>,
void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl1DBackward_cpu
: public Registrable<ReduceMeanImpl1DBackward_cpu,
std::tuple<DataType, DataType>,
void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
// DIM 2
class ReduceMeanImpl2DForward_cpu
: public Registrable<ReduceMeanImpl2DForward_cpu,
std::tuple<DataType, DataType>,
void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl2DBackward_cpu
: public Registrable<ReduceMeanImpl2DBackward_cpu,
std::tuple<DataType, DataType>,
void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
// DIM 3
class ReduceMeanImpl3DForward_cpu
: public Registrable<ReduceMeanImpl3DForward_cpu,
std::tuple<DataType, DataType>,
void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl3DBackward_cpu
: public Registrable<ReduceMeanImpl3DBackward_cpu,
std::tuple<DataType, DataType>,
void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl1D_cpu : public OperatorImpl {
public:
ReduceMeanImpl1D_cpu(const ReduceMean_Op<1>& op) : OperatorImpl(op) {}
static std::unique_ptr<ReduceMeanImpl1D_cpu> create(const ReduceMean_Op<1> &op) {
return std::make_unique<ReduceMeanImpl1D_cpu>(op);
}
public:
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
class ReduceMeanImpl2D_cpu : public OperatorImpl {
public:
ReduceMeanImpl2D_cpu(const ReduceMean_Op<2>& op) : OperatorImpl(op) {}
static std::unique_ptr<ReduceMeanImpl2D_cpu> create(const ReduceMean_Op<2> &op) {
return std::make_unique<ReduceMeanImpl2D_cpu>(op);
}
public:
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
class ReduceMeanImpl3D_cpu : public OperatorImpl {
public:
ReduceMeanImpl3D_cpu(const ReduceMean_Op<3>& op) : OperatorImpl(op) {}
static std::unique_ptr<ReduceMeanImpl3D_cpu> create(const ReduceMean_Op<3> &op) {
return std::make_unique<ReduceMeanImpl3D_cpu>(op);
}
public:
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
// add cpu backend to ReduceMean_Op<2> implementation registry
static Registrar<ReduceMean_Op<1>> registrarReduceMeanImpl1D_cpu("cpu", Aidge::ReduceMeanImpl1D_cpu::create);
static Registrar<ReduceMean_Op<2>> registrarReduceMeanImpl2D_cpu("cpu", Aidge::ReduceMeanImpl2D_cpu::create);
static Registrar<ReduceMean_Op<3>> registrarReduceMeanImpl3D_cpu("cpu", Aidge::ReduceMeanImpl3D_cpu::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
#include <cstddef>
#include <algorithm> // std::copy, std::for_each
#include <numeric> //std::accumulate
#include <functional> //std::multiplies
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
template <class I, class O, DimSize_t DIM>
void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op<DIM>::Attrs& attrs,
const std::vector<DimSize_t>& inputDims,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const std::size_t nb_dims = inputDims.size();
const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
if (DIM == 1) {
const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + std::get<0>(attrs)[0], 1, std::multiplies<std::size_t>());
const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - std::get<0>(attrs)[0], 1, std::multiplies<std::size_t>());
const std::size_t dim_i = inputDims[std::get<0>(attrs)[0]];
for (std::size_t pre = 0; pre < stride_pre; ++pre) {
for (std::size_t post = 0; post < stride_post; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post + post;
const std::size_t idx_o = pre * stride_post + post;
output[idx_o] = input[idx_i];
for (std::size_t i = 1; i < dim_i; ++i) {
output[idx_o] += input[idx_i + i*stride_post];
}
output[idx_o] /= dim_i;
}
}
} else {
std::size_t outputElements = totalElements;
std::size_t *stride_post = new std::size_t[nb_dims];
stride_post[nb_dims - 1] = 1;
for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) {
stride_post[i] = stride_post[i+1]*inputDims[i+1];
}
std::size_t *stride_pre = new std::size_t[nb_dims];
stride_pre[0] = 1;
for (std::size_t i = 1; i < nb_dims; ++i) {
stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
}
const I* inputAccumulation = input;
I* outputAccumulation = nullptr;
for (const auto& axisInt : std::get<0>(attrs)) {
const std::size_t a = static_cast<std::size_t>(axisInt);
outputElements /= inputDims[a];
outputAccumulation = new I[outputElements];
const std::size_t dim_i = inputDims[a];
for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
for (std::size_t post = 0; post < stride_post[a]; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
const std::size_t idx_o = pre * stride_post[a] + post;
outputAccumulation[idx_o] = inputAccumulation[idx_i];
for (std::size_t i = 1; i < dim_i; ++i) {
outputAccumulation[idx_o] += inputAccumulation[idx_i + i*stride_post[a]];
}
}
}
std::for_each(stride_pre+a+1, stride_pre+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; });
if (inputAccumulation != input) {
delete[] inputAccumulation;
}
inputAccumulation = outputAccumulation;
}
// Copy elements from inputAccumulation to output while dividing by divisor
I divisor = totalElements / outputElements;
std::transform(inputAccumulation, inputAccumulation + outputElements, output,
[divisor](int element) { return element / divisor; });
if (outputAccumulation) {
delete[] outputAccumulation;
}
delete[] stride_post;
delete[] stride_pre;
}
}
namespace {
// DIM = 1
static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,1>);
static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,1>);
static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,1>);
// DIM = 2
static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,2>);
static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,2>);
static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,2>);
// DIM = 3
static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,3>);
static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,3>);
static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,3>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_ */
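For reference, a standalone sketch of the single-axis reduction that the DIM == 1 branch of the kernel above performs, in plain C++ without Aidge types; the 2x3 input reduced over axis 1 is made up for illustration:

// Standalone illustration of the stride_pre / stride_post mean reduction (not part of this commit).
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const std::vector<std::size_t> dims{2, 3};
    const std::vector<float> input{0.f, 1.f, 2.f, 3.f, 4.f, 5.f}; // row-major 2x3
    const std::size_t axis = 1;

    // Elements before the reduced axis (outer slices) and after it (inner stride)
    std::size_t stridePre = 1, stridePost = 1;
    for (std::size_t i = 0; i < axis; ++i) stridePre *= dims[i];
    for (std::size_t i = axis + 1; i < dims.size(); ++i) stridePost *= dims[i];

    std::vector<float> output(stridePre * stridePost, 0.f);
    for (std::size_t pre = 0; pre < stridePre; ++pre) {
        for (std::size_t post = 0; post < stridePost; ++post) {
            float acc = 0.f;
            for (std::size_t k = 0; k < dims[axis]; ++k) {
                acc += input[pre * dims[axis] * stridePost + k * stridePost + post];
            }
            output[pre * stridePost + post] = acc / static_cast<float>(dims[axis]);
        }
    }
    // Means of {0,1,2} and {3,4,5}: prints "1 4"
    for (float v : output) std::printf("%g ", v);
    std::printf("\n");
    return 0;
}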
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_
#define AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Reshape.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Reshape_Op;
// compute kernel registry for forward and backward
class ReshapeImplForward_cpu
: public Registrable<ReshapeImplForward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const void*, void*)> {
};
class ReshapeImplBackward_cpu
: public Registrable<ReshapeImplBackward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const void*, void*)> {
};
class ReshapeImpl_cpu : public OperatorImpl {
public:
ReshapeImpl_cpu(const Reshape_Op& op) : OperatorImpl(op) {}
static std::unique_ptr<ReshapeImpl_cpu> create(const Reshape_Op& op) {
return std::make_unique<ReshapeImpl_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
static Registrar<Reshape_Op> registrarReshapeImpl_cpu("cpu", Aidge::ReshapeImpl_cpu::create);
}
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_RESHAPEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_RESHAPEIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <cmath>
#include "aidge/backend/cpu/operator/ReshapeImpl.hpp"
namespace Aidge {
template <class I, class O>
void ReshapeImpl_cpu_forward_kernel(std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
std::copy_n(input, inputLength, output);
}
namespace {
static Registrar<ReshapeImplForward_cpu> registrarReshapeImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32},
Aidge::ReshapeImpl_cpu_forward_kernel<float, float>);
static Registrar<ReshapeImplForward_cpu> registrarReshapeImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32},
Aidge::ReshapeImpl_cpu_forward_kernel<int, int>);
static Registrar<ReshapeImplForward_cpu> registrarReshapeImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64},
Aidge::ReshapeImpl_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_RESHAPEIMPL_FORWARD_KERNEL_H_ */
@@ -35,7 +35,7 @@ void SliceImpl_cpu_forward_kernel(const typename Slice_Op::Attrs& attrs,
         const std::int64_t axis_ = std::get<2>(attrs)[i];
         const std::int64_t start_ = std::get<0>(attrs)[i];
         const std::int64_t end_ = std::get<1>(attrs)[i];
-        const std::size_t axis = axis_ >= 0 ? axis_ : static_cast<std::size_t>(axis_ + static_cast<std::int32_t>(inputDims.size()));
+        const std::size_t axis = axis_ >= 0 ? axis_ : static_cast<std::size_t>(axis_) + inputDims.size();
         const std::size_t start = start_ >= 0 ? start_ : start_ + inputDims[axis];
         const std::size_t end = end_ >= 0 ? end_ : end_ + inputDims[axis];
         std::size_t stride = 1;
...
@@ -25,10 +25,10 @@ namespace Aidge {
 // compute kernel registry for forward and backward
 class SoftmaxImplForward_cpu
-    : public Registrable<SoftmaxImplForward_cpu, std::tuple<DataType, DataType>, void(const DimSize_t, const DimSize_t, const DimSize_t, const void*, void*)> {
+    : public Registrable<SoftmaxImplForward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)> {
 };
 class SoftmaxImplBackward_cpu
-    : public Registrable<SoftmaxImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
+    : public Registrable<SoftmaxImplBackward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)> {
 };

 class SoftmaxImpl_cpu : public OperatorImpl {
...
@@ -23,30 +23,33 @@
 namespace Aidge {
 template <class I, class O>
-void SoftmaxImpl_cpu_forward_kernel(const DimSize_t batchSize,
-                                    const DimSize_t channelSize,
-                                    const DimSize_t featureSize,
-                                    const void* input_,
-                                    void* output_) {
+void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
+{
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);

-    for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t feature = 0; feature < featureSize; ++feature) {
-            std::size_t ioIndex = batch*channelSize*featureSize + feature;
-
-            I sum(0.0);
-            for (std::size_t ch = 0; ch < channelSize; ++ch) {
-                output[ioIndex] = std::exp(input[ioIndex]);
-                sum += output[ioIndex];
-                ioIndex+=featureSize;
-            }
-
-            ioIndex = batch*channelSize*featureSize + feature;
-            for (std::size_t ch = 0; ch < channelSize; ++ch) {
-                output[ioIndex] /= sum;
-                ioIndex += featureSize;
-            }
+    std::size_t postAxisElems = 1;
+    for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
+        postAxisElems *= inputDims[i];
+    }
+    std::size_t preAxisElems = 1;
+    for (std::size_t i = 0; i < axisIdx; ++i) {
+        preAxisElems *= inputDims[i];
+    }
+
+    for (std::size_t i = 0; i < preAxisElems; ++i) {
+        for (std::size_t j = 0; j < postAxisElems; ++j) {
+            // Calculate sum of exponentials within the axis
+            I sumExp = 0;
+            for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
+                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
+                sumExp += std::exp(input[inIdx]);
+            }
+            // Calculate softmax for the current slice along the axis
+            for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
+                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
+                output[inIdx] = std::exp(input[inIdx]) / sumExp;
+            }
         }
     }
 }
...
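For reference, a standalone sketch of the axis-generic indexing the reworked softmax kernel uses, in plain C++ without Aidge types; the 2x3 input and axisIdx = 1 are made up for illustration:

// Standalone illustration of softmax over an arbitrary axis (not part of this commit).
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const std::vector<std::size_t> inputDims{2, 3};
    const std::vector<float> input{1.f, 2.f, 3.f, 1.f, 1.f, 1.f}; // row-major 2x3
    const std::size_t axisIdx = 1;

    std::size_t postAxisElems = 1;
    for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) postAxisElems *= inputDims[i];
    std::size_t preAxisElems = 1;
    for (std::size_t i = 0; i < axisIdx; ++i) preAxisElems *= inputDims[i];

    std::vector<float> output(input.size());
    for (std::size_t i = 0; i < preAxisElems; ++i) {
        for (std::size_t j = 0; j < postAxisElems; ++j) {
            // Sum of exponentials along the chosen axis for this (pre, post) slice
            float sumExp = 0.f;
            for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
                sumExp += std::exp(input[i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j]);
            }
            // Normalize each element of the slice
            for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
                const std::size_t idx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
                output[idx] = std::exp(input[idx]) / sumExp;
            }
        }
    }
    // Each row now sums to 1; the second row is uniform (1/3, 1/3, 1/3).
    for (float v : output) std::printf("%.4f ", v);
    std::printf("\n");
    return 0;
}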
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_TransposeIMPL_H_
#define AIDGE_CPU_OPERATOR_TransposeIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Transpose.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Transpose_Op;
// compute kernel registry for forward and backward
class TransposeImpl2DForward_cpu
: public Registrable<TransposeImpl2DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<2>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl3DForward_cpu
: public Registrable<TransposeImpl3DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<3>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl4DForward_cpu
: public Registrable<TransposeImpl4DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<4>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl5DForward_cpu
: public Registrable<TransposeImpl5DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<5>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl6DForward_cpu
: public Registrable<TransposeImpl6DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<6>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl2DBackward_cpu
: public Registrable<TransposeImpl2DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<2>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl3DBackward_cpu
: public Registrable<TransposeImpl3DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<3>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl4DBackward_cpu
: public Registrable<TransposeImpl4DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<4>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl5DBackward_cpu
: public Registrable<TransposeImpl5DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<5>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl6DBackward_cpu
: public Registrable<TransposeImpl6DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<6>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class TransposeImpl2D_cpu : public OperatorImpl {
public:
TransposeImpl2D_cpu(const Transpose_Op<2>& op) : OperatorImpl(op) {}
static std::unique_ptr<TransposeImpl2D_cpu> create(const Transpose_Op<2>& op) {
return std::make_unique<TransposeImpl2D_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
class TransposeImpl3D_cpu : public OperatorImpl {
public:
TransposeImpl3D_cpu(const Transpose_Op<3>& op) : OperatorImpl(op) {}
static std::unique_ptr<TransposeImpl3D_cpu> create(const Transpose_Op<3>& op) {
return std::make_unique<TransposeImpl3D_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
class TransposeImpl4D_cpu : public OperatorImpl {
public:
TransposeImpl4D_cpu(const Transpose_Op<4>& op) : OperatorImpl(op) {}
static std::unique_ptr<TransposeImpl4D_cpu> create(const Transpose_Op<4>& op) {
return std::make_unique<TransposeImpl4D_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
class TransposeImpl5D_cpu : public OperatorImpl {
public:
TransposeImpl5D_cpu(const Transpose_Op<5>& op) : OperatorImpl(op) {}
static std::unique_ptr<TransposeImpl5D_cpu> create(const Transpose_Op<5>& op) {
return std::make_unique<TransposeImpl5D_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
class TransposeImpl6D_cpu : public OperatorImpl {
public:
TransposeImpl6D_cpu(const Transpose_Op<6>& op) : OperatorImpl(op) {}
static std::unique_ptr<TransposeImpl6D_cpu> create(const Transpose_Op<6>& op) {
return std::make_unique<TransposeImpl6D_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
static Registrar<Transpose_Op<2>> registrarTransposeImpl2D_cpu("cpu", Aidge::TransposeImpl2D_cpu::create);
static Registrar<Transpose_Op<3>> registrarTransposeImpl3D_cpu("cpu", Aidge::TransposeImpl3D_cpu::create);
static Registrar<Transpose_Op<4>> registrarTransposeImpl4D_cpu("cpu", Aidge::TransposeImpl4D_cpu::create);
static Registrar<Transpose_Op<5>> registrarTransposeImpl5D_cpu("cpu", Aidge::TransposeImpl5D_cpu::create);
static Registrar<Transpose_Op<6>> registrarTransposeImpl6D_cpu("cpu", Aidge::TransposeImpl6D_cpu::create);
}
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_TransposeIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_TRANSPOSEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_TRANSPOSEIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef>
#include <cmath>
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/operator/TransposeImpl.hpp"
namespace Aidge {
template <class I, class O, DimSize_t DIM>
void TransposeImpl_cpu_forward_kernel( const typename Transpose_Op<DIM>::Attrs& attrs, const std::vector<DimSize_t>& inputDims, const std::vector<DimSize_t>& outputDims, const void* input_, void* output_)
{
O* output = static_cast<O*>(output_);
const I* input = static_cast<const I*>(input_);
// Compute total number of elements in the input array
size_t totalElements = 1;
for (size_t dimSize : inputDims) {
totalElements *= dimSize;
}
std::vector<std::size_t> outStrides(DIM, 1);
for (size_t i = 0; i < DIM; ++i) {
for (size_t j = i+1; j < DIM; ++j)
{
outStrides[i] *= outputDims[j];
}
}
std::vector<size_t> indices(outputDims.size(), 0);
for (size_t i = 0; i < totalElements; ++i) {
size_t idx = 0;
// Permute indices based on OutputDimsOrder attr
std::vector<size_t> permutedIndices(DIM);
for (size_t j = 0; j < DIM; ++j) {
permutedIndices[j] = indices[std::get<0>(attrs)[j]];
}
for (int j = DIM -1; j >=0; --j) {
idx += permutedIndices[j] * outStrides[j];
}
// Copy the value in output
output[idx] = input[i];
// Update indices for the next iteration
for (int j = DIM - 1; j >= 0; --j) {
if (indices[j] < inputDims[j] - 1) {
indices[j]++;
break;
} else {
indices[j] = 0;
}
}
}
}
namespace {
// DIM = 2
static Registrar<TransposeImpl2DForward_cpu> registrarTransposeImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 2>);
static Registrar<TransposeImpl2DForward_cpu> registrarTransposeImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 2>);
static Registrar<TransposeImpl2DForward_cpu> registrarTransposeImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 2>);
// DIM = 3
static Registrar<TransposeImpl3DForward_cpu> registrarTransposeImpl3DForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 3>);
static Registrar<TransposeImpl3DForward_cpu> registrarTransposeImpl3DForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 3>);
static Registrar<TransposeImpl3DForward_cpu> registrarTransposeImpl3DForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 3>);
// DIM = 4
static Registrar<TransposeImpl4DForward_cpu> registrarTransposeImpl4DForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 4>);
static Registrar<TransposeImpl4DForward_cpu> registrarTransposeImpl4DForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 4>);
static Registrar<TransposeImpl4DForward_cpu> registrarTransposeImpl4DForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 4>);
// DIM = 5
static Registrar<TransposeImpl5DForward_cpu> registrarTransposeImpl5DForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 5>);
static Registrar<TransposeImpl5DForward_cpu> registrarTransposeImpl5DForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 5>);
static Registrar<TransposeImpl5DForward_cpu> registrarTransposeImpl5DForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 5>);
// DIM = 6
static Registrar<TransposeImpl6DForward_cpu> registrarTransposeImpl6DForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 6>);
static Registrar<TransposeImpl6DForward_cpu> registrarTransposeImpl6DForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 6>);
static Registrar<TransposeImpl6DForward_cpu> registrarTransposeImpl6DForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 6>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_TRANSPOSEIMPL_FORWARD_KERNEL_H_ */
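For reference, a standalone sketch of the permuted-index computation used by the Transpose kernel above, in plain C++ without Aidge types; the 2x3 input and output dims order {1, 0} are made up for illustration:

// Standalone illustration of the transpose indexing scheme (not part of this commit).
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const std::vector<std::size_t> inputDims{2, 3};
    const std::vector<std::size_t> order{1, 0}; // output dim j comes from input dim order[j]
    const std::vector<std::size_t> outputDims{inputDims[order[0]], inputDims[order[1]]}; // {3, 2}
    const std::vector<int> input{0, 1, 2, 3, 4, 5}; // row-major 2x3

    // Row-major strides of the output
    std::vector<std::size_t> outStrides(outputDims.size(), 1);
    for (std::size_t i = 0; i + 1 < outputDims.size(); ++i)
        for (std::size_t j = i + 1; j < outputDims.size(); ++j)
            outStrides[i] *= outputDims[j];

    std::vector<int> output(input.size());
    std::vector<std::size_t> indices(inputDims.size(), 0); // current multi-index into the input
    for (std::size_t i = 0; i < input.size(); ++i) {
        // Permute the input multi-index and flatten it with the output strides
        std::size_t outIdx = 0;
        for (std::size_t j = 0; j < outputDims.size(); ++j)
            outIdx += indices[order[j]] * outStrides[j];
        output[outIdx] = input[i];
        // Increment the input multi-index (row-major order)
        for (std::size_t j = inputDims.size(); j-- > 0;) {
            if (++indices[j] < inputDims[j]) break;
            indices[j] = 0;
        }
    }
    // 2x3 transposed to 3x2: prints "0 3 1 4 2 5"
    for (int v : output) std::printf("%d ", v);
    std::printf("\n");
    return 0;
}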
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/operator/Erf.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/operator/ErfImpl.hpp"
#include "aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp"
Aidge::NbElts_t Aidge::ErfImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// this implementation can be in-place
return 0;
}
void Aidge::ErfImpl_cpu::forward() {
// Find the correct kernel type
auto kernelFunc = Registrar<ErfImplForward_cpu>::create({
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
// Call kernel
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}