
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (100)
Showing 363 additions and 490 deletions
@@ -143,72 +143,72 @@ build:ubuntu_python:
     paths:
       - venv/
-build:windows_cpp:
-  stage: build
-  needs: []
-  tags:
-    - windows
-  image: buildtools
-  before_script:
-    # Install Chocolatey
-    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
-    # Install dependencies
-    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
-    - choco install git -Y
-    - choco install python -Y
-    # Update PATH
-    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-  script:
-    # Download dependencies
-    # aidge_core
-    - $DEPENDENCY_NAME="aidge_core"
-    - $DEPENDENCY_JOB="build:windows_cpp"
-    - !reference [.download_dependency_windows, script]
-    - Remove-Item .\build_cpp\ -Recurse -Force -ErrorAction Ignore
-    - $env:CMAKE_PREFIX_PATH = '../install_cpp'
-    - mkdir -p build_cpp
-    - cd build_cpp
-    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug ..
-    - cmake --build . -j2
-    - cmake --install . --config Debug
-  artifacts:
-    expire_in: 1 week
-    paths:
-      - build_cpp/
-      - install_cpp/
+# build:windows_cpp:
+#   stage: build
+#   needs: []
+#   tags:
+#     - windows
+#   image: buildtools
+#   before_script:
+#     # Install Chocolatey
+#     - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+#     # Install dependencies
+#     - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+#     - choco install git -Y
+#     - choco install python -Y
+#     # Update PATH
+#     - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+#   script:
+#     # Download dependencies
+#     # aidge_core
+#     - $DEPENDENCY_NAME="aidge_core"
+#     - $DEPENDENCY_JOB="build:windows_cpp"
+#     - !reference [.download_dependency_windows, script]
+#     - Remove-Item .\build_cpp\ -Recurse -Force -ErrorAction Ignore
+#     - $env:CMAKE_PREFIX_PATH = '../install_cpp'
+#     - mkdir -p build_cpp
+#     - cd build_cpp
+#     - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug ..
+#     - cmake --build . -j2
+#     - cmake --install . --config Debug
+#   artifacts:
+#     expire_in: 1 week
+#     paths:
+#       - build_cpp/
+#       - install_cpp/
-build:windows_python:
-  stage: build
-  needs: []
-  tags:
-    - windows
-  image: buildtools
-  before_script:
-    # Install Chocolatey
-    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
-    # Install dependencies
-    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
-    - choco install git -Y
-    - choco install python -Y
-    # Update PATH
-    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-  script:
-    # Download dependencies
-    # aidge_core (Python)
-    - $DEPENDENCY_NAME="aidge_core"
-    - $DEPENDENCY_JOB="build:windows_python"
-    - !reference [.download_dependency_windows, script]
-    - python -m pip install virtualenv
-    - virtualenv venv
-    - venv\Scripts\Activate.ps1
-    - python -m pip install -r requirements.txt
-    - python -m pip install .
-  artifacts:
-    expire_in: 1 week
-    paths:
-      - venv/
+# build:windows_python:
+#   stage: build
+#   needs: []
+#   tags:
+#     - windows
+#   image: buildtools
+#   before_script:
+#     # Install Chocolatey
+#     - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+#     # Install dependencies
+#     - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+#     - choco install git -Y
+#     - choco install python -Y
+#     # Update PATH
+#     - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+#   script:
+#     # Download dependencies
+#     # aidge_core (Python)
+#     - $DEPENDENCY_NAME="aidge_core"
+#     - $DEPENDENCY_JOB="build:windows_python"
+#     - !reference [.download_dependency_windows, script]
+#     - python -m pip install virtualenv
+#     - virtualenv venv
+#     - venv\Scripts\Activate.ps1
+#     - python -m pip install -r requirements.txt
+#     - python -m pip install .
+#   artifacts:
+#     expire_in: 1 week
+#     paths:
+#       - venv/
@@ -26,23 +26,23 @@ test:ubuntu_python:
     reports:
       junit: ${CI_PROJECT_NAME}/xmlrunner-results.xml
-test:windows_cpp:
-  stage: test
-  needs: ["build:windows_cpp"]
-  tags:
-    - windows
-  image: buildtools
-  before_script:
-    # Install Chocolatey
-    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
-    # Install dependencies
-    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
-    - choco install python -Y
-    # Update PATH
-    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-  script:
-    - cd build_cpp
-    - ctest --output-junit ctest-results.xml --output-on-failure
-  artifacts:
-    reports:
-      junit: build_cpp/ctest-results.xml
+# test:windows_cpp:
+#   stage: test
+#   needs: ["build:windows_cpp"]
+#   tags:
+#     - windows
+#   image: buildtools
+#   before_script:
+#     # Install Chocolatey
+#     - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+#     # Install dependencies
+#     - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+#     - choco install python -Y
+#     # Update PATH
+#     - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+#   script:
+#     - cd build_cpp
+#     - ctest --output-junit ctest-results.xml --output-on-failure
+#   artifacts:
+#     reports:
+#       junit: build_cpp/ctest-results.xml
@@ -15,7 +15,7 @@ import aidge_backend_cpu
 from functools import reduce
 import numpy as np

-class test_recipies(unittest.TestCase):
+class test_recipes(unittest.TestCase):
     def setUp(self):
         pass
@@ -33,12 +33,9 @@ class test_recipies(unittest.TestCase):
         conv = aidge_core.Conv2D(1, 1, [3, 3], name="Conv0")
         bn = aidge_core.BatchNorm2D(1, name="Add0")

-        graph_view = aidge_core.sequential([conv, bn])
+        graph_view = aidge_core.sequential([input_node, conv, bn])
         # Add random values to conv and BatchNorm parameters
-        input_node.add_child(graph_view)
-        input_node.get_operator().set_datatype(aidge_core.DataType.Float32)
-        input_node.get_operator().set_backend("cpu")
         graph_view.set_datatype(aidge_core.DataType.Float32)
         graph_view.set_backend("cpu")
...
@@ -40,18 +40,14 @@ class test_scheduler(unittest.TestCase):
         input_data = np.array([0]).astype(np.float32)
         input_tensor = aidge_core.Tensor(input_data)
-        input_node = aidge_core.Producer(input_tensor, "X")

         graph_view = aidge_core.sequential([
+            aidge_core.Producer(input_tensor, "X"),
             aidge_core.FC(1, 50, name='0'),
             aidge_core.FC(50, 50, name='1'),
             aidge_core.FC(50, 10, name='2'),
         ])
         EXPECTED_SCHEDULE = ['0', '1', '2']

-        input_node.add_child(graph_view)
-        input_node.get_operator().set_datatype(aidge_core.DataType.Float32)
-        input_node.get_operator().set_backend("cpu")
         graph_view.set_datatype(aidge_core.DataType.Float32)
         graph_view.set_backend("cpu")
@@ -60,15 +56,17 @@ class test_scheduler(unittest.TestCase):
         scheduler = aidge_core.SequentialScheduler(graph_view)
         scheduler.generate_scheduling()

-        self.assertListEqual([i.name() for i in scheduler.get_static_scheduling()], EXPECTED_SCHEDULE)
+        self.assertEqual(len(scheduler.get_static_scheduling()), 10)
+        # Do not care about the order of execution of the producers
+        self.assertListEqual([i.name() for i in scheduler.get_static_scheduling()[-3:]], EXPECTED_SCHEDULE)

     def test_parallel_scheduling(self):
         input_data = np.array([0]).astype(np.float32)
         input_tensor = aidge_core.Tensor(input_data)
-        input_node = aidge_core.Producer(input_tensor, "X")

         graph_view = aidge_core.sequential([
+            aidge_core.Producer(input_tensor, "X"),
             aidge_core.FC(1, 50, name='0'),
             aidge_core.parallel([aidge_core.FC(50, 50, name='1'), aidge_core.FC(50, 50, name='3')]),
             aidge_core.Add(2, name='2'),
@@ -76,9 +74,6 @@ class test_scheduler(unittest.TestCase):
         EXPECTED_SCHEDULE = [['0', '1', '3', '2'], ['0', '3', '1', '2']] # Both scheduling are valid !

-        input_node.add_child(graph_view)
-        input_node.get_operator().set_datatype(aidge_core.DataType.Float32)
-        input_node.get_operator().set_backend("cpu")
         graph_view.set_datatype(aidge_core.DataType.Float32)
         graph_view.set_backend("cpu")
@@ -87,7 +82,9 @@ class test_scheduler(unittest.TestCase):
         scheduler = aidge_core.SequentialScheduler(graph_view)
         scheduler.generate_scheduling()

-        self.assertTrue([i.name() for i in scheduler.get_static_scheduling()] in EXPECTED_SCHEDULE)
+        self.assertEqual(len(scheduler.get_static_scheduling()), 11)
+        # Do not care about the order of execution of the producers
+        self.assertTrue([i.name() for i in scheduler.get_static_scheduling()[-4:]] in EXPECTED_SCHEDULE)

 if __name__ == '__main__':
     unittest.main()
import unittest
import aidge_core
import aidge_backend_cpu
import numpy as np
class test_tensor(unittest.TestCase):
"""Test tensor binding
"""
def setUp(self):
pass
def tearDown(self):
pass
def test_getavailable_backends(self):
self.assertTrue("cpu" in aidge_core.Tensor.get_available_backends())
def test_numpy_int_to_tensor(self):
np_array = np.arange(9).reshape(1,1,3,3).astype(np.int32)
# Numpy -> Tensor
t = aidge_core.Tensor(np_array)
self.assertEqual(t.dtype(), aidge_core.DataType.Int32)
for i_t, i_n in zip(t, np_array.flatten()):
self.assertTrue(i_t == i_n)
for i,j in zip(t.dims(), np_array.shape):
self.assertEqual(i,j)
def test_tensor_int_to_numpy(self):
np_array = np.arange(9).reshape(1,1,3,3)
# Numpy -> Tensor
t = aidge_core.Tensor(np_array)
# Tensor -> Numpy
nnarray = np.array(t)
for i_nn, i_n in zip(nnarray.flatten(), np_array.flatten()):
self.assertTrue(i_nn == i_n)
for i,j in zip(t.dims(), nnarray.shape):
self.assertEqual(i,j)
def test_numpy_int64_to_tensor(self):
np_array = np.arange(9).reshape(1,1,3,3).astype(np.int64)
# Numpy -> Tensor
t = aidge_core.Tensor(np_array)
self.assertEqual(t.dtype(), aidge_core.DataType.Int64)
for i_t, i_n in zip(t, np_array.flatten()):
self.assertTrue(i_t == i_n)
for i,j in zip(t.dims(), np_array.shape):
self.assertEqual(i,j)
def test_numpy_float_to_tensor(self):
t = aidge_core.Tensor()
np_array = np.random.rand(1, 1, 3, 3).astype(np.float32)
# Numpy -> Tensor
t = aidge_core.Tensor(np_array)
self.assertEqual(t.dtype(), aidge_core.DataType.Float32)
for i_t, i_n in zip(t, np_array.flatten()):
self.assertTrue(i_t == i_n) # TODO : May need to change this to a difference
for i,j in zip(t.dims(), np_array.shape):
self.assertEqual(i,j)
def test_get_set(self):
dims = [2,2,2]
np_array = np.arange(8).reshape(dims).astype(np.int32)
# Numpy -> Tensor
t = aidge_core.Tensor(np_array)
for i in range(8):
self.assertEqual(t[i], i)
t[i] = 5
self.assertEqual(t[i], 5)
if __name__ == '__main__':
unittest.main()
@@ -12,7 +12,6 @@
 #ifndef AIDGE_CPU_IMPORTS_H_
 #define AIDGE_CPU_IMPORTS_H_

-#include "aidge/backend/cpu/data/TensorImpl.hpp"
 #include "aidge/backend/cpu/operator/AddImpl.hpp"
 #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
@@ -26,18 +25,24 @@
 #include "aidge/backend/cpu/operator/GatherImpl.hpp"
 #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+#include "aidge/backend/cpu/operator/MemorizeImpl.hpp"
 #include "aidge/backend/cpu/operator/MulImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
+#include "aidge/backend/cpu/operator/PopImpl.hpp"
 #include "aidge/backend/cpu/operator/PowImpl.hpp"
-#include "aidge/backend/cpu/operator/ProducerImpl.hpp"
 #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
 #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/ReshapeImpl.hpp"
 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
+#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
 #include "aidge/backend/cpu/operator/SliceImpl.hpp"
 #include "aidge/backend/cpu/operator/SqrtImpl.hpp"
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
+#include "aidge/backend/cpu/operator/TanhImpl.hpp"
 #include "aidge/backend/cpu/operator/TransposeImpl.hpp"

-#endif /* AIDGE_CPU_IMPORTS_H_ */
\ No newline at end of file
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+
+#endif /* AIDGE_CPU_IMPORTS_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_DATA_BROADCASTING_H_
#define AIDGE_CPU_DATA_BROADCASTING_H_
#include <vector>
namespace Aidge {
// Function to broadcast an input dims vector to the same size as an outputDims vector
/**
 * @brief Broadcast an input dims vector to the same size as an outputDims vector
 * @details The missing dimensions are filled with 1
 * @param outputDims The vector of dimensions to follow
 * @param dimsToBroadcast The vector of dimensions to broadcast
 * @return std::vector<std::size_t> the broadcast vector, with 1s added for the missing dimensions
*/
std::vector<std::size_t> getBroadcastedDims(const std::vector<std::size_t>& outputDims, const std::vector<std::size_t>& dimsToBroadcast);
/**
* @brief Get a vector of indexes along the dimensions vector from a flattened index
* @param dimensions The vector of dimensions we want the indexes on
* @param idx The flattened index
* @return std::vector<std::size_t> vector of indexes along dimensions.
*/
std::vector<std::size_t> getMultiDimIndices(const std::vector<std::size_t>& dimensions, std::size_t idx);
// Function to get a flattened index from multi-dimensional indices
/**
 * @brief Get a flattened index into the dimensions vector from a given vector of indices on a broadcast vector
* @param dimensions The vector of dimensions we want the flattened index on
* @param indices The vector of indices we want to flatten
* @return std::size_t The flattened index on the dimensions vector
*/
std::size_t getFlattenedIndex(const std::vector<std::size_t>& dimensions, const std::vector<std::size_t>& indices);
} // namespace Aidge
#endif // AIDGE_CPU_DATA_BROADCASTING_H_
\ No newline at end of file
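For intuition, here is a small self-contained sketch of the index arithmetic these helpers describe. The multiDimIndices and flattenedIndex functions below are illustrative stand-ins written for this note, not the Aidge implementations.

#include <cassert>
#include <cstddef>
#include <vector>

// Illustrative stand-in: map a flat (row-major) index into per-dimension indices.
std::vector<std::size_t> multiDimIndices(const std::vector<std::size_t>& dims, std::size_t idx) {
    std::vector<std::size_t> indices(dims.size());
    for (std::size_t i = dims.size(); i-- > 0;) {
        indices[i] = idx % dims[i];
        idx /= dims[i];
    }
    return indices;
}

// Illustrative stand-in: flatten indices against (possibly size-1, i.e. broadcast)
// dimensions; the coordinate on a broadcast axis is always taken as 0.
std::size_t flattenedIndex(const std::vector<std::size_t>& dims, const std::vector<std::size_t>& indices) {
    std::size_t flat = 0;
    for (std::size_t i = 0; i < dims.size(); ++i) {
        const std::size_t coord = (dims[i] == 1) ? 0 : indices[i];
        flat = flat * dims[i] + coord;
    }
    return flat;
}

int main() {
    // Output dims {2, 3}; an input broadcast along the first axis has dims {1, 3}.
    const std::vector<std::size_t> outDims{2, 3}, inDims{1, 3};
    // Flat output index 4 -> coordinates {1, 1} -> flat index 1 in the broadcast input.
    assert((multiDimIndices(outDims, 4) == std::vector<std::size_t>{1, 1}));
    assert(flattenedIndex(inDims, multiDimIndices(outDims, 4)) == 1);
    return 0;
}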
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_DATA_GETCPUPTR_H_
#define AIDGE_CPU_DATA_GETCPUPTR_H_
#include "aidge/data/Tensor.hpp"
namespace Aidge {
inline void *getCPUPtr(std::shared_ptr<Aidge::Data> const &data) {
const auto tensor = std::static_pointer_cast<Tensor>(data);
return tensor->getImpl()->hostPtr(tensor->getImplOffset());
}
} // namespace Aidge
#endif // AIDGE_CPU_DATA_GETCPUPTR_H_
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_DATA_TENSORIMPL_H_
#define AIDGE_CPU_DATA_TENSORIMPL_H_
#include "aidge/backend/TensorImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/data/half.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/future_std/span.hpp"
namespace Aidge {
template <class T>
class TensorImpl_cpu : public TensorImpl {
private:
/// Pointer to the data and its capacity
future_std::span<T> mData;
    /// If this instance owns the data, it is managed by this std::unique_ptr
std::unique_ptr<T[]> mDataOwner;
public:
static constexpr const char *Backend = "cpu";
TensorImpl_cpu(DeviceIdx_t device, NbElts_t length) : TensorImpl(Backend, device, length) {}
bool operator==(const TensorImpl &otherImpl) const override final {
const auto& typedOtherImpl = reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl);
AIDGE_INTERNAL_ASSERT(typedOtherImpl.size() >= mNbElts);
std::size_t i = 0;
for (; i < mNbElts &&
*(mData.data()+i) == *static_cast<const T*>(typedOtherImpl.rawPtr(i));
++i) {
}
return i == mNbElts;
}
static std::shared_ptr<TensorImpl_cpu> create(DeviceIdx_t device, NbElts_t length) {
return std::make_shared<TensorImpl_cpu<T>>(device, length);
}
inline std::size_t scalarSize() const noexcept override final { return sizeof(T); }
void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
const T* srcT = static_cast<const T *>(src);
T* dstT = static_cast<T *>(rawPtr(offset));
AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "copy length is above capacity");
AIDGE_ASSERT(dstT < srcT || dstT >= srcT + length, "overlapping copy is not supported");
std::copy(srcT, srcT + length, dstT);
}
void copyCast(const void *src, const DataType srcDt, NbElts_t length, NbElts_t offset = 0) override final {
if (length == 0) {
return;
}
T* dstT = static_cast<T *>(rawPtr(offset));
AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "copy length is above capacity");
switch (srcDt)
{
case DataType::Float64:
std::copy(static_cast<const double*>(src), static_cast<const double*>(src) + length,
dstT);
break;
case DataType::Float32:
std::copy(static_cast<const float*>(src), static_cast<const float*>(src) + length,
dstT);
break;
case DataType::Float16:
std::copy(static_cast<const half_float::half*>(src), static_cast<const half_float::half*>(src) + length,
dstT);
break;
case DataType::Int64:
std::copy(static_cast<const int64_t*>(src), static_cast<const int64_t*>(src) + length,
dstT);
break;
case DataType::UInt64:
std::copy(static_cast<const uint64_t*>(src), static_cast<const uint64_t*>(src) + length,
dstT);
break;
case DataType::Int32:
std::copy(static_cast<const int32_t*>(src), static_cast<const int32_t*>(src) + length,
dstT);
break;
case DataType::UInt32:
std::copy(static_cast<const uint32_t*>(src), static_cast<const uint32_t*>(src) + length,
dstT);
break;
case DataType::Int16:
std::copy(static_cast<const int16_t*>(src), static_cast<const int16_t*>(src) + length,
dstT);
break;
case DataType::UInt16:
std::copy(static_cast<const uint16_t*>(src), static_cast<const uint16_t*>(src) + length,
dstT);
break;
case DataType::Int8:
std::copy(static_cast<const int8_t*>(src), static_cast<const int8_t*>(src) + length,
dstT);
break;
case DataType::UInt8:
std::copy(static_cast<const uint8_t*>(src), static_cast<const uint8_t*>(src) + length,
dstT);
break;
default:
AIDGE_THROW_OR_ABORT(std::runtime_error, "Unsupported data type.");
break;
}
}
void copyFromDevice(const void *src, const std::pair<std::string, DeviceIdx_t>& device, NbElts_t length, NbElts_t offset = 0) override final {
AIDGE_ASSERT(device.first == Backend, "backend must match");
AIDGE_ASSERT(device.second == 0, "device cannot be != 0 for CPU backend");
copy(src, length, offset);
}
inline void copyFromHost(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
copy(src, length, offset);
}
void copyToHost(void *dst, NbElts_t length, NbElts_t offset = 0) const override final {
const T* src = static_cast<const T*>(rawPtr(offset));
AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "copy length is above capacity");
std::copy(src, src + length, static_cast<T *>(dst));
}
void *rawPtr(NbElts_t offset = 0) override final {
lazyInit();
return (mData.data() + offset);
};
const void *rawPtr(NbElts_t offset = 0) const override final {
AIDGE_ASSERT(mData.size() >= mNbElts, "accessing uninitialized const rawPtr");
return (mData.data() + offset);
};
void *hostPtr(NbElts_t offset = 0) override final {
lazyInit();
return (mData.data() + offset);
};
const void *hostPtr(NbElts_t offset = 0) const override final {
AIDGE_ASSERT(mData.size() >= mNbElts, "accessing uninitialized const hostPtr");
return (mData.data() + offset);
};
void setRawPtr(void *ptr, NbElts_t length) override final {
AIDGE_ASSERT(length >= mNbElts, "trying to set raw pointer of insufficient capacity");
mData = future_std::span<T>(static_cast<T *>(ptr), length);
mDataOwner.reset();
};
virtual ~TensorImpl_cpu() = default;
private:
void lazyInit() {
if (mData.size() < mNbElts) {
// Need more data, a re-allocation will occur
AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "trying to enlarge non-owned data");
mDataOwner.reset(new T[mNbElts]);
mData = future_std::span<T>(mDataOwner.get(), mNbElts);
}
}
};
namespace {
static Registrar<Tensor> registrarTensorImpl_cpu_Float64(
{"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float32(
{"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float16(
{"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
{"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int64(
{"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<long>::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */
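The storage scheme above (a span that either borrows caller memory through setRawPtr or lazily allocates owned memory on first mutable access) can be summarised with a standalone sketch. CpuStorage below is a made-up struct for illustration only and is not part of Aidge.

#include <cassert>
#include <cstddef>
#include <memory>

// Sketch of the owned-vs-borrowed storage pattern: a raw view (pointer + size)
// optionally backed by an owning buffer that is only allocated on first write access.
struct CpuStorage {
    float*                   data = nullptr;  // view on the current storage
    std::size_t              size = 0;        // capacity of the view
    std::unique_ptr<float[]> owner;           // set only when this struct owns the memory

    // Mirror of lazyInit(): allocate owned memory if the current view is too small.
    float* lazyPtr(std::size_t required) {
        if (size < required) {
            assert((size == 0 || owner) && "cannot enlarge borrowed memory");
            owner.reset(new float[required]);
            data = owner.get();
            size = required;
        }
        return data;
    }

    // Mirror of setRawPtr(): borrow external memory and drop ownership.
    void borrow(float* ptr, std::size_t length) {
        data = ptr;
        size = length;
        owner.reset();
    }
};

int main() {
    CpuStorage s;
    s.lazyPtr(4)[0] = 1.f;      // first access triggers the allocation
    float external[8] = {};
    s.borrow(external, 8);      // now wraps caller-provided memory instead
    assert(s.data == external);
    return 0;
}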
@@ -25,10 +25,10 @@ namespace Aidge {

 // compute kernel registry for forward and backward
 class AddImplForward_cpu
-    : public Registrable<AddImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const std::vector<const void*>, void*)> {};
+    : public Registrable<AddImplForward_cpu, std::tuple<DataType, DataType>, void(const std::vector<const void*>, const std::vector<std::vector<std::size_t>>&, const std::size_t, const std::vector<std::size_t>&, void*)> {};

 class AddImplBackward_cpu
-    : public Registrable<AddImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const std::vector<const void*>, void*)> {};
+    : public Registrable<AddImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::vector<const void*>, const std::vector<std::vector<std::size_t>>&, const std::size_t, const std::vector<std::size_t>&, void*)> {};

 class AddImpl_cpu : public OperatorImpl {
...
@@ -14,12 +14,13 @@
 #include "aidge/utils/Registrar.hpp"

+#include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/AddImpl.hpp"

 namespace Aidge {

 template <class I, class O>
-void AddImpl_cpu_forward_kernel(const std::size_t inputLength, const std::vector<const void*> inputs_, void* output_) {
+void AddImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const std::vector<std::vector<std::size_t>>& inputDims, const std::size_t outputLength, const std::vector<std::size_t>& outDims, void* output_) {
     // FIXME: missing Add attributes as arguments
     std::vector<const I*> inputs;
     for (const auto& input_ : inputs_) {
@@ -27,12 +28,15 @@ void AddImpl_cpu_forward_kernel(const std::size_t inputLength, const std::vector
     }

     O* output = static_cast<O*>(output_);

-    for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
+    for (std::size_t oIndex = 0; oIndex < outputLength; ++oIndex)
+    {
         output[oIndex] = 0;
-        for (std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
-            output[oIndex] += inputs[iIndex][oIndex];
-        }
-    }
+        std::vector<size_t> indexes = getMultiDimIndices(outDims, oIndex);
+        for(std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
+            std::size_t idx = getFlattenedIndex(inputDims[iIndex], indexes);
+            output[oIndex] += inputs[iIndex][idx];
+        }
+    }
 }

 namespace {
...
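As a worked illustration of what the broadcast-aware Add contract now computes, the following self-contained snippet adds a tensor of logical shape {2,1} to one of shape {1,3}. Plain row/column arithmetic stands in for getMultiDimIndices and getFlattenedIndex, so this is a sketch of the contract rather than the Aidge code itself.

#include <array>
#include <cassert>
#include <cstddef>

// Broadcast-aware addition on fixed shapes: in1 is {2,1}, in2 is {1,3}, output is {2,3}.
int main() {
    const std::array<float, 2> in1{1.f, 2.f};          // logical shape {2,1}
    const std::array<float, 3> in2{10.f, 20.f, 30.f};  // logical shape {1,3}
    std::array<float, 6> out{};                         // logical shape {2,3}
    for (std::size_t o = 0; o < out.size(); ++o) {
        const std::size_t row = o / 3, col = o % 3;     // multi-dim indices of the output element
        out[o] = in1[row] + in2[col];                   // size-1 axes collapse to index 0
    }
    assert(out[0] == 11.f && out[5] == 32.f);           // [[11, 21, 31], [12, 22, 32]]
    return 0;
}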
@@ -12,23 +12,24 @@
 #ifndef AIDGE_CPU_OPERATOR_DIVIMPL_H_
 #define AIDGE_CPU_OPERATOR_DIVIMPL_H_

+#include <memory>
+#include <tuple>
+#include <vector>
+
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/operator/Div.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <memory>
-#include <vector>

 namespace Aidge {
-// class Div_Op;

 // compute kernel registry for forward and backward
 class DivImplForward_cpu
-    : public Registrable<DivImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*,void*)> {
+    // : public Registrable<DivImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)> {
+    : public Registrable<DivImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)> {
 };

 class DivImplBackward_cpu
-    : public Registrable<DivImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {
+    : public Registrable<DivImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> {
 };

 class DivImpl_cpu : public OperatorImpl {
@@ -40,7 +41,8 @@ public:
     }

     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;

-    void forward() override;
+    void forward() override final;
 };

 namespace {
...
@@ -12,42 +12,64 @@
 #ifndef AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_

+#include <numeric>    // std::accumulate
+#include <cstddef>    // std::size_t
+#include <functional> // std::multiplies
+
 #include "aidge/utils/Registrar.hpp"

+#include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/DivImpl.hpp"

 namespace Aidge {
+// template <class I1, class I2, class O>
+// void DivImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
+//                                 const std::vector<std::size_t>& input2Dims,
+//                                 const std::vector<std::size_t>& outputDims,
+//                                 const void* input1_,
+//                                 const void* input2_,
+//                                 void* output_) {
+//     const I1* input_1 = static_cast<const I1*>(input1_);
+//     const I2* input_2 = static_cast<const I2*>(input2_);
+//     O* output = static_cast<O*>(output_);
+//     const std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+//     for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
+//     {
+//         std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+//         std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
+//         std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
+//         // TODO assert if input_2 is bad?
+//         output[oIndex] = input_1[idx1] / input_2[idx2];
+//     }
+// }

 template <class I1, class I2, class O>
-void DivImpl_cpu_forward_kernel(std::size_t input1Length,
-                                std::size_t input2Length,
-                                const void* input1_,
-                                const void* input2_,
-                                void* output_) {
+constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_,
+                                          const std::size_t input2size_,
+                                          const std::size_t output1size_,
+                                          const void* input1_,
+                                          const void* input2_,
+                                          void* output_) {

     const I1* input_1 = static_cast<const I1*>(input1_);
     const I2* input_2 = static_cast<const I2*>(input2_);
     O* output = static_cast<O*>(output_);

-    if (input2Length == input1Length)
-    {
-        for (std::size_t i = 0; i < input1Length; ++i) {
-            output[i] = input_1[i] / input_2[i];
-        }
-    }
-    else if (input2Length == 1)
-    {
-        for (std::size_t i = 0; i < input1Length; ++i) {
-            output[i] = input_1[i] / input_2[0];
-        }
-    }
-    else // input_2 is 1d and of size the number of channels of input_1
-    {
-        for (std::size_t i = 0; i < input1Length; ++i) {
-            std::size_t channelIdx = i % input2Length;
-            output[i] = input_1[i] / input_2[channelIdx];
-        }
+    // suppose values are contiguous in memory
+    for (std::size_t i = 0; i < output1size_; ++i) {
+        const std::size_t in1_id = (input1size_ != 1) ? i : 0;
+        const std::size_t in2_id = (input2size_ != 1) ? i : 0;
+        output[i] = static_cast<O>(input_1[in1_id] / input_2[in2_id]);
     }
 }

 namespace {
 static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float32(
         {DataType::Float32, DataType::Float32, DataType::Float32},
...
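The active (non-commented) Div kernel above assumes contiguous inputs that are either full-size or a single broadcast value. A minimal standalone sketch of that contract, using made-up buffers:

#include <array>
#include <cassert>
#include <cstddef>

// Simplified Div contract: each input is either full-size and contiguous,
// or a single value that is reused for every output element.
int main() {
    const std::array<float, 4> num{2.f, 4.f, 6.f, 8.f};
    const std::array<float, 1> den{2.f};                // size-1 operand broadcast to all elements
    std::array<float, 4> out{};
    for (std::size_t i = 0; i < out.size(); ++i) {
        const std::size_t i1 = (num.size() != 1) ? i : 0;
        const std::size_t i2 = (den.size() != 1) ? i : 0;
        out[i] = num[i1] / den[i2];
    }
    assert(out[0] == 1.f && out[3] == 4.f);
    return 0;
}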
@@ -23,16 +23,14 @@
 #include "aidge/backend/cpu/data/GetCPUPtr.h"

 namespace Aidge {
-// class MatMul_Op;

-// compute kernel registry for forward and backward
 class MatMulImplForward_cpu
-    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>,
-                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType>,
+                         void(const std::size_t, const std::size_t, const std::size_t,
                              const void *, const void *, void *)> {};
 class MatMulImplBackward_cpu
-    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>,
-                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType>,
+                         void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                              const void *, const void *, void *)> {};

 class MatMulImpl_cpu : public OperatorImpl {
...
@@ -12,45 +12,39 @@
 #ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_

-#include "aidge/utils/Registrar.hpp"
-#include <algorithm>

 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"

 namespace Aidge {

-template <class I, class W, class O>
-void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
-                                   const void* input_, const void* weights_, void* output_) {
+template <class I, class O>
+void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m,
+                                   const void* input1_, const void* input2_, void* output_) {
     // FIXME: missing MatMul parameters as arguments
-    const I* input = static_cast<const I*>(input_);
-    const W* weights = static_cast<const W*>(weights_);
+    const I* input1 = static_cast<const I*>(input1_);
+    const I* input2 = static_cast<const I*>(input2_);
     O* output = static_cast<O*>(output_);

-    std::fill(output, output+(batchSize*std::get<0>(attrs)), O(0));
-
-    for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
-            output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
-                                                                        input + (batch + 1)*oneInputSize,
-                                                                        weights + out*oneInputSize,
-                                                                        output[out + batch*std::get<0>(attrs)]);
+    for (std::size_t i = 0; i < n; ++i) {
+        for (std::size_t j = 0; j < m; ++j) {
+            O sum = O(0);
+            for (std::size_t l = 0; l < k; ++l) {
+                sum += static_cast<O>(input1[i*k + l] * input2[l*m + j]);
+            }
+            output[i*m + j] = sum;
         }
     }
 }

 namespace {
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>);
+        {DataType::Float32, DataType::Float32},
+        Aidge::MatMulImpl_cpu_forward_kernel<float, float>);
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>);
+        {DataType::Int32, DataType::Int32},
+        Aidge::MatMulImpl_cpu_forward_kernel<int, int>);
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>);
+        {DataType::Float64, DataType::Float64},
+        Aidge::MatMulImpl_cpu_forward_kernel<double, double>);
 } // namespace
 } // namespace Aidge
...
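The refactored MatMul kernel is now a plain row-major (n x k) times (k x m) product with no attributes or batch handling. A self-contained worked example of the same loop structure, with illustrative values chosen for this note:

#include <array>
#include <cassert>
#include <cstddef>

// Naive row-major matrix product: a is n x k, b is k x m, c is n x m.
int main() {
    constexpr std::size_t n = 2, k = 3, m = 2;
    const std::array<float, n * k> a{1, 2, 3,
                                     4, 5, 6};
    const std::array<float, k * m> b{ 7,  8,
                                      9, 10,
                                     11, 12};
    std::array<float, n * m> c{};
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t j = 0; j < m; ++j) {
            float sum = 0.f;
            for (std::size_t l = 0; l < k; ++l) {
                sum += a[i * k + l] * b[l * m + j];
            }
            c[i * m + j] = sum;
        }
    }
    assert(c[0] == 58.f && c[3] == 154.f);  // c = [[58, 64], [139, 154]]
    return 0;
}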
@@ -9,33 +9,36 @@
  *
  ********************************************************************************/

-#ifndef AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_
-#define AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_
+#ifndef AIDGE_CPU_OPERATOR_MEMORIZEIMPL_H_
+#define AIDGE_CPU_OPERATOR_MEMORIZEIMPL_H_

+#include <memory>
+
 #include "aidge/backend/OperatorImpl.hpp"
-#include "aidge/operator/Producer.hpp"
+#include "aidge/operator/Memorize.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <memory>
-#include <vector>

 namespace Aidge {
-class ProducerImpl_cpu : public OperatorImpl {
+class MemorizeImpl_cpu : public OperatorImpl {
 public:
-    ProducerImpl_cpu(const Producer_Op &op) : OperatorImpl(op) {}
+    MemorizeImpl_cpu(const Memorize_Op& op) : OperatorImpl(op) {}

-    static std::unique_ptr<ProducerImpl_cpu> create(const Producer_Op &op) {
-        return std::make_unique<ProducerImpl_cpu>(op);
+    static std::unique_ptr<MemorizeImpl_cpu> create(const Memorize_Op& op) {
+        return std::make_unique<MemorizeImpl_cpu>(op);
     }

-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
+    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
+    NbElts_t getRequiredMemory(const Aidge::IOIndex_t outputIdx,
+                               const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const override final;
+    void updateConsummerProducer() override final;
     void forward() override;
 };

 namespace {
-static Registrar<Producer_Op> registrarProducerImpl_cpu("cpu", Aidge::ProducerImpl_cpu::create);
-} // namespace
+static Registrar<Memorize_Op> registrarMemorizeImpl_cpu("cpu", Aidge::MemorizeImpl_cpu::create);
+}
 } // namespace Aidge

-#endif /* AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_MEMORIZEIMPL_H_ */
@@ -25,10 +25,10 @@ namespace Aidge {

 // compute kernel registry for forward and backward
 class MulImplForward_cpu
-    : public Registrable<MulImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*,void*)> {
+    : public Registrable<MulImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)> {
 };

 class MulImplBackward_cpu
-    : public Registrable<MulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {
+    : public Registrable<MulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> {
 };

 class MulImpl_cpu : public OperatorImpl {
...
@@ -14,37 +14,35 @@
 #include "aidge/utils/Registrar.hpp"

+#include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/MulImpl.hpp"

 namespace Aidge {
 template <class I1, class I2, class O>
-void MulImpl_cpu_forward_kernel(std::size_t input1Length,
-                                std::size_t input2Length,
-                                const void* input1_,
-                                const void* input2_,
-                                void* output_) {
+void MulImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
+                                const std::vector<std::size_t>& input2Dims,
+                                const std::vector<std::size_t>& outputDims,
+                                const void* input1_,
+                                const void* input2_,
+                                void* output_) {

     const I1* input_1 = static_cast<const I1*>(input1_);
     const I2* input_2 = static_cast<const I2*>(input2_);
     O* output = static_cast<O*>(output_);

-    if (input2Length == input1Length)
-    {
-        for (std::size_t i = 0; i < input1Length; ++i) {
-            output[i] = input_1[i] * input_2[i];
-        }
-    }
-    else if (input2Length == 1)
-    {
-        for (std::size_t i = 0; i < input1Length; ++i) {
-            output[i] = input_1[i] * input_2[0];
-        }
-    }
-    else // input_2 is 1d and of size the number of channels of input_1
-    {
-        for (std::size_t i = 0; i < input1Length; ++i) {
-            std::size_t channelIdx = i % input2Length;
-            output[i] = input_1[i] * input_2[channelIdx];
-        }
+    size_t totalElements = 1;
+    for (size_t dimSize : outputDims) {
+        totalElements *= dimSize;
+    }
+
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
+    {
+        std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+
+        std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
+        std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
+
+        output[oIndex] = input_1[idx1] * input_2[idx2];
     }
 }
...
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_POPIMPL_H_
#define AIDGE_CPU_OPERATOR_POPIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Pop.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Pop_Op;
// compute kernel registry for forward and backward
class PopImplForward_cpu
: public Registrable<PopImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class PopImplBackward_cpu
: public Registrable<PopImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class PopImpl_cpu : public OperatorImpl {
public:
PopImpl_cpu(const Pop_Op& op) : OperatorImpl(op) {}
static std::unique_ptr<PopImpl_cpu> create(const Pop_Op& op) {
return std::make_unique<PopImpl_cpu>(op);
}
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
static Registrar<Pop_Op> registrarPopImpl_cpu("cpu", Aidge::PopImpl_cpu::create);
}
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_POPIMPL_H_ */
@@ -25,10 +25,10 @@ namespace Aidge {

 // compute kernel registry for forward and backward
 class PowImplForward_cpu
-    : public Registrable<PowImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*,void*)> {
+    : public Registrable<PowImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)> {
 };
 class PowImplBackward_cpu
-    : public Registrable<PowImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {
+    : public Registrable<PowImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> {
 };

 class PowImpl_cpu : public OperatorImpl {
...