Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mszczep/aidge_backend_cpu
  • eclipse/aidge/aidge_backend_cpu
  • hrouis/aidge_backend_cpu
  • oantoni/aidge_backend_cpu
  • raphaelmillet/aidge_backend_cpu
  • cguillon/aidge_backend_cpu
  • jeromeh/aidge_backend_cpu
  • axelfarr/aidge_backend_cpu
  • noamzerah/aidge_backend_cpu
  • silvanosky/aidge_backend_cpu
  • maab05/aidge_backend_cpu
  • lucaslopez/aidge_backend_cpu_ll
  • farnez/aidge_backend_cpu
13 results
Show changes
Commits on Source (23)
Showing
with 722 additions and 93 deletions
cmake_minimum_required(VERSION 3.15)
cmake_minimum_required(VERSION 3.18)
set(CXX_STANDARD 14)
file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
......@@ -24,6 +24,7 @@ add_definitions(-DGIT_COMMIT_HASH="${GIT_COMMIT_HASH}")
# Note : project name is ${CMAKE_PROJECT_NAME} and python module name is also ${CMAKE_PROJECT_NAME}
set(module_name _${CMAKE_PROJECT_NAME}) # target name
set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings
##############################################
# Define options
......@@ -69,16 +70,12 @@ set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
# PYTHON BINDING
if (PYBIND)
# Handles Python + pybind11 headers dependencies
include(PybindModuleCreation)
generate_python_binding(${CMAKE_PROJECT_NAME} ${module_name})
# Python binding lib is by default installed in <prefix>/python_packages/<package>/
# When installed from python, setup.py should set it to the python package dir
set(PYBIND_INSTALL_PREFIX python_packages/${pybind_module_name} CACHE PATH "Python package install prefix")
target_link_libraries(${module_name}
PUBLIC
pybind11::pybind11
PRIVATE
Python::Module
)
include(PybindModuleCreation)
generate_python_binding(${pybind_module_name} ${module_name})
endif()
if( ${ENABLE_ASAN} )
......@@ -102,7 +99,6 @@ target_include_directories(${module_name}
${CMAKE_CURRENT_SOURCE_DIR}/src
)
target_link_libraries(${module_name} PUBLIC fmt::fmt)
target_compile_features(${module_name} PRIVATE cxx_std_14)
target_compile_options(${module_name} PRIVATE
......@@ -128,6 +124,12 @@ install(TARGETS ${module_name} EXPORT ${CMAKE_PROJECT_NAME}-targets
)
install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if (PYBIND)
install(TARGETS ${pybind_module_name}
DESTINATION ${PYBIND_INSTALL_PREFIX}
)
endif()
#Export the targets to a script
install(EXPORT ${CMAKE_PROJECT_NAME}-targets
FILE "${CMAKE_PROJECT_NAME}-targets.cmake"
......@@ -159,15 +161,16 @@ install(FILES
## Exporting from the build tree
message(STATUS "Exporting created targets to use them in another build")
export(EXPORT ${CMAKE_PROJECT_NAME}-targets
FILE "${CMAKE_CURRENT_BINARY_DIR}/${project}-targets.cmake")
FILE "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-targets.cmake")
##############################################
## Add test
if(TEST)
if(PYBIND)
message(FATAL_ERROR "PYBIND and TEST are both enabled. But cannot compile with catch_2.\nChoose between pybind and Catch2 for compilation.")
if (AIDGE_REQUIRES_PYTHON AND NOT AIDGE_PYTHON_HAS_EMBED)
message(WARNING "Skipping compilation of tests: missing Python embedded interpreter")
else()
enable_testing()
add_subdirectory(unit_tests)
endif()
enable_testing()
add_subdirectory(unit_tests)
endif()
......@@ -23,9 +23,23 @@ Those operators can be used on any machine with an Linux OS.
pip install . -v
```
> **TIPS :** Use environment variables to change compilation options :
> - `AIDGE_INSTALL` : to set the installation folder. Defaults to /usr/local/lib. :warning: This path must be identical to aidge_core install path.
> - `AIDGE_PYTHON_BUILD_TYPE` : to set the compilation mode to **Debug** or **Release**
> - `AIDGE_BUILD_GEN` : to set the build backend with
> - `AIDGE_INSTALL` : to set the installation folder. Defaults to `<python_prefix>/lib/libAidge`. :warning: This path must be identical to aidge_core install path.
> - `AIDGE_PYTHON_BUILD_TYPE` : to set the compilation mode to **Debug** or **Release** or "" (for default flags). Defaults to **Release**.
> - `AIDGE_BUILD_GEN` : to set the build backend (for development mode) or "" for the cmake default. Default to "".
## Pip installation for development
To setup using pip in development (or editable mode), use the `--no-build-isolation -e` options to pip.
For instance run the following command in your python environnement for a typical setup :
``` bash
export AIDGE_PYTHON_BUILD_TYPE= # default flags (no debug info but fastest build time)
export AIDGE_PYTHON_BUILD_TYPE=Debug # or if one really need to debug the C++ code
pip install -U pip setuptools setuptools_scm[toml] cmake # Pre-install build requirements (refer to the pyproject.toml [build-system] section)
pip install -v --no-build-isolation -e .
```
Refer to `aidge_core/README.md` for more details on development build options.
### Standard C++ Compilation
......
@PACKAGE_INIT@
include(CMakeFindDependencyMacro)
find_dependency(aidge_core)
include(CMakeFindDependencyMacro)
include(${CMAKE_CURRENT_LIST_DIR}/aidge_backend_cpu-config-version.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/aidge_backend_cpu-targets.cmake)
function(generate_python_binding pybind_module_name target_to_bind)
add_definitions(-DPYBIND)
find_package(Python COMPONENTS Interpreter Development.Module)
Include(FetchContent)
set(PYBIND_VERSION v2.10.4)
set(PYBIND11_FINDPYTHON ON)
message(STATUS "Retrieving pybind ${PYBIND_VERSION} from git")
FetchContent_Declare(
......@@ -12,14 +13,12 @@ function(generate_python_binding pybind_module_name target_to_bind)
GIT_TAG ${PYBIND_VERSION} # or a later release
)
# Use the New FindPython mode, recommanded. Requires CMake 3.15+
find_package(Python COMPONENTS Interpreter Development.Module)
FetchContent_MakeAvailable(PyBind11)
message(STATUS "Creating binding for module ${pybind_module_name}")
file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp")
pybind11_add_module(${pybind_module_name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install
target_include_directories(${pybind_module_name} PUBLIC "python_binding")
target_link_libraries(${pybind_module_name} PUBLIC ${target_to_bind})
target_include_directories(${pybind_module_name} PRIVATE "python_binding")
target_link_libraries(${pybind_module_name} PRIVATE ${target_to_bind})
endfunction()
......@@ -19,6 +19,7 @@
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
#include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_
#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/BitShift.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using BitShiftImpl_cpu = OperatorImpl_cpu<BitShift_Op,
void(const BitShift_Op::BitShiftDirection,
const std::vector<std::size_t>&,
const std::vector<std::size_t>&,
const std::vector<std::size_t>&,
const void*,
const void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(BitShift_Op,"cpu",Aidge::BitShiftImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstdint> // std::int32_t, std::int64_t
#include "aidge/operator/BitShift.hpp"
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
namespace Aidge {
template <class I1, class I2, class O>
void BitShiftImpl_cpu_forward_kernel(
const BitShift_Op::BitShiftDirection direction,
const std::vector<std::size_t>& input1Dims,
const std::vector<std::size_t>& input2Dims,
const std::vector<std::size_t>& outputDims,
const void* input1_,
const void* input2_,
void* output_
) {
const I1* input_1 = static_cast<const I1*>(input1_);
const I2* input_2 = static_cast<const I2*>(input2_);
O* output = static_cast<O*>(output_);
const size_t totalElements = std::accumulate(outputDims.begin(), outputDims.end(), std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
{
std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
if(direction == BitShift_Op::BitShiftDirection::right)
{
output[oIndex]= input_1[idx1] >> input_2[idx2];
}
else
{
output[oIndex] = input_1[idx1] << input_2[idx2];
}
}
}
REGISTRAR(BitShiftImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>,nullptr});
REGISTRAR(BitShiftImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>,nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_BitShiftIMPL_KERNELS_H_ */
\ No newline at end of file
......@@ -18,19 +18,19 @@ namespace Aidge {
template <class I, class O>
void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m,
const void* input1_, const void* input2_, void* output_) {
const void* input1_, const void* input2_, void* __restrict output_) {
// FIXME: missing MatMul parameters as arguments
const I* input1 = static_cast<const I*>(input1_);
const I* input2 = static_cast<const I*>(input2_);
O* output = static_cast<O*>(output_);
O* __restrict output = static_cast<O* __restrict>(output_);
std::memset(output, O(0), n * m * sizeof(O));
for (std::size_t i = 0; i < n; ++i) {
for (std::size_t j = 0; j < m; ++j) {
O sum = O(0);
for (std::size_t l = 0; l < k; ++l) {
sum += static_cast<O>(input1[i*k + l] * input2[l*m + j]);
for (std::size_t l = 0; l < k; ++l) {
for (std::size_t j = 0; j < m; ++j) {
output[i*m + j] += static_cast<O>(input1[i*k + l] * input2[l*m + j]);
}
output[i*m + j] = sum;
}
}
}
......
......@@ -24,7 +24,8 @@ namespace Aidge {
// Operator implementation entry point for the backend
using PowImpl_cpu = OperatorImpl_cpu<Pow_Op,
void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*),
void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)>;
void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, const void*, void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create);
......
......@@ -31,14 +31,10 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
const I2* input_2 = static_cast<const I2*>(input2_);
O* output = static_cast<O*>(output_);
size_t totalElements = 1;
for (size_t dimSize : outputDims) {
totalElements *= dimSize;
}
std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
{
std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
......@@ -47,16 +43,53 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
}
}
template <class I1, class I2, class O>
void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims,
const std::vector<std::size_t>& input1Dims,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
const void* gradOutput_,
void* gradientInput0_,
void* gradientInput1_) {
const I1* input0 = static_cast<const I1*>(input0_);
I1* grad0 = static_cast<I1*>(gradientInput0_);
const I2* input1 = static_cast<const I2*>(input1_);
I2* grad1 = static_cast<I2*>(gradientInput1_);
const O* gradOut = static_cast<const O*>(gradOutput_);
// Fill input grads with zeros
std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
std::fill(grad0, grad0 + input0Elements, I1(0));
std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
std::fill(grad1, grad1 + input1Elements, I2(0));
std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (size_t oIndex = 0; oIndex < totalElements; ++oIndex)
{
// Compute indexes in inputs 0 and 1 to support broadcasting
std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
std::size_t idx0 = getFlattenedIndex(input0Dims, indexes);
std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
// grad0 = grad_output * (input1 * pow(input0, (input1 -1)))
grad0[idx0] += gradOut[oIndex]*input1[idx1]* std::pow(input0[idx0], input1[idx1]-1);
// grad1 = grad_output * (output * ln(input0))
grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * std::log(input0[idx0]);
}
}
// Kernels registration to implementation entry point
REGISTRAR(PowImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, nullptr});
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, Aidge::PowImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(PowImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, nullptr});
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, Aidge::PowImpl_cpu_backward_kernel<double, double, double>});
REGISTRAR(PowImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, nullptr});
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */
aidge_backend_cpu
\ No newline at end of file
......@@ -17,8 +17,7 @@ dynamic = ["version"] # defined in tool.setuptools_scm
requires = [
"setuptools>=64",
"setuptools_scm[toml]==7.1.0",
"cmake>=3.15.3.post1",
"toml"
"cmake>=3.18.4.post1"
]
build-backend = "setuptools.build_meta"
......
......@@ -8,17 +8,13 @@ import multiprocessing
from math import ceil
import toml
from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext
def get_project_name() -> str:
with open(pathlib.Path().absolute() / "pyproject.toml", "r") as file:
project_toml = toml.load(file)
return project_toml["project"]["name"]
PROJECT_NAME = "aidge_backend_cpu"
SETUP_DIR = pathlib.Path(__file__).parent
class AidgeBuildExtension(Extension):
def __init__(self, name):
......@@ -26,6 +22,15 @@ class AidgeBuildExtension(Extension):
class AidgePkgBuild(build_ext):
def __init__(self, dist, *args, **kwargs):
super().__init__(dist, *args, **kwargs)
# Detect editable_mode for old versions of setuptools
if not hasattr(self, "editable_mode"):
if hasattr(dist, "commands"):
self.editable_mode = "develop" in dist.commands
else:
self.editable_mode = False
def run(self):
####################################
# BUILD PACKAGE
......@@ -43,36 +48,35 @@ class AidgePkgBuild(build_ext):
if not build_lib.exists():
build_lib.mkdir(parents=True, exist_ok=True)
os.chdir(str(build_temp))
package_prefix = build_lib if not self.editable_mode else SETUP_DIR
pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute()
compile_type = (
"Release"
if "AIDGE_PYTHON_BUILD_TYPE" not in os.environ
else os.environ["AIDGE_PYTHON_BUILD_TYPE"]
)
os.chdir(str(build_temp))
compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release")
install_path = (
os.path.join(sys.prefix, "lib", "libAidge")
if "AIDGE_INSTALL" not in os.environ
else os.environ["AIDGE_INSTALL"]
)
# using ninja as default build system to build faster and with the same compiler as on windows
build_gen = (
["-G", os.environ["AIDGE_BUILD_GEN"]]
if "AIDGE_BUILD_GEN" in os.environ
build_gen = os.environ.get("AIDGE_BUILD_GEN", "")
build_gen_opts = (
["-G", build_gen]
if build_gen
else []
)
test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF")
self.spawn(
[
"cmake",
*build_gen,
*build_gen_opts,
str(cwd),
"-DTEST=OFF",
f"-DTEST={test_onoff}",
f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
f"-DCMAKE_BUILD_TYPE={compile_type}",
"-DPYBIND=ON",
f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}",
"-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
"-DCOVERAGE=OFF",
]
......@@ -85,25 +89,11 @@ class AidgePkgBuild(build_ext):
self.spawn(["cmake", "--install", ".", "--config", compile_type])
os.chdir(str(cwd))
aidge_package = build_lib / (get_project_name())
# Get "aidge core" package
# ext_lib = build_temp
print(build_temp.absolute())
# Copy all shared object files from build_temp/lib to aidge_package
for root, _, files in os.walk(build_temp.absolute()):
for file in files:
if (file.endswith(".so") or file.endswith(".pyd")) and (
root != str(aidge_package.absolute())
):
currentFile = os.path.join(root, file)
shutil.copy(currentFile, str(aidge_package.absolute()))
if __name__ == "__main__":
setup(
include_package_data=True,
ext_modules=[AidgeBuildExtension(get_project_name())],
ext_modules=[AidgeBuildExtension(PROJECT_NAME)],
cmdclass={
"build_ext": AidgePkgBuild,
},
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric>
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
#include "aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp"
template<>
void Aidge::BitShiftImpl_cpu::forward() {
const auto& op_ = dynamic_cast<const BitShift_Op&>(mOp);
const auto impl = Registrar<BitShiftImpl_cpu>::create(getBestMatch(getRequiredSpec()));
const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
BitShift_Op::BitShiftDirection direction = op_.direction();
// Call kernel
impl.forward(
direction,
inputDims0,
inputDims1,
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawInput(1)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::BitShiftImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BitShift_Op on backend cpu");
}
\ No newline at end of file
......@@ -44,21 +44,29 @@ void Aidge::PowImpl_cpu::forward() {
template <>
void Aidge::PowImpl_cpu::backward() {
// Find the correct kernel type
const Pow_Op& op_ = dynamic_cast<const Pow_Op&>(mOp);
const std::vector<std::size_t> input0gradDims = getBroadcastedDims(op_.getInput(0)->grad()->dims(),
op_.getOutput(0)->grad()->dims());
const std::vector<std::size_t> input1gradDims = getBroadcastedDims(op_.getInput(1)->grad()->dims(),
op_.getOutput(0)->grad()->dims());
auto in0 = op_.getInput(0);
auto in1 = op_.getInput(1);
auto in0grad = op_.getInput(0)->grad();
auto in1grad = op_.getInput(1)->grad();
auto out0grad = op_.getOutput(0)->grad();
const std::vector<std::size_t> input0gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad()->dims());
const std::vector<std::size_t> input1gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->grad()->dims());
// Find the correct kernel type
const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.backward(op_.getOutput(0)->grad()->dims(),
input0gradDims,
input1gradDims,
getCPUPtr(mOp.getRawOutput(0)),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawInput(1)));
impl.backward(input0gradDims,
input1gradDims,
out0grad->dims(),
getCPUPtr(in0),
getCPUPtr(in1),
getCPUPtr(out0grad),
getCPUPtr(in0grad),
getCPUPtr(in1grad));
}
\ No newline at end of file
......@@ -12,7 +12,7 @@ file(GLOB_RECURSE src_files "*.cpp")
add_executable(tests${module_name} ${src_files})
target_link_libraries(tests${module_name} PUBLIC ${module_name})
target_link_libraries(tests${module_name} PRIVATE ${module_name})
target_link_libraries(tests${module_name} PRIVATE Catch2::Catch2WithMain)
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <numeric>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include <iomanip>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/BitShift.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
constexpr std::uint16_t NBTRIALS = 15;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> valueDist(-15, 15);
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
std::uniform_int_distribution<int> boolDist(0,1);
BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left;
if(valueDist(gen) % 2 == 0)
{
direction = BitShift_Op::BitShiftDirection::right;
}
// Create BitShift Operator
std::shared_ptr<Node> myBitShift = BitShift(direction);
auto op = std::static_pointer_cast<OperatorTensor>(myBitShift-> getOperator());
op->setDataType(DataType::Int32);
op->setBackend("cpu");
// Create 2 input Tensors
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
T0->setDataType(DataType::Int32);
T0->setBackend("cpu");
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
T1->setDataType(DataType::Int32);
T1->setBackend("cpu");
// Create results Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
Tres->setDataType(DataType::Int32);
Tres->setBackend("cpu");
// To measure execution time of 'BitShift_Op::forward()' member function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
SECTION("BitShiftImpl_cpu::forward()") {
SECTION("Test Forward Kernel with same dimensions") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
const std::size_t nbDims = nbDimsDist(gen);
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
// without broadcasting
int* array0 = new int[nb_elements];
int* array1 = new int[nb_elements];
int* result = new int[nb_elements];
for (std::size_t i = 0; i < nb_elements; ++i) {
array0[i] = valueDist(gen);
array1[i] = std::abs(valueDist(gen)); // bitshift is impossible with negative value
if(direction == BitShift_Op::BitShiftDirection::left)
{
result[i] = array0[i] << array1[i];
}
else
{
result[i] = array0[i] >> array1[i];
}
}
// input0
T0->resize(dims);
T0 -> getImpl() -> setRawPtr(array0, nb_elements);
// input1
T1->resize(dims);
T1 -> getImpl() -> setRawPtr(array1, nb_elements);
// results
Tres->resize(dims);
Tres -> getImpl() -> setRawPtr(result, nb_elements);
op->forwardDims();
start = std::chrono::system_clock::now();
myBitShift->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
bool is_eq = approxEq<int>(*(op->getOutput(0)), *Tres);
auto Output = *(op->getOutput(0));
auto prt = Output.getImpl()->rawPtr();
REQUIRE(is_eq);
delete[] array0;
delete[] array1;
delete[] result;
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
SECTION("Test BitShift kernels with Broadcasting") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions, replace some dimensions with '1' to get broadcasting
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
std::vector<std::size_t> dims0 = dims;
std::vector<std::size_t> dims1 = dims;
std::vector<std::size_t> dimsOut = dims;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims0[i] = 1;
}
if (boolDist(gen)) {
dims1[i] = 1;
}
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
}
// create arrays and fill them with random values
int* array0 = new int[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
int* array1 = new int[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
int* result = new int[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
array1[i] = std::abs(valueDist(gen));
}
//True result with broadcast
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+ strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1[2] > 1) ? c : 0)
+ ((dims1[3] > 1) ? d : 0);
if(direction == BitShift_Op::BitShiftDirection::left)
{
result[idx_out + d] = array0[idx0] << array1[idx1];
}
else
{
result[idx_out + d] = array0[idx0] >> array1[idx1];
}
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->forwardDims();
start = std::chrono::system_clock::now();
myBitShift->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
bool equiv = (approxEq<int>(*(op->getOutput(0)), *Tres));
if(equiv == false)
{
std::cout << "Problem\n";
}
REQUIRE(equiv);
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
}
} // namespace Aidge
}
\ No newline at end of file
......@@ -313,5 +313,171 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
}
SECTION("PowImpl_cpu::backward()") {
SECTION("3D Tensors") {
const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{2.0, 3.0},
{4.0, 5.0}
},
{
{6.0, 7.0},
{8.0, 9.0}
}
}
}
));
const auto input1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{1.0, 2.0},
{3.0, 2.0}
},
{
{2.0, 3.0},
{1.0, 0.5}
}
}
}
));
const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{0.5, 1.0},
{1.5, 2.0}
},
{
{2.5, 3.0},
{3.5, 4.0}
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{0.50000000, 6.00000000},
{72.00000000, 20.00000000}
},
{
{30.00000000, 441.00000000},
{3.50000000, 0.66666669}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
{
{
{
{ 0.693147182, 9.88751030},
{1.33084259e+02, 8.04718933e+01}
},
{
{1.61258362e+02, 2.00234143e+03},
{5.82243652e+01, 2.63666954e+01}
}
}
}
));
for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
std::shared_ptr<Node> powOp = Pow();
auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator());
opr->setDataType(DataType::Float32);
opr->setBackend("cpu");
opr->associateInput(0, input0);
opr->associateInput(1, input1);
opr->getOutput(0)->setGrad(gradOut);
opr->forward();
powOp->backward();
REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Broadcasting") {
const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{
{
{
{1.0, 2.0, 3.0},
{4.0, 5.0, 6.0}
},
{
{1.5, 2.5, 3.5},
{4.5, 5.5, 6.5}
}
}
}
));
const auto input1 = std::make_shared<Tensor>(Array1D<float, 3>(
{
{0.1, 0.2, 0.3}
}
));
const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{
{
{
{1.0, 2.0, 3.0},
{4.0, 5.0, 6.0}
},
{
{6.0, 5.0, 4.0},
{3.0, 2.0, 1.0}
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
{
{
{
{0.10000000, 0.22973967, 0.41711676},
{0.11486985, 0.27594593, 0.51353097}
},
{
{0.41655189, 0.48044977, 0.49926791},
{0.07748720, 0.10227509, 0.08092485}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>(
{
{14.14779854, 22.99299049, 33.56402588}
}
));
for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
std::shared_ptr<Node> powOp = Pow();
auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator());
opr->setDataType(DataType::Float32);
opr->setBackend("cpu");
opr->associateInput(0, input0);
opr->associateInput(1, input1);
opr->getOutput(0)->setGrad(gradOut);
powOp->forward();
powOp->backward();
REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
}
}
}
} // namespace Aidge
0.2.3
0.3.0