diff --git a/CMakeLists.txt b/CMakeLists.txt index eef0e63bf398cffb2c15b3af56ec0bf02d6590a9..3574e25cec5977bc2249c7d756041c09650f9b11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.15) +cmake_minimum_required(VERSION 3.18) set(CXX_STANDARD 14) file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version) @@ -24,6 +24,7 @@ add_definitions(-DGIT_COMMIT_HASH="${GIT_COMMIT_HASH}") # Note : project name is ${CMAKE_PROJECT_NAME} and python module name is also ${CMAKE_PROJECT_NAME} set(module_name _${CMAKE_PROJECT_NAME}) # target name +set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings ############################################## # Define options @@ -69,16 +70,12 @@ set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON) # PYTHON BINDING if (PYBIND) - # Handles Python + pybind11 headers dependencies - include(PybindModuleCreation) - generate_python_binding(${CMAKE_PROJECT_NAME} ${module_name}) + # Python binding lib is by default installed in <prefix>/python_packages/<package>/ + # When installed from python, setup.py should set it to the python package dir + set(PYBIND_INSTALL_PREFIX python_packages/${pybind_module_name} CACHE PATH "Python package install prefix") - target_link_libraries(${module_name} - PUBLIC - pybind11::pybind11 - PRIVATE - Python::Module - ) + include(PybindModuleCreation) + generate_python_binding(${pybind_module_name} ${module_name}) endif() if( ${ENABLE_ASAN} ) @@ -102,7 +99,6 @@ target_include_directories(${module_name} ${CMAKE_CURRENT_SOURCE_DIR}/src ) -target_link_libraries(${module_name} PUBLIC fmt::fmt) target_compile_features(${module_name} PRIVATE cxx_std_14) target_compile_options(${module_name} PRIVATE @@ -128,6 +124,12 @@ install(TARGETS ${module_name} EXPORT ${CMAKE_PROJECT_NAME}-targets ) install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +if (PYBIND) + install(TARGETS ${pybind_module_name} + DESTINATION 
${PYBIND_INSTALL_PREFIX} + ) +endif() + #Export the targets to a script install(EXPORT ${CMAKE_PROJECT_NAME}-targets FILE "${CMAKE_PROJECT_NAME}-targets.cmake" @@ -159,15 +161,16 @@ install(FILES ## Exporting from the build tree message(STATUS "Exporting created targets to use them in another build") export(EXPORT ${CMAKE_PROJECT_NAME}-targets - FILE "${CMAKE_CURRENT_BINARY_DIR}/${project}-targets.cmake") + FILE "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-targets.cmake") ############################################## ## Add test if(TEST) - if(PYBIND) - message(FATAL_ERROR "PYBIND and TEST are both enabled. But cannot compile with catch_2.\nChoose between pybind and Catch2 for compilation.") + if (AIDGE_REQUIRES_PYTHON AND NOT AIDGE_PYTHON_HAS_EMBED) + message(WARNING "Skipping compilation of tests: missing Python embedded interpreter") + else() + enable_testing() + add_subdirectory(unit_tests) endif() - enable_testing() - add_subdirectory(unit_tests) endif() diff --git a/README.md b/README.md index ed44c2eaaee074f17c8d7edad059c0891473f272..96283603759f03415b7dc1b99f3905550427f633 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,23 @@ Those operators can be used on any machine with an Linux OS. pip install . -v ``` > **TIPS :** Use environment variables to change compilation options : -> - `AIDGE_INSTALL` : to set the installation folder. Defaults to /usr/local/lib. :warning: This path must be identical to aidge_core install path. -> - `AIDGE_PYTHON_BUILD_TYPE` : to set the compilation mode to **Debug** or **Release** -> - `AIDGE_BUILD_GEN` : to set the build backend with +> - `AIDGE_INSTALL` : to set the installation folder. Defaults to `<python_prefix>/lib/libAidge`. :warning: This path must be identical to aidge_core install path. +> - `AIDGE_PYTHON_BUILD_TYPE` : to set the compilation mode to **Debug** or **Release** or "" (for default flags). Defaults to **Release**. 
+> - `AIDGE_BUILD_GEN` : to set the build backend (for development mode) or "" for the cmake default. Defaults to "".
+
+## Pip installation for development
+
+To set up the package with pip in development (or editable) mode, pass the `--no-build-isolation -e` options to pip.
+
+For instance, run the following commands in your Python environment for a typical setup:
+``` bash
+export AIDGE_PYTHON_BUILD_TYPE= # default flags (no debug info but fastest build time)
+export AIDGE_PYTHON_BUILD_TYPE=Debug # or, if one really needs to debug the C++ code
+pip install -U pip setuptools setuptools_scm[toml] cmake # Pre-install build requirements (refer to the pyproject.toml [build-system] section)
+pip install -v --no-build-isolation -e .
+```
+
+Refer to `aidge_core/README.md` for more details on development build options.
 
 ### Standard C++ Compilation
 
diff --git a/aidge_backend_cpu-config.cmake.in b/aidge_backend_cpu-config.cmake.in
index f3604be11c27d86caf1ad8a48b333b9bd8f30625..d8e1372bc8a7b79bd09c79b654af4291c995ac58 100644
--- a/aidge_backend_cpu-config.cmake.in
+++ b/aidge_backend_cpu-config.cmake.in
@@ -1,3 +1,10 @@
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+find_dependency(aidge_core)
+
+include(CMakeFindDependencyMacro)
+
 include(${CMAKE_CURRENT_LIST_DIR}/aidge_backend_cpu-config-version.cmake)
 
 include(${CMAKE_CURRENT_LIST_DIR}/aidge_backend_cpu-targets.cmake)
diff --git a/cmake/PybindModuleCreation.cmake b/cmake/PybindModuleCreation.cmake
index 8f386bef59ed86dfa366eca5d4fccae24b28d24e..a520039f6505a7178acefaca076fa3f659e41bcb 100644
--- a/cmake/PybindModuleCreation.cmake
+++ b/cmake/PybindModuleCreation.cmake
@@ -1,9 +1,10 @@
 function(generate_python_binding pybind_module_name target_to_bind)
-    add_definitions(-DPYBIND)
+
+    find_package(Python COMPONENTS Interpreter Development.Module)
+
     Include(FetchContent)
 
     set(PYBIND_VERSION v2.10.4)
-    set(PYBIND11_FINDPYTHON ON)
     message(STATUS "Retrieving pybind ${PYBIND_VERSION} from git")
 
     FetchContent_Declare(
@@ -12,14 +13,12 @@
function(generate_python_binding pybind_module_name target_to_bind) GIT_TAG ${PYBIND_VERSION} # or a later release ) - # Use the New FindPython mode, recommanded. Requires CMake 3.15+ - find_package(Python COMPONENTS Interpreter Development.Module) FetchContent_MakeAvailable(PyBind11) message(STATUS "Creating binding for module ${pybind_module_name}") file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp") pybind11_add_module(${pybind_module_name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install - target_include_directories(${pybind_module_name} PUBLIC "python_binding") - target_link_libraries(${pybind_module_name} PUBLIC ${target_to_bind}) + target_include_directories(${pybind_module_name} PRIVATE "python_binding") + target_link_libraries(${pybind_module_name} PRIVATE ${target_to_bind}) endfunction() diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 712d520377d8d71eb5f371a13e10712bae846589..760fc71a4b659e1bffe28e2796c7bb400e8ec1a2 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -21,6 +21,7 @@ #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include "aidge/backend/cpu/operator/BitShiftImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6da67bb7dd4469b6ca609c5aea1ae70dfca3f939 --- /dev/null +++ b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp @@ -0,0 +1,38 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made 
available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ +#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/BitShift.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> + +namespace Aidge { +// Operator implementation entry point for the backend +using BitShiftImpl_cpu = OperatorImpl_cpu<BitShift_Op, + void(const BitShift_Op::BitShiftDirection, + const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const void*, + const void*, + void*)>; + + // Implementation entry point registration to Operator + REGISTRAR(BitShift_Op,"cpu",Aidge::BitShiftImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f815e946ea2e4abaff48a6e5155368d564e88e8c --- /dev/null +++ b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp @@ -0,0 +1,70 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_
+
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::int32_t, std::int64_t
+#include <functional> // std::multiplies
+#include <numeric>    // std::accumulate
+#include <vector>
+
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/operator/BitShift.hpp"
+#include "aidge/backend/cpu/data/Broadcasting.hpp"
+#include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
+
+namespace Aidge {
+/**
+ * @brief Element-wise bit shift: output[o] = input1[i1] << or >> input2[i2], chosen by `direction`.
+ *
+ * Shift counts are not checked: a negative count, or one not smaller than the
+ * bit width of the promoted left operand, is undefined behaviour in C++.
+ */
+template <class I1, class I2, class O>
+void BitShiftImpl_cpu_forward_kernel(
+    const BitShift_Op::BitShiftDirection direction,
+    const std::vector<std::size_t>& input1Dims,
+    const std::vector<std::size_t>& input2Dims,
+    const std::vector<std::size_t>& outputDims,
+    const void* input1_,
+    const void* input2_,
+    void* output_)
+{
+    const I1* input_1 = static_cast<const I1*>(input1_);
+    const I2* input_2 = static_cast<const I2*>(input2_);
+    O* output = static_cast<O*>(output_);
+
+    const std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+    // The direction is the same for every element: test it once, outside the loop.
+    const bool shiftRight = (direction == BitShift_Op::BitShiftDirection::right);
+
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) {
+        // Compute indexes in both inputs to support broadcasting
+        const std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+        const std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
+        const std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
+        output[oIndex] = shiftRight ? (input_1[idx1] >> input_2[idx2])
+                                    : (input_1[idx1] << input_2[idx2]);
+    }
+}
+
+REGISTRAR(BitShiftImpl_cpu,
+{DataType::Int32},
+{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>,nullptr});
+REGISTRAR(BitShiftImpl_cpu,
+{DataType::Int64},
+{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>,nullptr});
+
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_
*/ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp index 7cb1239ebf4bd782635600e64eab6cd75b3a0282..5fc13baf49b1d0606eb4af5a54eec83fa5dce22a 100644 --- a/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp @@ -18,19 +18,19 @@ namespace Aidge { template <class I, class O> void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m, - const void* input1_, const void* input2_, void* output_) { + const void* input1_, const void* input2_, void* __restrict output_) { // FIXME: missing MatMul parameters as arguments const I* input1 = static_cast<const I*>(input1_); const I* input2 = static_cast<const I*>(input2_); - O* output = static_cast<O*>(output_); + O* __restrict output = static_cast<O* __restrict>(output_); + + std::memset(output, O(0), n * m * sizeof(O)); for (std::size_t i = 0; i < n; ++i) { - for (std::size_t j = 0; j < m; ++j) { - O sum = O(0); - for (std::size_t l = 0; l < k; ++l) { - sum += static_cast<O>(input1[i*k + l] * input2[l*m + j]); + for (std::size_t l = 0; l < k; ++l) { + for (std::size_t j = 0; j < m; ++j) { + output[i*m + j] += static_cast<O>(input1[i*k + l] * input2[l*m + j]); } - output[i*m + j] = sum; } } } diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp index daf23177fb57bee4111c92654ad94dfae3e50f08..cfbb8173d1f83162519016a8f2b3c3166977a5b7 100644 --- a/include/aidge/backend/cpu/operator/PowImpl.hpp +++ b/include/aidge/backend/cpu/operator/PowImpl.hpp @@ -24,7 +24,8 @@ namespace Aidge { // Operator implementation entry point for the backend using PowImpl_cpu = OperatorImpl_cpu<Pow_Op, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*), - void(const std::vector<std::size_t>&, const 
std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)>; + void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, const void*, void*, void*)>; + // Implementation entry point registration to Operator REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create); diff --git a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp index f484fabf437f656dc8671d4ac78161ef11e84de5..ab9b2ccc7b823842decd044b90a5c6364cedc9c9 100644 --- a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp @@ -31,14 +31,10 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, const I2* input_2 = static_cast<const I2*>(input2_); O* output = static_cast<O*>(output_); - size_t totalElements = 1; - for (size_t dimSize : outputDims) { - totalElements *= dimSize; - } - + std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) { - std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex); std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); @@ -47,16 +43,53 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, } } +template <class I1, class I2, class O> +void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims, + const std::vector<std::size_t>& input1Dims, + const std::vector<std::size_t>& outputDims, + const void* input0_, + const void* input1_, + const void* gradOutput_, + void* gradientInput0_, + void* gradientInput1_) { + const I1* input0 = static_cast<const I1*>(input0_); + I1* grad0 = 
static_cast<I1*>(gradientInput0_); + const I2* input1 = static_cast<const I2*>(input1_); + I2* grad1 = static_cast<I2*>(gradientInput1_); + const O* gradOut = static_cast<const O*>(gradOutput_); + + // Fill input grads with zeros + std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + std::fill(grad0, grad0 + input0Elements, I1(0)); + std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + std::fill(grad1, grad1 + input1Elements, I2(0)); + + std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + for (size_t oIndex = 0; oIndex < totalElements; ++oIndex) + { + // Compute indexes in inputs 0 and 1 to support broadcasting + std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::size_t idx0 = getFlattenedIndex(input0Dims, indexes); + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + + // grad0 = grad_output * (input1 * pow(input0, (input1 -1))) + grad0[idx0] += gradOut[oIndex]*input1[idx1]* std::pow(input0[idx0], input1[idx1]-1); + + // grad1 = grad_output * (output * ln(input0)) + grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * std::log(input0[idx0]); + } +} + // Kernels registration to implementation entry point REGISTRAR(PowImpl_cpu, {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, nullptr}); + {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, Aidge::PowImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(PowImpl_cpu, {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, nullptr}); + {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, Aidge::PowImpl_cpu_backward_kernel<double, double, 
double>}); REGISTRAR(PowImpl_cpu, {DataType::Int32}, - {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, nullptr}); + {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>}); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */ diff --git a/project_name.txt b/project_name.txt deleted file mode 100644 index f8a086fc063978638db5a0fcfe1dc2e5c9d0c1b7..0000000000000000000000000000000000000000 --- a/project_name.txt +++ /dev/null @@ -1 +0,0 @@ -aidge_backend_cpu \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index aa43189d3f4f7d3796009c2646175635382796bf..9dbdbede6083ea2ededd5a861449a2dfbea6f40e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,8 +17,7 @@ dynamic = ["version"] # defined in tool.setuptools_scm requires = [ "setuptools>=64", "setuptools_scm[toml]==7.1.0", - "cmake>=3.15.3.post1", - "toml" + "cmake>=3.18.4.post1" ] build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 24ce15ab7ead32f98c7ac3edcd34bb2010ff4326..0000000000000000000000000000000000000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -numpy diff --git a/setup.py b/setup.py index 35520fd344c505bf38a60fcd5484c28517b0d2bd..22cbd9732c8b9e1099c3e322032e8377f6d4506b 100644 --- a/setup.py +++ b/setup.py @@ -8,17 +8,13 @@ import multiprocessing from math import ceil -import toml - from setuptools import setup, Extension from setuptools.command.build_ext import build_ext -def get_project_name() -> str: - with open(pathlib.Path().absolute() / "pyproject.toml", "r") as file: - project_toml = toml.load(file) - return project_toml["project"]["name"] +PROJECT_NAME = "aidge_backend_cpu" +SETUP_DIR = pathlib.Path(__file__).parent class AidgeBuildExtension(Extension): def __init__(self, name): @@ -26,6 +22,15 @@ class AidgeBuildExtension(Extension): class 
AidgePkgBuild(build_ext): + def __init__(self, dist, *args, **kwargs): + super().__init__(dist, *args, **kwargs) + # Detect editable_mode for old versions of setuptools + if not hasattr(self, "editable_mode"): + if hasattr(dist, "commands"): + self.editable_mode = "develop" in dist.commands + else: + self.editable_mode = False + def run(self): #################################### # BUILD PACKAGE @@ -43,36 +48,35 @@ class AidgePkgBuild(build_ext): if not build_lib.exists(): build_lib.mkdir(parents=True, exist_ok=True) - os.chdir(str(build_temp)) + package_prefix = build_lib if not self.editable_mode else SETUP_DIR + pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute() - compile_type = ( - "Release" - if "AIDGE_PYTHON_BUILD_TYPE" not in os.environ - else os.environ["AIDGE_PYTHON_BUILD_TYPE"] - ) + os.chdir(str(build_temp)) + compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release") install_path = ( os.path.join(sys.prefix, "lib", "libAidge") if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] ) - - # using ninja as default build system to build faster and with the same compiler as on windows - build_gen = ( - ["-G", os.environ["AIDGE_BUILD_GEN"]] - if "AIDGE_BUILD_GEN" in os.environ + build_gen = os.environ.get("AIDGE_BUILD_GEN", "") + build_gen_opts = ( + ["-G", build_gen] + if build_gen else [] ) + test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF") self.spawn( [ "cmake", - *build_gen, + *build_gen_opts, str(cwd), - "-DTEST=OFF", + f"-DTEST={test_onoff}", f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", f"-DCMAKE_BUILD_TYPE={compile_type}", "-DPYBIND=ON", + f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}", "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", "-DCOVERAGE=OFF", ] @@ -85,25 +89,11 @@ class AidgePkgBuild(build_ext): self.spawn(["cmake", "--install", ".", "--config", compile_type]) os.chdir(str(cwd)) - aidge_package = build_lib / (get_project_name()) - - # Get "aidge core" package - # ext_lib = build_temp - 
print(build_temp.absolute()) - # Copy all shared object files from build_temp/lib to aidge_package - for root, _, files in os.walk(build_temp.absolute()): - for file in files: - if (file.endswith(".so") or file.endswith(".pyd")) and ( - root != str(aidge_package.absolute()) - ): - currentFile = os.path.join(root, file) - shutil.copy(currentFile, str(aidge_package.absolute())) - if __name__ == "__main__": setup( include_package_data=True, - ext_modules=[AidgeBuildExtension(get_project_name())], + ext_modules=[AidgeBuildExtension(PROJECT_NAME)], cmdclass={ "build_ext": AidgePkgBuild, }, diff --git a/src/operator/BitShiftImpl.cpp b/src/operator/BitShiftImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1e0f79fd29fd140f0b41c64d245b9b240da80028 --- /dev/null +++ b/src/operator/BitShiftImpl.cpp @@ -0,0 +1,57 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cstddef>    // std::size_t
+#include <memory>     // std::static_pointer_cast
+#include <numeric>    // std::accumulate, used by BitShiftImpl_kernels.hpp
+#include <stdexcept>  // std::runtime_error
+#include <vector>
+
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/Broadcasting.hpp"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+
+#include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
+#include "aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp"
+
+/// Forward pass: looks up the registered CPU kernel and runs it on the
+/// operator's two inputs, writing into output 0.
+template<>
+void Aidge::BitShiftImpl_cpu::forward() {
+    const auto& op_ = dynamic_cast<const BitShift_Op&>(mOp);
+
+    // Find the correct kernel type
+    const auto impl = Registrar<BitShiftImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
+    const auto input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
+    const auto input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1));
+    const auto output = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0));
+
+    // Dims of each input as returned by getBroadcastedDims() for the output dims
+    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(output->dims(), input0->dims());
+    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(output->dims(), input1->dims());
+
+    // Call kernel
+    impl.forward(
+        op_.direction(),
+        inputDims0,
+        inputDims1,
+        output->dims(),
+        getCPUPtr(input0),
+        getCPUPtr(input1),
+        getCPUPtr(output));
+}
+
+/// Backward pass: not implemented for BitShift on this backend; always throws or aborts.
+template <>
+void Aidge::BitShiftImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BitShift_Op on backend cpu");
+}
\ No newline at end of file
diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp
index fe16bb955973d99e022c61043e8144aeaf6801a1..74a7be71e176ba8e1cb8851050e575d6aa7465df 100644
--- a/src/operator/PowImpl.cpp
+++ b/src/operator/PowImpl.cpp
@@ -44,21 +44,29 @@ void Aidge::PowImpl_cpu::forward() {
 
 template <>
 void Aidge::PowImpl_cpu::backward() {
-    // Find the
correct kernel type const Pow_Op& op_ = dynamic_cast<const Pow_Op&>(mOp); - const std::vector<std::size_t> input0gradDims = getBroadcastedDims(op_.getInput(0)->grad()->dims(), - op_.getOutput(0)->grad()->dims()); - const std::vector<std::size_t> input1gradDims = getBroadcastedDims(op_.getInput(1)->grad()->dims(), - op_.getOutput(0)->grad()->dims()); + + auto in0 = op_.getInput(0); + auto in1 = op_.getInput(1); + auto in0grad = op_.getInput(0)->grad(); + auto in1grad = op_.getInput(1)->grad(); + auto out0grad = op_.getOutput(0)->grad(); + + const std::vector<std::size_t> input0gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad()->dims()); + const std::vector<std::size_t> input1gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->grad()->dims()); // Find the correct kernel type const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.backward(op_.getOutput(0)->grad()->dims(), - input0gradDims, - input1gradDims, - getCPUPtr(mOp.getRawOutput(0)), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1))); + impl.backward(input0gradDims, + input1gradDims, + out0grad->dims(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(out0grad), + getCPUPtr(in0grad), + getCPUPtr(in1grad)); } \ No newline at end of file diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 671cdd5ac1262ab61b35a70a234236aff4a3cc15..8178df93beb96a3a7538dae8d9a706380c06ecf8 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -12,7 +12,7 @@ file(GLOB_RECURSE src_files "*.cpp") add_executable(tests${module_name} ${src_files}) -target_link_libraries(tests${module_name} PUBLIC ${module_name}) +target_link_libraries(tests${module_name} PRIVATE ${module_name}) target_link_libraries(tests${module_name} 
PRIVATE Catch2::Catch2WithMain) diff --git a/unit_tests/operator/Test_BitShift.cpp b/unit_tests/operator/Test_BitShift.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a52990bc7991a325ce151cf6634b0d5a831992c8 --- /dev/null +++ b/unit_tests/operator/Test_BitShift.cpp @@ -0,0 +1,245 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <chrono> +#include <iostream> +#include <memory> +#include <numeric> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <iomanip> +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/BitShift.hpp" +#include "aidge/utils/TensorUtils.hpp" + +namespace Aidge { + +TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") { + constexpr std::uint16_t NBTRIALS = 15; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> valueDist(-15, 15); + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3)); + std::uniform_int_distribution<int> boolDist(0,1); + + BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left; + + if(valueDist(gen) % 2 == 0) + { + direction = BitShift_Op::BitShiftDirection::right; + } + + // Create BitShift Operator + std::shared_ptr<Node> myBitShift = BitShift(direction); + auto op = std::static_pointer_cast<OperatorTensor>(myBitShift-> 
getOperator()); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + + // Create 2 input Tensors + std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); + op->associateInput(0,T0); + T0->setDataType(DataType::Int32); + T0->setBackend("cpu"); + std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(); + op -> associateInput(1,T1); + T1->setDataType(DataType::Int32); + T1->setBackend("cpu"); + + // Create results Tensor + std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(); + Tres->setDataType(DataType::Int32); + Tres->setBackend("cpu"); + + // To measure execution time of 'BitShift_Op::forward()' member function call + std::chrono::time_point<std::chrono::system_clock> start; + + std::chrono::time_point<std::chrono::system_clock> end; + std::chrono::duration<double, std::micro> duration{}; + + SECTION("BitShiftImpl_cpu::forward()") { + SECTION("Test Forward Kernel with same dimensions") { + std::size_t number_of_operation = 0; + + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + // generate 2 random Tensors + const std::size_t nbDims = nbDimsDist(gen); + std::vector<std::size_t> dims; + for (std::size_t i = 0; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); + } + const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + number_of_operation += nb_elements; + + // without broadcasting + int* array0 = new int[nb_elements]; + int* array1 = new int[nb_elements]; + int* result = new int[nb_elements]; + + for (std::size_t i = 0; i < nb_elements; ++i) { + array0[i] = valueDist(gen); + array1[i] = std::abs(valueDist(gen)); // bitshift is impossible with negative value + if(direction == BitShift_Op::BitShiftDirection::left) + { + result[i] = array0[i] << array1[i]; + } + else + { + result[i] = array0[i] >> array1[i]; + } + } + + // input0 + T0->resize(dims); + T0 -> getImpl() -> setRawPtr(array0, nb_elements); + + // input1 + T1->resize(dims); + T1 -> getImpl() -> 
setRawPtr(array1, nb_elements); + + // results + Tres->resize(dims); + Tres -> getImpl() -> setRawPtr(result, nb_elements); + + op->forwardDims(); + start = std::chrono::system_clock::now(); + myBitShift->forward(); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + + bool is_eq = approxEq<int>(*(op->getOutput(0)), *Tres); + + auto Output = *(op->getOutput(0)); + auto prt = Output.getImpl()->rawPtr(); + + REQUIRE(is_eq); + + delete[] array0; + delete[] array1; + delete[] result; + + + } + std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; + std::cout << "total time: " << duration.count() << "μs" << std::endl; + } + SECTION("Test BitShift kernels with Broadcasting") { + std::size_t number_of_operation = 0; + + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + // generate 2 random Tensors + // handle dimensions, replace some dimensions with '1' to get broadcasting + constexpr std::size_t nbDims = 4; + std::vector<std::size_t> dims; + for (std::size_t i = 0; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); + } + std::vector<std::size_t> dims0 = dims; + std::vector<std::size_t> dims1 = dims; + std::vector<std::size_t> dimsOut = dims; + for (std::size_t i = 0; i < nbDims; ++i) { + if (boolDist(gen)) { + dims0[i] = 1; + } + if (boolDist(gen)) { + dims1[i] = 1; + } + dimsOut[i] = (dims0[i] == 1) ? 
dims1[i] : dims0[i]; + } + + // create arrays and fill them with random values + int* array0 = new int[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; + int* array1 = new int[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; + int* result = new int[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; + + for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) { + array0[i] = valueDist(gen); + } + for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) { + array1[i] = std::abs(valueDist(gen)); + } + + //True result with broadcast + const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; + const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; + for (std::size_t a = 0; a < dimsOut[0]; ++a) { + for (std::size_t b = 0; b < dimsOut[1]; ++b) { + const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) + + strides1[1] * ((dims1[1] > 1) ? b : 0); + for (std::size_t c = 0; c < dimsOut[2]; ++c) { + const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + for (std::size_t d = 0; d < dimsOut[3]; ++d) { + std::size_t idx0 = idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = idx1_0 + + strides1[2] * ((dims1[2] > 1) ? c : 0) + + ((dims1[3] > 1) ? 
d : 0); + if(direction == BitShift_Op::BitShiftDirection::left) + { + result[idx_out + d] = array0[idx0] << array1[idx1]; + } + else + { + result[idx_out + d] = array0[idx0] >> array1[idx1]; + } + } + } + } + } + + // conversion to Aidge::Tensors + // input0 + T0->resize(dims0); + T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + + // input1 + T1->resize(dims1); + T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]); + + // results + Tres->resize(dimsOut); + Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + + // compute result + op->forwardDims(); + start = std::chrono::system_clock::now(); + myBitShift->forward(); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + + // comparison between truth and computed result + bool equiv = (approxEq<int>(*(op->getOutput(0)), *Tres)); + if(equiv == false) + { + std::cout << "Problem\n"; + } + REQUIRE(equiv); + + delete[] array0; + delete[] array1; + delete[] result; + + const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + number_of_operation += nb_elements; + } + std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; + std::cout << "total time: " << duration.count() << "μs" << std::endl; + } + +} +} // namespace Aidge +} \ No newline at end of file diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp index 3b85defb37ff76439b658faa84c3c7457a152d2f..cb5d8872c9c7242bb4aa4efca388d53b578417f9 100644 --- a/unit_tests/operator/Test_PowImpl.cpp +++ b/unit_tests/operator/Test_PowImpl.cpp @@ -313,5 +313,171 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { std::cout << "total time: " << duration.count() << "μs" << std::endl; } } + + + SECTION("PowImpl_cpu::backward()") { + SECTION("3D Tensors") { + const auto 
input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + {2.0, 3.0}, + {4.0, 5.0} + }, + { + {6.0, 7.0}, + {8.0, 9.0} + } + } + } + )); + const auto input1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + {1.0, 2.0}, + {3.0, 2.0} + }, + { + {2.0, 3.0}, + {1.0, 0.5} + } + } + } + )); + const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + {0.5, 1.0}, + {1.5, 2.0} + }, + { + {2.5, 3.0}, + {3.5, 4.0} + } + } + } + )); + const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + {0.50000000, 6.00000000}, + {72.00000000, 20.00000000} + }, + { + {30.00000000, 441.00000000}, + {3.50000000, 0.66666669} + } + } + } + )); + const auto expectedGrad1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + { 0.693147182, 9.88751030}, + {1.33084259e+02, 8.04718933e+01} + }, + { + {1.61258362e+02, 2.00234143e+03}, + {5.82243652e+01, 2.63666954e+01} + } + } + } + )); + for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1}) + { + T->setBackend("cpu") ; + T->setDataType(DataType::Float32); + } + std::shared_ptr<Node> powOp = Pow(); + auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator()); + opr->setDataType(DataType::Float32); + opr->setBackend("cpu"); + opr->associateInput(0, input0); + opr->associateInput(1, input1); + opr->getOutput(0)->setGrad(gradOut); + opr->forward(); + + powOp->backward(); + REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0)); + REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1)); + } + SECTION("Broadcasting") { + const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + { + { + { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0} + }, + { + {1.5, 2.5, 3.5}, + {4.5, 5.5, 6.5} + } + } + } + )); + const auto input1 = std::make_shared<Tensor>(Array1D<float, 3>( + { + {0.1, 0.2, 0.3} + } + )); + + const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + { + { + { + 
{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0} + }, + { + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0} + } + } + } + )); + const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + { + { + { + {0.10000000, 0.22973967, 0.41711676}, + {0.11486985, 0.27594593, 0.51353097} + }, + { + {0.41655189, 0.48044977, 0.49926791}, + {0.07748720, 0.10227509, 0.08092485} + } + } + } + )); + const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>( + { + {14.14779854, 22.99299049, 33.56402588} + } + )); + + for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1}) + { + T->setBackend("cpu") ; + T->setDataType(DataType::Float32); + } + std::shared_ptr<Node> powOp = Pow(); + auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator()); + opr->setDataType(DataType::Float32); + opr->setBackend("cpu"); + opr->associateInput(0, input0); + opr->associateInput(1, input1); + opr->getOutput(0)->setGrad(gradOut); + powOp->forward(); + + powOp->backward(); + REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0)); + REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1)); + } + } } } // namespace Aidge diff --git a/version.txt b/version.txt index 7179039691ce07a214e7a815893fee97a97b1422..0d91a54c7d439e84e3dd17d3594f1b2b6737f430 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.2.3 +0.3.0