Skip to content
Snippets Groups Projects
Commit b2e93d76 authored by Maxence Naud's avatar Maxence Naud
Browse files

Merge branch 'dev' into 'main'

[UPD] version 0.4.1 -> 0.5.0

See merge request eclipse/aidge/aidge_backend_cpu!132
parents 5d2c727a 71080feb
No related branches found
No related tags found
No related merge requests found
Pipeline #66067 failed
Showing
with 645 additions and 95 deletions
......@@ -4,6 +4,7 @@
# C++ Build
build*/
install*/
include/aidge/backend/cpu_version.h
# VSCode
.vscode
......
# Version 0.5.0 (January 31, 2025)
# Version 0.4.0 (December 6, 2024)
# Version 0.2.2 (May 14, 2024)
......
cmake_minimum_required(VERSION 3.18)

set(CXX_STANDARD 14) # NOTE(review): not a variable CMake reads; likely redundant with the lines below — confirm before removing
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Read the version string (e.g. "0.5.0") from version.txt.
file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)

# Parse version.txt to retrieve Major, Minor and Patch.
# Each component needs its own capture group so that CMAKE_MATCH_1/2/3 are
# all populated; a single group would leave PROJECT_VERSION_MINOR and
# PROJECT_VERSION_PATCH empty. The stray `MATCHES` token of the previous
# form (it was silently treated as input text) is removed.
string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ "${version}")
set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1})
set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2})
set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3})

project(aidge_backend_cpu
        VERSION ${version}
        DESCRIPTION "CPU implementations of the operators of aidge framework."
        LANGUAGES CXX)

message(STATUS "Project name: ${CMAKE_PROJECT_NAME}")
message(STATUS "Project version: ${version}")

# NOTE(review): prefer target_compile_definitions() on the library target;
# add_definitions() leaks the define to every target in the tree.
add_definitions(-DPROJECT_VERSION="${version}")
# Retrieve latest git commit
execute_process(
COMMAND git rev-parse --short HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
......@@ -19,8 +25,10 @@ execute_process(
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_QUIET
)
message(STATUS "Project name: ${CMAKE_PROJECT_NAME}")
message(STATUS "Project version: ${version}")
message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")
add_definitions(-DGIT_COMMIT_HASH="${GIT_COMMIT_HASH}")
# helper for LSP users
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
......@@ -64,6 +72,8 @@ file(GLOB_RECURSE inc_files "include/*.hpp")
add_library(${module_name} ${src_files} ${inc_files})
target_link_libraries(${module_name}
PRIVATE
fmt::fmt
PUBLIC
_aidge_core # _ is added because we link the exported target and not the project
)
......@@ -115,6 +125,13 @@ if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
append_coverage_compiler_flags()
endif()
message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/cpu_version.h")
# Generate cpu_version.h from the template version.h.in.
# @ONLY restricts substitution to @VAR@ markers so any stray ${...} in the
# template is left untouched (the template only uses @VAR@ placeholders).
# NOTE(review): the file is generated inside the source tree (it is
# git-ignored); generating into ${CMAKE_CURRENT_BINARY_DIR} would be cleaner.
configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/version.h.in"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/cpu_version.h"
    @ONLY
)
##############################################
# Installation instructions
include(GNUInstallDirs)
......
import aidge_core
from aidge_backend_cpu.aidge_backend_cpu import * # import so generated by PyBind
from ._version import *
......@@ -12,6 +12,8 @@
#ifndef AIDGE_CPU_IMPORTS_H_
#define AIDGE_CPU_IMPORTS_H_
#include "aidge/backend/cpu_version.h"
#include "aidge/backend/cpu/operator/AbsImpl.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
#include "aidge/backend/cpu/operator/AndImpl.hpp"
......@@ -28,9 +30,11 @@
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include "aidge/backend/cpu/operator/DivImpl.hpp"
#include "aidge/backend/cpu/operator/ErfImpl.hpp"
#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/backend/cpu/operator/FoldImpl.hpp"
#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
#include "aidge/backend/cpu/operator/HeavisideImpl.hpp"
#include "aidge/backend/cpu/operator/LRNImpl.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
#include "aidge/backend/cpu/operator/LnImpl.hpp"
......@@ -51,8 +55,8 @@
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
#include "aidge/backend/cpu/operator/SubImpl.hpp"
#include "aidge/backend/cpu/operator/TanhImpl.hpp"
#include "aidge/backend/cpu/operator/WeightInterleavedImpl.hpp"
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#endif /* AIDGE_CPU_IMPORTS_H_ */
/********************************************************************************
 * Copyright (c) 2023 CEA-List
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 ********************************************************************************/

#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_H_
#define AIDGE_CPU_OPERATOR_EXPANDIMPL_H_

#include <memory>
#include <vector>

#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Expand.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"

namespace Aidge {
// Operator implementation entry point for the backend.
// Forward kernel signature: (input data tensor, shape tensor holding the
// target dimensions, raw output buffer, output dimensions).
using ExpandImpl_cpu = OperatorImpl_cpu<Expand_Op,
                                        void(const std::shared_ptr<Tensor> &,
                                             const std::shared_ptr<Tensor> &,
                                             void *,
                                             const std::vector<DimSize_t> &)>;

// Implementation entry point registration to Operator
REGISTRAR(Expand_Op, "cpu", Aidge::ExpandImpl_cpu::create);
} // namespace Aidge

#endif /* AIDGE_CPU_OPERATOR_EXPANDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include <aidge/data/Data.hpp>
#include <aidge/data/Tensor.hpp>
#include <aidge/data/half.hpp>
#include <aidge/scheduler/ProdConso.hpp>
#include <aidge/utils/Types.h>
#include <cmath>
#include <cstdint> // std::int32_t, std::int64_t
#include <memory>
#include <numeric>
namespace {
/**
 * @brief Copy (or broadcast) one contiguous run of values into the output.
 *
 * Values are assumed to be contiguous in memory. When the input run holds a
 * single element, that element is replicated over the whole output run;
 * otherwise the run is copied element by element.
 */
template <class IO>
void expandContiguousArray(const std::size_t inputStackSize,
                           const std::size_t outputStackSize,
                           const IO *input,
                           IO *output) {
    if (inputStackSize == 1) {
        // Broadcast: a single input value fills the whole output run.
        const IO broadcastValue = input[0];
        for (std::size_t idx = 0; idx < outputStackSize; ++idx) {
            output[idx] = broadcastValue;
        }
    } else {
        // One-to-one copy of the contiguous run.
        for (std::size_t idx = 0; idx < outputStackSize; ++idx) {
            output[idx] = input[idx];
        }
    }
}
} // namespace
namespace Aidge {
/**
 * @brief Forward kernel for the Expand operator (broadcast to a target shape).
 *
 * @tparam IO element type of both input and output buffers.
 * @param inData input data tensor to be expanded.
 * @param _inExpandShape tensor holding the target shape; read as int64 values.
 * @param _output raw pointer to the pre-allocated output buffer.
 * @param outputDims dimensions of the output tensor.
 */
template <class IO>
void ExpandImpl_cpu_forward_kernel(
    const std::shared_ptr<Tensor> &inData,
    const std::shared_ptr<Tensor> &_inExpandShape,
    void *_output,
    const std::vector<DimSize_t> &outputDims) {

    // retrieving data of inputShape & dimensions of inputDims
    // as the process will require to modify the values
    IO *output = static_cast<IO *>(_output);
    std::vector<DimSize_t> inExpandShape(_inExpandShape->size());
    for (DimSize_t i = 0; i < _inExpandShape->size(); ++i) {
        // Shape values are stored as int64 in the shape tensor.
        inExpandShape[i] = _inExpandShape->get<std::int64_t>(i);
    }
    std::vector<DimSize_t> inDataDims = inData->dims();

    // Example with 2 tensors
    // [5,2,1,7] & [2,6,7]
    // 1. Same number of dimensions but adding 1s to the left of "smallest"
    //    tensor -> [5,2,1,7] & [1,2,6,7]
    // 2. Find the highest equal dimension -> 3
    //    Exception: if the first diverging dimension is the last one, then ->
    //    4 (dims.size())
    // 3. Compute the highest number of contiguous data -> 7
    // 4. Compute stride and offset step for the broadcast mechanism
    // 5. Call a simple kernel

    // ## Compute compatible input dimensions
    // special case for equal dimensions, the kernel is called with the entire
    // arrays at once
    if (inDataDims == inExpandShape) {
        const std::size_t input0ContiguousSize =
            std::accumulate(inDataDims.cbegin(),
                            inDataDims.cend(),
                            static_cast<std::size_t>(1),
                            std::multiplies<std::size_t>());
        // Plain element-wise copy: no broadcasting needed.
        for (std::size_t i = 0; i < input0ContiguousSize; ++i) {
            output[i] = inData->get<IO>(i);
        }
        return;
    }

    // set dimensions to be of equal size by filling the smallest one with
    // ones (left-padding, as in NumPy/ONNX broadcasting).
    if (inDataDims.size() > inExpandShape.size()) {
        inExpandShape.insert(inExpandShape.cbegin(),
                             inDataDims.size() - inExpandShape.size(),
                             static_cast<DimSize_t>(1));
    } else if (_inExpandShape->size() > inDataDims.size()) {
        inDataDims.insert(inDataDims.cbegin(),
                          inExpandShape.size() - inDataDims.size(),
                          static_cast<DimSize_t>(1));
    }

    const std::size_t nbDims = inDataDims.size();

    // Find the highest equal dimension: scan from the innermost (last)
    // dimension outward and stop at the first mismatch.
    std::size_t contiguousIdx = nbDims;
    while (contiguousIdx-- > 0) {
        if (inDataDims[contiguousIdx] != inExpandShape[contiguousIdx]) {
            break;
        }
    }
    if (contiguousIdx == (nbDims - 1)) {
        // last dimensions of one of the input Tensor are of size 1:
        // extend the contiguous span across those trailing 1s.
        const std::vector<std::size_t> &dims =
            (inDataDims[contiguousIdx] == 1) ? inDataDims : inExpandShape;
        while ((contiguousIdx + 1 > 0) && (dims[contiguousIdx] == 1)) {
            --contiguousIdx;
        }
    }
    ++contiguousIdx;

    // Compute the highest number of contiguous data for each Tensor
    const std::size_t inputDataContiguousSize =
        std::accumulate(inDataDims.cbegin() + contiguousIdx,
                        inDataDims.cend(),
                        static_cast<std::size_t>(1),
                        std::multiplies<std::size_t>());
    const std::size_t outputContiguousSize =
        std::accumulate(outputDims.cbegin() + contiguousIdx,
                        outputDims.cend(),
                        static_cast<std::size_t>(1),
                        std::multiplies<std::size_t>());

    // initialize strides to iterate through data because of broadcasting
    // stridePostIn[i]: product of input dims to the right of i (row-major
    // stride); strideStepIn[i]: offset delta when dim i rolls over (negative
    // rewind when the input dim is broadcast, i.e. of size 1).
    std::unique_ptr<std::int32_t[]> stridePostIn =
        std::make_unique<std::int32_t[]>(contiguousIdx);
    std::unique_ptr<std::int32_t[]> strideStepIn =
        std::make_unique<std::int32_t[]>(contiguousIdx);
    if (contiguousIdx > 0) {
        stridePostIn[contiguousIdx - 1] = 1;
        for (std::size_t i = contiguousIdx - 2;
             i != static_cast<std::size_t>(-1);
             --i) {
            stridePostIn[i] = stridePostIn[i + 1] *
                              static_cast<std::int32_t>(inDataDims[i + 1]);
        }
        for (std::size_t i = 0; i != contiguousIdx; ++i) {
            strideStepIn[i] = (inDataDims[i] == 1) ? 1 - stridePostIn[i] : 1;
        }
    }

    // variables for arrays offsets
    std::size_t offsetInData = 0;
    std::size_t offsetOut = 0;
    std::size_t dim = contiguousIdx - 1;
    const std::size_t nbStacks =
        std::accumulate(outputDims.cbegin(),
                        outputDims.cbegin() + contiguousIdx,
                        static_cast<std::size_t>(1),
                        std::multiplies<std::size_t>());
    for (std::size_t stack = 0; stack < nbStacks;) {
        expandContiguousArray<IO>(
            inputDataContiguousSize,
            outputContiguousSize,
            &static_cast<const IO *>(
                inData->getImpl()
                    ->rawPtr())[offsetInData * inputDataContiguousSize],
            &output[offsetOut * outputContiguousSize]);
        if (++stack < nbStacks) {
            // Find the outermost dimension that rolled over for this stack
            // index, then advance the input offset by that dimension's step.
            // NOTE(review): only the step of the last rolled-over dimension
            // is applied; assumed correct by construction of strideStepIn —
            // confirm against the other aidge broadcast kernels.
            std::size_t tmpStack = stack;
            while (tmpStack % outputDims[dim] == 0) {
                tmpStack /= outputDims[dim];
                dim--;
            }
            offsetInData += strideStepIn[dim];
            ++offsetOut;
            dim = contiguousIdx - 1;
        }
    }
}
// Kernel registration: one entry per supported data type. The second input
// (the target shape) is always declared as Int64.
REGISTRAR(ExpandImpl_cpu,
          {{DataType::Int16, DataType::Int64}, {DataType::Int16}},
          {ProdConso::inPlaceModel,
           Aidge::ExpandImpl_cpu_forward_kernel<std::int16_t>,
           nullptr});
REGISTRAR(ExpandImpl_cpu,
          {{DataType::Int32, DataType::Int64}, {DataType::Int32}},
          {ProdConso::inPlaceModel,
           Aidge::ExpandImpl_cpu_forward_kernel<std::int32_t>,
           nullptr});
REGISTRAR(ExpandImpl_cpu,
          {{DataType::Int64, DataType::Int64}, {DataType::Int64}},
          {ProdConso::inPlaceModel,
           Aidge::ExpandImpl_cpu_forward_kernel<std::int64_t>,
           nullptr});
REGISTRAR(ExpandImpl_cpu,
          {{DataType::Float16, DataType::Int64}, {DataType::Float16}},
          {ProdConso::inPlaceModel,
           Aidge::ExpandImpl_cpu_forward_kernel<half_float::half>,
           nullptr});
REGISTRAR(ExpandImpl_cpu,
          {{DataType::Float32, DataType::Int64}, {DataType::Float32}},
          {ProdConso::inPlaceModel,
           Aidge::ExpandImpl_cpu_forward_kernel<float>,
           nullptr});
REGISTRAR(ExpandImpl_cpu,
          {{DataType::Float64, DataType::Int64}, {DataType::Float64}},
          {ProdConso::inPlaceModel,
           Aidge::ExpandImpl_cpu_forward_kernel<double>,
           nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_
#define AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_
#include <cstddef> // std::size_t
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Heaviside.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/future_std/span.hpp"
namespace Aidge {
// Operator implementation entry point for the backend.
// First signature: forward kernel (size, input, output, value-at-zero).
// Second signature: presumably the backward kernel
// (value-at-zero, size, input, output) — confirm against OperatorImpl_cpu.
using HeavisideImplCpu =
    OperatorImpl_cpu<Heaviside_Op,
                     void(std::size_t, const void *, void *, const float),
                     void(const float, std::size_t, const void *, void *)>;

// Implementation entry point registration for operator Heaviside
REGISTRAR(Heaviside_Op, "cpu", HeavisideImplCpu::create);
} // namespace Aidge
#endif // AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef> // std::size_t
#include "aidge/backend/cpu/operator/HeavisideImpl.hpp"
#include "aidge/utils/ErrorHandling.hpp"
namespace Aidge {
/**
 * @brief Forward kernel for the Heaviside step function.
 *
 * output[i] = 1 when input[i] > 0, `value` when input[i] == 0, 0 otherwise.
 *
 * @tparam I input element type.
 * @tparam O output element type.
 * @param inputLength number of elements in the input/output buffers
 *        (renamed from the misspelled "inputLenght").
 * @param input_ raw pointer to the input buffer (elements of type I).
 * @param output_ raw pointer to the output buffer (elements of type O).
 * @param value value assigned where the input is exactly zero.
 */
template <class I, class O>
void HeavisideImplCpuForwardKernel(std::size_t inputLength,
                                   const void *input_,
                                   void *output_,
                                   const float value) {

    const I *input = static_cast<const I *>(input_);
    O *output = static_cast<O *>(output_);

    for (std::size_t i = 0; i < inputLength; ++i) {
        output[i] = (input[i] > 0) ? 1 : (input[i] == 0 ? value : 0);
    }
}
// Kernels registration to implementation entry point
REGISTRAR(HeavisideImplCpu,
          {DataType::Float32},
          {ProdConso::inPlaceModel,
           Aidge::HeavisideImplCpuForwardKernel<float, float>,
           nullptr});
} // namespace Aidge

#endif // AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_
......@@ -34,6 +34,7 @@ using MulImpl_cpu = OperatorImpl_cpu<Mul_Op,
const std::size_t,
const std::vector<std::size_t>,
const std::vector<std::size_t>,
const std::vector<std::size_t>,
const void*,
const void*,
const void*,
......
......@@ -149,61 +149,53 @@ void MulImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
template <class I1, class I2, class O>
void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
const std::size_t input1Length,
const std::size_t grad0Length,
const std::vector<std::size_t> input0Dims,
const std::vector<std::size_t> input1Dims,
const void* input0_,
const void* input1_,
const void* grad_output_,
void* gradientInput0,
void* gradientInput1)
const std::size_t input1Length,
const std::size_t gradOutputLength,
const std::vector<std::size_t>& dims0,
const std::vector<std::size_t>& dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
const void* grad_output_,
void* gradientInput0_,
void* gradientInput1_)
{
const auto* input0 = static_cast<const I1*>(input0_);
const auto* input1 = static_cast<const I1*>(input1_);
const auto* grad_output = static_cast<const O*>(grad_output_);
auto* grad_input_0 = static_cast<I1*>(gradientInput0);
auto* grad_input_1 = static_cast<I2*>(gradientInput1);
if(input0Dims.size() >= input1Dims.size())
{
AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
for(auto i = 0U; i < input0Length; ++i)
{
const auto indices = getMultiDimIndices(input1Dims, i);
const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
grad_input_0[i] = input1[flattenedIndex] * grad_output[i];
}
for(std::size_t i = 0 ; i < grad0Length; ++i)
{
const auto indices = getMultiDimIndices(input1Dims, i);
const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
grad_input_1[flattenedIndex] += input0[i] * grad_output[i];
const I1* input0 = static_cast<const I1*>(input0_);
const I2* input1 = static_cast<const I2*>(input1_);
const O* grad_output = static_cast<const O*>(grad_output_);
auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));
// Broadcast dims0 and dims1 to match the shape of outputDims
auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
for (std::size_t i = 0; i < gradOutputLength; ++i) {
auto idxOutputGrad = getMultiDimIndices(outputDims, i);
std::vector<std::size_t> idxInput0(broadcastedDims0.size());
std::vector<std::size_t> idxInput1(broadcastedDims1.size());
// Map output indices to input0 indices, considering broadcasting
for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
// If input0 is broadcasted along this dimension (== 1) or both dimensions are 1, index is 0.
// idxInput0 represent the multi dim index of input0 contributing
// to the output at index i.
idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
} else {
AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
for(auto i = 0U; i < input1Length; ++i)
{
const auto indices = getMultiDimIndices(input0Dims, i);
const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
grad_input_1[i] = input0[flattenedIndex] * grad_output[i];
for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
for(std::size_t i = 0 ; i < grad0Length; ++i)
{
const auto indices = getMultiDimIndices(input0Dims, i);
const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
// We have to access tensors with a flat index, hence the conversion
auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);
grad_input_0[flattenedIndex] += input1[i] * grad_output[i];
}
grad_input_0[idx0] += static_cast<I1>(grad_output[i] * input1[idx1]);
grad_input_1[idx1] += static_cast<I2>(grad_output[i] * input0[idx0]);
}
}
......@@ -211,6 +203,9 @@ void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
REGISTRAR(MulImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, float, float>, Aidge::MulImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(MulImpl_cpu,
{{{DataType::Float32}, {DataType::Float64}}, {DataType::Float32}},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, double, float>, Aidge::MulImpl_cpu_backward_kernel<float, double, float>});
REGISTRAR(MulImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<double, double, double>, Aidge::MulImpl_cpu_backward_kernel<double, double, double>});
......
......@@ -99,30 +99,31 @@ void ResizeImpl_cpu_forward_kernel(
}
return;
}
// Kernels registration to implementation entry point
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Int16},
{DataType::Float32},
{DataType::Float32},
{DataType::UInt64}},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Int16}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<int16_t>,
nullptr});
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Int32},
{DataType::Float32},
{DataType::Float32},
{DataType::UInt64}},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Int32}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<int32_t>,
nullptr});
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Int64},
{DataType::Float32},
{DataType::Float32},
{DataType::Int64}},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::UInt64}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<int64_t>,
......@@ -130,27 +131,27 @@ REGISTRAR(ResizeImpl_cpu,
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Float16},
{DataType::Float32},
{DataType::Float32},
{DataType::UInt64}},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Float16}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<half_float::half>,
nullptr});
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Float32},
{DataType::Float32},
{DataType::Float32},
{DataType::UInt64}},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Float32}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<float>,
nullptr});
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Float64},
{DataType::Float32},
{DataType::Float32},
{DataType::UInt64}},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Float64}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<double>,
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_
#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/WeightInterleaving.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend.
// Kernel signature: (input interleaving size, number of interleaved groups,
// output interleaving size, raw input, raw output).
using WeightInterleavedImpl_cpu = OperatorImpl_cpu<WeightInterleaving_Op,
                                                   void(const DimSize_t,
                                                        const DimSize_t,
                                                        const DimSize_t,
                                                        const void *,
                                                        void *)>;

// Implementation entry point registration to Operator
REGISTRAR(WeightInterleaving_Op, "cpu", Aidge::WeightInterleavedImpl_cpu::create);
} // namespace Aidge

#endif /* AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVEDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVEDIMPL_KERNELS_H_
#include <cstddef> // std::size_t
#include <cstdint> // std::int8_t, std::uint8_t
#include "aidge/backend/cpu/operator/WeightInterleavedImpl.hpp"
#include "aidge/data/DataType.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/ErrorHandling.hpp"
namespace Aidge {
/**
* @brief Compacts 8-bit data into a smaller bit-width representation.
*
* This function takes an array of 8-bit data and compacts it into smaller chunks
* based on the specified bit-width `nb_bits`. Each element in `compactData` will
* store multiple packed `nb_bits` segments extracted from `data`.
*
* @param data The input array of 8-bit values to be compacted.
* @param dataSize The size of the input `data` array.
* @param compactData The output array storing the compacted data.
* @param nb_bits The number of bits to extract from each `data` element (must be less than 8).
*/
/**
 * @brief Compacts 8-bit data into a smaller bit-width representation.
 *
 * This function takes an array of 8-bit data and compacts it into smaller chunks
 * based on the specified bit-width `nb_bits`. Each element in `compactData` will
 * store multiple packed `nb_bits` segments extracted from `data`.
 *
 * @param data The input array of 8-bit values to be compacted.
 * @param dataSize The size of the input `data` array.
 * @param compactData The output array storing the compacted data.
 * @param nb_bits The number of bits to extract from each `data` element (must be in [1, 4]).
 */
template <typename T>
void compact_data(const T* data, std::size_t dataSize, T* compactData, std::uint8_t nb_bits) {
    AIDGE_ASSERT(nb_bits > 0 && nb_bits < 5, "Cannot compact with the given nb_bits"); // Ensure valid bit width

    // Mask to extract `nb_bits` from each data element
    const unsigned int mask = (1U << nb_bits) - 1;

    // Calculate the number of `nb_bits` segments that fit into an 8-bit compacted value
    const unsigned int nbSlot = 8 / nb_bits;

    // Case nb_bits=3 or 4, then shift is 4
    // Case nb_bits=2, then shift is 2
    // Case nb_bits=1, then shift is 1
    const std::uint8_t shift = 8 / nbSlot;

    // Use std::size_t (not unsigned int) so large inputs are not truncated.
    const std::size_t nbFullCompactbytes = dataSize / nbSlot;

    // Main loop to process data in groups of `nbSlot`
    for (std::size_t i = 0; i < nbFullCompactbytes; ++i) {
        T compact = 0;
        for (unsigned int j = 0; j < nbSlot; ++j) {
            compact |= (data[i * nbSlot + j] & mask); // Apply mask to keep `nb_bits` only
            // Shift only if not on the last slot to make room for the next `nb_bits`
            if (j < nbSlot - 1) {
                compact <<= shift;
            }
        }
        // Store the compacted value in the output array
        compactData[i] = compact;
    }

    // Handle any remaining data elements (if dataSize is not a multiple of nbSlot):
    // pack them left-aligned into one trailing compacted element.
    const std::size_t remaining = dataSize % nbSlot;
    if (remaining != 0) {
        // Bug fix: accumulate in T (the element type) instead of a hard-coded
        // std::int8_t, so unsigned instantiations are not routed through a
        // signed byte (left-shifting into the sign bit).
        T compact = 0;
        for (std::size_t j = 0; j < remaining; ++j) {
            compact |= (data[nbFullCompactbytes * nbSlot + j] & mask);
            if (j < remaining - 1) {
                compact <<= shift;
            }
        }
        // Left-align the partial group within the trailing element.
        compact <<= (shift * (nbSlot - remaining));
        // Store the last compacted value (same index as dataSize / nbSlot).
        compactData[nbFullCompactbytes] = compact;
    }
}
/**
 * @brief Forward kernel: interleaves weights by packing each group of
 *        `input_interleaving` sub-byte values into `output_interleaving`
 *        bytes via compact_data().
 *
 * @tparam I input element type (8-bit container of the sub-byte values).
 * @tparam O output element type.
 * @tparam nb_bits bit-width of each packed value.
 */
template <class I, class O, int nb_bits>
void WeightInterleavedImpl_cpu_forward_kernel(const DimSize_t input_interleaving,
                                              const DimSize_t nb_interleaving,
                                              const DimSize_t output_interleaving,
                                              const void* input_,
                                              void* output_) {
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);

    // Pack every interleaving group independently.
    for (std::size_t group = 0; group < nb_interleaving; ++group) {
        const I* groupIn = input + group * input_interleaving;
        O* groupOut = output + group * output_interleaving;
        compact_data(groupIn,
                     input_interleaving,
                     groupOut,
                     static_cast<std::uint8_t>(nb_bits));
    }
}
// Kernel registration: one entry per supported sub-byte data type, signed and
// unsigned, all in NHWC format. The template's nb_bits matches the DataType.
REGISTRAR(WeightInterleavedImpl_cpu,
          {ImplSpec::IOSpec{DataType::Int4, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Int4>, DataFormat::NHWC}},
          {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 4>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
          {ImplSpec::IOSpec{DataType::Int3, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Int3>, DataFormat::NHWC}},
          {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 3>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
          {ImplSpec::IOSpec{DataType::Int2, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Int2>, DataFormat::NHWC}},
          {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 2>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
          {ImplSpec::IOSpec{DataType::Binary, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::Binary>, DataFormat::NHWC}},
          {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<int8_t, int8_t, 1>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
          {ImplSpec::IOSpec{DataType::UInt4, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::UInt4>, DataFormat::NHWC}},
          {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<uint8_t, uint8_t, 4>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
          {ImplSpec::IOSpec{DataType::UInt3, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::UInt3>, DataFormat::NHWC}},
          {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<uint8_t, uint8_t, 3>, nullptr});
REGISTRAR(WeightInterleavedImpl_cpu,
          {ImplSpec::IOSpec{DataType::UInt2, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavedType_v<DataType::UInt2>, DataFormat::NHWC}},
          {ProdConso::defaultModel, Aidge::WeightInterleavedImpl_cpu_forward_kernel<uint8_t, uint8_t, 2>, nullptr});
} // namespace Aidge

#endif /* AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVEDIMPL_KERNELS_H_ */
\ No newline at end of file
// Template processed by CMake's configure_file(); @...@ placeholders are
// substituted at configure time to produce cpu_version.h. Do not edit the
// generated header directly.
#ifndef VERSION_H
#define VERSION_H

namespace Aidge {
// Version components of the aidge_backend_cpu project (from version.txt).
static constexpr const int PROJECT_VERSION_MAJOR = @PROJECT_VERSION_MAJOR@;
static constexpr const int PROJECT_VERSION_MINOR = @PROJECT_VERSION_MINOR@;
static constexpr const int PROJECT_VERSION_PATCH = @PROJECT_VERSION_PATCH@;
static constexpr const char * PROJECT_VERSION = "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@";
// Short hash of the git commit the library was built from (may be empty).
static constexpr const char * PROJECT_GIT_HASH = "@GIT_COMMIT_HASH@";
} // namespace Aidge

#endif // VERSION_H
......@@ -2,17 +2,20 @@
#define AIDGE_UTILS_SYS_INFO_CPU_VERSION_INFO_H
#include "aidge/utils/Log.hpp"
#include "aidge/backend/cpu_version.h"
namespace Aidge {
#ifndef PROJECT_VERSION // Normally defined in CMakeLists.txt
#define PROJECT_VERSION "Unknown version"
#endif
#ifndef GIT_COMMIT_HASH
#define GIT_COMMIT_HASH ""
#endif
void showCpuVersion() {
Log::info("Aidge backend CPU: {} ({}), {} {}", PROJECT_VERSION, GIT_COMMIT_HASH, __DATE__, __TIME__);
constexpr inline const char * getBackendCPUProjectVersion(){
return PROJECT_VERSION;
}
constexpr inline const char * getBackendCPUGitHash(){
return PROJECT_GIT_HASH;
}
void showBackendCpuVersion() {
Log::info("Aidge backend CPU: {} ({}), {} {}", getBackendCPUProjectVersion(), getBackendCPUGitHash(), __DATE__, __TIME__);
// Compiler version
#if defined(__clang__)
/* Clang/LLVM. ---------------------------------------------- */
......
aidge_backend_cpu
......@@ -4,20 +4,28 @@ description="CPU implementation of operators of the AIDGE framework"
dependencies = [
"numpy",
]
requires-python = ">= 3.7"
requires-python = ">= 3.8"
readme = "README.md"
license = { file = "LICENSE" }
classifiers = [
classifiers = [
"Development Status :: 2 - Pre-Alpha",
"Programming Language :: Python :: 3"
]
dynamic = ["version"] # defined in tool.setuptools_scm
dynamic = ["version"] # defined by pbr
[project.urls]
Homepage = "https://www.deepgreen.ai/en/platform"
Documentation = "https://eclipse-aidge.readthedocs.io/en/latest/"
Repository = "https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu"
Issues = "https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu/-/issues"
Changelog = "https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu/-/releases"
[build-system]
requires = [
"setuptools>=64",
"setuptools_scm[toml]==7.1.0",
"cmake>=3.18.4.post1"
"cmake>=3.18.4.post1",
"pbr"
]
build-backend = "setuptools.build_meta"
......@@ -29,9 +37,6 @@ where = ["."] # list of folders that contain the packages (["."] by default)
include = ["aidge_backend_cpu*"] # package names should match these glob patterns (["*"] by default)
exclude = ["aidge_backend_cpu.unit_tests*"] # exclude packages matching these glob patterns (empty by default)
namespaces = false # to disable scanning PEP 420 namespaces (true by default)
# SETUPTOOLS_SCM
[tool.setuptools_scm]
write_to = "aidge_backend_cpu/_version.py"
#####################################################
# CIBUILDWHEEL
......
......@@ -6,10 +6,10 @@ namespace py = pybind11;
namespace Aidge {
void init_cpu_sys_info(py::module& m);
void init_CpuVersionInfo(py::module& m);
void init_Aidge(py::module& m){
init_cpu_sys_info(m);
init_CpuVersionInfo(m);
}
......
......@@ -3,7 +3,9 @@
namespace py = pybind11;
namespace Aidge {
void init_cpu_sys_info(py::module& m){
m.def("show_cpu_version", &showCpuVersion);
void init_CpuVersionInfo(py::module& m){
m.def("show_version", &showBackendCpuVersion);
m.def("get_project_version", &getBackendCPUProjectVersion);
m.def("get_git_hash", &getBackendCPUGitHash);
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment