Skip to content
Snippets Groups Projects
Commit 03e4e4b1 authored by Maxence Naud's avatar Maxence Naud
Browse files

Merge branch 'low_bit_support_arm' into 'dev'

Low bit support for ARM Cortex-M export

See merge request !111
parents 5f379bac 78247d02
No related branches found
No related tags found
1 merge request!111Low bit support for ARM Cortex-M export
Pipeline #64978 failed
......@@ -53,6 +53,7 @@
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
#include "aidge/backend/cpu/operator/SubImpl.hpp"
#include "aidge/backend/cpu/operator/TanhImpl.hpp"
#include "aidge/backend/cpu/operator/WeightInterleavingImpl.hpp"
#include "aidge/backend/cpu/data/TensorImpl.hpp"
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_
#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/WeightInterleaving.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend.
// Registered kernel signature:
//   void(input_interleaving, nb_interleaving, output_interleaving, input, output)
// where (per the CPU kernel) input_interleaving is the number of consecutive
// input elements packed together, nb_interleaving the number of independent
// segments, and output_interleaving the packed size of one output segment.
using WeightInterleavingImpl_cpu = OperatorImpl_cpu<WeightInterleaving_Op,
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(WeightInterleaving_Op, "cpu", Aidge::WeightInterleavingImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_ */
#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_KERNELS_H_
#include <algorithm>
#include "aidge/backend/cpu/operator/WeightInterleavingImpl.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
/**
 * @brief Compacts 8-bit data into a smaller bit-width representation.
 *
 * Packs `nb_bits`-wide segments extracted from consecutive `data` elements
 * into each output element, first input element in the most-significant slot.
 * If `dataSize` is not a multiple of the number of slots per output element,
 * the trailing partial group is left-aligned and zero-padded.
 *
 * @param data The input array of 8-bit values to be compacted.
 * @param dataSize The size of the input `data` array.
 * @param compactData The output array storing the compacted data.
 * @param nb_bits The number of bits to keep from each `data` element (1 to 4).
 */
template <typename T>
void compact_data(const T* data, std::size_t dataSize, T* compactData, std::uint8_t nb_bits) {
    AIDGE_ASSERT(nb_bits > 0 && nb_bits < 5, "Cannot compact with the given nb_bits"); // Ensure valid bit width

    // Mask to extract `nb_bits` from each data element
    const unsigned int mask = (1U << nb_bits) - 1;

    // Number of `nb_bits` segments that fit into an 8-bit compacted value:
    // nb_bits=4 or 3 -> 2 slots, nb_bits=2 -> 4 slots, nb_bits=1 -> 8 slots
    const unsigned int nbSlot = 8 / nb_bits;

    // Bit distance between two consecutive slots inside a compacted value
    // (4 when nbSlot==2, 2 when nbSlot==4, 1 when nbSlot==8)
    const std::uint8_t shift = 8 / nbSlot;

    // Number of fully-filled compacted output elements
    const std::size_t nbFullCompactbytes = dataSize / nbSlot;

    // Main loop to process data in groups of `nbSlot`
    for (std::size_t i = 0; i < nbFullCompactbytes; ++i) {
        T compact = 0;
        for (unsigned int j = 0; j < nbSlot; ++j) {
            compact |= (data[i * nbSlot + j] & mask); // Apply mask to keep `nb_bits` only
            // Shift only if not on the last slot to make room for the next `nb_bits`
            if (j < nbSlot - 1) {
                compact <<= shift;
            }
        }
        // Store the compacted value in the output array
        compactData[i] = compact;
    }

    // Handle any remaining data elements (if dataSize is not a multiple of nbSlot).
    const std::size_t remaining = dataSize % nbSlot;
    if (remaining != 0) {
        // Accumulate in T (was std::int8_t): the unsigned instantiations must
        // not round-trip through a signed 8-bit intermediate, and this keeps
        // the tail path consistent with the main loop above.
        T compact = 0;
        for (std::size_t j = 0; j < remaining; ++j) {
            compact |= (data[nbFullCompactbytes * nbSlot + j] & mask);
            if (j < remaining - 1) {
                compact <<= shift;
            }
        }
        // Left-align the partial group so slot order matches full elements
        compact <<= (shift * (nbSlot - remaining));
        // Store the last compacted value
        compactData[nbFullCompactbytes] = compact;
    }
}
/**
 * @brief Forward kernel: packs each of the `nb_interleaving` input segments
 * of `input_interleaving` elements into `output_interleaving` compacted
 * elements, `nb_bits` per value, using compact_data().
 */
template <class I, class O, int nb_bits>
void WeightInterleavingImpl_cpu_forward_kernel(const DimSize_t input_interleaving,
                                    const DimSize_t nb_interleaving,
                                    const DimSize_t output_interleaving,
                                    const void* input_,
                                    void* output_) {
    const I* const in = static_cast<const I*>(input_);
    O* const out = static_cast<O*>(output_);

    // Compact every segment independently
    for (std::size_t seg = 0; seg < nb_interleaving; ++seg) {
        const I* const segIn = in + seg * input_interleaving;
        O* const segOut = out + seg * output_interleaving;
        compact_data(segIn, input_interleaving, segOut, static_cast<std::uint8_t>(nb_bits));
    }
}
// Kernel registrations: one entry per supported low-bit DataType.
// Each entry maps an {input spec, output spec} pair (both NHWC; output type
// derived via WeightInterleavingType<...>::type) to a forward kernel
// instantiated as <storage type, storage type, nb_bits>.
REGISTRAR(WeightInterleavingImpl_cpu,
{ImplSpec::IOSpec{DataType::Int4, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavingType<DataType::Int4>::type, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 4>, nullptr});
REGISTRAR(WeightInterleavingImpl_cpu,
{ImplSpec::IOSpec{DataType::Int3, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavingType<DataType::Int3>::type, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 3>, nullptr});
REGISTRAR(WeightInterleavingImpl_cpu,
{ImplSpec::IOSpec{DataType::Int2, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavingType<DataType::Int2>::type, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 2>, nullptr});
REGISTRAR(WeightInterleavingImpl_cpu,
{ImplSpec::IOSpec{DataType::Binary, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavingType<DataType::Binary>::type, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 1>, nullptr});
REGISTRAR(WeightInterleavingImpl_cpu,
{ImplSpec::IOSpec{DataType::UInt4, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavingType<DataType::UInt4>::type, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<uint8_t, uint8_t, 4>, nullptr});
REGISTRAR(WeightInterleavingImpl_cpu,
{ImplSpec::IOSpec{DataType::UInt3, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavingType<DataType::UInt3>::type, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<uint8_t, uint8_t, 3>, nullptr});
REGISTRAR(WeightInterleavingImpl_cpu,
{ImplSpec::IOSpec{DataType::UInt2, DataFormat::NHWC}, ImplSpec::IOSpec{WeightInterleavingType<DataType::UInt2>::type, DataFormat::NHWC}},
{ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<uint8_t, uint8_t, 2>, nullptr});
// NOTE(review): earlier registrations without an explicit output IOSpec,
// kept for reference:
// REGISTRAR(WeightInterleavingImpl_cpu,
// {ImplSpec::IOSpec{DataType::Int4, DataFormat::NHWC}},
// {ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 4>, nullptr});
// REGISTRAR(WeightInterleavingImpl_cpu,
// {ImplSpec::IOSpec{DataType::Int3, DataFormat::NHWC}},
// {ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 3>, nullptr});
// REGISTRAR(WeightInterleavingImpl_cpu,
// {ImplSpec::IOSpec{DataType::Int2, DataFormat::NHWC}},
// {ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 2>, nullptr});
}
#endif /* AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_KERNELS_H_ */
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/backend/cpu/operator/WeightInterleavingImpl.hpp"

#include <cstddef>     // std::size_t
#include <functional>  // std::multiplies
#include <memory>
#include <numeric>     // std::accumulate
#include <tuple>

#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/WeightInterleavingImpl_kernels.hpp"
#include "aidge/operator/WeightInterleaving.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::WeightInterleavingImpl_cpu::forward()
{
    const auto& op = dynamic_cast<const WeightInterleaving_Op&>(mOp);
    AIDGE_ASSERT(op.getInput(0), "missing input #0");

    // Select the kernel matching this operator's required specs
    const auto impl = Registrar<WeightInterleavingImpl_cpu>::create(getBestMatch(getRequiredSpec()));

    // Convert input data (no overhead if not needed!)
    // TODO: right now, if needed, memory will be allocated/deallocated at each
    // call to forward(). We might put the following shared_ptr as members of
    // this class to avoid that.
    std::shared_ptr<Tensor> weightFallback;
    const auto& weights = op.getInput(0)->refCastFrom(weightFallback, *(op.getOutput(0)));

    const auto& wDims = weights.dims();

    // An "interleaving" is a run of consecutive elements along the last
    // dimension (cf STM32 low bit kernels) that gets compacted together.
    const std::size_t inputInterleaving = wDims.back();

    // Compacted size of that run, as computed by forwardDims when the
    // output tensor was resized.
    const std::size_t outputInterleaving = op.getOutput(0)->dims().back();

    // Number of runs to compact = product of every dimension except the
    // last one. The empty product is 1, which covers 1-D weight tensors.
    const std::size_t nbInterleaving = std::accumulate(wDims.cbegin(),
                                                       std::prev(wDims.cend()),
                                                       std::size_t(1),
                                                       std::multiplies<std::size_t>());

    impl.forward(inputInterleaving,
                 nbInterleaving,
                 outputInterleaving,
                 weights.getImpl()->rawPtr(),
                 getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::WeightInterleavingImpl_cpu::backward() {
    // No backward pass is provided for this operator on the CPU backend:
    // calling it always throws (or aborts, depending on build configuration).
    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for WeightInterleaving_Op on backend cpu");
}
\ No newline at end of file
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment