From 110b05060f97c0dda3d5dd33ae03ae0055523985 Mon Sep 17 00:00:00 2001 From: thibault allenet <thibault.allenet@cea.fr> Date: Wed, 6 Nov 2024 09:32:15 +0000 Subject: [PATCH] First version of compactData for custom datatype in export arm --- include/aidge/utils/CompactData.hpp | 105 +++++++++++++++ python_binding/utils/pybind_CompactData.cpp | 23 ++++ unit_tests/utils/Test_CompactData.cpp | 137 ++++++++++++++++++++ 3 files changed, 265 insertions(+) create mode 100644 include/aidge/utils/CompactData.hpp create mode 100644 python_binding/utils/pybind_CompactData.cpp create mode 100644 unit_tests/utils/Test_CompactData.cpp diff --git a/include/aidge/utils/CompactData.hpp b/include/aidge/utils/CompactData.hpp new file mode 100644 index 000000000..092102824 --- /dev/null +++ b/include/aidge/utils/CompactData.hpp @@ -0,0 +1,105 @@ + +#ifndef __CONVERT_CUSTOM_DATA_H__ +#define __CONVERT_CUSTOM_DATA_H__ + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <cmath> + +#include "aidge/utils/ErrorHandling.hpp" + + +namespace Aidge { + + /** + * @brief Calculates the required size for the 8-bits`compactData` vector. + * + * This function determines the minimum number of bytes needed in `compactData` + * to store `dataSize` elements compacted to `nb_bits` bits each. + * + * @param dataSize The total number of elements in the input data array. + * @param nb_bits The number of bits to use for each compacted element (from 1 to 7). + * @return std::size_t The required size in bytes for `compactData`. + */ + std::size_t compactDataSize(std::size_t dataSize, std::uint8_t nb_bits) { + AIDGE_ASSERT(nb_bits > 0 && nb_bits < 8, "nb_bits must be between 1 and 4"); // Ensure valid bit width + + // Calculate the number of `nb_bits` segments that can fit in an 8-bit byte. + const unsigned int nbSlot = 8 / nb_bits; + + // Calculate the number of compacted bytes needed to store all data elements. + // The formula (dataSize + nbSlot - 1) / nbSlot effectively rounds up the division, ensuring that any remaining elements that don't fully fill a byte are accounted for. + std::size_t requiredSize = (dataSize + nbSlot - 1) / nbSlot; + + return requiredSize; + } + + /** + * @brief Compacts 8-bit data into a smaller bit-width representation. + * + * This function takes an array of 8-bit data and compacts it into smaller chunks + * based on the specified bit-width `nb_bits`. Each element in `compactData` will + * store multiple packed `nb_bits` segments extracted from `data`. + * + * @param data The input array of 8-bit values to be compacted. + * @param dataSize The size of the input `data` array. + * @param compactData The output array storing the compacted data. + * @param nb_bits The number of bits to extract from each `data` element (must be less than 8). + */ + void compact_data(const std::int8_t* data, std::size_t dataSize, std::int8_t* compactData, std::uint8_t nb_bits) { + AIDGE_ASSERT(nb_bits > 0 && nb_bits < 5, "Cannot compact with the given nb_bits"); // Ensure valid bit width + + // Mask to extract `nb_bits` from each data element + const unsigned int mask = (1U << nb_bits) - 1; + + // Calculate the number of `nb_bits` segments that fit into an 8-bit compacted value + const unsigned int nbSlot = 8 / nb_bits; + + // Case nb_bits=3 or 4, then shift is 4 + // Case nb_bits=2, then shift is 2 + // Case nb_bits=1, then shift is 1 + std::uint8_t shift = 8 / nbSlot; + + const unsigned int nbFullCompactbytes = dataSize / nbSlot; + + // Main loop to process data in groups of `nbSlot` + for (std::size_t i = 0; i < nbFullCompactbytes; ++i) { + std::int8_t compact = 0; + + for (unsigned int j = 0; j < nbSlot; ++j) { + compact |= (data[i * nbSlot + j] & mask); // Apply mask to keep `nb_bits` only + + // Shift only if not on the last slot to make room for the next `nb_bits` + if (j < nbSlot - 1) { + compact <<= shift; + } + } + // Store the compacted value in the output array + compactData[i] = compact; + } + + + // Handle any remaining data elements (if dataSize is not a multiple of nbSlot). + std::size_t remaining = dataSize % nbSlot; + if (remaining != 0) { + std::int8_t compact = 0; + for (std::size_t j = 0; j < remaining; ++j) { + compact |= (data[nbFullCompactbytes*nbSlot + j] & mask); + + if (j < remaining - 1) { + compact <<= shift; + } + } + compact <<= (shift*(nbSlot - remaining)); + // Store the last compacted value + compactData[dataSize / nbSlot] = compact; + } + } + + + +} + +#endif \ No newline at end of file diff --git a/python_binding/utils/pybind_CompactData.cpp b/python_binding/utils/pybind_CompactData.cpp new file mode 100644 index 000000000..aad35d572 --- /dev/null +++ b/python_binding/utils/pybind_CompactData.cpp @@ -0,0 +1,23 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> +#include "aidge/utils/CompactData.hpp" + +namespace py = pybind11; + +namespace Aidge { + +void init_CompactData(py::module &m) { + m.def("compact_data", &compact_data, py::arg("data"), py::arg("data_size"), py::arg("compact_data"), py::arg("nb_bits"), + "Compacts 8-bit data into smaller bit-width representation."); +} +} // namespace Aidge diff --git a/unit_tests/utils/Test_CompactData.cpp b/unit_tests/utils/Test_CompactData.cpp new file mode 100644 index 000000000..7971046a8 --- /dev/null +++ b/unit_tests/utils/Test_CompactData.cpp @@ -0,0 +1,137 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <vector> +#include <cstdint> +#include <iostream> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/CompactData.hpp" + +using namespace Aidge; + +TEST_CASE("compactDataSize - Basic Tests", "[core][required_size]") { + SECTION("Single element cases") { + REQUIRE(Aidge::compactDataSize(1, 1) == 1); // 1 bit, needs 1 byte + REQUIRE(Aidge::compactDataSize(1, 7) == 1); // 7 bits, needs 1 byte + } + + SECTION("Boundary cases for different nb_bits values") { + REQUIRE(Aidge::compactDataSize(8, 1) == 1); // 8 elements at 1 bit each, fits in 1 byte + REQUIRE(Aidge::compactDataSize(8, 2) == 2); // 8 elements at 2 bits each, needs 2 bytes + REQUIRE(Aidge::compactDataSize(8, 3) == 4); // 8 elements at 3 bits each, needs 4 bytes + REQUIRE(Aidge::compactDataSize(8, 4) == 4); // 8 elements at 4 bits each, needs 4 bytes + } + + SECTION("Larger dataSize values") { + REQUIRE(Aidge::compactDataSize(16, 1) == 2); // 16 elements at 1 bit each, fits in 2 bytes + REQUIRE(Aidge::compactDataSize(16, 2) == 4); // 16 elements at 2 bits each, needs 4 bytes + REQUIRE(Aidge::compactDataSize(16, 3) == 8); // 16 elements at 3 bits each, needs 6 bytes + REQUIRE(Aidge::compactDataSize(16, 4) == 8); // 16 elements at 4 bits each, needs 8 bytes + } + + SECTION("Odd dataSize values with varying nb_bits") { + REQUIRE(Aidge::compactDataSize(7, 1) == 1); // 7 elements at 1 bit each, fits in 1 byte + REQUIRE(Aidge::compactDataSize(7, 2) == 2); // 7 elements at 2 bits each, needs 2 bytes + REQUIRE(Aidge::compactDataSize(7, 3) == 4); // 7 elements at 3 bits each, needs 4 bytes + REQUIRE(Aidge::compactDataSize(7, 4) == 4); // 7 elements at 4 bits each, needs 4 bytes + } + + SECTION("Minimum and maximum values for nb_bits") { + REQUIRE(Aidge::compactDataSize(5, 1) == 1); // 5 elements at 1 bit each, fits in 1 byte + } + + SECTION("Edge Case - dataSize of 0 should result in 0 required size") { + REQUIRE(Aidge::compactDataSize(0, 1) == 0); // No data elements + } +} + + +TEST_CASE("Compact Data", "[core][compactdata]") { + SECTION("Basic Tests - 4-bit compaction") { + + Tensor data = Array1D<std::int8_t, 4>{{static_cast<std::int8_t>(0x0F), static_cast<std::int8_t>(0xF5), static_cast<std::int8_t>(0xB3), static_cast<std::int8_t>(0x9C)}}; + uint8_t nb_bits = 4; + + std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits); + // std::size_t required_size = (data.size() + (8 / nb_bits) - 1) / (8 / nb_bits); + std::vector<int8_t> compactData(required_size); + + Aidge::compact_data(static_cast<int8_t*>(data.getImpl()->rawPtr()), data.size(), static_cast<int8_t*>(compactData.data()), nb_bits); + + // Expected result: each int8_t in compactData holds two 4-bit values. + std::vector<int8_t> expected = {{static_cast<int8_t>(0xF5), static_cast<int8_t>(0x3C)}}; + REQUIRE(compactData == expected); + } + + SECTION("Basic Tests - 2-bit compaction") { + Tensor data = Array1D<std::int8_t, 4>{{static_cast<std::int8_t>(0x03), static_cast<std::int8_t>(0x02), static_cast<std::int8_t>(0x01), static_cast<std::int8_t>(0x00)}}; + uint8_t nb_bits = 2; + + std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits); + std::vector<int8_t> compactData(required_size); + + Aidge::compact_data(static_cast<int8_t*>(data.getImpl()->rawPtr()), data.size(), static_cast<int8_t*>(compactData.data()), nb_bits); + + // Expected result: each int8_t in compactData holds four 2-bit values. + std::vector<int8_t> expected = {static_cast<int8_t>(0xE4)}; + REQUIRE(compactData == expected); + } + + SECTION("Edge Cases - Single element data") { + Tensor data = Array1D<std::int8_t, 1>{{static_cast<int8_t>(0x0F)}}; + uint8_t nb_bits = 4; + + std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits); + std::vector<int8_t> compactData(required_size); + + Aidge::compact_data(static_cast<int8_t*>(data.getImpl()->rawPtr()), data.size(), static_cast<int8_t*>(compactData.data()), nb_bits); + + // Expected result: should be a single 4-bit compacted value in one int8_t. + std::vector<int8_t> expected = {static_cast<int8_t>(0xF0)}; + REQUIRE(compactData == expected); + } + + SECTION("Edge Cases - Non-divisible dataSize for nbSlot") { + + Tensor data = Array1D<std::int8_t, 3>{{static_cast<int8_t>(0x0F), static_cast<int8_t>(0xA5), static_cast<int8_t>(0x34)}}; + uint8_t nb_bits = 4; + + std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits); + std::vector<int8_t> compactData(required_size); + + Aidge::compact_data(static_cast<int8_t*>(data.getImpl()->rawPtr()), data.size(), static_cast<int8_t*>(compactData.data()), nb_bits); + + // Expected: last value padded (only three 4-bit values to fit in two int8_t). + std::vector<int8_t> expected = {static_cast<int8_t>(0xF5), static_cast<int8_t>(0x40)}; + REQUIRE(compactData == expected); + + } + + SECTION("Edge Cases - Non-divisible dataSize for nbSlot") { + + Tensor data = Array1D<std::int8_t, 3>{{static_cast<int8_t>(0x0F), static_cast<int8_t>(0x05), static_cast<int8_t>(0x04)}}; + uint8_t nb_bits = 3; + + std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits); + std::vector<int8_t> compactData(required_size); + + Aidge::compact_data(static_cast<int8_t*>(data.getImpl()->rawPtr()), data.size(), static_cast<int8_t*>(compactData.data()), nb_bits); + + // Expected: last value padded (only three 4-bit values to fit in two int8_t). + std::vector<int8_t> expected = {static_cast<int8_t>(0x75), static_cast<int8_t>(0x40)}; + REQUIRE(compactData == expected); + + } + +} + -- GitLab