From 110b05060f97c0dda3d5dd33ae03ae0055523985 Mon Sep 17 00:00:00 2001
From: thibault allenet <thibault.allenet@cea.fr>
Date: Wed, 6 Nov 2024 09:32:15 +0000
Subject: [PATCH] First version of compactData for custom datatype in export
 arm

---
 include/aidge/utils/CompactData.hpp         | 105 +++++++++++++++
 python_binding/utils/pybind_CompactData.cpp |  23 ++++
 unit_tests/utils/Test_CompactData.cpp       | 137 ++++++++++++++++++++
 3 files changed, 265 insertions(+)
 create mode 100644 include/aidge/utils/CompactData.hpp
 create mode 100644 python_binding/utils/pybind_CompactData.cpp
 create mode 100644 unit_tests/utils/Test_CompactData.cpp

diff --git a/include/aidge/utils/CompactData.hpp b/include/aidge/utils/CompactData.hpp
new file mode 100644
index 000000000..092102824
--- /dev/null
+++ b/include/aidge/utils/CompactData.hpp
@@ -0,0 +1,105 @@
+
+#ifndef __CONVERT_CUSTOM_DATA_H__
+#define __CONVERT_CUSTOM_DATA_H__
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <cmath>
+
+#include "aidge/utils/ErrorHandling.hpp"
+
+
+namespace Aidge {
+
+    /**
+     * @brief Computes the number of bytes needed to hold the compacted data.
+     *
+     * Each byte is divided into `8 / nb_bits` slots (integer division) and every
+     * element occupies one slot, so a final partially filled byte still counts.
+     *
+     * @param dataSize The total number of elements in the input data array.
+     * @param nb_bits The number of bits kept for each compacted element (from 1 to 7).
+     * @return std::size_t The required size in bytes for `compactData`.
+     */
+    inline std::size_t compactDataSize(std::size_t dataSize, std::uint8_t nb_bits) {
+        AIDGE_ASSERT(nb_bits > 0 && nb_bits < 8, "nb_bits must be between 1 and 7"); // Ensure valid bit width
+
+        // Number of `nb_bits`-wide slots that fit in one 8-bit byte.
+        const std::size_t nbSlot = 8 / nb_bits;
+
+        // Ceiling division written as quotient + (remainder != 0) so that it cannot
+        // wrap around for very large `dataSize`, unlike (dataSize + nbSlot - 1) / nbSlot.
+        const std::size_t requiredSize = dataSize / nbSlot + ((dataSize % nbSlot != 0) ? 1 : 0);
+
+        return requiredSize;
+    }
+
+    /**
+     * @brief Compacts 8-bit data into a smaller bit-width representation.
+     *
+     * Only the `nb_bits` least significant bits of each input element are kept.
+     * Every output byte is split into `8 / nb_bits` slots of `8 / (8 / nb_bits)`
+     * bits each (3-bit values therefore sit in 4-bit slots), and the first element
+     * of a group lands in the most significant slot. When `dataSize` is not a
+     * multiple of the slot count, the unused low slots of the last byte are zero.
+     *
+     * Example with nb_bits = 4: {0x0F, 0xF5, 0xB3, 0x9C} is compacted to {0xF5, 0x3C}.
+     *
+     * @param data The input array of 8-bit values to be compacted.
+     * @param dataSize The number of elements in the input `data` array.
+     * @param compactData The output array; it must provide room for
+     *        `compactDataSize(dataSize, nb_bits)` bytes.
+     * @param nb_bits The number of bits kept from each `data` element (from 1 to 4).
+     */
+    inline void compact_data(const std::int8_t* data, std::size_t dataSize, std::int8_t* compactData, std::uint8_t nb_bits) {
+        AIDGE_ASSERT(nb_bits > 0 && nb_bits < 5, "Cannot compact with the given nb_bits"); // Ensure valid bit width
+
+        // Mask selecting the `nb_bits` least significant bits of an element.
+        const unsigned int mask = (1U << nb_bits) - 1;
+
+        // Number of elements stored in one output byte.
+        const std::size_t nbSlot = 8 / nb_bits;
+
+        // Width in bits of one slot:
+        // nb_bits = 3 or 4 -> 4, nb_bits = 2 -> 2, nb_bits = 1 -> 1.
+        const unsigned int shift = 8 / static_cast<unsigned int>(nbSlot);
+
+        // Kept as std::size_t: narrowing to unsigned int would truncate the byte
+        // count for very large inputs.
+        const std::size_t nbFullCompactBytes = dataSize / nbSlot;
+
+        // Pack every complete group of `nbSlot` elements into one output byte.
+        for (std::size_t i = 0; i < nbFullCompactBytes; ++i) {
+            // Accumulate in an unsigned value so shifts never act on a negative number.
+            unsigned int compact = 0;
+            for (std::size_t j = 0; j < nbSlot; ++j) {
+                const unsigned int bits = static_cast<std::uint8_t>(data[i * nbSlot + j]) & mask;
+                compact = (compact << shift) | bits;
+            }
+            // Store the compacted value in the output array.
+            compactData[i] = static_cast<std::int8_t>(static_cast<std::uint8_t>(compact));
+        }
+
+        // Pack the leftover elements (dataSize not a multiple of nbSlot) into a last byte.
+        const std::size_t remaining = dataSize % nbSlot;
+        if (remaining != 0) {
+            const std::size_t tailStart = nbFullCompactBytes * nbSlot;
+            unsigned int compact = 0;
+            for (std::size_t j = 0; j < remaining; ++j) {
+                const unsigned int bits = static_cast<std::uint8_t>(data[tailStart + j]) & mask;
+                compact = (compact << shift) | bits;
+            }
+            // Move the packed values up to the most significant slots; the rest stays zero.
+            compact <<= shift * (nbSlot - remaining);
+            // Store the last, partially filled byte.
+            compactData[nbFullCompactBytes] = static_cast<std::int8_t>(static_cast<std::uint8_t>(compact));
+        }
+    }
+
+
+
+}
+
+#endif
\ No newline at end of file
diff --git a/python_binding/utils/pybind_CompactData.cpp b/python_binding/utils/pybind_CompactData.cpp
new file mode 100644
index 000000000..aad35d572
--- /dev/null
+++ b/python_binding/utils/pybind_CompactData.cpp
@@ -0,0 +1,23 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <pybind11/pybind11.h>
+#include "aidge/utils/CompactData.hpp"
+
+namespace py = pybind11;
+
+namespace Aidge {
+
+void init_CompactData(py::module &m) {  // NOTE(review): data/compact_data are raw int8_t pointers - confirm how pybind11 marshals them from Python
+    m.def("compact_data", &compact_data, "Compacts 8-bit data into smaller bit-width representation.",
+          py::arg("data"), py::arg("data_size"), py::arg("compact_data"), py::arg("nb_bits"));
+}
+}  // namespace Aidge
diff --git a/unit_tests/utils/Test_CompactData.cpp b/unit_tests/utils/Test_CompactData.cpp
new file mode 100644
index 000000000..7971046a8
--- /dev/null
+++ b/unit_tests/utils/Test_CompactData.cpp
@@ -0,0 +1,137 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <vector>
+#include <cstdint>
+#include <iostream>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/utils/CompactData.hpp"
+
+using namespace Aidge;
+
+TEST_CASE("compactDataSize - Basic Tests", "[core][required_size]") {
+    SECTION("Single element cases") {
+        REQUIRE(Aidge::compactDataSize(1, 1) == 1);  // 1 bit, needs 1 byte
+        REQUIRE(Aidge::compactDataSize(1, 7) == 1);  // 7 bits, needs 1 byte
+    }
+
+    SECTION("Boundary cases for different nb_bits values") {
+        REQUIRE(Aidge::compactDataSize(8, 1) == 1);  // 8 elements at 1 bit each, fits in 1 byte
+        REQUIRE(Aidge::compactDataSize(8, 2) == 2);  // 8 elements at 2 bits each, needs 2 bytes
+        REQUIRE(Aidge::compactDataSize(8, 3) == 4);  // 8 elements at 3 bits each (2 per byte), needs 4 bytes
+        REQUIRE(Aidge::compactDataSize(8, 4) == 4);  // 8 elements at 4 bits each, needs 4 bytes
+    }
+
+    SECTION("Larger dataSize values") {
+        REQUIRE(Aidge::compactDataSize(16, 1) == 2);  // 16 elements at 1 bit each, fits in 2 bytes
+        REQUIRE(Aidge::compactDataSize(16, 2) == 4);  // 16 elements at 2 bits each, needs 4 bytes
+        REQUIRE(Aidge::compactDataSize(16, 3) == 8);  // 16 elements at 3 bits each (2 per byte), needs 8 bytes
+        REQUIRE(Aidge::compactDataSize(16, 4) == 8);  // 16 elements at 4 bits each, needs 8 bytes
+    }
+
+    SECTION("Odd dataSize values with varying nb_bits") {
+        REQUIRE(Aidge::compactDataSize(7, 1) == 1);  // 7 elements at 1 bit each, fits in 1 byte
+        REQUIRE(Aidge::compactDataSize(7, 2) == 2);  // 7 elements at 2 bits each, needs 2 bytes
+        REQUIRE(Aidge::compactDataSize(7, 3) == 4);  // 7 elements at 3 bits each (2 per byte), needs 4 bytes
+        REQUIRE(Aidge::compactDataSize(7, 4) == 4);  // 7 elements at 4 bits each, needs 4 bytes
+    }
+
+    SECTION("Minimum and maximum values for nb_bits") {
+        REQUIRE(Aidge::compactDataSize(5, 1) == 1);  // 5 elements at 1 bit each, fits in 1 byte
+        REQUIRE(Aidge::compactDataSize(5, 7) == 5);  // 5 elements at 7 bits each, one element per byte
+    }
+    SECTION("Edge Case - dataSize of 0 should result in 0 required size") {
+        REQUIRE(Aidge::compactDataSize(0, 1) == 0);  // No data elements
+    }
+}
+
+
+TEST_CASE("Compact Data", "[core][compactdata]") {
+    SECTION("Basic Tests - 4-bit compaction") {
+
+        Tensor data = Array1D<std::int8_t, 4>{{static_cast<std::int8_t>(0x0F), static_cast<std::int8_t>(0xF5), static_cast<std::int8_t>(0xB3), static_cast<std::int8_t>(0x9C)}};
+        const std::uint8_t nb_bits = 4;
+
+        const std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits);
+        REQUIRE(required_size == 2);  // Two 4-bit slots per byte, so 4 elements need 2 bytes
+        std::vector<std::int8_t> compactData(required_size);
+
+        Aidge::compact_data(static_cast<std::int8_t*>(data.getImpl()->rawPtr()), data.size(), compactData.data(), nb_bits);
+
+        // Expected result: each int8_t in compactData holds two 4-bit values, first element in the high nibble.
+        const std::vector<std::int8_t> expected = {static_cast<std::int8_t>(0xF5), static_cast<std::int8_t>(0x3C)};
+        REQUIRE(compactData == expected);
+    }
+
+    SECTION("Basic Tests - 2-bit compaction") {
+        Tensor data = Array1D<std::int8_t, 4>{{static_cast<std::int8_t>(0x03), static_cast<std::int8_t>(0x02), static_cast<std::int8_t>(0x01), static_cast<std::int8_t>(0x00)}};
+        const std::uint8_t nb_bits = 2;
+
+        const std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits);
+        std::vector<std::int8_t> compactData(required_size);
+
+        Aidge::compact_data(static_cast<std::int8_t*>(data.getImpl()->rawPtr()), data.size(), compactData.data(), nb_bits);
+
+        // Expected result: each int8_t in compactData holds four 2-bit values (0b11'10'01'00).
+        const std::vector<std::int8_t> expected = {static_cast<std::int8_t>(0xE4)};
+        REQUIRE(compactData == expected);
+    }
+
+    SECTION("Edge Cases - Single element data") {
+        Tensor data = Array1D<std::int8_t, 1>{{static_cast<std::int8_t>(0x0F)}};
+        const std::uint8_t nb_bits = 4;
+
+        const std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits);
+        std::vector<std::int8_t> compactData(required_size);
+
+        Aidge::compact_data(static_cast<std::int8_t*>(data.getImpl()->rawPtr()), data.size(), compactData.data(), nb_bits);
+
+        // Expected result: the single 4-bit value sits in the high nibble, the low nibble is zero padding.
+        const std::vector<std::int8_t> expected = {static_cast<std::int8_t>(0xF0)};
+        REQUIRE(compactData == expected);
+    }
+
+    SECTION("Edge Cases - Non-divisible dataSize for nbSlot (4-bit)") {
+
+        Tensor data = Array1D<std::int8_t, 3>{{static_cast<std::int8_t>(0x0F), static_cast<std::int8_t>(0xA5), static_cast<std::int8_t>(0x34)}};
+        const std::uint8_t nb_bits = 4;
+
+        const std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits);
+        std::vector<std::int8_t> compactData(required_size);
+
+        Aidge::compact_data(static_cast<std::int8_t*>(data.getImpl()->rawPtr()), data.size(), compactData.data(), nb_bits);
+
+        // Expected: last byte zero-padded (only three 4-bit values to fit in two int8_t).
+        const std::vector<std::int8_t> expected = {static_cast<std::int8_t>(0xF5), static_cast<std::int8_t>(0x40)};
+        REQUIRE(compactData == expected);
+
+    }
+
+    SECTION("Edge Cases - Non-divisible dataSize for nbSlot (3-bit)") {
+
+        Tensor data = Array1D<std::int8_t, 3>{{static_cast<std::int8_t>(0x0F), static_cast<std::int8_t>(0x05), static_cast<std::int8_t>(0x04)}};
+        const std::uint8_t nb_bits = 3;
+
+        const std::size_t required_size = Aidge::compactDataSize(data.size(), nb_bits);
+        std::vector<std::int8_t> compactData(required_size);
+
+        Aidge::compact_data(static_cast<std::int8_t*>(data.getImpl()->rawPtr()), data.size(), compactData.data(), nb_bits);
+
+        // Expected: each 3-bit value occupies a 4-bit slot; last byte zero-padded (three values in two int8_t).
+        const std::vector<std::int8_t> expected = {static_cast<std::int8_t>(0x75), static_cast<std::int8_t>(0x40)};
+        REQUIRE(compactData == expected);
+
+    }
+
+}
+
-- 
GitLab