diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
index 2223acef71116f761e47cc9ff15e1033a220b0fb..98015d5b67e139ec2f842f3ba50f278a578e3da6 100644
--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -53,6 +53,7 @@
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
 #include "aidge/backend/cpu/operator/TanhImpl.hpp"
+#include "aidge/backend/cpu/operator/WeightInterleavingImpl.hpp"
 
 #include "aidge/backend/cpu/data/TensorImpl.hpp"
diff --git a/include/aidge/backend/cpu/operator/WeightInterleavingImpl.hpp b/include/aidge/backend/cpu/operator/WeightInterleavingImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0b3b1c5765e4db42500645c314f8befe7cd9b182
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/WeightInterleavingImpl.hpp
@@ -0,0 +1,37 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_
+#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_
+
+#include <array>
+#include <memory>
+#include <vector>
+
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
+#include "aidge/operator/WeightInterleaving.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+// Operator implementation entry point for the backend
+using WeightInterleavingImpl_cpu = OperatorImpl_cpu<WeightInterleaving_Op,
+    void(const DimSize_t,
+         const DimSize_t,
+         const DimSize_t,
+         const void *,
+         void *)>;
+
+// Implementation entry point registration to Operator
+REGISTRAR(WeightInterleaving_Op, "cpu", Aidge::WeightInterleavingImpl_cpu::create);
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/WeightInterleavingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/WeightInterleavingImpl_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..422afab59178732dbcb2427892fbf930e97cbb45
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/WeightInterleavingImpl_kernels.hpp
@@ -0,0 +1,105 @@
+
+
+#ifndef AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_KERNELS_H_
+
+#include <algorithm>
+
+#include "aidge/backend/cpu/operator/WeightInterleavingImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+
+namespace Aidge {
+
+    /**
+     * @brief Compacts 8-bit data into a smaller bit-width representation.
+     *
+     * This function takes an array of 8-bit data and compacts it into smaller chunks
+     * based on the specified bit-width `nb_bits`. Each element in `compactData` will
+     * store multiple packed `nb_bits` segments extracted from `data`.
+     *
+     * @param data The input array of 8-bit values to be compacted.
+     * @param dataSize The size of the input `data` array.
+     * @param compactData The output array storing the compacted data.
+     * @param nb_bits The number of bits to extract from each `data` element (must be between 1 and 4).
+     */
+    void compact_data(const std::int8_t* data, std::size_t dataSize, std::int8_t* compactData, std::uint8_t nb_bits) {
+        AIDGE_ASSERT(nb_bits > 0 && nb_bits < 5, "Cannot compact with the given nb_bits"); // Ensure valid bit width
+
+        // Mask to extract `nb_bits` from each data element
+        const unsigned int mask = (1U << nb_bits) - 1;
+
+        // Calculate the number of `nb_bits` segments that fit into an 8-bit compacted value
+        const unsigned int nbSlot = 8 / nb_bits;
+
+        // Case nb_bits=3 or 4, then shift is 4
+        // Case nb_bits=2, then shift is 2
+        // Case nb_bits=1, then shift is 1
+        std::uint8_t shift = 8 / nbSlot;
+
+        const unsigned int nbFullCompactbytes = dataSize / nbSlot;
+
+        // Main loop to process data in groups of `nbSlot`
+        for (std::size_t i = 0; i < nbFullCompactbytes; ++i) {
+            std::int8_t compact = 0;
+
+            for (unsigned int j = 0; j < nbSlot; ++j) {
+                compact |= (data[i * nbSlot + j] & mask); // Apply mask to keep `nb_bits` only
+
+                // Shift only if not on the last slot to make room for the next `nb_bits`
+                if (j < nbSlot - 1) {
+                    compact <<= shift;
+                }
+            }
+            // Store the compacted value in the output array
+            compactData[i] = compact;
+        }
+
+
+        // Handle any remaining data elements (if dataSize is not a multiple of nbSlot).
+        std::size_t remaining = dataSize % nbSlot;
+        if (remaining != 0) {
+            std::int8_t compact = 0;
+            for (std::size_t j = 0; j < remaining; ++j) {
+                compact |= (data[nbFullCompactbytes*nbSlot + j] & mask);
+
+                if (j < remaining - 1) {
+                    compact <<= shift;
+                }
+            }
+            compact <<= (shift*(nbSlot - remaining));
+            // Store the last compacted value
+            compactData[nbFullCompactbytes] = compact;
+        }
+    }
+
+template <class I, class O, int nb_bits>
+void WeightInterleavingImpl_cpu_forward_kernel(const DimSize_t input_interleaving,
+                                               const DimSize_t nb_interleaving,
+                                               const DimSize_t output_interleaving,
+                                               const void* input_,
+                                               void* output_) {
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    // Compact each group of input_interleaving elements into output_interleaving bytes
+    for (std::size_t i = 0; i < nb_interleaving; ++i) {
+        compact_data(input+(i*input_interleaving), input_interleaving, output+(i*output_interleaving), static_cast<std::uint8_t>(nb_bits));
+    }
+
+}
+
+
+REGISTRAR(WeightInterleavingImpl_cpu,
+    {ImplSpec::IOSpec{DataType::Int4, DataFormat::NHWC}},
+    {ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 4>, nullptr});
+REGISTRAR(WeightInterleavingImpl_cpu,
+    {ImplSpec::IOSpec{DataType::Int3, DataFormat::NHWC}},
+    {ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 3>, nullptr});
+REGISTRAR(WeightInterleavingImpl_cpu,
+    {ImplSpec::IOSpec{DataType::Int2, DataFormat::NHWC}},
+    {ProdConso::defaultModel, Aidge::WeightInterleavingImpl_cpu_forward_kernel<int8_t, int8_t, 2>, nullptr});
+
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_WEIGHTINTERLEAVINGIMPL_KERNELS_H_ */
\ No newline at end of file
diff --git a/src/operator/WeightInterleavingImpl.cpp b/src/operator/WeightInterleavingImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..afb79179512c8ed360387532b458e6bbe10a92b9
--- /dev/null
+++ b/src/operator/WeightInterleavingImpl.cpp
@@ -0,0 +1,75 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include "aidge/backend/cpu/operator/WeightInterleavingImpl.hpp"
+
+#include <cstddef>  // std::size_t
+#include <functional>
+#include <memory>
+#include <numeric>  // std::accumulate
+
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/WeightInterleavingImpl_kernels.hpp"
+#include "aidge/operator/WeightInterleaving.hpp"
+#include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Types.h"
+
+
+template <>
+void Aidge::WeightInterleavingImpl_cpu::forward()
+{
+    const WeightInterleaving_Op& op_ = dynamic_cast<const WeightInterleaving_Op&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0");
+
+    const auto impl = Registrar<WeightInterleavingImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
+
+    // inputInterleaving is the number of consecutive input elements that will be compacted
+    // Here the interleaving is the last dimension (cf STM32 low bit kernels)
+    std::size_t inputInterleaving = input0.dims().back();
+
+    // The resulting compacted dimension was computed in forwardDims and the output tensor was resized
+    std::size_t outputInterleaving = op_.getOutput(0)->dims().back();
+
+    // nb_interleaving is the number of compacted segments
+    std::size_t nbInterleaving;
+
+    // Determine the number of segments to compact
+    if (input0.dims().size() > 1){
+        nbInterleaving = std::accumulate(
+            input0.dims().cbegin(),
+            std::prev(input0.dims().cend()), // Exclude the last element
+            std::size_t(1),
+            std::multiplies<std::size_t>());
+    } else {
+        // Case when the weight tensor is only one dimension
+        nbInterleaving = 1;
+    }
+
+    impl.forward(inputInterleaving,
+                 nbInterleaving,
+                 outputInterleaving,
+                 input0.getImpl()->rawPtr(),
+                 getCPUPtr(mOp.getRawOutput(0)));
+
+
+}
+
+template <>
+void Aidge::WeightInterleavingImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for WeightInterleaving_Op on backend cpu");
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_WeightInterleavingImpl.cpp b/unit_tests/operator/Test_WeightInterleavingImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8d4a6ac5ca4daf81f4f1bbaf502edbcf3c25fce0
--- /dev/null
+++ b/unit_tests/operator/Test_WeightInterleavingImpl.cpp
@@ -0,0 +1,330 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/WeightInterleaving.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+#include <memory>
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] WeightInterleaving", "[WeightInterleaving][CPU]") {
+
+    std::shared_ptr<Node> myWeightInterleaving = WeightInterleaving();
+    auto opWeightInterleaving = std::static_pointer_cast<WeightInterleaving_Op>(myWeightInterleaving -> getOperator());
+
+    SECTION("CompactDataSize - Single element cases") {
+        REQUIRE(opWeightInterleaving->compactDataSize(1, 1) == 1); // 1 bit, needs 1 byte
+        REQUIRE(opWeightInterleaving->compactDataSize(1, 7) == 1); // 7 bits, needs 1 byte
+    }
+
+    SECTION("CompactDataSize - Boundary cases for different nb_bits values") {
+        REQUIRE(opWeightInterleaving->compactDataSize(8, 1) == 1); // 8 elements at 1 bit each, fits in 1 byte
+        REQUIRE(opWeightInterleaving->compactDataSize(8, 2) == 2); // 8 elements at 2 bits each, needs 2 bytes
+        REQUIRE(opWeightInterleaving->compactDataSize(8, 3) == 4); // 8 elements at 3 bits each, needs 4 bytes
+        REQUIRE(opWeightInterleaving->compactDataSize(8, 4) == 4); // 8 elements at 4 bits each, needs 4 bytes
+    }
+
+    SECTION("CompactDataSize - Larger dataSize values") {
+        REQUIRE(opWeightInterleaving->compactDataSize(16, 1) == 2); // 16 elements at 1 bit each, fits in 2 bytes
+        REQUIRE(opWeightInterleaving->compactDataSize(16, 2) == 4); // 16 elements at 2 bits each, needs 4 bytes
+        REQUIRE(opWeightInterleaving->compactDataSize(16, 3) == 8); // 16 elements at 3 bits each, needs 8 bytes (only two 3-bit values fit per byte)
+        REQUIRE(opWeightInterleaving->compactDataSize(16, 4) == 8); // 16 elements at 4 bits each, needs 8 bytes
+    }
+
+    SECTION("CompactDataSize - Odd dataSize values with varying nb_bits") {
+        REQUIRE(opWeightInterleaving->compactDataSize(7, 1) == 1); // 7 elements at 1 bit each, fits in 1 byte
+        REQUIRE(opWeightInterleaving->compactDataSize(7, 2) == 2); // 7 elements at 2 bits each, needs 2 bytes
+        REQUIRE(opWeightInterleaving->compactDataSize(7, 3) == 4); // 7 elements at 3 bits each, needs 4 bytes
+        REQUIRE(opWeightInterleaving->compactDataSize(7, 4) == 4); // 7 elements at 4 bits each, needs 4 bytes
+    }
+
+    SECTION("CompactDataSize - Minimum and maximum values for nb_bits") {
+        REQUIRE(opWeightInterleaving->compactDataSize(5, 1) == 1); // 5 elements at 1 bit each, fits in 1 byte
+    }
+
+    SECTION("CompactDataSize - Edge Case - dataSize of 0 should result in 0 required size") {
+        REQUIRE(opWeightInterleaving->compactDataSize(0, 1) == 0); // No data elements
+    }
+
+
+    SECTION("CompactData - 4-bit compaction") {
+        std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 4>{
+            {static_cast<std::int8_t>(0x0F),
+             static_cast<std::int8_t>(0xF5),
+             static_cast<std::int8_t>(0xB3),
+             static_cast<std::int8_t>(0x9C)}
+        });
+
+        weight->setDataFormat(Aidge::DataFormat::NHWC);
+        weight->setDataType(Aidge::DataType::Int4);
+
+        std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{
+            {static_cast<int8_t>(0xF5),
+             static_cast<int8_t>(0x3C)}
+        });
+
+        expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC);
+        expectedWeightInterleaving->setDataType(Aidge::DataType::Int4);
+
+        std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving();
+        auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator());
+ op->associateInput(0,weight); + op->setDataType(DataType::Int4); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + + SECTION("CompactData - 3-bit compaction") { + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 4>{ + {static_cast<int8_t>(0x0F), + static_cast<int8_t>(0x05), + static_cast<int8_t>(0x04), + static_cast<int8_t>(0xD3)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int3); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{ + {static_cast<int8_t>(0x75), + static_cast<int8_t>(0x43)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(Aidge::DataType::Int3); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(DataType::Int3); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + + SECTION("CompactData - 2-bit compaction") { + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 4>{ + {static_cast<std::int8_t>(0x03), + static_cast<std::int8_t>(0x02), + static_cast<std::int8_t>(0x01), + static_cast<std::int8_t>(0x00)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int2); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 1>{ + {static_cast<int8_t>(0xE4)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(Aidge::DataType::Int2); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(DataType::Int2); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + + SECTION("CompactData - Edge Cases - Single element data") { + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 1>{ + {static_cast<int8_t>(0x0F)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int4); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 1>{ + {static_cast<int8_t>(0xF0)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(Aidge::DataType::Int4); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(DataType::Int4); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + + SECTION("CompactData - Edge Cases - Non-divisible dataSize for nbSlot with nbbits=4") { + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 3>{ + {static_cast<int8_t>(0x0F), 
+ static_cast<int8_t>(0xA5), + static_cast<int8_t>(0x34)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int4); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{ + {static_cast<int8_t>(0xF5), + static_cast<int8_t>(0x40)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(Aidge::DataType::Int4); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(DataType::Int4); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + + } + + SECTION("CompactData - Edge Cases - Non-divisible dataSize for nbSlot with nbbits=3") { + + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array1D<std::int8_t, 3>{ + {static_cast<int8_t>(0x0F), + static_cast<int8_t>(0x05), + static_cast<int8_t>(0x04)} + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int3); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array1D<std::int8_t, 2>{ + {static_cast<int8_t>(0x75), + static_cast<int8_t>(0x40)} + }); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(Aidge::DataType::Int3); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(DataType::Int3); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + + } + + SECTION("Forward Op - Convolution weight interleaving") { + + // Weight [Cout = 2, H = 3, W = 3, Cin = 4]: + std::shared_ptr<Tensor> weight = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,4> { + { + { + { + {-6, 0, 5, -8}, // 'A' '0' '5' '8' in hexadecimal format + { 5, 5, 4, -5}, // '5' '5' '4' 'B' in hexadecimal format + {-7, -1, 4, -7} // '9' 'F' '4' '9' in hexadecimal format + }, + { + { 3, -3, -3, -3}, // '3' 'D' 'D' 'D' in hexadecimal format + { 1, 3, 1, -1}, // '1' '3' '1' 'F' in hexadecimal format + { 7, -3, -1, 4} // '7' 'D' 'F' '4' in hexadecimal format + }, + { + {-1, 3, 5, 6}, // 'F' '3' '5' '6' in hexadecimal format + {-8, 4, 7, 1}, // '8' '4' '7' '1' in hexadecimal format + {-5, 0, -1, -2} // 'B' '0' 'F' 'E' in hexadecimal format + } + }, + { + { + { 2, -7, 7, -4}, // '2' '9' '7' 'C' in hexadecimal format + {-7, 3, 0, 2}, // '9' '3' '0' '2' in hexadecimal format + { 1, -1, 2, 3} // '1' 'F' '2' '3' in hexadecimal format + }, + { + {-1, -5, -3, -7}, // 'F' 'B' 'D' '9' in hexadecimal format + {-8, 3, 5, -1}, // '8' '3' '5' 'F' in hexadecimal format + {-7, -4, -6, -1} // '9' 'C' 'A' 'F' in hexadecimal format + }, + { + { 1, 7, 5, -1}, // '1' '7' '5' 'F' in hexadecimal format + { 1, -8, 1, 2}, // '1' '8' '1' '2' in hexadecimal format + {-1, -6, -3, 0} // 'F' 'A' 'D' '0' in hexadecimal format + } + } + } + }); + + std::shared_ptr<Tensor> expectedWeightInterleaving = std::make_shared<Tensor>(Array4D<std::int8_t,2,3,3,2> { + { + { + { + {static_cast<int8_t>(0xA0), static_cast<int8_t>(0x58)}, // 'A' '0' '5' '8' in hexadecimal format + 
{static_cast<int8_t>(0x55), static_cast<int8_t>(0x4B)}, // '5' '5' '4' 'B' in hexadecimal format + {static_cast<int8_t>(0x9F), static_cast<int8_t>(0x49)} // '9' 'F' '4' '9' in hexadecimal format + }, + { + {static_cast<int8_t>(0x3D), static_cast<int8_t>(0xDD)}, // '3' 'D' 'D' 'D' in hexadecimal format + {static_cast<int8_t>(0x13), static_cast<int8_t>(0x1F)}, // '1' '3' '1' 'F' in hexadecimal format + {static_cast<int8_t>(0x7D), static_cast<int8_t>(0xF4)} // '7' 'D' 'F' '4' in hexadecimal format + }, + { + {static_cast<int8_t>(0xF3), static_cast<int8_t>(0x56)}, // 'F' '3' '5' '6' in hexadecimal format + {static_cast<int8_t>(0x84), static_cast<int8_t>(0x71)}, // '8' '4' '7' '1' in hexadecimal format + {static_cast<int8_t>(0xB0), static_cast<int8_t>(0xFE)} // 'B' '0' 'F' 'E' in hexadecimal format + } + }, + { + { + {static_cast<int8_t>(0x29), static_cast<int8_t>(0x7C)}, // '2' '9' '7' 'C' in hexadecimal format + {static_cast<int8_t>(0x93), static_cast<int8_t>(0x02)}, // '9' '3' '0' '2' in hexadecimal format + {static_cast<int8_t>(0x1F), static_cast<int8_t>(0x23)} // '1' 'F' '2' '3' in hexadecimal format + }, + { + {static_cast<int8_t>(0xFB), static_cast<int8_t>(0xD9)}, // 'F' 'B' 'D' '9' in hexadecimal format + {static_cast<int8_t>(0x83), static_cast<int8_t>(0x5F)}, // '8' '3' '5' 'F' in hexadecimal format + {static_cast<int8_t>(0x9C), static_cast<int8_t>(0xAF)} // '9' 'C' 'A' 'F' in hexadecimal format + }, + { + {static_cast<int8_t>(0x17), static_cast<int8_t>(0x5F)}, // '1' '7' '5' 'F' in hexadecimal format + {static_cast<int8_t>(0x18), static_cast<int8_t>(0x12)}, // '1' '8' '1' '2' in hexadecimal format + {static_cast<int8_t>(0xFA), static_cast<int8_t>(0xD0)} // 'F' 'A' 'D' '0' in hexadecimal format + } + } + } + }); + + weight->setDataFormat(Aidge::DataFormat::NHWC); + weight->setDataType(Aidge::DataType::Int4); + + expectedWeightInterleaving->setDataFormat(Aidge::DataFormat::NHWC); + expectedWeightInterleaving->setDataType(Aidge::DataType::Int4); + + std::shared_ptr<Node> myWeightInterleavingNode = WeightInterleaving(); + auto op = std::static_pointer_cast<OperatorTensor>(myWeightInterleavingNode -> getOperator()); + op->associateInput(0,weight); + op->setDataType(DataType::Int4); + op->setDataFormat(DataFormat::NHWC); + op->setBackend("cpu"); + myWeightInterleavingNode->forward(); + REQUIRE(*(op->getOutput(0)) == *expectedWeightInterleaving); + } + +}
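
Note on the packing scheme (illustrative sketch, not part of the patch): compact_data fills each output byte starting from the most-significant bits, so for Int4 data two consecutive values of the last NHWC dimension share one byte, with the first value in the high nibble and a trailing odd value left-aligned. The standalone snippet below mirrors that 4-bit path and checks it against the values used in the "Non-divisible dataSize for nbSlot with nbbits=4" section above; the pack4 helper is hypothetical and only reproduces the kernel's arithmetic, assuming two's-complement std::int8_t storage.

// Illustrative sketch only: mirrors the nb_bits = 4 behaviour of compact_data.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Pack 4-bit signed values two per byte: even index -> high nibble,
// odd index -> low nibble; a trailing element is left-aligned (low nibble = 0).
std::vector<std::int8_t> pack4(const std::vector<std::int8_t>& data) {
    std::vector<std::int8_t> out((data.size() + 1) / 2, 0);
    for (std::size_t i = 0; i < data.size(); ++i) {
        const std::uint8_t nibble = static_cast<std::uint8_t>(data[i]) & 0x0F;
        out[i / 2] |= static_cast<std::int8_t>((i % 2 == 0) ? (nibble << 4) : nibble);
    }
    return out;
}

int main() {
    // Same input as the "Non-divisible dataSize for nbSlot with nbbits=4" test.
    const std::vector<std::int8_t> weights = {
        static_cast<std::int8_t>(0x0F),
        static_cast<std::int8_t>(0xA5),
        static_cast<std::int8_t>(0x34)};
    const std::vector<std::int8_t> packed = pack4(weights);
    assert(packed.size() == 2);
    assert(static_cast<std::uint8_t>(packed[0]) == 0xF5); // F from 0x0F (high nibble), 5 from 0xA5 (low nibble)
    assert(static_cast<std::uint8_t>(packed[1]) == 0x40); // 0x34 masked to 0x4, left-aligned
    std::printf("packed: 0x%02X 0x%02X\n",
                static_cast<unsigned>(static_cast<std::uint8_t>(packed[0])),
                static_cast<unsigned>(static_cast<std::uint8_t>(packed[1])));
    return 0;
}

The forward kernel applies this packing once per group of input_interleaving elements (one call to compact_data per row of the flattened leading dimensions), which is why each Cin = 4 row of the convolution test above collapses into 2 output bytes.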