// WeightInterleavingImpl.cpp
/********************************************************************************
 * Copyright (c) 2023 CEA-List
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 ********************************************************************************/

#include "aidge/backend/cpu/operator/WeightInterleavingImpl.hpp"

#include <cstddef>     // std::size_t
#include <functional>  // std::multiplies
#include <memory>      // std::shared_ptr
#include <numeric>     // std::accumulate
#include <tuple>

#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/WeightInterleavingImpl_kernels.hpp"
#include "aidge/operator/WeightInterleaving.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Types.h"


template <>
void Aidge::WeightInterleavingImpl_cpu::forward()
{
    // Compacts the weight tensor along its last dimension: each run of
    // `inputInterleaving` input elements is packed into `outputInterleaving`
    // output elements, repeated `nbInterleaving` times
    // (cf. STM32 low bit kernels).
    const WeightInterleaving_Op& op_ = dynamic_cast<const WeightInterleaving_Op&>(mOp);
    AIDGE_ASSERT(op_.getInput(0), "missing input #0");

    // Select the best registered kernel for the required spec
    const auto impl = Registrar<WeightInterleavingImpl_cpu>::create(getBestMatch(getRequiredSpec()));

    // Convert input data (no overhead if not needed!)
    // TODO: right now, if needed, memory will be allocated/deallocated at each
    // call to forward(). We might put the following shared_ptr as members of
    // this class to avoid that.
    std::shared_ptr<Tensor> input0Fallback;
    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));

    // inputInterleaving is the number of consecutive input elements that will be compacted
    // Here the interleaving is the last dimension (cf STM32 low bit kernels)
    const std::size_t inputInterleaving = input0.dims().back();

    // The resulting compacted dimension was computed in forwardDims and the output tensor was resized
    const std::size_t outputInterleaving = op_.getOutput(0)->dims().back();

    // nbInterleaving is the number of compacted segments: the product of all
    // dimensions except the last one, or 1 for a one-dimensional weight tensor.
    // Requires <numeric> for std::accumulate.
    const std::size_t nbInterleaving = (input0.dims().size() > 1)
        ? std::accumulate(input0.dims().cbegin(),
                          std::prev(input0.dims().cend()),  // exclude the last dimension
                          std::size_t(1),
                          std::multiplies<std::size_t>())
        : std::size_t(1);

    impl.forward(inputInterleaving,
        nbInterleaving,
        outputInterleaving,
        input0.getImpl()->rawPtr(),
        getCPUPtr(mOp.getRawOutput(0)));
}

template <>
void Aidge::WeightInterleavingImpl_cpu::backward() {
    // Backward pass is not implemented for this operator on the CPU backend:
    // always raises std::runtime_error (or aborts, per AIDGE_THROW_OR_ABORT).
    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for WeightInterleaving_Op on backend cpu");
}