Forked from
Eclipse Projects / aidge / aidge_backend_cpu
82 commits behind, 9 commits ahead of the upstream repository.
-
Jerome Hue authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
FCImpl.cpp 4.14 KiB
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include <cstddef> // std::size_t
#include <functional>
#include <memory>
#include <tuple>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/FCImpl_kernels.hpp"
#include "aidge/operator/FC.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::FCImpl_cpu::forward()
{
    const FC_Op& fcOp = dynamic_cast<const FC_Op&>(mOp);
    AIDGE_ASSERT(fcOp.getInput(0), "missing input #0");
    AIDGE_ASSERT(fcOp.getInput(1), "missing input #1");

    // Pick the registered kernel best matching the required implementation spec.
    const auto kernel = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));

    // Convert input data (no overhead if not needed!)
    // TODO: right now, if needed, memory will be allocated/deallocated at each
    // call to forward(). We might put the following shared_ptr as members of
    // this class to avoid that.
    std::shared_ptr<Tensor> dataFallback, weightFallback, biasFallback;
    const auto& dataIn   = fcOp.getInput(0)->refCastFrom(dataFallback, *(fcOp.getOutput(0)));
    const auto& weightIn = fcOp.getInput(1)->refCastFrom(weightFallback, *(fcOp.getOutput(0)));
    // Bias (input #2) is optional; fall back to an empty tensor when absent.
    const auto& biasIn   = (fcOp.getInput(2))
        ? fcOp.getInput(2)->refCastFrom(biasFallback, *(fcOp.getOutput(0)))
        : Tensor();

    // A rank-1 data tensor is treated as a single sample (batch size 1).
    const auto nbBatches = (dataIn.dims().size() > 1) ? dataIn.dims()[0] : 1;

    // Call kernel
    kernel.forward(nbBatches,
        weightIn.dims()[1], // nb input features
        weightIn.dims()[0], // nb output features
        dataIn.getImpl()->rawPtr(),
        weightIn.getImpl()->rawPtr(),
        (fcOp.getInput(2)) ? biasIn.getImpl()->rawPtr() : nullptr,
        getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::FCImpl_cpu::backward()
{
    const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);

    // Incoming gradient w.r.t. the layer output drives the whole backward pass.
    const auto& fc_grad = op_.getOutput(0)->grad();
    AIDGE_ASSERT(fc_grad, "missing output #0 gradient");
    AIDGE_ASSERT(op_.getInput(0)->grad(), "missing input #0 gradient");
    AIDGE_ASSERT(op_.getInput(1)->grad(), "missing input #1 gradient");

    const auto impl = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));

    // Convert input data (no overhead if not needed!)
    // TODO: right now, if needed, memory will be allocated/deallocated at each
    // call to backward(). We might put the following shared_ptr as members of
    // this class to avoid that.
    std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
    const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
    const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
    // Bias gradient (input #2) is optional; empty tensor when the layer has no bias.
    const auto& input2grad = (op_.getInput(2)) ? op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0))) : Tensor();

    // Call kernel. A rank-1 gradient is treated as a single sample (batch size 1).
    const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
    impl.backward(batchSize,
        input1grad.dims()[1], // nb input features
        input1grad.dims()[0], // nb output features
        getCPUPtr(fc_grad),
        getCPUPtr(op_.getInput(0)),
        getCPUPtr(mOp.getRawInput(1)),
        input0grad.getImpl()->rawPtr(),
        input1grad.getImpl()->rawPtr(),
        (op_.getInput(2)) ? input2grad.getImpl()->rawPtr() : nullptr);
}