/********************************************************************************
 * Copyright (c) 2023 CEA-List
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 ********************************************************************************/

#ifndef AIDGE_CORE_FILLER_H_
#define AIDGE_CORE_FILLER_H_

#include <cmath>    // sqrt
#include <cstddef>  // size_t
#include <memory>
#include <random>  // normal_distribution, uniform_real_distribution

#include "aidge/data/Tensor.hpp"

namespace Aidge {

void calculateFanInFanOut(std::shared_ptr<Tensor> tensor, unsigned int& fanIn,
                          unsigned int& fanOut) {
    AIDGE_ASSERT(
        tensor->nbDims() == 4,
        "Tensor need to have 4 dimensions to compute FanIn and FanOut.");
    // Warning: This function suppose NCXX data layout.
    // Aidge currently only support NCHW but this maybe not be true in the
    // future.
    DimSize_t batchSize = tensor->dims()[0];
    DimSize_t channelSize = tensor->dims()[1];
    AIDGE_ASSERT(batchSize != 0,
                 "Cannot calculate FanIn if tensor batch size is 0.");
    AIDGE_ASSERT(channelSize != 0,
                 "Cannot calculate FanOut if tensor channel size is 0.");
    fanIn = static_cast<unsigned int>(tensor->size() / batchSize);
    fanOut = static_cast<unsigned int>(tensor->size() / channelSize);
}
/**
 * @brief Selects which quantity normalises the variance in the Xavier and He
 * fillers of this file.
 *
 * Enumerator values are spelled out explicitly; they are the same values the
 * compiler assigns implicitly (0, 1, 2).
 */
enum VarianceNorm {
    FanIn = 0,    ///< Normalise by the fan-in.
    Average = 1,  ///< Normalise by the mean of fan-in and fan-out.
    FanOut = 2    ///< Normalise by the fan-out.
};

template <typename T>
void constantFiller(std::shared_ptr<Tensor> tensor, T constantValue) {
    AIDGE_ASSERT(tensor->getImpl(),
                 "Tensor got no implementation, cannot fill it.");
    AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type");

    std::shared_ptr<Tensor> cpyTensor;
    // Create cpy only if tensor not on CPU
    Tensor& tensorWithValues =
        tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu");

    // Setting values
    for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) {
        tensorWithValues.set<T>(idx, constantValue);
    }

    // Copy values back to the original tensors (actual copy only if needed)
    tensor->copyCastFrom(tensorWithValues);
}
// TODO: Decide whether to keep this filler as a template or to dispatch with
// a switch on the tensor's data type.
template <typename T>
void normalFiller(std::shared_ptr<Tensor> tensor, double mean = 0.0,
                  double stdDev = 1.0) {
    AIDGE_ASSERT(tensor->getImpl(),
                 "Tensor got no implementation, cannot fill it.");
    AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type");
    std::random_device rd;
    std::mt19937 gen(rd());  // Mersenne Twister pseudo-random number generator

    std::normal_distribution<T> normalDist(mean, stdDev);

    std::shared_ptr<Tensor> cpyTensor;
    // Create cpy only if tensor not on CPU
    Tensor& tensorWithValues =
        tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu");

    // Setting values
    for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) {
        tensorWithValues.set<T>(idx, normalDist(gen));
    }

    // Copy values back to the original tensors (actual copy only if needed)
    tensor->copyCastFrom(tensorWithValues);
};

// TODO: Decide whether to keep this filler as a template or to dispatch with
// a switch on the tensor's data type.
template <typename T>
void uniformFiller(std::shared_ptr<Tensor> tensor, T min, T max) {
    AIDGE_ASSERT(tensor->getImpl(),
                 "Tensor got no implementation, cannot fill it.");
    AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type");
    std::random_device rd;
    std::mt19937 gen(rd());  // Mersenne Twister pseudo-random number generator

    std::uniform_real_distribution<T> uniformDist(min, max);

    std::shared_ptr<Tensor> cpyTensor;
    // Create cpy only if tensor not on CPU
    Tensor& tensorWithValues =
        tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu");

    // Setting values
    for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) {
        tensorWithValues.set<T>(idx, uniformDist(gen));
    }

    // Copy values back to the original tensors (actual copy only if needed)
    tensor->copyCastFrom(tensorWithValues);
};

/**
 * @brief Fill a tensor with the Xavier (Glorot) uniform scheme.
 *
 * Samples are drawn uniformly between -sqrt(3 / n) and sqrt(3 / n) and then
 * multiplied by `scaling`, where n is chosen by `varianceNorm`:
 *  - FanIn   : n = fanIn
 *  - Average : n = (fanIn + fanOut) / 2
 *  - FanOut  : n = fanOut
 * fanIn and fanOut come from calculateFanInFanOut(), which requires a
 * 4-dimensional tensor.
 *
 * Each call seeds a fresh std::mt19937 from std::random_device, so results
 * are not reproducible from one call to the next.
 *
 * @tparam T Native element type; it must match the tensor's data type
 *           (checked with AIDGE_ASSERT). The standard only defines
 *           std::uniform_real_distribution for floating-point types.
 * @param tensor       Tensor to fill; it must already have an implementation.
 * @param scaling      Factor applied to every drawn sample.
 * @param varianceNorm Selects the normalisation quantity n (see above).
 */
template <typename T>
void xavierUniformFiller(std::shared_ptr<Tensor> tensor, T scaling = 1.0,
                         VarianceNorm varianceNorm = FanIn) {
    AIDGE_ASSERT(tensor->getImpl(),
                 "Tensor got no implementation, cannot fill it.");
    AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type");

    // Initialise both outputs explicitly (one declaration per line: the
    // former `fanIn, fanOut = 0` only initialised fanOut).
    unsigned int fanIn = 0;
    unsigned int fanOut = 0;
    calculateFanInFanOut(tensor, fanIn, fanOut);

    // The average is computed in double so that fanIn + fanOut cannot wrap
    // around in unsigned arithmetic before the division.
    const T n((varianceNorm == FanIn) ? static_cast<double>(fanIn)
              : (varianceNorm == Average)
                  ? (static_cast<double>(fanIn) +
                     static_cast<double>(fanOut)) / 2.0
                  : static_cast<double>(fanOut));
    const T scale(std::sqrt(3.0 / n));

    std::random_device rd;
    std::mt19937 gen(rd());  // Mersenne Twister pseudo-random number generator

    std::uniform_real_distribution<T> uniformDist(-scale, scale);

    std::shared_ptr<Tensor> cpyTensor;
    // Create cpy only if tensor not on CPU
    Tensor& tensorWithValues =
        tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu");

    // Setting values
    const std::size_t nbValues = tensorWithValues.size();
    for (std::size_t idx = 0; idx < nbValues; ++idx) {
        const T value = scaling * uniformDist(gen);
        tensorWithValues.set<T>(idx, value);
    }

    // Copy values back to the original tensors (actual copy only if needed)
    tensor->copyCastFrom(tensorWithValues);
}
/**
 * @brief Fill a tensor with the Xavier (Glorot) normal scheme.
 *
 * Samples are drawn from a normal distribution with mean 0 and standard
 * deviation sqrt(1 / n), then multiplied by `scaling`, where n is chosen by
 * `varianceNorm`:
 *  - FanIn   : n = fanIn
 *  - Average : n = (fanIn + fanOut) / 2
 *  - FanOut  : n = fanOut
 * fanIn and fanOut come from calculateFanInFanOut(), which requires a
 * 4-dimensional tensor.
 *
 * Each call seeds a fresh std::mt19937 from std::random_device, so results
 * are not reproducible from one call to the next.
 *
 * @tparam T Native element type; it must match the tensor's data type
 *           (checked with AIDGE_ASSERT). The standard only defines
 *           std::normal_distribution for floating-point types.
 * @param tensor       Tensor to fill; it must already have an implementation.
 * @param scaling      Factor applied to every drawn sample. The default of
 *                     1.0 leaves the samples unchanged.
 * @param varianceNorm Selects the normalisation quantity n (see above).
 */
template <typename T>
void xavierNormalFiller(std::shared_ptr<Tensor> tensor, T scaling = 1.0,
                        VarianceNorm varianceNorm = FanIn) {
    AIDGE_ASSERT(tensor->getImpl(),
                 "Tensor got no implementation, cannot fill it.");
    AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type");

    // Initialise both outputs explicitly (one declaration per line: the
    // former `fanIn, fanOut = 0` only initialised fanOut).
    unsigned int fanIn = 0;
    unsigned int fanOut = 0;
    calculateFanInFanOut(tensor, fanIn, fanOut);

    // The average is computed in double so that fanIn + fanOut cannot wrap
    // around in unsigned arithmetic before the division.
    const T n((varianceNorm == FanIn) ? static_cast<double>(fanIn)
              : (varianceNorm == Average)
                  ? (static_cast<double>(fanIn) +
                     static_cast<double>(fanOut)) / 2.0
                  : static_cast<double>(fanOut));
    const double stdDev(std::sqrt(1.0 / n));

    std::random_device rd;
    std::mt19937 gen(rd());  // Mersenne Twister pseudo-random number generator

    std::normal_distribution<T> normalDist(0.0, stdDev);

    std::shared_ptr<Tensor> cpyTensor;
    // Create cpy only if tensor not on CPU
    Tensor& tensorWithValues =
        tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu");

    // Setting values. `scaling` used to be accepted but silently ignored;
    // it is now applied to each sample, as xavierUniformFiller does.
    const std::size_t nbValues = tensorWithValues.size();
    for (std::size_t idx = 0; idx < nbValues; ++idx) {
        const T value = scaling * normalDist(gen);
        tensorWithValues.set<T>(idx, value);
    }

    // Copy values back to the original tensors (actual copy only if needed)
    tensor->copyCastFrom(tensorWithValues);
}

template <typename T>
void heFiller(std::shared_ptr<Tensor> tensor, VarianceNorm varianceNorm = FanIn,
              T meanNorm = 0.0, T scaling = 1.0) {
    AIDGE_ASSERT(tensor->getImpl(),
                 "Tensor got no implementation, cannot fill it.");
    AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type");

    unsigned int fanIn, fanOut = 0;
    calculateFanInFanOut(tensor, fanIn, fanOut);

    const T n((varianceNorm == FanIn)     ? fanIn
              : (varianceNorm == Average) ? (fanIn + fanOut) / 2.0
                                          : fanOut);

    const T stdDev(std::sqrt(2.0 / n));

    const T mean(varianceNorm == FanIn ? meanNorm / fanIn
                 : (varianceNorm == Average)
                     ? meanNorm / ((fanIn + fanOut) / 2.0)
                     : meanNorm / fanOut);

    std::random_device rd;
    std::mt19937 gen(rd());  // Mersenne Twister pseudo-random number generator

    std::normal_distribution<T> normalDist(mean, stdDev);

    std::shared_ptr<Tensor> cpyTensor;
    // Create cpy only if tensor not on CPU
    Tensor& tensorWithValues =
        tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu");

    // Setting values
    for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) {
        tensorWithValues.set<T>(idx, normalDist(gen));
    }

    // Copy values back to the original tensors (actual copy only if needed)
    tensor->copyCastFrom(tensorWithValues);
};

}  // namespace Aidge

#endif /* AIDGE_CORE_FILLER_H_ */