/******************************************************************************** * Copyright (c) 2023 CEA-List * * This program and the accompanying materials are made available under the * terms of the Eclipse Public License 2.0 which is available at * http://www.eclipse.org/legal/epl-2.0. * * SPDX-License-Identifier: EPL-2.0 * ********************************************************************************/ #ifndef AIDGE_CORE_FILLER_H_ #define AIDGE_CORE_FILLER_H_ #include <memory> #include <random> // normal_distribution, uniform_real_distribution #include "aidge/data/Tensor.hpp" namespace Aidge { void calculateFanInFanOut(std::shared_ptr<Tensor> tensor, unsigned int& fanIn, unsigned int& fanOut) { AIDGE_ASSERT( tensor->nbDims() == 4, "Tensor need to have 4 dimensions to compute FanIn and FanOut."); // Warning: This function suppose NCXX data layout. // Aidge currently only support NCHW but this maybe not be true in the // future. DimSize_t batchSize = tensor->dims()[0]; DimSize_t channelSize = tensor->dims()[1]; AIDGE_ASSERT(batchSize != 0, "Cannot calculate FanIn if tensor batch size is 0."); AIDGE_ASSERT(channelSize != 0, "Cannot calculate FanOut if tensor channel size is 0."); fanIn = static_cast<unsigned int>(tensor->size() / batchSize); fanOut = static_cast<unsigned int>(tensor->size() / channelSize); } enum VarianceNorm { FanIn, Average, FanOut }; template <typename T> void constantFiller(std::shared_ptr<Tensor> tensor, T constantValue) { AIDGE_ASSERT(tensor->getImpl(), "Tensor got no implementation, cannot fill it."); AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type"); std::shared_ptr<Tensor> cpyTensor; // Create cpy only if tensor not on CPU Tensor& tensorWithValues = tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu"); // Setting values for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) { tensorWithValues.set<T>(idx, constantValue); } // Copy values back to the original tensors (actual copy only if needed) tensor->copyCastFrom(tensorWithValues); } // TODO: Keep template or use switch case depending on Tensor datatype ? template <typename T> void normalFiller(std::shared_ptr<Tensor> tensor, double mean = 0.0, double stdDev = 1.0) { AIDGE_ASSERT(tensor->getImpl(), "Tensor got no implementation, cannot fill it."); AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type"); std::random_device rd; std::mt19937 gen(rd()); // Mersenne Twister pseudo-random number generator std::normal_distribution<T> normalDist(mean, stdDev); std::shared_ptr<Tensor> cpyTensor; // Create cpy only if tensor not on CPU Tensor& tensorWithValues = tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu"); // Setting values for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) { tensorWithValues.set<T>(idx, normalDist(gen)); } // Copy values back to the original tensors (actual copy only if needed) tensor->copyCastFrom(tensorWithValues); }; // TODO: Keep template or use switch case depending on Tensor datatype ? template <typename T> void uniformFiller(std::shared_ptr<Tensor> tensor, T min, T max) { AIDGE_ASSERT(tensor->getImpl(), "Tensor got no implementation, cannot fill it."); AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type"); std::random_device rd; std::mt19937 gen(rd()); // Mersenne Twister pseudo-random number generator std::uniform_real_distribution<T> uniformDist(min, max); std::shared_ptr<Tensor> cpyTensor; // Create cpy only if tensor not on CPU Tensor& tensorWithValues = tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu"); // Setting values for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) { tensorWithValues.set<T>(idx, uniformDist(gen)); } // Copy values back to the original tensors (actual copy only if needed) tensor->copyCastFrom(tensorWithValues); }; template <typename T> void xavierUniformFiller(std::shared_ptr<Tensor> tensor, T scaling = 1.0, VarianceNorm varianceNorm = FanIn) { AIDGE_ASSERT(tensor->getImpl(), "Tensor got no implementation, cannot fill it."); AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type"); unsigned int fanIn, fanOut = 0; calculateFanInFanOut(tensor, fanIn, fanOut); const T n((varianceNorm == FanIn) ? fanIn : (varianceNorm == Average) ? (fanIn + fanOut) / 2.0 : fanOut); const T scale(std::sqrt(3.0 / n)); std::random_device rd; std::mt19937 gen(rd()); // Mersenne Twister pseudo-random number generator std::uniform_real_distribution<T> uniformDist(-scale, scale); std::shared_ptr<Tensor> cpyTensor; // Create cpy only if tensor not on CPU Tensor& tensorWithValues = tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu"); // Setting values for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) { T value = scaling * uniformDist(gen); tensorWithValues.set<T>(idx, value); } // Copy values back to the original tensors (actual copy only if needed) tensor->copyCastFrom(tensorWithValues); }; template <typename T> void xavierNormalFiller(std::shared_ptr<Tensor> tensor, T scaling = 1.0, VarianceNorm varianceNorm = FanIn) { AIDGE_ASSERT(tensor->getImpl(), "Tensor got no implementation, cannot fill it."); AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type"); unsigned int fanIn, fanOut = 0; calculateFanInFanOut(tensor, fanIn, fanOut); const T n((varianceNorm == FanIn) ? fanIn : (varianceNorm == Average) ? (fanIn + fanOut) / 2.0 : fanOut); const double stdDev(std::sqrt(1.0 / n)); std::random_device rd; std::mt19937 gen(rd()); // Mersenne Twister pseudo-random number generator std::normal_distribution<T> normalDist(0.0, stdDev); std::shared_ptr<Tensor> cpyTensor; // Create cpy only if tensor not on CPU Tensor& tensorWithValues = tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu"); // Setting values for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) { tensorWithValues.set<T>(idx, normalDist(gen)); } // Copy values back to the original tensors (actual copy only if needed) tensor->copyCastFrom(tensorWithValues); }; template <typename T> void heFiller(std::shared_ptr<Tensor> tensor, VarianceNorm varianceNorm = FanIn, T meanNorm = 0.0, T scaling = 1.0) { AIDGE_ASSERT(tensor->getImpl(), "Tensor got no implementation, cannot fill it."); AIDGE_ASSERT(NativeType<T>::type == tensor->dataType(), "Wrong data type"); unsigned int fanIn, fanOut = 0; calculateFanInFanOut(tensor, fanIn, fanOut); const T n((varianceNorm == FanIn) ? fanIn : (varianceNorm == Average) ? (fanIn + fanOut) / 2.0 : fanOut); const T stdDev(std::sqrt(2.0 / n)); const T mean(varianceNorm == FanIn ? meanNorm / fanIn : (varianceNorm == Average) ? meanNorm / ((fanIn + fanOut) / 2.0) : meanNorm / fanOut); std::random_device rd; std::mt19937 gen(rd()); // Mersenne Twister pseudo-random number generator std::normal_distribution<T> normalDist(mean, stdDev); std::shared_ptr<Tensor> cpyTensor; // Create cpy only if tensor not on CPU Tensor& tensorWithValues = tensor->refCastFrom(cpyTensor, tensor->dataType(), "cpu"); // Setting values for (std::size_t idx = 0; idx < tensorWithValues.size(); ++idx) { tensorWithValues.set<T>(idx, normalDist(gen)); } // Copy values back to the original tensors (actual copy only if needed) tensor->copyCastFrom(tensorWithValues); }; } // namespace Aidge #endif /* AIDGE_CORE_FILLER_H_ */