From f83d454a60246b64871b76aef5852c24999db66e Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Thu, 25 Jan 2024 14:28:09 +0000 Subject: [PATCH] [Upd] Code rework - [Syntax] Stimulis -> Stimuli and Stimuli -> Stimulus - [#define] Use standard Aidge syntax format - [Licence] add where missing - [#include] add what is used and remove what is not - [class] uniformize class member definition order - [types] change size_t for std::size_t from <cstddef> for uniformization - [types] change integer types for exact-width integers from <cstdint> - Remove end-of-line spaces --- include/aidge/aidge.hpp | 4 +- .../{StimuliImpl.hpp => StimulusImpl.hpp} | 16 ++- include/aidge/data/DataProvider.hpp | 61 ++++++---- include/aidge/data/Database.hpp | 45 +++++--- include/aidge/data/Tensor.hpp | 6 +- include/aidge/stimuli/Stimuli.hpp | 101 ----------------- include/aidge/stimuli/Stimulus.hpp | 107 ++++++++++++++++++ src/data/DataProvider.cpp | 61 +++++----- src/stimuli/Stimuli.cpp | 30 +++++ 9 files changed, 250 insertions(+), 181 deletions(-) rename include/aidge/backend/{StimuliImpl.hpp => StimulusImpl.hpp} (74%) delete mode 100644 include/aidge/stimuli/Stimuli.hpp create mode 100644 include/aidge/stimuli/Stimulus.hpp create mode 100644 src/stimuli/Stimuli.cpp diff --git a/include/aidge/aidge.hpp b/include/aidge/aidge.hpp index 09ebccb7e..e6a6f1334 100644 --- a/include/aidge/aidge.hpp +++ b/include/aidge/aidge.hpp @@ -14,7 +14,7 @@ #include "aidge/backend/OperatorImpl.hpp" #include "aidge/backend/TensorImpl.hpp" -#include "aidge/backend/StimuliImpl.hpp" +#include "aidge/backend/StimulusImpl.hpp" #include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/data/Database.hpp" @@ -59,7 +59,7 @@ #include "aidge/operator/Sub.hpp" #include "aidge/operator/Transpose.hpp" #include "aidge/scheduler/Scheduler.hpp" -#include "aidge/stimuli/Stimuli.hpp" +#include "aidge/stimuli/Stimulus.hpp" #include "aidge/recipies/Recipies.hpp" diff --git 
a/include/aidge/backend/StimuliImpl.hpp b/include/aidge/backend/StimulusImpl.hpp similarity index 74% rename from include/aidge/backend/StimuliImpl.hpp rename to include/aidge/backend/StimulusImpl.hpp index a949c5810..e875f9932 100644 --- a/include/aidge/backend/StimuliImpl.hpp +++ b/include/aidge/backend/StimulusImpl.hpp @@ -11,24 +11,22 @@ #ifndef AIDGE_CORE_BACKEND_STIMULIIMPL_H_ #define AIDGE_CORE_BACKEND_STIMULIIMPL_H_ + #include <memory> -#include "aidge/data/Data.hpp" + #include "aidge/data/Tensor.hpp" namespace Aidge { + /** - * @brief StimuliImpl. Base class to implement data loading functions. - * + * @brief Base class to implement data loading functions. */ -class StimuliImpl { +class StimulusImpl { public: + virtual ~StimulusImpl() noexcept = default; - StimuliImpl(){}; - virtual std::shared_ptr<Tensor> load() = 0; - - virtual ~StimuliImpl() = default; }; } // namespace Aidge -#endif /* AIDGE_STIMULIIMPL_H_ */ +#endif /* AIDGE_CORE_BACKEND_STIMULIIMPL_H_ */ diff --git a/include/aidge/data/DataProvider.hpp b/include/aidge/data/DataProvider.hpp index 055d6e926..5c7a1c73c 100644 --- a/include/aidge/data/DataProvider.hpp +++ b/include/aidge/data/DataProvider.hpp @@ -1,49 +1,64 @@ -#ifndef DATAPROVIDER_H_ -#define DATAPROVIDER_H_ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_DATA_DATAPROVIDER_H_ +#define AIDGE_CORE_DATA_DATAPROVIDER_H_ + +#include <cstddef> // std::size_t +#include <memory> // std::shared_ptr +#include <string> +#include <vector> // std::vector #include "aidge/data/Database.hpp" #include "aidge/data/Data.hpp" -namespace Aidge{ +namespace Aidge { + /** * @brief Data Provider. Takes in a database and compose batches by fetching data from the given database. * @todo Implement Drop last batch option. Currently returns the last batch with less elements in the batch. * @todo Implement readRandomBatch to compose batches from the database with a random sampling startegy. Necessary for training. */ class DataProvider { +private: + // Dataset providing the data to the dataProvider + const Database& mDatabase; + + const std::size_t mNumberModality; + std::vector<std::vector<std::size_t>> mDataSizes; + std::vector<std::string> mDataBackends; + std::vector<DataType> mDataTypes; + + // Desired size of the produced batches + const std::size_t mBatchSize; public: /** - * @brief Constructor of Data Provider. + * @brief Constructor of Data Provider. * @param database database from which to load the data. * @param batchSize number of data samples per batch. */ - DataProvider(Database& database, std::size_t batchSize); + DataProvider(const Database& database, const std::size_t batchSize); +public: /** * @brief Create a batch for each data modality in the database. The returned batch contain the data as sorted in the database. - * @param startIndex the starting index in the database to start the batch from. + * @param startIndex the starting index in the database to start the batch from. * @return a vector of tensors. Each tensor is a batch corresponding to one modality. 
*/ - std::vector<std::shared_ptr<Tensor>> readBatch(std::size_t startIndex); - -protected: - - // Dataset providing the data to the dataProvider - Database& mDatabase; - - size_t mNumberModality; - std::vector<std::vector<std::size_t>> mDataSizes; - std::vector<std::string> mDataBackends; - std::vector<DataType> mDataTypes; - - // Desired size of the produced batches - size_t mBatchSize; - + std::vector<std::shared_ptr<Tensor>> readBatch(const std::size_t startIndex) const; }; -} +} // namespace Aidge -#endif /* DATAPROVIDER_H_ */ \ No newline at end of file +#endif /* AIDGE_CORE_DATA_DATAPROVIDER_H_ */ diff --git a/include/aidge/data/Database.hpp b/include/aidge/data/Database.hpp index cfea403d3..edd4b4639 100644 --- a/include/aidge/data/Database.hpp +++ b/include/aidge/data/Database.hpp @@ -1,46 +1,57 @@ -#ifndef Database_H_ -#define Database_H_ - -#include <cstring> +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_DATA_DATABASE_H_ +#define AIDGE_CORE_DATA_DATABASE_H_ + +#include <cstddef> #include <memory> #include <vector> -#include <tuple> #include "aidge/data/Tensor.hpp" namespace Aidge { /** - * @brief Database. An abstract class representing a map from a key to data. All databases should inherit from this class. All subclasses should overwrite :cpp:function:`Database::getItem` to fetch data from a given index. + * @brief Abstract class representing a map from a key to data. + * All databases should inherit from this class. All subclasses should overwrite + * :cpp:function:`Database::getItem` to fetch data from a given index. 
*/ class Database { - public: Database() = default; - virtual ~Database() = default; + virtual ~Database() noexcept = default; /** - * @brief Fetch an item of the database. + * @brief Fetch an item of the database. * @param index index of the item. * @return vector of data mapped to index. */ - virtual std::vector<std::shared_ptr<Tensor>> getItem(std::size_t index) = 0; + virtual std::vector<std::shared_ptr<Tensor>> getItem(const std::size_t index) const = 0; /** * @brief Get the number of items in the database - * - * @return std::size_t + * + * @return std::size_t */ - virtual std::size_t getLen() = 0; + virtual std::size_t getLen() const noexcept = 0; /** * @brief Get the number of modalities in one database item - * - * @return std::size_t + * + * @return std::size_t */ - virtual std::size_t getNbModalities() = 0; + virtual std::size_t getNbModalities() const noexcept = 0; }; } // namespace Aidge -#endif /* Database_H_ */ \ No newline at end of file +#endif /* AIDGE_CORE_DATA_DATABASE_H_ */ diff --git a/include/aidge/data/Tensor.hpp b/include/aidge/data/Tensor.hpp index 8b0dae6f8..6d5f78748 100644 --- a/include/aidge/data/Tensor.hpp +++ b/include/aidge/data/Tensor.hpp @@ -55,14 +55,14 @@ class Tensor : public Data, { // ctor } - + /** * @brief Construct a new Tensor object from dimensions. 
- * + * * @param dims dimensions of the tensor * @param dataType datatype of the tensor (default = DataType::Float32) */ - Tensor(std::vector<DimSize_t> dims, DataType dataType = DataType::Float32) + Tensor(const std::vector<DimSize_t>& dims, DataType dataType = DataType::Float32) : Data(Type), mDataType(dataType), mDims(dims) diff --git a/include/aidge/stimuli/Stimuli.hpp b/include/aidge/stimuli/Stimuli.hpp deleted file mode 100644 index a6e08347b..000000000 --- a/include/aidge/stimuli/Stimuli.hpp +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef STIMULI_H -#define STIMULI_H - -#include <cstring> -#include <iostream> -#include <memory> - -#include "aidge/backend/StimuliImpl.hpp" -#include "aidge/data/Tensor.hpp" -#include "aidge/utils/Registrar.hpp" - -namespace Aidge { -/** - * @brief Stimuli. A class wrapping a data sample. Stimuli has two functioning modes. The first mode enables to load data samples from a dataPath and optionnaly store the data in-memory. The second mode enables to store a data sample that was already loaded in memory. - * @details When Stimuli is used in the first mode, the loading function is determined automaticaly based on the backend and the file extension. - */ -class Stimuli : public Registrable<Stimuli, std::tuple<std::string, std::string>, std::unique_ptr<StimuliImpl>(const std::string&)> { -public: - - Stimuli() = delete; - /** - * @brief Construct a new Stimuli object based on a dataPath to load the data. - * - * @param dataPath path to the data to be loaded. - * @param loadDataInMemory when true, keep the data in memory once loaded - */ - Stimuli(const std::string& dataPath, - bool loadDataInMemory = false) : - mDataPath(dataPath) - { - size_t dotPos = dataPath.find_last_of("."); - assert(dotPos != std::string::npos && "Cannot find extension"); - mFileExtension = dataPath.substr(dotPos + 1); - }; - - /** - * @brief Construct a new Stimuli object copied from another one. 
- * @param otherStimuli - */ - Stimuli(const Stimuli& otherStimuli) - : mDataPath(otherStimuli.mDataPath), - mLoadDataInMemory(otherStimuli.mLoadDataInMemory), - mFileExtension(otherStimuli.mFileExtension), - mData(otherStimuli.mData) - { - if (otherStimuli.mImpl) { - mImpl = Registrar<Stimuli>::create({"opencv", mFileExtension})(mDataPath); - } - } - - /** - * @brief Construct a new Stimuli object based on a tensor that is already loaded in memory. - * - * @param data the data tensor. - */ - Stimuli(const std::shared_ptr<Tensor> data) : - mData(data), - mLoadDataInMemory(true) {} - virtual ~Stimuli() {}; - - /** - * @brief Set the backend of the stimuli associated load implementation - * @details Create and initialize an implementation. - * @param name name of the backend. - */ - inline void setBackend(const std::string &name) { - mImpl = Registrar<Stimuli>::create({name, mFileExtension})(mDataPath); - } - - /** - * @brief Get the data tensor associated to the stimuli. The data is either loaded from a datapath or passed from an in-memory tensor. - * - * @return std::shared_ptr<Tensor> the data tensor. 
- */ - virtual std::shared_ptr<Tensor> load(){ - assert((mImpl!=nullptr || mData!=nullptr) && "No load implementation and No stored data"); - - if (mLoadDataInMemory){ - if (mData == nullptr){ - mData = mImpl->load(); - } - return mData; - } - return mImpl->load(); - }; - -protected: - // Implementation of the Stimuli - std::unique_ptr<StimuliImpl> mImpl; - - /// Stimuli data path - std::string mDataPath; - std::string mFileExtension; - bool mLoadDataInMemory; - - /// Stimuli data ptr - std::shared_ptr<Tensor> mData; -}; -} // namespace Aidge - -#endif // STIMULI_H diff --git a/include/aidge/stimuli/Stimulus.hpp b/include/aidge/stimuli/Stimulus.hpp new file mode 100644 index 000000000..cc12f8844 --- /dev/null +++ b/include/aidge/stimuli/Stimulus.hpp @@ -0,0 +1,107 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_STIMULI_STIMULI_H_ +#define AIDGE_CORE_STIMULI_STIMULI_H_ + +#include <string> +#include <memory> +#include <tuple> + +#include "aidge/backend/StimulusImpl.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/ErrorHandling.hpp" + +namespace Aidge { +/** + * @brief Stimulus. A class wrapping a data sample. Stimulus has two functioning modes. The first mode enables to load data samples from a dataPath and optionally store the data in-memory. The second mode enables to store a data sample that was already loaded in memory. + * @details When Stimulus is used in the first mode, the loading function is determined automatically based on the backend and the file extension. 
+ */ +class Stimulus : public Registrable<Stimulus, std::tuple<std::string, std::string>, std::unique_ptr<StimulusImpl>(const std::string&)> { +private: + /// Stimulus data path + const std::string mDataPath; + const std::string mFileExtension; + bool mLoadDataInMemory; + + /// Stimulus data ptr + std::shared_ptr<Tensor> mData; + + // Implementation of the Stimulus + std::unique_ptr<StimulusImpl> mImpl; + +public: + Stimulus() = delete; + + /** + * @brief Construct a new Stimulus object based on a tensor that is already loaded in memory. + * + * @param data the data tensor. + */ + Stimulus(const std::shared_ptr<Tensor> data) + : mLoadDataInMemory(true), + mData(data) + { + // ctor + } + + /** + * @brief Construct a new Stimulus object based on a dataPath to load the data. + * + * @param dataPath path to the data to be loaded. + * @param loadDataInMemory when true, keep the data in memory once loaded + */ + Stimulus(const std::string& dataPath, bool loadDataInMemory = false) + : mDataPath(dataPath), + mFileExtension(dataPath.substr(dataPath.find_last_of(".") + 1)), + mLoadDataInMemory(loadDataInMemory) + { + AIDGE_ASSERT((dataPath.find_last_of(".") != std::string::npos), "Cannot find extension"); + } + + /** + * @brief Construct a new Stimulus object copied from another one. + * @param otherStimulus + */ + Stimulus(const Stimulus& otherStimulus) + : mDataPath(otherStimulus.mDataPath), + mFileExtension(otherStimulus.mFileExtension), + mLoadDataInMemory(otherStimulus.mLoadDataInMemory), + mData(otherStimulus.mData) + { + if (otherStimulus.mImpl) { + mImpl = Registrar<Stimulus>::create({"opencv", mFileExtension})(mDataPath); + } + } + + virtual ~Stimulus(); + +public: + /** + * @brief Set the backend of the stimuli associated load implementation + * @details Create and initialize an implementation. + * @param name name of the backend. 
+ */ + inline void setBackend(const std::string &name) { + mImpl = Registrar<Stimulus>::create({name, mFileExtension})(mDataPath); + } + + /** + * @brief Get the data tensor associated with the stimulus. The data is either loaded from a datapath or passed from an in-memory tensor. + * + * @return std::shared_ptr<Tensor> the data tensor. + */ + virtual std::shared_ptr<Tensor> load(); +}; +} // namespace Aidge + +#endif /* AIDGE_CORE_STIMULI_STIMULI_H_ */ diff --git a/src/data/DataProvider.cpp b/src/data/DataProvider.cpp index 07804be8e..dffb5745d 100644 --- a/src/data/DataProvider.cpp +++ b/src/data/DataProvider.cpp @@ -1,39 +1,48 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + #include <cassert> +#include <cstddef> // std::size_t +#include <memory> +#include <vector> +#include "aidge/data/Database.hpp" #include "aidge/data/DataProvider.hpp" +#include "aidge/data/Tensor.hpp" -using namespace Aidge; -DataProvider::DataProvider(Database& database, std::size_t batchSize) - : - mDatabase(database), - mBatchSize(batchSize) +Aidge::DataProvider::DataProvider(const Aidge::Database& database, const std::size_t batchSize) + : mDatabase(database), + mNumberModality(database.getItem(0).size()), + mBatchSize(batchSize) { - // Get the tensor dimensions, datatype and backend of each modality to ensure each data have the same - auto item = mDatabase.getItem(0); - mNumberModality = item.size(); - // Iterating on each data modality in the database - for (std::size_t i = 0; i < mNumberModality; ++i) { - mDataSizes.push_back(item[i]->dims()); + // Get the tensor dimensions, datatype and backend of each modality to ensure each 
data have the same + for (const auto& modality : mDatabase.getItem(0)) { + mDataSizes.push_back(modality->dims()); // assert(std::strcmp(item[i]->getImpl()->backend(), "cpu") == 0 && "DataProvider currently only supports cpu backend tensors"); // mDataBackends.push_back(item[i]->getImpl()->backend()); - mDataTypes.push_back(item[i]->dataType()); + mDataTypes.push_back(modality->dataType()); } } -std::vector<std::shared_ptr<Tensor>> DataProvider::readBatch(std::size_t startIndex) +std::vector<std::shared_ptr<Aidge::Tensor>> Aidge::DataProvider::readBatch(const std::size_t startIndex) const { assert((startIndex) <= mDatabase.getLen() && " DataProvider readBatch : database fetch out of bounds"); - - + + // Determine the batch size (may differ for the last batch) - size_t current_batch_size; - if ((startIndex+mBatchSize) > mDatabase.getLen()){ - current_batch_size = mDatabase.getLen()-startIndex; - } else { - current_batch_size = mBatchSize; - } + const std::size_t current_batch_size = ((startIndex + mBatchSize) > mDatabase.getLen()) ? 
+ mDatabase.getLen()-startIndex : + mBatchSize; // Create batch tensors (dimensions, backends, datatype) for each modality std::vector<std::shared_ptr<Tensor>> batchTensors; @@ -47,28 +56,28 @@ std::vector<std::shared_ptr<Tensor>> DataProvider::readBatch(std::size_t startIn batchData->setDataType(mDataTypes[i]); batchTensors.push_back(batchData); } - + // Call each database item and concatenate each data modularity in the batch tensors for (std::size_t i = 0; i < current_batch_size; ++i){ - auto dataItem = mDatabase.getItem(startIndex+i); + auto dataItem = mDatabase.getItem(startIndex+i); // assert same number of modalities assert(dataItem.size() == mNumberModality && "DataProvider readBatch : item from database have inconsistent number of modality."); // Browse each modularity in the database item for (std::size_t j = 0; j < mNumberModality; ++j) { auto dataSample = dataItem[j]; - + // Assert tensor sizes assert(dataSample->dims() == mDataSizes[j] && "DataProvider readBatch : corrupted Data size"); - + // Assert implementation backend // assert(dataSample->getImpl()->backend() == mDataBackends[j] && "DataProvider readBatch : corrupted data backend"); // Assert DataType assert(dataSample->dataType() == mDataTypes[j] && "DataProvider readBatch : corrupted data DataType"); - // Concatenate into the batch tensor + // Concatenate into the batch tensor batchTensors[j]->getImpl()->copy(dataSample->getImpl()->rawPtr(), dataSample->size(), i*dataSample->size()); } } diff --git a/src/stimuli/Stimuli.cpp b/src/stimuli/Stimuli.cpp new file mode 100644 index 000000000..6a9153447 --- /dev/null +++ b/src/stimuli/Stimuli.cpp @@ -0,0 +1,30 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/stimuli/Stimulus.hpp" + +#include <memory> + +#include "aidge/data/Tensor.hpp" + +Aidge::Stimulus::~Stimulus() = default; + +std::shared_ptr<Aidge::Tensor> Aidge::Stimulus::load() { + AIDGE_ASSERT((mImpl!=nullptr || mData!=nullptr), "No load implementation and No stored data"); + + if (mLoadDataInMemory){ + if (mData == nullptr){ + mData = mImpl->load(); + } + return mData; + } + return mImpl->load(); +} \ No newline at end of file -- GitLab