Skip to content
Snippets Groups Projects
Commit 60d053c0 authored by Maxence Naud's avatar Maxence Naud
Browse files

Merge branch 'dataloader' of gitlab.eclipse.org:eclipse/aidge/aidge_core into dataloader

parents 9f7c89aa efb68e6a
No related branches found
No related tags found
2 merge requests!105version 0.2.0,!4Dataloader
Showing
with 406 additions and 10 deletions
...@@ -14,10 +14,11 @@ ...@@ -14,10 +14,11 @@
#include "aidge/backend/OperatorImpl.hpp" #include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/TensorImpl.hpp" #include "aidge/backend/TensorImpl.hpp"
#include "aidge/backend/StimuliImpl.hpp"
#include "aidge/data/Data.hpp" #include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp" #include "aidge/data/Tensor.hpp"
#include "aidge/data/Database.hpp"
#include "aidge/data/DataProvider.hpp"
#include "aidge/graph/Connector.hpp" #include "aidge/graph/Connector.hpp"
#include "aidge/graph/GraphView.hpp" #include "aidge/graph/GraphView.hpp"
#include "aidge/graph/Node.hpp" #include "aidge/graph/Node.hpp"
...@@ -58,6 +59,7 @@ ...@@ -58,6 +59,7 @@
#include "aidge/operator/Sub.hpp" #include "aidge/operator/Sub.hpp"
#include "aidge/operator/Transpose.hpp" #include "aidge/operator/Transpose.hpp"
#include "aidge/scheduler/Scheduler.hpp" #include "aidge/scheduler/Scheduler.hpp"
#include "aidge/stimuli/Stimuli.hpp"
#include "aidge/recipies/Recipies.hpp" #include "aidge/recipies/Recipies.hpp"
...@@ -66,7 +68,5 @@ ...@@ -66,7 +68,5 @@
#include "aidge/utils/DynamicAttributes.hpp" #include "aidge/utils/DynamicAttributes.hpp"
#include "aidge/utils/Registrar.hpp" #include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h" #include "aidge/utils/Types.h"
//#include "aidge/utilsParsing/AstNode.hpp"
//#include "aidge/utilsParsing/ParsingToken.hpp"
#endif /* AIDGE_IMPORTS_H_ */ #endif /* AIDGE_IMPORTS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_STIMULIIMPL_H_
#define AIDGE_STIMULIIMPL_H_
#include <memory>
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
namespace Aidge {
/**
* @brief StimuliImpl. Base class to implement data loading functions.
*
*/
class StimuliImpl {
public:
StimuliImpl(){};
virtual std::shared_ptr<Tensor> load(){};
virtual ~StimuliImpl() = default;
};
} // namespace Aidge
#endif /* AIDGE_STIMULIIMPL_H_ */
#ifndef DATAPROVIDER_H_
#define DATAPROVIDER_H_
#include "aidge/data/Database.hpp"
#include "aidge/data/Data.hpp"
namespace Aidge{
/**
 * @brief Data Provider. Builds batches by fetching items from a Database.
 * @todo Implement a "drop last batch" option. Currently the last batch is returned even when it holds fewer elements than the batch size.
 * @todo Implement readRandomBatch to compose batches from the database with a random sampling strategy. Necessary for training.
 */
class DataProvider {
public:
    /**
     * @brief Build a Data Provider on top of a database.
     * @param database database from which the data is loaded. It is stored by
     * reference, so it must outlive the DataProvider.
     * @param batchSize number of data samples per batch.
     */
    DataProvider(Database& database, std::size_t batchSize);

    /**
     * @brief Build one batch per data modality of the database. Samples are
     * taken in the order in which they are stored in the database.
     * @param startIndex index in the database of the first sample of the batch.
     * @return a vector of tensors, one batch tensor per modality.
     */
    std::vector<std::shared_ptr<Tensor>> readBatch(std::size_t startIndex);

protected:
    /// Database providing the data to the DataProvider (non-owning reference)
    Database& mDatabase;

    /// Number of modalities found in one database item
    std::size_t mNumberModality;

    /// Dimensions of one sample, for each modality
    std::vector<std::vector<std::size_t>> mDataSizes;

    /// Backend name for each modality
    std::vector<std::string> mDataBackends;

    /// Data type of one sample, for each modality
    std::vector<DataType> mDataTypes;

    /// Desired size of the produced batches
    std::size_t mBatchSize;
};
}
#endif /* DATAPROVIDER_H_ */
\ No newline at end of file
#ifndef Database_H_
#define Database_H_
#include <cstring>
#include <memory>
#include <vector>
#include <tuple>
#include "aidge/data/Tensor.hpp"
namespace Aidge {
/**
* @brief Database. An abstract class representing a map from an index to data.
* All databases should inherit from this class and override getItem(),
* getLen() and getNbModalities(), which are all pure virtual.
*/
class Database {
public:
Database() = default;
virtual ~Database() = default;
/**
* @brief Fetch an item of the database.
* @param index index of the item.
* @return vector of tensors mapped to index.
*/
virtual std::vector<std::shared_ptr<Tensor>> getItem(std::size_t index) = 0;
/**
* @brief Get the number of items in the database.
*
* @return std::size_t number of items.
*/
virtual std::size_t getLen() = 0;
/**
* @brief Get the number of modalities in one database item.
*
* @return std::size_t number of modalities.
*/
virtual std::size_t getNbModalities() = 0;
};
} // namespace Aidge
#endif /* Database_H_ */
\ No newline at end of file
...@@ -55,6 +55,20 @@ class Tensor : public Data, ...@@ -55,6 +55,20 @@ class Tensor : public Data,
{ {
// ctor // ctor
} }
/**
 * @brief Build a Tensor from its dimensions.
 *
 * Records the dimensions and the data type, then calls computeSize().
 *
 * @param dims dimensions of the tensor
 * @param dataType datatype of the tensor (default = DataType::Float32)
 */
Tensor(std::vector<DimSize_t> dims, DataType dataType = DataType::Float32)
    : Data(Type), mDataType(dataType), mDims(dims)
{
    computeSize();
}
/** /**
* @brief Construct a new Tensor object copied from another one. * @brief Construct a new Tensor object copied from another one.
......
...@@ -209,7 +209,7 @@ public: ...@@ -209,7 +209,7 @@ public:
* @brief Compute dimensions of input/output Tensors for each Operator of the * @brief Compute dimensions of input/output Tensors for each Operator of the
* GraphView object's Nodes. * GraphView object's Nodes.
*/ */
void forwardDims(); void forwardDims(const std::vector<std::vector<DimSize_t>> dims = {});
/** @brief Set the same backend for each Operator of the GraphView object's Nodes. */ /** @brief Set the same backend for each Operator of the GraphView object's Nodes. */
void setBackend(const std::string &backend, DeviceIdx_t device = 0); void setBackend(const std::string &backend, DeviceIdx_t device = 0);
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "aidge/data/Tensor.hpp"
namespace Aidge { namespace Aidge {
class Node; class Node;
class GraphView; class GraphView;
...@@ -49,11 +51,17 @@ public: ...@@ -49,11 +51,17 @@ public:
mScheduling.clear(); mScheduling.clear();
mStaticSchedule.clear(); mStaticSchedule.clear();
} }
/**
* @brief Set the given data tensors as the data inputs of the GraphView. When there are several data inputs, the tensors are mapped to them in the order given by the graph.
*
* @param data data input tensors, one per graph input
*/
void connectInputs(std::vector<std::shared_ptr<Aidge::Tensor>> data);
/** /**
* @brief Run the provided Computational Graph with a batch of data * @brief Run the provided Computational Graph with a batch of data
*/ */
void forward(bool forwardDims = true, bool verbose = false); void forward(bool forwardDims = true, bool verbose = false, std::vector<std::shared_ptr<Aidge::Tensor>> data = {});
/** /**
* @brief Save in a Markdown file the order of layers execution. * @brief Save in a Markdown file the order of layers execution.
......
#ifndef STIMULI_H
#define STIMULI_H
#include <cassert>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <tuple>
#include "aidge/backend/StimuliImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
/**
 * @brief Stimuli. A class wrapping a data sample. Stimuli has two functioning modes. The first mode loads the data sample from a dataPath and optionally keeps the data in memory. The second mode stores a data sample that was already loaded in memory.
 * @details When Stimuli is used in the first mode, the loading function is determined automatically based on the backend and the file extension.
 */
class Stimuli : public Registrable<Stimuli, std::tuple<std::string, std::string>, std::unique_ptr<StimuliImpl>(const std::string&)> {
public:
    Stimuli() = delete;

    /**
     * @brief Construct a new Stimuli object based on a dataPath to load the data.
     * @details No load implementation is created here; call setBackend()
     * before load().
     *
     * @param dataPath path to the data to be loaded. It must contain a '.'
     * followed by the file extension.
     * @param loadDataInMemory when true, keep the data in memory once loaded
     */
    Stimuli(const std::string& dataPath,
            bool loadDataInMemory = false)
        : mDataPath(dataPath),
          // The flag used to be dropped, leaving mLoadDataInMemory
          // uninitialized when load() read it.
          mLoadDataInMemory(loadDataInMemory)
    {
        const std::string::size_type dotPos = dataPath.find_last_of(".");
        assert(dotPos != std::string::npos && "Cannot find extension");
        mFileExtension = dataPath.substr(dotPos + 1);
    }

    /**
     * @brief Construct a new Stimuli object copied from another one.
     * @details The stored tensor is shared with the source object. A new load
     * implementation is created only if the source has one.
     * @param otherStimuli
     */
    Stimuli(const Stimuli& otherStimuli)
        : mDataPath(otherStimuli.mDataPath),
          mFileExtension(otherStimuli.mFileExtension),
          mLoadDataInMemory(otherStimuli.mLoadDataInMemory),
          mData(otherStimuli.mData)
    {
        if (otherStimuli.mImpl) {
            // NOTE(review): the backend name is not stored by setBackend(), so
            // the copy always uses "opencv" whatever backend the source used.
            mImpl = Registrar<Stimuli>::create({"opencv", mFileExtension})(mDataPath);
        }
    }

    /**
     * @brief Construct a new Stimuli object based on a tensor that is already loaded in memory.
     *
     * @param data the data tensor.
     */
    Stimuli(const std::shared_ptr<Tensor> data)
        : mLoadDataInMemory(true),
          mData(data) {}

    virtual ~Stimuli() = default;

    /**
     * @brief Set the backend of the stimuli associated load implementation
     * @details Create and initialize an implementation, selected in the
     * registrar by the pair (backend name, file extension).
     * @param name name of the backend.
     */
    inline void setBackend(const std::string &name) {
        mImpl = Registrar<Stimuli>::create({name, mFileExtension})(mDataPath);
    }

    /**
     * @brief Get the data tensor associated to the stimuli. The data is either loaded from a datapath or passed from an in-memory tensor.
     * @details When the data is kept in memory, the implementation is called
     * at most once and the stored tensor is returned afterwards. Otherwise
     * the implementation is called on every load().
     *
     * @return std::shared_ptr<Tensor> the data tensor.
     */
    virtual std::shared_ptr<Tensor> load() {
        assert((mImpl!=nullptr || mData!=nullptr) && "No load implementation and No stored data");
        if (mLoadDataInMemory) {
            if (mData == nullptr) {
                mData = mImpl->load();
            }
            return mData;
        }
        if (mImpl == nullptr) {
            // Only stored data is available: return it rather than
            // dereferencing a null implementation.
            return mData;
        }
        return mImpl->load();
    }

protected:
    /// Implementation of the Stimuli
    std::unique_ptr<StimuliImpl> mImpl;

    /// Stimuli data path
    std::string mDataPath;

    /// Extension of the data path (text after the last '.')
    std::string mFileExtension;

    /// When true, the loaded tensor is kept in mData and reused
    bool mLoadDataInMemory;

    /// Stimuli data ptr
    std::shared_ptr<Tensor> mData;
};
} // namespace Aidge
#endif // STIMULI_H
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "aidge/data/DataProvider.hpp"
#include "aidge/data/Database.hpp"
namespace py = pybind11;
namespace Aidge {
/**
 * @brief Register the DataProvider class in the given Python module.
 * @param m module receiving the binding.
 */
void init_DataProvider(py::module& m){
    py::class_<DataProvider, std::shared_ptr<DataProvider>>(m, "DataProvider")
        // DataProvider stores the Database by reference: keep the Python
        // database object (argument 2) alive as long as the provider (1).
        .def(py::init<Database&, std::size_t>(),
             py::arg("database"), py::arg("batchSize"),
             py::keep_alive<1, 2>())
        .def("read_batch", &DataProvider::readBatch, py::arg("start_index"),
R"mydelimiter(
Return a batch of each data modality.
:param start_index: Database starting index to read the batch from
:type start_index: int
)mydelimiter");
}
}
#include <pybind11/pybind11.h>
#include "aidge/data/Database.hpp"
namespace py = pybind11;
namespace Aidge {
/**
 * @brief Register the Database class in the given Python module.
 *
 * Only the type is exposed; no constructor or method is bound here.
 *
 * @param m module receiving the binding.
 */
void init_Database(py::module& m) {
    using DatabaseHolder = std::shared_ptr<Database>;
    py::class_<Database, DatabaseHolder>(m, "Database");
}
}
...@@ -100,7 +100,7 @@ void init_GraphView(py::module& m) { ...@@ -100,7 +100,7 @@ void init_GraphView(py::module& m) {
.def("get_nodes", &GraphView::getNodes) .def("get_nodes", &GraphView::getNodes)
.def("get_node", &GraphView::getNode, py::arg("node_name")) .def("get_node", &GraphView::getNode, py::arg("node_name"))
.def("forward_dims", &GraphView::forwardDims) .def("forward_dims", &GraphView::forwardDims, py::arg("dims")=std::vector<std::vector<DimSize_t>>())
.def("compile", &GraphView::compile, py::arg("backend"), py::arg("datatype"), py::arg("device") = 0) .def("compile", &GraphView::compile, py::arg("backend"), py::arg("datatype"), py::arg("device") = 0)
.def("__call__", &GraphView::operator(), py::arg("connectors")) .def("__call__", &GraphView::operator(), py::arg("connectors"))
.def("set_datatype", &GraphView::setDataType, py::arg("datatype")) .def("set_datatype", &GraphView::setDataType, py::arg("datatype"))
......
...@@ -15,6 +15,8 @@ namespace py = pybind11; ...@@ -15,6 +15,8 @@ namespace py = pybind11;
namespace Aidge { namespace Aidge {
void init_Data(py::module&); void init_Data(py::module&);
void init_Database(py::module&);
void init_DataProvider(py::module&);
void init_Tensor(py::module&); void init_Tensor(py::module&);
void init_OperatorImpl(py::module&); void init_OperatorImpl(py::module&);
void init_Attributes(py::module&); void init_Attributes(py::module&);
...@@ -66,6 +68,8 @@ void init_TensorUtils(py::module&); ...@@ -66,6 +68,8 @@ void init_TensorUtils(py::module&);
void init_Aidge(py::module& m){ void init_Aidge(py::module& m){
init_Data(m); init_Data(m);
init_Database(m);
init_DataProvider(m);
init_Tensor(m); init_Tensor(m);
init_Node(m); init_Node(m);
......
...@@ -13,13 +13,14 @@ ...@@ -13,13 +13,14 @@
#include <pybind11/stl.h> #include <pybind11/stl.h>
#include "aidge/scheduler/Scheduler.hpp" #include "aidge/scheduler/Scheduler.hpp"
#include "aidge/graph/GraphView.hpp" #include "aidge/graph/GraphView.hpp"
#include "aidge/data/Tensor.hpp"
namespace py = pybind11; namespace py = pybind11;
namespace Aidge { namespace Aidge {
void init_Scheduler(py::module& m){ void init_Scheduler(py::module& m){
py::class_<SequentialScheduler, std::shared_ptr<SequentialScheduler>>(m, "SequentialScheduler") py::class_<SequentialScheduler, std::shared_ptr<SequentialScheduler>>(m, "SequentialScheduler")
.def(py::init<std::shared_ptr<GraphView>&>(), py::arg("graph_view")) .def(py::init<std::shared_ptr<GraphView>&>(), py::arg("graph_view"))
.def("forward", &SequentialScheduler::forward, py::arg("forward_dims")=true, py::arg("verbose")=false) .def("forward", &SequentialScheduler::forward, py::arg("forward_dims")=true, py::arg("verbose")=false, py::arg("data")=std::vector<Tensor>())
.def("save_scheduling_diagram", &SequentialScheduler::saveSchedulingDiagram, py::arg("file_name")) .def("save_scheduling_diagram", &SequentialScheduler::saveSchedulingDiagram, py::arg("file_name"))
.def("resetScheduling", &SequentialScheduler::resetScheduling) .def("resetScheduling", &SequentialScheduler::resetScheduling)
.def("generate_scheduling", &SequentialScheduler::generateScheduling, py::arg("verbose")=false) .def("generate_scheduling", &SequentialScheduler::generateScheduling, py::arg("verbose")=false)
......
#include <cassert>
#include "aidge/data/DataProvider.hpp"
using namespace Aidge;
/**
 * @brief Constructor of Data Provider.
 *
 * Reads the first item of the database and records, for each modality, the
 * tensor dimensions and data type that every later item is checked against.
 *
 * @param database database from which to load the data (stored by reference).
 * @param batchSize number of data samples per batch.
 */
DataProvider::DataProvider(Database& database, std::size_t batchSize)
    : mDatabase(database),
      mNumberModality(0),
      mBatchSize(batchSize)
{
    // The layout of the data is inferred from item 0, so there must be one
    assert(mDatabase.getLen() > 0 && "DataProvider constructor : cannot read the data layout from an empty database");

    // Get the tensor dimensions, datatype and backend of each modality to ensure each data have the same
    const auto item = mDatabase.getItem(0);
    mNumberModality = item.size();
    mDataSizes.reserve(mNumberModality);
    mDataTypes.reserve(mNumberModality);

    // Iterating on each data modality in the database
    for (const auto& modality : item) {
        assert(modality != nullptr && "DataProvider constructor : database item contains a null tensor");
        mDataSizes.push_back(modality->dims());
        // assert(std::strcmp(modality->getImpl()->backend(), "cpu") == 0 && "DataProvider currently only supports cpu backend tensors");
        // mDataBackends.push_back(modality->getImpl()->backend());
        mDataTypes.push_back(modality->dataType());
    }
}
std::vector<std::shared_ptr<Tensor>> DataProvider::readBatch(std::size_t startIndex)
{
assert((startIndex) <= mDatabase.getLen() && " DataProvider readBatch : database fetch out of bounds");
// Determine the batch size (may differ for the last batch)
size_t current_batch_size;
if ((startIndex+mBatchSize) > mDatabase.getLen()){
current_batch_size = mDatabase.getLen()-startIndex;
} else {
current_batch_size = mBatchSize;
}
// Create batch tensors (dimensions, backends, datatype) for each modality
std::vector<std::shared_ptr<Tensor>> batchTensors;
auto dataBatchSize = mDataSizes;
for (std::size_t i = 0; i < mNumberModality; ++i) {
dataBatchSize[i].insert(dataBatchSize[i].begin(), current_batch_size);
auto batchData = std::make_shared<Tensor>();
batchData->resize(dataBatchSize[i]);
// batchData->setBackend(mDataBackends[i]);
batchData->setBackend("cpu");
batchData->setDataType(mDataTypes[i]);
batchTensors.push_back(batchData);
}
// Call each database item and concatenate each data modularity in the batch tensors
for (std::size_t i = 0; i < current_batch_size; ++i){
auto dataItem = mDatabase.getItem(startIndex+i);
// assert same number of modalities
assert(dataItem.size() == mNumberModality && "DataProvider readBatch : item from database have inconsistent number of modality.");
// Browse each modularity in the database item
for (std::size_t j = 0; j < mNumberModality; ++j) {
auto dataSample = dataItem[j];
// Assert tensor sizes
assert(dataSample->dims() == mDataSizes[j] && "DataProvider readBatch : corrupted Data size");
// Assert implementation backend
// assert(dataSample->getImpl()->backend() == mDataBackends[j] && "DataProvider readBatch : corrupted data backend");
// Assert DataType
assert(dataSample->dataType() == mDataTypes[j] && "DataProvider readBatch : corrupted data DataType");
// Concatenate into the batch tensor
batchTensors[j]->getImpl()->copy(dataSample->getImpl()->rawPtr(), dataSample->size(), i*dataSample->size());
}
}
return batchTensors;
}
\ No newline at end of file
...@@ -265,10 +265,18 @@ void Aidge::GraphView::compile(const std::string& backend, const Aidge::DataType ...@@ -265,10 +265,18 @@ void Aidge::GraphView::compile(const std::string& backend, const Aidge::DataType
forwardDims(); forwardDims();
} }
void Aidge::GraphView::forwardDims() { void Aidge::GraphView::forwardDims(const std::vector<std::vector<Aidge::DimSize_t>> dims) {
// setInputs // setInputs
// Link every tensor to the right pointer // Link every tensor to the right pointer
// following parent - children informations // following parent - children informations
if (!dims.empty()){
AIDGE_ASSERT(dims.size() == mInputNodes.size(), "GraphView forwardDims error - Inconsistent number of dimensions and graph inputs");
for (std::size_t i = 0; i < dims.size(); ++i){
auto tensor = std::make_shared<Tensor>(dims[i]);
mInputNodes[i].first->getOperator()->setInput(mInputNodes[i].second, tensor);
}
}
for (std::shared_ptr<Node> nodePtr : getNodes()) { for (std::shared_ptr<Node> nodePtr : getNodes()) {
for (IOIndex_t i = 0; i < nodePtr->nbInputs(); ++i) { for (IOIndex_t i = 0; i < nodePtr->nbInputs(); ++i) {
// assess if the input was not already set and is a Tensor then link it to parent output // assess if the input was not already set and is a Tensor then link it to parent output
......
...@@ -174,8 +174,28 @@ void Aidge::SequentialScheduler::generateScheduling(bool verbose) { ...@@ -174,8 +174,28 @@ void Aidge::SequentialScheduler::generateScheduling(bool verbose) {
} }
/**
 * Set each given tensor as the input of the matching ordered input of the
 * GraphView. The k-th tensor goes to the k-th entry returned by
 * getOrderedInputs().
 */
void Aidge::SequentialScheduler::connectInputs(std::vector<std::shared_ptr<Aidge::Tensor>> data){
    // This version of connect inputs only connects tensor inputs in input data producers.
    auto orderedInputs = mGraphView->getOrderedInputs();

    // One tensor is expected per graph input
    assert(data.size() == orderedInputs.size() && "Scheduler connectInput error - Inconsistent number of graph inputs and inputs passed to the graph");

    std::size_t inputIdx = 0;
    for (auto& tensor : data) {
        // TODO : maybe shallow copy instead of deepcopy
        orderedInputs[inputIdx].first->getOperator()->setInput(orderedInputs[inputIdx].second, tensor);
        ++inputIdx;
    }
}
// TODO: handle multiple inputs/outputs // TODO: handle multiple inputs/outputs
void Aidge::SequentialScheduler::forward(bool forwardDims, bool verbose) { void Aidge::SequentialScheduler::forward(bool forwardDims, bool verbose, std::vector<std::shared_ptr<Aidge::Tensor>> data) {
// Collect all data input of the graph (that are producers)
if (!data.empty()){
connectInputs(data);
}
// Forward dims (if allowed) // Forward dims (if allowed)
if (forwardDims) {mGraphView->forwardDims(); } if (forwardDims) {mGraphView->forwardDims(); }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment