Commit a6ee0f75 authored by Grégoire Kubler

Merge branch 'doxy' into 'main'

test doxygen Graph.cpp

See merge request eclipse/aidge/aidge_export_tensorrt!6
parents 001ffa6e c977243b
@@ -12,85 +12,200 @@
// Allow TensorRT to use up to 1GB of GPU memory for tactic selection
constexpr size_t MAX_WORKSPACE_SIZE = 1ULL << 30; // 1 GB
typedef enum {
typedef enum
{
SYNC,
ASYNC
} ExecutionMode_T;
typedef struct {
typedef struct
{
std::string name;
int nbElements;
int size;
int nbElements;
int size;
} IODesc;
typedef struct {
typedef struct
{
std::vector<IODesc> inputs;
std::vector<IODesc> outputs;
unsigned int nIO;
} IOGraphDesc;
class Graph {
public:
Graph( std::string const& filePath,
unsigned int device_id,
int nbbits );
~Graph();
void device(unsigned int id);
void databits(int nbbits);
void datamode(nvinfer1::DataType datatype);
void load(std::string const& filePath);
void load_onnx(std::string const& onnxModelPath);
void load_trt(std::string const& trtModelPath);
void save(std::string const& fileName);
void initialize();
void calibrate(std::string const& calibration_folder_path, std::string const& cache_file_path, unsigned int batch_size);
void profile(unsigned int nb_iterations,
ExecutionMode_T mode = ExecutionMode_T::ASYNC);
void auto_input_profile(std::vector<std::vector<int>> dims_inputs);
// Inference methods
void run(void** inputs, void** outputs, ExecutionMode_T mode = ExecutionMode_T::ASYNC);
void run_async(void** inputs, void** outputs);
void run_sync(void** inputs, void** outputs);
// Getters
unsigned int getNbIO();
IOGraphDesc getIODescription();
protected:
void initialize_io_descriptors();
private:
// TensorRT objects for network, engine
// and context creation and management
nvinfer1::INetworkDefinition* _network{nullptr};
nvinfer1::ICudaEngine* _engine{nullptr};
nvinfer1::IBuilder* _builder{nullptr};
nvinfer1::IBuilderConfig* _builderconfig{nullptr};
nvinfer1::IExecutionContext* _context{nullptr};
nvinfer1::IOptimizationProfile* _profile{nullptr};
nvinfer1::IInt8Calibrator* _calibrator{nullptr};
// Graph IO information
IOGraphDesc _iodescriptors;
// Buffer for GPU computation
std::vector<void*> _iobuffer;
// Stream
cudaStream_t _stream{nullptr};
/**
* @class Graph
* @brief Manages the lifecycle and execution of a neural network graph using TensorRT.
*
* The Graph class encapsulates the functionality required to manage, configure, and execute
* a neural network graph for inference using NVIDIA's TensorRT. This includes loading models
* from ONNX or TensorRT files, setting the CUDA device and data types, managing calibration
* for INT8 precision, and running inference in both synchronous and asynchronous modes.
*/
class Graph
{
public:
/**
* @brief Constructor for the Graph class.
*
* @param filePath Path to the file to load (default is empty).
* @param device_id Device ID to use (default is 0).
* @param nbbits Number of bits for data (default is -32).
*/
Graph(std::string const &filePath,
unsigned int device_id,
int nbbits);
/**
* @brief Destructor for the Graph class.
*/
~Graph();
/**
* @brief Set the CUDA device.
*
* @param id Device ID.
*/
void device(unsigned int id);
/**
* @brief Set the data type for the graph.
*
* @param nbbits Number of bits for data.
*/
void databits(int nbbits);
/**
* @brief Set the data mode for the graph.
*
* @param datatype Data type for the graph.
*/
void datamode(nvinfer1::DataType datatype);
/**
* @brief Load a file into the graph.
*
* @param filePath Path to the file to load.
*/
void load(std::string const &filePath);
/**
* @brief Load an ONNX file into the graph.
*
* @param onnxModelPath Path to the ONNX model file.
*/
void load_onnx(std::string const &onnxModelPath);
/**
* @brief Load a TensorRT file into the graph.
*
* @param trtModelPath Path to the TensorRT model file.
*/
void load_trt(std::string const &trtModelPath);
/**
* @brief Save the graph to a file.
*
* @param fileName Name of the file to save.
*/
void save(std::string const &fileName);
/**
* @brief Initializes the TensorRT engine and execution context for the Graph class. This involves building a serialized network, deserializing it into a CUDA engine, and setting up the necessary execution context and I/O descriptors.
*/
void initialize();
/**
* @brief Calibrate the graph using the calibration data found inside the specified calibration folder.
* This folder should include a `.info` file containing the dimensions of the calibration data, along with the data stored in a `.batch` file.
*
* Calibration can be expensive, so it is beneficial to generate the calibration data once and then reuse it for subsequent builds of the network. The cache includes the regression cutoff and quantile values used to generate it, and will not be used if these do not match the settings of the current calibrator. However, the network should be recalibrated if its structure changes or if the input data set changes, and it is the responsibility of the application to ensure this.
*
* @param calibration_folder_path Path to the calibration folder.
* @param cache_file_path Path to the cache file.
* @param batch_size Batch size for calibration (default is 1).
*/
void calibrate(std::string const &calibration_folder_path, std::string const &cache_file_path, unsigned int batch_size);
/**
* @brief Profile the graph's execution by printing the average profiled TensorRT process time per stimulus.
*
* @param nb_iterations Number of iterations for profiling.
* @param mode Execution mode (SYNC or ASYNC).
*/
void profile(unsigned int nb_iterations, ExecutionMode_T mode = ExecutionMode_T::ASYNC);
/**
* @brief Automatically set the input profile for the graph.
*
* @param dims_inputs Dimensions of the input tensors.
*/
void auto_input_profile(std::vector<std::vector<int>> dims_inputs);
// Inference methods
/**
* @brief Run the graph.
*
* @param inputs Input data.
* @param outputs Output data.
* @param mode Execution mode (SYNC or ASYNC).
*/
void run(void **inputs, void **outputs, ExecutionMode_T mode = ExecutionMode_T::ASYNC);
/**
* @brief Run the graph asynchronously.
*
* @param inputs Input data.
* @param outputs Output data.
*/
void run_async(void **inputs, void **outputs);
/**
* @brief Run the graph synchronously.
*
* @param inputs Input data.
* @param outputs Output data.
*/
void run_sync(void **inputs, void **outputs);
// Getters
/**
* @brief Get the number of IO tensors in the graph.
*
* @return unsigned int Number of IO tensors.
*/
unsigned int getNbIO();
/**
* @brief Get the IO descriptors of the graph.
*
* @return IOGraphDesc IO descriptors.
*/
IOGraphDesc getIODescription();
protected:
/**
* @brief Initialize IO descriptors for the graph.
*/
void initialize_io_descriptors();
private:
// TensorRT objects for network, engine
// and context creation and management
nvinfer1::INetworkDefinition *_network{nullptr};
nvinfer1::ICudaEngine *_engine{nullptr};
nvinfer1::IBuilder *_builder{nullptr};
nvinfer1::IBuilderConfig *_builderconfig{nullptr};
nvinfer1::IExecutionContext *_context{nullptr};
nvinfer1::IOptimizationProfile *_profile{nullptr};
nvinfer1::IInt8Calibrator *_calibrator{nullptr};
// Graph IO information
IOGraphDesc _iodescriptors;
// Buffer for GPU computation
std::vector<void *> _iobuffer;
// Stream
cudaStream_t _stream{nullptr};
};
#endif // __AIDGE_TENSORRT_GRAPH_HPP__
#endif // __AIDGE_TENSORRT_GRAPH_HPP__
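For orientation (not part of the diff): a minimal C++ usage sketch of the Graph API documented above. Assumptions are labeled in the comments — the header is included as "Graph.hpp", the project links against TensorRT and CUDA, a hypothetical "model.onnx" exists, IODesc::size is the per-element byte size, and run() accepts host-side buffers.

    #include "Graph.hpp"   // hypothetical include path for the class declared above

    #include <vector>

    int main()
    {
        // Build a graph on GPU 0 from an ONNX file; nbbits = -32 selects 32-bit float data
        // (per the constructor documentation above).
        Graph graph("model.onnx", /*device_id=*/0, /*nbbits=*/-32);

        // Build the serialized network, deserialize it into a CUDA engine and
        // set up the execution context and I/O descriptors.
        graph.initialize();

        // Allocate one host buffer per input and output, sized from the descriptors
        // (assumption: IODesc::size is the per-element byte size).
        IOGraphDesc desc = graph.getIODescription();
        std::vector<std::vector<char>> inStorage, outStorage;
        std::vector<void*> inputs, outputs;
        for (const IODesc& in : desc.inputs) {
            inStorage.emplace_back(static_cast<size_t>(in.nbElements) * in.size);
            inputs.push_back(inStorage.back().data());
        }
        for (const IODesc& out : desc.outputs) {
            outStorage.emplace_back(static_cast<size_t>(out.nbElements) * out.size);
            outputs.push_back(outStorage.back().data());
        }

        // SYNC blocks until the outputs have been written back to the host buffers.
        graph.run(inputs.data(), outputs.data(), ExecutionMode_T::SYNC);
        return 0;
    }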
@@ -21,14 +21,72 @@ void init_Graph(py::module& m)
.def(py::init<std::string, unsigned int, int>(),
py::arg("filepath") = "",
py::arg("device_id") = 0,
py::arg("nb_bits") = -32)
py::arg("nb_bits") = -32,
R"mydelimiter(
Construct a new Graph object.
:param filepath: Path to the file to load (default is empty).
:type filepath: str
:param device_id: Device ID to use (default is 0).
:type device_id: unsigned int
:param nb_bits: Number of bits for data (default is -32).
:type nb_bits: int
)mydelimiter")
.def("device", &Graph::device, py::arg("id"))
.def("load", &Graph::load, py::arg("filepath"))
.def("save", &Graph::save, py::arg("filepath"))
.def("calibrate", &Graph::calibrate, py::arg("calibration_folder_path") = "./calibration_folder/", py::arg("cache_file_path") = "./calibration_cache", py::arg("batch_size") = 1)
.def("initialize", &Graph::initialize)
.def("profile", &Graph::profile, py::arg("nb_iterations"), py::arg("mode")= ExecutionMode_T::ASYNC)
.def("device", &Graph::device, py::arg("id"),
R"mydelimiter(
Set the CUDA device.
:param id: Device ID.
:type id: unsigned int
)mydelimiter")
.def("load", &Graph::load, py::arg("filepath"),
R"mydelimiter(
Load a graph from a file, either a `.onnx` file or a `.trt` engine.
:param filepath: Path to the file.
:type filepath: str
)mydelimiter")
.def("save", &Graph::save, py::arg("filepath"),
R"mydelimiter(
Save the current graph as a `.trt` engine.
:param filepath: Path to the file.
:type filepath: str
)mydelimiter")
.def("calibrate", &Graph::calibrate, py::arg("calibration_folder_path") = "./calibration_folder/", py::arg("cache_file_path") = "./calibration_cache", py::arg("batch_size") = 1,
R"mydelimiter(
Calibrate the graph to determine the appropriate scaling factors for converting floating-point values to lower-precision representations, using the calibration data found inside the specified `calibration_folder`. This folder should include a `.info` file containing the dimensions of the calibration data, along with the data stored in a `.batch` file.
Calibration can be expensive, so it is beneficial to generate the calibration data once and then reuse it for subsequent builds of the network. The cache includes the regression cutoff and quantile values used to generate it, and will not be used if these do not match the settings of the current calibrator. However, the network should be recalibrated if its structure changes or if the input data set changes, and it is the responsibility of the application to ensure this.
:param calibration_folder_path: Path to the calibration folder.
:type calibration_folder_path: str
:param cache_file_path: Path to the cache file.
:type cache_file_path: str
:param batch_size: Batch size for calibration (default is 1).
:type batch_size: int
)mydelimiter")
.def("initialize", &Graph::initialize,
R"mydelimiter(
Initialize the TensorRT engine and execution context. This involves building a serialized network, deserializing it into a CUDA engine, and setting up the necessary execution context and I/O descriptors.
)mydelimiter")
.def("profile", &Graph::profile, py::arg("nb_iterations"), py::arg("mode")= ExecutionMode_T::ASYNC,
R"mydelimiter(
Profile the graph's execution by printing the average profiled TensorRT process time per stimulus.
:param nb_iterations: Number of iterations for profiling.
:type nb_iterations: unsigned int
:param mode: Execution mode (SYNC or ASYNC, default is ASYNC).
:type mode: ExecutionMode_T
)mydelimiter")
.def("run_sync", [](Graph& graph, py::list inputs) -> py::list {
py::list outputs;
std::vector<void *> bufferIn;
@@ -65,8 +123,17 @@ void init_Graph(py::module& m)
outputs.append(processed_array);
}
return outputs;
}, py::arg("inputs"))
;
}, py::arg("inputs"),
R"mydelimiter(
Run the graph synchronously.
:param inputs: Input data.
:type inputs: list
:return: Output data.
:rtype: list
)mydelimiter");
}
PYBIND11_MODULE(aidge_trt, m)
......
@@ -112,6 +112,7 @@ void Graph::datamode(nvinfer1::DataType datatype)
break;
}
}
void Graph::calibrate( std::string const& calibration_folder_path = "./calibration_folder/",
std::string const& cache_file_path = "./calibration_cache",
unsigned int batch_size = 1)
@@ -173,6 +174,7 @@ void Graph::load(std::string const& filePath)
}
}
void Graph::load_onnx(std::string const& onnxModelPath)
{
// Impose TensorRT flags for the creation of the network
......
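As a usage note for the calibration path shown above, a minimal sketch of the calibrate-then-build flow, under stated assumptions: nbbits = 8 is taken to request an INT8 engine, calibrate() is called before initialize() so the calibrator is attached when the engine is built, and a hypothetical ./calibration_folder/ already holds the `.info` and `.batch` files described in the Doxygen comment.

    #include "Graph.hpp"  // hypothetical include path

    int main()
    {
        // Assumption: nbbits = 8 selects INT8, which requires per-tensor scaling factors.
        Graph graph("model.onnx", /*device_id=*/0, /*nbbits=*/8);

        // Derive the scaling factors from the stimuli in the calibration folder; the cache
        // file lets later builds skip this expensive pass (see the defaults of Graph::calibrate).
        graph.calibrate("./calibration_folder/", "./calibration_cache", /*batch_size=*/1);

        // Build the engine with the calibrator attached, then keep the result on disk.
        graph.initialize();
        graph.save("model_int8");  // per the Python docstring, save() writes a .trt engine
        return 0;
    }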