From 49f938e91edfa910637c6b784c37aa5d2d1b3dbb Mon Sep 17 00:00:00 2001
From: hrouis <houssemeddine.rouis92@gmail.com>
Date: Fri, 31 Jan 2025 16:02:07 +0100
Subject: [PATCH] update documentation

---
 include/aidge/operator/AvgPooling.hpp       |  43 ++-
 include/aidge/operator/MaxPooling.hpp       |  16 +-
 include/aidge/operator/MetaOperatorDefs.hpp |   2 +
 .../operator/pybind_MetaOperatorDefs.cpp    | 248 +++++++++++++++++-
 src/operator/AvgPooling.cpp                 |   3 +-
 5 files changed, 281 insertions(+), 31 deletions(-)

diff --git a/include/aidge/operator/AvgPooling.hpp b/include/aidge/operator/AvgPooling.hpp
index 483ccac6a..ab9e111f2 100644
--- a/include/aidge/operator/AvgPooling.hpp
+++ b/include/aidge/operator/AvgPooling.hpp
@@ -55,11 +55,30 @@ enum class AvgPoolingAttr {
  *
  * The AvgPooling operation computes the average value within sliding windows of specified size
  * (kernel dimensions) over the input tensor. The stride dimensions determine how the window
- * moves across the input. This operation is commonly used in neural networks to reduce the spatial
- * dimensions while preserving features.
+ * moves across the input. The dilation parameter allows spacing between kernel elements, and
+ * `ceil_mode` determines whether to use ceiling instead of floor when computing the output shape.
+ * This operation is commonly used in neural networks to reduce spatial dimensions while preserving features.
  *
  * @tparam DIM Number of dimensions for the pooling operation.
+ *
+ * ### Output Shape Calculation
+ * - If `ceil_mode` is false:
+ *   `output_size = floor((input_size - dilation * (kernel_size - 1) - 1) / stride + 1)`
+ * - If `ceil_mode` is true:
+ *   `output_size = ceil((input_size - dilation * (kernel_size - 1) - 1) / stride + 1)`
+ *
+ * @example Example usage:
+ * - Input shape: (1, 3, 32, 32) // Batch size 1, 3 channels, 32x32 spatial dimensions
+ * - KernelDims: (2, 2)
+ * - StrideDims: (2, 2)
+ * - Dilation: (1, 1)
+ * - CeilMode: false
+ * - Output shape: (1, 3, 16, 16)
+ *
+ * @see OperatorTensor
+ * @see Registrable
  */
+
 template <DimIdx_t DIM>
 class AvgPooling_Op : public OperatorTensor,
                 public Registrable<AvgPooling_Op<DIM>, std::string, std::function<std::shared_ptr<OperatorImpl>(const AvgPooling_Op<DIM> &)>> {
@@ -95,9 +114,11 @@ public:
 
     /**
      * @brief Constructs an AvgPooling operation with specified kernel and stride dimensions.
-     * @param kernel_dims Size of the pooling window for each spatial dimension.
-     * @param stride_dims Step size (stride) for sliding the pooling window across the input dimensions.
+     * @param[in] kernel_dims Size of the pooling window for each spatial dimension.
+     * @param[in] stride_dims Step size (stride) for sliding the pooling window across the input dimensions.
      * Defaults to 1 for each dimension.
+     * @param[in] dilations Spatial dilations for the pooling operation.
+     * @param[in] ceil_mode Indicates whether to use ceil mode for output size calculation.
      */
     constexpr AvgPooling_Op(const std::array<DimSize_t, DIM> &kernel_dims,
                             const std::array<DimSize_t, DIM> &stride_dims = create_array<DimSize_t, DIM>(1),
@@ -113,7 +134,7 @@ public:
 
     /**
      * @brief Copy-constructor.
-     * @param op AvgPooling_Op to copy.
+     * @param[in] op AvgPooling_Op to copy.
      * @details Copies the operator attributes and its output tensor(s), but not
      * its input tensors. The new operator has no associated input.
      */
@@ -127,16 +148,16 @@ public:
 
     /**
      * @brief Calculates the output dimensions based on the input dimensions and operator attributes.
-     * @param allowDataDependency If true, considers data-dependent operations. Defaults to false.
+     * @param[in] allowDataDependency If true, considers data-dependent operations. Defaults to false.
      * @return True if the dimensions are successfully calculated.
      */
     bool forwardDims(bool /*allowDataDependency*/ = false) override final;
 
     /**
      * @brief Computes the receptive field of the operator.
-     * @param firstEltDims Dimensions of the first element.
-     * @param outputDims Dimensions of the output tensor.
-     * @param outputIdx Index of the output tensor. Defaults to 0.
+     * @param[in] firstEltDims Dimensions of the first element.
+     * @param[in] outputDims Dimensions of the output tensor.
+     * @param[in] outputIdx Index of the output tensor. Defaults to 0.
      * @return A vector of pairs representing the receptive fields.
      */
     std::vector<std::pair<std::vector<DimSize_t>, std::vector<DimSize_t>>>
@@ -146,8 +167,8 @@ public:
 
     /**
      * @brief Sets the backend for the operation.
-     * @param name Name of the backend.
-     * @param device Device index. Defaults to 0.
+     * @param[in] name Name of the backend.
+     * @param[in] device Device index. Defaults to 0.
      */
     void setBackend(const std::string &name, DeviceIdx_t device = 0) override final;
 
diff --git a/include/aidge/operator/MaxPooling.hpp b/include/aidge/operator/MaxPooling.hpp
index 4b1190c14..9063fb88b 100644
--- a/include/aidge/operator/MaxPooling.hpp
+++ b/include/aidge/operator/MaxPooling.hpp
@@ -66,24 +66,28 @@ enum class MaxPoolingAttr {
  * @brief Implements the MaxPooling operation over a specified input tensor.
  *
  * MaxPooling reduces spatial dimensions by applying a max filter over a sliding window.
- * The resulting output tensor contains the maximum value within each window.
+ * The stride dimensions determine how the window moves across the input. The dilation
+ * parameter allows spacing between kernel elements, and `ceil_mode` determines whether
+ * to use ceiling instead of floor when computing the output shape.
  *
  * ### Output Shape Calculation
- * - If `CeilMode` is false:
- *   `output_size = floor((input_size - kernel_size) / stride + 1)`
- * - If `CeilMode` is true:
- *   `output_size = ceil((input_size - kernel_size) / stride + 1)`
+ * - If `ceil_mode` is false:
+ *   `output_size = floor((input_size - dilation * (kernel_size - 1) - 1) / stride + 1)`
+ * - If `ceil_mode` is true:
+ *   `output_size = ceil((input_size - dilation * (kernel_size - 1) - 1) / stride + 1)`
  *
  * @example Example usage:
  * - Input shape: (1, 3, 32, 32) // Batch size 1, 3 channels, 32x32 spatial dimensions
  * - KernelDims: (2, 2)
  * - StrideDims: (2, 2)
+ * - Dilation: (1, 1)
  * - CeilMode: false
  * - Output shape: (1, 3, 16, 16)
  *
  * @see OperatorTensor
  * @see Registrable
  */
+
 template <DimIdx_t DIM>
 class MaxPooling_Op : public OperatorTensor,
                 public Registrable<MaxPooling_Op<DIM>,
@@ -121,7 +125,7 @@ public:
 
     /**
      * @brief Copy-constructor.
-     * @param op MaxPooling_Op to copy.
+     * @param[in] op MaxPooling_Op to copy.
      * @details Copies the operator attributes and its output tensor(s), but not
      * its input tensors. The new operator has no associated input.
      */
diff --git a/include/aidge/operator/MetaOperatorDefs.hpp b/include/aidge/operator/MetaOperatorDefs.hpp
index 9d687c34d..c4ceccf53 100644
--- a/include/aidge/operator/MetaOperatorDefs.hpp
+++ b/include/aidge/operator/MetaOperatorDefs.hpp
@@ -166,6 +166,7 @@ PaddedConvDepthWise(const DimSize_t nb_channels,
  * @param[in] stride_dims The stride dimensions for pooling (default is 1).
  * @param[in] dilations The spatial dilations for pooling (default is 1).
  * @param[in] padding_dims Padding dimensions before pooling (default is 0).
+ * @param[in] ceil_mode Whether to use ceiling mode for pooling (default is false).
  * @return A shared pointer to the Node representing the padded average pooling operation.
  */
 template <std::array<DimSize_t, 1>::size_type DIM>
@@ -185,6 +186,7 @@ extern std::shared_ptr<Node> PaddedAvgPooling(const std::array<DimSize_t, DIM> &
  * @param[in] stride_dims The stride dimensions for pooling (default is 1).
  * @param[in] dilations The spatial dilations for pooling (default is 1).
  * @param[in] padding_dims Padding dimensions before pooling (default is 0).
+ * @param[in] ceil_mode Whether to use ceiling mode for pooling (default is false).
  * @return A shared pointer to the MetaOperator_Op representing the padded average pooling operation.
  */
 template <std::array<DimSize_t, 1>::size_type DIM>
diff --git a/python_binding/operator/pybind_MetaOperatorDefs.cpp b/python_binding/operator/pybind_MetaOperatorDefs.cpp
index 587d3456c..8058cd2a2 100644
--- a/python_binding/operator/pybind_MetaOperatorDefs.cpp
+++ b/python_binding/operator/pybind_MetaOperatorDefs.cpp
@@ -46,7 +46,33 @@ template <DimIdx_t DIM> void declare_PaddedConvOp(py::module &m) {
         py::arg("stride_dims") = std::vector<DimSize_t>(DIM,1),
         py::arg("padding_dims") = std::vector<DimSize_t>(2*DIM,0),
         py::arg("dilation_dims") = std::vector<DimSize_t>(DIM,1),
-        py::arg("no_bias")= false);
+        py::arg("no_bias")= false,
+    R"mydelimiter(
+    Initialize a node containing a Padded Convolution operator.
+
+    This operator performs a convolution operation with explicit padding. It applies a
+    kernel filter over an input tensor with specified stride and dilation settings.
+
+    :param in_channels: Number of input channels.
+    :type in_channels: int
+    :param out_channels: Number of output channels.
+    :type out_channels: int
+    :param kernel_dims: The size of the convolutional kernel for each dimension.
+    :type kernel_dims: List[int]
+    :param stride_dims: The stride (step size) for kernel movement.
+    :type stride_dims: List[int]
+    :param padding_dims: Explicit padding to apply before convolution.
+    :type padding_dims: List[int]
+    :param dilation_dims: The dilation factor for kernel spacing.
+    :type dilation_dims: List[int]
+    :param no_bias: Whether to disable bias addition in the convolution.
+    :type no_bias: bool
+    :param name: Name of the node (optional).
+    :type name: str
+    :return: A node containing the Padded Convolution operator.
+    :rtype: :py:class:`PaddedConvOp`
+    )mydelimiter");
+
   m.def(("PaddedConv" + std::to_string(DIM) + "DOp").c_str(), [](
                                                          const std::vector<DimSize_t>& kernel_dims,
                                                          const std::vector<DimSize_t> &stride_dims,
@@ -62,9 +88,28 @@ template <DimIdx_t DIM> void declare_PaddedConvOp(py::module &m) {
   }, py::arg("kernel_dims"),
      py::arg("stride_dims") = std::vector<DimSize_t>(DIM,1),
      py::arg("padding_dims") = std::vector<DimSize_t>(2*DIM,0),
-     py::arg("dilation_dims") = std::vector<DimSize_t>(DIM,1));
+     py::arg("dilation_dims") = std::vector<DimSize_t>(DIM,1),
+    R"mydelimiter(
+    Initialize a Padded Convolution operator.
+
+    This function defines a convolution operation that includes explicit padding before
+    applying the kernel. The padding allows control over output dimensions while maintaining
+    receptive field properties.
+
+    :param kernel_dims: The size of the convolutional kernel for each dimension.
+    :type kernel_dims: List[int]
+    :param stride_dims: The stride (step size) for kernel movement.
+    :type stride_dims: List[int]
+    :param padding_dims: Padding applied before convolution.
+    :type padding_dims: List[int]
+    :param dilation_dims: The dilation factor for kernel spacing.
+    :type dilation_dims: List[int]
+    :return: A Padded Convolution operator.
+    :rtype: :py:class:`PaddedConvOp`
+    )mydelimiter");
 }
+
 template <DimIdx_t DIM> void declare_PaddedConvDepthWiseOp(py::module &m) {
   m.def(("PaddedConvDepthWise" + std::to_string(DIM) + "D").c_str(), [](const DimSize_t nb_channels,
                                                          const std::vector<DimSize_t>& kernel_dims,
@@ -86,7 +131,32 @@ template <DimIdx_t DIM> void declare_PaddedConvDepthWiseOp(py::module &m) {
         py::arg("stride_dims") = std::vector<DimSize_t>(DIM,1),
         py::arg("padding_dims") = std::vector<DimSize_t>(2*DIM,0),
         py::arg("dilation_dims") = std::vector<DimSize_t>(DIM,1),
-        py::arg("no_bias") = false);
+        py::arg("no_bias") = false,
+    R"mydelimiter(
+    Initialize a node containing a Depthwise Padded Convolution operator.
+
+    This operator performs a depthwise convolution operation, where each input channel is
+    convolved separately with a different kernel. The operation includes explicit padding,
+    stride control, and dilation options.
+
+    :param nb_channels: Number of input channels (also the number of output channels since depthwise convolution does not mix channels).
+    :type nb_channels: int
+    :param kernel_dims: The size of the convolutional kernel for each dimension.
+    :type kernel_dims: List[int]
+    :param stride_dims: The stride (step size) for kernel movement.
+    :type stride_dims: List[int]
+    :param padding_dims: Explicit padding to apply before convolution.
+    :type padding_dims: List[int]
+    :param dilation_dims: The dilation factor for kernel spacing.
+    :type dilation_dims: List[int]
+    :param no_bias: Whether to disable bias addition in the convolution.
+    :type no_bias: bool
+    :param name: Name of the node (optional).
+    :type name: str
+    :return: A node containing the Depthwise Padded Convolution operator.
+    :rtype: :py:class:`PaddedConvDepthWiseOp`
+    )mydelimiter");
+
   m.def(("PaddedConvDepthWise" + std::to_string(DIM) + "DOp").c_str(), [](
                                                          const std::vector<DimSize_t>& kernel_dims,
                                                          const std::vector<DimSize_t> &stride_dims,
@@ -102,8 +172,25 @@ template <DimIdx_t DIM> void declare_PaddedConvDepthWiseOp(py::module &m) {
   }, py::arg("kernel_dims"),
      py::arg("stride_dims") = std::vector<DimSize_t>(DIM,1),
      py::arg("padding_dims") = std::vector<DimSize_t>(2*DIM,0),
-     py::arg("dilation_dims") = std::vector<DimSize_t>(DIM,1));
+     py::arg("dilation_dims") = std::vector<DimSize_t>(DIM,1),
+    R"mydelimiter(
+    Initialize a Depthwise Padded Convolution operator.
+
+    This function defines a depthwise convolution operation that includes explicit padding
+    before applying the kernel. Depthwise convolution applies a separate filter to each
+    input channel, preserving channel independence.
+    :param kernel_dims: The size of the convolutional kernel for each dimension.
+    :type kernel_dims: List[int]
+    :param stride_dims: The stride (step size) for kernel movement.
+    :type stride_dims: List[int]
+    :param padding_dims: Padding applied before convolution.
+    :type padding_dims: List[int]
+    :param dilation_dims: The dilation factor for kernel spacing.
+    :type dilation_dims: List[int]
+    :return: A Depthwise Padded Convolution operator.
+    :rtype: :py:class:`PaddedConvDepthWiseOp`
+    )mydelimiter");
 }
 
 template <DimIdx_t DIM> void declare_PaddedAvgPoolingOp(py::module &m) {
@@ -125,7 +212,29 @@ template <DimIdx_t DIM> void declare_PaddedAvgPoolingOp(py::module &m) {
         py::arg("stride_dims") = std::vector<DimSize_t>(DIM,1),
         py::arg("dilations") = std::vector<DimSize_t>(DIM,1),
         py::arg("padding_dims") = std::vector<DimSize_t>(2*DIM,0),
-        py::arg("ceil_mode") = false);
+        py::arg("ceil_mode") = false,
+    R"mydelimiter(
+    Initialize a node containing a Padded Average Pooling operator.
+
+    This operator performs an average pooling operation with explicit padding. The output value
+    is computed as the average of input values within a defined kernel window.
+
+    :param kernel_dims: The size of the pooling kernel for each dimension.
+    :type kernel_dims: List[int]
+    :param stride_dims: The stride (step size) for kernel movement.
+    :type stride_dims: List[int]
+    :param dilations: The dilation factor for the kernel, which increases the spacing between elements.
+    :type dilations: List[int]
+    :param padding_dims: Explicit padding to apply before pooling.
+    :type padding_dims: List[int]
+    :param ceil_mode: If set to True, the output shape is computed using ceil instead of floor.
+    :type ceil_mode: bool
+    :param name: Name of the node (optional).
+    :type name: str
+    :return: A node containing the Padded Average Pooling operator.
+    :rtype: :py:class:`PaddedAvgPoolingOp`
+    )mydelimiter");
+
   m.def(("PaddedAvgPooling" + std::to_string(DIM) + "DOp").c_str(), [](const std::vector<DimSize_t>& kernel_dims,
                                                          const std::vector<DimSize_t> &stride_dims,
                                                          const std::vector<DimSize_t> &dilations,
@@ -137,12 +246,31 @@ template <DimIdx_t DIM> void declare_PaddedAvgPoolingOp(py::module &m) {
     AIDGE_ASSERT(dilations.size() == DIM, "dilations size [{}] does not match DIM [{}]", dilations.size(), DIM);
     AIDGE_ASSERT(padding_dims.size() == 2*DIM, "padding_dims size [{}] does not match DIM [{}]", padding_dims.size(), 2*DIM);
 
-      return PaddedAvgPooling_Op<DIM>(to_array<DIM>(kernel_dims.begin()), to_array<DIM>(stride_dims.begin()), to_array<DIM>(dilations.begin()), to_array<2*DIM>(padding_dims.begin()), ceil_mode);
+    return PaddedAvgPooling_Op<DIM>(to_array<DIM>(kernel_dims.begin()), to_array<DIM>(stride_dims.begin()), to_array<DIM>(dilations.begin()), to_array<2*DIM>(padding_dims.begin()), ceil_mode);
   }, py::arg("kernel_dims"),
      py::arg("stride_dims") = std::vector<DimSize_t>(DIM,1),
      py::arg("dilations") = std::vector<DimSize_t>(DIM,1),
      py::arg("padding_dims") = std::vector<DimSize_t>(2*DIM,0),
-     py::arg("ceil_mode") = false);
+     py::arg("ceil_mode") = false,
+    R"mydelimiter(
+    Initialize a Padded Average Pooling operator.
+
+    This function defines an average pooling operation with explicit padding before pooling is applied.
+    The operation computes the average of the elements inside each kernel window.
+
+    :param kernel_dims: The size of the pooling kernel for each dimension.
+    :type kernel_dims: List[int]
+    :param stride_dims: The stride (step size) for kernel movement.
+    :type stride_dims: List[int]
+    :param dilations: The dilation factor for the kernel, which increases the spacing between elements.
+    :type dilations: List[int]
+    :param padding_dims: Padding applied before pooling.
+    :type padding_dims: List[int]
+    :param ceil_mode: If set to True, the output shape is computed using ceil instead of floor.
+    :type ceil_mode: bool
+    :return: A Padded Average Pooling operator.
+    :rtype: :py:class:`PaddedAvgPoolingOp`
+    )mydelimiter");
 }
 
 template <DimIdx_t DIM> void declare_PaddedMaxPoolingOp(py::module &m) {
@@ -164,7 +292,29 @@ template <DimIdx_t DIM> void declare_PaddedMaxPoolingOp(py::module &m) {
         py::arg("stride_dims") = std::vector<DimSize_t>(DIM,1),
         py::arg("dilations") = std::vector<DimSize_t>(DIM,1),
         py::arg("padding_dims") = std::vector<DimSize_t>(2*DIM,0),
-        py::arg("ceil_mode") = false);
+        py::arg("ceil_mode") = false,
+    R"mydelimiter(
+    Initialize a node containing a Padded Max Pooling operator.
+
+    This operator performs a max pooling operation with explicit padding before pooling is applied.
+    The output value is computed as the maximum of input values within a defined kernel window.
+
+    :param kernel_dims: The size of the pooling kernel for each dimension.
+    :type kernel_dims: List[int]
+    :param stride_dims: The stride (step size) for kernel movement.
+    :type stride_dims: List[int]
+    :param dilations: The dilation factor for the kernel, which increases the spacing between elements.
+    :type dilations: List[int]
+    :param padding_dims: Explicit padding to apply before pooling.
+    :type padding_dims: List[int]
+    :param ceil_mode: If set to True, the output shape is computed using ceil instead of floor.
+    :type ceil_mode: bool
+    :param name: Name of the node (optional).
+    :type name: str
+    :return: A node containing the Padded Max Pooling operator.
+    :rtype: :py:class:`PaddedMaxPoolingOp`
+    )mydelimiter");
+
   m.def(("PaddedMaxPooling" + std::to_string(DIM) + "DOp").c_str(), [](const std::vector<DimSize_t>& kernel_dims,
                                                          const std::vector<DimSize_t> &stride_dims,
                                                          const std::vector<DimSize_t> &dilations,
@@ -181,27 +331,99 @@ template <DimIdx_t DIM> void declare_PaddedMaxPoolingOp(py::module &m) {
      py::arg("stride_dims") = std::vector<DimSize_t>(DIM,1),
      py::arg("dilations") = std::vector<DimSize_t>(DIM,1),
      py::arg("padding_dims") = std::vector<DimSize_t>(2*DIM,0),
-     py::arg("ceil_mode") = false);
+     py::arg("ceil_mode") = false,
+    R"mydelimiter(
+    Initialize a Padded Max Pooling operator.
+    This function defines a max pooling operation with explicit padding before pooling is applied.
+    The operation computes the maximum of the elements inside each kernel window.
+
+    :param kernel_dims: The size of the pooling kernel for each dimension.
+    :type kernel_dims: List[int]
+    :param stride_dims: The stride (step size) for kernel movement.
+    :type stride_dims: List[int]
+    :param dilations: The dilation factor for the kernel, which increases the spacing between elements.
+    :type dilations: List[int]
+    :param padding_dims: Padding applied before pooling.
+    :type padding_dims: List[int]
+    :param ceil_mode: If set to True, the output shape is computed using ceil instead of floor.
+    :type ceil_mode: bool
+    :return: A Padded Max Pooling operator.
+    :rtype: :py:class:`PaddedMaxPoolingOp`
+    )mydelimiter");
 }
+
 void declare_LSTMOp(py::module &m) {
-  m.def("LSTM", &LSTM, py::arg("in_channels"),
+  m.def("LSTM", &LSTM,
+       py::arg("in_channels"),
        py::arg("hidden_channels"),
        py::arg("seq_length"),
        py::arg("nobias") = false,
-       py::arg("name") = "");
+       py::arg("name") = "",
+    R"mydelimiter(
+    Initialize a node containing an LSTM (Long Short-Term Memory) operator.
+
+    The LSTM operator is a recurrent neural network (RNN) variant designed to model sequential data
+    while addressing the vanishing gradient problem. It includes gating mechanisms to control
+    information flow through time.
+
+    :param in_channels: The number of input features per time step.
+    :type in_channels: int
+    :param hidden_channels: The number of hidden units in the LSTM.
+    :type hidden_channels: int
+    :param seq_length: The number of time steps in the input sequence.
+    :type seq_length: int
+    :param nobias: If set to True, no bias terms are included in the LSTM computation.
+    :type nobias: bool
+    :param name: Name of the node (optional).
+    :type name: str
+    :return: A node containing the LSTM operator.
+    :rtype: :py:class:`LSTM`
+    )mydelimiter");
+
   m.def("LSTMOp", &LSTM_Op,
        py::arg("seq_length"),
-       py::arg("name") = "");
+       py::arg("name") = "",
+    R"mydelimiter(
+    Initialize an LSTM operation.
+
+    This function sets up an LSTM operator to process sequential data. The LSTM maintains hidden
+    states over time steps, allowing it to learn long-range dependencies.
+
+    :param seq_length: The length of the input sequence.
+    :type seq_length: int
+    :param name: Name of the node (optional).
+    :type name: str
+    :return: An LSTM operator.
+    :rtype: :py:class:`LSTMOp`
+    )mydelimiter");
 }
+
 void declare_LeakyOp(py::module &m) {
     m.def("Leaky", &Leaky,
           py::arg("nb_timesteps"),
           py::arg("beta"),
           py::arg("threshold") = 1.0,
-          py::arg("name") = "");
+          py::arg("name") = "",
+    R"mydelimiter(
+    Initialize a Leaky neuron operator.
+
+    The Leaky operator introduces a decay factor, allowing neuron states to "leak" over time instead of resetting
+    abruptly. This helps in maintaining temporal memory.
+
+    :param nb_timesteps: The number of time steps for the operation.
+    :type nb_timesteps: int
+    :param beta: The leakage factor controlling decay over time.
+    :type beta: float
+    :param threshold: The activation threshold (default is 1.0).
+    :type threshold: float
+    :param name: Name of the node (optional).
+    :type name: str
+    :return: A node containing the Leaky operator.
+    :rtype: :py:class:`Leaky`
+    )mydelimiter");
 }
 
 void init_MetaOperatorDefs(py::module &m) {
diff --git a/src/operator/AvgPooling.cpp b/src/operator/AvgPooling.cpp
index 6ed5f8f70..79341687c 100644
--- a/src/operator/AvgPooling.cpp
+++ b/src/operator/AvgPooling.cpp
@@ -107,7 +107,8 @@ Aidge::AvgPooling_Op<DIM>::computeReceptiveField(const std::vector<Aidge::DimSiz
             inputDims.push_back((outputDims[2+static_cast<std::size_t>(i)] - 1)
                         * mAttributes->template getAttr<AvgPoolingAttr::StrideDims>()[static_cast<std::size_t>(i)]
                         + 1
-                        + (mAttributes->template getAttr<AvgPoolingAttr::KernelDims>()[static_cast<std::size_t>(i)] - 1));
+                        + (mAttributes->template getAttr<AvgPoolingAttr::KernelDims>()[static_cast<std::size_t>(i)] - 1)
+                        * mAttributes->template getAttr<AvgPoolingAttr::Dilations>()[static_cast<std::size_t>(i)]);
             inputIdxDims[2+i] *= mAttributes->template getAttr<AvgPoolingAttr::StrideDims>()[static_cast<std::size_t>(i)];
         }
         std::vector<std::pair<std::vector<Aidge::DimSize_t>, std::vector<DimSize_t>>> res;
--
GitLab
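Reviewer note (illustrative, not part of the patch): the output-shape formulas documented in AvgPooling.hpp and MaxPooling.hpp above can be sanity-checked with a small, self-contained Python sketch. The helper name below is hypothetical and does not use the Aidge API; it only reproduces the documented arithmetic.

import math

def pool_output_size(input_size, kernel_size, stride=1, dilation=1, ceil_mode=False):
    # Mirrors the documented formula:
    # floor_or_ceil((input_size - dilation * (kernel_size - 1) - 1) / stride + 1)
    value = (input_size - dilation * (kernel_size - 1) - 1) / stride + 1
    return math.ceil(value) if ceil_mode else math.floor(value)

# Example from the documentation: 32x32 input, 2x2 kernel, stride 2, dilation 1 -> 16x16
assert pool_output_size(32, 2, stride=2, dilation=1, ceil_mode=False) == 16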