diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
index e480697b6452440f043901140a07cb643f3cbdb6..1e4bcd1b0a498e8359e2c79519d462d43e416ce4 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -68,9 +68,34 @@ using ConvImpl2D_cpu = OperatorImpl_cpu<Conv2D_Op,
                                         void *,
                                         void *)>;
 
+using Conv3D_Op = Conv_Op<3>;
+using ConvImpl3D_cpu = OperatorImpl_cpu<Conv3D_Op,
+                                        void(const std::array<DimSize_t, 3> &,
+                                             const std::array<DimSize_t, 3> &,
+                                             const std::array<DimSize_t, 3> &,
+                                             const std::array<DimSize_t, 5> &,
+                                             const std::array<DimSize_t, 5> &,
+                                             const void *,
+                                             const void *,
+                                             const void *,
+                                             void *),
+                                        void(const std::array<DimSize_t, 3> &,
+                                             const std::array<DimSize_t, 3> &,
+                                             const std::array<DimSize_t, 3> &,
+                                             const std::array<DimSize_t, 5> &,
+                                             const std::array<DimSize_t, 5> &,
+                                             const void *,
+                                             const void *,
+                                             const void *,
+                                             void *,
+                                             void *,
+                                             void *)>;
+
 // Implementation entry point registration to Operator
 REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create);
 REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create);
+REGISTRAR(Conv3D_Op, "cpu", Aidge::ConvImpl3D_cpu::create);
+
 } // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
index d2b942f6b6f72235f5d079c0fbb402b1b4ed1373..e764eecfd6746585a7526b8dc7c2a7295c242285 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
@@ -12,7 +12,9 @@
 #ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
 
+#include <algorithm>
 #include <array>
+#include <cstddef>
 #include <cstdint>
 
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
@@ -234,7 +236,7 @@ static void conv1DBackwardWeights(const array<DimSize_t, 1> &stride,
 
     for (DimSize_t kX = 0; kX < kDim[0]; ++kX) {
         for (DimSize_t oX = 0; oX < oDims[2]; ++oX) {
-            const DimSize_t iX = oX * stride[0] + kX * dilation[0] ;
+            const DimSize_t iX = oX * stride[0] + kX * dilation[0];
 
             weightsGrad[kOffsets[1] + kX] +=
                 input[iOffsets[1] + iX] * oGrad[oOffsets[1] + oX];
@@ -315,9 +317,9 @@ static void conv1DBackwardBias(const array<DimSize_t, 3> &oDims,
  * @param[inout] biasesGrad_ gradients of the kernel biases
  */
 template <class I, class W, class B, class O>
-void ConvImpl1D_cpu_backward_kernel(const array<DimSize_t,1> &stride,
-                                    const array<DimSize_t,1> &dilation,
-                                    const array<DimSize_t,1> &kernelDim,
+void ConvImpl1D_cpu_backward_kernel(const array<DimSize_t, 1> &stride,
+                                    const array<DimSize_t, 1> &dilation,
+                                    const array<DimSize_t, 1> &kernelDim,
                                     const array<DimSize_t, 3> &inputDims,
                                     const array<DimSize_t, 3> &outputDims,
                                     const void *input_,
@@ -1030,6 +1032,585 @@ REGISTRAR(ConvImpl2D_cpu,
                                          std::int32_t,
                                          std::int32_t,
                                          std::int32_t>});
+
+/**
+ * @brief Forward kernel for 3D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims stride dimensions
+ * @param dilationDims dilation dimensions
+ * @param kDims kernel dimensions
+ * @param iDims input dimensions.
+ * @param oDims output dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const bias Tensor.
+ * @param output_ Output Tensor.
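+ * @note This kernel applies no padding; inputs are assumed to be padded
+ *       beforehand if needed (e.g. with a Pad operator). For each spatial
+ *       axis d, oDims[d + 2] is assumed consistent with
+ *       (iDims[d + 2] - dilationDims[d] * (kDims[d] - 1) - 1)
+ *           / strideDims[d] + 1,
+ *       as precomputed by forwardDims().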
+ */
+template <class I, class W, class B, class O>
+void ConvImpl3D_cpu_forward_kernel(const array<DimSize_t, 3> &strideDims,
+                                   const array<DimSize_t, 3> &dilationDims,
+                                   const array<DimSize_t, 3> &kDims,
+                                   const array<DimSize_t, 5> &iDims,
+                                   const array<DimSize_t, 5> &oDims,
+                                   const void *input_,
+                                   const void *weights_,
+                                   const void *biases_,
+                                   void *output_) {
+
+    ////////////////////////////////////////////////////////////////////////
+    // TENSOR CASTING
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    ////////////////////////////////////////////////////////////////////////
+    // strides
+    // for each array, entry d holds the number of elements spanned by one
+    // step along dimension d of the corresponding tensor
+    const array<DimSize_t, 4> iStride{
+        iDims[1] * iDims[2] * iDims[3] * iDims[4],
+        iDims[2] * iDims[3] * iDims[4],
+        iDims[3] * iDims[4],
+        iDims[4]};
+    const array<DimSize_t, 4> oStride{
+        oDims[1] * oDims[2] * oDims[3] * oDims[4],
+        oDims[2] * oDims[3] * oDims[4],
+        oDims[3] * oDims[4],
+        oDims[4]};
+    const array<DimSize_t, 4> kStride{
+        iDims[1] * kDims[0] * kDims[1] * kDims[2],
+        kDims[0] * kDims[1] * kDims[2],
+        kDims[1] * kDims[2],
+        kDims[2]};
+
+    ////////////////////////////////////////////////////////////////////////
+    // index offsets
+    // NOTE:
+    // in/out dims = {batch, in/outChannels,
+    //                in/outDims[0], in/outDims[1], in/outDims[2]}
+    array<DimSize_t, 4> iOffset{0, 0, 0, 0};
+    array<DimSize_t, 4> oOffset{0, 0, 0, 0};
+    // NOTE:
+    // kernel dims = {outChannels, inChannels, kernelDims[0],
+    //                kernelDims[1], kernelDims[2]}
+    array<DimSize_t, 4> kOffset{0, 0, 0, 0};
+    array<DimSize_t, 2> kDilOffset{0, 0};
+
+    ////////////////////////////////////////////////////////////////////////
+    // COMPUTATION
+    for (DimSize_t batch = 0; batch < iDims[0];
+         ++batch, oOffset[0] += oStride[0], iOffset[0] += iStride[0]) {
+
+        oOffset[1] = oOffset[0];
+        kOffset[0] = 0;
+        for (DimSize_t oChannel = 0; oChannel < oDims[1];
+             ++oChannel, oOffset[1] += oStride[1], kOffset[0] += kStride[0]) {
+
+            // Initialize the output channel with its bias value (or zero),
+            // so that the accumulation below starts from a defined value
+            const B biasVal = (biases != nullptr) ? biases[oChannel] : B(0);
+            std::fill(output + oOffset[1],
+                      output + oOffset[1] + oStride[1],
+                      biasVal);
+
+            iOffset[1] = iOffset[0];
+            kOffset[1] = kOffset[0];
+            for (DimSize_t iChannel = 0; iChannel < iDims[1]; ++iChannel,
+                 iOffset[1] += iStride[1],
+                 kOffset[1] += kStride[1]) {
+
+                iOffset[2] = iOffset[1];
+                oOffset[2] = oOffset[1];
+                for (DimSize_t oX = 0; oX < oDims[2]; ++oX,
+                     iOffset[2] += strideDims[0] * iStride[2],
+                     oOffset[2] += oStride[2]) {
+
+                    iOffset[3] = iOffset[2];
+                    oOffset[3] = oOffset[2];
+                    for (DimSize_t oY = 0; oY < oDims[3]; ++oY,
+                         iOffset[3] += strideDims[1] * iStride[3],
+                         oOffset[3] += oStride[3]) {
+
+                        for (DimSize_t iZ = 0, oZ = 0; oZ < oDims[4];
+                             ++oZ, iZ += strideDims[2]) {
+                            auto oIdx = oOffset[3] + oZ;
+                            auto iIdx = iOffset[3] + iZ;
+
+                            kOffset[2] = kOffset[1];
+                            kDilOffset[0] = 0;
+                            for (DimSize_t kX = 0; kX < kDims[0]; ++kX,
+                                 kOffset[2] += kStride[2],
+                                 kDilOffset[0] += dilationDims[0] * iStride[2]) {
+
+                                kOffset[3] = kOffset[2];
+                                kDilOffset[1] = kDilOffset[0];
+                                for (DimSize_t kY = 0; kY < kDims[1]; ++kY,
+                                     kOffset[3] += kStride[3],
+                                     kDilOffset[1] += dilationDims[1] * iStride[3]) {
+
+                                    for (DimSize_t kZ = 0; kZ < kDims[2];
+                                         ++kZ) {
+                                        output[oIdx] +=
+                                            weights[kOffset[3] + kZ] *
+                                            input[iIdx + kDilOffset[1] +
+                                                  kZ * dilationDims[2]];
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+/**
+ * @brief perform backpropagation for the input
+ * @note INPUT & OUTPUT convention is the same as in the
+ * forward function
+ * @note formula:
+ *        for i in 0..input_size:
+ *          for n in 0..output_size:
+ *            dL     dYn     dL
+ *            ---- = ----  ----
+ *            dXi    dXi     Yn
+ *       with: dYn / dXi = w_k
+ *       i.e. for each output gradient value and each weight, the product
+ *       weight * output-gradient is accumulated into the input-gradient
+ *       cell that contributed through that weight
+ * @note kernel, stride & dilation are arrays of length 3, one value per
+ *       spatial axis
+ * @note reminder that kernel dimensions are
+ *       {outChannels, inChannels, kernelDims[0], kernelDims[1],
+ *        kernelDims[2]}
+ *       <=> {oDims[1], iDims[1], kDims[0], kDims[1], kDims[2]}
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam O Output data type.
+ * @param[in] stride stride parameter of the convolution operator
+ * @param[in] dilation dilation parameter of the convolution operator
+ * @param[in] kDims dimensions of the kernel
+ * @param[in] kStrides nb of elements contained per dimension of the kernel
+ * @param[in] weights weights values
+ * @param[in] oDims dimensions of the output
+ * @param[in] oStrides nb of elements contained per dimension of the output
+ * @param[in] oGrad output gradient
+ * @param[in] iDims input dimensions
+ * @param[in] iStrides nb of elements contained per dimension of the input
+ * @param[inout] iGrad gradients of the input to update
+ */
+template <class I, class W, class O>
+void conv3DBackwardInput(const array<DimSize_t, 3> &stride,
+                         const array<DimSize_t, 3> &dilation,
+                         const array<DimSize_t, 3> &kDims,
+                         const array<DimSize_t, 4> &kStrides,
+                         const W *weights,
+                         const array<DimSize_t, 5> &oDims,
+                         const array<DimSize_t, 4> &oStrides,
+                         const O *oGrad,
+                         const array<DimSize_t, 5> &iDims,
+                         const array<DimSize_t, 4> &iStrides,
+                         I *iGrad) {
+    // records the index offset of each strided dimension (i.e. all
+    // dimensions except the last) for every traversed tensor;
+    // these serve as checkpoints to avoid recomputing indices at every
+    // iteration
+    array<DimSize_t, 4> iOffset{};
+    array<DimSize_t, 4> oOffset{};
+    array<DimSize_t, 4> kOffset{};
+    array<DimSize_t, 2> iDilkernelOffset{}; // input offset for dilated kernel
+
+    for (DimSize_t batch = 0; batch < iDims[0];
+         ++batch, iOffset[0] += iStrides[0], oOffset[0] += oStrides[0]) {
+
+        kOffset[0] = 0;
+        oOffset[1] = oOffset[0];
+        for (DimSize_t oChannel = 0; oChannel < oDims[1]; ++oChannel,
+             oOffset[1] += oStrides[1],
+             kOffset[0] += kStrides[0]) {
+
+            iOffset[1] = iOffset[0];
+            kOffset[1] = kOffset[0];
+            for (DimSize_t iChannel = 0; iChannel < iDims[1]; ++iChannel,
+                 iOffset[1] += iStrides[1],
+                 kOffset[1] += kStrides[1]) {
+
+                oOffset[2] = oOffset[1];
+                iOffset[2] = iOffset[1];
+                for (DimSize_t oX = 0; oX < oDims[2]; ++oX,
+                     oOffset[2] += oStrides[2],
+                     iOffset[2] += stride[0] * iStrides[2]) {
+
+                    oOffset[3] = oOffset[2];
+                    iOffset[3] = iOffset[2];
+                    for (DimSize_t oY = 0; oY < oDims[3]; ++oY,
+                         oOffset[3] += oStrides[3],
+                         iOffset[3] += stride[1] * iStrides[3]) {
+
+                        DimSize_t iZ = 0;
+                        for (DimSize_t oZ = 0; oZ < oDims[4];
+                             ++oZ, iZ += stride[2]) {
+                            auto oIdx = oOffset[3] + oZ;
+                            auto iIdx = iOffset[3] + iZ;
+
+                            iDilkernelOffset[0] = 0;
+                            kOffset[2] = kOffset[1];
+                            for (DimSize_t kX = 0; kX < kDims[0]; ++kX,
+                                 iDilkernelOffset[0] += dilation[0] * iStrides[2],
+                                 kOffset[2] += kStrides[2]) {
+
+                                kOffset[3] = kOffset[2];
+                                iDilkernelOffset[1] = iDilkernelOffset[0];
+                                for (DimSize_t kY = 0; kY < kDims[1]; ++kY,
+                                     kOffset[3] += kStrides[3],
+                                     iDilkernelOffset[1] += dilation[1] * iStrides[3]) {
+
+                                    for (DimSize_t kZ = 0; kZ < kDims[2];
+                                         ++kZ) {
+
+                                        iGrad[iIdx + iDilkernelOffset[1] +
+                                              kZ * dilation[2]] +=
+                                            weights[kOffset[3] + kZ] *
+                                            oGrad[oIdx];
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+/**
+ * @brief computes weight backpropagation for conv3D operation
+ * @note INPUT & OUTPUT convention is the same as in the
+ * forward function
+ * @note formula (weight grad):
+ *        for i in 0..weight_size:
+ *          for n in 0..output_size:
+ *            dL     dYn     dL
+ *            ---- = ----  ----
+ *            dwi    dwi     Yn
+ *       with: dYn / dwi = x_k
+ * @tparam I input dtype
+ * @tparam W weight dtype
+ * @tparam O output dtype
+ * @param[in] iDims input data dimensions
+ * @param[in] iStrides nb of elements in each dimension of the input tensor
+ * @param[in] input input data
+ * @param[in] oDims output data dimensions
+ * @param[in] oStrides nb of elements in each dimension of the output tensor
+ * @param[in] oGrad gradients of output data
+ * @param[in] kDims dimensions of the kernel (not counting in/out channels)
+ * @param[in] kStrides nb of elements in each dimension of the kernel tensor
+ * (counting in/out channels)
+ * @param[in] stride attribute of the convolution operator
+ * @param[in] dilation attribute of the convolution operator
+ * @param[inout] weightsGrad gradients of the kernel weights
+ */
+template <class I, class W, class O>
+void conv3DBackwardWeights(const array<DimSize_t, 5> &iDims,
+                           const array<DimSize_t, 4> &iStrides,
+                           const I *input,
+                           const array<DimSize_t, 5> &oDims,
+                           const array<DimSize_t, 4> &oStrides,
+                           const O *oGrad,
+                           const array<DimSize_t, 3> &kDims,
+                           const array<DimSize_t, 4> &kStrides,
+                           const array<DimSize_t, 3> &stride,
+                           const array<DimSize_t, 3> &dilation,
+                           W *weightsGrad) {
+    // records the index offset of each dimension whose stride is not 1
+    // (i.e. all dimensions except the last) for every traversed tensor
+    array<DimSize_t, 4> iOffsets{0, 0, 0, 0};
+    array<DimSize_t, 4> oOffsets{0, 0, 0, 0};
+    array<DimSize_t, 4> kOffsets{0, 0, 0, 0};
+    array<DimSize_t, 3> iDilKernelOffsets{0, 0, 0};
+
+    for (DimSize_t batch = 0; batch < iDims[0]; ++batch) {
+        iOffsets[0] = batch * iStrides[0];
+        oOffsets[0] = batch * oStrides[0];
+
+        for (DimSize_t oChannel = 0; oChannel < oDims[1]; ++oChannel) {
+            oOffsets[1] = oChannel * oStrides[1] + oOffsets[0];
+            kOffsets[0] = oChannel * kStrides[0];
+
+            for (DimSize_t iChannel = 0; iChannel < iDims[1]; ++iChannel) {
+                iOffsets[1] = iChannel * iStrides[1] + iOffsets[0];
+                kOffsets[1] = iChannel * kStrides[1] + kOffsets[0];
+
+                for (DimSize_t kX = 0; kX < kDims[0]; ++kX) {
+                    kOffsets[2] = kX * kStrides[2] + kOffsets[1];
+                    iDilKernelOffsets[0] = kX * dilation[0] * iStrides[2];
+
+                    for (DimSize_t kY = 0; kY < kDims[1]; ++kY) {
+                        kOffsets[3] = kY * kStrides[3] + kOffsets[2];
+                        iDilKernelOffsets[1] = kY * dilation[1] * iStrides[3] +
+                                               iDilKernelOffsets[0];
+
+                        for (DimSize_t kZ = 0; kZ < kDims[2]; ++kZ) {
+                            iDilKernelOffsets[2] =
+                                kZ * dilation[2] + iDilKernelOffsets[1];
+
+                            for (DimSize_t oX = 0; oX < oDims[2]; ++oX) {
+                                oOffsets[2] = oX * oStrides[2] + oOffsets[1];
+                                iOffsets[2] =
+                                    oX * stride[0] * iStrides[2] + iOffsets[1];
+
+                                for (DimSize_t oY = 0; oY < oDims[3]; ++oY) {
+                                    oOffsets[3] = oY * oStrides[3] + oOffsets[2];
+                                    iOffsets[3] = oY * stride[1] * iStrides[3] +
+                                                  iOffsets[2];
+
+                                    for (DimSize_t oZ = 0, iZ = 0;
+                                         oZ < oDims[4];
+                                         ++oZ, iZ += stride[2]) {
+
+                                        weightsGrad[kOffsets[3] + kZ] +=
+                                            input[iOffsets[3] + iZ +
+                                                  iDilKernelOffsets[2]] *
+                                            oGrad[oOffsets[3] + oZ];
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+/**
+ * @brief computes bias backpropagation for conv3D operation
+ * @note INPUT & OUTPUT convention is the same as in the
+ * forward function
+ * @note formula (bias grad):
+ *        for i in 0..bias_size:
+ *          for n in 0..output_size:
+ *            dL     dYn     dL
+ *            ---- = ----  ----
+ *            dbi    dbi     Yn
+ *       with: dYn / dbi = 1
+ *
+ *       Hence the partial derivative of the loss w.r.t. a bias is the
+ *       corresponding output gradient, and each bias gradient is the sum
+ *       of the output gradients of its channel over the batch and all
+ *       spatial locations
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param[in] oDims output tensor dimensions
+ * @param[in] oStrides nb of elements contained per dimension of the
+ * output
+ * @param[in] oGrad output tensor gradients
+ * @param[inout] biasesGrad biases gradients
+ */
+template <class B, class O>
+static void conv3DBackwardBias(const array<DimSize_t, 5> &oDims,
+                               const array<DimSize_t, 4> &oStrides,
+                               const O *oGrad,
+                               B *biasesGrad) {
+    // records all index offsets for output tensor
+    array<DimSize_t, 4> oOffsets{0, 0, 0, 0};
+    for (DimSize_t batchIdx = 0; batchIdx < oDims[0]; ++batchIdx) {
+        oOffsets[0] = batchIdx * oStrides[0];
+
+        oOffsets[1] = oOffsets[0];
+        for (DimSize_t oChannel = 0; oChannel < oDims[1];
+             ++oChannel, oOffsets[1] += oStrides[1]) {
+
+            oOffsets[2] = oOffsets[1];
+            for (DimSize_t oX = 0; oX < oDims[2];
+                 ++oX, oOffsets[2] += oStrides[2]) {
+
+                oOffsets[3] = oOffsets[2];
+                for (DimSize_t oY = 0; oY < oDims[3];
+                     ++oY, oOffsets[3] += oStrides[3]) {
+                    for (DimSize_t oZ = 0; oZ < oDims[4]; ++oZ) {
+                        biasesGrad[oChannel] += oGrad[oOffsets[3] + oZ];
+                    }
+                }
+            }
+        }
+    }
+}
+
+/**
+ * @brief Backward kernel for 3D Convolution on CPU backend.
+ * @note INPUT & OUTPUT convention is the same as in the
+ * forward function
+ *
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param[in] stride stride attribute of the conv operator
+ * @param[in] dilation dilation attribute of the conv operator
+ * @param[in] kernelDims kernel dimensions
+ * @param[in] inputDims input data dimensions
+ * @param[in] outputDims output data dimensions
+ * @param[in] input_ input tensor.
+ * @param[in] weights_ kernel tensor.
+ * @param[in] oGrad_ output tensor gradient.
+ * @param[inout] iGrad_ input tensor gradient.
+ * @param[inout] weightsGrad_ kernel weights tensor gradients
+ * @param[inout] biasesGrad_ kernel biases tensor gradients
+ */
+template <class I, class W, class B, class O>
+void ConvImpl3D_cpu_backward_kernel(const array<DimSize_t, 3> &stride,
+                                    const array<DimSize_t, 3> &dilation,
+                                    const array<DimSize_t, 3> &kernelDims,
+                                    const array<DimSize_t, 5> &inputDims,
+                                    const array<DimSize_t, 5> &outputDims,
+                                    const void *input_,
+                                    const void *weights_,
+                                    const void *oGrad_,
+                                    void *iGrad_,
+                                    void *weightsGrad_,
+                                    void *biasesGrad_) {
+
+    const I *input = static_cast<const I *>(input_);
+    I *iGrad = static_cast<I *>(iGrad_);
+    const O *outputGrad = static_cast<const O *>(oGrad_);
+    const W *weights = static_cast<const W *>(weights_);
+    W *weightsGrad = static_cast<W *>(weightsGrad_);
+
+    //////////////////////////////
+    // COMPUTING STRIDES
+    //////////////////////////////
+    // NOTE: each ...Strides entry holds the number of values spanned by one
+    // step of the corresponding dimension; they are used to compute the
+    // index offset of values while iterating over each tensor.
+    // NOTE: they are 1 item shorter than their corresponding tensor shape,
+    // as the total number of elements is only needed for gradient
+    // initialization.
+
+    // {batch_stride, channel_stride, dim0_stride, dim1_stride}
+    const array<DimSize_t, 4> inputStrides{
+        inputDims[1] * inputDims[2] * inputDims[3] * inputDims[4],
+        inputDims[2] * inputDims[3] * inputDims[4],
+        inputDims[3] * inputDims[4],
+        inputDims[4]};
+    const DimSize_t nbEltsInput = inputDims[0] * inputStrides[0];
+
+    // {batch_stride, channel_stride, dim0_stride, dim1_stride}
+    const array<DimSize_t, 4> outputStrides{
+        outputDims[1] * outputDims[2] * outputDims[3] * outputDims[4],
+        outputDims[2] * outputDims[3] * outputDims[4],
+        outputDims[3] * outputDims[4],
+        outputDims[4]};
+
+    // NOTE: kernel dims = {outChannels, inChannels, kernelDim0, kernelDim1,
+    //                      kernelDim2}
+    // kernel strides = {outChannel_stride, inChannel_stride,
+    //                   kernelDim0_stride, kernelDim1_stride}
+    const array<DimSize_t, 4> kernelStrides{
+        inputDims[1] * kernelDims[0] * kernelDims[1] * kernelDims[2],
+        kernelDims[0] * kernelDims[1] * kernelDims[2],
+        kernelDims[1] * kernelDims[2],
+        kernelDims[2]};
+
+    const DimSize_t nbEltsKernel = outputDims[1] * kernelStrides[0];
+
+    ////////////////////////////
+    // prepping gradient arrays
+    std::fill(iGrad, iGrad + nbEltsInput, I(0));
+    std::fill(weightsGrad, weightsGrad + nbEltsKernel, W(0));
+
+    conv3DBackwardInput(stride,
+                        dilation,
+                        kernelDims,
+                        kernelStrides,
+                        weights,
+                        outputDims,
+                        outputStrides,
+                        outputGrad,
+                        inputDims,
+                        inputStrides,
+                        iGrad);
+
+    conv3DBackwardWeights(inputDims,
+                          inputStrides,
+                          input,
+                          outputDims,
+                          outputStrides,
+                          outputGrad,
+                          kernelDims,
+                          kernelStrides,
+                          stride,
+                          dilation,
+                          weightsGrad);
+
+    if (biasesGrad_ != nullptr) {
+        B *biasesGrad = static_cast<B *>(biasesGrad_);
+        std::fill(biasesGrad, biasesGrad + outputDims[1], B(0));
+        conv3DBackwardBias(outputDims, outputStrides, outputGrad, biasesGrad);
+    }
+}
+
+// Kernels registration to implementation entry point
+REGISTRAR(ConvImpl3D_cpu,
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Float32, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           ConvImpl3D_cpu_forward_kernel<float, float, float, float>,
+           ConvImpl3D_cpu_backward_kernel<float, float, float, float>});
+REGISTRAR(ConvImpl3D_cpu,
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Float16, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           ConvImpl3D_cpu_forward_kernel<half_float::half,
+                                         half_float::half,
+                                         half_float::half,
+                                         half_float::half>,
+           ConvImpl3D_cpu_backward_kernel<half_float::half,
+                                          half_float::half,
+                                          half_float::half,
+                                          half_float::half>});
+REGISTRAR(ConvImpl3D_cpu,
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Float64, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           ConvImpl3D_cpu_forward_kernel<double, double, double, double>,
+           ConvImpl3D_cpu_backward_kernel<double, double, double, double>});
+REGISTRAR(ConvImpl3D_cpu,
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Int32, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           ConvImpl3D_cpu_forward_kernel<std::int32_t,
+                                         std::int32_t,
+                                         std::int32_t,
+                                         std::int32_t>,
+           ConvImpl3D_cpu_backward_kernel<std::int32_t,
+                                          std::int32_t,
+                                          std::int32_t,
+                                          std::int32_t>});
 } // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index eae5f109f6af8298b90cc8e505ff44eff51bab5c..22f28d504be2a071b5b9e06abbf8106cc836c32d 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -26,7 +26,6 @@ template <>
 void ConvImpl1D_cpu::forward() {
     const auto& op_ = static_cast<const Conv_Op<1>&>(mOp);
 
-    // FIXME: uncomment the following code once memory handling will work
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
 
@@ -104,7 +103,6 @@ template <>
 void ConvImpl2D_cpu::forward() {
     const auto& op_ = dynamic_cast<const Conv_Op<2>&>(mOp);
 
-    // FIXME: uncomment the following code once memory handling will work
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
 
@@ -178,4 +176,79 @@ void ConvImpl2D_cpu::backward() {
                       op.getInput(2) ? inputBiasGrad.getImpl()->rawPtr() : nullptr);
 }
 
+template <>
+void ConvImpl3D_cpu::forward() {
+    const auto& op_ = dynamic_cast<const Conv_Op<3>&>(mOp);
+
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+
+    // Find the correct kernel type
+    const auto impl = Registrar<ConvImpl3D_cpu>::create(getBestMatch(getRequiredSpec()));
+    // Call kernel
+    impl.forward(op_.strideDims(),
+                 op_.dilationDims(),
+                 op_.kernelDims(),
+                 op_.getInput(0)->template dims<5>(),  // input dimensions
+                 op_.getOutput(0)->template dims<5>(), // output dimensions
+                 input0.getImpl()->rawPtr(), // input
+                 input1.getImpl()->rawPtr(), // weight
+                 op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
+                 getCPUPtr(mOp.getRawOutput(0)) // output
+    );
+}
+
+template <>
+void ConvImpl3D_cpu::backward() {
+    const auto &op = dynamic_cast<const Conv3D_Op &>(mOp);
+    const auto &outputGrad = op.getOutput(0)->grad();
+    AIDGE_ASSERT(outputGrad, "{}: missing output #0 gradient", op.type());
+    AIDGE_ASSERT(op.getInput(0)->grad(),
+                 "{}: missing data input(#0) gradient",
+                 op.type());
+    AIDGE_ASSERT(op.getInput(1)->grad(),
+                 "{}: missing weight input(#1) gradient",
+                 op.type());
+
+    std::shared_ptr<Tensor> inputDataGradFallback, inputWeightGradFallback,
+        inputBiasGradFallback;
+    const auto &inputDataGrad =
+        op.getInput(0)->grad()->refCastFrom(inputDataGradFallback,
+                                            *(op.getOutput(0)));
+    const auto &inputWeightGrad =
+        op.getInput(1)->grad()->refCastFrom(inputWeightGradFallback,
+                                            *(op.getOutput(0)));
+    const auto &inputBiasGrad =
+        (op.getInput(2) && op.getInput(2)->grad())
+            ? op.getInput(2)->grad()->refCastFrom(inputBiasGradFallback,
+                                                  *(op.getOutput(0)))
+            : Tensor();
+
+    // Call kernel
+    const auto impl =
+        Registrar<ConvImpl3D_cpu>::create(getBestMatch(getRequiredSpec()));
+    impl.backward(
+        op.strideDims(),
+        op.dilationDims(),
+        op.kernelDims(),
+        op.getInput(0)->template dims<5>(),
+        op.getOutput(0)->template dims<5>(),
+
+        getCPUPtr(op.getInput(0)),
+        getCPUPtr(op.getInput(1)),
+        getCPUPtr(outputGrad),
+        inputDataGrad.getImpl()->rawPtr(),
+        inputWeightGrad.getImpl()->rawPtr(),
+        op.getInput(2) ? inputBiasGrad.getImpl()->rawPtr() : nullptr);
+}
+
 } // namespace Aidge
diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp
index c7242bbb6f0c7ba6632d1d5937b72e2a0d5cc218..a47b315bd89ee7e9f054311dc88c04767e518c0a 100644
--- a/unit_tests/operator/Test_ConvImpl.cpp
+++ b/unit_tests/operator/Test_ConvImpl.cpp
@@ -9,13 +9,14 @@
  *
  ********************************************************************************/
 
+#include <aidge/utils/Types.h>
 #include <memory>
 
 #include <catch2/catch_test_macros.hpp>
 #include <fmt/core.h>
 
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
-#include "aidge/data/Data.hpp"  // DataType
+#include "aidge/data/Data.hpp" // DataType
 #include "aidge/data/Tensor.hpp"
 #include "aidge/filler/Filler.hpp"
 #include "aidge/graph/Node.hpp"
@@ -23,7 +24,45 @@
 #include "aidge/utils/TensorUtils.hpp"
 #include "aidge/operator/Pad.hpp"
 
-using namespace Aidge;
+namespace Aidge {
+
+// NOTE: batchSize, dataSize and padding are currently unused; they are kept
+// so that every test section declares its setup the same way.
+template <DimSize_t DIM>
+static std::shared_ptr<OperatorTensor>
+setupTestConv(const DimSize_t batchSize,
+              const DimSize_t inChannels,
+              const DimSize_t outChannels,
+              const std::array<DimSize_t, DIM> kernelSize,
+              const std::array<DimSize_t, DIM> dataSize,
+              const std::array<DimSize_t, DIM> stride,
+              const std::array<DimSize_t, DIM> dilation,
+              const std::array<DimSize_t, 2 * DIM> padding,
+              const std::shared_ptr<Tensor> input,
+              const std::shared_ptr<Tensor> weights,
+              const std::shared_ptr<Tensor> biases) {
+    input->setBackend("cpu");
+    weights->setBackend("cpu");
+    biases->setBackend("cpu");
+    std::shared_ptr<Node> convNode;
+    convNode = Conv(inChannels,
+                    outChannels,
+                    kernelSize,
+                    "myconv",
+                    std::array<DimSize_t, DIM>({stride}),
+                    dilation);
+    auto op =
+        std::static_pointer_cast<OperatorTensor>(convNode->getOperator());
+
+    op->setDataType(DataType::Float32);
+    op->setBackend("cpu");
+
+    op->associateInput(0, input);
+    op->associateInput(1, weights);
+    op->associateInput(2, biases);
+
+    REQUIRE_NOTHROW(op->forwardDims(true));
+
+    return op;
+}
 
 /**
  * @brief ConvDepthWise reference cpp backend forward implmentation tests.
@@ -44,6 +83,7 @@ using namespace Aidge; * stride [2,2], dilation [2,2] */ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") { + SECTION("2D") { SECTION("Conv with kernel [3,3]") { SECTION("No stride, no dilation") { std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv"); @@ -1714,47 +1754,216 @@ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") { //fmt::print("{:.^20}\n", "truth"); //(*expectedOutput).print(); REQUIRE(*(conv_op.getOutput(0)) == *expectedOutput); + } } } + SECTION("3D") { + constexpr DimSize_t DIM = 3; + SECTION("minimal test, no stride, no dilation, 1 in/outChannel") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 1; + constexpr DimSize_t outChannels = 1; + constexpr std::array<DimSize_t, DIM> kernelSize = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> inDataSize = {3, 3, 3}; -} + constexpr std::array<DimSize_t, DIM> stride = {1, 1, 1}; + constexpr std::array<DimSize_t, DIM> dilation = {1, 1, 1}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0, 0}); -template <DimSize_t DIM> -std::shared_ptr<OperatorTensor> -setupTestConv(const DimSize_t batchSize, - const DimSize_t inChannels, - const DimSize_t outChannels, - const std::array<DimSize_t, DIM> kernelSize, - const std::array<DimSize_t, DIM> dataSize, - const std::array<DimSize_t, DIM> stride, - const std::array<DimSize_t, DIM> dilation, - const std::array<DimSize_t, 2 * DIM> padding, - const std::shared_ptr<Tensor> input, - const std::shared_ptr<Tensor> weights, - const std::shared_ptr<Tensor> biases) { - input->setBackend("cpu"); - weights->setBackend("cpu"); - biases->setBackend("cpu"); - std::shared_ptr<Node> convNode; - convNode = Conv(inChannels, - outChannels, - kernelSize, - "myconv", - std::array<DimSize_t, DIM>({stride}), - dilation); - auto op = - std::static_pointer_cast<OperatorTensor>(convNode->getOperator()); + constexpr std::array<DimSize_t, DIM> outDataSize = {2, 2, 2}; - op->setDataType(DataType::Float32); - op->setBackend("cpu"); + auto inputSize = std::vector<DimSize_t>( + {batchSize, inChannels, inDataSize[0], inDataSize[1]}); - op->associateInput(0, input); - op->associateInput(1, weights); - op->associateInput(2, biases); + auto input = std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{1., 2., 3.}, {4., 5., 6.}, {7., 8., 9.}}, - REQUIRE_NOTHROW(op->forwardDims(true)); + {{10., 11., 12.}, {13., 14., 15.}, {16., 17., 18.}}, - return op; + {{19., 20., 21.}, {22., 23., 24.}, {25., 26., 27.}}}}}})); + auto weights = std::make_shared<Tensor>( + Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>({{{{{{0.1, 0.2}, {0.3, 0.4}}, + + {{0.5, 0.6}, {0.7, 0.8}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.01}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + REQUIRE_NOTHROW(op->forward()); + + auto expectedOutput = Tensor(Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>( + {{{{{{35.610001, 39.209999}, {46.410000, 50.010002}}, + + {{68.010002, 71.610001}, {78.809998, 82.410004}}}}}})); + + CHECK(approxEq<float, float>(*op->getOutput(0), expectedOutput)); + } + SECTION("stride & dilation, multiple outChannels") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 1; + constexpr DimSize_t outChannels = 2; + constexpr 
std::array<DimSize_t, DIM> kernelSize = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> inDataSize = {8, 8, 8}; + + constexpr std::array<DimSize_t, DIM> stride = {2, 3, 4}; + constexpr std::array<DimSize_t, DIM> dilation = {4, 3, 2}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0, 0}); + + constexpr std::array<DimSize_t, DIM> outDataSize = {2, 2, 2}; + + auto inputSize = std::vector<DimSize_t>( + {batchSize, inChannels, inDataSize[0], inDataSize[1]}); + + auto input = std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{1., 2., 3., 4., 5., 6., 7., 8.}, + {9., 10., 11., 12., 13., 14., 15., 16.}, + {17., 18., 19., 20., 21., 22., 23., 24.}, + {25., 26., 27., 28., 29., 30., 31., 32.}, + {33., 34., 35., 36., 37., 38., 39., 40.}, + {41., 42., 43., 44., 45., 46., 47., 48.}, + {49., 50., 51., 52., 53., 54., 55., 56.}, + {57., 58., 59., 60., 61., 62., 63., 64.}}, + + {{65., 66., 67., 68., 69., 70., 71., 72.}, + {73., 74., 75., 76., 77., 78., 79., 80.}, + {81., 82., 83., 84., 85., 86., 87., 88.}, + {89., 90., 91., 92., 93., 94., 95., 96.}, + {97., 98., 99., 100., 101., 102., 103., 104.}, + {105., 106., 107., 108., 109., 110., 111., 112.}, + {113., 114., 115., 116., 117., 118., 119., 120.}, + {121., 122., 123., 124., 125., 126., 127., 128.}}, + + {{129., 130., 131., 132., 133., 134., 135., 136.}, + {137., 138., 139., 140., 141., 142., 143., 144.}, + {145., 146., 147., 148., 149., 150., 151., 152.}, + {153., 154., 155., 156., 157., 158., 159., 160.}, + {161., 162., 163., 164., 165., 166., 167., 168.}, + {169., 170., 171., 172., 173., 174., 175., 176.}, + {177., 178., 179., 180., 181., 182., 183., 184.}, + {185., 186., 187., 188., 189., 190., 191., 192.}}, + + {{193., 194., 195., 196., 197., 198., 199., 200.}, + {201., 202., 203., 204., 205., 206., 207., 208.}, + {209., 210., 211., 212., 213., 214., 215., 216.}, + {217., 218., 219., 220., 221., 222., 223., 224.}, + {225., 226., 227., 228., 229., 230., 231., 232.}, + {233., 234., 235., 236., 237., 238., 239., 240.}, + {241., 242., 243., 244., 245., 246., 247., 248.}, + {249., 250., 251., 252., 253., 254., 255., 256.}}, + + {{257., 258., 259., 260., 261., 262., 263., 264.}, + {265., 266., 267., 268., 269., 270., 271., 272.}, + {273., 274., 275., 276., 277., 278., 279., 280.}, + {281., 282., 283., 284., 285., 286., 287., 288.}, + {289., 290., 291., 292., 293., 294., 295., 296.}, + {297., 298., 299., 300., 301., 302., 303., 304.}, + {305., 306., 307., 308., 309., 310., 311., 312.}, + {313., 314., 315., 316., 317., 318., 319., 320.}}, + + {{321., 322., 323., 324., 325., 326., 327., 328.}, + {329., 330., 331., 332., 333., 334., 335., 336.}, + {337., 338., 339., 340., 341., 342., 343., 344.}, + {345., 346., 347., 348., 349., 350., 351., 352.}, + {353., 354., 355., 356., 357., 358., 359., 360.}, + {361., 362., 363., 364., 365., 366., 367., 368.}, + {369., 370., 371., 372., 373., 374., 375., 376.}, + {377., 378., 379., 380., 381., 382., 383., 384.}}, + + {{385., 386., 387., 388., 389., 390., 391., 392.}, + {393., 394., 395., 396., 397., 398., 399., 400.}, + {401., 402., 403., 404., 405., 406., 407., 408.}, + {409., 410., 411., 412., 413., 414., 415., 416.}, + {417., 418., 419., 420., 421., 422., 423., 424.}, + {425., 426., 427., 428., 429., 430., 431., 432.}, + {433., 434., 435., 436., 437., 438., 439., 440.}, + {441., 442., 443., 444., 445., 446., 447., 448.}}, + + {{449., 450., 451., 452., 453., 454., 455., 456.}, + {457., 458., 459., 460., 461., 462., 463., 464.}, + {465., 
466., 467., 468., 469., 470., 471., 472.}, + {473., 474., 475., 476., 477., 478., 479., 480.}, + {481., 482., 483., 484., 485., 486., 487., 488.}, + {489., 490., 491., 492., 493., 494., 495., 496.}, + {497., 498., 499., 500., 501., 502., 503., 504.}, + {505., 506., 507., 508., 509., 510., 511., 512.}}}}}})); + + auto weights = std::make_shared<Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{0.1, 0.2}, {0.3, 0.4}}, {{0.5, 0.6}, {0.7, 0.8}}}}, + + {{{{0.9, 1.0}, {1.1, 1.2}}, {{1.3, 1.4}, {1.5, 1.6}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.01, 0.02}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + REQUIRE_NOTHROW(op->forward()); + + auto expectedOutput = Tensor(Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>( + {{{{{{726.010010, 740.410034}, {812.409973, 826.809998}}, + + {{1186.810059, 1201.210083}, {1273.210083, 1287.609985}}}, + + {{{1634.820068, 1674.820068}, {1874.820068, 1914.819946}}, + + {{2914.820312, 2954.820068}, + {3154.820068, 3194.819824}}}}}})); + + CHECK(approxEq<float, float>(*op->getOutput(0), expectedOutput)); + } + } } TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { @@ -2713,4 +2922,512 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { } } } + SECTION("3D") { + constexpr DimSize_t DIM = 3; + SECTION("basic test, square kernel, stride, dilation") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 1; + constexpr DimSize_t outChannels = 1; + constexpr std::array<DimSize_t, DIM> kernelSize = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> inDataSize = {4, 4, 4}; + + constexpr std::array<DimSize_t, DIM> stride = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> dilation = {2, 2, 2}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0}); + + constexpr std::array<DimSize_t, DIM> outDataSize = {1, 1, 1}; + + auto inputSize = std::vector<DimSize_t>({batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]}); + + auto input = std::make_shared<Tensor>( + Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>({{{{{{1., 2., 3., 4.}, + {5., 6., 7., 8.}, + {9., 10., 11., 12.}, + {13., 14., 15., 16.}}, + + {{17., 18., 19., 20.}, + {21., 22., 23., 24.}, + {25., 26., 27., 28.}, + {29., 30., 31., 32.}}, + + {{33., 34., 35., 36.}, + {37., 38., 39., 40.}, + {41., 42., 43., 44.}, + {45., 46., 47., 48.}}, + + {{49., 50., 51., 52.}, + {53., 54., 55., 56.}, + {57., 58., 59., 60.}, + {61., 62., 63., 64.}}}}}})); + + auto weights = std::make_shared<Tensor>( + Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>({{{{{{0.1, 0.2}, {0.3, 0.4}}, + + {{0.5, 0.6}, {0.7, 0.8}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.01}})); + + auto outputGrad = std::make_shared<Tensor>( + Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{1.}}}}}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + //////////////////////////////////// + // setup gradients for backward + op->getOutput(0)->setGrad(outputGrad); + + REQUIRE_NOTHROW(op->backward()); + + SECTION("Input Grad") { + auto 
expectedInputGrad = std::make_shared<Tensor>( + Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>({{{{{{0.1, 0.0, 0.2, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.3, 0.0, 0.4, 0.0}, + {0.0, 0.0, 0.0, 0.0}}, + + {{0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}}, + + {{0.5, 0.0, 0.6, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.7, 0.0, 0.8, 0.0}, + {0.0, 0.0, 0.0, 0.0}}, + + {{0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(0)->grad(), + *expectedInputGrad)); + } + SECTION("Weight grad") { + auto expectedWeightsGrad = std::make_shared<Tensor>( + Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>({{{{{{1., 3.}, {9., 11.}}, + + {{33., 35.}, {41., 43.}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(1)->grad(), + *expectedWeightsGrad)); + } + SECTION("Bias Grad") { + auto expectedBiasesGrad = std::make_shared<Tensor>( + Array1D<float, outChannels>({{1.}})); + CHECK(approxEq<float, float>(*op->getInput(2)->grad(), + *expectedBiasesGrad)); + } + } + SECTION("square kernel, multiple in/out channels") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 2; + constexpr DimSize_t outChannels = 1; + constexpr std::array<DimSize_t, DIM> kernelSize = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> inDataSize = {2, 2, 2}; + + constexpr std::array<DimSize_t, DIM> stride = {1, 1, 1}; + constexpr std::array<DimSize_t, DIM> dilation = {1, 1, 1}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0}); + + constexpr std::array<DimSize_t, DIM> outDataSize = {1, 1, 1}; + + auto inputSize = std::vector<DimSize_t>({batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]}); + + auto input = std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{1.000000, 2.000000}, {3.000000, 4.000000}}, + + {{5.000000, 6.000000}, {7.000000, 8.000000}}}, + + {{{9.000000, 10.000000}, {11.000000, 12.000000}}, + + {{13.000000, 14.000000}, {15.000000, 16.000000}}}}}})); + + auto weights = std::make_shared<Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{0.100000, 0.200000}, {0.300000, 0.400000}}, + + {{0.500000, 0.600000}, {0.700000, 0.800000}}}, + + {{{0.900000, 1.000000}, {1.100000, 1.200000}}, + + {{1.300000, 1.400000}, {1.500000, 1.600000}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.010000}})); + + auto outputGrad = std::make_shared<Tensor>( + Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{1.000000}}}}}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + //////////////////////////////////// + // setup gradients for backward + op->getOutput(0)->setGrad(outputGrad); + + REQUIRE_NOTHROW(op->backward()); + + SECTION("Input Grad") { + auto expectedInputGrad = + std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>({{{{{{0.100000, 0.200000}, + {0.300000, 0.400000}}, + + {{0.500000, 0.600000}, + {0.700000, 0.800000}}}, + + + {{{0.900000, 1.000000}, + {1.100000, 1.200000}}, + + {{1.300000, 1.400000}, + {1.500000, 1.600000}}}}}})); + CHECK(approxEq<float, 
float>(*op->getInput(0)->grad(), + *expectedInputGrad)); + } + SECTION("Weight grad") { + auto expectedWeightsGrad = + std::make_shared<Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>({{{{{{ 1.000000, 2.000000}, + { 3.000000, 4.000000}}, + + {{ 5.000000, 6.000000}, + { 7.000000, 8.000000}}}, + + + {{{ 9.000000, 10.000000}, + {11.000000, 12.000000}}, + + {{13.000000, 14.000000}, + {15.000000, 16.000000}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(1)->grad(), + *expectedWeightsGrad)); + } + SECTION("Bias Grad") { + auto expectedBiasesGrad = std::make_shared<Tensor>( + Array1D<float, outChannels>({{1.000000}})); + CHECK(approxEq<float, float>(*op->getInput(2)->grad(), + *expectedBiasesGrad)); + } + } + SECTION("non square kernel, stride, dilation, multiple " + "in/outchannels") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 2; + constexpr DimSize_t outChannels = 2; + constexpr std::array<DimSize_t, DIM> kernelSize = {1, 2, 3}; + constexpr std::array<DimSize_t, DIM> inDataSize = {5, 5, 5}; + + constexpr std::array<DimSize_t, DIM> stride = {1, 2, 3}; + constexpr std::array<DimSize_t, DIM> dilation = {3, 2, 1}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0}); + + constexpr std::array<DimSize_t, DIM> outDataSize = {5, 2, 1}; + + auto inputSize = std::vector<DimSize_t>({batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]}); + + auto input = std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{1., 2., 3., 4., 5.}, + {6., 7., 8., 9., 10.}, + {11., 12., 13., 14., 15.}, + {16., 17., 18., 19., 20.}, + {21., 22., 23., 24., 25.}}, + + {{26., 27., 28., 29., 30.}, + {31., 32., 33., 34., 35.}, + {36., 37., 38., 39., 40.}, + {41., 42., 43., 44., 45.}, + {46., 47., 48., 49., 50.}}, + + {{51., 52., 53., 54., 55.}, + {56., 57., 58., 59., 60.}, + {61., 62., 63., 64., 65.}, + {66., 67., 68., 69., 70.}, + {71., 72., 73., 74., 75.}}, + + {{76., 77., 78., 79., 80.}, + {81., 82., 83., 84., 85.}, + {86., 87., 88., 89., 90.}, + {91., 92., 93., 94., 95.}, + {96., 97., 98., 99., 100.}}, + + {{101., 102., 103., 104., 105.}, + {106., 107., 108., 109., 110.}, + {111., 112., 113., 114., 115.}, + {116., 117., 118., 119., 120.}, + {121., 122., 123., 124., 125.}}}, + + {{{126., 127., 128., 129., 130.}, + {131., 132., 133., 134., 135.}, + {136., 137., 138., 139., 140.}, + {141., 142., 143., 144., 145.}, + {146., 147., 148., 149., 150.}}, + + {{151., 152., 153., 154., 155.}, + {156., 157., 158., 159., 160.}, + {161., 162., 163., 164., 165.}, + {166., 167., 168., 169., 170.}, + {171., 172., 173., 174., 175.}}, + + {{176., 177., 178., 179., 180.}, + {181., 182., 183., 184., 185.}, + {186., 187., 188., 189., 190.}, + {191., 192., 193., 194., 195.}, + {196., 197., 198., 199., 200.}}, + + {{201., 202., 203., 204., 205.}, + {206., 207., 208., 209., 210.}, + {211., 212., 213., 214., 215.}, + {216., 217., 218., 219., 220.}, + {221., 222., 223., 224., 225.}}, + + {{226., 227., 228., 229., 230.}, + {231., 232., 233., 234., 235.}, + {236., 237., 238., 239., 240.}, + {241., 242., 243., 244., 245.}, + {246., 247., 248., 249., 250.}}}}}})); + + auto weights = std::make_shared<Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}}, + + {{{0.7, 0.8, 0.9}, {1.0, 1.1, 1.2}}}}, + + {{{{1.3, 1.4, 1.5}, {1.6, 1.7, 1.8}}}, + + {{{1.9, 2.0, 2.1}, {2.2, 2.3, 
2.4}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.01, 0.02}})); + + auto outputGrad = std::make_shared<Tensor>( + Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{1.}, {2.}}, + + {{3.}, {4.}}, + + {{5.}, {6.}}, + + {{7.}, {8.}}, + + {{9.}, {10.}}}, + + {{{11.}, {12.}}, + + {{13.}, {14.}}, + + {{15.}, {16.}}, + + {{17.}, {18.}}, + + {{19.}, {20.}}}}}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + //////////////////////////////////// + // setup gradients for backward + op->getOutput(0)->setGrad(outputGrad); + + REQUIRE_NOTHROW(op->backward()); + + SECTION("Input Grad") { + auto expectedInputGrad = + std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{14.400001, 15.599999, 16.799999, 0., 0.}, + {0., 0., 0., 0., 0.}, + {33.800003, 36.400002, 39., 0., 0.}, + {0., 0., 0., 0., 0.}, + {20., 21.400002, 22.800001, 0., 0.}}, + + {{17.200001, 18.799999, 20.400000, 0., 0.}, + {0., 0., 0., 0., 0.}, + {40.599998, 44., 47.400002, 0., 0.}, + {0., 0., 0., 0., 0.}, + {24., 25.800001, 27.600000, 0., 0.}}, + + {{20.000002, 22., 24., 0., 0.}, + {0., 0., 0., 0., 0.}, + {47.400002, 51.599998, 55.800003, 0., 0.}, + {0., 0., 0., 0., 0.}, + {28., 30.200001, 32.400002, 0., 0.}}, + + {{22.800001, 25.199999, 27.600000, 0., 0.}, + {0., 0., 0., 0., 0.}, + {54.200001, 59.200001, 64.200005, 0., 0.}, + {0., 0., 0., 0., 0.}, + {32., 34.600002, 37.200001, 0., 0.}}, + + {{25.600002, 28.400000, 31.200001, 0., 0.}, + {0., 0., 0., 0., 0.}, + {61., 66.800003, 72.600006, 0., 0.}, + {0., 0., 0., 0., 0.}, + {36., 39., 42., 0., 0.}}}, + + {{{21.600000, 22.799999, 24.000002, 0., 0.}, + {0., 0., 0., 0., 0.}, + {49.400002, 52., 54.600002, 0., 0.}, + {0., 0., 0., 0., 0.}, + {28.400002, 29.799999, 31.200001, 0., 0.}}, + + {{26.799999, 28.400000, 30.000002, 0., 0.}, + {0., 0., 0., 0., 0.}, + {61., 64.400002, 67.800003, 0., 0.}, + {0., 0., 0., 0., 0.}, + {34.799999, 36.599998, 38.400002, 0., 0.}}, + + {{32., 34., 36.000004, 0., 0.}, + {0., 0., 0., 0., 0.}, + {72.599998, 76.800003, 81., 0., 0.}, + {0., 0., 0., 0., 0.}, + {41.200001, 43.400002, 45.600002, 0., 0.}}, + + {{37.200001, 39.599998, 42.000004, 0., 0.}, + {0., 0., 0., 0., 0.}, + {84.199997, 89.199997, 94.200005, 0., 0.}, + {0., 0., 0., 0., 0.}, + {47.600002, 50.200001, 52.800003, 0., 0.}}, + + {{42.399998, 45.200001, 48.000004, 0., 0.}, + {0., 0., 0., 0., 0.}, + {95.800003, 101.599998, 107.400009, 0., 0.}, + {0., 0., 0., 0., 0.}, + {54., 57., 60., 0., 0.}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(0)->grad(), + *expectedInputGrad)); + } + SECTION("Weight grad") { + auto expectedWeightsGrad = std::make_shared< + Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{4105., 4160., 4215.}, {4655., 4710., 4765.}}}, + + {{{10980., 11035., 11090.}, {11530., 11585., 11640.}}}}, + + {{{{9705., 9860., 10015.}, {11255., 11410., 11565.}}}, + + {{{29080., 29235., 29390.}, + {30630., 30785., 30940.}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(1)->grad(), + *expectedWeightsGrad)); + } + SECTION("Bias Grad") { + auto expectedBiasesGrad = std::make_shared<Tensor>( + Array1D<float, outChannels>({{55., 155.}})); + CHECK(approxEq<float, float>(*op->getInput(2)->grad(), + *expectedBiasesGrad)); + } + } + } } + +} // namespace Aidge
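For reference, a minimal sketch (not part of the patch) of how the new 3D convolution can be driven end to end, using only the API already exercised by the tests above; the header set is assumed to mirror the test file, and the shapes and values are illustrative:

```cpp
// Sketch: one batch, one channel, 2x2x2 input; a 1x1x1 kernel of weight 2
// and bias 0.5, so the expected output is simply 2 * x + 0.5.
#include <array>
#include <memory>

#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"

using namespace Aidge;

int main() {
    auto input = std::make_shared<Tensor>(Array5D<float, 1, 1, 2, 2, 2>(
        {{{{{{1., 2.}, {3., 4.}}, {{5., 6.}, {7., 8.}}}}}}));
    auto weights = std::make_shared<Tensor>(
        Array5D<float, 1, 1, 1, 1, 1>({{{{{{2.}}}}}}));
    auto biases = std::make_shared<Tensor>(Array1D<float, 1>({{0.5}}));
    input->setBackend("cpu");
    weights->setBackend("cpu");
    biases->setBackend("cpu");

    auto convNode = Conv(1,                                  // inChannels
                         1,                                  // outChannels
                         std::array<DimSize_t, 3>{1, 1, 1},  // kernel
                         "conv3d",
                         std::array<DimSize_t, 3>{1, 1, 1},  // stride
                         std::array<DimSize_t, 3>{1, 1, 1}); // dilation
    auto op =
        std::static_pointer_cast<OperatorTensor>(convNode->getOperator());
    op->setDataType(DataType::Float32);
    op->setBackend("cpu");
    op->associateInput(0, input);
    op->associateInput(1, weights);
    op->associateInput(2, biases);

    op->forwardDims(true); // output shape resolves to 1x1x2x2x2
    op->forward();         // dispatches to ConvImpl3D_cpu_forward_kernel
    op->getOutput(0)->print();
    return 0;
}
```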